Unverified commit cb9ee239, authored by NLP-LOVE, committed by GitHub

Finished Attention

Parent cb390998
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 11 16:11:18 2017
@author: 代码医生 (QQ group: 40016981; WeChat official account: xiangyuejiqiren)
@blog:http://blog.csdn.net/lijin6249
"""
import sys
import os
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.python.platform import gfile
from random import shuffle
#from multiprocessing import Process, Lock
#import time
#from math import floor
#import os
import re
#get the list of files in a directory
def getRawFileList( path):
files = []
names = []
for f in os.listdir(path):
if not f.endswith("~") and not f == "":
files.append(os.path.join(path, f))
names.append(f)
return files,names
#read the segmented Chinese words from a text file
def get_ch_lable(txt_file,Isch=True,normalize_digits=False):
labels= list()#""
labelssz = []
with open(txt_file, 'rb') as f:
for label in f:
linstr1 =label.decode('utf-8')
#labels =label.decode('gb2312').split()
#linstr1 = label.decode('gb2312')
if normalize_digits :
linstr1=re.sub(r'\d+',_NUM,linstr1)
notoken = basic_tokenizer(linstr1 )
if Isch:
notoken = fenci(notoken)
else:
notoken = notoken.split()
#labels =labels+notoken_ci#label.decode('gb2312')
labels.extend(notoken)
labelssz.append(len(labels))
return labels,labelssz
#read the text of every file under a directory
def get_ch_path_text(raw_data_dir,Isch=True,normalize_digits=False):
text_files,_ = getRawFileList(raw_data_dir)
labels = []
training_dataszs = list([0])
#np.reshape(training_dataszs,(1,-1))
if len(text_files)== 0:
print("err:no files in ",raw_data_dir)
return labels,training_dataszs
print(len(text_files),"files,one is",text_files[0])
shuffle(text_files)
for text_file in text_files:
training_data,training_datasz =get_ch_lable(text_file,Isch,normalize_digits)
# notoken = basic_tokenizer(training_data)
# notoken_ci = fenci(notoken)
training_ci = np.array(training_data)
training_ci = np.reshape(training_ci, [-1, ])
labels.append(training_ci)
training_datasz =np.array( training_datasz)+training_dataszs[-1]
training_dataszs.extend(list(training_datasz))
print("here",training_dataszs)
return labels,training_dataszs
def basic_tokenizer(sentence):
_WORD_SPLIT = "([.,!?\"':;)(])"
_CHWORD_SPLIT = '、|。|,|‘|’'
str1 = ""
for i in re.split(_CHWORD_SPLIT, sentence):
str1 = str1 +i
str2 = ""
for i in re.split(_WORD_SPLIT , str1):
str2 = str2 +i
return str2
import jieba
jieba.load_userdict("myjiebadict.txt")
def fenci(training_data):
seg_list = jieba.cut(training_data)  # accurate mode (the default)
training_ci = " ".join(seg_list)
training_ci = training_ci.split()
#split the string on whitespace
#training_ci = np.array(training_ci)
#training_ci = np.reshape(training_ci, [-1, ])
return training_ci
import collections
#special system tokens that must be added when the vocabulary is built
_PAD = "_PAD"
_GO = "_GO"
_EOS = "_EOS"
_UNK = "_UNK"
PAD_ID = 0
GO_ID = 1
EOS_ID = 2
UNK_ID = 3
#replacement token for digits in the text; not one of the system tokens
_NUM = "_NUM"
#Isch=True for Chinese, False for English
#build the vocabulary; max_vocabulary_size caps the number of words (e.g. 500)
def create_vocabulary(vocabulary_file, raw_data_dir, max_vocabulary_size,Isch=True, normalize_digits=True):
texts,textssz = get_ch_path_text(raw_data_dir,Isch,normalize_digits)
print( texts[0],len(texts))
print("line count",len(textssz),textssz)
# texts ->
all_words = []
for label in texts:
print("word count",len(label))
all_words += [word for word in label]
print("word count",len(all_words))
training_label, count, dictionary, reverse_dictionary = build_dataset(all_words,max_vocabulary_size)
print("reverse_dictionary",reverse_dictionary,len(reverse_dictionary))
if not gfile.Exists(vocabulary_file):
print("Creating vocabulary %s from data %s" % (vocabulary_file, raw_data_dir))
if len(reverse_dictionary) > max_vocabulary_size:
reverse_dictionary = dict(list(reverse_dictionary.items())[:max_vocabulary_size])
with gfile.GFile(vocabulary_file, mode="w") as vocab_file:
for w in reverse_dictionary:
print(reverse_dictionary[w])
vocab_file.write(reverse_dictionary[w] + "\n")
else:
print("already have vocabulary! do nothing !!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
return training_label, count, dictionary, reverse_dictionary,textssz
def build_dataset(words, n_words):
"""Process raw inputs into a dataset."""
count = [[_PAD, -1],[_GO, -1],[_EOS, -1],[_UNK, -1]]
count.extend(collections.Counter(words).most_common(n_words - 1))
dictionary = dict()
for word, _ in count:
dictionary[word] = len(dictionary)
data = list()
unk_count = 0
for word in words:
if word in dictionary:
index = dictionary[word]
else:
index = UNK_ID  # out-of-vocabulary words map to _UNK
unk_count += 1
data.append(index)
count[UNK_ID][1] = unk_count  # record how many words fell outside the vocabulary
reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
return data, count, dictionary, reversed_dictionary
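# A tiny worked illustration of build_dataset (the words and ids below are
# hypothetical, not taken from the real corpus):
#   words = ["the", "quick", "the", "fox"]
#   data, count, dictionary, reverse_dictionary = build_dataset(words, n_words=10)
#   dictionary -> {'_PAD': 0, '_GO': 1, '_EOS': 2, '_UNK': 3, 'the': 4, 'quick': 5, 'fox': 6}
#   data       -> [4, 5, 4, 6]   (each word replaced by its id)
#   reverse_dictionary maps the ids back to the words, e.g. {0: '_PAD', ..., 4: 'the', ...}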
#write the question ids and answer ids held in data into separate source and target files
def create_seq2seqfile(data ,sorcefile,targetfile,textssz):
print("data",data,len(data))
with open(sorcefile,'w') as sor_f:
with open(targetfile,'w') as tar_f:
for i in range(len(textssz)-1):
print("textssz",i,textssz[i],textssz[i+1],data[textssz[i]:textssz[i+1]])
if (i+1)%2:
sor_f.write(str(data[textssz[i]:textssz[i+1]]).replace(',',' ')[1:-1]+'\n')
else:
tar_f.write(str(data[textssz[i]:textssz[i+1]]).replace(',',' ')[1:-1]+'\n')
def plot_scatter_lengths(title, x_title, y_title, x_lengths, y_lengths):
plt.scatter(x_lengths, y_lengths)
plt.title(title)
plt.xlabel(x_title)
plt.ylabel(y_title)
plt.ylim(0, max(y_lengths))
plt.xlim(0,max(x_lengths))
plt.show()
def plot_histo_lengths(title, lengths):
mu = np.mean(lengths)
sigma = np.std(lengths)
x = np.array(lengths)
n, bins, patches = plt.hist(x, 50, facecolor='green', alpha=0.5)
y = mlab.normpdf(bins, mu, sigma)
plt.plot(bins, y, 'r--')
plt.title(title)
plt.xlabel("Length")
plt.ylabel("Number of Sequences")
plt.xlim(0,max(lengths))
plt.show()
#split the dialogue text line by line (one question line, one answer line) and save it as two files. training_data is the full id data and textssz holds the index of each line
def splitFileOneline(training_data ,textssz):
source_file = os.path.join(data_dir+'fromids/', "data_source_test.txt")
target_file = os.path.join(data_dir+'toids/', "data_target_test.txt")
create_seq2seqfile(training_data,source_file ,target_file,textssz)
def analysisfile(source_file,target_file):
#analyze the source and target files
source_lengths = []
target_lengths = []
with gfile.GFile(source_file, mode="r") as s_file:
with gfile.GFile(target_file, mode="r") as t_file:
source= s_file.readline()
target = t_file.readline()
counter = 0
while source and target:
counter += 1
if counter % 100000 == 0:
print(" reading data line %d" % counter)
sys.stdout.flush()
num_source_ids = len(source.split())
source_lengths.append(num_source_ids)
num_target_ids = len(target.split()) + 1#plus 1 for EOS token
target_lengths.append(num_target_ids)
source, target = s_file.readline(), t_file.readline()
print(target_lengths,source_lengths)
if plot_histograms:
plot_histo_lengths("target lengths", target_lengths)
plot_histo_lengths("source_lengths", source_lengths)
if plot_scatter:
plot_scatter_lengths("target vs source length", "source length","target length", source_lengths, target_lengths)
def initialize_vocabulary(vocabulary_path):
if gfile.Exists(vocabulary_path):
rev_vocab = []
#with gfile.GFile(vocabulary_path, mode="rb") as f:
with gfile.GFile(vocabulary_path, mode="r") as f:
rev_vocab.extend(f.readlines())
rev_vocab = [line.strip() for line in rev_vocab]
vocab = dict([(x, y) for (y, x) in enumerate(rev_vocab)])
return vocab, rev_vocab
else:
raise ValueError("Vocabulary file %s not found.", vocabulary_path)
#convert a sentence into token ids
def sentence_to_ids(sentence, vocabulary,
normalize_digits=True,Isch=True):
if normalize_digits :
sentence=re.sub(r'\d+',_NUM,sentence)
notoken = basic_tokenizer(sentence )
if Isch:
notoken = fenci(notoken)
else:
notoken = notoken.split()
#print("notoken",notoken)
idsdata = [vocabulary.get( w, UNK_ID) for w in notoken]
#print("data",idsdata)
return idsdata
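# Illustrative sketch of sentence_to_ids; the vocabulary below is hypothetical
# (real ids come from dicten.txt / dictch.txt written by create_vocabulary):
#   vocab = {"_PAD": 0, "_GO": 1, "_EOS": 2, "_UNK": 3, "i": 4, "love": 5, "_NUM": 6}
#   sentence_to_ids("i love 1949", vocab, normalize_digits=True, Isch=False)
#   # "1949" is first rewritten to "_NUM", the tokens are then looked up,
#   # and unknown words fall back to UNK_ID  ->  [4, 5, 6]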
#convert one text file into an ids file; on non-Windows systems change the decoding to utf8
def textfile_to_idsfile(data_file_name, target_file_name, vocab,
normalize_digits=True,Isch=True):
if not gfile.Exists(target_file_name):
print("Tokenizing data in %s" % data_file_name)
with gfile.GFile(data_file_name, mode="rb") as data_file:
with gfile.GFile(target_file_name, mode="w") as ids_file:
counter = 0
for line in data_file:
counter += 1
if counter % 100000 == 0:
print(" tokenizing line %d" % counter)
#token_ids = sentence_to_ids(line.decode('gb2312'), vocab,normalize_digits,Isch)
token_ids = sentence_to_ids(line.decode('utf8'), vocab,normalize_digits,Isch)
ids_file.write(" ".join([str(tok) for tok in token_ids]) + "\n")
#convert a directory of text files into ids files
def textdir_to_idsdir(textdir,idsdir,vocab, normalize_digits=True,Isch=True):
text_files,filenames = getRawFileList(textdir)
#np.reshape(training_dataszs,(1,-1))
if len(text_files)== 0:
raise ValueError("err: no files in " + textdir)
print(len(text_files),"files,one is",text_files[0])
for text_file,name in zip(text_files,filenames):
print(text_file,idsdir+name)
textfile_to_idsfile(text_file,idsdir+name,vocab, normalize_digits,Isch)
def ids2texts( indices,rev_vocab):
texts = []
for index in indices:
#texts.append(rev_vocab[index].decode('ascii'))
texts.append(rev_vocab[index])
return texts
data_dir = "fanyichina/"
raw_data_dir = "fanyichina/yuliao/from"
raw_data_dir_to = "fanyichina/yuliao/to"
vocabulary_fileen ="dicten.txt"
vocabulary_filech = "dictch.txt"
plot_histograms = plot_scatter =True
vocab_size =40000
max_num_lines =1
max_target_size = 200
max_source_size = 200
def main():
vocabulary_filenameen = os.path.join(data_dir, vocabulary_fileen)
vocabulary_filenamech = os.path.join(data_dir, vocabulary_filech)
##############################
#create the English vocabulary
training_dataen, counten, dictionaryen, reverse_dictionaryen,textsszen =create_vocabulary(vocabulary_filenameen
,raw_data_dir,vocab_size,Isch=False,normalize_digits = True)
print("training_data",len(training_dataen))
print("dictionary",len(dictionaryen))
#########################
#create the Chinese vocabulary
training_datach, countch, dictionarych, reverse_dictionarych,textsszch =create_vocabulary(vocabulary_filenamech
,raw_data_dir_to,vocab_size,Isch=True,normalize_digits = True)
print("training_datach",len(training_datach))
print("dictionarych",len(dictionarych))
#############################
vocaben, rev_vocaben =initialize_vocabulary(vocabulary_filenameen)
vocabch, rev_vocabch =initialize_vocabulary(vocabulary_filenamech)
print(len(rev_vocaben))
textdir_to_idsdir(raw_data_dir,data_dir+"fromids/",vocaben,normalize_digits=True,Isch=False)
textdir_to_idsdir(raw_data_dir_to,data_dir+"toids/",vocabch,normalize_digits=True,Isch=True)
########################## Analysis
filesfrom,_=getRawFileList(data_dir+"fromids/")
filesto,_=getRawFileList(data_dir+"toids/")
source_train_file_path = filesfrom[0]
target_train_file_path= filesto[0]
analysisfile(source_train_file_path,target_train_file_path)
if __name__=="__main__":
main()
_NUM nz
_PAD nz
_GO nz
_EOS nz
_UNK nz
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""带有注意力机制的Sequence-to-sequence 模型."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import random
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
datautil = __import__("9-33 datautil")
import datautil as data_utils
class Seq2SeqModel(object):
"""Sequence-to-sequence model with attention and for multiple buckets.
This class implements a multi-layer recurrent network as the encoder and an
attention-based decoder. It follows exactly the mechanism described in
http://arxiv.org/abs/1412.7449 - please look there for more details.
Besides LSTM cells, this class can also use GRU cells, and it uses sampled
softmax to handle a large output vocabulary; sampled softmax is described in
section 3 of http://arxiv.org/abs/1412.2007. A single-layer version of this
model, with a bidirectional RNN encoder, is described in
http://arxiv.org/abs/1409.0473.
"""
def __init__(self,
source_vocab_size,
target_vocab_size,
buckets,
size,
num_layers,
dropout_keep_prob,
max_gradient_norm,
batch_size,
learning_rate,
learning_rate_decay_factor,
use_lstm=False,
num_samples=512,
forward_only=False,
dtype=tf.float32):
"""Create the model.
Args:
source_vocab_size: size of the source vocabulary.
target_vocab_size: size of the target vocabulary.
buckets: a list of pairs (I, O), where I is the maximum input length and O the maximum output length, e.g. [(2, 4), (8, 16)].
size: number of units in each layer of the model.
num_layers: number of layers in the model.
dropout_keep_prob: keep probability applied to the cell inputs and outputs.
max_gradient_norm: gradients are clipped to this norm.
batch_size: the batch size used during training.
learning_rate: the initial learning rate.
learning_rate_decay_factor: decay factor for the learning rate.
use_lstm: if True, use LSTM cells instead of GRU cells.
num_samples: number of samples for sampled softmax.
forward_only: if set, the model only runs the forward pass.
dtype: the data type of the internal variables.
"""
self.source_vocab_size = source_vocab_size
self.target_vocab_size = target_vocab_size
self.buckets = buckets
self.batch_size = batch_size
self.dropout_keep_prob_output = dropout_keep_prob
self.dropout_keep_prob_input = dropout_keep_prob
self.learning_rate = tf.Variable(
float(learning_rate), trainable=False, dtype=dtype)
self.learning_rate_decay_op = self.learning_rate.assign(
self.learning_rate * learning_rate_decay_factor)
self.global_step = tf.Variable(0, trainable=False)
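# A minimal construction sketch. The vocabulary sizes below are placeholders;
# the other values mirror the settings used in test.py (hidden_size=100,
# num_layers=2, dropout=1.0, grad_clip=5.0, batch_size=60, learning_rate=0.5,
# lr_decay_factor=0.99):
#   model = Seq2SeqModel(source_vocab_size=40000, target_vocab_size=40000,
#                        buckets=[(20, 20), (40, 40), (50, 50), (60, 60)],
#                        size=100, num_layers=2, dropout_keep_prob=1.0,
#                        max_gradient_norm=5.0, batch_size=60,
#                        learning_rate=0.5, learning_rate_decay_factor=0.99,
#                        forward_only=False)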
# If we use sampled softmax, we need an output projection.
output_projection = None
softmax_loss_function = None
# Sampled softmax only makes sense if the sample count is smaller than the vocabulary size.
if num_samples > 0 and num_samples < self.target_vocab_size:
w_t = tf.get_variable("proj_w", [self.target_vocab_size, size], dtype=dtype)
w = tf.transpose(w_t)
b = tf.get_variable("proj_b", [self.target_vocab_size], dtype=dtype)
output_projection = (w, b)
def sampled_loss(labels, logits):
labels = tf.reshape(labels, [-1, 1])
#The sampled_softmax_loss must be computed in 32-bit floats to avoid numerical instability.
local_w_t = tf.cast(w_t, tf.float32)
local_b = tf.cast(b, tf.float32)
local_inputs = tf.cast(logits, tf.float32)
return tf.cast(
tf.nn.sampled_softmax_loss(
weights=local_w_t,
biases=local_b,
labels=labels,
inputs=local_inputs,
num_sampled=num_samples,
num_classes=self.target_vocab_size),
dtype)
softmax_loss_function = sampled_loss
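# Rough cost intuition (an illustrative estimate, not a measurement): with
# target_vocab_size = 40000 and num_samples = 512, each training step scores
# about 512 sampled classes plus the true class instead of all 40000, shrinking
# the output-layer cost per step by a factor of roughly 40000 / 512, i.e. about 78.
# At decode time the full output_projection defined above is still applied to
# obtain scores over the whole vocabulary.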
# The seq2seq function: we use embedding for the input and attention.
def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
# cells = []
# for i in range(num_layers):
# with tf.variable_scope('RNN_{}'.format(i)):
# cells.append(tf.contrib.rnn.GRUCell(size))
# cell = tf.contrib.rnn.MultiRNNCell(cells)
with tf.variable_scope("GRU") as scope:
cell = tf.contrib.rnn.DropoutWrapper(
tf.contrib.rnn.GRUCell(size),
input_keep_prob=self.dropout_keep_prob_input,
output_keep_prob=self.dropout_keep_prob_output)
if num_layers > 1:
cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers)
print("new a cell")
return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
encoder_inputs,
decoder_inputs,
cell,
num_encoder_symbols=source_vocab_size,
num_decoder_symbols=target_vocab_size,
embedding_size=size,
output_projection=output_projection,
feed_previous=do_decode,
dtype=dtype)
# Placeholders for feeding the data.
self.encoder_inputs = []
self.decoder_inputs = []
self.target_weights = []
for i in xrange(buckets[-1][0]):  # the last bucket is the biggest one.
self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
name="encoder{0}".format(i)))
for i in xrange(buckets[-1][1] + 1):
self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
name="decoder{0}".format(i)))
self.target_weights.append(tf.placeholder(dtype, shape=[None],
name="weight{0}".format(i)))
#The targets are the decoder inputs shifted by one.
targets = [self.decoder_inputs[i + 1]
for i in xrange(len(self.decoder_inputs) - 1)]
# Training outputs and losses.
if forward_only:
self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
self.encoder_inputs, self.decoder_inputs, targets,
self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, True),
softmax_loss_function=softmax_loss_function)
# If we use an output projection, we need to project the decoder outputs for decoding.
if output_projection is not None:
for b in xrange(len(buckets)):
self.outputs[b] = [
tf.matmul(output, output_projection[0]) + output_projection[1]
for output in self.outputs[b]
]
else:
self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
self.encoder_inputs, self.decoder_inputs, targets,
self.target_weights, buckets,
lambda x, y: seq2seq_f(x, y, False),
softmax_loss_function=softmax_loss_function)
# Gradient-descent updates for training the model.
params = tf.trainable_variables()
if not forward_only:
self.gradient_norms = []
self.updates = []
opt = tf.train.GradientDescentOptimizer(self.learning_rate)
for b in xrange(len(buckets)):
gradients = tf.gradients(self.losses[b], params)
clipped_gradients, norm = tf.clip_by_global_norm(gradients,
max_gradient_norm)
self.gradient_norms.append(norm)
self.updates.append(opt.apply_gradients(
zip(clipped_gradients, params), global_step=self.global_step))
self.saver = tf.train.Saver(tf.global_variables())
def step(self, session, encoder_inputs, decoder_inputs, target_weights,
bucket_id, forward_only):
"""Run one step of the model, feeding the given inputs.
Args:
session: the TensorFlow session to use.
encoder_inputs: list of numpy int vectors to feed as encoder inputs.
decoder_inputs: list of numpy int vectors to feed as decoder inputs.
target_weights: list of numpy float vectors to feed as target weights.
bucket_id: which bucket of the model to use.
forward_only: whether to do only the forward pass.
Returns:
A triple made of the gradient norm (None when no backward pass is run), the average perplexity, and the outputs.
Raises:
ValueError: if the length of encoder_inputs, decoder_inputs, or target_weights disagrees with the bucket size of the specified bucket_id.
"""
# Check that the input lengths match the sizes of the chosen bucket.
encoder_size, decoder_size = self.buckets[bucket_id]
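# Shape sketch for one call (the numbers are hypothetical): with bucket (20, 20)
# and batch_size = 1, encoder_inputs, decoder_inputs and target_weights are each
# lists of 20 numpy vectors of shape [1]; an extra all-zero vector is fed further
# below for the last shifted target. The call returns (gradient norm, loss, None)
# in training mode and (None, loss, output logits) when forward_only is True.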
if len(encoder_inputs) != encoder_size:
raise ValueError("Encoder length must be equal to the one in bucket,"
" %d != %d." % (len(encoder_inputs), encoder_size))
if len(decoder_inputs) != decoder_size:
raise ValueError("Decoder length must be equal to the one in bucket,"
" %d != %d." % (len(decoder_inputs), decoder_size))
if len(target_weights) != decoder_size:
raise ValueError("Weights length must be equal to the one in bucket,"
" %d != %d." % (len(target_weights), decoder_size))
# Build the input feed.
input_feed = {}
for l in xrange(encoder_size):
input_feed[self.encoder_inputs[l].name] = encoder_inputs[l]
for l in xrange(decoder_size):
input_feed[self.decoder_inputs[l].name] = decoder_inputs[l]
input_feed[self.target_weights[l].name] = target_weights[l]
# Since our targets are decoder inputs shifted by one, we need one more.
last_target = self.decoder_inputs[decoder_size].name
input_feed[last_target] = np.zeros([self.batch_size], dtype=np.int32)
# Build the output feed.
if not forward_only:
output_feed = [self.updates[bucket_id], # Update Op that does SGD.
self.gradient_norms[bucket_id], # Gradient norm.
self.losses[bucket_id]] # Loss for this batch.
else:
output_feed = [self.losses[bucket_id]] # Loss for this batch.
for l in xrange(decoder_size): # Output logits.
output_feed.append(self.outputs[bucket_id][l])
outputs = session.run(output_feed, input_feed)
if not forward_only:
return outputs[1], outputs[2], None # Gradient norm, loss, no outputs.
else:
return None, outputs[0], outputs[1:] # No gradient norm, loss, outputs.
def get_batch(self, data, bucket_id):
"""Get a random batch of data from the specified bucket for a training step.
Args:
data: a tuple of size len(self.buckets) containing the lists of
input-output pairs used to build a batch.
bucket_id: integer, which bucket to draw the data from.
Returns:
The triple (encoder_inputs, decoder_inputs, target_weights) for later use in step().
"""
encoder_size, decoder_size = self.buckets[bucket_id]
encoder_inputs, decoder_inputs = [], []
# Get a random batch of encoder and decoder inputs from the data,
# pad them if needed, reverse the encoder inputs and add GO symbols to the decoder inputs.
for _ in xrange(self.batch_size):
encoder_input, decoder_input = random.choice(data[bucket_id])
# Pad the encoder input and then reverse it.
encoder_pad = [data_utils.PAD_ID] * (encoder_size - len(encoder_input))
encoder_inputs.append(list(reversed(encoder_input + encoder_pad)))
# Prepend an extra "GO" symbol to the decoder input, then pad it.
decoder_pad_size = decoder_size - len(decoder_input) - 1
decoder_inputs.append([data_utils.GO_ID] + decoder_input +
[data_utils.PAD_ID] * decoder_pad_size)
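# Worked example of the padding above and the weights computed below
# (the ids are hypothetical): for bucket (5, 5) and the pair
# encoder_input = [4, 7, 9], decoder_input = [12, 6]:
#   encoder: pad to length 5, then reverse      -> [0, 0, 9, 7, 4]
#   decoder: prepend GO_ID (1) and pad to 5     -> [1, 12, 6, 0, 0]
#   target weights: 1.0 where the next decoder symbol is a real target,
#   0.0 where it is padding or the final slot   -> [1.0, 1.0, 0.0, 0.0, 0.0]
# The batch-major arrays built below have shape [bucket length, batch_size].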
# Now create batch-major vectors from the data selected above.
batch_encoder_inputs, batch_decoder_inputs, batch_weights = [], [], []
# Batch encoder inputs are just re-indexed encoder_inputs.
for length_idx in xrange(encoder_size):
batch_encoder_inputs.append(
np.array([encoder_inputs[batch_idx][length_idx]
for batch_idx in xrange(self.batch_size)], dtype=np.int32))
# Batch decoder inputs are re-indexed decoder_inputs, we create weights.
for length_idx in xrange(decoder_size):
batch_decoder_inputs.append(
np.array([decoder_inputs[batch_idx][length_idx]
for batch_idx in xrange(self.batch_size)], dtype=np.int32))
# Create target_weights: 1.0 by default, 0.0 where the corresponding target is padding.
batch_weight = np.ones(self.batch_size, dtype=np.float32)
for batch_idx in xrange(self.batch_size):
# Set the weight to 0 if the corresponding target is a PAD symbol.
# The corresponding target is decoder_input shifted forward by one position.
if length_idx < decoder_size - 1:
target = decoder_inputs[batch_idx][length_idx + 1]
if length_idx == decoder_size - 1 or target == data_utils.PAD_ID:
batch_weight[batch_idx] = 0.0
batch_weights.append(batch_weight)
return batch_encoder_inputs, batch_decoder_inputs, batch_weights
import tensorflow as tf
import numpy as np
import os
from six.moves import xrange
_buckets = []
convo_hist_limit = 1
max_source_length = 0
max_target_length = 0
flags = tf.app.flags
FLAGS = flags.FLAGS
datautil = __import__("datautil")
seq2seq_model = __import__("seq2seq_model")
import datautil
import seq2seq_model
tf.reset_default_graph()
_buckets =[(3, 3), (5, 5), (10, 10)]#, (20, 20)]# ["40,10","50,15"]
max_train_data_size= 0#(0: no limit)
data_dir = "datacn/"
dropout = 1.0
grad_clip = 5.0
batch_size = 60
hidden_size = 14
num_layers =2
learning_rate =0.5
lr_decay_factor =0.99
checkpoint_dir= "data/checkpoints/"
############### Translation
hidden_size = 100
checkpoint_dir= "fanyichina/checkpoints/"
data_dir = "fanyichina/"
_buckets =[(20, 20), (40, 40), (50, 50), (60, 60)]
def getfanyiInfo():
vocaben, rev_vocaben=datautil.initialize_vocabulary(os.path.join(datautil.data_dir, datautil.vocabulary_fileen))
vocab_sizeen= len(vocaben)
print("vocab_size",vocab_sizeen)
vocabch, rev_vocabch=datautil.initialize_vocabulary(os.path.join(datautil.data_dir, datautil.vocabulary_filech))
vocab_sizech= len(vocabch)
print("vocab_sizech",vocab_sizech)
return vocab_sizeen,vocab_sizech,vocaben,rev_vocabch
################################################################
#source_train_file_path = os.path.join(datautil.data_dir, "data_source_test.txt")
#target_train_file_path = os.path.join(datautil.data_dir, "data_target_test.txt")
def main():
vocab_sizeen,vocab_sizech,vocaben,rev_vocabch= getfanyiInfo()
if not os.path.exists(checkpoint_dir):
os.mkdir(checkpoint_dir)
print ("checkpoint_dir is {0}".format(checkpoint_dir))
with tf.Session() as sess:
model = createModel(sess,True,vocab_sizeen,vocab_sizech)
print (_buckets)
model.batch_size = 1
conversation_history =[]
while True:
prompt = "Please input: "
sentence = input(prompt)
conversation_history.append(sentence.strip())
conversation_history = conversation_history[-convo_hist_limit:]
token_ids = list(reversed( datautil.sentence_to_ids(" ".join(conversation_history) ,vocaben,normalize_digits=True,Isch=False) ) )
#token_ids = list(reversed(vocab.tokens2Indices(" ".join(conversation_history))))
print(token_ids)
#token_ids = list(reversed(vocab.tokens2Indices(sentence)))
bucket_id = min([b for b in xrange(len(_buckets))if _buckets[b][0] > len(token_ids)])
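# Bucket choice example (illustrative): with _buckets = [(20, 20), (40, 40),
# (50, 50), (60, 60)] and 17 input tokens, the smallest bucket whose source
# size exceeds 17 is bucket 0, i.e. (20, 20).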
encoder_inputs, decoder_inputs, target_weights = model.get_batch({bucket_id: [(token_ids, [])]}, bucket_id)
_, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,target_weights, bucket_id, True)
#TODO implement beam search
outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
print("outputs",outputs,datautil.EOS_ID)
if datautil.EOS_ID in outputs:
outputs = outputs[:outputs.index(datautil.EOS_ID)]
#print(vocab.indices2Tokens(outputs))
#print("结果",datautil.ids2texts(outputs,rev_vocabch))
convo_output = " ".join(datautil.ids2texts(outputs,rev_vocabch))
conversation_history.append(convo_output)
print (convo_output)
else:
print("cannot translate!")
def createModel(session, forward_only,from_vocab_size,to_vocab_size):
"""Create translation model and initialize or load parameters in session."""
model = seq2seq_model.Seq2SeqModel(
from_vocab_size,#from
to_vocab_size,#to
_buckets,
hidden_size,
num_layers,
dropout,
grad_clip,
batch_size,
learning_rate,
lr_decay_factor,
forward_only=forward_only,
dtype=tf.float32)
print("model is ok")
ckpt = tf.train.latest_checkpoint(checkpoint_dir)
if ckpt!=None:
model.saver.restore(session, ckpt)
print ("Reading model parameters from {0}".format(ckpt))
else:
print ("Created model with fresh parameters.")
session.run(tf.global_variables_initializer())
return model
if __name__=="__main__":
main()
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 注意力模型实现中英文机器翻译\n",
"\n",
"### 1.数据预处理\n",
"\n",
"首先先下载本目录的数据和代码,并执行 **datautil.py**,生成中、英文字典\n",
"\n",
"### 2.执行如下代码\n",
"\n",
"训练时间会比较长\n",
"\n",
"### 3.测试模型\n",
"运行 **test.py**"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Building prefix dict from the default dictionary ...\n",
"Loading model from cache /tmp/jieba.cache\n",
"Loading model cost 0.657 seconds.\n",
"Prefix dict has been built succesfully.\n"
]
}
],
"source": [
"import math\n",
"import os\n",
"\n",
"import sys\n",
"import time\n",
"import numpy as np\n",
"from six.moves import xrange\n",
"import tensorflow as tf\n",
"datautil = __import__(\"datautil\")\n",
"seq2seq_model = __import__(\"seq2seq_model\")\n",
"import datautil\n",
"import seq2seq_model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"vocab_size 11963\n",
"vocab_sizech 15165\n",
"checkpoint_dir is fanyichina/checkpoints/\n",
"WARNING:tensorflow:From /usr/local/python3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Colocations handled automatically by placer.\n",
"\n",
"WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.\n",
"For more information, please see:\n",
" * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n",
" * https://github.com/tensorflow/addons\n",
"If you depend on functionality not listed there, please file an issue.\n",
"\n",
"WARNING:tensorflow:From /home/python_home/WeiZhongChuang/ML/TensorFlow/Attention/seq2seq_model.py:124: GRUCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.\n",
"WARNING:tensorflow:From /home/python_home/WeiZhongChuang/ML/TensorFlow/Attention/seq2seq_model.py:128: MultiRNNCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.\n",
"WARNING:tensorflow:At least two cells provided to MultiRNNCell are the same object and will share weights.\n",
"new a cell\n",
"WARNING:tensorflow:From /usr/local/python3/lib/python3.6/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py:863: static_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please use `keras.layers.RNN(cell, unroll=True)`, which is equivalent to this API\n",
"WARNING:tensorflow:From /usr/local/python3/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py:1259: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n",
"WARNING:tensorflow:From /usr/local/python3/lib/python3.6/site-packages/tensorflow/python/ops/nn_impl.py:1444: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.\n",
"new a cell\n",
"new a cell\n",
"new a cell\n",
"WARNING:tensorflow:From /usr/local/python3/lib/python3.6/site-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use tf.cast instead.\n",
"model is ok\n",
"WARNING:tensorflow:From /usr/local/python3/lib/python3.6/site-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use standard file APIs to check for files with this prefix.\n",
"INFO:tensorflow:Restoring parameters from fanyichina/checkpoints/seq2seqtest.ckpt-54000\n",
"Reading model parameters from fanyichina/checkpoints/seq2seqtest.ckpt-54000\n",
"Using bucket sizes:\n",
"[(20, 20), (40, 40), (50, 50), (60, 60)]\n",
"fanyichina/fromids/english1w.txt\n",
"fanyichina/toids/chinese1w.txt\n",
"bucket sizes = [1649, 4933, 1904, 1383]\n",
"global step 54200 learning rate 0.3699 step-time 0.72 perplexity 3.16\n",
"fanyichina/checkpoints/seq2seqtest.ckpt\n",
" eval: bucket 0 perplexity 1.74\n",
"输入 ['third', ',', 'the', 'pace', 'of', 'selling', 'public', 'houses', 'was', 'accelerated', '.', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD']\n",
"输出 ['_GO', '三', '是', '加快', '了', '公有', '住房', '的', '出售', '.', '_EOS', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD']\n",
" eval: bucket 1 perplexity 2.93\n",
"输入 ['thanks', 'to', 'the', 'equilibrium', 'of', 'the', 'international', 'payments', ',', 'china', \"'s\", 'exchange', 'rates', 'have', 'all', 'along', 'been', 'comparatively', 'stable', '.', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD']\n",
"输出 ['_GO', '由', '於', '国际', '收支平衡', ',', '中国', '的', '汇率', '一直', '比较', '稳定', '.', '_EOS', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD']\n",
"结果 ['记者', '中国', '关系', '进行', '对']\n",
" eval: bucket 2 perplexity 3.96\n",
"输入 ['in', 'order', 'to', 'respond', 'to', 'the', 'vast', 'business', 'opportunities', 'in', 'the', 'future', 'when', 'there', 'are', 'direct', 'cross', '-', 'strait', 'flights', ',', 'taiwan', \"'s\", 'fu', 'hsing', 'aviation', 'has', 'spent', 'a', 'huge', 'sum', 'on', 'buying', 'medium', 'and', 'long', '-', 'range', 'versions', 'of', 'the', 'european', 'airbus', '.', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD']\n",
"输出 ['_GO', '台湾', '复兴', '航空', '为', '因', '应', '未来', '两岸', '直航', '的', '庞大', '商机', ',', '大笔', '购', '进', '欧洲', '空中', '巴士', '的', '中', ',', '长程', '客机', '.', '_EOS', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD']\n",
"结果 ['建设', '建设', '说', '建设', '加强', '是', '说', '是', '重要', '是', '是', '是', '实现', '市场', '这', '这', '的']\n",
" eval: bucket 3 perplexity 5.66\n",
"输入 ['zhu', 'bangzao', 'said', 'that', 'after', 'hong', 'kong', \"'s\", 'reversion', ',', 'china', \"'s\", 'central', 'government', 'and', 'the', 'hong', 'kong', 'special', 'administrative', 'region', '[', 'hksar', ']', 'government', 'implemented', 'the', 'policy', 'of', '\"', 'one', 'country', ',', 'two', 'systems', ',', '\"', '\"', 'hong', 'kong', 'people', 'governing', 'hong', 'kong', ',', '\"', 'and', 'a', 'high', 'degree', 'of', 'autonomy', '.', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD']\n",
"输出 ['_GO', '朱邦造', '说', ',', '香港', '回归', '后', ',', '中国', '中央政府', '和', '香港特区', '政府', '贯彻', '\"', '一国两制', '\"', '\"', '港人', '治', '港', '\"', '和', '高度', '自治', '的', '方针', '.', '_EOS', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD', '_PAD']\n"
]
}
],
"source": [
"tf.reset_default_graph()\n",
"\n",
"steps_per_checkpoint=200 \n",
"\n",
"max_train_data_size= 0#(0: no limit)\n",
"\n",
"dropout = 0.9 \n",
"grad_clip = 5.0\n",
"batch_size = 60\n",
"\n",
"num_layers =2\n",
"learning_rate =0.5\n",
"lr_decay_factor =0.99\n",
"\n",
"###############翻译\n",
"hidden_size = 100\n",
"checkpoint_dir= \"fanyichina/checkpoints/\"\n",
"\n",
"_buckets =[(20, 20), (40, 40), (50, 50), (60, 60)]\n",
"def getfanyiInfo():\n",
" vocaben, rev_vocaben=datautil.initialize_vocabulary(os.path.join(datautil.data_dir, datautil.vocabulary_fileen))\n",
" vocab_sizeen= len(vocaben)\n",
" print(\"vocab_size\",vocab_sizeen)\n",
" \n",
" vocabch, rev_vocabch=datautil.initialize_vocabulary(os.path.join(datautil.data_dir, datautil.vocabulary_filech))\n",
" vocab_sizech= len(vocabch)\n",
" print(\"vocab_sizech\",vocab_sizech) \n",
" \n",
" filesfrom,_=datautil.getRawFileList(datautil.data_dir+\"fromids/\")\n",
" filesto,_=datautil.getRawFileList(datautil.data_dir+\"toids/\")\n",
" source_train_file_path = filesfrom[0]\n",
" target_train_file_path= filesto[0]\n",
" return vocab_sizeen,vocab_sizech,rev_vocaben,rev_vocabch,source_train_file_path,target_train_file_path\n",
"################################################################ \n",
"#source_train_file_path = os.path.join(datautil.data_dir, \"data_source_test.txt\")\n",
"#target_train_file_path = os.path.join(datautil.data_dir, \"data_target_test.txt\") \n",
" \n",
"\n",
"def main():\n",
"\t\n",
" vocab_sizeen,vocab_sizech,rev_vocaben,rev_vocabch,source_train_file_path,target_train_file_path = getfanyiInfo()\n",
"\n",
" if not os.path.exists(checkpoint_dir):\n",
" os.mkdir(checkpoint_dir)\n",
" print (\"checkpoint_dir is {0}\".format(checkpoint_dir))\n",
"\n",
" with tf.Session() as sess:\n",
" model = createModel(sess,False,vocab_sizeen,vocab_sizech)\n",
" print (\"Using bucket sizes:\")\n",
" print (_buckets)\n",
"\n",
"\n",
" source_test_file_path = source_train_file_path\n",
" target_test_file_path = target_train_file_path\n",
" \n",
" print (source_train_file_path)\n",
" print (target_train_file_path)\n",
" \n",
" train_set = readData(source_train_file_path, target_train_file_path,max_train_data_size)\n",
" test_set = readData(source_test_file_path, target_test_file_path,max_train_data_size)\n",
" \n",
" train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]\n",
" print( \"bucket sizes = {0}\".format(train_bucket_sizes))\n",
" train_total_size = float(sum(train_bucket_sizes))\n",
" \n",
" # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use\n",
" # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to\n",
" # the size if i-th training bucket, as used later.\n",
" train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size for i in xrange(len(train_bucket_sizes))]\n",
" step_time, loss = 0.0, 0.0\n",
" current_step = 0\n",
" previous_losses = []\n",
" \n",
" while True:\n",
" # Choose a bucket according to data distribution. We pick a random number\n",
" # in [0, 1] and use the corresponding interval in train_buckets_scale.\n",
" random_number_01 = np.random.random_sample()\n",
" bucket_id = min([i for i in xrange(len(train_buckets_scale)) if train_buckets_scale[i] > random_number_01])\n",
"\n",
" # 开始训练.\n",
" start_time = time.time()\n",
" encoder_inputs, decoder_inputs, target_weights = model.get_batch(train_set, bucket_id)\n",
" _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,target_weights, bucket_id, False)\n",
" step_time += (time.time() - start_time) / steps_per_checkpoint\n",
" loss += step_loss / steps_per_checkpoint\n",
" current_step += 1\n",
" \n",
" # 保存检查点,测试数据\n",
" if current_step % steps_per_checkpoint == 0:\n",
" # Print statistics for the previous epoch.\n",
" perplexity = math.exp(loss) if loss < 300 else float('inf')\n",
" print (\"global step %d learning rate %.4f step-time %.2f perplexity \"\n",
" \"%.2f\" % (model.global_step.eval(), model.learning_rate.eval(),step_time, perplexity))\n",
" # Decrease learning rate if no improvement was seen over last 3 times.\n",
" if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):\n",
" sess.run(model.learning_rate_decay_op)\n",
" previous_losses.append(loss)\n",
" # Save checkpoint and zero timer and loss.\n",
" checkpoint_path = os.path.join(checkpoint_dir, \"seq2seqtest.ckpt\")\n",
" print(checkpoint_path)\n",
" model.saver.save(sess, checkpoint_path, global_step=model.global_step)\n",
" step_time, loss = 0.0, 0.0\n",
" # Run evals on development set and print their perplexity.\n",
" for bucket_id in xrange(len(_buckets)):\n",
" if len(test_set[bucket_id]) == 0:\n",
" print(\" eval: empty bucket %d\" % (bucket_id))\n",
" continue\n",
" encoder_inputs, decoder_inputs, target_weights = model.get_batch(test_set, bucket_id)\n",
"\n",
" _, eval_loss,output_logits = model.step(sess, encoder_inputs, decoder_inputs,target_weights, bucket_id, True)\n",
" eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')\n",
" print(\" eval: bucket %d perplexity %.2f\" % (bucket_id, eval_ppx))\n",
" \n",
" \n",
" inputstr = datautil.ids2texts(reversed([en[0] for en in encoder_inputs]) ,rev_vocaben)\n",
" print(\"输入\",inputstr)\n",
" print(\"输出\",datautil.ids2texts([en[0] for en in decoder_inputs] ,rev_vocabch))\n",
" \n",
" outputs = [np.argmax(logit, axis=1)[0] for logit in output_logits] \n",
" #outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]\n",
" #print(\"outputs\",outputs,datautil.EOS_ID)\n",
" if datautil.EOS_ID in outputs:\n",
" outputs = outputs[:outputs.index(datautil.EOS_ID)]\n",
" print(\"结果\",datautil.ids2texts(outputs,rev_vocabch))\n",
" \n",
" \n",
" \n",
" sys.stdout.flush()\n",
"\n",
"\n",
"def createModel(session, forward_only,from_vocab_size,to_vocab_size):\n",
" \"\"\"Create translation model and initialize or load parameters in session.\"\"\"\n",
" model = seq2seq_model.Seq2SeqModel(\n",
" from_vocab_size,#from\n",
" to_vocab_size,#to\n",
" _buckets,\n",
" hidden_size,\n",
" num_layers,\n",
" dropout,\n",
" grad_clip,\n",
" batch_size,\n",
" learning_rate,\n",
" lr_decay_factor,\n",
" forward_only=forward_only,\n",
" dtype=tf.float32)\n",
" \n",
" print(\"model is ok\")\n",
"\n",
" \n",
" ckpt = tf.train.latest_checkpoint(checkpoint_dir)\n",
" if ckpt!=None:\n",
" model.saver.restore(session, ckpt)\n",
" print (\"Reading model parameters from {0}\".format(ckpt))\n",
" else:\n",
" print (\"Created model with fresh parameters.\")\n",
" session.run(tf.global_variables_initializer()) \n",
"\n",
" return model\n",
"\n",
"\n",
"\n",
"\n",
"def readData(source_path, target_path, max_size=None):\n",
"\t'''\n",
"\tThis method directly from tensorflow translation example\n",
"\t'''\n",
"\tdata_set = [[] for _ in _buckets]\n",
"\twith tf.gfile.GFile(source_path, mode=\"r\") as source_file:\n",
"\t\twith tf.gfile.GFile(target_path, mode=\"r\") as target_file:\n",
"\t\t\tsource, target = source_file.readline(), target_file.readline()\n",
"\t\t\tcounter = 0\n",
"\t\t\twhile source and target and (not max_size or counter < max_size):\n",
"\t\t\t\tcounter += 1\n",
"\t\t\t\tif counter % 100000 == 0:\n",
"\t\t\t\t\tprint(\" reading data line %d\" % counter)\n",
"\t\t\t\t\tsys.stdout.flush()\n",
"\t\t\t\tsource_ids = [int(x) for x in source.split()]\n",
"\t\t\t\ttarget_ids = [int(x) for x in target.split()]\n",
"\t\t\t\ttarget_ids.append(datautil.EOS_ID)\n",
"\t\t\t\tfor bucket_id, (source_size, target_size) in enumerate(_buckets):\n",
"\t\t\t\t\tif len(source_ids) < source_size and len(target_ids) < target_size:\n",
"\t\t\t\t\t\tdata_set[bucket_id].append([source_ids, target_ids])\n",
"\t\t\t\t\t\tbreak\n",
"\t\t\t\tsource, target = source_file.readline(), target_file.readline()\n",
"\treturn data_set\n",
"\n",
"\n",
"\n",
"if __name__ == '__main__':\n",
"\tmain()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}