提交 9511ee38 编写于 作者: M mapingshuo

for Python 3 compatibility

上级 a53f41c2
from cdssm import cdssm_base
from dec_att import decatt_glove
from sse import sse_base
from infer_sent import infer_sent_v1
from infer_sent import infer_sent_v2
from .cdssm import cdssm_base
from .dec_att import decatt_glove
from .sse import sse_base
from .infer_sent import infer_sent_v1
from .infer_sent import infer_sent_v2
import basic_config
from . import basic_config
def cdssm_base():
"""
......
import basic_config
from . import basic_config
def decatt_glove():
"""
......
import basic_config
from . import basic_config
def infer_sent_v1():
"""
......
import basic_config
from . import basic_config
def sse_base():
"""
......
from cdssm import cdssmNet
from dec_att import DecAttNet
from sse import SSENet
from infer_sent import InferSentNet
from .cdssm import cdssmNet
from .dec_att import DecAttNet
from .sse import SSENet
from .infer_sent import InferSentNet
import paddle.fluid as fluid
from my_layers import bi_lstm_layer
from match_layers import ElementwiseMatching
from .my_layers import bi_lstm_layer
from .match_layers import ElementwiseMatching
class InferSentNet():
"""
......
......@@ -3,7 +3,6 @@ This Module provide different kinds of Match layers
"""
import paddle.fluid as fluid
import paddle.v2 as paddle
def MultiPerspectiveMatching(vec1, vec2, perspective_num):
......@@ -44,18 +43,3 @@ def ElementwiseMatching(vec1, vec2):
return fluid.layers.concat(input=[vec1, vec2, elementwise_mul, elementwise_abs_sub], axis=1)
def MultiPerspectiveFullMatching(seq1, seq2, perspective_num):
"""
seq1: Lod tensor with shape [-1, feature_dim] (lod level == 1) is a representation of a sentence.
seq2: Another Lod tensor with shape [-1, feature_dim] (lod level == 1) is a representation of a sentence.
use seq1 to match seq2
return match seq with same shape as seq1.
"""
print seq2
seq2_last = fluid.layers.sequence_pool(input=seq2, pool_type="last")
print seq2_last
seq2 = fluid.layers.sequence_expand(seq2_last, seq1)
print seq2
#seq2 = fluid.layers.lod_reset(x=seq2, y=seq1)
seq2.set_lod(seq1)
print seq2
import paddle.fluid as fluid
from my_layers import bi_lstm_layer
from match_layers import ElementwiseMatching
from .my_layers import bi_lstm_layer
from .match_layers import ElementwiseMatching
class SSENet():
"""
......
......@@ -2,27 +2,29 @@
This Module provide pretrained word-embeddings
"""
from __future__ import print_function
from __future__ import print_function, unicode_literals
import numpy as np
import time, datetime
import os, sys
def Glove840B_300D(filepath, keys=None):
"""
input: the "glove.840B.300d.txt" file path
return: a dict, key: word (unicode), value: a numpy array with shape [300]
"""
if keys is not None:
if keys is not None:
assert(isinstance(keys, set))
print("loading word2vec from ", filepath)
print("please wait for a minute.")
start = time.time()
word2vec = {}
with open(filepath, "r") as f:
for line in f:
info = line.strip().split()
# TODO: test python3
word = info[0].decode('utf-8')
if sys.version_info <= (3, 0): # for python2
line = line.decode('utf-8')
info = line.strip("\n").split(" ")
word = info[0]
if (keys is not None) and (word not in keys):
continue
vector = info[1:]
......@@ -32,6 +34,9 @@ def Glove840B_300D(filepath, keys=None):
end = time.time()
print("Spent ", str(datetime.timedelta(seconds=end-start)), " on loading word2vec.")
return word2vec
if __name__ == '__main__':
embed_dict = Glove840B_300D("data/glove.840B.300d.txt")
from os.path import expanduser
home = expanduser("~")
embed_dict = Glove840B_300D(os.path.join(home, "./.cache/paddle/dataset/glove.840B.300d.txt"))
exit(0)
......@@ -20,7 +20,7 @@ import tarfile
import re
import string
import random
import os
import os, sys
import nltk
from os.path import expanduser
......@@ -43,7 +43,8 @@ COLUMN_COUNT = 4
def tokenize(s):
s = s.decode('utf-8')
if sys.version_info <= (3, 0): # for python2
s = s.decode('utf-8')
if TOKENIZE_METHOD == "nltk":
return nltk.tokenize.word_tokenize(s)
elif TOKENIZE_METHOD == "punctuation":
......@@ -116,7 +117,7 @@ def build_dict(file_name, cutoff):
dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0]))
words, _ = list(zip(*dictionary))
word_idx = dict(zip(words, xrange(len(words))))
word_idx = dict(zip(words, range(len(words))))
word_idx['<unk>'] = len(words)
word_idx['<pad>'] = len(words) + 1
return word_idx
......
......@@ -9,7 +9,6 @@ import contextlib
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
import utils, metric, configs
import models
......@@ -155,7 +154,7 @@ def train_and_evaluate(train_reader,
# start training
print("[%s] Start Training" % time.asctime(time.localtime(time.time())))
for epoch_id in xrange(global_config.epoch_num):
for epoch_id in range(global_config.epoch_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
batch_id = 0
for data in train_reader():
......
......@@ -7,7 +7,7 @@ import time
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle
import quora_question_pairs
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册