提交 9511ee38 编写于 作者: M mapingshuo

for Python 3 compatibility

上级 a53f41c2
from cdssm import cdssm_base from .cdssm import cdssm_base
from dec_att import decatt_glove from .dec_att import decatt_glove
from sse import sse_base from .sse import sse_base
from infer_sent import infer_sent_v1 from .infer_sent import infer_sent_v1
from infer_sent import infer_sent_v2 from .infer_sent import infer_sent_v2
import basic_config from . import basic_config
def cdssm_base(): def cdssm_base():
""" """
......
import basic_config from . import basic_config
def decatt_glove(): def decatt_glove():
""" """
......
import basic_config from . import basic_config
def infer_sent_v1(): def infer_sent_v1():
""" """
......
import basic_config from . import basic_config
def sse_base(): def sse_base():
""" """
......
from cdssm import cdssmNet from .cdssm import cdssmNet
from dec_att import DecAttNet from .dec_att import DecAttNet
from sse import SSENet from .sse import SSENet
from infer_sent import InferSentNet from .infer_sent import InferSentNet
import paddle.fluid as fluid import paddle.fluid as fluid
from my_layers import bi_lstm_layer from .my_layers import bi_lstm_layer
from match_layers import ElementwiseMatching from .match_layers import ElementwiseMatching
class InferSentNet(): class InferSentNet():
""" """
......
...@@ -3,7 +3,6 @@ This Module provide different kinds of Match layers ...@@ -3,7 +3,6 @@ This Module provide different kinds of Match layers
""" """
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle
def MultiPerspectiveMatching(vec1, vec2, perspective_num): def MultiPerspectiveMatching(vec1, vec2, perspective_num):
...@@ -44,18 +43,3 @@ def ElementwiseMatching(vec1, vec2): ...@@ -44,18 +43,3 @@ def ElementwiseMatching(vec1, vec2):
return fluid.layers.concat(input=[vec1, vec2, elementwise_mul, elementwise_abs_sub], axis=1) return fluid.layers.concat(input=[vec1, vec2, elementwise_mul, elementwise_abs_sub], axis=1)
def MultiPerspectiveFullMatching(seq1, seq2, perspective_num):
"""
seq1: Lod tensor with shape [-1, feature_dim] (lod level == 1) is a representation of a sentence.
seq2: Another Lod tensor with shape [-1, feature_dim] (lod level == 1) is a representation of a sentence.
use seq1 to match seq2
return match seq with same shape as seq1.
"""
print seq2
seq2_last = fluid.layers.sequence_pool(input=seq2, pool_type="last")
print seq2_last
seq2 = fluid.layers.sequence_expand(seq2_last, seq1)
print seq2
#seq2 = fluid.layers.lod_reset(x=seq2, y=seq1)
seq2.set_lod(seq1)
print seq2
import paddle.fluid as fluid import paddle.fluid as fluid
from my_layers import bi_lstm_layer from .my_layers import bi_lstm_layer
from match_layers import ElementwiseMatching from .match_layers import ElementwiseMatching
class SSENet(): class SSENet():
""" """
......
...@@ -2,9 +2,11 @@ ...@@ -2,9 +2,11 @@
This Module provide pretrained word-embeddings This Module provide pretrained word-embeddings
""" """
from __future__ import print_function from __future__ import print_function, unicode_literals
import numpy as np import numpy as np
import time, datetime import time, datetime
import os, sys
def Glove840B_300D(filepath, keys=None): def Glove840B_300D(filepath, keys=None):
""" """
...@@ -17,12 +19,12 @@ def Glove840B_300D(filepath, keys=None): ...@@ -17,12 +19,12 @@ def Glove840B_300D(filepath, keys=None):
print("please wait for a minute.") print("please wait for a minute.")
start = time.time() start = time.time()
word2vec = {} word2vec = {}
with open(filepath, "r") as f: with open(filepath, "r") as f:
for line in f: for line in f:
info = line.strip().split() if sys.version_info <= (3, 0): # for python2
# TODO: test python3 line = line.decode('utf-8')
word = info[0].decode('utf-8') info = line.strip("\n").split(" ")
word = info[0]
if (keys is not None) and (word not in keys): if (keys is not None) and (word not in keys):
continue continue
vector = info[1:] vector = info[1:]
...@@ -34,4 +36,7 @@ def Glove840B_300D(filepath, keys=None): ...@@ -34,4 +36,7 @@ def Glove840B_300D(filepath, keys=None):
return word2vec return word2vec
if __name__ == '__main__': if __name__ == '__main__':
embed_dict = Glove840B_300D("data/glove.840B.300d.txt") from os.path import expanduser
home = expanduser("~")
embed_dict = Glove840B_300D(os.path.join(home, "./.cache/paddle/dataset/glove.840B.300d.txt"))
exit(0)
...@@ -20,7 +20,7 @@ import tarfile ...@@ -20,7 +20,7 @@ import tarfile
import re import re
import string import string
import random import random
import os import os, sys
import nltk import nltk
from os.path import expanduser from os.path import expanduser
...@@ -43,6 +43,7 @@ COLUMN_COUNT = 4 ...@@ -43,6 +43,7 @@ COLUMN_COUNT = 4
def tokenize(s): def tokenize(s):
if sys.version_info <= (3, 0): # for python2
s = s.decode('utf-8') s = s.decode('utf-8')
if TOKENIZE_METHOD == "nltk": if TOKENIZE_METHOD == "nltk":
return nltk.tokenize.word_tokenize(s) return nltk.tokenize.word_tokenize(s)
...@@ -116,7 +117,7 @@ def build_dict(file_name, cutoff): ...@@ -116,7 +117,7 @@ def build_dict(file_name, cutoff):
dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0])) dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0]))
words, _ = list(zip(*dictionary)) words, _ = list(zip(*dictionary))
word_idx = dict(zip(words, xrange(len(words)))) word_idx = dict(zip(words, range(len(words))))
word_idx['<unk>'] = len(words) word_idx['<unk>'] = len(words)
word_idx['<pad>'] = len(words) + 1 word_idx['<pad>'] = len(words) + 1
return word_idx return word_idx
......
...@@ -9,7 +9,6 @@ import contextlib ...@@ -9,7 +9,6 @@ import contextlib
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle
import utils, metric, configs import utils, metric, configs
import models import models
...@@ -155,7 +154,7 @@ def train_and_evaluate(train_reader, ...@@ -155,7 +154,7 @@ def train_and_evaluate(train_reader,
# start training # start training
print("[%s] Start Training" % time.asctime(time.localtime(time.time()))) print("[%s] Start Training" % time.asctime(time.localtime(time.time())))
for epoch_id in xrange(global_config.epoch_num): for epoch_id in range(global_config.epoch_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0 data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
batch_id = 0 batch_id = 0
for data in train_reader(): for data in train_reader():
......
...@@ -7,7 +7,7 @@ import time ...@@ -7,7 +7,7 @@ import time
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle import paddle
import quora_question_pairs import quora_question_pairs
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册