diff --git a/fluid/text_matching_on_quora/configs/__init__.py b/fluid/text_matching_on_quora/configs/__init__.py index de786797273bdc1fe12691d9373db14bed10bf64..a8c3e13b8a203a199572c1bc0df3214bc69ee3df 100755 --- a/fluid/text_matching_on_quora/configs/__init__.py +++ b/fluid/text_matching_on_quora/configs/__init__.py @@ -1,6 +1,6 @@ -from cdssm import cdssm_base -from dec_att import decatt_glove -from sse import sse_base -from infer_sent import infer_sent_v1 -from infer_sent import infer_sent_v2 +from .cdssm import cdssm_base +from .dec_att import decatt_glove +from .sse import sse_base +from .infer_sent import infer_sent_v1 +from .infer_sent import infer_sent_v2 diff --git a/fluid/text_matching_on_quora/configs/cdssm.py b/fluid/text_matching_on_quora/configs/cdssm.py index 275c6f89ae143936a9a44e679337fa9a6e308eab..d6433e4f93118cf50e7c5d84e1c78735911897c8 100755 --- a/fluid/text_matching_on_quora/configs/cdssm.py +++ b/fluid/text_matching_on_quora/configs/cdssm.py @@ -1,5 +1,5 @@ -import basic_config +from . import basic_config def cdssm_base(): """ diff --git a/fluid/text_matching_on_quora/configs/dec_att.py b/fluid/text_matching_on_quora/configs/dec_att.py index a8994097c95990fd8c39391709639d088db0a5b5..9b8b192ae026a65a525aff5876b99c2388449da5 100755 --- a/fluid/text_matching_on_quora/configs/dec_att.py +++ b/fluid/text_matching_on_quora/configs/dec_att.py @@ -1,5 +1,5 @@ -import basic_config +from . import basic_config def decatt_glove(): """ diff --git a/fluid/text_matching_on_quora/configs/infer_sent.py b/fluid/text_matching_on_quora/configs/infer_sent.py index ac549c01b894934a7d3cb9c3d0ba4f1984512450..0bc91d49e87c64d02ca51e074331d1aa02abb5eb 100755 --- a/fluid/text_matching_on_quora/configs/infer_sent.py +++ b/fluid/text_matching_on_quora/configs/infer_sent.py @@ -1,5 +1,5 @@ -import basic_config +from . import basic_config def infer_sent_v1(): """ diff --git a/fluid/text_matching_on_quora/configs/sse.py b/fluid/text_matching_on_quora/configs/sse.py index f30e04953df07606308ac8d1a515e6a467b5e152..ea3c77319b0870130690f4f89bf6173b806ff14d 100755 --- a/fluid/text_matching_on_quora/configs/sse.py +++ b/fluid/text_matching_on_quora/configs/sse.py @@ -1,5 +1,5 @@ -import basic_config +from . import basic_config def sse_base(): """ diff --git a/fluid/text_matching_on_quora/models/__init__.py b/fluid/text_matching_on_quora/models/__init__.py index 08b661b3f46fbe34c2b69aef1ced38d4e63a9350..ecee5791ab3bdd277d35321d5d608683c22e5d01 100755 --- a/fluid/text_matching_on_quora/models/__init__.py +++ b/fluid/text_matching_on_quora/models/__init__.py @@ -1,4 +1,4 @@ -from cdssm import cdssmNet -from dec_att import DecAttNet -from sse import SSENet -from infer_sent import InferSentNet +from .cdssm import cdssmNet +from .dec_att import DecAttNet +from .sse import SSENet +from .infer_sent import InferSentNet diff --git a/fluid/text_matching_on_quora/models/infer_sent.py b/fluid/text_matching_on_quora/models/infer_sent.py index 1367cb75fd3684f165b7c5ca88b2dc2943906db3..45a4c3d122c12e9442c0adf849abae1d9d476a5c 100644 --- a/fluid/text_matching_on_quora/models/infer_sent.py +++ b/fluid/text_matching_on_quora/models/infer_sent.py @@ -1,6 +1,7 @@ + import paddle.fluid as fluid -from my_layers import bi_lstm_layer -from match_layers import ElementwiseMatching +from .my_layers import bi_lstm_layer +from .match_layers import ElementwiseMatching class InferSentNet(): """ diff --git a/fluid/text_matching_on_quora/models/match_layers.py b/fluid/text_matching_on_quora/models/match_layers.py index ee2d469181589a7e7d1f29916fd9b6c0b8362ca7..6c6105a1fa9bee3bf940e0a783c5622220637787 100755 --- a/fluid/text_matching_on_quora/models/match_layers.py +++ b/fluid/text_matching_on_quora/models/match_layers.py @@ -3,7 +3,6 @@ This Module provide different kinds of Match layers """ import paddle.fluid as fluid -import paddle.v2 as paddle def MultiPerspectiveMatching(vec1, vec2, perspective_num): @@ -44,18 +43,3 @@ def ElementwiseMatching(vec1, vec2): return fluid.layers.concat(input=[vec1, vec2, elementwise_mul, elementwise_abs_sub], axis=1) -def MultiPerspectiveFullMatching(seq1, seq2, perspective_num): - """ - seq1: Lod tensor with shape [-1, feature_dim] (lod level == 1) is a representation of a sentence. - seq2: Another Lod tensor with shape [-1, feature_dim] (lod level == 1) is a representation of a sentence. - use seq1 to match seq2 - return match seq with same shape as seq1. - """ - print seq2 - seq2_last = fluid.layers.sequence_pool(input=seq2, pool_type="last") - print seq2_last - seq2 = fluid.layers.sequence_expand(seq2_last, seq1) - print seq2 - #seq2 = fluid.layers.lod_reset(x=seq2, y=seq1) - seq2.set_lod(seq1) - print seq2 diff --git a/fluid/text_matching_on_quora/models/sse.py b/fluid/text_matching_on_quora/models/sse.py index dea6535e0454d90ba35033e0d2e595593dc8aeae..445032a2efb41213a0fef6b4e375acbc0eccf2ee 100644 --- a/fluid/text_matching_on_quora/models/sse.py +++ b/fluid/text_matching_on_quora/models/sse.py @@ -1,6 +1,7 @@ + import paddle.fluid as fluid -from my_layers import bi_lstm_layer -from match_layers import ElementwiseMatching +from .my_layers import bi_lstm_layer +from .match_layers import ElementwiseMatching class SSENet(): """ diff --git a/fluid/text_matching_on_quora/pretrained_word2vec.py b/fluid/text_matching_on_quora/pretrained_word2vec.py index eda9e80a7b58fd03e8a845f691d496997344113c..d49b4740a506a22fbf1cd6b43a002a135ee21ef9 100755 --- a/fluid/text_matching_on_quora/pretrained_word2vec.py +++ b/fluid/text_matching_on_quora/pretrained_word2vec.py @@ -2,27 +2,29 @@ This Module provide pretrained word-embeddings """ -from __future__ import print_function +from __future__ import print_function, unicode_literals import numpy as np import time, datetime +import os, sys + def Glove840B_300D(filepath, keys=None): """ input: the "glove.840B.300d.txt" file path return: a dict, key: word (unicode), value: a numpy array with shape [300] """ - if keys is not None: + if keys is not None: assert(isinstance(keys, set)) print("loading word2vec from ", filepath) print("please wait for a minute.") start = time.time() word2vec = {} - with open(filepath, "r") as f: for line in f: - info = line.strip().split() - # TODO: test python3 - word = info[0].decode('utf-8') + if sys.version_info <= (3, 0): # for python2 + line = line.decode('utf-8') + info = line.strip("\n").split(" ") + word = info[0] if (keys is not None) and (word not in keys): continue vector = info[1:] @@ -32,6 +34,9 @@ def Glove840B_300D(filepath, keys=None): end = time.time() print("Spent ", str(datetime.timedelta(seconds=end-start)), " on loading word2vec.") return word2vec - + if __name__ == '__main__': - embed_dict = Glove840B_300D("data/glove.840B.300d.txt") + from os.path import expanduser + home = expanduser("~") + embed_dict = Glove840B_300D(os.path.join(home, "./.cache/paddle/dataset/glove.840B.300d.txt")) + exit(0) diff --git a/fluid/text_matching_on_quora/quora_question_pairs.py b/fluid/text_matching_on_quora/quora_question_pairs.py index 76b24c20bf4158153406a2f2744f3245efe18e92..2d97a89f540bd39b794d698c35e0bbafc4617380 100755 --- a/fluid/text_matching_on_quora/quora_question_pairs.py +++ b/fluid/text_matching_on_quora/quora_question_pairs.py @@ -20,7 +20,7 @@ import tarfile import re import string import random -import os +import os, sys import nltk from os.path import expanduser @@ -43,7 +43,8 @@ COLUMN_COUNT = 4 def tokenize(s): - s = s.decode('utf-8') + if sys.version_info <= (3, 0): # for python2 + s = s.decode('utf-8') if TOKENIZE_METHOD == "nltk": return nltk.tokenize.word_tokenize(s) elif TOKENIZE_METHOD == "punctuation": @@ -116,7 +117,7 @@ def build_dict(file_name, cutoff): dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0])) words, _ = list(zip(*dictionary)) - word_idx = dict(zip(words, xrange(len(words)))) + word_idx = dict(zip(words, range(len(words)))) word_idx[''] = len(words) word_idx[''] = len(words) + 1 return word_idx diff --git a/fluid/text_matching_on_quora/train_and_evaluate.py b/fluid/text_matching_on_quora/train_and_evaluate.py index 333fe8698b19e76f6b8fb85082d4d4f72d866227..ad87a8ef4a967c42280c596d4639f5baec9d19ea 100755 --- a/fluid/text_matching_on_quora/train_and_evaluate.py +++ b/fluid/text_matching_on_quora/train_and_evaluate.py @@ -9,7 +9,6 @@ import contextlib import numpy as np import paddle.fluid as fluid -import paddle.v2 as paddle import utils, metric, configs import models @@ -155,7 +154,7 @@ def train_and_evaluate(train_reader, # start training print("[%s] Start Training" % time.asctime(time.localtime(time.time()))) - for epoch_id in xrange(global_config.epoch_num): + for epoch_id in range(global_config.epoch_num): data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0 batch_id = 0 for data in train_reader(): diff --git a/fluid/text_matching_on_quora/utils.py b/fluid/text_matching_on_quora/utils.py index 604a0eea361d826296cf57b5904ed98e0952d8a4..25ffe9024f2f874c19eb65fcd484d6bd2d5e8a63 100755 --- a/fluid/text_matching_on_quora/utils.py +++ b/fluid/text_matching_on_quora/utils.py @@ -7,7 +7,7 @@ import time import numpy as np import paddle.fluid as fluid -import paddle.v2 as paddle +import paddle import quora_question_pairs