Read the Quora text-matching data files as UTF-8 on Python 3.

pretrained_word2vec.py gains a maybe_open(filepath) helper, and
Glove840B_300D calls it instead of a bare open(filepath, "r").
quora_question_pairs.py's existing maybe_open(file_name) gets the same
interpreter-version switch.  On Python 2 both keep open(path, 'r')
(Glove840B_300D already decodes each line from UTF-8 itself on that
branch); on Python 3 both pass encoding="utf-8" so decoding no longer
depends on the platform's default encoding.

NOTE(review): this patch was recovered from a copy in which line breaks and
runs of whitespace had been collapsed onto one line.  The +/- content and
the hunk line counts are as given, but indentation, blank context lines,
spacing inside the quoted message strings, and whether "import numpy as np"
is the first hunk's section heading or its first context line are
reconstructed.  Verify against the target files (or apply with
`git apply --ignore-whitespace`) before trusting the blob hashes below.

diff --git a/fluid/PaddleNLP/text_matching_on_quora/pretrained_word2vec.py b/fluid/PaddleNLP/text_matching_on_quora/pretrained_word2vec.py
index a3f3422e8bb2d4065978d43378e6c607b00141a4..cda934d3402d1fd05432e75e3b7bfd0a1bd4ad2c 100755
--- a/fluid/PaddleNLP/text_matching_on_quora/pretrained_word2vec.py
+++ b/fluid/PaddleNLP/text_matching_on_quora/pretrained_word2vec.py
@@ -21,7 +21,12 @@ import numpy as np
 import time, datetime
 import os, sys
 
-
+def maybe_open(filepath):
+    if sys.version_info <= (3, 0): # for python2
+        return open(filepath, 'r')
+    else:
+        return open(filepath, 'r', encoding="utf-8")
+
 def Glove840B_300D(filepath, keys=None):
     """
     input: the "glove.840B.300d.txt" file path
@@ -33,7 +38,7 @@ def Glove840B_300D(filepath, keys=None):
     print("please wait for a minute.")
     start = time.time()
     word2vec = {}
-    with open(filepath, "r") as f:
+    with maybe_open(filepath) as f:
         for line in f:
             if sys.version_info <= (3, 0): # for python2
                 line = line.decode('utf-8')
diff --git a/fluid/PaddleNLP/text_matching_on_quora/quora_question_pairs.py b/fluid/PaddleNLP/text_matching_on_quora/quora_question_pairs.py
index d27fa4fdeb598b84fa2069df01951158f78b1834..4a1694929dc9a5a1d78bce2f99be04de0f1ba8e5 100755
--- a/fluid/PaddleNLP/text_matching_on_quora/quora_question_pairs.py
+++ b/fluid/PaddleNLP/text_matching_on_quora/quora_question_pairs.py
@@ -68,8 +68,10 @@ def maybe_open(file_name):
                " |- readme.txt\n"
                " |- wordvec.txt\n")
         raise RuntimeError(msg)
-
-    return open(file_name, 'r')
+    if sys.version_info <= (3, 0): # for python2
+        return open(file_name, 'r')
+    else:
+        return open(file_name, 'r', encoding="utf-8")
 
 
 def tokenized_question_pairs(file_name):