提交 8691f970 编写于 作者: X XieGuochao 提交者: Aston Zhang

Solve "gbk" decode problem on Windows. (#401)

上级 d3835367
......
@@ -345,8 +345,8 @@ def read_imdb(folder='train'):
for label in ['pos', 'neg']:
folder_name = os.path.join('../data/aclImdb/', folder, label)
for file in os.listdir(folder_name):
-with open(os.path.join(folder_name, file), 'r') as f:
-review = f.read().replace('\n', '').lower()
+with open(os.path.join(folder_name, file), 'rb') as f:
+review = f.read().decode('utf-8').replace('\n', '').lower()
data.append([review, 1 if label == 'pos' else 0])
random.shuffle(data)
return data
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册