# code_2.py (391 bytes) - committed by ToTensor
# Prepare the training texts/labels and fit a Keras Tokenizer on the texts.
from keras.preprocessing.text import Tokenizer

# Vocabulary size limit handed to the Tokenizer via ``num_words``.
dictionary_size = 20000
tokenizer = Tokenizer(num_words=dictionary_size)

# Training inputs: the "Review Text" column of df_train, taken via ``.values``.
# NOTE(review): df_train is defined outside this snippet; presumably a pandas
# DataFrame -- confirm, and confirm it has no missing review texts.
X_train_lst = df_train["Review Text"].values

# Training labels: the "Rating" column of the same frame.
y_train = df_train["Rating"].values

# Build the tokenizer's word index from the training texts.
tokenizer.fit_on_texts(X_train_lst)