# Build a word-index dictionary from the training reviews.
# NOTE(review): this span was collapsed onto a single physical line, which
# turned every statement after the first '#' into a comment — nothing past
# the import actually executed. Line structure restored below.
from keras.preprocessing.text import Tokenizer  # tokenizer / word-index utility

# assumes df_train is a pandas DataFrame with "Review Text" and "Rating"
# columns, defined earlier in the file — TODO confirm against caller
X_train_lst = df_train["Review Text"].values  # training texts as an array
y_train = df_train["Rating"].values  # labels for the training set

dictionary_size = 20000  # cap vocabulary at the 20k most frequent words
# NOTE(review): keras.preprocessing.text.Tokenizer is deprecated in recent
# Keras releases (TextVectorization is the modern replacement) — kept as-is
# to avoid changing behavior.
tokenizer = Tokenizer(num_words=dictionary_size)  # initialize the tokenizer
tokenizer.fit_on_texts(X_train_lst)  # build the word index from training texts