"""Character-level text featurizer backed by a vocabulary file."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os


class TextFeaturizer(object):
    """Convert text to lists of integer token ids and back.

    Tokens are single characters. The id of a token is the zero-based index
    of the line it occupies in the vocabulary file.
    """

    def __init__(self, vocab_filepath):
        """Build the featurizer from a vocabulary file.

        Args:
            vocab_filepath: Path of a text file holding one token per line.
        """
        self._vocab_dict, self._vocab_list = self._load_vocabulary_from_file(
            vocab_filepath)

    def text2ids(self, text):
        """Return the list of token ids for the characters of `text`.

        Leading and trailing whitespace of `text` is removed before
        tokenizing.

        Raises:
            KeyError: If a character of `text` is not in the vocabulary.
        """
        tokens = self._char_tokenize(text)
        return [self._vocab_dict[token] for token in tokens]

    def ids2text(self, ids):
        """Return the string formed by joining the tokens indexed by `ids`.

        Raises:
            IndexError: If an id is outside the vocabulary list.
        """
        return ''.join([self._vocab_list[token_id] for token_id in ids])

    @property
    def vocab_size(self):
        """Number of entries in the vocabulary list."""
        return len(self._vocab_list)

    @property
    def vocab_list(self):
        """The vocabulary as a list of tokens, ordered by id."""
        return self._vocab_list

    def _char_tokenize(self, text):
        """Split `text` into characters after stripping outer whitespace."""
        return list(text.strip())

    def _load_vocabulary_from_file(self, vocab_filepath):
        """Load vocabulary from file.

        Args:
            vocab_filepath: Path of a text file holding one token per line.

        Returns:
            A `(vocab_dict, vocab_list)` tuple: `vocab_list[i]` is the token
            on line `i` of the file and `vocab_dict` maps each token back to
            its index. When a token occurs on several lines, `vocab_dict`
            keeps the index of its last occurrence.
        """
        with open(vocab_filepath, 'r') as vocab_file:
            # Remove only the line terminator: a token may itself be
            # whitespace (e.g. a single space), so a general strip() would
            # destroy it. Unlike slicing with [:-1], rstrip('\n') leaves the
            # final line intact when the file has no trailing newline.
            vocab_list = [line.rstrip('\n') for line in vocab_file]
        vocab_dict = {token: index for index, token in enumerate(vocab_list)}
        return vocab_dict, vocab_list