提交 63fc48b0 编写于 作者: C Cao Ying 提交者: GitHub

Merge pull request #170 from livc/fix

fix bug in sequence_tagging_for_ner.
- wget http://cs224d.stanford.edu/assignment2/assignment2.zip
+ if [ -f assignment2.zip ]; then
+ echo "data exist"
+ else
+ wget http://cs224d.stanford.edu/assignment2/assignment2.zip
+ fi
if [ $? -eq 0 ];then
unzip assignment2.zip
......
......@@ -21,7 +21,7 @@ def canonicalize_word(word, wordset=None, digits=True):
if (wordset != None) and (word in wordset): return word
word = canonicalize_digits(word) # try to canonicalize numbers
if (wordset == None) or (word in wordset): return word
- else: return "<UNK>" # unknown token
+ else: return "UUUNKKK" # unknown token
def data_reader(data_file, word_dict, label_dict):
......@@ -35,7 +35,7 @@ def data_reader(data_file, word_dict, label_dict):
"""
def reader():
- UNK_IDX = word_dict["<UNK>"]
+ UNK_IDX = word_dict["UUUNKKK"]
sentence = []
labels = []
......
......@@ -106,4 +106,5 @@ if __name__ == "__main__":
test_data_file="data/test",
vocab_file="data/vocab.txt",
target_file="data/target.txt",
- emb_file="data/wordVectors.txt")
+ emb_file="data/wordVectors.txt",
+ model_save_dir="model/")
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册