From da00779cf20f018755eb2ffa07cbf6230246f6ba Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Fri, 15 Sep 2017 15:47:01 -0700
Subject: [PATCH] typo

---
 04.word2vec/README.cn.md  | 20 +++++++++++---------
 04.word2vec/README.md     | 20 +++++++++++---------
 04.word2vec/index.cn.html | 20 +++++++++++---------
 04.word2vec/index.html    | 20 +++++++++++---------
 4 files changed, 44 insertions(+), 36 deletions(-)

diff --git a/04.word2vec/README.cn.md b/04.word2vec/README.cn.md
index 8d9533b..f9aac91 100644
--- a/04.word2vec/README.cn.md
+++ b/04.word2vec/README.cn.md
@@ -209,15 +209,6 @@ N = 5 # 训练5-Gram
 
 用于保存和加载word_dict和embedding table的函数
 ```python
-def wordemb(inlayer):
-    wordemb = paddle.layer.table_projection(
-        input=inlayer,
-        size=embsize,
-        param_attr=paddle.attr.Param(
-            name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0))
-    return wordemb
-
-
 # save and load word dict and embedding table
 def save_dict_and_embedding(word_dict, embeddings):
     with open("word_dict", "w") as f:
@@ -225,6 +216,17 @@ def save_dict_and_embedding(word_dict, embeddings):
             f.write(key + " " + str(word_dict[key]) + "\n")
     with open("embedding_table", "w") as f:
         numpy.savetxt(f, embeddings, delimiter=',', newline='\n')
+
+
+def load_dict_and_embedding():
+    word_dict = dict()
+    with open("word_dict", "r") as f:
+        for line in f:
+            key, value = line.strip().split(" ")
+            word_dict[key] = value
+
+    embeddings = numpy.loadtxt("embedding_table", delimiter=",")
+    return word_dict, embeddings
 ```
 
 接着,定义网络结构:
diff --git a/04.word2vec/README.md b/04.word2vec/README.md
index d86771b..4b11176 100644
--- a/04.word2vec/README.md
+++ b/04.word2vec/README.md
@@ -227,15 +227,6 @@ N = 5 # train 5-gram
 
 - functions used to save and load word dict and embedding table
 ```python
-def wordemb(inlayer):
-    wordemb = paddle.layer.table_projection(
-        input=inlayer,
-        size=embsize,
-        param_attr=paddle.attr.Param(
-            name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0))
-    return wordemb
-
-
 # save and load word dict and embedding table
 def save_dict_and_embedding(word_dict, embeddings):
     with open("word_dict", "w") as f:
@@ -243,6 +234,17 @@ def save_dict_and_embedding(word_dict, embeddings):
             f.write(key + " " + str(word_dict[key]) + "\n")
     with open("embedding_table", "w") as f:
         numpy.savetxt(f, embeddings, delimiter=',', newline='\n')
+
+
+def load_dict_and_embedding():
+    word_dict = dict()
+    with open("word_dict", "r") as f:
+        for line in f:
+            key, value = line.strip().split(" ")
+            word_dict[key] = value
+
+    embeddings = numpy.loadtxt("embedding_table", delimiter=",")
+    return word_dict, embeddings
 ```
 
 - Map the $n-1$ words $w_{t-n+1},...w_{t-1}$ before $w_t$ to a D-dimensional vector though matrix of dimention $|V|\times D$ (D=32 in this example).
diff --git a/04.word2vec/index.cn.html b/04.word2vec/index.cn.html
index fef5c0e..f8a4a0b 100644
--- a/04.word2vec/index.cn.html
+++ b/04.word2vec/index.cn.html
@@ -251,15 +251,6 @@ N = 5 # 训练5-Gram
 
 用于保存和加载word_dict和embedding table的函数
 ```python
-def wordemb(inlayer):
-    wordemb = paddle.layer.table_projection(
-        input=inlayer,
-        size=embsize,
-        param_attr=paddle.attr.Param(
-            name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0))
-    return wordemb
-
-
 # save and load word dict and embedding table
 def save_dict_and_embedding(word_dict, embeddings):
     with open("word_dict", "w") as f:
@@ -267,6 +258,17 @@ def save_dict_and_embedding(word_dict, embeddings):
             f.write(key + " " + str(word_dict[key]) + "\n")
     with open("embedding_table", "w") as f:
         numpy.savetxt(f, embeddings, delimiter=',', newline='\n')
+
+
+def load_dict_and_embedding():
+    word_dict = dict()
+    with open("word_dict", "r") as f:
+        for line in f:
+            key, value = line.strip().split(" ")
+            word_dict[key] = value
+
+    embeddings = numpy.loadtxt("embedding_table", delimiter=",")
+    return word_dict, embeddings
 ```
 
 接着,定义网络结构:
diff --git a/04.word2vec/index.html b/04.word2vec/index.html
index 1ed7c3a..b0c2b67 100644
--- a/04.word2vec/index.html
+++ b/04.word2vec/index.html
@@ -269,15 +269,6 @@ N = 5 # train 5-gram
 
 - functions used to save and load word dict and embedding table
 ```python
-def wordemb(inlayer):
-    wordemb = paddle.layer.table_projection(
-        input=inlayer,
-        size=embsize,
-        param_attr=paddle.attr.Param(
-            name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0))
-    return wordemb
-
-
 # save and load word dict and embedding table
 def save_dict_and_embedding(word_dict, embeddings):
     with open("word_dict", "w") as f:
@@ -285,6 +276,17 @@ def save_dict_and_embedding(word_dict, embeddings):
             f.write(key + " " + str(word_dict[key]) + "\n")
     with open("embedding_table", "w") as f:
         numpy.savetxt(f, embeddings, delimiter=',', newline='\n')
+
+
+def load_dict_and_embedding():
+    word_dict = dict()
+    with open("word_dict", "r") as f:
+        for line in f:
+            key, value = line.strip().split(" ")
+            word_dict[key] = value
+
+    embeddings = numpy.loadtxt("embedding_table", delimiter=",")
+    return word_dict, embeddings
 ```
 
 - Map the $n-1$ words $w_{t-n+1},...w_{t-1}$ before $w_t$ to a D-dimensional vector though matrix of dimention $|V|\times D$ (D=32 in this example).
-- 
GitLab