diff --git a/04.word2vec/README.cn.md b/04.word2vec/README.cn.md index 8d9533bcd76a2d4fefdf23c8b364462afc658f80..f9aac91005f69f63dbe392b8c219f2a93a1c77df 100644 --- a/04.word2vec/README.cn.md +++ b/04.word2vec/README.cn.md @@ -209,15 +209,6 @@ N = 5 # 训练5-Gram 用于保存和加载word_dict和embedding table的函数 ```python -def wordemb(inlayer): - wordemb = paddle.layer.table_projection( - input=inlayer, - size=embsize, - param_attr=paddle.attr.Param( - name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0)) - return wordemb - - # save and load word dict and embedding table def save_dict_and_embedding(word_dict, embeddings): with open("word_dict", "w") as f: @@ -225,6 +216,17 @@ def save_dict_and_embedding(word_dict, embeddings): f.write(key + " " + str(word_dict[key]) + "\n") with open("embedding_table", "w") as f: numpy.savetxt(f, embeddings, delimiter=',', newline='\n') + + +def load_dict_and_embedding(): + word_dict = dict() + with open("word_dict", "r") as f: + for line in f: + key, value = line.strip().split(" ") + word_dict[key] = value + + embeddings = numpy.loadtxt("embedding_table", delimiter=",") + return word_dict, embeddings ``` 接着,定义网络结构: diff --git a/04.word2vec/README.md b/04.word2vec/README.md index d86771b85aafb3d007e765d5a5e33e0b6b7f98c4..4b111767441c65f91e97102790fbb5b78e4c0695 100644 --- a/04.word2vec/README.md +++ b/04.word2vec/README.md @@ -227,15 +227,6 @@ N = 5 # train 5-gram - functions used to save and load word dict and embedding table ```python -def wordemb(inlayer): - wordemb = paddle.layer.table_projection( - input=inlayer, - size=embsize, - param_attr=paddle.attr.Param( - name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0)) - return wordemb - - # save and load word dict and embedding table def save_dict_and_embedding(word_dict, embeddings): with open("word_dict", "w") as f: @@ -243,6 +234,17 @@ def save_dict_and_embedding(word_dict, embeddings): f.write(key + " " + str(word_dict[key]) + "\n") with open("embedding_table", "w") as f: numpy.savetxt(f, embeddings, delimiter=',', newline='\n') + + +def load_dict_and_embedding(): + word_dict = dict() + with open("word_dict", "r") as f: + for line in f: + key, value = line.strip().split(" ") + word_dict[key] = value + + embeddings = numpy.loadtxt("embedding_table", delimiter=",") + return word_dict, embeddings ``` - Map the $n-1$ words $w_{t-n+1},...w_{t-1}$ before $w_t$ to a D-dimensional vector though matrix of dimention $|V|\times D$ (D=32 in this example). diff --git a/04.word2vec/index.cn.html b/04.word2vec/index.cn.html index fef5c0eaa3ac14e1a82918cfb1d551844d2b7c8c..f8a4a0bfd9450f2007848db5c1faa6486b152497 100644 --- a/04.word2vec/index.cn.html +++ b/04.word2vec/index.cn.html @@ -251,15 +251,6 @@ N = 5 # 训练5-Gram 用于保存和加载word_dict和embedding table的函数 ```python -def wordemb(inlayer): - wordemb = paddle.layer.table_projection( - input=inlayer, - size=embsize, - param_attr=paddle.attr.Param( - name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0)) - return wordemb - - # save and load word dict and embedding table def save_dict_and_embedding(word_dict, embeddings): with open("word_dict", "w") as f: @@ -267,6 +258,17 @@ def save_dict_and_embedding(word_dict, embeddings): f.write(key + " " + str(word_dict[key]) + "\n") with open("embedding_table", "w") as f: numpy.savetxt(f, embeddings, delimiter=',', newline='\n') + + +def load_dict_and_embedding(): + word_dict = dict() + with open("word_dict", "r") as f: + for line in f: + key, value = line.strip().split(" ") + word_dict[key] = value + + embeddings = numpy.loadtxt("embedding_table", delimiter=",") + return word_dict, embeddings ``` 接着,定义网络结构: diff --git a/04.word2vec/index.html b/04.word2vec/index.html index 1ed7c3a660b35bf522b952fe654f818aa823d061..b0c2b67c6422ccc6c3d2754a9a1a6f22f42a4aeb 100644 --- a/04.word2vec/index.html +++ b/04.word2vec/index.html @@ -269,15 +269,6 @@ N = 5 # train 5-gram - functions used to save and load word dict and embedding table ```python -def wordemb(inlayer): - wordemb = paddle.layer.table_projection( - input=inlayer, - size=embsize, - param_attr=paddle.attr.Param( - name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0)) - return wordemb - - # save and load word dict and embedding table def save_dict_and_embedding(word_dict, embeddings): with open("word_dict", "w") as f: @@ -285,6 +276,17 @@ def save_dict_and_embedding(word_dict, embeddings): f.write(key + " " + str(word_dict[key]) + "\n") with open("embedding_table", "w") as f: numpy.savetxt(f, embeddings, delimiter=',', newline='\n') + + +def load_dict_and_embedding(): + word_dict = dict() + with open("word_dict", "r") as f: + for line in f: + key, value = line.strip().split(" ") + word_dict[key] = value + + embeddings = numpy.loadtxt("embedding_table", delimiter=",") + return word_dict, embeddings ``` - Map the $n-1$ words $w_{t-n+1},...w_{t-1}$ before $w_t$ to a D-dimensional vector though matrix of dimention $|V|\times D$ (D=32 in this example).