PaddlePaddle / models · commit 4097a2cb

Merge pull request #122 from lcy-seso/refine_codes_of_hsigmoid

rename and refine codes of hsigmoid.

Authored by Cao Ying on Jun 26, 2017; committed via GitHub on Jun 26, 2017.
Parents: 436f480d, 2a8834ce
Showing 7 changed files with 143 additions and 117 deletions:

README.md                 +1   -1
hsigmoid/.gitignore       +3   -0
hsigmoid/README.md        +26  -41
hsigmoid/index.html       +26  -41
hsigmoid/infer.py         +16  -12
hsigmoid/network_conf.py  +49  -0
hsigmoid/train.py         +22  -22
README.md

````diff
@@ -13,7 +13,7 @@ PaddlePaddle provides a rich set of computational units that help you build models in a modular way
 In the word-embedding examples, we show how to use Hierarchical-Sigmoid and Noise Contrastive Estimation (NCE) to accelerate the training of word vectors.
-- 1.1 [Hsigmoid-accelerated word vector training](https://github.com/PaddlePaddle/models/tree/develop/word_embedding)
+- 1.1 [Hsigmoid-accelerated word vector training](https://github.com/PaddlePaddle/models/tree/develop/hsigmoid)
 - 1.2 [NCE-accelerated word vector training](https://github.com/PaddlePaddle/models/tree/develop/nce_cost)
...
````
hsigmoid/.gitignore (new file, mode 100644)

````diff
+*.pyc
+models
````
hsigmoid/README.md

````diff
...
@@ -50,7 +50,7 @@ def train_data(filename, word_dict, n):
 ```
 
 ## Network structure
-This example obtains word embeddings by training an N-gram language model; concretely, the current word is predicted from its 4 preceding words. The network input is each word's id in the dictionary; the embeddings are looked up from the embedding table, the 4 word embeddings are concatenated and fed into a fully connected hidden layer, followed by the Hsigmoid layer. The detailed network structure is shown in Figure 2:
+This example obtains word embeddings by training an N-gram language model; concretely, the current word is predicted from its 4 preceding words. The network input is each word's id in the dictionary; the embeddings are looked up from the embedding table, the 4 word embeddings are concatenated and fed into a fully connected hidden layer, followed by the `Hsigmoid` layer. The detailed network structure is shown in Figure 2:
 
 <p align="center">
 <img src="images/network_conf.png" width = "70%" align="center"/><br/>
...
@@ -60,41 +60,27 @@ def train_data(filename, word_dict, n):
 The implementation is as follows:
 
 ```python
 import math
 import paddle.v2 as paddle
 
-def network_conf(hidden_size, embed_size, dict_size, is_train=True):
-    first_word = paddle.layer.data(
-        name='firstw', type=paddle.data_type.integer_value(dict_size))
-    second_word = paddle.layer.data(
-        name='secondw', type=paddle.data_type.integer_value(dict_size))
-    third_word = paddle.layer.data(
-        name='thirdw', type=paddle.data_type.integer_value(dict_size))
-    fourth_word = paddle.layer.data(
-        name='fourthw', type=paddle.data_type.integer_value(dict_size))
-    target_word = paddle.layer.data(
-        name='fifthw', type=paddle.data_type.integer_value(dict_size))
-
+def ngram_lm(hidden_size, embed_size, dict_size, gram_num=4, is_train=True):
+    emb_layers = []
     embed_param_attr = paddle.attr.Param(
         name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0)
-    embed_first_word = paddle.layer.embedding(
-        input=first_word, size=embed_size, param_attr=embed_param_attr)
-    embed_second_word = paddle.layer.embedding(
-        input=second_word, size=embed_size, param_attr=embed_param_attr)
-    embed_third_word = paddle.layer.embedding(
-        input=third_word, size=embed_size, param_attr=embed_param_attr)
-    embed_fourth_word = paddle.layer.embedding(
-        input=fourth_word, size=embed_size, param_attr=embed_param_attr)
+    for i in range(gram_num):
+        word = paddle.layer.data(
+            name="__word%02d__" % (i),
+            type=paddle.data_type.integer_value(dict_size))
+        emb_layers.append(
+            paddle.layer.embedding(
+                input=word, size=embed_size, param_attr=embed_param_attr))
 
-    embed_context = paddle.layer.concat(input=[
-        embed_first_word, embed_second_word, embed_third_word, embed_fourth_word
-    ])
+    target_word = paddle.layer.data(
+        name="__target_word__", type=paddle.data_type.integer_value(dict_size))
+
+    embed_context = paddle.layer.concat(input=emb_layers)
 
     hidden_layer = paddle.layer.fc(
         input=embed_context,
         size=hidden_size,
         act=paddle.activation.Sigmoid(),
         layer_attr=paddle.attr.Extra(drop_rate=0.5),
         bias_attr=paddle.attr.Param(learning_rate=2),
         param_attr=paddle.attr.Param(
````
````diff
...
@@ -105,27 +91,26 @@ def network_conf(hidden_size, embed_size, dict_size, is_train=True):
             input=hidden_layer,
             label=target_word,
             num_classes=dict_size,
-            param_attr=paddle.attr.Param(name='sigmoid_w'),
-            bias_attr=paddle.attr.Param(name='sigmoid_b'))
+            param_attr=paddle.attr.Param(name="sigmoid_w"),
+            bias_attr=paddle.attr.Param(name="sigmoid_b"))
         return cost
     else:
-        with paddle.layer.mixed(
-                size=dict_size - 1,
-                act=paddle.activation.Sigmoid(),
-                bias_attr=paddle.attr.Param(name='sigmoid_b')) as prediction:
-            prediction += paddle.layer.trans_full_matrix_projection(
-                input=hidden_layer,
-                param_attr=paddle.attr.Param(name='sigmoid_w'))
+        prediction = paddle.layer.fc(
+            size=dict_size - 1,
+            input=hidden_layer,
+            act=paddle.activation.Sigmoid(),
+            bias_attr=paddle.attr.Param(name="sigmoid_b"),
+            param_attr=paddle.attr.Param(name="sigmoid_w"))
         return prediction
 ```
 
 Note that at the prediction stage the hsigmoid parameters must be transposed once; the number of output classes here is the dictionary size minus 1, i.e. the number of non-leaf nodes in the tree.
````
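For orientation, here is a minimal usage sketch of the refactored `ngram_lm`: the hyperparameter values are the ones `train.py` and `infer.py` use in this commit, and the surrounding setup is standard PaddlePaddle v2 initialization; nothing here is prescribed by the diff beyond those calls.

```python
import paddle.v2 as paddle

from network_conf import ngram_lm

paddle.init(use_gpu=False, trainer_count=1)
word_dict = paddle.dataset.imikolov.build_dict(min_word_freq=2)
dict_size = len(word_dict)

# Training graph: a single hsigmoid cost over dict_size classes.
cost = ngram_lm(
    is_train=True, hidden_size=256, embed_size=32, dict_size=dict_size)

# Inference graph: dict_size - 1 sigmoid outputs, one per non-leaf node.
prediction = ngram_lm(
    is_train=False, hidden_size=256, embed_size=32, dict_size=dict_size)
```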
````diff
...
 ## Training
-Training is simple: just run ``` python hsigmoid_train.py ```. On its first run the program checks whether the imikolov dataset is already in the user's cache directory and downloads it automatically if not. While running, it prints training information every 100 iterations, mainly the training cost and the test cost, and saves the model once per pass.
+Training is simple: just run ``` python train.py ```. On its first run the program checks whether the imikolov dataset is already in the user's cache directory and downloads it automatically if not. While running, it prints training information every 100 iterations, mainly the training cost and the test cost, and saves the model once per pass.
 
 ## Prediction
-For prediction, run ``` python hsigmoid_predict.py ```. The program first loads the model, then predicts batch by batch and prints the results. The most important step of prediction is to recover the encoding path from the output probabilities and then walk that path to obtain the final predicted class; this logic is implemented as follows:
+For prediction, run ``` python infer.py ```. The program first loads the model, then predicts batch by batch and prints the results. The most important step of prediction is to recover the encoding path from the output probabilities and then walk that path to obtain the final predicted class; this logic is implemented as follows:
 
 ```python
 def decode_res(infer_res, dict_size):
...
````
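The body of `decode_res` is elided above. Purely to illustrate the idea just described, and not as the repository's implementation, here is a minimal sketch of decoding one sample; the heap-style node numbering and the 0.5 threshold are assumptions of the sketch:

```python
def decode_path_sketch(node_probs, dict_size):
    """Hypothetical sketch: walk a complete binary tree whose
    dict_size - 1 internal nodes carry the sigmoid outputs, and
    return the class id of the leaf where the path ends.

    node_probs: sequence of length dict_size - 1 with P(branch right)
    for each internal node. Heap numbering is assumed: node i has
    children 2*i + 1 and 2*i + 2, and indices >= dict_size - 1 are leaves.
    """
    n_internal = dict_size - 1
    idx = 0  # start at the root
    while idx < n_internal:  # still at an internal node
        idx = 2 * idx + 2 if node_probs[idx] > 0.5 else 2 * idx + 1
    return idx - n_internal  # leaf offset serves as the predicted class id


# With dict_size = 4 there are 3 internal nodes; the probabilities below
# branch right at the root, then left, ending at leaf/class 2.
assert decode_path_sketch([0.9, 0.2, 0.3], 4) == 2
```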
hsigmoid/index.html

The changes to this file (hunks @@ -92,7 +92,7 @@, @@ -102,41 +102,27 @@ and @@ -147,27 +133,26 @@) mirror the hsigmoid/README.md changes above line for line; index.html is the rendered HTML copy of that README.
hsigmoid/hsigmoid_predict.py → hsigmoid/infer.py

````diff
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+import os
+import logging
+import gzip
+
 import paddle.v2 as paddle
-from hsigmoid_conf import network_conf
-import gzip
+from network_conf import ngram_lm
+
+logger = logging.getLogger("paddle")
+logger.setLevel(logging.WARNING)
 
 def decode_res(infer_res, dict_size):
...
@@ -45,21 +50,20 @@ def predict(batch_ins, idx_word_dict, dict_size, prediction_layer, parameters):
     # Ouput format: word1 word2 word3 word4 -> predict label
     for i, ins in enumerate(batch_ins):
-        print(idx_word_dict[ins[0]] + ' ' + \
-              idx_word_dict[ins[1]] + ' ' + \
-              idx_word_dict[ins[2]] + ' ' + \
-              idx_word_dict[ins[3]] + ' ' + \
-              ' -> ' + predict_words[i])
+        print(" ".join([idx_word_dict[w]
+                        for w in ins]) + " -> " + predict_words[i])
 
-def main():
+def main(model_path):
+    assert os.path.exists(model_path), "trained model does not exist."
+
     paddle.init(use_gpu=False, trainer_count=1)
     word_dict = paddle.dataset.imikolov.build_dict(min_word_freq=2)
     dict_size = len(word_dict)
-    prediction_layer = network_conf(
+    prediction_layer = ngram_lm(
         is_train=False, hidden_size=256, embed_size=32, dict_size=dict_size)
 
-    with gzip.open('./models/model_pass_00000.tar.gz') as f:
+    with gzip.open(model_path, "r") as f:
         parameters = paddle.parameters.Parameters.from_tar(f)
 
     idx_word_dict = dict((v, k) for k, v in word_dict.items())
...
@@ -79,5 +83,5 @@ def main():
         parameters)
 
-if __name__ == '__main__':
-    main()
+if __name__ == "__main__":
+    main("models/hsigmoid_batch_00010.tar.gz")
````
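The refactored print above collapses four manual concatenations into one join. A toy illustration of the resulting output format, with made-up words and ids (nothing here comes from the dataset):

```python
# Hypothetical stand-ins for one batch entry and its prediction.
idx_word_dict = {0: "the", 1: "cat", 2: "sat", 3: "on"}
ins = [0, 1, 2, 3]           # word ids of the 4-gram context
predict_words = ["mat"]      # decoded prediction for this sample

print(" ".join([idx_word_dict[w] for w in ins]) + " -> " + predict_words[0])
# Output: the cat sat on -> mat
```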
hsigmoid/hsigmoid_conf.py → hsigmoid/network_conf.py

````diff
...
@@ -5,32 +5,22 @@ import math
 import paddle.v2 as paddle
 
-def network_conf(hidden_size, embed_size, dict_size, is_train=True):
-    first_word = paddle.layer.data(
-        name='firstw', type=paddle.data_type.integer_value(dict_size))
-    second_word = paddle.layer.data(
-        name='secondw', type=paddle.data_type.integer_value(dict_size))
-    third_word = paddle.layer.data(
-        name='thirdw', type=paddle.data_type.integer_value(dict_size))
-    fourth_word = paddle.layer.data(
-        name='fourthw', type=paddle.data_type.integer_value(dict_size))
-    target_word = paddle.layer.data(
-        name='fifthw', type=paddle.data_type.integer_value(dict_size))
-
+def ngram_lm(hidden_size, embed_size, dict_size, gram_num=4, is_train=True):
+    emb_layers = []
     embed_param_attr = paddle.attr.Param(
         name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0)
-    embed_first_word = paddle.layer.embedding(
-        input=first_word, size=embed_size, param_attr=embed_param_attr)
-    embed_second_word = paddle.layer.embedding(
-        input=second_word, size=embed_size, param_attr=embed_param_attr)
-    embed_third_word = paddle.layer.embedding(
-        input=third_word, size=embed_size, param_attr=embed_param_attr)
-    embed_fourth_word = paddle.layer.embedding(
-        input=fourth_word, size=embed_size, param_attr=embed_param_attr)
+    for i in range(gram_num):
+        word = paddle.layer.data(
+            name="__word%02d__" % (i),
+            type=paddle.data_type.integer_value(dict_size))
+        emb_layers.append(
+            paddle.layer.embedding(
+                input=word, size=embed_size, param_attr=embed_param_attr))
 
-    embed_context = paddle.layer.concat(input=[
-        embed_first_word, embed_second_word, embed_third_word, embed_fourth_word
-    ])
+    target_word = paddle.layer.data(
+        name="__target_word__", type=paddle.data_type.integer_value(dict_size))
+
+    embed_context = paddle.layer.concat(input=emb_layers)
 
     hidden_layer = paddle.layer.fc(
         input=embed_context,
...
@@ -46,15 +36,14 @@ def network_conf(hidden_size, embed_size, dict_size, is_train=True):
             input=hidden_layer,
             label=target_word,
             num_classes=dict_size,
-            param_attr=paddle.attr.Param(name='sigmoid_w'),
-            bias_attr=paddle.attr.Param(name='sigmoid_b'))
+            param_attr=paddle.attr.Param(name="sigmoid_w"),
+            bias_attr=paddle.attr.Param(name="sigmoid_b"))
         return cost
     else:
-        with paddle.layer.mixed(
-                size=dict_size - 1,
-                act=paddle.activation.Sigmoid(),
-                bias_attr=paddle.attr.Param(name='sigmoid_b')) as prediction:
-            prediction += paddle.layer.trans_full_matrix_projection(
-                input=hidden_layer,
-                param_attr=paddle.attr.Param(name='sigmoid_w'))
+        prediction = paddle.layer.fc(
+            size=dict_size - 1,
+            input=hidden_layer,
+            act=paddle.activation.Sigmoid(),
+            bias_attr=paddle.attr.Param(name="sigmoid_b"),
+            param_attr=paddle.attr.Param(name="sigmoid_w"))
         return prediction
````
hsigmoid/hsigmoid_train.py → hsigmoid/train.py

````diff
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+import os
+import logging
+import gzip
+
 import paddle.v2 as paddle
-from hsigmoid_conf import network_conf
-import gzip
+from network_conf import ngram_lm
+
+logger = logging.getLogger("paddle")
+logger.setLevel(logging.INFO)
 
-def main():
+def main(save_dir="models"):
+    if not os.path.exists(save_dir):
+        os.mkdir(save_dir)
+
     paddle.init(use_gpu=False, trainer_count=1)
     word_dict = paddle.dataset.imikolov.build_dict(min_word_freq=2)
     dict_size = len(word_dict)
-    cost = network_conf(
-        hidden_size=256, embed_size=32, dict_size=dict_size)
+    cost = ngram_lm(
+        is_train=True, hidden_size=256, embed_size=32, dict_size=dict_size)
 
     def event_handler(event):
         if isinstance(event, paddle.event.EndPass):
-            model_name = './models/model_pass_%05d.tar.gz' % event.pass_id
-            print("Save model into %s ..." % model_name)
-            with gzip.open(model_name, 'w') as f:
+            model_name = os.path.join(save_dir, "hsigmoid_pass_%05d.tar.gz" %
+                                      event.pass_id)
+            logger.info("Save model into %s ..." % model_name)
+            with gzip.open(model_name, "w") as f:
                 parameters.to_tar(f)
 
         if isinstance(event, paddle.event.EndIteration):
-            if event.batch_id % 100 == 0:
+            if event.batch_id and event.batch_id % 10 == 0:
                 result = trainer.test(
                     paddle.batch(
                         paddle.dataset.imikolov.test(word_dict, 5), 32))
-                print("Pass %d, Batch %d, Cost %f, Test Cost %f" %
-                      (event.pass_id, event.batch_id, event.cost, result.cost))
+                logger.info("Pass %d, Batch %d, Cost %f, Test Cost %f" %
+                            (event.pass_id, event.batch_id, event.cost,
+                             result.cost))
 
-    feeding = {'firstw': 0, 'secondw': 1, 'thirdw': 2, 'fourthw': 3, 'fifthw': 4}
     parameters = paddle.parameters.create(cost)
     adam_optimizer = paddle.optimizer.Adam(
...
@@ -48,9 +49,8 @@ def main():
             lambda: paddle.dataset.imikolov.train(word_dict, 5)(),
                 buf_size=1000), 64),
         num_passes=30,
-        event_handler=event_handler,
-        feeding=feeding)
+        event_handler=event_handler)
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
````