Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
620aa569
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
620aa569
编写于
3月 04, 2017
作者:
H
helinwang
提交者:
GitHub
3月 04, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1474 from helinwang/word2vec
word2vec v2 api
上级
aa2bcf51
97a1c0c7
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
80 addition
and
0 deletion
+80
-0
demo/word2vec/train_v2.py
demo/word2vec/train_v2.py
+80
-0
未找到文件。
demo/word2vec/train_v2.py
0 → 100644
浏览文件 @
620aa569
import
math
import
paddle.v2
as
paddle
# Hyper-parameters for the N-gram word-embedding demo.

# NOTE(review): `dictsize` is not referenced by the visible code; main()
# derives the vocabulary size from the dictionary it builds. It is kept so
# that anything importing this name keeps working.
dictsize = 1953
# Width of each word embedding (the `size` of the table projection).
embsize = 32
# Width of the hidden fully-connected layer.
hiddensize = 256
# Second argument handed to paddle.dataset.imikolov.train/test; presumably
# the n-gram length (four context words plus the word to predict) -- confirm
# against the dataset reader.
N = 5
def wordemb(inlayer):
    """Return a table-projection (embedding lookup) of ``inlayer``.

    The projection has ``embsize`` outputs. Every call uses the same
    parameter name ``"_proj"``, so presumably all callers share one
    embedding table -- confirm against Paddle's parameter-sharing rules.

    Args:
        inlayer: the input layer holding the word ids to embed.

    Returns:
        The object produced by ``paddle.layer.table_projection``.
    """
    # Named `projection` rather than `wordemb` so the local does not shadow
    # the enclosing function's own name.
    projection = paddle.layer.table_projection(
        input=inlayer,
        size=embsize,
        param_attr=paddle.attr.Param(
            name="_proj",
            initial_std=0.001,
            learning_rate=1,
            l2_rate=0, ))
    return projection
def main():
    """Build the N-gram language model and train it on the imikolov dataset.

    Four context words are embedded through ``wordemb``, concatenated, passed
    through a sigmoid hidden layer with dropout, and projected by a softmax
    layer over the dictionary to predict the fifth word. Training runs for 30
    passes with an Adam optimizer; every 100 batches the model is evaluated
    on the test reader and a progress line is printed.
    """
    paddle.init(use_gpu=False, trainer_count=1)

    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)

    # Mini-batch size used for both the training and the test reader.
    batch_size = 32

    def word_input(layer_name):
        # All five inputs are integer word ids drawn from the same dictionary.
        return paddle.layer.data(
            name=layer_name,
            type=paddle.data_type.integer_value(dict_size))

    # Data layers are created in the same order as before: four context
    # words first, then the word to predict.
    firstword = word_input("firstw")
    secondword = word_input("secondw")
    thirdword = word_input("thirdw")
    fourthword = word_input("fourthw")
    nextword = word_input("fifthw")

    Efirst = wordemb(firstword)
    Esecond = wordemb(secondword)
    Ethird = wordemb(thirdword)
    Efourth = wordemb(fourthword)

    contextemb = paddle.layer.concat(
        input=[Efirst, Esecond, Ethird, Efourth])

    hidden1 = paddle.layer.fc(
        input=contextemb,
        size=hiddensize,
        act=paddle.activation.Sigmoid(),
        layer_attr=paddle.attr.Extra(drop_rate=0.5),
        bias_attr=paddle.attr.Param(learning_rate=2),
        param_attr=paddle.attr.Param(
            initial_std=1. / math.sqrt(embsize * 8), learning_rate=1))

    predictword = paddle.layer.fc(
        input=hidden1,
        size=dict_size,
        bias_attr=paddle.attr.Param(learning_rate=2),
        act=paddle.activation.Softmax())

    cost = paddle.layer.classification_cost(
        input=predictword, label=nextword)
    parameters = paddle.parameters.create(cost)
    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
    trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer)

    def event_handler(event):
        # Only end-of-iteration events are reported, and only every 100th
        # batch, because each report runs a full pass over the test reader.
        if not isinstance(event, paddle.event.EndIteration):
            return
        if event.batch_id % 100 != 0:
            return
        result = trainer.test(
            paddle.batch(
                paddle.dataset.imikolov.test(word_dict, N), batch_size))
        # Single-argument parenthesised print: identical output under
        # Python 2, and also valid syntax under Python 3.
        print("Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
            event.pass_id, event.batch_id, event.cost, event.metrics,
            result.metrics))

    trainer.train(
        paddle.batch(
            paddle.dataset.imikolov.train(word_dict, N), batch_size),
        num_passes=30,
        event_handler=event_handler)
# Run the training demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录