Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleFL
提交
c9d4647a
P
PaddleFL
项目概览
PaddlePaddle
/
PaddleFL
通知
35
Star
5
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
6
列表
看板
标记
里程碑
合并请求
4
Wiki
3
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleFL
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
6
Issue
6
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
3
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c9d4647a
编写于
10月 08, 2019
作者:
Z
zhang wenhui
提交者:
GitHub
10月 08, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2 from frankwhzhang/master
fix gru4rec demo
上级
649894b7
1af51ccd
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
26 additions
and
12 deletions
+26
-12
paddle_fl/core/trainer/fl_trainer.py
paddle_fl/core/trainer/fl_trainer.py
+2
-1
paddle_fl/examples/gru4rec_demo/fl_master.py
paddle_fl/examples/gru4rec_demo/fl_master.py
+5
-6
paddle_fl/examples/gru4rec_demo/fl_trainer.py
paddle_fl/examples/gru4rec_demo/fl_trainer.py
+15
-5
paddle_fl/examples/gru4rec_demo/run.sh
paddle_fl/examples/gru4rec_demo/run.sh
+4
-0
未找到文件。
paddle_fl/core/trainer/fl_trainer.py
浏览文件 @
c9d4647a
...
...
@@ -100,13 +100,14 @@ class FedAvgTrainer(FLTrainer):
self
.
_logger
.
debug
(
"begin to run recv program"
)
self
.
exe
.
run
(
self
.
_recv_program
)
self
.
_logger
.
debug
(
"begin to run current step"
)
self
.
exe
.
run
(
self
.
_main_program
,
loss
=
self
.
exe
.
run
(
self
.
_main_program
,
feed
=
feed
,
fetch_list
=
fetch
)
if
self
.
cur_step
%
self
.
_step
==
0
:
self
.
_logger
.
debug
(
"begin to run send program"
)
self
.
exe
.
run
(
self
.
_send_program
)
self
.
cur_step
+=
1
return
loss
def
stop
(
self
):
return
False
...
...
paddle_fl/examples/gru4rec_demo/fl_master.py
浏览文件 @
c9d4647a
...
...
@@ -9,7 +9,7 @@ class Model(object):
def
gru4rec_network
(
self
,
vocab_size
=
37483
,
hid_size
=
10
,
hid_size
=
10
0
,
init_low_bound
=-
0.04
,
init_high_bound
=
0.04
):
""" network definition """
...
...
@@ -29,7 +29,6 @@ class Model(object):
initializer
=
fluid
.
initializer
.
Uniform
(
low
=
init_low_bound
,
high
=
init_high_bound
),
learning_rate
=
emb_lr_x
),
#is_distributed=True,
is_sparse
=
False
)
fc0
=
fluid
.
layers
.
fc
(
input
=
emb
,
size
=
hid_size
*
3
,
...
...
@@ -54,7 +53,7 @@ class Model(object):
learning_rate
=
fc_lr_x
))
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
self
.
fc
,
label
=
self
.
dst_wordseq
)
acc
=
fluid
.
layers
.
accuracy
(
self
.
acc
=
fluid
.
layers
.
accuracy
(
input
=
self
.
fc
,
label
=
self
.
dst_wordseq
,
k
=
20
)
self
.
loss
=
fluid
.
layers
.
mean
(
x
=
cost
)
self
.
startup_program
=
fluid
.
default_startup_program
()
...
...
@@ -70,11 +69,11 @@ job_generator.set_optimizer(optimizer)
job_generator
.
set_losses
([
model
.
loss
])
job_generator
.
set_startup_program
(
model
.
startup_program
)
job_generator
.
set_infer_feed_and_target_names
(
[
model
.
src_wordseq
.
name
,
model
.
dst_wordseq
.
name
],
[
model
.
f
c
.
name
])
[
model
.
src_wordseq
.
name
,
model
.
dst_wordseq
.
name
],
[
model
.
loss
.
name
,
model
.
ac
c
.
name
])
build_strategy
=
FLStrategyFactory
()
build_strategy
.
fed_avg
=
True
build_strategy
.
inner_step
=
1
0
build_strategy
.
inner_step
=
1
strategy
=
build_strategy
.
create_fl_strategy
()
# endpoints will be collected through the cluster
...
...
@@ -82,5 +81,5 @@ strategy = build_strategy.create_fl_strategy()
endpoints
=
[
"127.0.0.1:8181"
]
output
=
"fl_job_config"
job_generator
.
generate_fl_job
(
strategy
,
server_endpoints
=
endpoints
,
worker_num
=
2
,
output
=
output
)
strategy
,
server_endpoints
=
endpoints
,
worker_num
=
4
,
output
=
output
)
# fl_job_config will be dispatched to workers
paddle_fl/examples/gru4rec_demo/fl_trainer.py
浏览文件 @
c9d4647a
...
...
@@ -10,7 +10,7 @@ logging.basicConfig(filename="test.log", filemode="w", format="%(asctime)s %(nam
trainer_id
=
int
(
sys
.
argv
[
1
])
# trainer id for each guest
place
=
fluid
.
CPUPlace
()
train_file_dir
=
"mid_data/node
1/0/"
train_file_dir
=
"mid_data/node
4/%d/"
%
trainer_id
job_path
=
"fl_job_config"
job
=
FLRunTimeJob
()
job
.
load_trainer_job
(
job_path
,
trainer_id
)
...
...
@@ -18,13 +18,23 @@ trainer = FLTrainerFactory().create_fl_trainer(job)
trainer
.
start
()
r
=
Gru4rec_Reader
()
train_reader
=
r
.
reader
(
train_file_dir
,
place
)
train_reader
=
r
.
reader
(
train_file_dir
,
place
,
batch_size
=
125
)
output_folder
=
"model_node4"
step_i
=
0
while
not
trainer
.
stop
():
step_i
+=
1
print
(
"batch %d start train"
%
(
step_i
))
for
data
in
train_reader
():
print
(
data
)
trainer
.
run
(
feed
=
data
,
fetch
=
[])
#print(np.array(data['src_wordseq']))
ret_avg_cost
=
trainer
.
run
(
feed
=
data
,
fetch
=
[
"mean_0.tmp_0"
])
avg_ppl
=
np
.
exp
(
ret_avg_cost
[
0
])
newest_ppl
=
np
.
mean
(
avg_ppl
)
print
(
"ppl:%.3f"
%
(
newest_ppl
))
save_dir
=
(
output_folder
+
"/epoch_%d"
)
%
step_i
if
trainer_id
==
0
:
print
(
"start save"
)
trainer
.
save_inference_program
(
save_dir
)
if
step_i
>=
40
:
break
paddle_fl/examples/gru4rec_demo/run.sh
浏览文件 @
c9d4647a
...
...
@@ -7,3 +7,7 @@ sleep 2
python
-u
fl_trainer.py 0
>
trainer0.log &
sleep
2
python
-u
fl_trainer.py 1
>
trainer1.log &
sleep
2
python
-u
fl_trainer.py 2
>
trainer2.log &
sleep
2
python
-u
fl_trainer.py 3
>
trainer3.log &
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录