PaddlePaddle / Paddle, commit 4d4322a6 (unverified)
Authored Feb 27, 2018 by 武毅 (Wu Yi); committed via GitHub on Feb 27, 2018.
Parent: ec338326

merge fluid dist tests (#8573)

* merge fluid dist tests
* update cmake
Showing 20 changed files with 495 additions and 1,682 deletions (+495 -1682)
python/paddle/fluid/tests/CMakeLists.txt                                              +0   -1
python/paddle/fluid/tests/book/test_fit_a_line.py                                     +54  -23
python/paddle/fluid/tests/book/test_image_classification.py                           +67  -36
python/paddle/fluid/tests/book/test_label_semantic_roles.py                           +77  -43
python/paddle/fluid/tests/book/test_machine_translation.py                            +54  -23
python/paddle/fluid/tests/book/test_recognize_digits.py                               +73  -57
python/paddle/fluid/tests/book/test_recommender_system.py                             +66  -33
python/paddle/fluid/tests/book/test_understand_sentiment.py                           +55  -19
python/paddle/fluid/tests/book/test_word2vec.py                                       +49  -19
python/paddle/fluid/tests/book_distribute/CMakeLists.txt                              +0   -5
python/paddle/fluid/tests/book_distribute/notest_dist_fit_a_line.py                   +0   -76
python/paddle/fluid/tests/book_distribute/notest_dist_image_classification.py         +0   -171
python/paddle/fluid/tests/book_distribute/notest_dist_label_semantic_roles.py         +0   -241
python/paddle/fluid/tests/book_distribute/notest_dist_word2vec.py                     +0   -113
python/paddle/fluid/tests/book_distribute/notest_machine_translation.py               +0   -158
python/paddle/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py        +0   -96
python/paddle/fluid/tests/book_distribute/notest_recognize_digits_mlp_dist.py         +0   -89
python/paddle/fluid/tests/book_distribute/notest_recommender_system_dist.py           +0   -217
python/paddle/fluid/tests/book_distribute/notest_understand_sentiment_conv_dist.py    +0   -126
python/paddle/fluid/tests/book_distribute/notest_understand_sentiment_dynamic_lstm.py +0   -136
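The commit message is terse, so it helps to spell out the shared refactoring, which repeats across the book tests with small per-file variations: each test gains an is_local flag, keeps the optimize_ops and params_grads returned by minimize(), moves its training loop into a nested train_loop(main_program) function, and appends an else branch that derives parameter-server endpoints from environment variables and hands the programs to fluid.DistributeTranspiler. The full block appears in the test_fit_a_line.py diff below; the other diffs reference it. The endpoint derivation is self-contained enough to sketch on its own; this is a minimal sketch where the default IP list and fallbacks are illustrative placeholders, not values from the commit:

import os

def build_endpoints():
    # Mirrors the env-var logic added to each test below; the default
    # IP list here is a made-up example, not a value from the commit.
    port = os.getenv("PADDLE_INIT_PORT", "6174")
    pserver_ips = os.getenv("PADDLE_INIT_PSERVERS",
                            "192.168.0.1,192.168.0.2")  # "ip,ip..."
    eplist = [":".join([ip, port]) for ip in pserver_ips.split(",")]
    pserver_endpoints = ",".join(eplist)  # "ip:port,ip:port..."
    current_endpoint = os.getenv("POD_IP", "192.168.0.1") + ":" + port
    return pserver_endpoints, current_endpoint

print(build_endpoints())
# ('192.168.0.1:6174,192.168.0.2:6174', '192.168.0.1:6174')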
python/paddle/fluid/tests/CMakeLists.txt (+0 -1)

@@ -7,5 +7,4 @@ endforeach()
 
 add_subdirectory(unittests)
 add_subdirectory(book)
-add_subdirectory(book_distribute)
 add_subdirectory(book_memory_optimization)
python/paddle/fluid/tests/book/test_fit_a_line.py (+54 -23)

@@ -19,9 +19,10 @@ import numpy
 import unittest
 import math
 import sys
+import os
 
 
-def train(use_cuda, save_dirname):
+def train(use_cuda, save_dirname, is_local):
     x = fluid.layers.data(name='x', shape=[13], dtype='float32')
     y_predict = fluid.layers.fc(input=x, size=1, act=None)
@@ -32,7 +33,7 @@ def train(use_cuda, save_dirname):
     avg_cost = fluid.layers.mean(cost)
 
     sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
-    sgd_optimizer.minimize(avg_cost)
+    optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
 
     BATCH_SIZE = 20
@@ -42,27 +43,57 @@ def train(use_cuda, save_dirname):
         batch_size=BATCH_SIZE)
 
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
     exe = fluid.Executor(place)
 
-    exe.run(fluid.default_startup_program())
-
-    PASS_NUM = 100
-    for pass_id in range(PASS_NUM):
-        for data in train_reader():
-            avg_loss_value, = exe.run(fluid.default_main_program(),
-                                      feed=feeder.feed(data),
-                                      fetch_list=[avg_cost])
-            print(avg_loss_value)
-            if avg_loss_value[0] < 10.0:
-                if save_dirname is not None:
-                    fluid.io.save_inference_model(save_dirname, ['x'],
-                                                  [y_predict], exe)
-                return
-            if math.isnan(float(avg_loss_value)):
-                sys.exit("got NaN loss, training failed.")
-    raise AssertionError("Fit a line cost is too large, {0:2.2}".format(
-        avg_loss_value[0]))
+    def train_loop(main_program):
+        feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
+        exe.run(fluid.default_startup_program())
+
+        PASS_NUM = 100
+        for pass_id in range(PASS_NUM):
+            for data in train_reader():
+                avg_loss_value, = exe.run(main_program,
+                                          feed=feeder.feed(data),
+                                          fetch_list=[avg_cost])
+                print(avg_loss_value)
+                if avg_loss_value[0] < 10.0:
+                    if save_dirname is not None:
+                        fluid.io.save_inference_model(save_dirname, ['x'],
+                                                      [y_predict], exe)
+                    return
+                if math.isnan(float(avg_loss_value)):
+                    sys.exit("got NaN loss, training failed.")
+        raise AssertionError("Fit a line cost is too large, {0:2.2}".format(
+            avg_loss_value[0]))
+
+    if is_local:
+        train_loop(fluid.default_main_program())
+    else:
+        port = os.getenv("PADDLE_INIT_PORT", "6174")
+        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
+        eplist = []
+        for ip in pserver_ips.split(","):
+            eplist.append(':'.join([ip, port]))
+        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
+        trainers = int(os.getenv("TRAINERS"))
+        current_endpoint = os.getenv("POD_IP") + ":" + port
+        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
+        training_role = os.getenv("TRAINING_ROLE", "TRAINER")
+        t = fluid.DistributeTranspiler()
+        t.transpile(
+            optimize_ops, params_grads, trainer_id, pservers=pserver_endpoints,
+            trainers=trainers)
+        if training_role == "PSERVER":
+            pserver_prog = t.get_pserver_program(current_endpoint)
+            pserver_startup = t.get_startup_program(current_endpoint,
+                                                    pserver_prog)
+            exe.run(pserver_startup)
+            exe.run(pserver_prog)
+        elif training_role == "TRAINER":
+            train_loop(t.get_trainer_program())
 
 
 def infer(use_cuda, save_dirname=None):
@@ -94,14 +125,14 @@ def infer(use_cuda, save_dirname=None):
     print("infer results: ", results[0])
 
 
-def main(use_cuda):
+def main(use_cuda, is_local=True):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
 
     # Directory for saving the trained model
     save_dirname = "fit_a_line.inference.model"
 
-    train(use_cuda, save_dirname)
+    train(use_cuda, save_dirname, is_local)
     infer(use_cuda, save_dirname)
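To exercise the non-local path, a trainer process would set the environment the new branch reads before calling main(). A minimal sketch, assuming the module above is importable as test_fit_a_line from the current sys.path; the values are single-machine placeholders, not values from the commit:

import os

# Names are exactly those read in the diff above; values are placeholders.
os.environ["PADDLE_INIT_PORT"] = "6174"           # pserver listen port
os.environ["PADDLE_INIT_PSERVERS"] = "127.0.0.1"  # comma-separated pserver IPs
os.environ["TRAINERS"] = "1"                      # number of trainer processes
os.environ["POD_IP"] = "127.0.0.1"                # this node's IP
os.environ["PADDLE_INIT_TRAINER_ID"] = "0"        # id of this trainer
os.environ["TRAINING_ROLE"] = "TRAINER"           # or "PSERVER" on server nodes

import test_fit_a_line  # hypothetical import path for the test module
test_fit_a_line.main(use_cuda=False, is_local=False)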
python/paddle/fluid/tests/book/test_image_classification.py (+67 -36)

@@ -21,6 +21,7 @@ import math
 import sys
 import numpy
 import unittest
+import os
 
 
 def resnet_cifar10(input, depth=32):
@@ -92,7 +93,7 @@ def vgg16_bn_drop(input):
     return fc2
 
 
-def train(net_type, use_cuda, save_dirname):
+def train(net_type, use_cuda, save_dirname, is_local):
     classdim = 10
     data_shape = [3, 32, 32]
@@ -117,7 +118,7 @@ def train(net_type, use_cuda, save_dirname):
     test_program = fluid.default_main_program().clone()
 
     optimizer = fluid.optimizer.Adam(learning_rate=0.001)
-    optimizer.minimize(avg_cost)
+    optimize_ops, params_grads = optimizer.minimize(avg_cost)
 
     BATCH_SIZE = 128
     PASS_NUM = 1
@@ -133,38 +134,68 @@ def train(net_type, use_cuda, save_dirname):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     exe = fluid.Executor(place)
     feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
-    exe.run(fluid.default_startup_program())
 
-    loss = 0.0
-    for pass_id in range(PASS_NUM):
-        for batch_id, data in enumerate(train_reader()):
-            exe.run(feed=feeder.feed(data))
-
-            if (batch_id % 10) == 0:
-                acc_list = []
-                avg_loss_list = []
-                for tid, test_data in enumerate(test_reader()):
-                    loss_t, acc_t = exe.run(program=test_program,
-                                            feed=feeder.feed(test_data),
-                                            fetch_list=[avg_cost, acc])
-                    if math.isnan(float(loss_t)):
-                        sys.exit("got NaN loss, training failed.")
-                    acc_list.append(float(acc_t))
-                    avg_loss_list.append(float(loss_t))
-                    break  # Use 1 segment for speeding up CI
-
-                acc_value = numpy.array(acc_list).mean()
-                avg_loss_value = numpy.array(avg_loss_list).mean()
-
-                print(
-                    'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.
-                    format(pass_id, batch_id + 1,
-                           float(avg_loss_value), float(acc_value)))
-
-                if acc_value > 0.01:  # Low threshold for speeding up CI
-                    fluid.io.save_inference_model(save_dirname, ["pixel"],
-                                                  [predict], exe)
-                    return
+    def train_loop(main_program):
+        exe.run(fluid.default_startup_program())
+        # [the loop above, re-indented unchanged, with
+        #  exe.run(main_program, feed=feeder.feed(data)) as the training step]
+
+    if is_local:
+        train_loop(fluid.default_main_program())
+    else:
+        # [the same PADDLE_INIT_* / DistributeTranspiler block added in
+        #  test_fit_a_line.py above]
 
 
 def infer(use_cuda, save_dirname=None):
@@ -196,14 +227,14 @@ def infer(use_cuda, save_dirname=None):
     print("infer results: ", results[0])
 
 
-def main(net_type, use_cuda):
+def main(net_type, use_cuda, is_local=True):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
 
     # Directory for saving the trained model
     save_dirname = "image_classification_" + net_type + ".inference.model"
 
-    train(net_type, use_cuda, save_dirname)
+    train(net_type, use_cuda, save_dirname, is_local)
     infer(use_cuda, save_dirname)
python/paddle/fluid/tests/book/test_label_semantic_roles.py (+77 -43)

@@ -22,6 +22,7 @@ from paddle.fluid.initializer import init_on_cpu
 import contextlib
 import time
 import unittest
+import os
 
 word_dict, verb_dict, label_dict = conll05.get_dict()
 word_dict_len = len(word_dict)
@@ -138,7 +139,7 @@ def create_random_lodtensor(lod, place, low, high):
     return res
 
 
-def train(use_cuda, save_dirname=None):
+def train(use_cuda, save_dirname=None, is_local=True):
     # define network topology
     word = fluid.layers.data(
         name='word_data', shape=[1], dtype='int64', lod_level=1)
@@ -178,7 +179,7 @@ def train(use_cuda, save_dirname=None):
             decay_rate=0.5,
             staircase=True),
         global_step=global_step)
-    sgd_optimizer.minimize(avg_cost)
+    optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
 
     # TODO(qiao)
     # add dependency track and move this config before optimizer
@@ -204,45 +205,78 @@ def train(use_cuda, save_dirname=None):
         place=place)
     exe = fluid.Executor(place)
 
-    exe.run(fluid.default_startup_program())
-
-    embedding_param = fluid.global_scope().find_var(
-        embedding_name).get_tensor()
-    embedding_param.set(
-        load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
-        place)
-
-    start_time = time.time()
-    batch_id = 0
-    for pass_id in xrange(PASS_NUM):
-        chunk_evaluator.reset(exe)
-        for data in train_data():
-            cost, precision, recall, f1_score = exe.run(
-                fluid.default_main_program(),
-                feed=feeder.feed(data),
-                fetch_list=[avg_cost] + chunk_evaluator.metrics)
-            pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(
-                exe)
-
-            if batch_id % 10 == 0:
-                print("avg_cost:" + str(cost) + " precision:" + str(
-                    precision) + " recall:" + str(recall) + " f1_score:" + str(
-                        f1_score) + " pass_precision:" + str(
-                            pass_precision) + " pass_recall:" + str(pass_recall)
-                      + " pass_f1_score:" + str(pass_f1_score))
-                if batch_id != 0:
-                    print("second per batch: " + str((time.time() - start_time)
-                                                     / batch_id))
-                # Set the threshold low to speed up the CI test
-                if float(pass_precision) > 0.05:
-                    if save_dirname is not None:
-                        # TODO(liuyiqun): Change the target to crf_decode
-                        fluid.io.save_inference_model(save_dirname, [
-                            'word_data', 'verb_data', 'ctx_n2_data',
-                            'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
-                            'ctx_p2_data', 'mark_data'
-                        ], [feature_out], exe)
-                    return
-
-            batch_id = batch_id + 1
+    def train_loop(main_program):
+        exe.run(fluid.default_startup_program())
+        # [the embedding_param initialisation and the training loop above,
+        #  re-indented unchanged, with exe.run(main_program, ...) in place
+        #  of exe.run(fluid.default_main_program(), ...)]
+
+    if is_local:
+        train_loop(fluid.default_main_program())
+    else:
+        # [the same PADDLE_INIT_* / DistributeTranspiler block added in
+        #  test_fit_a_line.py above]
 
 
 def infer(use_cuda, save_dirname=None):
@@ -308,14 +342,14 @@ def infer(use_cuda, save_dirname=None):
     print("Inference Shape: ", np_data.shape)
 
 
-def main(use_cuda):
+def main(use_cuda, is_local=True):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
 
     # Directory for saving the trained model
     save_dirname = "label_semantic_roles.inference.model"
 
-    train(use_cuda, save_dirname)
+    train(use_cuda, save_dirname, is_local)
     infer(use_cuda, save_dirname)
python/paddle/fluid/tests/book/test_machine_translation.py (+54 -23)

@@ -20,6 +20,7 @@ import paddle.fluid.framework as framework
 import paddle.fluid.layers as pd
 from paddle.fluid.executor import Executor
 import unittest
+import os
 
 dict_size = 30000
 source_dict_dim = target_dict_dim = dict_size
@@ -168,7 +169,7 @@ def to_lodtensor(data, place):
     return res
 
 
-def train_main(use_cuda, is_sparse):
+def train_main(use_cuda, is_sparse, is_local=True):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
@@ -181,7 +182,7 @@ def train_main(use_cuda, is_sparse):
     avg_cost = pd.mean(cost)
 
     optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
-    optimizer.minimize(avg_cost)
+    optimize_ops, params_grads = optimizer.minimize(avg_cost)
 
     train_data = paddle.batch(
         paddle.reader.shuffle(
@@ -190,27 +191,57 @@ def train_main(use_cuda, is_sparse):
     exe = Executor(place)
 
-    exe.run(framework.default_startup_program())
-
-    batch_id = 0
-    for pass_id in xrange(1):
-        for data in train_data():
-            word_data = to_lodtensor(map(lambda x: x[0], data), place)
-            trg_word = to_lodtensor(map(lambda x: x[1], data), place)
-            trg_word_next = to_lodtensor(map(lambda x: x[2], data), place)
-            outs = exe.run(framework.default_main_program(),
-                           feed={
-                               'src_word_id': word_data,
-                               'target_language_word': trg_word,
-                               'target_language_next_word': trg_word_next
-                           },
-                           fetch_list=[avg_cost])
-            avg_cost_val = np.array(outs[0])
-            print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
-                  " avg_cost=" + str(avg_cost_val))
-            if batch_id > 3:
-                break
-            batch_id += 1
+    def train_loop(main_program):
+        exe.run(framework.default_startup_program())
+        # [the loop above, re-indented unchanged, with
+        #  exe.run(main_program, ...) in place of
+        #  exe.run(framework.default_main_program(), ...)]
+
+    if is_local:
+        train_loop(framework.default_main_program())
+    else:
+        # [the same PADDLE_INIT_* / DistributeTranspiler block added in
+        #  test_fit_a_line.py above]
 
 
 def decode_main(use_cuda, is_sparse):
python/paddle/fluid/tests/book/test_recognize_digits.py (+73 -57)

@@ -20,27 +20,7 @@ import numpy
 import unittest
 import math
 import sys
-
-
-def parse_arg():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "nn_type",
-        help="The neural network type, in ['mlp', 'conv']",
-        type=str,
-        choices=['mlp', 'conv'])
-    parser.add_argument(
-        "--parallel",
-        help='Run in parallel or not',
-        default=False,
-        action="store_true")
-    parser.add_argument(
-        "--use_cuda",
-        help="Run the program by using CUDA",
-        default=False,
-        action="store_true")
-    return parser.parse_args()
-
+import os
 
 BATCH_SIZE = 64
@@ -83,7 +63,8 @@ def train(nn_type,
           parallel,
           save_dirname=None,
           model_filename=None,
-          params_filename=None):
+          params_filename=None,
+          is_local=True):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
     img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
@@ -114,12 +95,11 @@ def train(nn_type,
     test_program = fluid.default_main_program().clone()
 
     optimizer = fluid.optimizer.Adam(learning_rate=0.001)
-    optimizer.minimize(avg_loss)
+    optimize_ops, params_grads = optimizer.minimize(avg_loss)
 
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     exe = fluid.Executor(place)
-    exe.run(fluid.default_startup_program())
 
     train_reader = paddle.batch(
         paddle.reader.shuffle(
@@ -129,39 +109,74 @@ def train(nn_type,
         paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
     feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
 
-    PASS_NUM = 100
-    for pass_id in range(PASS_NUM):
-        for batch_id, data in enumerate(train_reader()):
-            # train a mini-batch, fetch nothing
-            exe.run(feed=feeder.feed(data))
-            if (batch_id + 1) % 10 == 0:
-                acc_set = []
-                avg_loss_set = []
-                for test_data in test_reader():
-                    acc_np, avg_loss_np = exe.run(program=test_program,
-                                                  feed=feeder.feed(test_data),
-                                                  fetch_list=[acc, avg_loss])
-                    acc_set.append(float(acc_np))
-                    avg_loss_set.append(float(avg_loss_np))
-                # get test acc and loss
-                acc_val = numpy.array(acc_set).mean()
-                avg_loss_val = numpy.array(avg_loss_set).mean()
-                if float(acc_val) > 0.2:  # Smaller value to increase CI speed
-                    if save_dirname is not None:
-                        fluid.io.save_inference_model(
-                            save_dirname, ["img"], [prediction],
-                            exe,
-                            model_filename=model_filename,
-                            params_filename=params_filename)
-                    return
-                else:
-                    print(
-                        'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.
-                        format(pass_id, batch_id + 1,
-                               float(avg_loss_val), float(acc_val)))
-                    if math.isnan(float(avg_loss_val)):
-                        sys.exit("got NaN loss, training failed.")
-    raise AssertionError("Loss of recognize digits is too large")
+    def train_loop(main_program):
+        exe.run(fluid.default_startup_program())
+        # [the loop above, re-indented unchanged, with
+        #  exe.run(main_program, feed=feeder.feed(data)) as the training step]
+
+    if is_local:
+        train_loop(fluid.default_main_program())
+    else:
+        port = os.getenv("PADDLE_INIT_PORT", "6174")
+        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
+        eplist = []
+        for ip in pserver_ips.split(","):
+            eplist.append(':'.join([ip, port]))
+        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
+        pserver_endpoints = os.getenv("PSERVERS")
+        trainers = int(os.getenv("TRAINERS"))
+        current_endpoint = os.getenv("POD_IP") + ":" + port
+        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
+        training_role = os.getenv("TRAINING_ROLE", "TRAINER")
+        t = fluid.DistributeTranspiler()
+        t.transpile(
+            optimize_ops, params_grads, trainer_id, pservers=pserver_endpoints,
+            trainers=trainers)
+        if training_role == "PSERVER":
+            pserver_prog = t.get_pserver_program(current_endpoint)
+            pserver_startup = t.get_startup_program(current_endpoint,
+                                                    pserver_prog)
+            exe.run(pserver_startup)
+            exe.run(pserver_prog)
+        elif training_role == "TRAINER":
+            train_loop(t.get_trainer_program())
 
 
 def infer(use_cuda,
@@ -208,6 +223,7 @@ def main(use_cuda, parallel, nn_type, combine):
         model_filename = "__model_combined__"
         params_filename = "__params_combined__"
 
+    # call train() with is_local argument to run distributed train
     train(
         nn_type=nn_type,
         use_cuda=use_cuda,
python/paddle/fluid/tests/book/test_recommender_system.py (+66 -33)

@@ -14,6 +14,7 @@
 
 import math
 import sys
+import os
 import numpy as np
 import paddle.v2 as paddle
 import paddle.fluid as fluid
@@ -152,19 +153,18 @@ def model():
     return scale_infer, avg_cost
 
 
-def train(use_cuda, save_dirname):
+def train(use_cuda, save_dirname, is_local=True):
     scale_infer, avg_cost = model()
 
     # test program
     test_program = fluid.default_main_program().clone()
 
     sgd_optimizer = SGDOptimizer(learning_rate=0.2)
-    opts = sgd_optimizer.minimize(avg_cost)
+    optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
 
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 
     exe = Executor(place)
-    exe.run(framework.default_startup_program())
 
     train_reader = paddle.batch(
         paddle.reader.shuffle(
@@ -212,36 +212,69 @@ def train(use_cuda, save_dirname):
             feed_tensors[key] = tensor
         return feed_tensors
 
-    PASS_NUM = 100
-    for pass_id in range(PASS_NUM):
-        for batch_id, data in enumerate(train_reader()):
-            # train a mini-batch
-            outs = exe.run(program=fluid.default_main_program(),
-                           feed=func_feed(feeding, data),
-                           fetch_list=[avg_cost])
-            out = np.array(outs[0])
-            if (batch_id + 1) % 10 == 0:
-                avg_cost_set = []
-                for test_data in test_reader():
-                    avg_cost_np = exe.run(program=test_program,
-                                          feed=func_feed(feeding, test_data),
-                                          fetch_list=[avg_cost])
-                    avg_cost_set.append(avg_cost_np[0])
-                    break  # test only 1 segment for speeding up CI
-
-                # get test avg_cost
-                test_avg_cost = np.array(avg_cost_set).mean()
-                if test_avg_cost < 6.0:
-                    # if avg_cost less than 6.0, we think our code is good.
-                    if save_dirname is not None:
-                        fluid.io.save_inference_model(save_dirname, [
-                            "user_id", "gender_id", "age_id", "job_id",
-                            "movie_id", "category_id", "movie_title"
-                        ], [scale_infer], exe)
-                    return
-
-            if math.isnan(float(out[0])):
-                sys.exit("got NaN loss, training failed.")
+    def train_loop(main_program):
+        exe.run(framework.default_startup_program())
+        # [the loop above, re-indented unchanged, with program=main_program
+        #  in place of program=fluid.default_main_program()]
+
+    if is_local:
+        train_loop(fluid.default_main_program())
+    else:
+        # [the same PADDLE_INIT_* / DistributeTranspiler block added in
+        #  test_fit_a_line.py above]
 
 
 def infer(use_cuda, save_dirname=None):
python/paddle/fluid/tests/book/test_understand_sentiment.py (+55 -19)

@@ -20,6 +20,7 @@ import contextlib
 import math
 import numpy as np
 import sys
+import os
 
 
 def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32,
@@ -132,7 +133,12 @@ def create_random_lodtensor(lod, place, low, high):
     return res
 
 
-def train(word_dict, net_method, use_cuda, parallel=False, save_dirname=None):
+def train(word_dict,
+          net_method,
+          use_cuda,
+          parallel=False,
+          save_dirname=None,
+          is_local=True):
     BATCH_SIZE = 128
     PASS_NUM = 5
     dict_dim = len(word_dict)
@@ -164,7 +170,7 @@ def train(word_dict, net_method, use_cuda, parallel=False, save_dirname=None):
         assert save_dirname is None
 
     adagrad = fluid.optimizer.Adagrad(learning_rate=0.002)
-    adagrad.minimize(cost)
+    optimize_ops, params_grads = adagrad.minimize(cost)
 
     train_data = paddle.batch(
         paddle.reader.shuffle(
@@ -174,23 +180,53 @@ def train(word_dict, net_method, use_cuda, parallel=False, save_dirname=None):
     exe = fluid.Executor(place)
     feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
 
-    exe.run(fluid.default_startup_program())
-
-    for pass_id in xrange(PASS_NUM):
-        for data in train_data():
-            cost_val, acc_val = exe.run(fluid.default_main_program(),
-                                        feed=feeder.feed(data),
-                                        fetch_list=[cost, acc_out])
-            print("cost=" + str(cost_val) + " acc=" + str(acc_val))
-            if cost_val < 0.4 and acc_val > 0.8:
-                if save_dirname is not None:
-                    fluid.io.save_inference_model(save_dirname, ["words"],
-                                                  prediction, exe)
-                return
-            if math.isnan(float(cost_val)):
-                sys.exit("got NaN loss, training failed.")
-    raise AssertionError("Cost is too large for {0}".format(
-        net_method.__name__))
+    def train_loop(main_program):
+        exe.run(fluid.default_startup_program())
+        # [the loop above, re-indented unchanged, with
+        #  exe.run(main_program, ...) in place of
+        #  exe.run(fluid.default_main_program(), ...)]
+
+    if is_local:
+        train_loop(fluid.default_main_program())
+    else:
+        # [the same PADDLE_INIT_* / DistributeTranspiler block added in
+        #  test_fit_a_line.py above]
 
 
 def infer(word_dict, use_cuda, save_dirname=None):
python/paddle/fluid/tests/book/test_word2vec.py (+49 -19)

@@ -30,7 +30,7 @@ def create_random_lodtensor(lod, place, low, high):
     return res
 
 
-def train(use_cuda, is_sparse, is_parallel, save_dirname):
+def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
     PASS_NUM = 100
     EMBED_SIZE = 32
     HIDDEN_SIZE = 256
@@ -101,7 +101,7 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname):
     avg_cost = fluid.layers.mean(pd())
 
     sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
-    sgd_optimizer.minimize(avg_cost)
+    optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
 
     train_reader = paddle.batch(
         paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
@@ -112,23 +112,53 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname):
         feed_list=[first_word, second_word, third_word, forth_word, next_word],
         place=place)
 
-    exe.run(fluid.default_startup_program())
-
-    for pass_id in range(PASS_NUM):
-        for data in train_reader():
-            avg_cost_np = exe.run(fluid.default_main_program(),
-                                  feed=feeder.feed(data),
-                                  fetch_list=[avg_cost])
-            if avg_cost_np[0] < 5.0:
-                if save_dirname is not None:
-                    fluid.io.save_inference_model(save_dirname, [
-                        'firstw', 'secondw', 'thirdw', 'forthw'
-                    ], [predict_word], exe)
-                return
-            if math.isnan(float(avg_cost_np[0])):
-                sys.exit("got NaN loss, training failed.")
-
-    raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0]))
+    def train_loop(main_program):
+        exe.run(fluid.default_startup_program())
+        # [the loop above, re-indented unchanged, with
+        #  exe.run(main_program, ...) in place of
+        #  exe.run(fluid.default_main_program(), ...)]
+
+    if is_local:
+        train_loop(fluid.default_main_program())
+    else:
+        # [the same PADDLE_INIT_* / DistributeTranspiler block added in
+        #  test_fit_a_line.py above]
 
 
 def infer(use_cuda, save_dirname=None):
python/paddle/fluid/tests/book_distribute/CMakeLists.txt (deleted, 100644 → 0, -5)

file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
foreach(src ${TEST_OPS})
    py_test(${src} SRCS ${src}.py)
endforeach()

Note that this glob only registered files matching test_*.py, while every script in book_distribute was named notest_*.py, so none of the distributed variants actually ran as CI tests; that is what made it safe to fold them into the regular book tests above.
python/paddle/fluid/tests/book_distribute/notest_dist_fit_a_line.py (deleted, 100644 → 0, -76)

# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import paddle.v2 as paddle
import paddle.fluid as fluid
import os

x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')

cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)

sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)

BATCH_SIZE = 20

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.uci_housing.train(), buf_size=500),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)

t = fluid.DistributeTranspiler()
# all parameter server endpoints list for spliting parameters
pserver_endpoints = os.getenv("PSERVERS")
# server endpoint for current node
current_endpoint = os.getenv("SERVER_ENDPOINT")
# run as trainer or parameter server
training_role = os.getenv(
    "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver

t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

if training_role == "PSERVER":
    if not current_endpoint:
        print("need env SERVER_ENDPOINT")
        exit(1)
    pserver_prog = t.get_pserver_program(current_endpoint)
    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
    exe.run(pserver_startup)
    exe.run(pserver_prog)
else:
    trainer_prog = t.get_trainer_program()

    exe.run(fluid.default_startup_program())

    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            avg_loss_value = exe.run(trainer_prog,
                                     feed=feeder.feed(data),
                                     fetch_list=[avg_cost])
            print("loss:" + str(avg_loss_value))
            if avg_loss_value[0] < 10.0:
                exit(0)
    exit(1)
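For contrast, these deleted standalone scripts used an older environment convention than the merged tests: a pre-joined PSERVERS list, a per-node SERVER_ENDPOINT, and a hard-coded trainer count. A hypothetical launch of the script above might have looked like this sketch, where the script path and all values are illustrative, not documented invocations:

import os
import subprocess

# Env names come from the deleted script above; values are illustrative.
env = dict(os.environ,
           PSERVERS="127.0.0.1:6174",         # pre-joined ip:port list
           SERVER_ENDPOINT="127.0.0.1:6174",  # this pserver's own endpoint
           TRAINING_ROLE="PSERVER")           # or "TRAINER" (the default)
subprocess.Popen(["python", "notest_dist_fit_a_line.py"], env=env)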
python/paddle/fluid/tests/book_distribute/notest_dist_image_classification.py (deleted, 100644 → 0, -171)

# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import paddle.v2 as paddle
import paddle.fluid as fluid
import os
import sys

TRAINERS = 5
BATCH_SIZE = 128
PASS_NUM = 100


def resnet_cifar10(input, depth=32):
    def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
        tmp = fluid.layers.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=False)
        return fluid.layers.batch_norm(input=tmp, act=act)

    def shortcut(input, ch_in, ch_out, stride):
        if ch_in != ch_out:
            return conv_bn_layer(input, ch_out, 1, stride, 0, None)
        else:
            return input

    def basicblock(input, ch_in, ch_out, stride):
        tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None)
        short = shortcut(input, ch_in, ch_out, stride)
        return fluid.layers.elementwise_add(x=tmp, y=short, act='relu')

    def layer_warp(block_func, input, ch_in, ch_out, count, stride):
        tmp = block_func(input, ch_in, ch_out, stride)
        for i in range(1, count):
            tmp = block_func(tmp, ch_out, ch_out, 1)
        return tmp

    assert (depth - 2) % 6 == 0
    n = (depth - 2) / 6
    conv1 = conv_bn_layer(
        input=input, ch_out=16, filter_size=3, stride=1, padding=1)
    res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
    pool = fluid.layers.pool2d(
        input=res3, pool_size=8, pool_type='avg', pool_stride=1)
    return pool


def vgg16_bn_drop(input):
    def conv_block(input, num_filter, groups, dropouts):
        return fluid.nets.img_conv_group(
            input=input,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act='relu',
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type='max')

    conv1 = conv_block(input, 64, 2, [0.3, 0])
    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])

    drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
    fc1 = fluid.layers.fc(input=drop, size=512, act=None)
    bn = fluid.layers.batch_norm(input=fc1, act='relu')
    drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
    fc2 = fluid.layers.fc(input=drop2, size=512, act=None)
    return fc2


classdim = 10
data_shape = [3, 32, 32]

images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')

net_type = "vgg"
if len(sys.argv) >= 2:
    net_type = sys.argv[1]

if net_type == "vgg":
    print("training vgg net")
    net = vgg16_bn_drop(images)
elif net_type == "resnet":
    print("training resnet")
    net = resnet_cifar10(images, 32)
else:
    raise ValueError("%s network is not supported" % net_type)

predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)

optimizer = fluid.optimizer.Adam(learning_rate=0.001)
optimize_ops, params_grads = optimizer.minimize(avg_cost)

accuracy = fluid.evaluator.Accuracy(input=predict, label=label)

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.cifar.train10(), buf_size=128 * 10),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
exe = fluid.Executor(place)

t = fluid.DistributeTranspiler()
# all parameter server endpoints list for spliting parameters
pserver_endpoints = os.getenv("PSERVERS")
# server endpoint for current node
current_endpoint = os.getenv("SERVER_ENDPOINT")
# run as trainer or parameter server
training_role = os.getenv(
    "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver

t.transpile(
    optimize_ops, params_grads, pservers=pserver_endpoints, trainers=TRAINERS)

if training_role == "PSERVER":
    if not current_endpoint:
        print("need env SERVER_ENDPOINT")
        exit(1)
    pserver_prog = t.get_pserver_program(current_endpoint)
    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
    exe.run(pserver_startup)
    exe.run(pserver_prog)
elif training_role == "TRAINER":
    trainer_prog = t.get_trainer_program()
    exe.run(fluid.default_startup_program())

    for pass_id in range(PASS_NUM):
        accuracy.reset(exe)
        for data in train_reader():
            loss, acc = exe.run(trainer_prog,
                                feed=feeder.feed(data),
                                fetch_list=[avg_cost] + accuracy.metrics)
            pass_acc = accuracy.eval(exe)
            print("pass_id:" + str(pass_id) + "loss:" + str(loss) +
                  " pass_acc:" + str(pass_acc))
            # this model is slow, so if we can train two mini batches,
            # we think it works properly.
    print("trainer run end")
else:
    print("environment var TRAINER_ROLE should be TRAINER os PSERVER")
    exit(1)
python/paddle/fluid/tests/book_distribute/notest_dist_label_semantic_roles.py
已删除
100644 → 0
浏览文件 @
ec338326
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
import paddle.fluid as fluid
import time
import os

word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)

mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
mix_hidden_lr = 1e-3

IS_SPARSE = True
PASS_NUM = 10
BATCH_SIZE = 20

embedding_name = 'emb'


def load_parameter(file_name, h, w):
    with open(file_name, 'rb') as f:
        f.read(16)  # skip header.
        return np.fromfile(f, dtype=np.float32).reshape(h, w)


def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
            **ignored):
    # 8 features
    predicate_embedding = fluid.layers.embedding(
        input=predicate,
        size=[pred_len, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr='vemb')

    mark_embedding = fluid.layers.embedding(
        input=mark,
        size=[mark_dict_len, mark_dim],
        dtype='float32',
        is_sparse=IS_SPARSE)

    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        fluid.layers.embedding(
            size=[word_dict_len, word_dim],
            input=x,
            param_attr=fluid.ParamAttr(
                name=embedding_name, trainable=False)) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    hidden_0_layers = [
        fluid.layers.fc(input=emb, size=hidden_dim) for emb in emb_layers
    ]

    hidden_0 = fluid.layers.sums(input=hidden_0_layers)

    lstm_0 = fluid.layers.dynamic_lstm(
        input=hidden_0,
        size=hidden_dim,
        candidate_activation='relu',
        gate_activation='sigmoid',
        cell_activation='sigmoid')

    # stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, depth):
        mix_hidden = fluid.layers.sums(input=[
            fluid.layers.fc(input=input_tmp[0], size=hidden_dim),
            fluid.layers.fc(input=input_tmp[1], size=hidden_dim)
        ])

        lstm = fluid.layers.dynamic_lstm(
            input=mix_hidden,
            size=hidden_dim,
            candidate_activation='relu',
            gate_activation='sigmoid',
            cell_activation='sigmoid',
            is_reverse=((i % 2) == 1))

        input_tmp = [mix_hidden, lstm]

    feature_out = fluid.layers.sums(input=[
        fluid.layers.fc(input=input_tmp[0], size=label_dict_len),
        fluid.layers.fc(input=input_tmp[1], size=label_dict_len)
    ])

    return feature_out


def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = fluid.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res


def main():
    # define network topology
    word = fluid.layers.data(
        name='word_data', shape=[1], dtype='int64', lod_level=1)
    predicate = fluid.layers.data(
        name='verb_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n2 = fluid.layers.data(
        name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n1 = fluid.layers.data(
        name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_0 = fluid.layers.data(
        name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p1 = fluid.layers.data(
        name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p2 = fluid.layers.data(
        name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
    mark = fluid.layers.data(
        name='mark_data', shape=[1], dtype='int64', lod_level=1)
    feature_out = db_lstm(**locals())
    target = fluid.layers.data(
        name='target', shape=[1], dtype='int64', lod_level=1)
    crf_cost = fluid.layers.linear_chain_crf(
        input=feature_out,
        label=target,
        param_attr=fluid.ParamAttr(
            name='crfw', learning_rate=mix_hidden_lr))
    avg_cost = fluid.layers.mean(crf_cost)

    # TODO(qiao)
    # check other optimizers and check why out will be NAN
    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001)
    optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)

    # TODO(qiao)
    # add dependency track and move this config before optimizer
    crf_decode = fluid.layers.crf_decoding(
        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

    chunk_evaluator = fluid.evaluator.ChunkEvaluator(
        input=crf_decode,
        label=target,
        chunk_scheme="IOB",
        num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.conll05.test(), buf_size=8192),
        batch_size=BATCH_SIZE)
    place = fluid.CPUPlace()
    feeder = fluid.DataFeeder(
        feed_list=[
            word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark,
            target
        ],
        place=place)
    exe = fluid.Executor(place)

    t = fluid.DistributeTranspiler()
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv(
        "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver
    t.transpile(
        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        trainer_prog = t.get_trainer_program()
        start_time = time.time()
        batch_id = 0
        exe.run(fluid.default_startup_program())
        embedding_param = fluid.global_scope().find_var(
            embedding_name).get_tensor()
        embedding_param.set(
            load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
            place)

        for pass_id in xrange(PASS_NUM):
            chunk_evaluator.reset(exe)
            for data in train_data():
                cost, precision, recall, f1_score = exe.run(
                    trainer_prog,
                    feed=feeder.feed(data),
                    fetch_list=[avg_cost] + chunk_evaluator.metrics)
                pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(
                    exe)
                if batch_id % 10 == 0:
                    print("avg_cost:" + str(cost) + " precision:" + str(
                        precision) + " recall:" + str(recall) + " f1_score:" +
                          str(f1_score) + " pass_precision:" + str(
                              pass_precision) + " pass_recall:" + str(
                                  pass_recall) + " pass_f1_score:" + str(
                                      pass_f1_score))
                    if batch_id != 0:
                        print("seconds per batch: " + str(
                            (time.time() - start_time) / batch_id))
                batch_id = batch_id + 1


if __name__ == '__main__':
    main()
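
The to_lodtensor helper above, repeated across several of these tests, flattens a batch of variable-length sequences into a single column tensor plus offset-based LoD metadata. A small illustration with made-up token ids, assuming the helper is in scope:

import paddle.fluid as fluid

# Two sequences of lengths 3 and 2 flatten into a [5, 1] int64 tensor whose
# LoD is the running-offset list [[0, 3, 5]].
place = fluid.CPUPlace()
tensor = to_lodtensor([[1, 2, 3], [4, 5]], place)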
python/paddle/fluid/tests/book_distribute/notest_dist_word2vec.py
deleted 100644 → 0
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import numpy as np
import paddle.v2 as paddle
import paddle.fluid as fluid
import os

PASS_NUM = 100
EMBED_SIZE = 32
HIDDEN_SIZE = 256
N = 5
BATCH_SIZE = 32
IS_SPARSE = True
TRAINERS = 2

word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)

first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64')
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')

embed_first = fluid.layers.embedding(
    input=first_word,
    size=[dict_size, EMBED_SIZE],
    dtype='float32',
    is_sparse=IS_SPARSE,
    param_attr='shared_w')
embed_second = fluid.layers.embedding(
    input=second_word,
    size=[dict_size, EMBED_SIZE],
    dtype='float32',
    is_sparse=IS_SPARSE,
    param_attr='shared_w')
embed_third = fluid.layers.embedding(
    input=third_word,
    size=[dict_size, EMBED_SIZE],
    dtype='float32',
    is_sparse=IS_SPARSE,
    param_attr='shared_w')
embed_forth = fluid.layers.embedding(
    input=forth_word,
    size=[dict_size, EMBED_SIZE],
    dtype='float32',
    is_sparse=IS_SPARSE,
    param_attr='shared_w')

concat_embed = fluid.layers.concat(
    input=[embed_first, embed_second, embed_third, embed_forth], axis=1)
hidden1 = fluid.layers.fc(input=concat_embed, size=HIDDEN_SIZE, act='sigmoid')
predict_word = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax')
cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
avg_cost = fluid.layers.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
train_reader = paddle.batch(
    paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)

t = fluid.DistributeTranspiler()
# all parameter server endpoints list for splitting parameters
pserver_endpoints = os.getenv("PSERVERS")
# server endpoint for current node
current_endpoint = os.getenv("SERVER_ENDPOINT")
# run as trainer or parameter server
training_role = os.getenv(
    "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver
t.transpile(
    optimize_ops, params_grads, pservers=pserver_endpoints, trainers=TRAINERS)

if training_role == "PSERVER":
    if not current_endpoint:
        print("need env SERVER_ENDPOINT")
        exit(1)
    pserver_prog = t.get_pserver_program(current_endpoint)
    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
    exe.run(pserver_startup)
    exe.run(pserver_prog)
elif training_role == "TRAINER":
    feeder = fluid.DataFeeder(
        feed_list=[first_word, second_word, third_word, forth_word, next_word],
        place=place)
    exe.run(fluid.default_startup_program())
    trainer_prog = t.get_trainer_program()
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            avg_cost_np = exe.run(trainer_prog,
                                  feed=feeder.feed(data),
                                  fetch_list=[avg_cost])
            print("avg_cost_np", avg_cost_np)
            if avg_cost_np[0] < 5.0:
                # if avg cost is less than 5.0, we think our code is good.
                exit(0)
else:
    print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
    exit(1)
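
Because every embedding layer above passes param_attr='shared_w', the four lookups resolve to one weight matrix. A sketch of verifying that on the trainer side, after the startup program has run, using the same global_scope()/find_var pattern as the semantic-roles test; the check itself is an assumption, not part of the original test:

# Sketch (trainer branch, post-startup): inspect the single shared table.
w = fluid.global_scope().find_var('shared_w').get_tensor()
print("shared_w shape:", np.array(w).shape)  # expected (dict_size, EMBED_SIZE)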
python/paddle/fluid/tests/book_distribute/notest_machine_translation.py
deleted 100644 → 0
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import paddle.v2 as paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.framework as framework
import paddle.fluid.layers as layers
from paddle.fluid.executor import Executor
import os

dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)
hidden_dim = 32
word_dim = 16
IS_SPARSE = True
batch_size = 10
max_length = 50
topk_size = 50
trg_dic_size = 10000

decoder_size = hidden_dim


def encoder_decoder():
    # encoder
    src_word_id = layers.data(
        name="src_word_id", shape=[1], dtype='int64', lod_level=1)
    src_embedding = layers.embedding(
        input=src_word_id,
        size=[dict_size, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr=fluid.ParamAttr(name='vemb'))

    fc1 = fluid.layers.fc(input=src_embedding, size=hidden_dim * 4, act='tanh')
    lstm_hidden0, lstm_0 = layers.dynamic_lstm(input=fc1, size=hidden_dim * 4)
    encoder_out = layers.sequence_last_step(input=lstm_hidden0)

    # decoder
    trg_language_word = layers.data(
        name="target_language_word", shape=[1], dtype='int64', lod_level=1)
    trg_embedding = layers.embedding(
        input=trg_language_word,
        size=[dict_size, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr=fluid.ParamAttr(name='vemb'))

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        current_word = rnn.step_input(trg_embedding)
        mem = rnn.memory(init=encoder_out)
        fc1 = fluid.layers.fc(input=[current_word, mem],
                              size=decoder_size,
                              act='tanh')
        out = fluid.layers.fc(input=fc1, size=target_dict_dim, act='softmax')
        rnn.update_memory(mem, fc1)
        rnn.output(out)

    return rnn()


def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = core.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res


def main():
    rnn_out = encoder_decoder()
    label = layers.data(
        name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
    cost = layers.cross_entropy(input=rnn_out, label=label)
    avg_cost = fluid.layers.mean(cost)

    optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
    optimize_ops, params_grads = optimizer.minimize(avg_cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.train(dict_size), buf_size=1000),
        batch_size=batch_size)

    place = core.CPUPlace()
    exe = Executor(place)

    t = fluid.DistributeTranspiler()
    # all parameter server endpoints list for splitting parameters
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv(
        "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver
    t.transpile(
        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        trainer_prog = t.get_trainer_program()
        exe.run(framework.default_startup_program())

        batch_id = 0
        for pass_id in xrange(2):
            for data in train_data():
                word_data = to_lodtensor(map(lambda x: x[0], data), place)
                trg_word = to_lodtensor(map(lambda x: x[1], data), place)
                trg_word_next = to_lodtensor(map(lambda x: x[2], data), place)
                outs = exe.run(trainer_prog,
                               feed={
                                   'src_word_id': word_data,
                                   'target_language_word': trg_word,
                                   'target_language_next_word': trg_word_next
                               },
                               fetch_list=[avg_cost])
                avg_cost_val = np.array(outs[0])
                print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
                      " avg_cost=" + str(avg_cost_val))
                if batch_id > 3:
                    exit(0)
                batch_id += 1
    else:
        print("environment var TRAINING_ROLE should be TRAINER or PSERVER")


if __name__ == '__main__':
    main()
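
A portability note: this is Python 2 code (xrange, bare map calls). Under Python 3 each map(...) above would be a one-shot iterator, which to_lodtensor consumes while computing sequence lengths, leaving nothing for np.concatenate. A hypothetical Python 3 port would build lists instead:

# Hypothetical Python 3 form of the feed construction above:
word_data = to_lodtensor([x[0] for x in data], place)
trg_word = to_lodtensor([x[1] for x in data], place)
trg_word_next = to_lodtensor([x[2] for x in data], place)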
python/paddle/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py
deleted 100644 → 0
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import numpy as np
import paddle.v2 as paddle
import paddle.fluid as fluid
import os

images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
conv_pool_1 = fluid.nets.simple_img_conv_pool(
    input=images,
    filter_size=5,
    num_filters=20,
    pool_size=2,
    pool_stride=2,
    act="relu")
conv_pool_2 = fluid.nets.simple_img_conv_pool(
    input=conv_pool_1,
    filter_size=5,
    num_filters=50,
    pool_size=2,
    pool_stride=2,
    act="relu")
predict = fluid.layers.fc(input=conv_pool_2, size=10, act="softmax")
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)
optimizer = fluid.optimizer.Adam(learning_rate=0.01)
optimize_ops, params_grads = optimizer.minimize(avg_cost)

accuracy = fluid.evaluator.Accuracy(input=predict, label=label)

BATCH_SIZE = 50
PASS_NUM = 3
train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=500),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)

pserver_endpoints = os.getenv("PSERVERS")  # all pserver endpoints
trainers = int(os.getenv("TRAINERS"))  # total trainer count
current_endpoint = os.getenv("SERVER_ENDPOINT")  # current pserver endpoint
training_role = os.getenv(
    "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver

if not current_endpoint:
    print("need env SERVER_ENDPOINT")
    exit(1)

t = fluid.DistributeTranspiler()
t.transpile(
    optimize_ops, params_grads, pservers=pserver_endpoints, trainers=trainers)

if training_role == "PSERVER":
    pserver_prog = t.get_pserver_program(current_endpoint)
    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
    exe.run(pserver_startup)
    exe.run(pserver_prog)
elif training_role == "TRAINER":
    trainer_prog = t.get_trainer_program()
    feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
    # TODO(typhoonzero): change trainer startup program to fetch parameters from pserver
    exe.run(fluid.default_startup_program())

    for pass_id in range(PASS_NUM):
        accuracy.reset(exe)
        batch_id = 0
        for data in train_reader():
            loss, acc = exe.run(trainer_prog,
                                feed=feeder.feed(data),
                                fetch_list=[avg_cost] + accuracy.metrics)
            pass_acc = accuracy.eval(exe)
            if batch_id % 100 == 0:
                print("batch_id %d, loss: %f, acc: %f" % (batch_id, loss,
                                                          pass_acc))
            batch_id += 1

        pass_acc = accuracy.eval(exe)
        print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
else:
    print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
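
Unlike its siblings, which hard-code trainers=2, this test takes the trainer count from the TRAINERS variable; note that int(os.getenv("TRAINERS")) raises a TypeError when the variable is unset. A more forgiving read (the fallback of one trainer is an assumption, not the test's behavior) would be:

trainers = int(os.getenv("TRAINERS", "1"))  # assumed single-trainer fallback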
python/paddle/fluid/tests/book_distribute/notest_recognize_digits_mlp_dist.py
deleted 100644 → 0
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import numpy as np
import paddle.v2 as paddle
import paddle.fluid as fluid
import os

BATCH_SIZE = 128
PASS_NUM = 100

images = fluid.layers.data(name='x', shape=[784], dtype='float32')

# TODO(aroraabhinav) Add regularization and error clipping after
# Issue 7432(https://github.com/PaddlePaddle/Paddle/issues/7432) is resolved.
hidden1 = fluid.layers.fc(input=images, size=128, act='relu')
hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')

label = fluid.layers.data(name='y', shape=[1], dtype='int64')

cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)

optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
optimize_ops, params_grads = optimizer.minimize(avg_cost)

accuracy = fluid.evaluator.Accuracy(input=predict, label=label)

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=8192),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)

t = fluid.DistributeTranspiler()
# all parameter server endpoints list for splitting parameters
pserver_endpoints = os.getenv("PSERVERS")
# server endpoint for current node
current_endpoint = os.getenv("SERVER_ENDPOINT")
# run as trainer or parameter server
training_role = os.getenv(
    "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver
t.transpile(
    optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

if training_role == "PSERVER":
    if not current_endpoint:
        print("need env SERVER_ENDPOINT")
        exit(1)
    pserver_prog = t.get_pserver_program(current_endpoint)
    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
    exe.run(pserver_startup)
    exe.run(pserver_prog)
elif training_role == "TRAINER":
    trainer_prog = t.get_trainer_program()
    feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
    exe.run(fluid.default_startup_program())

    for pass_id in range(PASS_NUM):
        accuracy.reset(exe)
        batch_id = 0
        for data in train_reader():
            loss, acc = exe.run(trainer_prog,
                                feed=feeder.feed(data),
                                fetch_list=[avg_cost] + accuracy.metrics)
            pass_acc = accuracy.eval(exe)
            if batch_id % 100 == 0:
                print("batch_id %d, loss: %f, acc: %f" % (batch_id, loss,
                                                          pass_acc))
            batch_id += 1

        pass_acc = accuracy.eval(exe)
        print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
else:
    print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
python/paddle/fluid/tests/book_distribute/notest_recommender_system_dist.py
deleted 100644 → 0
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import os
import paddle.v2 as paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers
import paddle.fluid.nets as nets
from paddle.fluid.optimizer import SGDOptimizer

IS_SPARSE = True
BATCH_SIZE = 256
PASS_NUM = 100


def get_usr_combined_features():
    USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
    uid = layers.data(name='user_id', shape=[1], dtype='int64')
    usr_emb = layers.embedding(
        input=uid,
        dtype='float32',
        size=[USR_DICT_SIZE, 32],
        param_attr='user_table',
        is_sparse=IS_SPARSE)
    usr_fc = layers.fc(input=usr_emb, size=32)

    USR_GENDER_DICT_SIZE = 2
    usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')
    usr_gender_emb = layers.embedding(
        input=usr_gender_id,
        size=[USR_GENDER_DICT_SIZE, 16],
        param_attr='gender_table',
        is_sparse=IS_SPARSE)
    usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)

    USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
    usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")
    usr_age_emb = layers.embedding(
        input=usr_age_id,
        size=[USR_AGE_DICT_SIZE, 16],
        is_sparse=IS_SPARSE,
        param_attr='age_table')
    usr_age_fc = layers.fc(input=usr_age_emb, size=16)

    USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
    usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")
    usr_job_emb = layers.embedding(
        input=usr_job_id,
        size=[USR_JOB_DICT_SIZE, 16],
        param_attr='job_table',
        is_sparse=IS_SPARSE)
    usr_job_fc = layers.fc(input=usr_job_emb, size=16)

    concat_embed = layers.concat(
        input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], axis=1)
    usr_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")
    return usr_combined_features


def get_mov_combined_features():
    MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
    mov_emb = layers.embedding(
        input=mov_id,
        dtype='float32',
        size=[MOV_DICT_SIZE, 32],
        param_attr='movie_table',
        is_sparse=IS_SPARSE)
    mov_fc = layers.fc(input=mov_emb, size=32)

    CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
    category_id = layers.data(name='category_id', shape=[1], dtype='int64')
    mov_categories_emb = layers.embedding(
        input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
    mov_categories_hidden = layers.sequence_pool(
        input=mov_categories_emb, pool_type="sum")

    MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
    mov_title_id = layers.data(name='movie_title', shape=[1], dtype='int64')
    mov_title_emb = layers.embedding(
        input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
    mov_title_conv = nets.sequence_conv_pool(
        input=mov_title_emb,
        num_filters=32,
        filter_size=3,
        act="tanh",
        pool_type="sum")

    concat_embed = layers.concat(
        input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1)
    mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")
    return mov_combined_features


def model():
    usr_combined_features = get_usr_combined_features()
    mov_combined_features = get_mov_combined_features()

    # need cos sim
    inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features)
    scale_infer = layers.scale(x=inference, scale=5.0)

    label = layers.data(name='score', shape=[1], dtype='float32')
    square_cost = layers.square_error_cost(input=scale_infer, label=label)
    avg_cost = layers.mean(square_cost)

    return avg_cost


def func_feed(feeding, data, place):
    feed_tensors = {}
    for (key, idx) in feeding.iteritems():
        tensor = core.LoDTensor()
        if key != "category_id" and key != "movie_title":
            if key == "score":
                numpy_data = np.array(map(lambda x: x[idx], data)).astype(
                    "float32")
            else:
                numpy_data = np.array(map(lambda x: x[idx], data)).astype(
                    "int64")
        else:
            numpy_data = map(lambda x: np.array(x[idx]).astype("int64"), data)
            lod_info = [len(item) for item in numpy_data]
            offset = 0
            lod = [offset]
            for item in lod_info:
                offset += item
                lod.append(offset)
            numpy_data = np.concatenate(numpy_data, axis=0)
            tensor.set_lod([lod])

        numpy_data = numpy_data.reshape([numpy_data.shape[0], 1])
        tensor.set(numpy_data, place)
        feed_tensors[key] = tensor
    return feed_tensors


def main():
    cost = model()
    optimizer = SGDOptimizer(learning_rate=0.2)
    optimize_ops, params_grads = optimizer.minimize(cost)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.movielens.train(), buf_size=8192),
        batch_size=BATCH_SIZE)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    t = fluid.DistributeTranspiler()
    # all parameter server endpoints list for splitting parameters
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv("TRAINING_ROLE", "TRAINER")
    t.transpile(
        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        exe.run(fluid.default_startup_program())
        trainer_prog = t.get_trainer_program()

        feeding = {
            'user_id': 0,
            'gender_id': 1,
            'age_id': 2,
            'job_id': 3,
            'movie_id': 4,
            'category_id': 5,
            'movie_title': 6,
            'score': 7
        }

        for pass_id in range(PASS_NUM):
            for data in train_reader():
                outs = exe.run(trainer_prog,
                               feed=func_feed(feeding, data, place),
                               fetch_list=[cost])
                out = np.array(outs[0])
                print("cost=" + str(out[0]))
                if out[0] < 6.0:
                    print("Training complete. Average cost is less than 6.0.")
                    # if avg cost less than 6.0, we think our code is good.
                    exit(0)
    else:
        print("environment var TRAINING_ROLE should be TRAINER or PSERVER")


if __name__ == '__main__':
    main()
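
func_feed stands in for the DataFeeder used elsewhere: dense columns become [N, 1] tensors, while the two sequence columns (category_id and movie_title) additionally get hand-built LoD. An illustration with one invented sample laid out per the feeding map, assuming feeding, place, and func_feed are in scope:

# Columns follow the `feeding` map: user_id, gender_id, age_id, job_id,
# movie_id, category_ids, title_ids, score (values are made up).
sample = (1, 0, 2, 10, 783, [3, 7], [20, 4, 9], 4.0)
feed = func_feed(feeding, [sample], place)
# feed['movie_title'] is a [3, 1] int64 LoDTensor with LoD [[0, 3]];
# feed['score'] is a [1, 1] float32 tensor.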
python/paddle/fluid/tests/book_distribute/notest_understand_sentiment_conv_dist.py
deleted 100644 → 0
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import os
import numpy as np
import paddle.v2 as paddle
import paddle.fluid as fluid


def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32,
                    hid_dim=32):
    emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim])
    conv_3 = fluid.nets.sequence_conv_pool(
        input=emb,
        num_filters=hid_dim,
        filter_size=3,
        act="tanh",
        pool_type="sqrt")
    conv_4 = fluid.nets.sequence_conv_pool(
        input=emb,
        num_filters=hid_dim,
        filter_size=4,
        act="tanh",
        pool_type="sqrt")
    prediction = fluid.layers.fc(input=[conv_3, conv_4],
                                 size=class_dim,
                                 act="softmax")
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(cost)
    adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002)
    optimize_ops, params_grads = adam_optimizer.minimize(avg_cost)
    accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
    return avg_cost, accuracy, accuracy.metrics[0], optimize_ops, params_grads


def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = fluid.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res


def main():
    BATCH_SIZE = 100
    PASS_NUM = 5

    word_dict = paddle.dataset.imdb.word_dict()
    dict_dim = len(word_dict)
    class_dim = 2

    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
    cost, accuracy, acc_out, optimize_ops, params_grads = convolution_net(
        data, label, input_dim=dict_dim, class_dim=class_dim)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=1000),
        batch_size=BATCH_SIZE)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    t = fluid.DistributeTranspiler()
    # all parameter server endpoints list for splitting parameters
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv(
        "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver
    t.transpile(
        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        exe.run(fluid.default_startup_program())
        trainer_prog = t.get_trainer_program()
        feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

        for pass_id in xrange(PASS_NUM):
            accuracy.reset(exe)
            for data in train_data():
                cost_val, acc_val = exe.run(trainer_prog,
                                            feed=feeder.feed(data),
                                            fetch_list=[cost, acc_out])
                pass_acc = accuracy.eval(exe)
                print("cost=" + str(cost_val) + " acc=" + str(acc_val) +
                      " pass_acc=" + str(pass_acc))
                if cost_val < 1.0 and pass_acc > 0.8:
                    exit(0)
    else:
        print("environment var TRAINING_ROLE should be TRAINER or PSERVER")


if __name__ == '__main__':
    main()
python/paddle/fluid/tests/book_distribute/notest_understand_sentiment_dynamic_lstm.py
deleted 100644 → 0
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import os
import paddle.v2 as paddle
import paddle.fluid as fluid


def stacked_lstm_net(data,
                     label,
                     input_dim,
                     class_dim=2,
                     emb_dim=128,
                     hid_dim=512,
                     stacked_num=3):
    assert stacked_num % 2 == 1

    emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim])
    # add bias attr

    # TODO(qijun) linear act
    fc1 = fluid.layers.fc(input=emb, size=hid_dim)
    lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim)

    inputs = [fc1, lstm1]

    for i in range(2, stacked_num + 1):
        fc = fluid.layers.fc(input=inputs, size=hid_dim)
        lstm, cell = fluid.layers.dynamic_lstm(
            input=fc, size=hid_dim, is_reverse=(i % 2) == 0)
        inputs = [fc, lstm]

    fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
    lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')

    prediction = fluid.layers.fc(input=[fc_last, lstm_last],
                                 size=class_dim,
                                 act='softmax')
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(cost)
    adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002)
    optimize_ops, params_grads = adam_optimizer.minimize(avg_cost)
    accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
    return avg_cost, accuracy, accuracy.metrics[0], optimize_ops, params_grads


def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = fluid.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res


def main():
    BATCH_SIZE = 100
    PASS_NUM = 5

    word_dict = paddle.dataset.imdb.word_dict()
    print("loaded word dict successfully")
    dict_dim = len(word_dict)
    class_dim = 2

    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
    cost, accuracy, acc_out, optimize_ops, params_grads = stacked_lstm_net(
        data, label, input_dim=dict_dim, class_dim=class_dim)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=1000),
        batch_size=BATCH_SIZE)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

    t = fluid.DistributeTranspiler()
    # all parameter server endpoints list for splitting parameters
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv(
        "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver
    t.transpile(
        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        exe.run(fluid.default_startup_program())
        trainer_prog = t.get_trainer_program()
        for pass_id in xrange(PASS_NUM):
            accuracy.reset(exe)
            for data in train_data():
                cost_val, acc_val = exe.run(trainer_prog,
                                            feed=feeder.feed(data),
                                            fetch_list=[cost, acc_out])
                pass_acc = accuracy.eval(exe)
                print("cost=" + str(cost_val) + " acc=" + str(acc_val) +
                      " pass_acc=" + str(pass_acc))
                if cost_val < 1.0 and acc_val > 0.8:
                    exit(0)
    else:
        print("environment var TRAINING_ROLE should be TRAINER or PSERVER")


if __name__ == '__main__':
    main()
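
A closing note on the stacked net above: the assert stacked_num % 2 == 1 pairs with the loop's is_reverse=(i % 2) == 0 schedule so that the stack always ends on a forward-running layer before max pooling. Spelled out for the default stacked_num=3 (the i=1 forward LSTM is built before the loop):

for i in range(2, 3 + 1):
    print(i, "reverse" if i % 2 == 0 else "forward")
# prints: 2 reverse / 3 forward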