Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
6bc0efb4
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6bc0efb4
编写于
12月 10, 2018
作者:
H
heqiaozhi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine interface
上级
575ae7c6
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
93 addition
and
41 deletion
+93
-41
python/paddle/fluid/async_executor.py
python/paddle/fluid/async_executor.py
+28
-14
python/paddle/fluid/distributed/downpour.py
python/paddle/fluid/distributed/downpour.py
+9
-3
python/paddle/fluid/distributed/helper.py
python/paddle/fluid/distributed/helper.py
+17
-13
python/paddle/fluid/distributed/node.py
python/paddle/fluid/distributed/node.py
+36
-8
python/paddle/fluid/distributed/ps_pb2.py
python/paddle/fluid/distributed/ps_pb2.py
+3
-3
未找到文件。
python/paddle/fluid/async_executor.py
浏览文件 @
6bc0efb4
...
@@ -24,6 +24,7 @@ from paddle.fluid.proto import data_feed_pb2
...
@@ -24,6 +24,7 @@ from paddle.fluid.proto import data_feed_pb2
from
google.protobuf
import
text_format
from
google.protobuf
import
text_format
from
.
import
io
from
.
import
io
from
.data_feed_desc
import
DataFeedDesc
from
.data_feed_desc
import
DataFeedDesc
from
.distributed
import
ps_instance
__all__
=
[
'AsyncExecutor'
]
__all__
=
[
'AsyncExecutor'
]
...
@@ -85,6 +86,7 @@ class AsyncExecutor(object):
...
@@ -85,6 +86,7 @@ class AsyncExecutor(object):
scope
=
global_scope
()
scope
=
global_scope
()
self
.
executor
=
core
.
AsyncExecutor
(
scope
,
p
)
self
.
executor
=
core
.
AsyncExecutor
(
scope
,
p
)
self
.
instance
=
ps_instance
.
PaddlePSInstance
(
"init_param"
,
1
,
2
)
def
run
(
self
,
program
,
data_feed
,
filelist
,
thread_num
,
fetch
,
debug
=
False
):
def
run
(
self
,
program
,
data_feed
,
filelist
,
thread_num
,
fetch
,
debug
=
False
):
"""
"""
...
@@ -149,26 +151,38 @@ class AsyncExecutor(object):
...
@@ -149,26 +151,38 @@ class AsyncExecutor(object):
self
.
executor
.
run_from_files
(
program_desc
,
self
.
executor
.
run_from_files
(
program_desc
,
data_feed
.
desc
(),
filelist
,
thread_num
,
data_feed
.
desc
(),
filelist
,
thread_num
,
fetch_var_names
,
debug
)
fetch_var_names
,
debug
)
self
.
instance
.
barrier_all
()
def
config_distributed_nodes
(
self
,
dist_opt
):
def
config_distributed_nodes
(
self
,
dist_opt
):
# get total rank
# get total rank
# get rank index
# get rank index
# get iplists
# get iplists
# get hadoop info
# get hadoop info
return
pass
def
get_instance
(
self
):
def
init_server
(
self
,
filename
,
index
):
return
self
.
instance
self
.
executor
.
init_server
(
filename
,
index
)
def
init_server
(
self
,
dist_desc
):
def
init_worker
(
self
,
filename
,
ips
,
nodes_cnt
,
index
):
self
.
executor
.
init_server
(
dist_desc
,
self
.
instance
.
_rankid
)
self
.
executor
.
init_worker
(
filename
,
ips
,
nodes_cnt
,
index
)
ip
=
self
.
executor
.
start_server
()
self
.
instance
.
set_ip
(
ip
)
def
start_server
(
self
):
self
.
instance
.
barrier_all
()
#wait all server start
return
self
.
executor
.
start_server
()
ips
=
self
.
instance
.
gather_ips
()
self
.
executor
.
gather_servers
(
ips
,
self
.
instance
.
get_node_cnt
())
def
gather_servers
(
self
,
ips
,
nodes_cnt
):
self
.
instance
.
barrier_all
()
#wait all worker start
self
.
executor
.
gather_servers
(
ips
,
nodes_cnt
)
self
.
instance
.
barrier_all
()
#wait init model
self
.
instance
.
barrier_all
()
#wait worker do all things
def
init_worker
(
self
,
dist_desc
):
self
.
instance
.
barrier_all
()
#wait all server start
ips
=
self
.
instance
.
gather_ips
()
self
.
executor
.
init_worker
(
dist_desc
,
ips
,
self
.
instance
.
get_node_cnt
(),
self
.
instance
.
_rankid
)
self
.
instance
.
barrier_all
()
#wait all worker start
if
self
.
instance
.
is_first_worker
():
self
.
executor
.
init_model
()
self
.
instance
.
barrier_all
()
#wait init model
def
init_model
(
self
):
def
init_model
(
self
):
self
.
executor
.
init_model
()
self
.
executor
.
init_model
()
...
...
python/paddle/fluid/distributed/downpour.py
浏览文件 @
6bc0efb4
...
@@ -46,14 +46,20 @@ class DownpourSGD(object):
...
@@ -46,14 +46,20 @@ class DownpourSGD(object):
sparse_table_index
=
0
sparse_table_index
=
0
# currently merge all dense parameters into one dense table
# currently merge all dense parameters into one dense table
dense_table_index
=
1
dense_table_index
=
1
params
=
[]
grads
=
[]
for
i
in
params_grads
:
params
.
append
(
i
[
0
])
for
i
in
params_grads
:
grads
.
append
(
i
[
1
])
server
.
add_sparse_table
(
sparse_table_index
,
self
.
learning_rate_
,
server
.
add_sparse_table
(
sparse_table_index
,
self
.
learning_rate_
,
prefetch_slots
,
prefetch_slots_emb
)
prefetch_slots
,
prefetch_slots_emb
)
server
.
add_dense_table
(
dense_table_index
,
self
.
learning_rate_
,
server
.
add_dense_table
(
dense_table_index
,
self
.
learning_rate_
,
params
_grads
[
0
],
params_grads
[
1
]
)
params
,
grads
)
worker
.
add_sparse_table
(
sparse_table_index
,
self
.
learning_rate_
,
worker
.
add_sparse_table
(
sparse_table_index
,
self
.
learning_rate_
,
prefetch_slots
,
prefetch_slots_emb
)
prefetch_slots
,
prefetch_slots_emb
)
worker
.
add_dense_table
(
dense_table_index
,
self
.
learning_rate_
,
worker
.
add_dense_table
(
dense_table_index
,
self
.
learning_rate_
,
params
_grads
[
0
],
params_grads
[
1
]
)
params
,
grads
)
ps_param
=
pslib
.
PSParameter
()
ps_param
=
pslib
.
PSParameter
()
ps_param
.
server_param
.
CopyFrom
(
server
.
get_desc
())
ps_param
.
server_param
.
CopyFrom
(
server
.
get_desc
())
ps_param
.
trainer_param
.
CopyFrom
(
worker
.
get_desc
())
ps_param
.
trainer_param
.
CopyFrom
(
worker
.
get_desc
())
...
@@ -61,4 +67,4 @@ class DownpourSGD(object):
...
@@ -61,4 +67,4 @@ class DownpourSGD(object):
# currently only support lookup_table
# currently only support lookup_table
worker_skipped_ops
=
[
"lookup_table"
,
"lookup_table_grad"
]
worker_skipped_ops
=
[
"lookup_table"
,
"lookup_table_grad"
]
ps_param_str
=
text_format
.
MessageToString
(
ps_param
)
ps_param_str
=
text_format
.
MessageToString
(
ps_param
)
return
[
ps_param
_str
,
worker_skipped_ops
]
return
[
ps_param
,
worker_skipped_ops
]
python/paddle/fluid/distributed/helper.py
浏览文件 @
6bc0efb4
from
mpi4py
import
MPI
from
mpi4py
import
MPI
import
ps_pb2
as
pslib
class
FileSystem
(
object
):
class
FileSystem
(
object
):
def
__init__
(
self
,
fs_type
=
"afs"
,
def
__init__
(
self
,
fs_type
=
"afs"
,
...
@@ -7,20 +8,23 @@ class FileSystem(object):
...
@@ -7,20 +8,23 @@ class FileSystem(object):
passwd
=
None
,
passwd
=
None
,
hadoop_bin
=
""
,
hadoop_bin
=
""
,
afs_conf
=
None
):
afs_conf
=
None
):
assert
user
not
None
assert
user
!=
None
assert
passwd
not
None
assert
passwd
!=
None
assert
hadoop_bin
not
None
assert
hadoop_bin
!=
None
fs_client
=
pslib
.
FsClientParameter
()
self
.
fs_client
=
pslib
.
FsClientParameter
()
if
fs_type
==
"afs"
:
#if fs_type == "afs":
fs_client
.
fs_type
=
pslib
.
FsApiType
.
AFS
# fs_client.fs_type = pslib.FsApiType.AFS
else
:
#else:
fs_client
.
fs_type
=
pslib
.
FsApiType
.
HDFS
# fs_client.fs_type = pslib.FsApiType.HDFS
fs_client
.
uri
=
uri
self
.
fs_client
.
uri
=
uri
fs_client
.
user
=
user
self
.
fs_client
.
user
=
user
fs_client
.
passwd
=
passwd
self
.
fs_client
.
passwd
=
passwd
fs_client
.
buffer_size
=
0
#self.fs_client.buffer_size = 0
fs_client
.
afs_conf
=
afs_conf
if
not
afs_conf
else
""
self
.
fs_client
.
hadoop_bin
=
hadoop_bin
#self.fs_client.afs_conf = afs_conf if not afs_conf else ""
def
get_desc
(
self
):
return
self
.
fs_client
class
MPIHelper
(
object
):
class
MPIHelper
(
object
):
def
__init__
(
self
):
def
__init__
(
self
):
...
...
python/paddle/fluid/distributed/node.py
浏览文件 @
6bc0efb4
...
@@ -13,24 +13,52 @@ class Worker(object):
...
@@ -13,24 +13,52 @@ class Worker(object):
class
DownpourServer
(
Server
):
class
DownpourServer
(
Server
):
def
__init__
(
self
):
def
__init__
(
self
):
self
.
server_
=
pslib
.
ServerParameter
()
self
.
server_
=
pslib
.
ServerParameter
()
self
.
server_
.
downpour_server_param
.
service_param
.
start_server_port
=
0
self
.
server_
.
downpour_server_param
.
service_param
.
server_class
=
"DownpourBrpcPsServer"
self
.
server_
.
downpour_server_param
.
service_param
.
client_class
=
"DownpourBrpcPsClient"
self
.
server_
.
downpour_server_param
.
service_param
.
service_class
=
"DownpourPsService"
self
.
server_
.
downpour_server_param
.
service_param
.
start_server_port
=
0
self
.
server_
.
downpour_server_param
.
service_param
.
server_thread_num
=
12
def
add_sparse_table
(
self
,
table_id
,
learning_rate
,
def
add_sparse_table
(
self
,
table_id
,
learning_rate
,
slot_key_vars
,
slot_value_var
):
slot_key_vars
,
slot_value_var
):
table
=
self
.
server_
.
downpour_server_param
.
downpour_table_param
.
add
()
table
=
self
.
server_
.
downpour_server_param
.
downpour_table_param
.
add
()
table
.
table_id
=
table_id
table
.
table_id
=
table_id
table
.
table_class
=
"DownpourSparseTable"
table
.
type
=
pslib
.
PS_SPARSE_TABLE
table
.
type
=
pslib
.
PS_SPARSE_TABLE
table
.
accessor
.
accessor_class
=
"DownpourFeatureValueAccessor"
table
.
accessor
.
accessor_class
=
"DownpourFeatureValueAccessor"
table
.
accessor
.
dense_sgd_param
.
adam
.
learning_rate
=
learning_rate
table
.
accessor
.
sparse_sgd_param
.
learning_rate
=
learning_rate
table
.
accessor
.
fea_dim
=
abs
(
reduce
(
lambda
x
,
y
:
x
*
y
,
table
.
accessor
.
sparse_sgd_param
.
initial_g2sum
=
3
slot_value_var
[
0
].
shape
,
1
))
table
.
accessor
.
sparse_sgd_param
.
initial_range
=
1e-4
table
.
accessor
.
sparse_sgd_param
.
weight_bounds
.
extend
([
-
10
,
10
])
table
.
accessor
.
embedx_dim
=
8
table
.
accessor
.
embedx_threshold
=
5
table
.
accessor
.
fea_dim
=
11
#table.accessor.fea_dim = abs(reduce(lambda x, y: x * y,
# slot_value_var[0].shape, 1))
table
.
accessor
.
downpour_accessor_param
.
nonclk_coeff
=
0.1
table
.
accessor
.
downpour_accessor_param
.
click_coeff
=
2
table
.
accessor
.
downpour_accessor_param
.
base_threshold
=
0.2
table
.
accessor
.
downpour_accessor_param
.
delta_threshold
=
0.15
table
.
accessor
.
downpour_accessor_param
.
delta_keep_days
=
31
table
.
accessor
.
downpour_accessor_param
.
show_click_decay_rate
=
0.999
table
.
accessor
.
downpour_accessor_param
.
delete_threshold
=
0.8
def
add_dense_table
(
self
,
table_id
,
learning_rate
,
def
add_dense_table
(
self
,
table_id
,
learning_rate
,
param_var
,
grad_var
):
param_var
,
grad_var
):
table
=
self
.
server_
.
downpour_server_param
.
downpour_table_param
.
add
()
table
=
self
.
server_
.
downpour_server_param
.
downpour_table_param
.
add
()
table
.
table_id
=
table_id
table
.
table_id
=
table_id
table
.
table_class
=
"DownpourDenseTable"
table
.
type
=
pslib
.
PS_DENSE_TABLE
table
.
type
=
pslib
.
PS_DENSE_TABLE
table
.
accessor
.
accessor_class
=
"DownpourDenseValueAccessor"
table
.
accessor
.
accessor_class
=
"DownpourDenseValueAccessor"
table
.
accessor
.
sparse_sgd_param
.
learning_rate
=
learning_rate
table
.
accessor
.
dense_sgd_param
.
name
=
"adam"
table
.
accessor
.
dense_sgd_param
.
adam
.
learning_rate
=
learning_rate
table
.
accessor
.
dense_sgd_param
.
adam
.
avg_decay_rate
=
0.999993
table
.
accessor
.
dense_sgd_param
.
adam
.
ada_decay_rate
=
0.9999
table
.
accessor
.
dense_sgd_param
.
adam
.
ada_epsilon
=
1e-8
table
.
accessor
.
dense_sgd_param
.
adam
.
mom_decay_rate
=
0.99
table
.
accessor
.
dense_sgd_param
.
naive
.
learning_rate
=
0.0002
fea_dim
=
0
fea_dim
=
0
for
param
in
param_var
:
for
param
in
param_var
:
fea_dim
+=
reduce
(
lambda
x
,
y
:
x
*
y
,
param
.
shape
,
1
)
fea_dim
+=
reduce
(
lambda
x
,
y
:
x
*
y
,
param
.
shape
,
1
)
...
@@ -44,8 +72,8 @@ class DownpourWorker(Worker):
...
@@ -44,8 +72,8 @@ class DownpourWorker(Worker):
def
__init__
(
self
,
window
):
def
__init__
(
self
,
window
):
self
.
window
=
window
self
.
window
=
window
self
.
worker_
=
pslib
.
DownpourTrainerParameter
()
self
.
worker_
=
pslib
.
DownpourTrainerParameter
()
self
.
worker_
.
pull_dense_per_batch
=
window
#
self.worker_.pull_dense_per_batch = window
self
.
worker_
.
push_dense_per_batch
=
window
#
self.worker_.push_dense_per_batch = window
def
add_sparse_table
(
self
,
table_id
,
learning_rate
,
def
add_sparse_table
(
self
,
table_id
,
learning_rate
,
slot_key_vars
,
slot_value_vars
):
slot_key_vars
,
slot_value_vars
):
...
@@ -62,8 +90,8 @@ class DownpourWorker(Worker):
...
@@ -62,8 +90,8 @@ class DownpourWorker(Worker):
param_vars
,
grad_vars
):
param_vars
,
grad_vars
):
table
=
self
.
worker_
.
dense_table
.
add
()
table
=
self
.
worker_
.
dense_table
.
add
()
table
.
table_id
=
table_id
table
.
table_id
=
table_id
table
.
dense_variable_name
.
extend
(
[
p
.
name
for
p
in
param_vars
]
)
table
.
dense_variable_name
.
extend
(
filter
(
lambda
x
:
x
.
find
(
"embedding"
)
==
-
1
,
[
p
.
name
for
p
in
param_vars
])
)
table
.
dense_gradient_variable_name
.
extend
(
[
g
.
name
for
g
in
grad_vars
]
)
table
.
dense_gradient_variable_name
.
extend
(
filter
(
lambda
x
:
x
.
find
(
"embedding"
)
==
-
1
,
[
g
.
name
for
g
in
grad_vars
])
)
def
get_desc
(
self
):
def
get_desc
(
self
):
return
self
.
worker_
return
self
.
worker_
python/paddle/fluid/distributed/ps_pb2.py
浏览文件 @
6bc0efb4
...
@@ -531,21 +531,21 @@ _SERVERSERVICEPARAMETER = _descriptor.Descriptor(
...
@@ -531,21 +531,21 @@ _SERVERSERVICEPARAMETER = _descriptor.Descriptor(
_descriptor
.
FieldDescriptor
(
_descriptor
.
FieldDescriptor
(
name
=
'server_class'
,
full_name
=
'paddle.ServerServiceParameter.server_class'
,
index
=
0
,
name
=
'server_class'
,
full_name
=
'paddle.ServerServiceParameter.server_class'
,
index
=
0
,
number
=
1
,
type
=
9
,
cpp_type
=
9
,
label
=
1
,
number
=
1
,
type
=
9
,
cpp_type
=
9
,
label
=
1
,
has_default_value
=
True
,
default_value
=
_b
(
"
Abacus
BrpcPsServer"
).
decode
(
'utf-8'
),
has_default_value
=
True
,
default_value
=
_b
(
"
Downpour
BrpcPsServer"
).
decode
(
'utf-8'
),
message_type
=
None
,
enum_type
=
None
,
containing_type
=
None
,
message_type
=
None
,
enum_type
=
None
,
containing_type
=
None
,
is_extension
=
False
,
extension_scope
=
None
,
is_extension
=
False
,
extension_scope
=
None
,
options
=
None
),
options
=
None
),
_descriptor
.
FieldDescriptor
(
_descriptor
.
FieldDescriptor
(
name
=
'client_class'
,
full_name
=
'paddle.ServerServiceParameter.client_class'
,
index
=
1
,
name
=
'client_class'
,
full_name
=
'paddle.ServerServiceParameter.client_class'
,
index
=
1
,
number
=
2
,
type
=
9
,
cpp_type
=
9
,
label
=
1
,
number
=
2
,
type
=
9
,
cpp_type
=
9
,
label
=
1
,
has_default_value
=
True
,
default_value
=
_b
(
"
Abacus
BrpcPsClient"
).
decode
(
'utf-8'
),
has_default_value
=
True
,
default_value
=
_b
(
"
Downpour
BrpcPsClient"
).
decode
(
'utf-8'
),
message_type
=
None
,
enum_type
=
None
,
containing_type
=
None
,
message_type
=
None
,
enum_type
=
None
,
containing_type
=
None
,
is_extension
=
False
,
extension_scope
=
None
,
is_extension
=
False
,
extension_scope
=
None
,
options
=
None
),
options
=
None
),
_descriptor
.
FieldDescriptor
(
_descriptor
.
FieldDescriptor
(
name
=
'service_class'
,
full_name
=
'paddle.ServerServiceParameter.service_class'
,
index
=
2
,
name
=
'service_class'
,
full_name
=
'paddle.ServerServiceParameter.service_class'
,
index
=
2
,
number
=
3
,
type
=
9
,
cpp_type
=
9
,
label
=
1
,
number
=
3
,
type
=
9
,
cpp_type
=
9
,
label
=
1
,
has_default_value
=
True
,
default_value
=
_b
(
"
Abacus
PsService"
).
decode
(
'utf-8'
),
has_default_value
=
True
,
default_value
=
_b
(
"
Downpour
PsService"
).
decode
(
'utf-8'
),
message_type
=
None
,
enum_type
=
None
,
containing_type
=
None
,
message_type
=
None
,
enum_type
=
None
,
containing_type
=
None
,
is_extension
=
False
,
extension_scope
=
None
,
is_extension
=
False
,
extension_scope
=
None
,
options
=
None
),
options
=
None
),
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录