Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
a86f11b5
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a86f11b5
编写于
12月 19, 2019
作者:
C
Chengmo
提交者:
GitHub
12月 19, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Speed GEO dense calc & communication (#21579)
* test=develop, speed dense calc & communication
上级
666c3bb9
变更
4
展开全部
显示空白变更内容
内联
并排
Showing
4 changed files
with
218 additions
and
108 deletions
+218
-108
paddle/fluid/operators/distributed/communicator.cc
paddle/fluid/operators/distributed/communicator.cc
+171
-93
paddle/fluid/operators/distributed/communicator.h
paddle/fluid/operators/distributed/communicator.h
+22
-9
python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
+24
-5
python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py
python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py
+1
-1
未找到文件。
paddle/fluid/operators/distributed/communicator.cc
浏览文件 @
a86f11b5
此差异已折叠。
点击以展开。
paddle/fluid/operators/distributed/communicator.h
浏览文件 @
a86f11b5
...
@@ -364,12 +364,14 @@ class GeoSgdCommunicator : public Communicator {
...
@@ -364,12 +364,14 @@ class GeoSgdCommunicator : public Communicator {
const
std
::
vector
<
SparseIdsMap
>&
ids_send_vec
,
const
std
::
vector
<
SparseIdsMap
>&
ids_send_vec
,
const
std
::
string
&
var_name
,
const
std
::
string
&
splited_var_name
);
const
std
::
string
&
var_name
,
const
std
::
string
&
splited_var_name
);
void
SendUpdateDenseVars
(
const
std
::
string
&
var_name
);
void
SendUpdateDenseVars
(
const
std
::
string
&
var_name
,
const
std
::
string
&
splited_var_name
);
void
SendUpdateSparseVars
(
const
std
::
string
&
var_name
,
void
SendUpdateSparseVars
(
const
std
::
string
&
var_name
,
const
std
::
string
&
splited_var_name
,
const
std
::
string
&
splited_var_name
,
const
std
::
unordered_set
<
int64_t
>&
ids_table
);
const
std
::
unordered_set
<
int64_t
>&
ids_table
);
void
RecvUpdateDenseVars
(
const
std
::
string
&
var_name
);
void
RecvUpdateDenseVars
(
const
std
::
string
&
var_name
,
const
std
::
string
&
splited_var_name
);
void
RecvUpdateSparseVars
(
const
std
::
string
&
var_name
,
void
RecvUpdateSparseVars
(
const
std
::
string
&
var_name
,
const
std
::
string
&
splited_var_name
);
const
std
::
string
&
splited_var_name
);
...
@@ -420,21 +422,32 @@ class GeoSgdCommunicator : public Communicator {
...
@@ -420,21 +422,32 @@ class GeoSgdCommunicator : public Communicator {
int
trainer_nums_
=
1
;
int
trainer_nums_
=
1
;
size_t
geo_need_push_nums_
=
100
;
size_t
geo_need_push_nums_
=
100
;
bool
is_geo_sgd_
=
false
;
bool
is_geo_sgd_
=
false
;
Scope
*
training_scope_
;
int
send_var_nums_
=
0
;
std
::
shared_ptr
<
Scope
>
delta_scope_
;
// parameter local delta: recv - old
std
::
shared_ptr
<
Scope
>
old_scope_
;
// local parameter: stores the param after the last recv
std
::
shared_ptr
<
Scope
>
pserver_scope_
;
// parameter on pserver, global scope
RpcCtxMap
send_varname_to_ctx_
;
RpcCtxMap
send_varname_to_ctx_
;
RpcCtxMap
recv_varname_to_ctx_
;
RpcCtxMap
recv_varname_to_ctx_
;
std
::
unordered_map
<
std
::
string
,
bool
>
var_list_
;
// if var is sparse, using selected rows, bool=true
// parameter for local training
Scope
*
training_scope_
;
// parameter for delta calc and send
std
::
shared_ptr
<
Scope
>
delta_scope_
;
// parameter for storing the pserver param after the last recv
std
::
shared_ptr
<
Scope
>
old_scope_
;
// parameter on pserver
std
::
shared_ptr
<
Scope
>
pserver_scope_
;
// if var is sparse, using selected rows, bool=true
std
::
unordered_map
<
std
::
string
,
bool
>
var_list_
;
std
::
shared_ptr
<
BlockingQueue
<
std
::
shared_ptr
<
SparseIdsMap
>>>
std
::
shared_ptr
<
BlockingQueue
<
std
::
shared_ptr
<
SparseIdsMap
>>>
need_push_queue_
;
need_push_queue_
;
std
::
vector
<
SparseIdsMap
>
ids_send_vec_
;
std
::
vector
<
SparseIdsMap
>
ids_send_vec_
;
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int64_t
>>
absolute_section_
;
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int64_t
>>
absolute_section_
;
std
::
unordered_map
<
std
::
string
,
int64_t
>
vars_first_dimension_
;
std
::
unique_ptr
<::
ThreadPool
>
send_threadpool_
{
nullptr
};
std
::
unique_ptr
<::
ThreadPool
>
send_threadpool_
{
nullptr
};
std
::
unique_ptr
<
std
::
thread
>
send_thread_
{
nullptr
};
std
::
unique_ptr
<
std
::
thread
>
send_thread_
{
nullptr
};
...
...
python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
浏览文件 @
a86f11b5
...
@@ -11,6 +11,9 @@
...
@@ -11,6 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""
Distribute CTR model for test fleet api
"""
from
__future__
import
print_function
from
__future__
import
print_function
...
@@ -30,10 +33,22 @@ fluid.default_main_program().random_seed = 1
...
@@ -30,10 +33,22 @@ fluid.default_main_program().random_seed = 1
class
TestDistCTR2x2
(
FleetDistRunnerBase
):
class
TestDistCTR2x2
(
FleetDistRunnerBase
):
"""
For test CTR model, using Fleet api
"""
def
net
(
self
,
batch_size
=
4
,
lr
=
0.01
):
def
net
(
self
,
batch_size
=
4
,
lr
=
0.01
):
"""
network definition
Args:
batch_size(int): the size of mini-batch for training
lr(float): learning rate of training
Returns:
avg_cost: LoDTensor of cost.
"""
dnn_input_dim
,
lr_input_dim
,
train_file_path
=
ctr_dataset_reader
.
prepare_data
(
dnn_input_dim
,
lr_input_dim
,
train_file_path
=
ctr_dataset_reader
.
prepare_data
(
)
)
""" network definition """
dnn_data
=
fluid
.
layers
.
data
(
dnn_data
=
fluid
.
layers
.
data
(
name
=
"dnn_data"
,
name
=
"dnn_data"
,
shape
=
[
-
1
,
1
],
shape
=
[
-
1
,
1
],
...
@@ -56,7 +71,8 @@ class TestDistCTR2x2(FleetDistRunnerBase):
...
@@ -56,7 +71,8 @@ class TestDistCTR2x2(FleetDistRunnerBase):
datas
=
[
dnn_data
,
lr_data
,
label
]
datas
=
[
dnn_data
,
lr_data
,
label
]
# build dnn model
# build dnn model
dnn_layer_dims
=
[
128
,
64
,
32
,
1
]
# add 128000 to test a huge dense Variable
dnn_layer_dims
=
[
128
,
128000
,
64
,
32
,
1
]
dnn_embedding
=
fluid
.
layers
.
embedding
(
dnn_embedding
=
fluid
.
layers
.
embedding
(
is_distributed
=
False
,
is_distributed
=
False
,
input
=
dnn_data
,
input
=
dnn_data
,
...
@@ -116,6 +132,11 @@ class TestDistCTR2x2(FleetDistRunnerBase):
...
@@ -116,6 +132,11 @@ class TestDistCTR2x2(FleetDistRunnerBase):
wn
.
write
(
str
(
program
))
wn
.
write
(
str
(
program
))
def
do_training
(
self
,
fleet
):
def
do_training
(
self
,
fleet
):
"""
do training using dataset, using fetch handler to catch variable
Args:
fleet(Fleet api): the fleet object of Parameter Server, define distribute training role
"""
dnn_input_dim
,
lr_input_dim
,
train_file_path
=
ctr_dataset_reader
.
prepare_data
(
dnn_input_dim
,
lr_input_dim
,
train_file_path
=
ctr_dataset_reader
.
prepare_data
(
)
)
...
@@ -163,9 +184,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
...
@@ -163,9 +184,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
exe
.
train_from_dataset
(
exe
.
train_from_dataset
(
program
=
fleet
.
main_program
,
program
=
fleet
.
main_program
,
dataset
=
dataset
,
dataset
=
dataset
,
fetch_handler
=
FH
([
self
.
avg_cost
.
name
],
fetch_handler
=
FH
([
self
.
avg_cost
.
name
],
period_secs
=
2
),
period_secs
=
2
,
return_np
=
True
),
debug
=
False
)
debug
=
False
)
pass_time
=
time
.
time
()
-
pass_start
pass_time
=
time
.
time
()
-
pass_start
...
...
python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py
浏览文件 @
a86f11b5
...
@@ -46,7 +46,7 @@ class TestDistGeoCtr_2x2(TestFleetBase):
...
@@ -46,7 +46,7 @@ class TestDistGeoCtr_2x2(TestFleetBase):
required_envs
.
update
(
need_envs
)
required_envs
.
update
(
need_envs
)
if
check_error_log
:
if
check_error_log
:
required_envs
[
"GLOG_v"
]
=
"
3
"
required_envs
[
"GLOG_v"
]
=
"
4
"
required_envs
[
"GLOG_logtostderr"
]
=
"1"
required_envs
[
"GLOG_logtostderr"
]
=
"1"
tr0_losses
,
tr1_losses
=
self
.
_run_cluster
(
model_file
,
required_envs
)
tr0_losses
,
tr1_losses
=
self
.
_run_cluster
(
model_file
,
required_envs
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录