Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
5e74c4e8
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5e74c4e8
编写于
1月 10, 2019
作者:
乔
乔龙飞 Qiao Longfei
提交者:
GitHub
1月 10, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #15100 from jacquesqiao/fix-dist-sparse-decay
fix dist sparse l2 decay
上级
2abdd5dd
40330c2c
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
47 additions
and
12 deletions
+47
-12
paddle/fluid/operators/math/selected_rows_functor.cc
paddle/fluid/operators/math/selected_rows_functor.cc
+4
-0
paddle/fluid/operators/sum_op.cc
paddle/fluid/operators/sum_op.cc
+9
-1
python/paddle/fluid/tests/unittests/dist_ctr.py
python/paddle/fluid/tests/unittests/dist_ctr.py
+9
-1
python/paddle/fluid/tests/unittests/dist_se_resnext.py
python/paddle/fluid/tests/unittests/dist_se_resnext.py
+0
-1
python/paddle/fluid/tests/unittests/test_dist_ctr.py
python/paddle/fluid/tests/unittests/test_dist_ctr.py
+14
-1
python/paddle/fluid/transpiler/distribute_transpiler.py
python/paddle/fluid/transpiler/distribute_transpiler.py
+11
-8
未找到文件。
paddle/fluid/operators/math/selected_rows_functor.cc
浏览文件 @
5e74c4e8
...
...
@@ -195,6 +195,10 @@ struct SelectedRowsAddToTensor<platform::CPUDeviceContext, T> {
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
const
framework
::
SelectedRows
&
input1
,
framework
::
Tensor
*
input2
)
{
if
(
UNLIKELY
(
input1
.
rows
().
size
()
==
0
))
{
LOG
(
WARNING
)
<<
"input selected rows is empty!"
;
return
;
}
auto
in1_height
=
input1
.
height
();
auto
in2_dims
=
input2
->
dims
();
PADDLE_ENFORCE_EQ
(
in1_height
,
in2_dims
[
0
]);
...
...
paddle/fluid/operators/sum_op.cc
浏览文件 @
5e74c4e8
...
...
@@ -41,7 +41,9 @@ class SumOp : public framework::OperatorWithKernel {
return
;
// skip runtime infershape when the input is a tensor array
}
auto
x_var_types
=
ctx
->
GetInputsVarType
(
"X"
);
auto
x_dims
=
ctx
->
GetInputsDim
(
"X"
);
size_t
N
=
x_dims
.
size
();
PADDLE_ENFORCE_GT
(
N
,
0
,
"Input tensors count should > 0."
);
if
(
N
==
1
)
{
...
...
@@ -49,7 +51,13 @@ class SumOp : public framework::OperatorWithKernel {
}
framework
::
DDim
in_dim
({
0
});
for
(
auto
&
x_dim
:
x_dims
)
{
for
(
size_t
i
=
0
;
i
<
x_dims
.
size
();
++
i
)
{
auto
&
x_dim
=
x_dims
[
i
];
// x_dim.size() == 1 means the real dim of selected rows is [0]
if
(
x_var_types
[
i
]
==
framework
::
proto
::
VarType
::
SELECTED_ROWS
&&
x_dim
.
size
()
==
1
)
{
continue
;
}
if
(
framework
::
product
(
x_dim
)
==
0
)
{
continue
;
}
...
...
python/paddle/fluid/tests/unittests/dist_ctr.py
浏览文件 @
5e74c4e8
...
...
@@ -31,6 +31,7 @@ fluid.default_main_program().random_seed = 1
class
TestDistCTR2x2
(
TestDistRunnerBase
):
def
get_model
(
self
,
batch_size
=
2
):
dnn_input_dim
,
lr_input_dim
=
dist_ctr_reader
.
load_data_meta
()
""" network definition """
dnn_data
=
fluid
.
layers
.
data
(
...
...
@@ -97,7 +98,14 @@ class TestDistCTR2x2(TestDistRunnerBase):
inference_program
=
paddle
.
fluid
.
default_main_program
().
clone
()
sgd_optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
0.0001
)
regularization
=
None
use_l2_decay
=
bool
(
os
.
getenv
(
'USE_L2_DECAY'
,
0
))
if
use_l2_decay
:
regularization
=
fluid
.
regularizer
.
L2DecayRegularizer
(
regularization_coeff
=
1e-1
)
sgd_optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
0.0001
,
regularization
=
regularization
)
sgd_optimizer
.
minimize
(
avg_cost
)
dataset
=
dist_ctr_reader
.
Dataset
()
...
...
python/paddle/fluid/tests/unittests/dist_se_resnext.py
浏览文件 @
5e74c4e8
...
...
@@ -235,7 +235,6 @@ class DistSeResneXt2x2(TestDistRunnerBase):
bd
=
[
step
*
e
for
e
in
epochs
]
base_lr
=
0.1
lr
=
[]
lr
=
[
base_lr
*
(
0.1
**
i
)
for
i
in
range
(
len
(
bd
)
+
1
)]
optimizer
=
fluid
.
optimizer
.
Momentum
(
...
...
python/paddle/fluid/tests/unittests/test_dist_ctr.py
浏览文件 @
5e74c4e8
...
...
@@ -18,7 +18,6 @@ import unittest
from
test_dist_base
import
TestDistBase
# FIXME(tangwei): sum op cannot handle the case where its inputs are empty.
class
TestDistCTR2x2
(
TestDistBase
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
True
...
...
@@ -28,5 +27,19 @@ class TestDistCTR2x2(TestDistBase):
self
.
check_with_place
(
"dist_ctr.py"
,
delta
=
1e-7
,
check_error_log
=
False
)
class
TestDistCTRWithL2Decay2x2
(
TestDistBase
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
True
self
.
_enforce_place
=
"CPU"
def
test_dist_ctr
(
self
):
need_envs
=
{
"USE_L2_DECAY"
:
"1"
}
self
.
check_with_place
(
"dist_ctr.py"
,
delta
=
1e-7
,
check_error_log
=
False
,
need_envs
=
need_envs
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/transpiler/distribute_transpiler.py
浏览文件 @
5e74c4e8
...
...
@@ -752,12 +752,6 @@ class DistributeTranspiler(object):
elif
op
not
in
lr_ops
:
self
.
_append_pserver_non_opt_ops
(
block
,
op
)
def
__op_have_grad_input__
(
op
):
for
varname
in
op
.
input_arg_names
:
if
varname
.
find
(
"@GRAD"
)
>=
0
:
return
varname
return
""
def
__clone_lr_op_sub_block__
(
op
,
program
,
lr_block
):
if
not
op
.
has_attr
(
'sub_block'
):
return
...
...
@@ -808,7 +802,7 @@ class DistributeTranspiler(object):
merged_var
=
None
for
_
,
op
in
enumerate
(
self
.
optimize_ops
):
# find the origin grad var before clipping/L2Decay,
# merged_var should be the input var name of L2Decay
grad_varname_for_block
=
op
.
attr
(
OP_ROLE_VAR_ATTR_NAME
)[
1
]
if
op
.
attr
(
OP_ROLE_VAR_ATTR_NAME
)[
0
]
==
optimize_target_param_name
:
...
...
@@ -1684,7 +1678,16 @@ class DistributeTranspiler(object):
if
self
.
config
.
enable_dc_asgd
:
new_inputs
[
key
]
=
dc
else
:
new_inputs
[
key
]
=
merged_var
# Note: this handles L2 decay on a sparse gradient, which creates a new tensor for
# the decayed gradient instead of modifying the original one in place.
origin_grad_name
=
opt_op
.
input
(
key
)[
0
]
if
core
.
kNewGradSuffix
(
)
in
origin_grad_name
and
pserver_block
.
has_var
(
origin_grad_name
):
new_grad
=
pserver_block
.
var
(
origin_grad_name
)
new_inputs
[
key
]
=
new_grad
else
:
new_inputs
[
key
]
=
merged_var
elif
key
==
"Param"
:
param_block
=
_get_param_block
(
opt_op
)
if
not
param_block
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录