Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
02d68051
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
02d68051
编写于
11月 26, 2018
作者:
J
JiabinYang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add sparsed bias grad, test=develop
上级
42470f14
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
78 addition
and
29 deletion
+78
-29
paddle/fluid/operators/hierarchical_sigmoid_op.cc
paddle/fluid/operators/hierarchical_sigmoid_op.cc
+22
-10
paddle/fluid/operators/hierarchical_sigmoid_op.h
paddle/fluid/operators/hierarchical_sigmoid_op.h
+24
-7
paddle/fluid/operators/math/matrix_bit_code.cc
paddle/fluid/operators/math/matrix_bit_code.cc
+18
-0
paddle/fluid/operators/math/matrix_bit_code.h
paddle/fluid/operators/math/matrix_bit_code.h
+5
-0
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+2
-2
python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
+7
-10
未找到文件。
paddle/fluid/operators/hierarchical_sigmoid_op.cc
浏览文件 @
02d68051
...
...
@@ -107,8 +107,9 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
"it should have shape like [N, L], L is the length of the Path"
)
.
AsDispensable
();
AddInput
(
"Bias"
,
"(LoDTensor, optional), The bias is a tensor with shape"
"[1, num_classes - 1]."
);
"(LoDTensor, optional), The bias is a tensor with shape or "
"[non_leaf_num, 1]"
"[num_classes - 1, 1]."
);
AddOutput
(
"Out"
,
"(LoDTensor, required) The output of hierarchical sigmoid operator."
...
...
@@ -148,11 +149,11 @@ class HierarchicalSigmoidGradOp : public framework::OperatorWithKernel {
"Output(W@Grad should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"X"
)),
"Output(X@Grad should not be null."
);
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"Bias"
)))
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Bias"
),
ctx
->
GetInputDim
(
"Bias"
));
}
if
(
!
ctx
->
Attrs
().
Get
<
bool
>
(
"is_sparse"
))
{
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"Bias"
)))
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Bias"
),
ctx
->
GetInputDim
(
"Bias"
));
}
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"W"
),
ctx
->
GetInputDim
(
"W"
));
}
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"X"
));
...
...
@@ -172,20 +173,31 @@ class HierarchicalSigmoidGradOpGradVarTypeInference
public:
void
operator
()(
const
framework
::
OpDesc
&
op_desc
,
framework
::
BlockDesc
*
block
)
const
override
{
auto
out_var_name
=
op_desc
.
Output
(
framework
::
GradVarName
(
"W"
)).
front
();
auto
out_W_var_name
=
op_desc
.
Output
(
framework
::
GradVarName
(
"W"
)).
front
();
auto
out_Bias_var_name
=
op_desc
.
Output
(
framework
::
GradVarName
(
"Bias"
)).
front
();
auto
attr
=
op_desc
.
GetAttr
(
"is_sparse"
);
bool
is_sparse
=
boost
::
get
<
bool
>
(
attr
);
if
(
is_sparse
)
{
VLOG
(
3
)
<<
"hierarchical_sigmoid_grad op "
<<
framework
::
GradVarName
(
"W"
)
<<
" is set to SelectedRows"
;
block
->
Var
(
out_var_name
)
block
->
Var
(
out_W_var_name
)
->
SetType
(
framework
::
proto
::
VarType
::
SELECTED_ROWS
);
VLOG
(
3
)
<<
"hierarchical_sigmoid_grad op "
<<
framework
::
GradVarName
(
"Bias"
)
<<
" is set to SelectedRows"
;
block
->
Var
(
out_Bias_var_name
)
->
SetType
(
framework
::
proto
::
VarType
::
SELECTED_ROWS
);
}
else
{
VLOG
(
3
)
<<
"hierarchical_sigmoid_grad op "
<<
framework
::
GradVarName
(
"W"
)
<<
" is set to LoDTensor"
;
block
->
Var
(
out_var_name
)
->
SetType
(
framework
::
proto
::
VarType
::
LOD_TENSOR
);
block
->
Var
(
out_W_var_name
)
->
SetType
(
framework
::
proto
::
VarType
::
LOD_TENSOR
);
VLOG
(
3
)
<<
"hierarchical_sigmoid_grad op "
<<
framework
::
GradVarName
(
"Bias"
)
<<
" is set to SelectedRows"
;
block
->
Var
(
out_Bias_var_name
)
->
SetType
(
framework
::
proto
::
VarType
::
LOD_TENSOR
);
}
block
->
Var
(
out_var_name
)
->
SetDataType
(
block
->
Var
(
"W"
)
->
GetDataType
());
block
->
Var
(
out_
W_
var_name
)
->
SetDataType
(
block
->
Var
(
"W"
)
->
GetDataType
());
}
};
...
...
paddle/fluid/operators/hierarchical_sigmoid_op.h
浏览文件 @
02d68051
...
...
@@ -124,13 +124,12 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
auto
*
w
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"W"
);
auto
*
path
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"PTable"
);
auto
*
code
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"PCode"
);
auto
*
bias
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Bias"
);
auto
*
in_grad
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
framework
::
GradVarName
(
"X"
));
bool
is_sparse
=
ctx
.
Attr
<
bool
>
(
"is_sparse"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
math
::
SetConstant
<
DeviceContext
,
T
>
zero
;
auto
*
bias_grad
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
framework
::
GradVarName
(
"Bias"
));
auto
*
label
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Label"
);
auto
*
pre_out
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"PreOut"
);
auto
*
out_grad
=
...
...
@@ -174,12 +173,15 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
pre_out_grad_mat
*
out_grad_mat
.
broadcast
(
bcast
);
// TODO(guosheng): multiply pre_out_grad with subgradient of clipping to
// be consistent with the clipping in forward.
if
(
bias_grad
)
{
bias_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
zero
(
dev_ctx
,
bias_grad
,
static_cast
<
T
>
(
0.0
));
bit_code
->
AddGrad
(
pre_out_grad
,
bias_grad
);
}
if
(
!
is_sparse
)
{
auto
*
bias_grad
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
framework
::
GradVarName
(
"Bias"
));
if
(
bias_grad
)
{
bias_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
zero
(
dev_ctx
,
bias_grad
,
static_cast
<
T
>
(
0.0
));
bit_code
->
AddGrad
(
pre_out_grad
,
bias_grad
);
}
auto
*
w_grad
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
framework
::
GradVarName
(
"W"
));
w_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
...
...
@@ -199,6 +201,21 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
w_grad_value
->
mutable_data
<
T
>
(
temp_dim
,
ctx
.
GetPlace
());
zero
(
dev_ctx
,
w_grad_value
,
static_cast
<
T
>
(
0.0
));
auto
*
bias_grad
=
ctx
.
Output
<
framework
::
SelectedRows
>
(
framework
::
GradVarName
(
"Bias"
));
if
(
bias_grad
)
{
bias_grad
->
set_rows
(
real_rows
);
// build ids -> rows index map
bias_grad
->
SyncIndex
();
bias_grad
->
set_height
(
bias
->
dims
()[
0
]);
auto
*
bias_grad_value
=
bias_grad
->
mutable_value
();
std
::
vector
<
int64_t
>
dims
=
{
static_cast
<
int64_t
>
(
real_rows
.
size
()),
bias
->
dims
()[
1
]};
bias_grad_value
->
mutable_data
<
T
>
(
framework
::
make_ddim
(
dims
),
ctx
.
GetPlace
());
zero
(
dev_ctx
,
bias_grad_value
,
static_cast
<
T
>
(
0.0
));
bit_code
->
AddGrad
(
pre_out_grad
,
bias_grad
);
}
bit_code
->
MulGradWeight
(
pre_out_grad
,
w_grad
,
*
in
);
}
bit_code
->
MulGradError
(
pre_out_grad
,
*
w
,
in_grad
);
...
...
paddle/fluid/operators/math/matrix_bit_code.cc
浏览文件 @
02d68051
...
...
@@ -48,6 +48,24 @@ void MatrixBitCodeFunctor<T>::AddGrad(const framework::LoDTensor& tmat,
}
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
AddGrad
(
const
framework
::
LoDTensor
&
tmat
,
framework
::
SelectedRows
*
vec
)
{
size_t
batch_size
=
tmat
.
dims
()[
0
];
size_t
width
=
tmat
.
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
auto
code
=
code_table
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
->
calc_index
(
j
);
int64_t
row_index
=
vec
->
AutoGrownIndex
(
static_cast
<
int64_t
>
(
index
),
false
,
true
);
vec
->
mutable_value
()
->
data
<
T
>
()[
row_index
]
+=
tmat
.
data
<
T
>
()[
i
*
width
+
j
];
}
}
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Sum
(
const
framework
::
LoDTensor
&
tmat
,
framework
::
LoDTensor
*
sum
,
T
scale_sum
)
{
...
...
paddle/fluid/operators/math/matrix_bit_code.h
浏览文件 @
02d68051
...
...
@@ -241,6 +241,11 @@ class MatrixBitCodeFunctor {
*/
void
AddGrad
(
const
framework
::
LoDTensor
&
tmat
,
framework
::
LoDTensor
*
vec
);
/* For selected rows For j < code_length
vec(0, index(i, j)) += tmat(i, j)
*/
void
AddGrad
(
const
framework
::
LoDTensor
&
tmat
,
framework
::
SelectedRows
*
vec
);
/* For j < code_length
sum(i, 0) = \sum_j bit(i, j) * tmat(i, j)
*/
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
02d68051
...
...
@@ -4639,14 +4639,14 @@ def hsigmoid(input,
if
not
is_costum
:
bias
=
helper
.
create_parameter
(
attr
=
helper
.
bias_attr
,
shape
=
[
1
,
num_classes
-
1
],
shape
=
[
num_classes
-
1
,
1
],
is_bias
=
True
,
dtype
=
input
.
dtype
)
inputs
[
'Bias'
]
=
bias
else
:
bias
=
helper
.
create_parameter
(
attr
=
helper
.
bias_attr
,
shape
=
[
1
,
non_leaf_num
],
shape
=
[
non_leaf_num
,
1
],
is_bias
=
True
,
dtype
=
input
.
dtype
)
inputs
[
'Bias'
]
=
bias
...
...
python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
浏览文件 @
02d68051
...
...
@@ -77,7 +77,7 @@ def hsigmoid(x, w, label, bias, num_classes):
length
=
code_table
.
get_length
()
for
j
in
range
(
length
):
idx
=
code_table
.
cal_index
(
j
)
pre_output
[
i
][
j
]
+=
bias
[
0
][
idx
]
pre_output
[
i
][
j
]
+=
bias
[
idx
][
0
]
for
i
in
range
(
batch_size
):
code_table
=
CodeTable
(
num_classes
,
label
[
i
])
length
=
code_table
.
get_length
()
...
...
@@ -115,7 +115,7 @@ def hsigmoidWithCustomTree(x, w, ptable, pcode, label, bias, num_classes):
length
=
code_table
.
get_length
()
for
j
in
range
(
length
):
idx
=
code_table
.
cal_index
(
j
)
pre_output
[
i
][
j
]
+=
bias
[
0
][
idx
]
pre_output
[
i
][
j
]
+=
bias
[
idx
][
0
]
for
i
in
range
(
batch_size
):
code_table
=
CodeTableWithCustomTree
(
ptable
,
pcode
,
i
)
length
=
code_table
.
get_length
()
...
...
@@ -150,7 +150,7 @@ class TestHSigmoidOp(OpTest):
w
=
np
.
random
.
random
(
(
num_classes
-
1
,
feature_size
)).
astype
(
"float32"
)
*
2
label
=
np
.
random
.
randint
(
0
,
num_classes
,
(
batch_size
,
1
))
bias
=
np
.
random
.
random
((
1
,
num_classes
-
1
)).
astype
(
"float32"
)
bias
=
np
.
random
.
random
((
num_classes
-
1
,
1
)).
astype
(
"float32"
)
self
.
attrs
=
{
'num_classes'
:
num_classes
,
'is_sparse'
:
False
}
self
.
inputs
=
{
'X'
:
x
,
'W'
:
w
,
'Label'
:
label
,
'Bias'
:
bias
}
pre_output
,
out
=
hsigmoid
(
x
,
w
,
label
,
bias
,
num_classes
)
...
...
@@ -178,7 +178,7 @@ class TestHSigmoidOpSparse(OpTest):
-
1
)])
#np.array to store 1,2,5,6s' non-leaf path(root -> leaf)
pcode
=
np
.
array
([(
0
,
0
,
-
1
,
-
1
,
-
1
),
(
1
,
1
,
1
,
-
1
,
-
1
),
(
1
,
0
,
0
,
-
1
,
-
1
),
(
0
,
1
,
-
1
,
-
1
,
-
1
)])
#np.array to store
bias
=
np
.
random
.
random
((
1
,
num_classes
-
1
)).
astype
(
"float32"
)
bias
=
np
.
random
.
random
((
num_classes
-
1
,
1
)).
astype
(
"float32"
)
self
.
attrs
=
{
'num_classes'
:
num_classes
,
'is_sparse'
:
True
}
self
.
inputs
=
{
'X'
:
x
,
...
...
@@ -193,7 +193,6 @@ class TestHSigmoidOpSparse(OpTest):
self
.
outputs
=
{
'PreOut'
:
pre_output
,
'Out'
:
out
}
def
test_check_output
(
self
):
print
(
"checking output in CostumTree"
)
self
.
check_output
()
...
...
@@ -208,7 +207,7 @@ class TestHSigmoidOpWithSparseGrad(unittest.TestCase):
emb
=
fluid
.
layers
.
embedding
(
input
=
input_word
,
is_sparse
=
Fal
se
,
is_sparse
=
is_spar
se
,
size
=
[
3
,
3
],
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Normal
(
scale
=
1
/
math
.
sqrt
(
3
))))
...
...
@@ -220,6 +219,7 @@ class TestHSigmoidOpWithSparseGrad(unittest.TestCase):
ptable
=
ptable
,
pcode
=
pcode
,
is_costum
=
True
,
bias_attr
=
True
,
is_sparse
=
is_sparse
)
avg_cost
=
fluid
.
layers
.
reduce_mean
(
cost
)
...
...
@@ -240,7 +240,6 @@ class TestHSigmoidOpWithSparseGrad(unittest.TestCase):
optimizer
.
minimize
(
loss
)
main_program
=
fluid
.
default_main_program
()
# print("main program: {program}".format{program=str(main_program)})
place
=
fluid
.
CPUPlace
()
feeder
=
fluid
.
DataFeeder
(
feed_list
=
data_list
,
place
=
place
)
exe
=
fluid
.
Executor
(
place
)
...
...
@@ -279,7 +278,7 @@ class TestHSigmoidOpWithCostumTree(OpTest):
-
1
)])
#np.array to store 1,2,5,6s' non-leaf path(root -> leaf)
pcode
=
np
.
array
([(
0
,
0
,
-
1
,
-
1
,
-
1
),
(
1
,
1
,
1
,
-
1
,
-
1
),
(
1
,
0
,
0
,
-
1
,
-
1
),
(
0
,
1
,
-
1
,
-
1
,
-
1
)])
#np.array to store
bias
=
np
.
random
.
random
((
1
,
num_classes
-
1
)).
astype
(
"float32"
)
bias
=
np
.
random
.
random
((
num_classes
-
1
,
1
)).
astype
(
"float32"
)
self
.
attrs
=
{
'num_classes'
:
num_classes
,
'is_sparse'
:
False
}
self
.
inputs
=
{
'X'
:
x
,
...
...
@@ -294,11 +293,9 @@ class TestHSigmoidOpWithCostumTree(OpTest):
self
.
outputs
=
{
'PreOut'
:
pre_output
,
'Out'
:
out
}
def
test_check_output
(
self
):
print
(
"checking output in CostumTree"
)
self
.
check_output
()
def
test_check_grad
(
self
):
print
(
"checking outputGrad in CostumTree"
)
self
.
check_grad
([
'Bias'
,
'X'
,
'W'
],
[
'Out'
],
no_grad_set
=
set
(
'Label'
))
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录