Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
ee13b396
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ee13b396
编写于
6月 15, 2018
作者:
W
weixing02
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix some errors
上级
8bd148dc
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
73 addition
and
83 deletion
+73
-83
paddle/fluid/operators/hierarchical_sigmoid_op.cc
paddle/fluid/operators/hierarchical_sigmoid_op.cc
+19
-15
paddle/fluid/operators/hierarchical_sigmoid_op.h
paddle/fluid/operators/hierarchical_sigmoid_op.h
+7
-5
paddle/fluid/operators/math/matrix_bit_code.cc
paddle/fluid/operators/math/matrix_bit_code.cc
+0
-37
paddle/fluid/operators/math/matrix_bit_code.h
paddle/fluid/operators/math/matrix_bit_code.h
+27
-5
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+9
-10
python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
+9
-9
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+2
-2
未找到文件。
paddle/fluid/operators/hierarchical_sigmoid_op.cc
浏览文件 @
ee13b396
...
...
@@ -62,7 +62,7 @@ class HierarchicalSigmoidOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
Ids"
),
"Input(Ids
) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
Label"
),
"Input(Label
) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"W"
),
"Input(W) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"PreOut"
),
...
...
@@ -87,19 +87,18 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
void
Make
()
override
{
AddInput
(
"X"
,
"(Tensor, required) The input Tensor, which the shape is"
"[N
*
D], which N is the size of mini-batch,"
"[N
,
D], which N is the size of mini-batch,"
"D is the embded size"
);
AddInput
(
"W"
,
"(Tensor, required), The parameters of hierarchical "
"sigmoid operator, each of them is s a
3
-D tensor, the shape is"
"sigmoid operator, each of them is s a
2
-D tensor, the shape is"
"[num_classes - 1, D]"
);
AddInput
(
"
Ids
"
,
AddInput
(
"
Label
"
,
"(Tensor, required), The labels of training data. It's a"
"1-D tensor, which the shape is [1, N]"
);
AddInput
(
"Bias"
,
"(Tensor, optional), The bias is a 1-D tensor, "
"which is applied to the output, the shape is"
"[1, num_classes -1]"
);
"(Tensor, optional), The bias is a tensor with shape"
"[1, num_classes - 1]"
);
AddOutput
(
"Out"
,
"(Tensor, required) The output of hierarchical sigmoid operator."
"the shape is [N, 1]"
);
...
...
@@ -111,7 +110,7 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
.
SetDefault
(
2
);
AddComment
(
R"DOC(
The hierarchical sigmoid operator organize the classes into a binary tree.
At each node, a sigmoid function is used to caculate the probability of
At each node, a sigmoid function is used to ca
l
culate the probability of
belonging to the right branch. This idea is from
"F. Morin, Y. Bengio (AISTATS 05):
Hierarchical Probabilistic Neural Network Language Model."
...
...
@@ -124,7 +123,7 @@ class HierarchicalSigmoidGradOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"W"
),
"Input(W) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
Ids"
),
"Input(Ids
) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
Label"
),
"Input(Label
) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"PreOut"
),
"Input(Preout) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"W"
)),
...
...
@@ -155,9 +154,14 @@ REGISTER_OPERATOR(hierarchical_sigmoid, ops::HierarchicalSigmoidOp,
ops
::
HierarchicalSigmoidOpMaker
<
int
>
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
hierarchical_sigmoid_grad
,
ops
::
HierarchicalSigmoidGradOp
);
REGISTER_OP_CPU_KERNEL
(
hierarchical_sigmoid
,
ops
::
HierarchicalSigmoidOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
hierarchical_sigmoid_grad
,
ops
::
HierarchicalSigmoidGradOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
hierarchical_sigmoid
,
ops
::
HierarchicalSigmoidOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
HierarchicalSigmoidOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
hierarchical_sigmoid_grad
,
ops
::
HierarchicalSigmoidGradOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
HierarchicalSigmoidGradOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
paddle/fluid/operators/hierarchical_sigmoid_op.h
浏览文件 @
ee13b396
...
...
@@ -34,7 +34,7 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
w
=
ctx
.
Input
<
framework
::
Tensor
>
(
"W"
);
auto
*
ids
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Ids
"
);
auto
*
label
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Label
"
);
auto
*
bias
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Bias"
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
auto
*
pre_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"PreOut"
);
...
...
@@ -50,7 +50,7 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
zero
(
dev_ctx
,
pre_out
,
static_cast
<
T
>
(
0.0
));
auto
&
place
=
*
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
();
math
::
RowwiseSum
<
DeviceContext
,
T
>
row_sum
;
math
::
MatrixBitCodeFunctor
<
T
>
bit_code
(
num_classes
,
ids
->
data
<
int64_t
>
());
math
::
MatrixBitCodeFunctor
<
T
>
bit_code
(
num_classes
,
label
->
data
<
int64_t
>
());
std
::
vector
<
int64_t
>
sum_dims
({
batch_size
,
1UL
});
sum
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
sum_dims
),
ctx
.
GetPlace
());
...
...
@@ -87,7 +87,7 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
auto
*
w_grad
=
ctx
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"W"
));
auto
*
bias_grad
=
ctx
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Bias"
));
auto
*
ids
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Ids
"
);
auto
*
label
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Label
"
);
auto
*
pre_out
=
ctx
.
Input
<
framework
::
Tensor
>
(
"PreOut"
);
auto
*
out_grad
=
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
...
...
@@ -101,9 +101,11 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
auto
&
place
=
*
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
();
auto
pre_out_mat
=
EigenMatrix
<
T
>::
From
(
*
pre_out
);
auto
pre_out_grad_mat
=
EigenMatrix
<
T
>::
From
(
pre_out_grad
);
math
::
MatrixBitCodeFunctor
<
T
>
bit_code
(
num_classes
,
ids
->
data
<
int64_t
>
());
math
::
MatrixBitCodeFunctor
<
T
>
bit_code
(
num_classes
,
label
->
data
<
int64_t
>
());
// softrelu derivative
bit_code
.
OutGrad
(
&
pre_out_grad
,
*
out_grad
);
Eigen
::
array
<
int
,
2
>
bcast
({
1
,
static_cast
<
int
>
(
pre_out_grad
.
dims
()[
1
])});
auto
out_grad_mat
=
EigenMatrix
<
T
>::
From
(
*
out_grad
);
pre_out_grad_mat
=
out_grad_mat
.
broadcast
(
bcast
);
pre_out_grad_mat
.
device
(
place
)
=
pre_out_grad_mat
*
(
static_cast
<
T
>
(
1.0
)
-
static_cast
<
T
>
(
1.0
)
/
pre_out_mat
.
exp
());
...
...
paddle/fluid/operators/math/matrix_bit_code.cc
浏览文件 @
ee13b396
...
...
@@ -18,32 +18,6 @@ namespace paddle {
namespace
operators
{
namespace
math
{
/**
* CodeTable class should support 3 functions:
*
* size_t size()
* return the number of ids
*
* int getMaxCodeLength()
* return the maximal code length
*
* Code operator()(size_t i)
* return the i-th code. Code class is descriebed below.
*
* Code class should support 3 functions:
*
* int getLength()
* return the length of the code
*
* bool calcIndex(int bit)
* bit ranges from 0 to getLength() - 1
* return the index for the (1+bit) level parent
*
* bool calcBit(int bit)
* return true if the bit level parent is the right child of (1+bit) level
* parent
*
*/
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Add
(
framework
::
Tensor
*
tmat
,
const
framework
::
Tensor
&
vec
)
{
...
...
@@ -192,17 +166,6 @@ void MatrixBitCodeFunctor<T>::Sub(framework::Tensor* tmat) {
}
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
OutGrad
(
framework
::
Tensor
*
tmat
,
const
framework
::
Tensor
&
input
)
{
size_t
num_samples
=
tmat
->
dims
()[
0
];
size_t
code_length
=
tmat
->
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
for
(
size_t
j
=
0
;
j
<
code_length
;
++
j
)
{
tmat
->
data
<
T
>
()[
i
*
code_length
+
j
]
=
input
.
data
<
T
>
()[
i
];
}
}
template
class
MatrixBitCodeFunctor
<
float
>;
template
class
MatrixBitCodeFunctor
<
double
>;
...
...
paddle/fluid/operators/math/matrix_bit_code.h
浏览文件 @
ee13b396
...
...
@@ -20,13 +20,39 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
namespace
math
{
/**
* SimpleCodeTable class should support 3 functions:
*
* size_t size()
* return the number of ids
*
* int get_max_code_length()
* return the maximal code length
*
* SimpleCode operator()(size_t i)
* return the i-th code. Code class is descriebed below.
*
* SimpleCode class should support 3 functions:
*
* int get_length()
* return the length of the code
*
* size_t cal_index(int bit)
* bit ranges from 0 to get_length() - 1
* return the index for the (1+bit) level parent
*
* bool calc_bit(int bit)
* return true if the bit level parent is the right child of (1+bit) level
* parent
*
*/
/**
* return the 1-based index of the highest bit set
*
* for x > 0:
* \f[
*
f
indLastSet(x) = 1 + \floor*{\log_{2}x}
*
F
indLastSet(x) = 1 + \floor*{\log_{2}x}
* \f]
*/
inline
constexpr
size_t
FindLastSet
(
size_t
x
)
{
...
...
@@ -100,10 +126,6 @@ class MatrixBitCodeFunctor {
*/
void
MulGradError
(
const
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
*
input
);
/* For j < code_length
tmat(i, j) == input(i)
*/
void
OutGrad
(
framework
::
Tensor
*
tmat
,
const
framework
::
Tensor
&
input
);
size_t
num_classes_
;
const
int64_t
*
ids_
;
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
ee13b396
...
...
@@ -3571,18 +3571,17 @@ def hsigmoid(input, label, num_classes=2, param_attr=None, bias_attr=None):
shape
=
[
num_classes
-
1
,
dim
],
is_bias
=
False
,
dtype
=
input
.
dtype
)
inputs
=
{
"X"
:
input
,
"W"
:
weights
,
"Label"
:
label
}
if
helper
.
bias_attr
:
bias
=
helper
.
create_parameter
(
attr
=
helper
.
bias_attr
,
shape
=
[
1
,
num_classes
-
1
],
is_bias
=
True
,
dtype
=
input
.
dtype
)
inputs
[
'Bias'
]
=
bias
helper
.
append_op
(
type
=
"hierarchical_sigmoid"
,
inputs
=
{
"X"
:
input
,
"W"
:
weights
,
"Ids"
:
label
,
"Bias"
:
bias
},
inputs
=
inputs
,
outputs
=
{
"Out"
:
out
,
"PreOut"
:
pre_out
},
attrs
=
{
"num_classes"
:
num_classes
})
...
...
python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
浏览文件 @
ee13b396
...
...
@@ -36,7 +36,7 @@ class CodeTable(object):
return
self
.
c
&
(
1
<<
bit
)
def
hsigmoid
(
x
,
w
,
ids
,
bias
,
num_classes
):
def
hsigmoid
(
x
,
w
,
label
,
bias
,
num_classes
):
global
pre_output
batch_size
=
x
.
shape
[
0
]
code_length
=
find_latest_set
(
num_classes
-
1
)
...
...
@@ -45,13 +45,13 @@ def hsigmoid(x, w, ids, bias, num_classes):
pre_sum
=
np
.
zeros
((
batch_size
,
1
))
out
=
np
.
zeros
((
batch_size
,
1
)).
astype
(
"float32"
)
for
i
in
range
(
batch_size
):
code_table
=
CodeTable
(
num_classes
,
ids
[
i
])
code_table
=
CodeTable
(
num_classes
,
label
[
i
])
length
=
code_table
.
get_length
()
for
j
in
range
(
length
):
idx
=
code_table
.
cal_index
(
j
)
pre_output
[
i
][
j
]
+=
bias
[
0
][
idx
]
for
j
in
range
(
batch_size
):
code_table
=
CodeTable
(
num_classes
,
ids
[
j
])
code_table
=
CodeTable
(
num_classes
,
label
[
j
])
length
=
code_table
.
get_length
()
for
k
in
range
(
length
):
idx
=
code_table
.
cal_index
(
k
)
...
...
@@ -60,10 +60,10 @@ def hsigmoid(x, w, ids, bias, num_classes):
sum
+=
w
[
idx
][
l
]
*
x
[
j
][
l
]
pre_output
[
j
][
k
]
+=
sum
# clip[-40.0, 40.0]
np
.
clip
(
pre_output
,
-
40.0
,
40.0
)
pre_output
=
np
.
clip
(
pre_output
,
-
40.0
,
40.0
)
# out(i, 0) = \sum_j bit(i, j) * preout(i, j)
for
i
in
range
(
batch_size
):
code_table
=
CodeTable
(
num_classes
,
ids
[
i
])
code_table
=
CodeTable
(
num_classes
,
label
[
i
])
length
=
code_table
.
get_length
()
sum
=
0.0
for
j
in
range
(
length
):
...
...
@@ -86,18 +86,18 @@ class TestHSigmoidOp(OpTest):
batch_size
=
1
x
=
np
.
random
.
random
((
batch_size
,
embded_size
)).
astype
(
"float32"
)
w
=
np
.
random
.
random
((
num_classes
-
1
,
embded_size
)).
astype
(
"float32"
)
ids
=
np
.
random
.
randint
(
0
,
num_classes
,
batch_size
)
label
=
np
.
random
.
randint
(
0
,
num_classes
,
batch_size
)
bias
=
np
.
random
.
random
((
1
,
num_classes
-
1
)).
astype
(
"float32"
)
self
.
attrs
=
{
'num_classes'
:
num_classes
}
self
.
inputs
=
{
'X'
:
x
,
'W'
:
w
,
'
Ids'
:
ids
,
'Bias'
:
bias
}
out
=
hsigmoid
(
x
,
w
,
ids
,
bias
,
num_classes
)
self
.
inputs
=
{
'X'
:
x
,
'W'
:
w
,
'
Label'
:
label
,
'Bias'
:
bias
}
out
=
hsigmoid
(
x
,
w
,
label
,
bias
,
num_classes
)
self
.
outputs
=
{
'PreOut'
:
pre_output
,
'Out'
:
out
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'Bias'
,
'X'
,
'W'
],
'Out'
,
no_grad_set
=
set
(
'
Ids
'
))
self
.
check_grad
([
'Bias'
,
'X'
,
'W'
],
'Out'
,
no_grad_set
=
set
(
'
Label
'
))
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
ee13b396
...
...
@@ -176,8 +176,8 @@ class TestBook(unittest.TestCase):
def
test_hsigmoid
(
self
):
program
=
Program
()
with
program_guard
(
program
):
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
2
,
2
],
dtype
=
'float32'
)
y
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
,
2
],
dtype
=
'int64'
)
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
2
],
dtype
=
'float32'
)
y
=
layers
.
data
(
name
=
'y'
,
shape
=
[
2
],
dtype
=
'int64'
)
self
.
assertIsNotNone
(
layers
.
hsigmoid
(
input
=
x
,
label
=
y
,
num_classes
=
2
))
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录