Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
014e50c2
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
014e50c2
编写于
11月 21, 2018
作者:
J
JiabinYang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
test=develop
上级
ba9ff508
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
349 addition
and
171 deletion
+349
-171
paddle/fluid/framework/mixed_vector.h
paddle/fluid/framework/mixed_vector.h
+6
-0
paddle/fluid/operators/hierarchical_sigmoid_op.cc
paddle/fluid/operators/hierarchical_sigmoid_op.cc
+51
-17
paddle/fluid/operators/hierarchical_sigmoid_op.h
paddle/fluid/operators/hierarchical_sigmoid_op.h
+66
-26
paddle/fluid/operators/math/matrix_bit_code.cc
paddle/fluid/operators/math/matrix_bit_code.cc
+43
-42
paddle/fluid/operators/math/matrix_bit_code.h
paddle/fluid/operators/math/matrix_bit_code.h
+33
-20
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+7
-3
python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
+143
-63
未找到文件。
paddle/fluid/framework/mixed_vector.h
浏览文件 @
014e50c2
...
...
@@ -533,6 +533,12 @@ class CPUVector : public std::vector<T, std::allocator<T>> {
return
os
;
}
size_t
size
()
const
noexcept
{
size_t
size
=
static_cast
<
size_t
>
(
std
::
vector
<
T
,
std
::
allocator
<
T
>>::
size
());
return
size
;
}
T
&
operator
[](
size_t
id
)
{
return
this
->
at
(
id
);
}
const
T
&
operator
[](
size_t
id
)
const
{
return
this
->
at
(
id
);
}
...
...
paddle/fluid/operators/hierarchical_sigmoid_op.cc
浏览文件 @
014e50c2
...
...
@@ -70,13 +70,14 @@ class HierarchicalSigmoidOp : public framework::OperatorWithKernel {
const
int64_t
batch_size
=
ctx
->
GetInputDim
(
"X"
)[
0
];
std
::
vector
<
int64_t
>
output_shape
({
batch_size
,
1
});
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
output_shape
));
ctx
->
ShareLoD
(
"X"
,
/*->*/
"Out"
);
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
)
->
type
()),
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoD
Tensor
>
(
"X"
)
->
type
()),
ctx
.
GetPlace
());
}
};
...
...
@@ -86,32 +87,34 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void
Make
()
override
{
AddInput
(
"X"
,
"(Tensor, required) The input tensor with shape [N, D], "
"(
LoD
Tensor, required) The input tensor with shape [N, D], "
"where N is the size of mini-batch, and D is the feature size."
);
AddInput
(
"W"
,
"(Tensor, required), The parameters of hierarchical "
"(
LoD
Tensor, required), The parameters of hierarchical "
"sigmoid operator, each of them is a 2-D tensor, the shape is"
"[K, D]. Which K is the num of non-leaf node in Path Tree"
);
AddInput
(
"Label"
,
"(Tensor, required), The labels of training data. It's a"
"(
LoD
Tensor, required), The labels of training data. It's a"
"tensor with shape [N, 1]."
);
AddInput
(
"PTable"
,
"(Tensor, optional), The Path Table from root to current word"
"(
LoD
Tensor, optional), The Path Table from root to current word"
"it should have shape like [N, L], L is the length of the Path"
)
.
AsDispensable
();
AddInput
(
"PCode"
,
"(Tensor, optional), The Code on each Node of the Path from root "
"to current word"
"it should have shape like [N, L], L is the length of the Path"
)
AddInput
(
"PCode"
,
"(LoDTensor, optional), The Code on each Node of the Path from root "
"to current word"
"it should have shape like [N, L], L is the length of the Path"
)
.
AsDispensable
();
AddInput
(
"Bias"
,
"(Tensor, optional), The bias is a tensor with shape"
"(
LoD
Tensor, optional), The bias is a tensor with shape"
"[1, num_classes - 1]."
);
AddOutput
(
"Out"
,
"(Tensor, required) The output of hierarchical sigmoid operator."
"The shape is [N, 1]."
);
AddOutput
(
"Out"
,
"(LoDTensor, required) The output of hierarchical sigmoid operator."
"The shape is [N, 1]."
);
AddOutput
(
"PreOut"
,
"(Tensor, required) A intermedia 2-D tensor with shape "
"(
LoD
Tensor, required) A intermedia 2-D tensor with shape "
"[batch_size, code_length], where code_length represents the "
"maximum path length from root to leaf nodes."
)
.
AsIntermediate
();
...
...
@@ -124,6 +127,10 @@ belonging to the right branch. This idea is from
"F. Morin, Y. Bengio (AISTATS 05):
Hierarchical Probabilistic Neural Network Language Model."
)DOC"
);
AddAttr
<
bool
>
(
"is_sparse"
,
"(boolean, default false) "
"Sparse update."
)
.
SetDefault
(
false
);
}
};
...
...
@@ -133,6 +140,8 @@ class HierarchicalSigmoidGradOp : public framework::OperatorWithKernel {
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"W"
),
"Input(W) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Label"
),
"Input(Label) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@Grad) should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"PreOut"
),
"Input(Preout) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"W"
)),
...
...
@@ -142,7 +151,9 @@ class HierarchicalSigmoidGradOp : public framework::OperatorWithKernel {
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Bias"
),
ctx
->
GetInputDim
(
"Bias"
));
}
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"W"
),
ctx
->
GetInputDim
(
"W"
));
if
(
!
ctx
->
Attrs
().
Get
<
bool
>
(
"is_sparse"
))
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"W"
),
ctx
->
GetInputDim
(
"W"
));
}
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"X"
));
}
...
...
@@ -150,11 +161,33 @@ class HierarchicalSigmoidGradOp : public framework::OperatorWithKernel {
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
)
->
type
()),
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoD
Tensor
>
(
"X"
)
->
type
()),
ctx
.
GetPlace
());
}
};
class
HierarchicalSigmoidGradOpGradVarTypeInference
:
public
framework
::
VarTypeInference
{
public:
void
operator
()(
const
framework
::
OpDesc
&
op_desc
,
framework
::
BlockDesc
*
block
)
const
override
{
auto
out_var_name
=
op_desc
.
Output
(
framework
::
GradVarName
(
"W"
)).
front
();
auto
attr
=
op_desc
.
GetAttr
(
"is_sparse"
);
bool
is_sparse
=
boost
::
get
<
bool
>
(
attr
);
if
(
is_sparse
)
{
VLOG
(
3
)
<<
"hierarchical_sigmoid_grad op "
<<
framework
::
GradVarName
(
"W"
)
<<
" is set to SelectedRows"
;
block
->
Var
(
out_var_name
)
->
SetType
(
framework
::
proto
::
VarType
::
SELECTED_ROWS
);
}
else
{
VLOG
(
3
)
<<
"hierarchical_sigmoid_grad op "
<<
framework
::
GradVarName
(
"W"
)
<<
" is set to LoDTensor"
;
block
->
Var
(
out_var_name
)
->
SetType
(
framework
::
proto
::
VarType
::
LOD_TENSOR
);
}
block
->
Var
(
out_var_name
)
->
SetDataType
(
block
->
Var
(
"W"
)
->
GetDataType
());
}
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -162,7 +195,8 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR
(
hierarchical_sigmoid
,
ops
::
HierarchicalSigmoidOp
,
ops
::
HierarchicalSigmoidOpMaker
<
int
>
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
hierarchical_sigmoid_grad
,
ops
::
HierarchicalSigmoidGradOp
);
REGISTER_OPERATOR
(
hierarchical_sigmoid_grad
,
ops
::
HierarchicalSigmoidGradOp
,
ops
::
HierarchicalSigmoidGradOpGradVarTypeInference
);
REGISTER_OP_CPU_KERNEL
(
hierarchical_sigmoid
,
ops
::
HierarchicalSigmoidOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
...
...
paddle/fluid/operators/hierarchical_sigmoid_op.h
浏览文件 @
014e50c2
...
...
@@ -14,9 +14,10 @@ limitations under the License. */
#pragma once
#include <iostream>
#include <set>
#include <vector>
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/clip_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/matrix_bit_code.h"
...
...
@@ -29,18 +30,37 @@ template <typename T, int MajorType = Eigen::RowMajor,
using
EigenMatrix
=
framework
::
EigenMatrix
<
T
,
MajorType
,
IndexType
>
;
using
platform
::
Transform
;
std
::
vector
<
int64_t
>
cal_rows
(
const
framework
::
LoDTensor
*
path
)
{
std
::
set
<
int64_t
>
tmp
;
std
::
vector
<
int64_t
>
rows
;
rows
.
clear
();
for
(
size_t
i
=
0
;
i
<
static_cast
<
size_t
>
(
path
->
dims
()[
0
]);
i
++
)
{
for
(
size_t
j
=
0
;
j
<
static_cast
<
size_t
>
(
path
->
dims
()[
1
]);
j
++
)
{
int64_t
temp
=
path
->
data
<
int64_t
>
()[
i
*
static_cast
<
size_t
>
(
path
->
dims
()[
1
])
+
j
];
if
(
temp
>=
0
)
{
tmp
.
insert
(
temp
);
}
}
}
for
(
std
::
set
<
int64_t
>::
iterator
it
=
tmp
.
begin
();
it
!=
tmp
.
end
();
++
it
)
{
rows
.
push_back
(
*
it
);
}
return
rows
;
}
template
<
typename
DeviceContext
,
typename
T
>
class
HierarchicalSigmoidOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
w
=
ctx
.
Input
<
framework
::
Tensor
>
(
"W"
);
auto
*
path
=
ctx
.
Input
<
framework
::
Tensor
>
(
"PTable"
);
auto
*
code
=
ctx
.
Input
<
framework
::
Tensor
>
(
"PCode"
);
auto
*
label
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Label"
);
auto
*
bias
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Bias"
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
auto
*
pre_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"PreOut"
);
auto
*
in
=
ctx
.
Input
<
framework
::
LoD
Tensor
>
(
"X"
);
auto
*
w
=
ctx
.
Input
<
framework
::
LoD
Tensor
>
(
"W"
);
auto
*
path
=
ctx
.
Input
<
framework
::
LoD
Tensor
>
(
"PTable"
);
auto
*
code
=
ctx
.
Input
<
framework
::
LoD
Tensor
>
(
"PCode"
);
auto
*
label
=
ctx
.
Input
<
framework
::
LoD
Tensor
>
(
"Label"
);
auto
*
bias
=
ctx
.
Input
<
framework
::
LoD
Tensor
>
(
"Bias"
);
auto
*
out
=
ctx
.
Output
<
framework
::
LoD
Tensor
>
(
"Out"
);
auto
*
pre_out
=
ctx
.
Output
<
framework
::
LoD
Tensor
>
(
"PreOut"
);
size_t
num_classes
=
static_cast
<
size_t
>
(
ctx
.
Attr
<
int
>
(
"num_classes"
));
bool
is_custom
=
false
;
if
(
path
)
{
...
...
@@ -51,7 +71,7 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
int64_t
code_length
=
path
?
path
->
dims
()[
1
]
:
math
::
FindLastSet
(
num_classes
-
1
);
int64_t
batch_size
=
in
->
dims
()[
0
];
framework
::
Tensor
sum
;
framework
::
LoD
Tensor
sum
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
auto
*
pre_out_data
=
pre_out
->
mutable_data
<
T
>
(
framework
::
make_ddim
({
batch_size
,
code_length
}),
ctx
.
GetPlace
());
...
...
@@ -102,27 +122,26 @@ template <typename DeviceContext, typename T>
class
HierarchicalSigmoidGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
w
=
ctx
.
Input
<
framework
::
Tensor
>
(
"W"
);
auto
*
path
=
ctx
.
Input
<
framework
::
Tensor
>
(
"PTable"
);
auto
*
code
=
ctx
.
Input
<
framework
::
Tensor
>
(
"PCode"
);
auto
*
in_grad
=
ctx
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
w_grad
=
ctx
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"W"
));
auto
*
in
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
);
auto
*
w
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"W"
);
auto
*
path
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"PTable"
);
auto
*
code
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"PCode"
);
auto
*
in_grad
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
framework
::
GradVarName
(
"X"
));
bool
is_sparse
=
ctx
.
Attr
<
bool
>
(
"is_sparse"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
math
::
SetConstant
<
DeviceContext
,
T
>
zero
;
auto
*
bias_grad
=
ctx
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Bias"
));
auto
*
label
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Label"
);
auto
*
pre_out
=
ctx
.
Input
<
framework
::
Tensor
>
(
"PreOut"
);
ctx
.
Output
<
framework
::
LoD
Tensor
>
(
framework
::
GradVarName
(
"Bias"
));
auto
*
label
=
ctx
.
Input
<
framework
::
LoD
Tensor
>
(
"Label"
);
auto
*
pre_out
=
ctx
.
Input
<
framework
::
LoD
Tensor
>
(
"PreOut"
);
auto
*
out_grad
=
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
framework
::
Tensor
pre_out_grad
;
ctx
.
Input
<
framework
::
LoD
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
framework
::
LoD
Tensor
pre_out_grad
;
pre_out_grad
.
mutable_data
<
T
>
(
pre_out
->
dims
(),
ctx
.
GetPlace
());
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
w_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
math
::
SetConstant
<
DeviceContext
,
T
>
zero
;
zero
(
dev_ctx
,
in_grad
,
static_cast
<
T
>
(
0.0
));
zero
(
dev_ctx
,
w_grad
,
static_cast
<
T
>
(
0.0
));
size_t
num_classes
=
static_cast
<
size_t
>
(
ctx
.
Attr
<
int
>
(
"num_classes"
));
...
...
@@ -162,7 +181,28 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
zero
(
dev_ctx
,
bias_grad
,
static_cast
<
T
>
(
0.0
));
bit_code
->
AddGrad
(
pre_out_grad
,
bias_grad
);
}
bit_code
->
MulGradWeight
(
pre_out_grad
,
w_grad
,
*
in
);
if
(
!
is_sparse
)
{
auto
*
w_grad
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
framework
::
GradVarName
(
"W"
));
w_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
zero
(
dev_ctx
,
w_grad
,
static_cast
<
T
>
(
0.0
));
bit_code
->
MulGradWeight
(
pre_out_grad
,
w_grad
,
*
in
);
}
else
{
framework
::
Vector
<
int64_t
>
real_rows
=
cal_rows
(
path
);
auto
*
w_grad
=
ctx
.
Output
<
framework
::
SelectedRows
>
(
framework
::
GradVarName
(
"W"
));
w_grad
->
set_rows
(
real_rows
);
// build ids -> rows index map
w_grad
->
SyncIndex
();
auto
*
w_grad_value
=
w_grad
->
mutable_value
();
framework
::
DDim
temp_dim
(
w
->
dims
());
set
(
temp_dim
,
0
,
real_rows
.
size
());
w_grad_value
->
mutable_data
<
T
>
(
temp_dim
,
ctx
.
GetPlace
());
zero
(
dev_ctx
,
w_grad_value
,
static_cast
<
T
>
(
0.0
));
bit_code
->
MulGradWeight
(
pre_out_grad
,
w_grad
,
*
in
);
}
bit_code
->
MulGradError
(
pre_out_grad
,
*
w
,
in_grad
);
}
};
...
...
paddle/fluid/operators/math/matrix_bit_code.cc
浏览文件 @
014e50c2
...
...
@@ -19,8 +19,8 @@ namespace operators {
namespace
math
{
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Add
(
framework
::
Tensor
*
tmat
,
const
framework
::
Tensor
&
vec
)
{
void
MatrixBitCodeFunctor
<
T
>::
Add
(
framework
::
LoD
Tensor
*
tmat
,
const
framework
::
LoD
Tensor
&
vec
)
{
size_t
batch_size
=
tmat
->
dims
()[
0
];
size_t
width
=
tmat
->
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
...
...
@@ -34,8 +34,8 @@ void MatrixBitCodeFunctor<T>::Add(framework::Tensor* tmat,
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
AddGrad
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
vec
)
{
void
MatrixBitCodeFunctor
<
T
>::
AddGrad
(
const
framework
::
LoD
Tensor
&
tmat
,
framework
::
LoD
Tensor
*
vec
)
{
size_t
batch_size
=
tmat
.
dims
()[
0
];
size_t
width
=
tmat
.
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
...
...
@@ -49,8 +49,8 @@ void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor& tmat,
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Sum
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
sum
,
T
scale_sum
)
{
void
MatrixBitCodeFunctor
<
T
>::
Sum
(
const
framework
::
LoD
Tensor
&
tmat
,
framework
::
LoD
Tensor
*
sum
,
T
scale_sum
)
{
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
o_width
=
tmat
.
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
...
...
@@ -69,9 +69,9 @@ void MatrixBitCodeFunctor<T>::Sum(const framework::Tensor& tmat,
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Mul
(
framework
::
Tensor
*
tmat
,
const
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
)
{
void
MatrixBitCodeFunctor
<
T
>::
Mul
(
framework
::
LoD
Tensor
*
tmat
,
const
framework
::
LoD
Tensor
&
weight
,
const
framework
::
LoD
Tensor
&
input
)
{
size_t
num_samples
=
tmat
->
dims
()[
0
];
size_t
tmat_width
=
tmat
->
dims
()[
1
];
size_t
input_width
=
input
.
dims
()[
1
];
...
...
@@ -95,9 +95,9 @@ void MatrixBitCodeFunctor<T>::Mul(framework::Tensor* tmat,
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
MulGradWeight
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
weight
,
const
framework
::
Tensor
&
input
)
{
void
MatrixBitCodeFunctor
<
T
>::
MulGradWeight
(
const
framework
::
LoD
Tensor
&
tmat
,
framework
::
LoD
Tensor
*
weight
,
const
framework
::
LoD
Tensor
&
input
)
{
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
tmat_width
=
tmat
.
dims
()[
1
];
...
...
@@ -119,37 +119,38 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor& tmat,
}
}
// template <typename T>
// void MatrixBitCodeFunctor<T>::MulGradSparseWeight(const framework::Tensor&
// tmat,
// framework::SelectedRows* weight,
// const framework::Tensor& input) {
// size_t num_samples = tmat.dims()[0];
// size_t input_width = input.dims()[1];
// size_t tmat_width = tmat.dims()[1];
// size_t weight_width = weight->dims()[1];
// auto tmat_value = tmat.data<T>();
// auto weight_value = weight->data<T>();
// auto input_value = input.data<T>();
// for (size_t i = 0; i < num_samples; ++i) {
// auto code = code_table->get_code(i);
// int code_length = code->get_length();
// for (int j = 0; j < code_length; ++j) {
// // size_t index = code->calc_index(j);
// for (size_t k = 0; k < input_width; ++k) {
// weight_value[j * weight_width + k] +=
// tmat_value[i * tmat_width + j] * input_value[input_width * i +
// k];
// }
// }
// }
// }
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
MulGradWeight
(
const
framework
::
LoDTensor
&
tmat
,
framework
::
SelectedRows
*
weight
,
const
framework
::
LoDTensor
&
input
)
{
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
tmat_width
=
tmat
.
dims
()[
1
];
size_t
weight_width
=
weight
->
value
().
dims
()[
1
];
auto
tmat_value
=
tmat
.
data
<
T
>
();
auto
weight_value
=
weight
->
mutable_value
()
->
data
<
T
>
();
auto
input_value
=
input
.
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
->
calc_index
(
j
);
for
(
size_t
k
=
0
;
k
<
input_width
;
++
k
)
{
int64_t
row_index
=
weight
->
AutoGrownIndex
(
static_cast
<
int64_t
>
(
index
),
false
);
weight_value
[
row_index
*
weight_width
+
k
]
+=
tmat_value
[
i
*
tmat_width
+
j
]
*
input_value
[
input_width
*
i
+
k
];
}
}
}
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
MulGradError
(
const
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
*
input
)
{
void
MatrixBitCodeFunctor
<
T
>::
MulGradError
(
const
framework
::
LoD
Tensor
&
tmat
,
const
framework
::
LoD
Tensor
&
weight
,
framework
::
LoD
Tensor
*
input
)
{
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
tmat_width
=
tmat
.
dims
()[
1
];
size_t
input_width
=
input
->
dims
()[
1
];
...
...
@@ -174,7 +175,7 @@ void MatrixBitCodeFunctor<T>::MulGradError(const framework::Tensor& tmat,
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Sub
(
framework
::
Tensor
*
tmat
)
{
void
MatrixBitCodeFunctor
<
T
>::
Sub
(
framework
::
LoD
Tensor
*
tmat
)
{
size_t
num_samples
=
tmat
->
dims
()[
0
];
size_t
o_width
=
tmat
->
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
...
...
paddle/fluid/operators/math/matrix_bit_code.h
浏览文件 @
014e50c2
...
...
@@ -14,6 +14,8 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h"
...
...
@@ -134,8 +136,9 @@ class SimpleCode : public Code {
template
<
typename
R
>
class
CustomCode
:
public
Code
{
public:
CustomCode
(
const
framework
::
Tensor
*
ptable
,
const
framework
::
Tensor
*
pcode
,
const
int64_t
*
ids
,
const
int
index
)
CustomCode
(
const
framework
::
LoDTensor
*
ptable
,
const
framework
::
LoDTensor
*
pcode
,
const
int64_t
*
ids
,
const
int
index
)
:
ptable_
(
ptable
),
pcode_
(
pcode
),
ids_
(
ids
),
index_
(
index
)
{}
/**
* Here the id of root shoud be 1 rather than 0, thus the encoding of class c
...
...
@@ -169,8 +172,8 @@ class CustomCode : public Code {
}
private:
const
framework
::
Tensor
*
ptable_
;
const
framework
::
Tensor
*
pcode_
;
const
framework
::
LoD
Tensor
*
ptable_
;
const
framework
::
LoD
Tensor
*
pcode_
;
const
int64_t
*
ids_
;
const
int
index_
;
};
...
...
@@ -194,8 +197,9 @@ class SimpleCodeTable : public CodeTable {
template
<
typename
R
>
class
CustomCodeTable
:
public
CodeTable
{
public:
explicit
CustomCodeTable
(
const
framework
::
Tensor
*
ptable
,
const
framework
::
Tensor
*
pcode
,
const
int64_t
*
ids
)
explicit
CustomCodeTable
(
const
framework
::
LoDTensor
*
ptable
,
const
framework
::
LoDTensor
*
pcode
,
const
int64_t
*
ids
)
:
ptable_
(
ptable
),
pcode_
(
pcode
),
ids_
(
ids
)
{}
std
::
unique_ptr
<
Code
>
get_code
(
int64_t
code
)
const
{
...
...
@@ -209,8 +213,8 @@ class CustomCodeTable : public CodeTable {
}
private:
const
framework
::
Tensor
*
ptable_
;
const
framework
::
Tensor
*
pcode_
;
const
framework
::
LoD
Tensor
*
ptable_
;
const
framework
::
LoD
Tensor
*
pcode_
;
const
int64_t
*
ids_
;
};
...
...
@@ -222,8 +226,8 @@ class MatrixBitCodeFunctor {
ids_
(
ids
),
code_table
(
new
SimpleCodeTable
(
num_classes
,
ids
))
{}
explicit
MatrixBitCodeFunctor
(
const
framework
::
Tensor
*
ptable
,
const
framework
::
Tensor
*
pcode
,
explicit
MatrixBitCodeFunctor
(
const
framework
::
LoD
Tensor
*
ptable
,
const
framework
::
LoD
Tensor
*
pcode
,
const
int64_t
*
ids
)
:
num_classes_
(
static_cast
<
size_t
>
(
ptable
->
dims
()[
1
])),
ids_
(
ids
),
...
...
@@ -231,38 +235,47 @@ class MatrixBitCodeFunctor {
/* For j < code_length
tmat(i, j) += vec(0, index(i, j))
*/
void
Add
(
framework
::
Tensor
*
tmat
,
const
framework
::
Tensor
&
vec
);
void
Add
(
framework
::
LoDTensor
*
tmat
,
const
framework
::
LoD
Tensor
&
vec
);
/* For j < code_length
vec(0, index(i, j)) += tmat(i, j)
*/
void
AddGrad
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
vec
);
void
AddGrad
(
const
framework
::
LoDTensor
&
tmat
,
framework
::
LoD
Tensor
*
vec
);
/* For j < code_length
sum(i, 0) = \sum_j bit(i, j) * tmat(i, j)
*/
void
Sum
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
sum
,
T
scale_sum
);
void
Sum
(
const
framework
::
LoDTensor
&
tmat
,
framework
::
LoDTensor
*
sum
,
T
scale_sum
);
/* For j < code_length
tmat(i, j) -= bit(i, j)
*/
void
Sub
(
framework
::
Tensor
*
tmat
);
void
Sub
(
framework
::
LoD
Tensor
*
tmat
);
/* For j < code_length
input.row(i) += tmat(i, j) * weight.row(index(i, j))
*/
void
Mul
(
framework
::
Tensor
*
tmat
,
const
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
);
void
Mul
(
framework
::
LoDTensor
*
tmat
,
const
framework
::
LoD
Tensor
&
weight
,
const
framework
::
LoD
Tensor
&
input
);
/* For index(i, j) >= 0:
weight.row(index(i, j)) += tmat(i, j) * input.row(i)
*/
void
MulGradWeight
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
weight
,
const
framework
::
Tensor
&
input
);
void
MulGradWeight
(
const
framework
::
LoDTensor
&
tmat
,
framework
::
LoDTensor
*
weight
,
const
framework
::
LoDTensor
&
input
);
/* For SelectedRows Weight, For index(i, j) >= 0:
weight.row(index(i, j)) += tmat(i, j) * input.row(i)
*/
void
MulGradWeight
(
const
framework
::
LoDTensor
&
tmat
,
framework
::
SelectedRows
*
weight
,
const
framework
::
LoDTensor
&
input
);
/* For j < code_length
input.row(i) += tmat(i, j) * weight.row(index(i, j))
*/
void
MulGradError
(
const
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
*
input
);
void
MulGradError
(
const
framework
::
LoDTensor
&
tmat
,
const
framework
::
LoDTensor
&
weight
,
framework
::
LoDTensor
*
input
);
size_t
num_classes_
;
const
int64_t
*
ids_
;
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
014e50c2
...
...
@@ -4355,7 +4355,8 @@ def hsigmoid(input,
param_attr
=
None
,
bias_attr
=
None
,
name
=
None
,
is_costum
=
False
):
is_costum
=
False
,
is_sparse
=
False
):
"""
The hierarchical sigmoid operator is used to accelerate the training
process of language model. This operator organizes the classes into a
...
...
@@ -4394,9 +4395,11 @@ def hsigmoid(input,
is not set, the bias is initialized zero. Default: None.
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically. Default: None.
is_costum: (bool|False)using user defined binary tree instead of default complete binary tree
is_sparse: (bool|False)using sparse update instead of dense update
Returns:
Out: (Tensor) The cost of hierarchical sigmoid operator. the shape is [N, 1]
Out: (
Lod
Tensor) The cost of hierarchical sigmoid operator. the shape is [N, 1]
Examples:
...
...
@@ -4466,7 +4469,8 @@ def hsigmoid(input,
inputs
=
inputs
,
outputs
=
{
"Out"
:
out
,
"PreOut"
:
pre_out
},
attrs
=
{
"num_classes"
:
num_classes
})
attrs
=
{
"num_classes"
:
num_classes
,
"is_sparse"
:
is_sparse
})
return
out
...
...
python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
浏览文件 @
014e50c2
...
...
@@ -16,10 +16,9 @@ from __future__ import print_function
import
unittest
import
numpy
as
np
import
paddle.fluid.core
as
core
import
paddle.fluid
as
fluid
import
math
# import paddle.fluid as fluid
# import paddle.fluid.core as core
# from op_builder import OpBuilder
from
op_test
import
OpTest
np
.
random
.
seed
(
100
)
...
...
@@ -141,67 +140,148 @@ def hsigmoidWithCustomTree(x, w, ptable, pcode, label, bias, num_classes):
return
pre_output
,
out
class
TestHSigmoidOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"hierarchical_sigmoid"
num_classes
=
6
feature_size
=
8
batch_size
=
4
x
=
np
.
random
.
random
((
batch_size
,
feature_size
)).
astype
(
"float32"
)
*
2
w
=
np
.
random
.
random
(
(
num_classes
-
1
,
feature_size
)).
astype
(
"float32"
)
*
2
label
=
np
.
random
.
randint
(
0
,
num_classes
,
(
batch_size
,
1
))
bias
=
np
.
random
.
random
((
1
,
num_classes
-
1
)).
astype
(
"float32"
)
self
.
attrs
=
{
'num_classes'
:
num_classes
}
self
.
inputs
=
{
'X'
:
x
,
'W'
:
w
,
'Label'
:
label
,
'Bias'
:
bias
}
pre_output
,
out
=
hsigmoid
(
x
,
w
,
label
,
bias
,
num_classes
)
self
.
outputs
=
{
'PreOut'
:
pre_output
,
'Out'
:
out
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'Bias'
,
'X'
,
'W'
],
[
'Out'
],
no_grad_set
=
set
(
'Label'
))
class
TestHSigmoidOpWithCostumTree
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"hierarchical_sigmoid"
num_classes
=
6
#using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample
feature_size
=
8
batch_size
=
4
x
=
np
.
random
.
random
((
batch_size
,
feature_size
)).
astype
(
"float32"
)
*
2
w
=
np
.
random
.
random
(
(
num_classes
-
1
,
feature_size
)).
astype
(
"float32"
)
*
2
label
=
np
.
array
([
0
,
1
,
4
,
5
])
ptable
=
np
.
array
(
[(
0
,
2
,
-
1
,
-
1
,
-
1
),
(
0
,
1
,
3
,
-
1
,
-
1
),
(
0
,
1
,
4
,
-
1
,
-
1
),
(
0
,
2
,
-
1
,
-
1
,
-
1
)])
#np.array to store 1,2,5,6s' non-leaf path(root -> leaf)
pcode
=
np
.
array
([(
0
,
0
,
-
1
,
-
1
,
-
1
),
(
1
,
1
,
1
,
-
1
,
-
1
),
(
1
,
0
,
0
,
-
1
,
-
1
),
(
0
,
1
,
-
1
,
-
1
,
-
1
)])
#np.array to store
bias
=
np
.
random
.
random
((
1
,
num_classes
-
1
)).
astype
(
"float32"
)
self
.
attrs
=
{
'num_classes'
:
num_classes
}
self
.
inputs
=
{
'X'
:
x
,
'W'
:
w
,
'PTable'
:
ptable
,
'PCode'
:
pcode
,
'Label'
:
label
,
'Bias'
:
bias
}
pre_output
,
out
=
hsigmoidWithCustomTree
(
x
,
w
,
ptable
,
pcode
,
label
,
bias
,
num_classes
)
self
.
outputs
=
{
'PreOut'
:
pre_output
,
'Out'
:
out
}
def
test_check_output
(
self
):
print
(
"checking output in CostumTree"
)
self
.
check_output
()
def
test_check_grad
(
self
):
print
(
"checking outputGrad in CostumTree"
)
self
.
check_grad
([
'Bias'
,
'X'
,
'W'
],
[
'Out'
],
no_grad_set
=
set
(
'Label'
))
# class TestHSigmoidOp(OpTest):
# def setUp(self):
# self.op_type = "hierarchical_sigmoid"
# num_classes = 6
# feature_size = 8
# batch_size = 4
# x = np.random.random((batch_size, feature_size)).astype("float32") * 2
# w = np.random.random(
# (num_classes - 1, feature_size)).astype("float32") * 2
# label = np.random.randint(0, num_classes, (batch_size, 1))
# bias = np.random.random((1, num_classes - 1)).astype("float32")
# self.attrs = {'num_classes': num_classes, 'is_sparse': False}
# self.inputs = {'X': x, 'W': w, 'Label': label, 'Bias': bias}
# pre_output, out = hsigmoid(x, w, label, bias, num_classes)
# self.outputs = {'PreOut': pre_output, 'Out': out}
# def test_check_output(self):
# self.check_output()
# def test_check_grad(self):
# self.check_grad(['Bias', 'X', 'W'], ['Out'], no_grad_set=set('Label'))
# class TestHSigmoidOpSparse(OpTest):
# def setUp(self):
# self.op_type = "hierarchical_sigmoid"
# num_classes = 6 #using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample
# feature_size = 8
# batch_size = 4
# x = np.random.random((batch_size, feature_size)).astype("float32") * 2
# w = np.random.random(
# (num_classes - 1, feature_size)).astype("float32") * 2
# label = np.array([0, 1, 4, 5])
# ptable = np.array(
# [(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1),
# (0, 2, -1, -1,
# -1)]) #np.array to store 1,2,5,6s' non-leaf path(root -> leaf)
# pcode = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), (
# 1, 0, 0, -1, -1), (0, 1, -1, -1, -1)]) #np.array to store
# bias = np.random.random((1, num_classes - 1)).astype("float32")
# self.attrs = {'num_classes': num_classes, 'is_sparse': True}
# self.inputs = {
# 'X': x,
# 'W': w,
# 'PTable': ptable,
# 'PCode': pcode,
# 'Label': label,
# 'Bias': bias
# }
# pre_output, out = hsigmoidWithCustomTree(x, w, ptable, pcode, label,
# bias, num_classes)
# self.outputs = {'PreOut': pre_output, 'Out': out}
# def test_check_output(self):
# print("checking output in CostumTree")
# self.check_output()
class
TestHSigmoidOpWithSparseGrad
():
def
hs_net_conf
(
self
):
emb
=
fluid
.
layers
.
data
(
name
=
"x"
,
shape
=
[
3
],
dtype
=
'int64'
)
ptable
=
fluid
.
layers
.
data
(
name
=
'ptable'
,
shape
=
[
3
],
dtype
=
'int64'
)
pcode
=
fluid
.
layers
.
data
(
name
=
'pcode'
,
shape
=
[
3
],
dtype
=
'int64'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
data_list
=
[
emb
,
ptable
,
pcode
,
label
]
cost
=
fluid
.
layers
.
hsigmoid
(
input
=
emb
,
label
=
predict_word
,
non_leaf_num
=
4
,
ptable
=
ptable
,
pcode
=
pcode
,
is_costum
=
True
,
is_sparse
=
True
)
avg_cost
=
fluid
.
layers
.
reduce_mean
(
cost
)
return
avg_cost
,
data_list
def
test_training_test
(
self
):
print
(
"im here"
)
w
=
np
.
arange
(
12
).
reshape
(
4
,
3
)
x
=
np
.
ones
((
2
,
3
))
ptable
=
np
.
array
([(
1
,
2
,
-
1
),
(
1
,
2
,
-
1
)])
pcode
=
np
.
array
([(
1
,
0
,
-
1
),
(
0
,
0
,
-
1
)])
label
=
np
.
array
([(
1
,
4
)])
loss
,
data_list
=
hs_net_conf
()
optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
1e-3
)
optimizer
.
minimize
(
loss
)
main_program
=
fluid
.
default_main_program
()
place
=
fluid
.
CPUPlace
()
feeder
=
fluid
.
DataFeeder
(
feed_list
=
data_list
,
place
=
place
)
data_name_list
=
[
var
.
name
for
var
in
data_list
]
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
fluid
.
default_startup_program
())
for
pass_id
in
range
(
args
.
num_passes
):
for
i
in
range
(
10
):
data
=
[
w
,
x
[
i
%
2
],
ptable
[
i
%
2
],
pcode
[
i
%
2
],
label
[
i
%
2
]]
loss_val
=
exe
.
run
(
main_program
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
loss
])
print
(
"loss is: {loss}"
.
format
(
loss
=
loss
))
# class TestHSigmoidOpWithCostumTree(OpTest):
# def setUp(self):
# self.op_type = "hierarchical_sigmoid"
# num_classes = 6 #using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample
# feature_size = 8
# batch_size = 4
# x = np.random.random((batch_size, feature_size)).astype("float32") * 2
# w = np.random.random(
# (num_classes - 1, feature_size)).astype("float32") * 2
# label = np.array([0, 1, 4, 5])
# ptable = np.array(
# [(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1),
# (0, 2, -1, -1,
# -1)]) #np.array to store 1,2,5,6s' non-leaf path(root -> leaf)
# pcode = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), (
# 1, 0, 0, -1, -1), (0, 1, -1, -1, -1)]) #np.array to store
# bias = np.random.random((1, num_classes - 1)).astype("float32")
# self.attrs = {'num_classes': num_classes, 'is_sparse': False}
# self.inputs = {
# 'X': x,
# 'W': w,
# 'PTable': ptable,
# 'PCode': pcode,
# 'Label': label,
# 'Bias': bias
# }
# pre_output, out = hsigmoidWithCustomTree(x, w, ptable, pcode, label,
# bias, num_classes)
# self.outputs = {'PreOut': pre_output, 'Out': out}
# def test_check_output(self):
# print("checking output in CostumTree")
# self.check_output()
# def test_check_grad(self):
# print("checking outputGrad in CostumTree")
# self.check_grad(['Bias', 'X', 'W'], ['Out'], no_grad_set=set('Label'))
if
__name__
==
'__main__'
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录