Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
c3c3c0b3
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c3c3c0b3
编写于
11月 27, 2018
作者:
J
JiabinYang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
polish code, test=develop
上级
7389597c
变更
9
显示空白变更内容
内联
并排
Showing
9 changed file
with
176 addition
and
182 deletion
+176
-182
paddle/fluid/framework/mixed_vector.h
paddle/fluid/framework/mixed_vector.h
+0
-6
paddle/fluid/framework/selected_rows.cc
paddle/fluid/framework/selected_rows.cc
+52
-0
paddle/fluid/framework/selected_rows.h
paddle/fluid/framework/selected_rows.h
+9
-46
paddle/fluid/operators/hierarchical_sigmoid_op.cc
paddle/fluid/operators/hierarchical_sigmoid_op.cc
+1
-1
paddle/fluid/operators/hierarchical_sigmoid_op.h
paddle/fluid/operators/hierarchical_sigmoid_op.h
+38
-41
paddle/fluid/operators/math/matrix_bit_code.cc
paddle/fluid/operators/math/matrix_bit_code.cc
+30
-32
paddle/fluid/operators/math/matrix_bit_code.h
paddle/fluid/operators/math/matrix_bit_code.h
+42
-52
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+1
-1
python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
+3
-3
未找到文件。
paddle/fluid/framework/mixed_vector.h
浏览文件 @
c3c3c0b3
...
...
@@ -488,12 +488,6 @@ class CPUVector : public std::vector<T, std::allocator<T>> {
return
os
;
}
size_t
size
()
const
noexcept
{
size_t
size
=
static_cast
<
size_t
>
(
std
::
vector
<
T
,
std
::
allocator
<
T
>>::
size
());
return
size
;
}
T
&
operator
[](
size_t
id
)
{
return
this
->
at
(
id
);
}
const
T
&
operator
[](
size_t
id
)
const
{
return
this
->
at
(
id
);
}
...
...
paddle/fluid/framework/selected_rows.cc
浏览文件 @
c3c3c0b3
...
...
@@ -140,6 +140,58 @@ bool SelectedRows::HasKey(int64_t key) const {
:
true
;
}
int64_t
SelectedRows
::
AutoGrownIndex
(
int64_t
key
,
bool
auto_grown
,
bool
is_test
)
{
if
(
is_test
)
{
auto
iter
=
id_to_index_
.
find
(
key
);
if
(
iter
==
id_to_index_
.
end
())
{
return
-
1
;
}
else
{
return
iter
->
second
;
}
}
rwlock_
->
RDLock
();
auto
iter
=
id_to_index_
.
find
(
key
);
if
(
iter
==
id_to_index_
.
end
())
{
rwlock_
->
UNLock
();
if
(
!
auto_grown
)
{
PADDLE_THROW
(
"key %d not found"
,
key
);
}
rwlock_
->
WRLock
();
auto
map_size
=
id_to_index_
.
size
();
auto
vector_size
=
rows_
.
size
();
if
(
map_size
!=
vector_size
)
{
rwlock_
->
UNLock
();
PADDLE_THROW
(
"id_to_index_ size %d should have the same size with rows_ %d"
,
map_size
,
vector_size
);
}
auto
write_iter
=
id_to_index_
.
find
(
key
);
if
(
write_iter
==
id_to_index_
.
end
())
{
int
row_num
=
rows_
.
size
();
if
(
row_num
==
value_
->
dims
()[
0
])
{
rwlock_
->
UNLock
();
PADDLE_THROW
(
"selected rows is full, then length exceed %d"
,
row_num
);
}
// key logic to put a key into id_to_index_
rows_
.
push_back
(
key
);
auto
index
=
static_cast
<
int64_t
>
(
rows_
.
size
()
-
1
);
id_to_index_
[
key
]
=
index
;
rwlock_
->
UNLock
();
return
index
;
}
else
{
auto
index
=
write_iter
->
second
;
rwlock_
->
UNLock
();
return
index
;
}
}
else
{
auto
index
=
iter
->
second
;
rwlock_
->
UNLock
();
return
index
;
}
}
void
SelectedRows
::
SyncIndex
()
{
rwlock_
->
WRLock
();
id_to_index_
.
clear
();
...
...
paddle/fluid/framework/selected_rows.h
浏览文件 @
c3c3c0b3
...
...
@@ -118,9 +118,12 @@ class SelectedRows {
*
* @return index of the key.
*/
inline
int64_t
AutoGrownIndex
(
int64_t
key
,
bool
auto_grown
,
bool
is_test
=
false
)
{
if
(
is_test
)
{
int64_t
AutoGrownIndex
(
int64_t
key
,
bool
auto_grown
,
bool
is_test
=
false
);
/*
* @brief Get the index of the key from id_to_index_ map.
*/
inline
int64_t
GetIndexFromId
(
int64_t
key
)
{
auto
iter
=
id_to_index_
.
find
(
key
);
if
(
iter
==
id_to_index_
.
end
())
{
return
-
1
;
...
...
@@ -128,46 +131,6 @@ class SelectedRows {
return
iter
->
second
;
}
}
rwlock_
->
RDLock
();
auto
iter
=
id_to_index_
.
find
(
key
);
if
(
iter
==
id_to_index_
.
end
())
{
rwlock_
->
UNLock
();
if
(
!
auto_grown
)
{
PADDLE_THROW
(
"key %d not found"
,
key
);
}
rwlock_
->
WRLock
();
auto
map_size
=
id_to_index_
.
size
();
auto
vector_size
=
rows_
.
size
();
if
(
map_size
!=
vector_size
)
{
rwlock_
->
UNLock
();
PADDLE_THROW
(
"id_to_index_ size %d should have the same size with rows_ %d"
,
map_size
,
vector_size
);
}
auto
write_iter
=
id_to_index_
.
find
(
key
);
if
(
write_iter
==
id_to_index_
.
end
())
{
int
row_num
=
rows_
.
size
();
if
(
row_num
==
value_
->
dims
()[
0
])
{
rwlock_
->
UNLock
();
PADDLE_THROW
(
"selected rows is full, then length exceed %d"
,
row_num
);
}
// key logic to put a key into id_to_index_
rows_
.
push_back
(
key
);
auto
index
=
static_cast
<
int64_t
>
(
rows_
.
size
()
-
1
);
id_to_index_
[
key
]
=
index
;
rwlock_
->
UNLock
();
return
index
;
}
else
{
auto
index
=
write_iter
->
second
;
rwlock_
->
UNLock
();
return
index
;
}
}
else
{
auto
index
=
iter
->
second
;
rwlock_
->
UNLock
();
return
index
;
}
}
void
SyncIndex
();
/*
...
...
@@ -185,7 +148,7 @@ class SelectedRows {
// SelectedRows add a Tensor, will the duplicate rows be handled.
Vector
<
int64_t
>
rows_
;
std
::
unordered_map
<
int64_t
,
int64_t
>
id_to_index_
;
// should not be used when
ids
has duplicate member
id_to_index_
;
// should not be used when
rows_
has duplicate member
std
::
unique_ptr
<
Tensor
>
value_
{
nullptr
};
int64_t
height_
;
// height indicates the underline tensor's height
std
::
unique_ptr
<
RWLock
>
rwlock_
{
nullptr
};
...
...
paddle/fluid/operators/hierarchical_sigmoid_op.cc
浏览文件 @
c3c3c0b3
...
...
@@ -101,7 +101,7 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
"it should have shape like [N, L], L is the length of the Path"
)
.
AsDispensable
();
AddInput
(
"PCode"
,
"P
ath
Code"
,
"(LoDTensor, optional), The Code on each Node of the Path from root "
"to current word"
"it should have shape like [N, L], L is the length of the Path"
)
...
...
paddle/fluid/operators/hierarchical_sigmoid_op.h
浏览文件 @
c3c3c0b3
...
...
@@ -19,9 +19,11 @@ limitations under the License. */
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/clip_op.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/matrix_bit_code.h"
#include "paddle/fluid/platform/transform.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -30,31 +32,26 @@ template <typename T, int MajorType = Eigen::RowMajor,
using
EigenMatrix
=
framework
::
EigenMatrix
<
T
,
MajorType
,
IndexType
>
;
using
platform
::
Transform
;
std
::
vector
<
int64_t
>
cal_rows
(
const
framework
::
LoDTensor
&
path
)
{
std
::
set
<
int64_t
>
tmp
;
std
::
vector
<
int64_t
>
rows
;
for
(
size_t
i
=
0
;
i
<
static_cast
<
size_t
>
(
path
.
dims
()[
0
]);
i
++
)
{
for
(
size_t
j
=
0
;
j
<
static_cast
<
size_t
>
(
path
.
dims
()[
1
]);
j
++
)
{
int64_t
temp
=
path
.
data
<
int64_t
>
()[
i
*
static_cast
<
size_t
>
(
path
.
dims
()[
1
])
+
j
];
if
(
temp
>=
0
)
{
tmp
.
insert
(
temp
);
}
static
std
::
vector
<
int64_t
>
PathToRows
(
const
framework
::
LoDTensor
&
path
)
{
std
::
set
<
int64_t
>
rows
;
for
(
int64_t
i
=
0
;
i
<
path
.
numel
();
++
i
)
{
int64_t
row
=
path
.
data
<
int64_t
>
()[
i
];
if
(
row
<
0
)
{
continue
;
}
rows
.
emplace
(
row
);
}
rows
.
assign
(
tmp
.
begin
(),
tmp
.
end
());
return
rows
;
return
std
::
vector
<
int64_t
>
(
rows
.
begin
(),
rows
.
end
());
}
template
<
typename
DeviceContext
,
typename
T
>
class
HierarchicalSigmoidOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
);
auto
*
w
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"W"
);
auto
in
=
detail
::
Ref
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
)
);
auto
w
=
detail
::
Ref
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"W"
)
);
auto
*
path
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"PTable"
);
auto
*
code
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"PCode"
);
auto
*
label
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Label"
);
auto
*
code
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"P
ath
Code"
);
auto
label
=
detail
::
Ref
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Label"
)
);
auto
*
bias
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Bias"
);
auto
*
out
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Out"
);
auto
*
pre_out
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"PreOut"
);
...
...
@@ -65,7 +62,7 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
}
int64_t
code_length
=
path
?
path
->
dims
()[
1
]
:
math
::
FindLastSet
(
num_classes
-
1
);
int64_t
batch_size
=
in
->
dims
()[
0
];
int64_t
batch_size
=
in
.
dims
()[
0
];
framework
::
LoDTensor
sum
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
auto
*
pre_out_data
=
pre_out
->
mutable_data
<
T
>
(
...
...
@@ -81,10 +78,10 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
std
::
unique_ptr
<
math
::
MatrixBitCodeFunctor
<
T
>>
bit_code
;
if
(
!
is_custom
)
{
bit_code
.
reset
(
new
math
::
MatrixBitCodeFunctor
<
T
>
(
num_classes
,
label
->
data
<
int64_t
>
()));
label
.
data
<
int64_t
>
()));
}
else
{
bit_code
.
reset
(
new
math
::
MatrixBitCodeFunctor
<
T
>
(
path
,
code
,
label
->
data
<
int64_t
>
()));
bit_code
.
reset
(
new
math
::
MatrixBitCodeFunctor
<
T
>
(
*
path
,
*
code
,
label
.
data
<
int64_t
>
()));
}
std
::
vector
<
int64_t
>
sum_dims
({
batch_size
,
1UL
});
...
...
@@ -95,7 +92,7 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
if
(
bias
)
{
bit_code
->
Add
(
*
bias
,
pre_out
);
}
bit_code
->
Mul
(
pre_out
,
*
w
,
*
in
);
bit_code
->
Mul
(
pre_out
,
w
,
in
);
// clip to [-40, 40]
Transform
<
DeviceContext
>
trans
;
trans
(
ctx
.
template
device_context
<
DeviceContext
>(),
pre_out_data
,
...
...
@@ -117,23 +114,23 @@ template <typename DeviceContext, typename T>
class
HierarchicalSigmoidGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
);
auto
*
w
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"W"
);
auto
in
=
detail
::
Ref
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
)
);
auto
w
=
detail
::
Ref
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"W"
)
);
auto
*
path
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"PTable"
);
auto
*
code
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"PCode"
);
auto
*
code
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"P
ath
Code"
);
auto
*
bias
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Bias"
);
auto
*
in_grad
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
framework
::
GradVarName
(
"X"
));
bool
is_sparse
=
ctx
.
Attr
<
bool
>
(
"is_sparse"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
math
::
SetConstant
<
DeviceContext
,
T
>
zero
;
auto
*
label
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Label"
);
auto
*
pre_out
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"PreOut"
);
auto
*
out_grad
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
label
=
detail
::
Ref
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Label"
)
);
auto
pre_out
=
detail
::
Ref
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"PreOut"
)
);
auto
out_grad
=
detail
::
Ref
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
))
)
;
framework
::
LoDTensor
pre_out_grad
;
pre_out_grad
.
mutable_data
<
T
>
(
pre_out
->
dims
(),
ctx
.
GetPlace
());
pre_out_grad
.
mutable_data
<
T
>
(
pre_out
.
dims
(),
ctx
.
GetPlace
());
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
zero
(
dev_ctx
,
in_grad
,
static_cast
<
T
>
(
0.0
));
...
...
@@ -147,16 +144,16 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
std
::
unique_ptr
<
math
::
MatrixBitCodeFunctor
<
T
>>
bit_code
;
if
(
!
is_custom
)
{
bit_code
.
reset
(
new
math
::
MatrixBitCodeFunctor
<
T
>
(
num_classes
,
label
->
data
<
int64_t
>
()));
label
.
data
<
int64_t
>
()));
}
else
{
bit_code
.
reset
(
new
math
::
MatrixBitCodeFunctor
<
T
>
(
path
,
code
,
label
->
data
<
int64_t
>
()));
bit_code
.
reset
(
new
math
::
MatrixBitCodeFunctor
<
T
>
(
*
path
,
*
code
,
label
.
data
<
int64_t
>
()));
}
auto
&
place
=
*
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
();
auto
pre_out_mat
=
EigenMatrix
<
T
>::
From
(
*
pre_out
);
auto
pre_out_mat
=
EigenMatrix
<
T
>::
From
(
pre_out
);
auto
pre_out_grad_mat
=
EigenMatrix
<
T
>::
From
(
pre_out_grad
);
auto
out_grad_mat
=
EigenMatrix
<
T
>::
From
(
*
out_grad
);
auto
out_grad_mat
=
EigenMatrix
<
T
>::
From
(
out_grad
);
Eigen
::
array
<
int
,
2
>
bcast
{
1
,
static_cast
<
int
>
(
pre_out_grad
.
dims
()[
1
])};
...
...
@@ -181,17 +178,17 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
ctx
.
Output
<
framework
::
LoDTensor
>
(
framework
::
GradVarName
(
"W"
));
w_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
zero
(
dev_ctx
,
w_grad
,
static_cast
<
T
>
(
0.0
));
bit_code
->
MulGradWeight
(
pre_out_grad
,
w_grad
,
*
in
);
bit_code
->
MulGradWeight
(
pre_out_grad
,
w_grad
,
in
);
}
else
{
framework
::
Vector
<
int64_t
>
real_rows
=
cal_r
ows
(
*
path
);
framework
::
Vector
<
int64_t
>
real_rows
=
PathToR
ows
(
*
path
);
auto
*
w_grad
=
ctx
.
Output
<
framework
::
SelectedRows
>
(
framework
::
GradVarName
(
"W"
));
w_grad
->
set_rows
(
real_rows
);
// Build a map of id -> row_index to speed up finding the index of one id
w_grad
->
SyncIndex
();
w_grad
->
set_height
(
w
->
dims
()[
0
]);
w_grad
->
set_height
(
w
.
dims
()[
0
]);
auto
*
w_grad_value
=
w_grad
->
mutable_value
();
framework
::
DDim
temp_dim
(
w
->
dims
());
framework
::
DDim
temp_dim
(
w
.
dims
());
set
(
temp_dim
,
0
,
real_rows
.
size
());
w_grad_value
->
mutable_data
<
T
>
(
temp_dim
,
ctx
.
GetPlace
());
...
...
@@ -211,9 +208,9 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
zero
(
dev_ctx
,
bias_grad_value
,
static_cast
<
T
>
(
0.0
));
bit_code
->
AddGrad
(
pre_out_grad
,
bias_grad
);
}
bit_code
->
MulGradWeight
(
pre_out_grad
,
w_grad
,
*
in
);
bit_code
->
MulGradWeight
(
pre_out_grad
,
w_grad
,
in
);
}
bit_code
->
MulGradError
(
pre_out_grad
,
*
w
,
in_grad
);
bit_code
->
MulGradError
(
pre_out_grad
,
w
,
in_grad
);
}
};
...
...
paddle/fluid/operators/math/matrix_bit_code.cc
浏览文件 @
c3c3c0b3
...
...
@@ -19,12 +19,12 @@ namespace operators {
namespace
math
{
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Add
(
const
framework
::
LoD
Tensor
&
vec
,
framework
::
LoD
Tensor
*
tmat
)
{
void
MatrixBitCodeFunctor
<
T
>::
Add
(
const
framework
::
Tensor
&
vec
,
framework
::
Tensor
*
tmat
)
{
size_t
batch_size
=
tmat
->
dims
()[
0
];
size_t
width
=
tmat
->
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
auto
code
=
code_table
->
get_code
(
i
);
auto
code
=
code_table
_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
->
calc_index
(
j
);
...
...
@@ -34,12 +34,12 @@ void MatrixBitCodeFunctor<T>::Add(const framework::LoDTensor& vec,
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
AddGrad
(
const
framework
::
LoD
Tensor
&
tmat
,
framework
::
LoD
Tensor
*
vec
)
{
void
MatrixBitCodeFunctor
<
T
>::
AddGrad
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
vec
)
{
size_t
batch_size
=
tmat
.
dims
()[
0
];
size_t
width
=
tmat
.
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
auto
code
=
code_table
->
get_code
(
i
);
auto
code
=
code_table
_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
->
calc_index
(
j
);
...
...
@@ -49,17 +49,16 @@ void MatrixBitCodeFunctor<T>::AddGrad(const framework::LoDTensor& tmat,
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
AddGrad
(
const
framework
::
LoD
Tensor
&
tmat
,
void
MatrixBitCodeFunctor
<
T
>::
AddGrad
(
const
framework
::
Tensor
&
tmat
,
framework
::
SelectedRows
*
vec
)
{
size_t
batch_size
=
tmat
.
dims
()[
0
];
size_t
width
=
tmat
.
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
auto
code
=
code_table
->
get_code
(
i
);
auto
code
=
code_table
_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
->
calc_index
(
j
);
int64_t
row_index
=
vec
->
AutoGrownIndex
(
static_cast
<
int64_t
>
(
index
),
false
,
true
);
int64_t
row_index
=
vec
->
GetIndexFromId
(
static_cast
<
int64_t
>
(
index
));
vec
->
mutable_value
()
->
data
<
T
>
()[
row_index
]
+=
tmat
.
data
<
T
>
()[
i
*
width
+
j
];
}
...
...
@@ -67,13 +66,13 @@ void MatrixBitCodeFunctor<T>::AddGrad(const framework::LoDTensor& tmat,
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Sum
(
const
framework
::
LoD
Tensor
&
tmat
,
framework
::
LoD
Tensor
*
sum
,
T
scale_sum
)
{
void
MatrixBitCodeFunctor
<
T
>::
Sum
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
sum
,
T
scale_sum
)
{
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
o_width
=
tmat
.
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
T
sm
=
static_cast
<
T
>
(
0.0
);
auto
code
=
code_table
->
get_code
(
i
);
auto
code
=
code_table
_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
if
(
code
->
calc_bit
(
j
))
{
...
...
@@ -87,9 +86,9 @@ void MatrixBitCodeFunctor<T>::Sum(const framework::LoDTensor& tmat,
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Mul
(
framework
::
LoD
Tensor
*
tmat
,
const
framework
::
LoD
Tensor
&
weight
,
const
framework
::
LoD
Tensor
&
input
)
{
void
MatrixBitCodeFunctor
<
T
>::
Mul
(
framework
::
Tensor
*
tmat
,
const
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
)
{
size_t
num_samples
=
tmat
->
dims
()[
0
];
size_t
tmat_width
=
tmat
->
dims
()[
1
];
size_t
input_width
=
input
.
dims
()[
1
];
...
...
@@ -98,7 +97,7 @@ void MatrixBitCodeFunctor<T>::Mul(framework::LoDTensor* tmat,
auto
weight_value
=
weight
.
data
<
T
>
();
auto
input_value
=
input
.
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
->
get_code
(
i
);
auto
code
=
code_table
_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
->
calc_index
(
j
);
...
...
@@ -113,9 +112,9 @@ void MatrixBitCodeFunctor<T>::Mul(framework::LoDTensor* tmat,
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
MulGradWeight
(
const
framework
::
LoD
Tensor
&
tmat
,
framework
::
LoD
Tensor
*
weight
,
const
framework
::
LoD
Tensor
&
input
)
{
void
MatrixBitCodeFunctor
<
T
>::
MulGradWeight
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
weight
,
const
framework
::
Tensor
&
input
)
{
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
tmat_width
=
tmat
.
dims
()[
1
];
...
...
@@ -124,7 +123,7 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat,
auto
weight_value
=
weight
->
data
<
T
>
();
auto
input_value
=
input
.
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
->
get_code
(
i
);
auto
code
=
code_table
_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
->
calc_index
(
j
);
...
...
@@ -138,9 +137,9 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat,
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
MulGradWeight
(
const
framework
::
LoD
Tensor
&
tmat
,
void
MatrixBitCodeFunctor
<
T
>::
MulGradWeight
(
const
framework
::
Tensor
&
tmat
,
framework
::
SelectedRows
*
weight
,
const
framework
::
LoD
Tensor
&
input
)
{
const
framework
::
Tensor
&
input
)
{
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
tmat_width
=
tmat
.
dims
()[
1
];
...
...
@@ -149,13 +148,12 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat,
auto
weight_value
=
weight
->
mutable_value
()
->
data
<
T
>
();
auto
input_value
=
input
.
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
->
get_code
(
i
);
auto
code
=
code_table
_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
->
calc_index
(
j
);
for
(
size_t
k
=
0
;
k
<
input_width
;
++
k
)
{
int64_t
row_index
=
weight
->
AutoGrownIndex
(
static_cast
<
int64_t
>
(
index
),
false
,
true
);
int64_t
row_index
=
weight
->
GetIndexFromId
(
static_cast
<
int64_t
>
(
index
));
weight_value
[
row_index
*
weight_width
+
k
]
+=
tmat_value
[
i
*
tmat_width
+
j
]
*
input_value
[
input_width
*
i
+
k
];
}
...
...
@@ -164,9 +162,9 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat,
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
MulGradError
(
const
framework
::
LoD
Tensor
&
tmat
,
const
framework
::
LoD
Tensor
&
weight
,
framework
::
LoD
Tensor
*
input
)
{
void
MatrixBitCodeFunctor
<
T
>::
MulGradError
(
const
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
*
input
)
{
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
tmat_width
=
tmat
.
dims
()[
1
];
size_t
input_width
=
input
->
dims
()[
1
];
...
...
@@ -176,7 +174,7 @@ void MatrixBitCodeFunctor<T>::MulGradError(const framework::LoDTensor& tmat,
auto
input_value
=
input
->
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
->
get_code
(
i
);
auto
code
=
code_table
_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
->
calc_index
(
j
);
...
...
@@ -191,11 +189,11 @@ void MatrixBitCodeFunctor<T>::MulGradError(const framework::LoDTensor& tmat,
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Sub
(
framework
::
LoD
Tensor
*
tmat
)
{
void
MatrixBitCodeFunctor
<
T
>::
Sub
(
framework
::
Tensor
*
tmat
)
{
size_t
num_samples
=
tmat
->
dims
()[
0
];
size_t
o_width
=
tmat
->
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
->
get_code
(
i
);
auto
code
=
code_table
_
->
get_code
(
i
);
int
code_length
=
code
->
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
if
(
code
->
calc_bit
(
j
))
{
...
...
paddle/fluid/operators/math/matrix_bit_code.h
浏览文件 @
c3c3c0b3
...
...
@@ -132,13 +132,15 @@ class SimpleCode : public Code {
size_t
c_
;
};
template
<
typename
R
>
template
<
typename
T
>
class
CustomCode
:
public
Code
{
public:
CustomCode
(
const
framework
::
LoDTensor
*
ptable
,
const
framework
::
LoDTensor
*
pcode
,
const
int64_t
*
ids
,
const
int
index
)
:
ptable_
(
ptable
),
pcode_
(
pcode
),
ids_
(
ids
),
index_
(
index
)
{}
CustomCode
(
const
framework
::
Tensor
&
ptable
,
const
framework
::
Tensor
&
pcode
,
const
int64_t
*
ids
,
int
index
)
:
ids_
(
ids
),
index_
(
index
)
{
ptable_
=
ptable
.
Slice
(
index
,
index
+
1
);
pcode_
=
pcode
.
Slice
(
index
,
index
+
1
);
}
/**
* Here the id of root shoud be 1 rather than 0, thus the encoding of class c
* is `c + num_classes` and all siblings can get the same weight indice using
...
...
@@ -148,20 +150,13 @@ class CustomCode : public Code {
* Binary classification path is the suffixes of encoding, thus leave out the
* left most bit in calc_bit.
*/
size_t
calc_index
(
int
bit
)
const
{
return
ptable_
->
data
<
R
>
()[
index_
*
static_cast
<
int
>
(
ptable_
->
dims
()[
1
])
+
bit
];
}
bool
calc_bit
(
int
bit
)
const
{
return
pcode_
->
data
<
R
>
()[
index_
*
static_cast
<
int
>
(
ptable_
->
dims
()[
1
])
+
bit
];
}
size_t
calc_index
(
int
bit
)
const
{
return
ptable_
.
data
<
T
>
()[
bit
];
}
bool
calc_bit
(
int
bit
)
const
{
return
pcode_
.
data
<
T
>
()[
bit
];
}
int
get_length
()
const
{
int
length
=
0
;
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
ptable_
->
dims
()[
1
]);
i
++
)
{
if
(
ptable_
->
data
<
R
>
()[
index_
*
static_cast
<
int
>
(
ptable_
->
dims
()[
1
])
+
i
]
>=
0
)
{
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
ptable_
.
dims
()[
1
]);
i
++
)
{
if
(
ptable_
.
data
<
T
>
()[
i
]
>=
0
)
{
length
++
;
}
else
{
return
length
;
...
...
@@ -171,15 +166,15 @@ class CustomCode : public Code {
}
private:
const
framework
::
LoDTensor
*
ptable_
;
const
framework
::
LoDTensor
*
pcode_
;
framework
::
Tensor
ptable_
;
framework
::
Tensor
pcode_
;
const
int64_t
*
ids_
;
const
int
index_
;
};
class
SimpleCodeTable
:
public
CodeTable
{
public:
explicit
SimpleCodeTable
(
size_t
num_classes
,
const
int64_t
*
ids
)
SimpleCodeTable
(
size_t
num_classes
,
const
int64_t
*
ids
)
:
num_classes_
(
num_classes
),
ids_
(
ids
)
{}
std
::
unique_ptr
<
Code
>
get_code
(
int64_t
code
)
const
{
std
::
unique_ptr
<
Code
>
coder
(
new
SimpleCode
(
code
,
num_classes_
,
ids_
));
...
...
@@ -193,97 +188,92 @@ class SimpleCodeTable : public CodeTable {
const
int64_t
*
ids_
;
};
template
<
typename
R
>
template
<
typename
T
>
class
CustomCodeTable
:
public
CodeTable
{
public:
explicit
CustomCodeTable
(
const
framework
::
LoDTensor
*
ptable
,
const
framework
::
LoDTensor
*
pcode
,
const
int64_t
*
ids
)
CustomCodeTable
(
const
framework
::
Tensor
&
ptable
,
const
framework
::
Tensor
&
pcode
,
const
int64_t
*
ids
)
:
ptable_
(
ptable
),
pcode_
(
pcode
),
ids_
(
ids
)
{}
std
::
unique_ptr
<
Code
>
get_code
(
int64_t
code
)
const
{
std
::
unique_ptr
<
Code
>
coder
(
new
CustomCode
<
R
>
(
ptable_
,
pcode_
,
ids_
,
code
));
std
::
unique_ptr
<
Code
>
coder
(
new
CustomCode
<
T
>
(
ptable_
,
pcode_
,
ids_
,
code
));
return
coder
;
}
size_t
size
()
const
{
return
static_cast
<
size_t
>
(
ptable_
->
dims
()[
1
]);
}
size_t
size
()
const
{
return
static_cast
<
size_t
>
(
ptable_
.
dims
()[
1
]);
}
int
get_max_code_length
()
const
{
return
static_cast
<
size_t
>
(
ptable_
->
dims
()[
1
]);
return
static_cast
<
size_t
>
(
ptable_
.
dims
()[
1
]);
}
private:
const
framework
::
LoDTensor
*
ptable_
;
const
framework
::
LoDTensor
*
pcode_
;
const
framework
::
Tensor
&
ptable_
;
const
framework
::
Tensor
&
pcode_
;
const
int64_t
*
ids_
;
};
template
<
typename
T
>
class
MatrixBitCodeFunctor
{
public:
explicit
MatrixBitCodeFunctor
(
size_t
num_classes
,
const
int64_t
*
ids
)
MatrixBitCodeFunctor
(
size_t
num_classes
,
const
int64_t
*
ids
)
:
num_classes_
(
num_classes
),
ids_
(
ids
),
code_table
(
new
SimpleCodeTable
(
num_classes
,
ids
))
{}
code_table
_
(
new
SimpleCodeTable
(
num_classes
,
ids
))
{}
explicit
MatrixBitCodeFunctor
(
const
framework
::
LoDTensor
*
ptable
,
const
framework
::
LoDTensor
*
pcode
,
const
int64_t
*
ids
)
:
num_classes_
(
static_cast
<
size_t
>
(
ptable
->
dims
()[
1
])),
MatrixBitCodeFunctor
(
const
framework
::
Tensor
&
ptable
,
const
framework
::
Tensor
&
pcode
,
const
int64_t
*
ids
)
:
num_classes_
(
static_cast
<
size_t
>
(
ptable
.
dims
()[
1
])),
ids_
(
ids
),
code_table
(
new
CustomCodeTable
<
int64_t
>
(
ptable
,
pcode
,
ids
))
{}
code_table
_
(
new
CustomCodeTable
<
int64_t
>
(
ptable
,
pcode
,
ids
))
{}
/* For j < code_length
tmat(i, j) += vec(0, index(i, j))
*/
void
Add
(
const
framework
::
LoDTensor
&
vec
,
framework
::
LoD
Tensor
*
tmat
);
void
Add
(
const
framework
::
Tensor
&
vec
,
framework
::
Tensor
*
tmat
);
/* For j < code_length
vec(0, index(i, j)) += tmat(i, j)
*/
void
AddGrad
(
const
framework
::
LoDTensor
&
tmat
,
framework
::
LoD
Tensor
*
vec
);
void
AddGrad
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
vec
);
/* For selected rows For j < code_length
vec(0, index(i, j)) += tmat(i, j)
*/
void
AddGrad
(
const
framework
::
LoD
Tensor
&
tmat
,
framework
::
SelectedRows
*
vec
);
void
AddGrad
(
const
framework
::
Tensor
&
tmat
,
framework
::
SelectedRows
*
vec
);
/* For j < code_length
sum(i, 0) = \sum_j bit(i, j) * tmat(i, j)
*/
void
Sum
(
const
framework
::
LoDTensor
&
tmat
,
framework
::
LoDTensor
*
sum
,
T
scale_sum
);
void
Sum
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
sum
,
T
scale_sum
);
/* For j < code_length
tmat(i, j) -= bit(i, j)
*/
void
Sub
(
framework
::
LoD
Tensor
*
tmat
);
void
Sub
(
framework
::
Tensor
*
tmat
);
/* For j < code_length
input.row(i) += tmat(i, j) * weight.row(index(i, j))
*/
void
Mul
(
framework
::
LoDTensor
*
tmat
,
const
framework
::
LoD
Tensor
&
weight
,
const
framework
::
LoD
Tensor
&
input
);
void
Mul
(
framework
::
Tensor
*
tmat
,
const
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
);
/* For index(i, j) >= 0:
weight.row(index(i, j)) += tmat(i, j) * input.row(i)
*/
void
MulGradWeight
(
const
framework
::
LoDTensor
&
tmat
,
framework
::
LoDTensor
*
weight
,
const
framework
::
LoDTensor
&
input
);
void
MulGradWeight
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
*
weight
,
const
framework
::
Tensor
&
input
);
/* For SelectedRows Weight, For index(i, j) >= 0:
weight.row(index(i, j)) += tmat(i, j) * input.row(i)
*/
void
MulGradWeight
(
const
framework
::
LoD
Tensor
&
tmat
,
void
MulGradWeight
(
const
framework
::
Tensor
&
tmat
,
framework
::
SelectedRows
*
weight
,
const
framework
::
LoD
Tensor
&
input
);
const
framework
::
Tensor
&
input
);
/* For j < code_length
input.row(i) += tmat(i, j) * weight.row(index(i, j))
*/
void
MulGradError
(
const
framework
::
LoDTensor
&
tmat
,
const
framework
::
LoDTensor
&
weight
,
framework
::
LoDTensor
*
input
);
void
MulGradError
(
const
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
*
input
);
size_t
num_classes_
;
const
int64_t
*
ids_
;
std
::
unique_ptr
<
CodeTable
>
code_table
;
std
::
unique_ptr
<
CodeTable
>
code_table
_
;
};
}
// namespace math
}
// namespace operators
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
c3c3c0b3
...
...
@@ -4639,7 +4639,7 @@ def hsigmoid(input,
"X"
:
input
,
"W"
:
weights
,
"PTable"
:
ptable
,
"PCode"
:
pcode
,
"P
ath
Code"
:
pcode
,
"Label"
:
label
}
if
helper
.
bias_attr
:
...
...
python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
浏览文件 @
c3c3c0b3
...
...
@@ -185,7 +185,7 @@ class TestHSigmoidOpSparse(OpTest):
'X'
:
x
,
'W'
:
w
,
'PTable'
:
ptable
,
'PCode'
:
pcode
,
'P
ath
Code'
:
pcode
,
'Label'
:
label
,
'Bias'
:
bias
}
...
...
@@ -285,7 +285,7 @@ class TestHSigmoidOpWithCostumTree(OpTest):
'X'
:
x
,
'W'
:
w
,
'PTable'
:
ptable
,
'PCode'
:
pcode
,
'P
ath
Code'
:
pcode
,
'Label'
:
label
,
'Bias'
:
bias
}
...
...
@@ -322,7 +322,7 @@ class TestHSigmoidOpWithCostumTreeWithoutBias(OpTest):
'X'
:
x
,
'W'
:
w
,
'PTable'
:
ptable
,
'PCode'
:
pcode
,
'P
ath
Code'
:
pcode
,
'Label'
:
label
,
}
pre_output
,
out
=
hsigmoidWithCustomTree
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录