Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
80ce7edb
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
80ce7edb
编写于
1月 09, 2018
作者:
Y
Yancey1989
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
make farward correct
上级
74f519ff
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
170 addition
and
137 deletion
+170
-137
paddle/operators/hierarchical_sigmoid_op.cc
paddle/operators/hierarchical_sigmoid_op.cc
+2
-2
paddle/operators/hierarchical_sigmoid_op.h
paddle/operators/hierarchical_sigmoid_op.h
+15
-20
paddle/operators/math/math_function_impl.h
paddle/operators/math/math_function_impl.h
+4
-4
paddle/operators/math/matrix_bit_code.cc
paddle/operators/math/matrix_bit_code.cc
+65
-91
paddle/operators/math/matrix_bit_code.h
paddle/operators/math/matrix_bit_code.h
+12
-13
python/paddle/v2/fluid/tests/op_test.py
python/paddle/v2/fluid/tests/op_test.py
+5
-4
python/paddle/v2/fluid/tests/test_hsigmoid_op.py
python/paddle/v2/fluid/tests/test_hsigmoid_op.py
+67
-3
未找到文件。
paddle/operators/hierarchical_sigmoid_op.cc
浏览文件 @
80ce7edb
...
@@ -70,7 +70,7 @@ class HierarchicalSigmoidOp : public framework::OperatorWithKernel {
...
@@ -70,7 +70,7 @@ class HierarchicalSigmoidOp : public framework::OperatorWithKernel {
}
}
protected:
protected:
framework
::
OpKernelType
GetKernelType
(
framework
::
OpKernelType
Get
Actual
KernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
)
->
type
()),
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
)
->
type
()),
...
@@ -96,7 +96,7 @@ class HierarchicalSigmoidGradOp : public framework::OperatorWithKernel {
...
@@ -96,7 +96,7 @@ class HierarchicalSigmoidGradOp : public framework::OperatorWithKernel {
}
}
protected:
protected:
framework
::
OpKernelType
GetKernelType
(
framework
::
OpKernelType
Get
Actual
KernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
)
->
type
()),
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
)
->
type
()),
...
...
paddle/operators/hierarchical_sigmoid_op.h
浏览文件 @
80ce7edb
...
@@ -49,34 +49,31 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
...
@@ -49,34 +49,31 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
auto
&
place
=
*
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
();
auto
&
place
=
*
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
();
auto
&
device_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
auto
&
device_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
math
::
RowwiseSum
<
DeviceContext
,
T
>
row_sum
;
math
::
RowwiseSum
<
DeviceContext
,
T
>
row_sum
;
math
::
MatrixBitCodeFunctor
<
T
>
bit_code
;
math
::
MatrixBitCodeFunctor
<
T
>
bit_code
(
num_classes
,
ids
->
data
<
int64_t
>
())
;
std
::
vector
<
int64_t
>
sum_dims
({
batch_size
,
1UL
});
std
::
vector
<
int64_t
>
sum_dims
({
batch_size
,
1UL
});
sum
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
sum_dims
),
ctx
.
GetPlace
());
sum
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
sum_dims
),
ctx
.
GetPlace
());
auto
sum_mat
=
EigenMatrix
<
T
>::
From
(
sum
);
auto
sum_mat
=
EigenMatrix
<
T
>::
From
(
sum
);
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
out_mat
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
out
);
auto
out_mat
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
out
);
if
(
bias
)
{
if
(
bias
)
{
bit_code
.
Add
(
num_classes
,
ids
->
data
<
int64_t
>
(),
pre_out
,
*
bias
);
bit_code
.
Add
(
pre_out
,
*
bias
);
}
}
for
(
int
i
=
0
;
i
<
in
->
dims
()[
0
]
;
++
i
)
{
for
(
int
64_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
bit_code
.
Mul
(
num_classes
,
ids
->
data
<
int64_t
>
(),
pre_out
,
auto
w_i
=
w
->
Slice
(
i
,
i
+
1
);
w
->
Slice
(
i
,
i
+
1
),
in
->
Slice
(
i
,
i
+
1
)
);
bit_code
.
Mul
(
pre_out
,
w_i
,
*
in
);
}
}
// clip the matrix with (-40, 40)
// clip the matrix with (-40, 40)
Transform
<
DeviceContext
>
trans
;
Transform
<
DeviceContext
>
trans
;
trans
(
ctx
.
template
device_context
<
DeviceContext
>(),
pre_out_data
,
trans
(
ctx
.
template
device_context
<
DeviceContext
>(),
pre_out_data
,
pre_out_data
+
pre_out
.
numel
(),
pre_out_data
,
pre_out_data
+
pre_out
.
numel
(),
pre_out_data
,
ClipFunctor
<
T
>
(
static_cast
<
T
>
(
-
40.0
),
static_cast
<
T
>
(
40.0
)));
ClipFunctor
<
T
>
(
static_cast
<
T
>
(
-
40.0
),
static_cast
<
T
>
(
40.0
)));
bit_code
.
Sum
(
num_classes
,
ids
->
data
<
int64_t
>
(),
pre_out
,
*
out
,
bit_code
.
Sum
(
pre_out
,
*
out
,
static_cast
<
T
>
(
-
1
));
static_cast
<
T
>
(
-
1
));
// softrelu with threshold is 40.0
// softrelu with threshold is 40.0
trans
(
ctx
.
template
device_context
<
DeviceContext
>(),
pre_out_data
,
trans
(
ctx
.
template
device_context
<
DeviceContext
>(),
pre_out_data
,
pre_out_data
+
pre_out
.
numel
(),
pre_out_data
,
pre_out_data
+
pre_out
.
numel
(),
pre_out_data
,
ClipFunctor
<
T
>
(
static_cast
<
T
>
(
-
40.0
),
static_cast
<
T
>
(
40.0
)));
ClipFunctor
<
T
>
(
static_cast
<
T
>
(
-
40.0
),
static_cast
<
T
>
(
40.0
)));
pre_out_mat
.
device
(
place
)
=
(
static_cast
<
T
>
(
1.0
)
+
pre_out_mat
.
exp
()).
log
();
pre_out_mat
.
device
(
place
)
=
(
static_cast
<
T
>
(
1.0
)
+
pre_out_mat
.
exp
()).
log
();
row_sum
(
device_ctx
,
pre_out
,
&
sum
);
row_sum
(
device_ctx
,
pre_out
,
&
sum
);
out_mat
.
device
(
place
)
=
sum_mat
+
out_mat
;
out_mat
.
device
(
place
)
=
sum_mat
+
out_mat
;
}
}
...
@@ -103,28 +100,26 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
...
@@ -103,28 +100,26 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
auto
pre_out_mat
=
EigenMatrix
<
T
>::
From
(
pre_out
);
auto
pre_out_mat
=
EigenMatrix
<
T
>::
From
(
pre_out
);
// init pre_out matrix with {1.0}
// init pre_out matrix with {1.0}
math
::
SetConstant
<
DeviceContext
,
T
>
one
;
math
::
SetConstant
<
DeviceContext
,
T
>
one
;
math
::
MatrixBitCodeFunctor
<
T
>
bit_code
;
math
::
MatrixBitCodeFunctor
<
T
>
bit_code
(
num_classes
,
ids
->
data
<
int64_t
>
())
;
one
(
device_ctx
,
&
pre_out
,
static_cast
<
T
>
(
1.0
));
one
(
device_ctx
,
&
pre_out
,
static_cast
<
T
>
(
1.0
));
// softrelu derivative
// softrelu derivative
pre_out_mat
.
device
(
place
)
=
pre_out_mat
.
device
(
place
)
=
pre_out_mat
*
(
static_cast
<
T
>
(
1.0
)
-
static_cast
<
T
>
(
1.0
)
/
pre_out_mat
);
pre_out_mat
*
(
static_cast
<
T
>
(
1.0
)
-
static_cast
<
T
>
(
1.0
)
/
pre_out_mat
);
bit_code
.
Sub
(
num_classes
,
ids
->
data
<
int64_t
>
(),
pre_out
);
bit_code
.
Sub
(
pre_out
);
if
(
bias
)
{
if
(
bias
)
{
bias
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
bias
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
bit_code
.
AddGrad
(
num_classes
,
ids
->
data
<
int64_t
>
(),
pre_out
,
*
bias
);
bit_code
.
AddGrad
(
pre_out
,
*
bias
);
}
}
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
w
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
w
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
for
(
int
i
=
0
;
i
<
in_grad
->
dims
()[
0
];
++
i
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
auto
p_sliced
=
w
->
Slice
(
i
,
i
+
1
);
auto
w_i
=
w
->
Slice
(
i
,
i
+
1
);
auto
in_sliced
=
in
->
Slice
(
i
,
i
+
1
);
// auto in_i = in->Slice(i, i + 1);
auto
in_grad_sliced
=
in_grad
->
Slice
(
i
,
i
+
1
);
// auto in_grad_i = in_grad->Slice(i, i + 1);
bit_code
.
MulGradWeight
(
num_classes
,
ids
->
data
<
int64_t
>
(),
pre_out
,
bit_code
.
MulGradWeight
(
pre_out
,
w_i
,
*
in
);
p_sliced
,
in_sliced
);
bit_code
.
MulGradError
(
pre_out
,
w_i
,
*
in_grad
);
bit_code
.
MulGradError
(
num_classes
,
ids
->
data
<
int64_t
>
(),
pre_out
,
p_sliced
,
in_grad_sliced
);
}
}
}
}
};
};
...
...
paddle/operators/math/math_function_impl.h
浏览文件 @
80ce7edb
...
@@ -62,13 +62,13 @@ void ColwiseSum<DeviceContext, T>::operator()(const DeviceContext& context,
...
@@ -62,13 +62,13 @@ void ColwiseSum<DeviceContext, T>::operator()(const DeviceContext& context,
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
void
RowwiseSum
<
DeviceContext
,
T
>::
operator
()(
const
DeviceContext
&
context
,
void
RowwiseSum
<
DeviceContext
,
T
>::
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
*
vector
)
{
framework
::
Tensor
*
out
)
{
auto
in_dims
=
input
.
dims
();
auto
in_dims
=
input
.
dims
();
auto
size
=
input
.
numel
()
/
in_dims
[
1
];
auto
size
=
input
.
numel
()
/
in_dims
[
1
];
PADDLE_ENFORCE_EQ
(
vector
->
numel
(),
size
);
PADDLE_ENFORCE_EQ
(
out
->
numel
(),
size
);
auto
in
=
framework
::
EigenMatrix
<
T
,
Eigen
::
ColMajor
>::
From
(
input
);
auto
in
=
framework
::
EigenMatrix
<
T
>::
From
(
input
);
auto
vec
=
framework
::
Eigen
Matrix
<
T
,
Eigen
::
ColMajor
>::
From
(
*
vector
);
auto
vec
=
framework
::
Eigen
Vector
<
T
>::
Flatten
(
*
out
);
vec
.
device
(
*
context
.
eigen_device
())
=
in
.
sum
(
Eigen
::
array
<
int
,
1
>
({{
1
}}));
vec
.
device
(
*
context
.
eigen_device
())
=
in
.
sum
(
Eigen
::
array
<
int
,
1
>
({{
1
}}));
}
}
...
...
paddle/operators/math/matrix_bit_code.cc
浏览文件 @
80ce7edb
...
@@ -22,7 +22,7 @@ namespace math {
...
@@ -22,7 +22,7 @@ namespace math {
* CodeTable class should support 3 functions:
* CodeTable class should support 3 functions:
*
*
* size_t size()
* size_t size()
* return the number of
code
s
* return the number of
id
s
*
*
* int getMaxCodeLength()
* int getMaxCodeLength()
* return the maximal code length
* return the maximal code length
...
@@ -45,56 +45,47 @@ namespace math {
...
@@ -45,56 +45,47 @@ namespace math {
*
*
*/
*/
/*
template
<
typename
T
>
for i:
void
MatrixBitCodeFunctor
<
T
>::
Add
(
framework
::
Tensor
&
tmat
,
for j < codeLength:
const
framework
::
Tensor
&
vec
)
{
op(a(i, j), b(0, index(i, j)))
SimpleCodeTable
code_table
(
num_classes_
);
*/
size_t
batch_size
=
tmat
.
dims
()[
0
];
template
<
typename
T
,
class
CodeTable
,
class
Op
>
size_t
width
=
tmat
.
dims
()[
1
];
static
void
AddByBitCodeT
(
Op
op
,
CodeTable
code_table
,
const
int64_t
*
codes
,
for
(
size_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
const
framework
::
Tensor
&
tmat
,
auto
code
=
code_table
(
static_cast
<
size_t
>
(
ids_
[
i
]));
const
framework
::
Tensor
&
vec
)
{
size_t
num_sample
=
tmat
.
dims
()[
0
];
size_t
width
=
vec
.
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
num_sample
;
++
i
)
{
auto
code
=
code_table
(
static_cast
<
size_t
>
(
codes
[
i
]));
int
code_length
=
code
.
get_length
();
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
.
calc_index
(
j
);
size_t
index
=
code
.
calc_index
(
j
);
auto
t
=
tmat
.
data
<
T
>
()[
i
*
width
+
j
];
tmat
.
data
<
T
>
()[
i
*
width
+
j
]
+=
vec
.
data
<
T
>
()[
index
];
auto
v
=
vec
.
data
<
T
>
()[
index
];
op
(
t
,
v
);
}
}
}
}
}
}
template
<
typename
T
,
class
CodeTable
>
template
<
typename
T
>
void
SubByBitCodeT
(
CodeTable
code_table
,
const
int64_t
*
codes
,
void
MatrixBitCodeFunctor
<
T
>::
AddGrad
(
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
tmat
)
{
framework
::
Tensor
&
vec
)
{
// size_t max_code_length = code_table.get_max_code_length(
);
SimpleCodeTable
code_table
(
num_classes_
);
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
batch_size
=
tmat
.
dims
()[
0
];
size_t
o_
width
=
tmat
.
dims
()[
1
];
size_t
width
=
tmat
.
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
auto
code
=
code_table
(
static_cast
<
size_t
>
(
codes
[
i
]));
auto
code
=
code_table
(
static_cast
<
size_t
>
(
ids_
[
i
]));
int
code_length
=
code
.
get_length
();
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
if
(
code
.
calc_bit
(
j
))
{
size_t
index
=
code
.
calc_index
(
j
);
tmat
.
data
<
T
>
()[
i
*
o_width
+
j
]
-=
1
;
vec
.
data
<
T
>
()[
index
]
+=
tmat
.
data
<
T
>
()[
i
*
width
+
j
];
}
}
}
}
}
}
}
template
<
typename
T
,
class
CodeTable
>
template
<
typename
T
>
void
SumByBitCodeT
(
CodeTable
code_table
,
const
int64_t
*
codes
,
void
MatrixBitCodeFunctor
<
T
>::
Sum
(
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
sum
,
framework
::
Tensor
&
sum
,
T
scale_sum
)
{
const
T
&
scale_sum
)
{
SimpleCodeTable
code_table
(
num_classes_
);
// size_t max_code_length = code_table.get_max_code_length();
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
o_width
=
tmat
.
dims
()[
1
];
size_t
o_width
=
tmat
.
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
T
sm
=
static_cast
<
T
>
(
0.0
);
T
sm
=
static_cast
<
T
>
(
0.0
);
auto
code
=
code_table
(
static_cast
<
size_t
>
(
codes
[
i
]));
auto
code
=
code_table
(
static_cast
<
size_t
>
(
ids_
[
i
]));
int
code_length
=
code
.
get_length
();
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
if
(
code
.
calc_bit
(
j
))
{
if
(
code
.
calc_bit
(
j
))
{
...
@@ -106,116 +97,99 @@ void SumByBitCodeT(CodeTable code_table, const int64_t* codes,
...
@@ -106,116 +97,99 @@ void SumByBitCodeT(CodeTable code_table, const int64_t* codes,
}
}
template
<
typename
T
>
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Add
(
size_t
num_classes
,
const
int64_t
*
codes
,
void
MatrixBitCodeFunctor
<
T
>::
Mul
(
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
vec
)
{
auto
op
=
[](
T
&
t
,
const
T
&
v
)
{
t
+=
v
;
};
AddByBitCodeT
<
T
>
(
op
,
SimpleCodeTable
(
num_classes
),
codes
,
tmat
,
vec
);
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
AddGrad
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
vec
)
{
auto
op
=
[](
T
&
t
,
T
&
v
)
{
v
+=
t
;
};
AddByBitCodeT
<
T
>
(
op
,
SimpleCodeTable
(
num_classes
),
codes
,
tmat
,
vec
);
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Sum
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
sum
,
T
scale_sum
)
{
SumByBitCodeT
<
T
>
(
SimpleCodeTable
(
num_classes
),
codes
,
tmat
,
sum
,
scale_sum
);
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Mul
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
)
{
const
framework
::
Tensor
&
input
)
{
SimpleCodeTable
code_table
(
num_classes_
);
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
tmat_width
=
tmat
.
dims
()[
1
];
size_t
tmat_width
=
tmat
.
dims
()[
1
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
weight_width
=
weight
.
dims
()[
1
];
size_t
weight_width
=
weight
.
dims
()[
2
];
auto
tmat_p
=
tmat
.
data
<
T
>
();
auto
tmat_value
=
tmat
.
data
<
T
>
();
auto
weight_p
=
weight
.
data
<
T
>
();
auto
weight_value
=
weight
.
data
<
T
>
();
auto
input_p
=
input
.
data
<
T
>
();
auto
input_value
=
input
.
data
<
T
>
();
auto
code_table
=
SimpleCodeTable
(
num_classes
);
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
(
static_cast
<
size_t
>
(
codes
[
i
]));
auto
code
=
code_table
(
static_cast
<
size_t
>
(
ids_
[
i
]));
int
code_length
=
code
.
get_length
();
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
.
calc_index
(
j
);
size_t
index
=
code
.
calc_index
(
j
);
T
sum
=
static_cast
<
T
>
(
0.0
);
T
sum
=
static_cast
<
T
>
(
0.0
);
for
(
size_t
k
=
0
;
k
<
input_width
;
++
k
)
{
for
(
size_t
k
=
0
;
k
<
input_width
;
++
k
)
{
sum
+=
sum
+=
weight_value
[
weight_width
*
index
+
k
]
*
weight_p
[
weight_width
*
index
+
k
]
*
input_p
[
input_width
*
i
+
k
];
input_value
[
input_width
*
i
+
k
];
}
}
tmat_
p
[
i
*
tmat_width
+
j
]
+=
sum
;
tmat_
value
[
i
*
tmat_width
+
j
]
+=
sum
;
}
}
}
}
}
}
template
<
typename
T
>
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
MulGradWeight
(
size_t
num_classes
,
void
MatrixBitCodeFunctor
<
T
>::
MulGradWeight
(
const
framework
::
Tensor
&
tmat
,
const
int64_t
*
codes
,
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
weight
,
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
)
{
const
framework
::
Tensor
&
input
)
{
SimpleCodeTable
code_table
(
num_classes_
);
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
weight_width
=
weight
.
dims
()[
1
];
size_t
weight_width
=
weight
.
dims
()[
1
];
auto
tmat_p
=
tmat
.
data
<
T
>
();
auto
tmat_value
=
tmat
.
data
<
T
>
();
auto
weight_p
=
weight
.
data
<
T
>
();
auto
weight_value
=
weight
.
data
<
T
>
();
auto
input_p
=
input
.
data
<
T
>
();
auto
input_value
=
input
.
data
<
T
>
();
auto
code_table
=
SimpleCodeTable
(
num_classes
);
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
(
static_cast
<
size_t
>
(
codes
[
i
]));
auto
code
=
code_table
(
static_cast
<
size_t
>
(
ids_
[
i
]));
int
code_length
=
code
.
get_length
();
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
.
calc_index
(
j
);
size_t
index
=
code
.
calc_index
(
j
);
for
(
size_t
k
=
0
;
k
<
input_width
;
++
k
)
{
for
(
size_t
k
=
0
;
k
<
input_width
;
++
k
)
{
weight_
p
[
weight_width
*
index
*
k
]
+=
weight_
value
[
weight_width
*
index
*
k
]
+=
tmat_
p
[
i
*
weight_width
*
j
]
*
input_p
[
input_width
*
i
+
k
];
tmat_
value
[
i
*
weight_width
*
j
]
*
input_value
[
input_width
*
i
+
k
];
}
}
}
}
}
}
}
}
template
<
typename
T
>
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
MulGradError
(
size_t
num_classes
,
void
MatrixBitCodeFunctor
<
T
>::
MulGradError
(
const
framework
::
Tensor
&
tmat
,
const
int64_t
*
codes
,
const
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
&
input
)
{
framework
::
Tensor
&
input
)
{
SimpleCodeTable
code_table
(
num_classes_
);
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
weight_width
=
weight
.
dims
()[
1
];
size_t
weight_width
=
weight
.
dims
()[
1
];
auto
tmat_p
=
tmat
.
data
<
T
>
();
auto
tmat_value
=
tmat
.
data
<
T
>
();
auto
weight_p
=
weight
.
data
<
T
>
();
auto
weight_value
=
weight
.
data
<
T
>
();
auto
input_p
=
input
.
data
<
T
>
();
auto
input_value
=
input
.
data
<
T
>
();
auto
code_table
=
SimpleCodeTable
(
num_classes
);
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
(
static_cast
<
size_t
>
(
codes
[
i
]));
auto
code
=
code_table
(
static_cast
<
size_t
>
(
ids_
[
i
]));
int
code_length
=
code
.
get_length
();
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
.
calc_index
(
j
);
size_t
index
=
code
.
calc_index
(
j
);
for
(
size_t
k
=
0
;
k
<
input_width
;
++
k
)
{
for
(
size_t
k
=
0
;
k
<
input_width
;
++
k
)
{
input_p
[
weight_width
*
index
*
k
]
+=
input_value
[
weight_width
*
index
*
k
]
+=
tmat_p
[
i
*
weight_width
*
j
]
*
weight_p
[
weight_width
*
i
+
k
];
tmat_value
[
i
*
weight_width
*
j
]
*
weight_value
[
weight_width
*
i
+
k
];
}
}
}
}
}
}
}
}
template
<
typename
T
>
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Sub
(
size_t
num_classes
,
const
int64_t
*
codes
,
void
MatrixBitCodeFunctor
<
T
>::
Sub
(
framework
::
Tensor
&
tmat
)
{
framework
::
Tensor
&
tmat
)
{
SimpleCodeTable
code_table
(
num_classes_
);
SubByBitCodeT
<
T
>
(
SimpleCodeTable
(
num_classes
),
codes
,
tmat
);
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
o_width
=
tmat
.
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
(
static_cast
<
size_t
>
(
ids_
[
i
]));
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
if
(
code
.
calc_bit
(
j
))
{
tmat
.
data
<
T
>
()[
i
*
o_width
+
j
]
-=
1
;
}
}
}
}
}
template
class
MatrixBitCodeFunctor
<
float
>;
template
class
MatrixBitCodeFunctor
<
float
>;
...
...
paddle/operators/math/matrix_bit_code.h
浏览文件 @
80ce7edb
...
@@ -63,46 +63,45 @@ struct SimpleCodeTable {
...
@@ -63,46 +63,45 @@ struct SimpleCodeTable {
template
<
typename
T
>
template
<
typename
T
>
class
MatrixBitCodeFunctor
{
class
MatrixBitCodeFunctor
{
public:
public:
explicit
MatrixBitCodeFunctor
(
size_t
num_classes
,
const
int64_t
*
ids
)
:
num_classes_
(
num_classes
),
ids_
(
ids
)
{}
/* For j < code_length
/* For j < code_length
tmat(i, j) += vec(0, index(i, j))
tmat(i, j) += vec(0, index(i, j))
*/
*/
void
Add
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
,
void
Add
(
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
vec
);
const
framework
::
Tensor
&
vec
);
/* For j < code_length
/* For j < code_length
vec(0, index(i, j)) += tmat(i, j)
vec(0, index(i, j)) += tmat(i, j)
*/
*/
void
AddGrad
(
size_t
num_classes
,
const
int64_t
*
codes
,
void
AddGrad
(
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
vec
);
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
vec
);
/* For j < code_length
/* For j < code_length
sum(i, 0) = \sum_j bit(i, j) * tmat(i, j)
sum(i, 0) = \sum_j bit(i, j) * tmat(i, j)
*/
*/
void
Sum
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
,
void
Sum
(
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
sum
,
T
scale_sum
);
framework
::
Tensor
&
sum
,
T
scale_sum
);
/* For j < code_length
/* For j < code_length
tmat(i, j) -= bit(i, j)
tmat(i, j) -= bit(i, j)
*/
*/
void
Sub
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
);
void
Sub
(
framework
::
Tensor
&
tmat
);
/* For j < code_length
/* For j < code_length
input.row(i) += tmat(i, j) * weight.row(index(i, j))
input.row(i) += tmat(i, j) * weight.row(index(i, j))
*/
*/
void
Mul
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tma
t
,
void
Mul
(
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weigh
t
,
const
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
);
const
framework
::
Tensor
&
input
);
/* For index(i, j) >= 0:
/* For index(i, j) >= 0:
weight.row(index(i, j)) += tmat(i, j) * input.row(i)
weight.row(index(i, j)) += tmat(i, j) * input.row(i)
*/
*/
void
MulGradWeight
(
size_t
num_classes
,
const
int64_t
*
codes
,
void
MulGradWeight
(
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
);
const
framework
::
Tensor
&
input
);
/* For j < code_length
/* For j < code_length
input.row(i) += tmat(i, j) * weight.row(index(i, j))
input.row(i) += tmat(i, j) * weight.row(index(i, j))
*/
*/
void
MulGradError
(
size_t
num_classes
,
const
int64_t
*
codes
,
void
MulGradError
(
const
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
&
input
);
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
&
input
);
size_t
num_classes_
;
const
int64_t
*
ids_
;
};
};
}
// namespace math
}
// namespace math
}
// namespace operators
}
// namespace operators
...
...
python/paddle/v2/fluid/tests/op_test.py
浏览文件 @
80ce7edb
...
@@ -49,6 +49,7 @@ def create_op(scope, op_type, inputs, outputs, attrs):
...
@@ -49,6 +49,7 @@ def create_op(scope, op_type, inputs, outputs, attrs):
for
attr_name
in
Operator
.
get_op_attr_names
(
op_type
):
for
attr_name
in
Operator
.
get_op_attr_names
(
op_type
):
if
attr_name
in
attrs
:
if
attr_name
in
attrs
:
kwargs
[
attr_name
]
=
attrs
[
attr_name
]
kwargs
[
attr_name
]
=
attrs
[
attr_name
]
return
Operator
(
op_type
,
**
kwargs
)
return
Operator
(
op_type
,
**
kwargs
)
...
@@ -104,8 +105,6 @@ def get_numeric_gradient(scope,
...
@@ -104,8 +105,6 @@ def get_numeric_gradient(scope,
tensor_to_check_dtype
=
np
.
float32
tensor_to_check_dtype
=
np
.
float32
elif
tensor_to_check_dtype
==
core
.
DataType
.
FP64
:
elif
tensor_to_check_dtype
==
core
.
DataType
.
FP64
:
tensor_to_check_dtype
=
np
.
float64
tensor_to_check_dtype
=
np
.
float64
elif
tensor_to_check_dtype
==
core
.
DataType
.
INT64
:
tensor_to_check_dtype
=
np
.
int64
else
:
else
:
raise
ValueError
(
"Not supported data type "
+
str
(
raise
ValueError
(
"Not supported data type "
+
str
(
tensor_to_check_dtype
))
tensor_to_check_dtype
))
...
@@ -115,8 +114,6 @@ def get_numeric_gradient(scope,
...
@@ -115,8 +114,6 @@ def get_numeric_gradient(scope,
def
__get_elem__
(
tensor
,
i
):
def
__get_elem__
(
tensor
,
i
):
if
tensor_to_check_dtype
==
np
.
float32
:
if
tensor_to_check_dtype
==
np
.
float32
:
return
tensor
.
get_float_element
(
i
)
return
tensor
.
get_float_element
(
i
)
elif
tensor_to_check_dtype
==
np
.
int64
:
return
tensor
.
get_int64_element
(
i
)
else
:
else
:
return
tensor
.
get_double_element
(
i
)
return
tensor
.
get_double_element
(
i
)
...
@@ -356,11 +353,13 @@ class OpTest(unittest.TestCase):
...
@@ -356,11 +353,13 @@ class OpTest(unittest.TestCase):
op_attrs
=
self
.
attrs
if
hasattr
(
self
,
"attrs"
)
else
dict
()
op_attrs
=
self
.
attrs
if
hasattr
(
self
,
"attrs"
)
else
dict
()
self
.
op
=
create_op
(
self
.
scope
,
self
.
op_type
,
op_inputs
,
op_outputs
,
self
.
op
=
create_op
(
self
.
scope
,
self
.
op_type
,
op_inputs
,
op_outputs
,
op_attrs
)
op_attrs
)
if
no_grad_set
is
None
:
if
no_grad_set
is
None
:
no_grad_set
=
set
()
no_grad_set
=
set
()
if
not
type
(
output_names
)
is
list
:
if
not
type
(
output_names
)
is
list
:
output_names
=
[
output_names
]
output_names
=
[
output_names
]
numeric_grads
=
user_defined_grads
or
[
numeric_grads
=
user_defined_grads
or
[
get_numeric_gradient
(
get_numeric_gradient
(
self
.
scope
,
self
.
scope
,
...
@@ -456,7 +455,9 @@ class OpTest(unittest.TestCase):
...
@@ -456,7 +455,9 @@ class OpTest(unittest.TestCase):
# infer variable type and infer shape in compile-time
# infer variable type and infer shape in compile-time
op
.
desc
.
infer_var_type
(
block
.
desc
)
op
.
desc
.
infer_var_type
(
block
.
desc
)
op
.
desc
.
infer_shape
(
block
.
desc
)
op
.
desc
.
infer_shape
(
block
.
desc
)
mean_inputs
=
map
(
block
.
var
,
output_names
)
mean_inputs
=
map
(
block
.
var
,
output_names
)
if
len
(
mean_inputs
)
==
1
:
if
len
(
mean_inputs
)
==
1
:
loss
=
block
.
create_var
(
dtype
=
mean_inputs
[
0
].
dtype
,
shape
=
[
1
])
loss
=
block
.
create_var
(
dtype
=
mean_inputs
[
0
].
dtype
,
shape
=
[
1
])
op
=
block
.
append_op
(
op
=
block
.
append_op
(
...
...
python/paddle/v2/fluid/tests/test_hsigmoid_op.py
浏览文件 @
80ce7edb
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
from
op_test
import
OpTest
from
op_test
import
OpTest
import
math
def
find_latest_set
(
num
):
return
1
+
int
(
math
.
floor
(
math
.
log
(
num
,
2
)))
class
CodeTable
(
object
):
def
__init__
(
self
,
num_classes
,
code
):
self
.
c
=
num_classes
+
code
def
cal_index
(
self
,
bit
):
return
(
self
.
c
>>
(
bit
+
1
))
-
1
def
get_length
(
self
):
return
find_latest_set
(
self
.
c
)
-
1
def
cal_bit
(
self
,
bit
):
return
self
.
c
&
(
1
<<
bit
)
def
hsigmoid
(
x
,
w
,
ids
,
bias
,
num_classes
):
# code length =
# initialize pre out with dims={batch_size, code_length}
batch_size
=
x
.
shape
[
0
]
code_length
=
find_latest_set
(
num_classes
-
1
)
code_table
=
[
0
for
_
in
range
(
code_length
)]
pre_output
=
np
.
zeros
((
batch_size
,
code_length
))
pre_sum
=
np
.
zeros
((
batch_size
,
1
))
out
=
np
.
zeros
((
batch_size
,
1
)).
astype
(
"float32"
)
# pre_out += code(bias)
for
i
in
xrange
(
batch_size
):
code_table
=
CodeTable
(
num_classes
,
ids
[
i
])
length
=
code_table
.
get_length
()
for
j
in
xrange
(
length
):
idx
=
code_table
.
cal_index
(
j
)
pre_output
[
i
][
j
]
+=
bias
[
0
][
idx
]
# pre_out += code(w) * x
for
i
in
xrange
(
batch_size
):
for
j
in
xrange
(
batch_size
):
code_table
=
CodeTable
(
num_classes
,
ids
[
j
])
length
=
code_table
.
get_length
()
for
k
in
xrange
(
length
):
idx
=
code_table
.
cal_index
(
k
)
sum
=
0.0
for
l
in
xrange
(
x
.
shape
[
1
]):
sum
+=
w
[
i
][
idx
][
l
]
*
x
[
j
][
l
]
pre_output
[
j
][
k
]
+=
sum
# clip[-40.0, 40.0]
np
.
clip
(
pre_output
,
-
40.0
,
40.0
)
# out(i, 0) = \sum_j bit(i, j) * preout(i, j)
for
i
in
xrange
(
batch_size
):
code_table
=
CodeTable
(
num_classes
,
ids
[
i
])
length
=
code_table
.
get_length
()
sum
=
0.0
for
j
in
xrange
(
length
):
if
code_table
.
cal_bit
(
j
):
sum
+=
pre_output
[
i
][
j
]
out
[
i
]
=
-
1.0
*
sum
# soft relu
np
.
clip
(
pre_output
,
-
40.0
,
40.0
)
pre_output
=
np
.
log
(
1
+
np
.
exp
(
pre_output
))
pre_sum
=
pre_output
.
sum
(
1
).
reshape
((
batch_size
,
1
))
out
+=
pre_sum
return
out
class
TestHSigmoidOp
(
OpTest
):
class
TestHSigmoidOp
(
OpTest
):
...
@@ -16,9 +81,8 @@ class TestHSigmoidOp(OpTest):
...
@@ -16,9 +81,8 @@ class TestHSigmoidOp(OpTest):
bias
=
np
.
random
.
random
((
1
,
num_classes
-
1
)).
astype
(
"float32"
)
bias
=
np
.
random
.
random
((
1
,
num_classes
-
1
)).
astype
(
"float32"
)
self
.
inputs
=
{
'X'
:
x
,
'W'
:
w
,
'Ids'
:
ids
,
'Bias'
:
bias
}
self
.
inputs
=
{
'X'
:
x
,
'W'
:
w
,
'Ids'
:
ids
,
'Bias'
:
bias
}
self
.
attrs
=
{
'num_classes'
:
num_classes
}
self
.
attrs
=
{
'num_classes'
:
num_classes
}
self
.
outputs
=
{
out
=
hsigmoid
(
x
,
w
,
ids
,
bias
,
num_classes
)
'Out'
:
np
.
random
.
random
((
batch_size
,
1
)).
astype
(
"float32"
)
self
.
outputs
=
{
'Out'
:
out
}
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录