Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
fb9c08f0
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
fb9c08f0
编写于
12月 25, 2017
作者:
Y
Yancey1989
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
make forward work
上级
28630dd8
变更
12
显示空白变更内容
内联
并排
Showing
12 changed file
with
285 addition
and
223 deletion
+285
-223
paddle/operators/CMakeLists.txt
paddle/operators/CMakeLists.txt
+1
-1
paddle/operators/hierarchical_sigmoid_op.cc
paddle/operators/hierarchical_sigmoid_op.cc
+40
-10
paddle/operators/hierarchical_sigmoid_op.h
paddle/operators/hierarchical_sigmoid_op.h
+52
-43
paddle/operators/math/CMakeLists.txt
paddle/operators/math/CMakeLists.txt
+1
-1
paddle/operators/math/math_function.cc
paddle/operators/math/math_function.cc
+6
-6
paddle/operators/math/math_function.h
paddle/operators/math/math_function.h
+3
-3
paddle/operators/math/math_function_impl.h
paddle/operators/math/math_function_impl.h
+8
-8
paddle/operators/math/matrix_bit_code.cc
paddle/operators/math/matrix_bit_code.cc
+116
-95
paddle/operators/math/matrix_bit_code.h
paddle/operators/math/matrix_bit_code.h
+40
-46
paddle/pybind/pybind.cc
paddle/pybind/pybind.cc
+2
-0
python/paddle/v2/fluid/tests/op_test.py
python/paddle/v2/fluid/tests/op_test.py
+6
-5
python/paddle/v2/fluid/tests/test_hsigmoid_op.py
python/paddle/v2/fluid/tests/test_hsigmoid_op.py
+10
-5
未找到文件。
paddle/operators/CMakeLists.txt
浏览文件 @
fb9c08f0
...
...
@@ -207,7 +207,7 @@ set(DEPS_OPS
gru_op
adagrad_op
sgd_op
hierarchical_sigmoid_op
)
hierarchical_sigmoid_op
save_op
load_op
send_op
...
...
paddle/operators/hierarchical_sigmoid_op.cc
浏览文件 @
fb9c08f0
...
...
@@ -60,19 +60,48 @@ class HierarchicalSigmoidOp : public framework::OperatorWithKernel {
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
h
asInput
(
"X"
),
"Input(X) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
H
asInput
(
"X"
),
"Input(X) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Label"
),
"Input(Label) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Parameters"
),
"Input(Parameters)"
"should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) should not be null."
);
const
int64_t
batch_size
=
ctx
->
GetInputDim
(
"X"
)[
0
];
std
::
vector
<
int64_t
>
output_shape
({
batch_size
,
num_classes_
-
1
});
std
::
vector
<
int64_t
>
output_shape
({
batch_size
,
1
});
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
output_shape
));
}
protected:
framework
::
OpKernelType
GetKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
)
->
type
()),
ctx
.
GetPlace
());
}
};
class
HierarchicalSigmoidGradOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{}
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Parameters"
),
"Input(Parameters)"
"should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Label"
),
"Input(Label)"
"should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"Parameters"
)),
"Input(Parameters@Grad should not be null.)"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"X"
)));
}
protected:
framework
::
OpKernelType
GetKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
)
->
type
()),
ctx
.
GetPlace
());
}
};
class
HierarchicalSigmoidOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
...
...
@@ -98,7 +127,8 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput
(
"Out"
,
"(Tensor, required) The output of hierarchical sigmoid operator."
"the shape is [N, 1]"
);
AddAttr
<
int
>
(
"num_classes"
,
"(int, required)"
,
"The number of classes"
);
AddAttr
<
int
>
(
"num_classes"
,
"(int, required)"
,
"The number of classes"
)
.
SetDefault
(
2
);
AddComment
(
R"DOC(
The hierarchical sigmoid operator organize the classes into a binary tree.
At each node, a sigmoid function is used to caculate the probability of
...
...
@@ -116,9 +146,9 @@ namespace ops = paddle::operators;
REGISTER_OP
(
hierarchical_sigmoid
,
ops
::
HierarchicalSigmoidOp
,
ops
::
HierarchicalSigmoidOpMaker
,
hierarchical_sigmoid_grad
,
ops
::
HierarchicalSigmoidGradOp
);
REGISTER_OP_CPU_KERNEL
(
hierarchical_sigmoid
,
ops
::
HierarchicalSigmoidOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
hierarchical_sigmoid_grad
,
ops
::
HierarchicalSigmoidGradOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
hierarchical_sigmoid
,
ops
::
HierarchicalSigmoidOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
hierarchical_sigmoid_grad
,
ops
::
HierarchicalSigmoidGradOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
paddle/operators/hierarchical_sigmoid_op.h
浏览文件 @
fb9c08f0
...
...
@@ -14,8 +14,10 @@ limitations under the License. */
#pragma once
#include "paddle/framework/op_registry.h"
#include "paddle/operators/clip_op.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/matrix_bit_code.h"
#include "paddle/platform/transform.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -23,60 +25,64 @@ namespace operators {
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenMatrix
=
framework
::
EigenMatrix
<
T
,
MajorType
,
IndexType
>
;
using
platform
::
Transform
;
template
<
typename
Place
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
class
HierarchicalSigmoidOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
param
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Parameter
"
);
auto
*
param
s
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Parameters
"
);
auto
*
label
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Label"
);
auto
*
bias
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Bias"
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
size_t
num_classes
=
static_cast
<
size_t
>
(
ctx
.
Attr
<
int
>
(
"num_classes"
));
framework
::
Tensor
sum
;
int64_t
code_length
=
math
::
FindLastSet
(
num_classes
-
1
);
int64_t
batch_size
=
in
->
dims
()[
0
];
auto
*
ids
=
label
->
data
<
int64_t
>
();
framework
::
Tensor
pre_out
;
auto
place
=
ctx
.
GetEigenDevice
<
Place
>
();
auto
&
device_ctx
=
ctx
.
device_context
();
math
::
ColwiseSum
<
Place
,
T
>
col_sum
;
math
::
RowwiseSum
<
Place
,
T
>
row_sum
;
framework
::
Tensor
sum
;
auto
pre_out_data
=
pre_out
.
mutable_data
<
T
>
(
framework
::
make_ddim
({
batch_size
,
code_length
}),
ctx
.
GetPlace
());
auto
pre_out_mat
=
EigenMatrix
<
T
>::
From
(
pre_out
);
int64_t
batch_size
=
ins
[
0
]
->
dims
()[
0
];
int64_t
code_length
=
math
::
FindLastSet
(
num_classes
-
1
);
std
::
vector
<
int64_t
>
pre_out_dims
({
batch_size
,
code_length
});
pre_out
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
pre_out_dims
),
ctx
.
GetPlace
());
auto
&
place
=
*
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
();
auto
&
device_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
math
::
RowwiseSum
<
DeviceContext
,
T
>
row_sum
;
math
::
MatrixBitCodeFunctor
<
T
>
bit_code
;
std
::
vector
<
int64_t
>
sum_dims
({
batch_size
,
1UL
});
sum
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
sum_dims
),
ctx
.
GetPlace
());
auto
sum_mat
=
EigenMatrix
<
T
>::
From
(
sum
);
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
out_mat
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
out
);
if
(
bias
)
{
math
::
AddByBitCode
<
T
>
(
num_classes
,
*
label
,
pre_out
,
*
bias
);
bit_code
.
Add
(
num_classes
,
ids
,
pre_out
,
*
bias
);
}
for
(
size_t
i
=
0
;
i
<
in
.
dims
()[
0
];
++
i
)
{
math
::
MulByBitCode
<
T
>
(
num_classes
,
*
label
,
pre_out
,
*
params
->
Slice
(
i
,
i
+
1
),
*
in
->
Slice
(
i
,
i
+
1
));
for
(
int
i
=
0
;
i
<
in
->
dims
()[
0
];
++
i
)
{
bit_code
.
Mul
(
num_classes
,
ids
,
pre_out
,
params
->
Slice
(
i
,
i
+
1
),
in
->
Slice
(
i
,
i
+
1
));
}
// clip the matrix with (-40, 40)
pre_out_mat
.
device
(
place
)
=
pre_out_mat
.
abs
().
cwiseMax
(
static_cast
<
T
>
(
40.0
));
math
::
SumByBitCode
<
T
>
(
num_classes
,
*
label
,
*
out
,
pre_out
,
static_cast
<
T
>
(
-
1
));
Transform
<
DeviceContext
>
trans
;
trans
(
ctx
.
template
device_context
<
DeviceContext
>(),
pre_out_data
,
pre_out_data
+
pre_out
.
numel
(),
pre_out_data
,
ClipFunctor
<
T
>
(
static_cast
<
T
>
(
-
40.0
),
static_cast
<
T
>
(
40.0
)
));
bit_code
.
Sum
(
num_classes
,
ids
,
pre_out
,
*
out
,
static_cast
<
T
>
(
-
1
));
// softrelu with threshold is 40.0
pre_out_mat
.
device
(
place
)
=
pre_out_mat
.
abs
().
cwiseMax
(
static_cast
<
T
>
(
40.0
));
trans
(
ctx
.
template
device_context
<
DeviceContext
>(),
pre_out_data
,
pre_out_data
+
pre_out
.
numel
(),
pre_out_data
,
ClipFunctor
<
T
>
(
static_cast
<
T
>
(
-
40.0
),
static_cast
<
T
>
(
40.0
)));
pre_out_mat
.
device
(
place
)
=
(
static_cast
<
T
>
(
1.0
)
+
pre_out_mat
.
exp
()).
log
();
row_sum
(
device_ctx
,
pre_out
,
&
sum
);
col_sum
(
device_ctx
,
*
out
,
&
sum
)
;
out_mat
.
device
(
place
)
=
sum_mat
+
out_mat
;
}
};
template
<
typename
Place
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
class
HierarchicalSigmoidGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -85,37 +91,40 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
auto
*
params
=
ctx
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Parameters"
));
auto
*
bias
=
ctx
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Bias"
));
auto
*
label
=
ctx
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Label"
));
auto
*
label
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Label"
);
size_t
num_classes
=
static_cast
<
size_t
>
(
ctx
.
Attr
<
int
>
(
"num_classes"
));
int64_t
code_length
=
math
::
FindLastSet
(
num_classes
-
1
);
int64_t
batch_size
=
in
->
dims
()[
0
];
framework
::
Tensor
pre_out
;
auto
place
=
ctx
.
GetEigenDevice
<
Place
>
();
auto
&
dev_ctx
=
ctx
.
device_context
(
);
int64_t
batch_size
=
in_grad
.
dims
()[
0
]
;
int64_t
code_length
=
math
::
FindLastSet
(
num_classes
-
1
);
pre_out
.
mutable_data
<
T
>
(
framework
::
make_ddim
({
batch_size
,
code_length
}),
ctx
.
GetPlace
()
);
auto
&
place
=
*
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
()
;
auto
&
device_ctx
=
ctx
.
template
device_context
<
DeviceContext
>(
);
auto
pre_out_mat
=
EigenMatrix
<
T
>::
From
(
pre_out
);
auto
*
ids
=
label
->
data
<
int64_t
>
();
// init pre_out matrix with {1.0}
std
::
vector
<
int64_t
>
pre_out_dims
({
batch_size
,
code_length
});
pre_out
.
mutable_data
<
T
>
(
framework
::
make_ddim
(
pre_out_dims
),
ctx
.
GetPlace
());
math
::
SetConstant
<
Place
,
T
>
set
;
set
(
dev_ctx
,
&
pre_out
,
static_cast
<
T
>
(
1.0
));
math
::
SetConstant
<
DeviceContext
,
T
>
one
;
math
::
MatrixBitCodeFunctor
<
T
>
bit_code
;
one
(
device_ctx
,
&
pre_out
,
static_cast
<
T
>
(
1.0
));
// softrelu derivative
pre_out_mat
.
device
(
place
)
=
pre_out_mat
*
(
static_cast
<
T
>
(
1.0
)
-
static_cast
<
T
>
(
1.0
)
/
pre_out_mat
);
math
::
SubByBitCode
<
T
>
(
num_classes
,
*
label
,
pre_out
);
bit_code
.
Sub
(
num_classes
,
ids
,
pre_out
);
if
(
bias
)
{
math
::
AddByBitCodeGrad
<
T
>
(
num_classes
,
*
label
,
pre_out
,
*
bias
);
bit_code
.
AddGrad
(
num_classes
,
ids
,
pre_out
,
*
bias
);
}
for
(
size_t
i
=
0
;
i
<
in_grad
.
dims
()[
0
];
++
i
)
{
math
::
MulByBitCodeGradWeight
<
T
>
(
num_classes
,
*
label
,
pre_out
,
*
params
[
i
],
*
in
[
i
]
->
Slice
(
i
,
i
+
1
));
math
::
MulByBitCodeGradError
<
T
>
(
num_classes
,
*
label
,
pre_out
,
*
params
[
i
],
*
ins_grad
[
i
]
->
Slice
(
i
,
i
+
1
));
for
(
int
i
=
0
;
i
<
in_grad
->
dims
()[
0
];
++
i
)
{
auto
p_sliced
=
params
->
Slice
(
i
,
i
+
1
);
auto
in_sliced
=
in
->
Slice
(
i
,
i
+
1
);
auto
in_grad_sliced
=
in_grad
->
Slice
(
i
,
i
+
1
);
bit_code
.
MulGradWeight
(
num_classes
,
ids
,
pre_out
,
p_sliced
,
in_sliced
);
bit_code
.
MulGradError
(
num_classes
,
ids
,
pre_out
,
p_sliced
,
in_grad_sliced
);
}
}
};
...
...
paddle/operators/math/CMakeLists.txt
浏览文件 @
fb9c08f0
...
...
@@ -27,7 +27,7 @@ else()
cc_library
(
context_project SRCS context_project.cc DEPS device_context math_function
)
cc_library
(
sequence2batch SRCS sequence2batch.cc DEPS device_context
)
cc_library
(
lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions
)
cc_library
(
matrix_bit_code SRCS matrix_bit_code.cc
)
cc_library
(
matrix_bit_code SRCS matrix_bit_code.cc
DEPS device_context
)
cc_library
(
maxouting SRCS maxouting.cc DEPS device_context
)
cc_library
(
unpooling SRCS unpooling.cc DEPS device_context
)
cc_library
(
gru_compute SRCS gru_compute.cc DEPS device_context activation_functions math_function
)
...
...
paddle/operators/math/math_function.cc
浏览文件 @
fb9c08f0
...
...
@@ -302,12 +302,12 @@ void set_constant(const platform::DeviceContext& context,
#endif
}
template
struct
RowwiseAdd
<
platform
::
CPU
Place
,
float
>;
template
struct
RowwiseAdd
<
platform
::
CPU
Place
,
double
>;
template
struct
ColwiseSum
<
platform
::
CPU
Place
,
float
>;
template
struct
ColwiseSum
<
platform
::
CPU
Place
,
double
>;
template
struct
RowwiseSum
<
platform
::
CPU
Place
,
float
>;
template
struct
RowwiseSum
<
platform
::
CPU
Place
,
double
>;
template
struct
RowwiseAdd
<
platform
::
CPU
DeviceContext
,
float
>;
template
struct
RowwiseAdd
<
platform
::
CPU
DeviceContext
,
double
>;
template
struct
ColwiseSum
<
platform
::
CPU
DeviceContext
,
float
>;
template
struct
ColwiseSum
<
platform
::
CPU
DeviceContext
,
double
>;
template
struct
RowwiseSum
<
platform
::
CPU
DeviceContext
,
float
>;
template
struct
RowwiseSum
<
platform
::
CPU
DeviceContext
,
double
>;
}
// namespace math
}
// namespace operators
...
...
paddle/operators/math/math_function.h
浏览文件 @
fb9c08f0
...
...
@@ -128,10 +128,10 @@ struct ColwiseSum {
framework
::
Tensor
*
vec
);
};
template
<
typename
Place
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
struct
RowwiseSum
{
void
operator
()(
const
platform
::
DeviceContext
&
contex
t
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
*
vec
);
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
&
inpu
t
,
framework
::
Tensor
*
vec
);
};
}
// namespace math
...
...
paddle/operators/math/math_function_impl.h
浏览文件 @
fb9c08f0
...
...
@@ -79,19 +79,19 @@ void ColwiseSum<DeviceContext, T>::operator()(const DeviceContext& context,
in
.
sum
(
Eigen
::
array
<
int
,
1
>
({{
0
}})).
reshape
(
shape
);
}
template
<
typename
Place
,
typename
T
>
void
RowwiseSum
<
Place
,
T
>::
operator
()(
const
platform
::
DeviceContext
&
context
,
template
<
typename
DeviceContext
,
typename
T
>
void
RowwiseSum
<
DeviceContext
,
T
>::
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
*
vector
)
{
auto
in_dims
=
input
.
dims
();
auto
size
=
input
.
numel
()
/
in_dims
[
1
];
PADDLE_ENFORCE_EQ
(
vector
->
numel
(),
size
);
auto
in
=
framework
::
EigenMatrix
<
T
>::
From
(
input
);
auto
vec
=
framework
::
EigenMatrix
<
T
>::
From
(
*
vector
);
auto
in
=
framework
::
EigenMatrix
<
T
,
Eigen
::
ColMajor
>::
From
(
input
);
auto
vec
=
framework
::
EigenMatrix
<
T
,
Eigen
::
ColMajor
>::
From
(
*
vector
);
Eigen
::
array
<
int
,
2
>
shape
({{
static_cast
<
int
>
(
size
),
1
}});
vec
.
reshape
(
shape
).
device
(
*
context
.
GetEigenDevice
<
Place
>
())
=
in
.
sum
(
Eigen
::
array
<
int
,
1
>
({{
0
}})).
reshape
(
shape
);
vec
.
reshape
(
shape
).
device
(
*
context
.
eigen_device
())
=
in
.
sum
(
Eigen
::
array
<
int
,
1
>
({{
1
}})).
reshape
(
shape
);
}
}
// namespace math
}
// namespace operators
...
...
paddle/operators/math/matrix_bit_code.cc
浏览文件 @
fb9c08f0
...
...
@@ -50,50 +50,52 @@ namespace math {
for j < codeLength:
op(a(i, j), b(0, index(i, j)))
*/
template
<
class
CodeTable
,
class
Op
,
typename
T
>
static
void
AddByBitCodeT
(
Op
op
,
CodeTable
code_table
,
const
framework
::
Tensor
&
codes
,
framework
::
Tensor
&
tmat
,
template
<
typename
T
,
class
CodeTable
,
class
Op
>
static
void
AddByBitCodeT
(
Op
op
,
CodeTable
code_table
,
const
int64_t
*
codes
,
const
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
vec
)
{
size_t
num_classes
=
code_table
.
size
();
size_t
max_code_length
=
code_table
.
get_max_code_length
();
size_t
num_sample
=
tmat
.
dims
()[
0
];
size_t
width
=
vec
.
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
num_sample
;
++
i
)
{
auto
code
=
code_table
(
codes
.
data
<
T
>
()[
i
]
);
auto
code
=
code_table
(
static_cast
<
size_t
>
(
codes
[
i
])
);
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
+
j
)
{
for
(
int
j
=
0
;
j
<
code_length
;
+
+
j
)
{
size_t
index
=
code
.
calc_index
(
j
);
op
(
tmat
.
data
<
T
>
()[
i
*
width
+
j
],
vec
.
data
<
T
>
()[
index
]);
auto
t
=
tmat
.
data
<
T
>
()[
i
*
width
+
j
];
auto
v
=
vec
.
data
<
T
>
()[
index
];
op
(
t
,
v
);
}
}
}
template
<
typename
T
>
void
AddByBitCode
(
size_t
num_classes
,
const
framework
::
Tensor
&
codes
,
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
vec
)
{
auto
op
=
[](
T
&
t
,
T
&
v
)
{
t
+=
v
;
};
AddByBitCodeT
<
T
>
(
op
,
SimpleCodeTable
(
num_classes
),
codes
,
tmat
,
vec
);
}
template
<
typename
T
>
void
AddByBitCodeGrad
(
size_t
num_classes
,
const
framework
::
Tensor
&
codes
,
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
vec
)
{
auto
op
=
[](
T
&
t
,
T
&
v
)
{
v
+=
t
;
};
AddByBitCode
<
T
>
(
op
,
SimpleCodeTable
(
num_classes
),
codes
,
tmat
,
vec
);
template
<
typename
T
,
class
CodeTable
>
void
SubByBitCodeT
(
CodeTable
code_table
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
)
{
// size_t max_code_length = code_table.get_max_code_length();
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
o_width
=
tmat
.
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
(
static_cast
<
size_t
>
(
codes
[
i
]));
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
if
(
code
.
calc_bit
(
j
))
{
tmat
.
data
<
T
>
()[
i
*
o_width
+
j
]
-=
1
;
}
}
}
}
template
<
class
CodeTable
,
typename
T
>
void
SumByBitCodeT
(
CodeTable
code_table
,
const
framework
::
Tensor
&
codes
,
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
sum
,
template
<
typename
T
,
class
CodeTable
>
void
SumByBitCodeT
(
CodeTable
code_table
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
sum
,
const
T
&
scale_sum
)
{
size_t
max_code_length
=
code_table
.
get_max_code_length
();
//
size_t max_code_length = code_table.get_max_code_length();
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
o_width
=
tmat
.
dims
()[
1
];
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
T
sm
=
0
;
auto
code
=
code_table
(
codes
.
data
<
T
>
()[
i
]
);
T
sm
=
static_cast
<
T
>
(
0.0
)
;
auto
code
=
code_table
(
static_cast
<
size_t
>
(
codes
[
i
])
);
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
if
(
code
.
calc_bit
(
j
))
{
...
...
@@ -103,105 +105,124 @@ void SumByBitCodeT(CodeTable code_table, const framework::Tensor& codes,
sum
.
data
<
T
>
()[
i
]
=
scale_sum
*
sm
;
}
}
/* For j < codeLength:
sum(i, 0) = \sum_j bit(i, j) * input(i, j)
*/
template
<
typename
T
>
void
SumByBitCode
(
size_t
num_classes
,
const
framework
::
Tensor
&
codes
,
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
sum
,
T
scale_sum
)
{
SumByBitCodeT
(
SimpleCodeTable
(
num_classes
),
codes
,
tmat
,
scale_sum
);
void
MatrixBitCodeFunctor
<
T
>::
Add
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
vec
)
{
auto
op
=
[](
T
&
t
,
const
T
&
v
)
{
t
+=
v
;
};
AddByBitCodeT
<
T
>
(
op
,
SimpleCodeTable
(
num_classes
),
codes
,
tmat
,
vec
);
}
template
<
class
Op
,
class
CodeTable
,
typename
T
>
void
MulByBitCodeT
(
Op
op
,
CodeTable
code_table
,
const
framework
::
Tensor
&
codes
,
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
weight
,
framework
::
Tensor
&
input
)
{
size_t
num_classes
=
code_table
.
size
();
size_t
max_code_length
=
code_table
.
get_max_code_length
();
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
input_dim
=
input
.
dims
()[
1
];
size_t
o_width
=
tmat
.
dims
()[
1
];
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
AddGrad
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
vec
)
{
auto
op
=
[](
T
&
t
,
T
&
v
)
{
v
+=
t
;
};
AddByBitCodeT
<
T
>
(
op
,
SimpleCodeTable
(
num_classes
),
codes
,
tmat
,
vec
);
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Sum
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
sum
,
T
scale_sum
)
{
SumByBitCodeT
<
T
>
(
SimpleCodeTable
(
num_classes
),
codes
,
tmat
,
sum
,
scale_sum
);
}
template
<
typename
T
>
void
MatrixBitCodeFunctor
<
T
>::
Mul
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
)
{
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
tmat_width
=
tmat
.
dims
()[
1
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
weight_width
=
weight
.
dims
()[
1
];
auto
tmat_p
=
tmat
.
data
<
T
>
();
auto
weight_p
=
weight
.
data
<
T
>
();
auto
input_p
=
input
.
data
<
T
>
();
auto
code_table
=
SimpleCodeTable
(
num_classes
);
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
(
codes
.
data
<
T
>
()[
i
]
);
auto
code
=
code_table
(
static_cast
<
size_t
>
(
codes
[
i
])
);
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
.
calc_index
(
j
);
op
(
tmat
.
data
<
T
>
()[
i
*
o_width
+
j
],
weight
.
data
<
T
>
()
+
index
*
weight
.
dims
()[
1
],
input
.
data
<
T
>
()
+
i
*
input
.
dims
()[
1
],
input_dim
);
T
sum
=
static_cast
<
T
>
(
0.0
);
for
(
size_t
k
=
0
;
k
<
input_width
;
++
k
)
{
sum
+=
weight_p
[
weight_width
*
index
+
k
]
*
input_p
[
input_width
*
i
+
k
];
}
std
::
cout
<<
sum
<<
std
::
endl
;
tmat_p
[
i
*
tmat_width
+
j
]
+=
sum
;
}
}
}
template
<
typename
T
>
void
MulByBitCode
(
size_t
num_classes
,
const
framework
::
Tensor
&
codes
,
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
)
{
auto
op
=
[](
T
&
t
,
const
T
*
weight_row
,
const
T
*
input_row
,
size_t
input_dim
)
{
T
sum
=
0
;
for
(
size_t
k
=
0
;
k
<
input_dim
;
++
k
)
{
sum
+=
weight_row
[
k
]
*
input_row
[
k
];
}
t
+=
sum
;
};
MulByBitCodeT
<
T
>
(
op
,
SimpleCodeTable
(
num_classes
),
codes
,
tmat
,
weight
,
input
);
}
template
<
typename
T
>
void
MulByBitCodeGradWeight
(
size_t
num_classes
,
const
framework
::
Tensor
&
codes
,
void
MatrixBitCodeFunctor
<
T
>::
MulGradWeight
(
size_t
num_classes
,
const
int64_t
*
codes
,
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
)
{
auto
op
=
[](
const
T
t
,
T
*
weight_row
,
const
T
*
input_row
,
size_t
input_dim
)
{
for
(
size_t
k
=
0
;
k
<
input_dim
;
++
k
)
{
weight_row
[
k
]
+=
t
*
input_row
[
k
];
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
weight_width
=
weight
.
dims
()[
1
];
auto
tmat_p
=
tmat
.
data
<
T
>
();
auto
weight_p
=
weight
.
data
<
T
>
();
auto
input_p
=
input
.
data
<
T
>
();
auto
code_table
=
SimpleCodeTable
(
num_classes
);
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
(
static_cast
<
size_t
>
(
codes
[
i
]));
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
size_t
index
=
code
.
calc_index
(
j
);
for
(
size_t
k
=
0
;
k
<
input_width
;
++
k
)
{
weight_p
[
weight_width
*
index
*
k
]
+=
tmat_p
[
i
*
weight_width
*
j
]
*
input_p
[
input_width
*
i
+
k
];
}
}
}
};
MulByBitCodeT
<
T
>
(
op
,
SimpleCodeTable
(
num_classes
),
codes
,
tmat
,
weight
,
input
);
}
template
<
typename
T
>
void
MulByBitCodeGradError
(
size_t
num_classes
,
const
framework
::
Tensor
&
codes
,
void
MatrixBitCodeFunctor
<
T
>::
MulGradError
(
size_t
num_classes
,
const
int64_t
*
codes
,
const
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
&
input
)
{
auto
op
=
[](
const
T
t
,
const
T
*
weight_row
,
T
*
input_row
,
size_t
input_dim
)
{
for
(
size_t
k
=
0
;
k
<
input_dim
;
++
k
)
{
input_row
[
k
]
+=
t
*
weight_row
[
k
];
}
};
MulByBitCodeT
<
T
>
(
op
,
SimpleCodeTable
(
num_classes
),
codes
,
tmat
,
weight
,
input
);
}
template
<
class
CodeTable
,
typename
T
>
void
SubByBitCodeT
(
CodeTable
code_table
,
const
framework
::
Tensor
&
codes
,
framework
::
Tensor
&
tmat
)
{
size_t
max_code_length
=
code_table
.
get_max_code_length
();
size_t
num_samples
=
tmat
.
dims
()[
0
];
size_t
o_width
=
tmat
.
dims
()[
1
];
size_t
input_width
=
input
.
dims
()[
1
];
size_t
weight_width
=
weight
.
dims
()[
1
];
auto
tmat_p
=
tmat
.
data
<
T
>
();
auto
weight_p
=
weight
.
data
<
T
>
();
auto
input_p
=
input
.
data
<
T
>
();
auto
code_table
=
SimpleCodeTable
(
num_classes
);
for
(
size_t
i
=
0
;
i
<
num_samples
;
++
i
)
{
auto
code
=
code_table
(
codes
.
data
<
T
>
()[
i
]
);
auto
code
=
code_table
(
static_cast
<
size_t
>
(
codes
[
i
])
);
int
code_length
=
code
.
get_length
();
for
(
int
j
=
0
;
j
<
code_length
;
++
j
)
{
if
(
code
.
calc_bit
(
j
))
{
tmat
.
data
<
T
>
()[
i
*
o_width
+
j
]
-=
1
;
size_t
index
=
code
.
calc_index
(
j
);
for
(
size_t
k
=
0
;
k
<
input_width
;
++
k
)
{
input_p
[
weight_width
*
index
*
k
]
+=
tmat_p
[
i
*
weight_width
*
j
]
*
weight_p
[
weight_width
*
i
+
k
];
}
}
}
}
template
<
typename
T
>
void
SubByBitCode
(
size_t
num_classes
,
const
framework
::
Tensor
&
codes
,
void
MatrixBitCodeFunctor
<
T
>::
Sub
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
)
{
SubByBitCodeT
<
T
>
(
SimpleCodeTable
(
num_classes
),
codes
,
tmat
);
}
template
class
MatrixBitCodeFunctor
<
float
>;
template
class
MatrixBitCodeFunctor
<
double
>;
}
// namespace math
}
// namespace operators
}
// namespace paddle
paddle/operators/math/matrix_bit_code.h
浏览文件 @
fb9c08f0
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -59,57 +60,50 @@ struct SimpleCodeTable {
int
max_code_length_
;
};
/* For j < code_length
tmat(i, j) += vec(0, index(i, j))
*/
template
<
typename
T
>
void
AddByBitCode
(
size_t
num_classes
,
const
framework
::
Tensor
&
codes
,
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
vec
);
class
MatrixBitCodeFunctor
{
public:
/* For j < code_length
tmat(i, j) += vec(0, index(i, j))
*/
void
Add
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
vec
);
/* For j < code_length
/* For j < code_length
vec(0, index(i, j)) += tmat(i, j)
*/
template
<
typename
T
>
void
AddByBitCodeGrad
(
size_t
num_classes
,
const
framework
::
Tensor
&
codes
,
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
vec
);
/* For j < code_length
*/
void
AddGrad
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
vec
);
/* For j < code_length
sum(i, 0) = \sum_j bit(i, j) * tmat(i, j)
*/
template
<
typename
T
>
void
SumByBitCode
(
size_t
num_classes
,
const
framework
::
Tensor
&
codes
,
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
sum
,
T
scale_sum
);
*/
void
Sum
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
sum
,
T
scale_sum
);
/* For j < code_length
/* For j < code_length
tmat(i, j) -= bit(i, j)
*/
void
Sub
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
);
/* For j < code_length
input.row(i) += tmat(i, j) * weight.row(index(i, j))
*/
template
<
typename
T
>
void
MulByBitCode
(
size_t
num_classes
,
const
framework
::
Tensor
&
codes
,
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
);
*/
void
Mul
(
size_t
num_classes
,
const
int64_t
*
codes
,
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
);
/* For index(i, j) >= 0:
/* For index(i, j) >= 0:
weight.row(index(i, j)) += tmat(i, j) * input.row(i)
*/
template
<
typename
T
>
void
MulByBitCodeGradWeight
(
size_t
num_classes
,
const
framework
::
Tensor
&
codes
,
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
weight
,
*/
void
MulGradWeight
(
size_t
num_classes
,
const
int64_t
*
codes
,
const
framework
::
Tensor
&
tmat
,
framework
::
Tensor
&
weight
,
const
framework
::
Tensor
&
input
);
/* For j < code_length
/* For j < code_length
input.row(i) += tmat(i, j) * weight.row(index(i, j))
*/
template
<
typename
T
>
void
MulByBitCodeGradError
(
size_t
num_classes
,
const
framework
::
Tensor
&
codes
,
*/
void
MulGradError
(
size_t
num_classes
,
const
int64_t
*
codes
,
const
framework
::
Tensor
&
tmat
,
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
&
input
);
/* For j < code_length
tmat(i, j) -= bit(i, j)
*/
template
<
typename
T
>
void
SubByBitCode
(
size_t
num_classes
,
const
framework
::
Tensor
&
codes
,
framework
::
Tensor
&
tmat
);
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
&
input
);
};
}
// namespace math
}
// namespace operators
}
// namespace paddle
paddle/pybind/pybind.cc
浏览文件 @
fb9c08f0
...
...
@@ -126,6 +126,8 @@ PYBIND11_PLUGIN(core) {
.
def
(
"shape"
,
[](
Tensor
&
self
)
{
return
vectorize
(
self
.
dims
());
})
.
def
(
"set_float_element"
,
TensorSetElement
<
float
>
)
.
def
(
"get_float_element"
,
TensorGetElement
<
float
>
)
.
def
(
"set_int64_element"
,
TensorSetElement
<
int64_t
>
)
.
def
(
"get_int64_element"
,
TensorGetElement
<
int64_t
>
)
.
def
(
"set_double_element"
,
TensorSetElement
<
double
>
)
.
def
(
"get_double_element"
,
TensorGetElement
<
double
>
)
.
def
(
"dtype"
,
[](
Tensor
&
self
)
{
return
ToDataType
(
self
.
type
());
});
...
...
python/paddle/v2/fluid/tests/op_test.py
浏览文件 @
fb9c08f0
...
...
@@ -49,7 +49,6 @@ def create_op(scope, op_type, inputs, outputs, attrs):
for
attr_name
in
Operator
.
get_op_attr_names
(
op_type
):
if
attr_name
in
attrs
:
kwargs
[
attr_name
]
=
attrs
[
attr_name
]
return
Operator
(
op_type
,
**
kwargs
)
...
...
@@ -107,6 +106,8 @@ def get_numeric_gradient(scope,
tensor_to_check_dtype
=
np
.
float32
elif
tensor_to_check_dtype
==
core
.
DataType
.
FP64
:
tensor_to_check_dtype
=
np
.
float64
elif
tensor_to_check_dtype
==
core
.
DataType
.
INT64
:
tensor_to_check_dtype
=
np
.
int64
else
:
raise
ValueError
(
"Not supported data type "
+
str
(
tensor_to_check_dtype
))
...
...
@@ -116,12 +117,16 @@ def get_numeric_gradient(scope,
def
__get_elem__
(
tensor
,
i
):
if
tensor_to_check_dtype
==
np
.
float32
:
return
tensor
.
get_float_element
(
i
)
elif
tensor_to_check_dtype
==
np
.
int64
:
return
tensor
.
get_int64_element
(
i
)
else
:
return
tensor
.
get_double_element
(
i
)
def
__set_elem__
(
tensor
,
i
,
e
):
if
tensor_to_check_dtype
==
np
.
float32
:
tensor
.
set_float_element
(
i
,
e
)
elif
tensor_to_check_dtype
==
np
.
int64
:
tensor
.
set_int64_element
(
i
,
e
)
else
:
tensor
.
set_double_element
(
i
,
e
)
...
...
@@ -355,13 +360,11 @@ class OpTest(unittest.TestCase):
op_attrs
=
self
.
attrs
if
hasattr
(
self
,
"attrs"
)
else
dict
()
self
.
op
=
create_op
(
self
.
scope
,
self
.
op_type
,
op_inputs
,
op_outputs
,
op_attrs
)
if
no_grad_set
is
None
:
no_grad_set
=
set
()
if
not
type
(
output_names
)
is
list
:
output_names
=
[
output_names
]
numeric_grads
=
user_defined_grads
or
[
get_numeric_gradient
(
self
.
scope
,
...
...
@@ -457,9 +460,7 @@ class OpTest(unittest.TestCase):
# infer variable type and infer shape in compile-time
op
.
desc
.
infer_var_type
(
block
.
desc
)
op
.
desc
.
infer_shape
(
block
.
desc
)
mean_inputs
=
map
(
block
.
var
,
output_names
)
if
len
(
mean_inputs
)
==
1
:
loss
=
block
.
create_var
(
dtype
=
mean_inputs
[
0
].
dtype
,
shape
=
[
1
])
op
=
block
.
append_op
(
...
...
python/paddle/v2/fluid/tests/test_hsigmoid_op.py
浏览文件 @
fb9c08f0
...
...
@@ -5,15 +5,15 @@ from op_test import OpTest
class
TestHSigmoidOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"hierarchical_sigmoid
_op
"
self
.
op_type
=
"hierarchical_sigmoid"
num_classes
=
6
embded_size
=
10
batch_size
=
5
x
=
np
.
random
.
random
((
batch_size
,
embded_size
)).
astype
(
"float32"
)
parameter
=
np
.
random
.
random
(
(
batch_size
,
num_classes
-
1
,
embded_size
)).
astype
(
"float32"
)
label
=
np
.
random
.
randint
(
0
,
num_classes
,
batch_size
)
.
astype
(
"int64"
)
bias
=
np
.
random
.
random
((
1
,
num_classes
-
1
))
label
=
np
.
random
.
randint
(
0
,
num_classes
,
batch_size
)
bias
=
np
.
random
.
random
((
1
,
num_classes
-
1
))
.
astype
(
"float32"
)
self
.
inputs
=
{
'X'
:
x
,
'Parameters'
:
parameter
,
...
...
@@ -21,13 +21,18 @@ class TestHSigmoidOp(OpTest):
'Bias'
:
bias
}
self
.
attrs
=
{
'num_classes'
:
num_classes
}
self
.
outputs
=
{
'Out'
:
label
}
self
.
outputs
=
{
'Out'
:
np
.
random
.
random
((
batch_size
,
1
)).
astype
(
"float32"
)
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'x0'
],
'Out'
)
self
.
check_grad
(
[
'X'
,
'Parameters'
,
'Label'
,
'Bias'
],
'Out'
,
no_grad_set
=
set
([
'Label'
]))
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录