Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
49aa2c04
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
49aa2c04
编写于
8月 15, 2017
作者:
D
dangqingqing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Implement GPU kernel for cross entropy operator.
上级
245f6225
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
120 addition
and
18 deletion
+120
-18
paddle/framework/pybind.cc
paddle/framework/pybind.cc
+1
-1
paddle/operators/cross_entropy_op.cc
paddle/operators/cross_entropy_op.cc
+6
-9
paddle/operators/cross_entropy_op.cu
paddle/operators/cross_entropy_op.cu
+103
-5
paddle/operators/cross_entropy_op.h
paddle/operators/cross_entropy_op.h
+9
-2
python/paddle/v2/framework/tests/test_cross_entropy_op.py
python/paddle/v2/framework/tests/test_cross_entropy_op.py
+1
-1
未找到文件。
paddle/framework/pybind.cc
浏览文件 @
49aa2c04
...
@@ -31,7 +31,7 @@ limitations under the License. */
...
@@ -31,7 +31,7 @@ limitations under the License. */
namespace
py
=
pybind11
;
namespace
py
=
pybind11
;
USE_OP
(
add_two
);
USE_OP
(
add_two
);
USE_
CPU_ONLY_
OP
(
onehot_cross_entropy
);
USE_OP
(
onehot_cross_entropy
);
USE_OP
(
sgd
);
USE_OP
(
sgd
);
USE_OP
(
mul
);
USE_OP
(
mul
);
USE_OP
(
mean
);
USE_OP
(
mean
);
...
...
paddle/operators/cross_entropy_op.cc
浏览文件 @
49aa2c04
...
@@ -39,11 +39,10 @@ class OnehotCrossEntropyGradientOp : public framework::OperatorWithKernel {
...
@@ -39,11 +39,10 @@ class OnehotCrossEntropyGradientOp : public framework::OperatorWithKernel {
protected:
protected:
void
InferShape
(
const
framework
::
InferShapeContext
&
ctx
)
const
override
{
void
InferShape
(
const
framework
::
InferShapeContext
&
ctx
)
const
override
{
auto
X_grad
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
dX
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
X
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
X
=
ctx
.
Input
<
Tensor
>
(
"X"
);
// TODO(superjom) add enforce here after helper functions ready
dX
->
Resize
(
X
->
dims
());
X_grad
->
Resize
(
X
->
dims
());
}
}
};
};
...
@@ -70,9 +69,7 @@ namespace ops = paddle::operators;
...
@@ -70,9 +69,7 @@ namespace ops = paddle::operators;
REGISTER_OP
(
onehot_cross_entropy
,
ops
::
OnehotCrossEntropyOp
,
REGISTER_OP
(
onehot_cross_entropy
,
ops
::
OnehotCrossEntropyOp
,
ops
::
OnehotCrossEntropyOpMaker
,
onehot_cross_entropy_grad
,
ops
::
OnehotCrossEntropyOpMaker
,
onehot_cross_entropy_grad
,
ops
::
OnehotCrossEntropyGradientOp
);
ops
::
OnehotCrossEntropyGradientOp
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
onehot_cross_entropy
,
onehot_cross_entropy
,
ops
::
OnehotCrossEntropyOpKernel
<
float
>
);
ops
::
OnehotCrossEntropyOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
onehot_cross_entropy_grad
,
REGISTER_OP_CPU_KERNEL
(
ops
::
OnehotCrossEntropyGradientOpKernel
<
float
>
);
onehot_cross_entropy_grad
,
ops
::
OnehotCrossEntropyGradientOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
paddle/operators/cross_entropy_op.cu
浏览文件 @
49aa2c04
...
@@ -12,10 +12,108 @@
...
@@ -12,10 +12,108 @@
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/framework/op_registry.h"
#include "paddle/operators/cross_entropy_op.h"
#include "paddle/platform/assert.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
// Forward CUDA kernel for the one-hot cross entropy operator.
//
// For every row index i in [0, N) this writes
//   Y[i] = -log(X[i * D + label[i]]).
//
// Y      output buffer, indexed by i.
// X      input buffer, indexed by i * D + label[i].
// label  per-row column index; asserted to lie in [0, D).
// N      number of rows to process.
// D      row stride of X and exclusive upper bound for label values.
//
// Each thread starts at blockIdx.x * blockDim.x + threadIdx.x and advances by
// blockDim.x * gridDim.x until it reaches N.
template <typename T>
__global__ void CrossEntropyKernel(T* Y, const T* X, const int* label,
                                   const int N, const int D) {
  // TODO(qingqing) define CUDA_1D_KERNEL_LOOP macro in a common file.
  // CUDA_1D_KERNEL_LOOP(i, N) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  while (i < N) {
    // Reject labels that would index outside the current row of X.
    PADDLE_ASSERT(label[i] >= 0 && label[i] < D);
    const T* row = X + i * D;
    Y[i] = -log(row[label[i]]);
    i += blockDim.x * gridDim.x;
  }
}
// Fills the first N elements of X with 0.0.
//
// Each thread starts at blockIdx.x * blockDim.x + threadIdx.x and advances by
// blockDim.x * gridDim.x until it reaches N.
template <typename T>
__global__ void zero(T* X, const int N) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  while (i < N) {
    X[i] = 0.0;
    i += blockDim.x * gridDim.x;
  }
}
// Backward CUDA kernel for the one-hot cross entropy operator.
//
// For every row index i in [0, N) this writes
//   dX[i * D + label[i]] = -dY[i] / X[i * D + label[i]].
// Only that single element of each row is written; every other element of dX
// is left untouched by this kernel.
//
// dX     output gradient buffer, indexed by i * D + label[i].
// dY     incoming gradient, indexed by i.
// X      forward input, indexed by i * D + label[i].
// label  per-row column index; asserted to lie in [0, D).
// N      number of rows to process.
// D      row stride of X / dX and exclusive upper bound for label values.
//
// NOTE(review): the CPU gradient kernel in cross_entropy_op.h wraps the
// quotient in tolerable_value(); this kernel stores the raw quotient.
// Confirm whether the difference is intended.
template <typename T>
__global__ void CrossEntropyGradientKernel(T* dX, const T* dY, const T* X,
                                           const int* label, const int N,
                                           const int D) {
  // TODO(qingqing) define CUDA_1D_KERNEL_LOOP macro in a common file.
  // CUDA_1D_KERNEL_LOOP(i, N) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
       i += blockDim.x * gridDim.x) {
    // Same bounds check as CrossEntropyKernel: without it an invalid label
    // would make the store below write outside row i of dX.
    PADDLE_ASSERT(label[i] >= 0 && label[i] < D);
    const int idx = i * D + label[i];
    dX[idx] = -dY[i] / X[idx];
  }
}
// GPU forward kernel wrapper for the onehot_cross_entropy operator.
//
// Compute() enforces that the execution place is a GPU place, reads the "X"
// and "label" inputs, allocates the "Y" output on that place, and launches
// CrossEntropyKernel<T> over the rows of X.
template <typename T>
class OnehotCrossEntropyOpCUDAKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use GPUPlace.");

    auto input = ctx.Input<Tensor>("X");
    const T* input_ptr = input->data<T>();
    const int* label_ptr = ctx.Input<Tensor>("label")->data<int>();

    auto output = ctx.Output<Tensor>("Y");
    output->mutable_data<T>(ctx.GetPlace());
    T* output_ptr = output->data<T>();

    // dims()[0] is used as the row count, dims()[1] as the row stride.
    const int rows = input->dims()[0];
    const int cols = input->dims()[1];

    // One thread per row, rounded up to whole blocks of 512 threads.
    const int threads_per_block = 512;
    const int num_blocks = (rows + threads_per_block - 1) / threads_per_block;

    // TODO(qingqing) launch kernel on specified stream
    // base on ExecutionContext.
    CrossEntropyKernel<T><<<num_blocks, threads_per_block>>>(
        output_ptr, input_ptr, label_ptr, rows, cols);
  }
};
// GPU backward kernel wrapper for the onehot_cross_entropy operator.
//
// Compute() enforces that the execution place is a GPU place, reads "X",
// "label" and the gradient of "Y", allocates the gradient of "X" on that
// place, clears it with zero<T>, and then launches
// CrossEntropyGradientKernel<T> over the rows of X.
template <typename T>
class OnehotCrossEntropyGradientOpCUDAKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use GPUPlace.");

    auto input = ctx.Input<Tensor>("X");
    auto input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
    auto output_grad = ctx.Input<Tensor>(framework::GradVarName("Y"));
    auto label_tensor = ctx.Input<Tensor>("label");

    auto* input_grad_ptr =
        input_grad->template mutable_data<T>(ctx.GetPlace());
    auto* output_grad_ptr = output_grad->template data<T>();
    auto* input_ptr = input->template data<T>();
    auto* label_ptr = label_tensor->data<int>();

    // dims()[0] is used as the row count, dims()[1] as the row stride.
    const int rows = input->dims()[0];
    const int cols = input->dims()[1];
    const int threads_per_block = 512;

    // Step 1: clear all rows * cols gradient entries, because the gradient
    // kernel below writes only one entry per row.
    // TODO(qingqing): make zero an common function.
    const int zero_blocks =
        (rows * cols + threads_per_block - 1) / threads_per_block;
    zero<T><<<zero_blocks, threads_per_block>>>(input_grad_ptr, rows * cols);

    // Step 2: one thread per row, rounded up to whole blocks.
    const int grad_blocks = (rows + threads_per_block - 1) / threads_per_block;
    // TODO(qingqing): launch kernel on specified stream
    // base on ExecutionContext.
    CrossEntropyGradientKernel<T><<<grad_blocks, threads_per_block>>>(
        input_grad_ptr, output_grad_ptr, input_ptr, label_ptr, rows, cols);
  }
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_GPU_KERNEL
(
REGISTER_OP_GPU_KERNEL
(
onehot_cross_entropy
,
onehot_cross_entropy
,
ops
::
OnehotCrossEntropyOpCUDAKernel
<
float
>
);
ops
::
OnehotCrossEntropyOpKernel
<
paddle
::
platform
::
GPUPlace
,
float
>
);
REGISTER_OP_GPU_KERNEL
(
onehot_cross_entropy_grad
,
ops
::
OnehotCrossEntropyGradientOpCUDAKernel
<
float
>
);
paddle/operators/cross_entropy_op.h
浏览文件 @
49aa2c04
...
@@ -39,10 +39,13 @@ T tolerable_value(T x) {
...
@@ -39,10 +39,13 @@ T tolerable_value(T x) {
return
x
;
return
x
;
}
}
template
<
typename
Place
,
typename
T
>
template
<
typename
T
>
class
OnehotCrossEntropyOpKernel
:
public
framework
::
OpKernel
{
class
OnehotCrossEntropyOpKernel
:
public
framework
::
OpKernel
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"It must use CPUPlace."
);
auto
X
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
X
=
ctx
.
Input
<
Tensor
>
(
"X"
);
const
T
*
Xdata
=
X
->
data
<
T
>
();
const
T
*
Xdata
=
X
->
data
<
T
>
();
const
int
*
label_data
=
ctx
.
Input
<
Tensor
>
(
"label"
)
->
data
<
int
>
();
const
int
*
label_data
=
ctx
.
Input
<
Tensor
>
(
"label"
)
->
data
<
int
>
();
...
@@ -62,10 +65,13 @@ class OnehotCrossEntropyOpKernel : public framework::OpKernel {
...
@@ -62,10 +65,13 @@ class OnehotCrossEntropyOpKernel : public framework::OpKernel {
}
}
};
};
template
<
typename
Place
,
typename
T
>
template
<
typename
T
>
class
OnehotCrossEntropyGradientOpKernel
:
public
framework
::
OpKernel
{
class
OnehotCrossEntropyGradientOpKernel
:
public
framework
::
OpKernel
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"It must use CPUPlace."
);
auto
X
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
X
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
dX
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
dX
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
dY
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
auto
dY
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
...
@@ -79,6 +85,7 @@ class OnehotCrossEntropyGradientOpKernel : public framework::OpKernel {
...
@@ -79,6 +85,7 @@ class OnehotCrossEntropyGradientOpKernel : public framework::OpKernel {
const
int
batch_size
=
X
->
dims
()[
0
];
const
int
batch_size
=
X
->
dims
()[
0
];
const
int
class_num
=
X
->
dims
()[
1
];
const
int
class_num
=
X
->
dims
()[
1
];
memset
(
dXdata
,
0
,
sizeof
(
T
)
*
batch_size
*
class_num
);
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
int
index
=
i
*
class_num
+
label_data
[
i
];
int
index
=
i
*
class_num
+
label_data
[
i
];
dXdata
[
index
]
=
-
tolerable_value
(
dYdata
[
i
]
/
Xdata
[
index
]);
dXdata
[
index
]
=
-
tolerable_value
(
dYdata
[
i
]
/
Xdata
[
index
]);
...
...
python/paddle/v2/framework/tests/test_cross_entropy_op.py
浏览文件 @
49aa2c04
...
@@ -22,7 +22,7 @@ class TestCrossEntropy(unittest.TestCase):
...
@@ -22,7 +22,7 @@ class TestCrossEntropy(unittest.TestCase):
class
CrossEntropyGradOpTest
(
GradientChecker
):
class
CrossEntropyGradOpTest
(
GradientChecker
):
def
test_
softmax
_grad
(
self
):
def
test_
check
_grad
(
self
):
op
=
create_op
(
"onehot_cross_entropy"
)
op
=
create_op
(
"onehot_cross_entropy"
)
batch_size
=
100
batch_size
=
100
class_num
=
10
class_num
=
10
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录