Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
8a0c7e2e
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
8a0c7e2e
编写于
5月 03, 2018
作者:
Y
Yu Yang
提交者:
GitHub
5月 03, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #10280 from reyoung/feature/add_stable_test_of_cross_entropy
Clean cross entropy
上级
4a497b82
53c27682
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
89 addition
and
137 deletion
+89
-137
paddle/fluid/operators/cross_entropy_op.cc
paddle/fluid/operators/cross_entropy_op.cc
+6
-4
paddle/fluid/operators/cross_entropy_op.cu
paddle/fluid/operators/cross_entropy_op.cu
+6
-93
paddle/fluid/operators/cross_entropy_op.h
paddle/fluid/operators/cross_entropy_op.h
+77
-40
未找到文件。
paddle/fluid/operators/cross_entropy_op.cc
浏览文件 @
8a0c7e2e
...
@@ -164,11 +164,13 @@ or not. But the output only shares the LoD information with input X.
...
@@ -164,11 +164,13 @@ or not. But the output only shares the LoD information with input X.
}
// namespace paddle
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
using
CPUCtx
=
paddle
::
platform
::
CPUDeviceContext
;
REGISTER_OPERATOR
(
cross_entropy
,
ops
::
CrossEntropyOp
,
ops
::
CrossEntropyOpMaker
,
REGISTER_OPERATOR
(
cross_entropy
,
ops
::
CrossEntropyOp
,
ops
::
CrossEntropyOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
cross_entropy_grad
,
ops
::
CrossEntropyGradientOp
);
REGISTER_OPERATOR
(
cross_entropy_grad
,
ops
::
CrossEntropyGradientOp
);
REGISTER_OP_CPU_KERNEL
(
cross_entropy
,
ops
::
CrossEntropyOpKernel
<
float
>
,
REGISTER_OP_CPU_KERNEL
(
cross_entropy
,
ops
::
CrossEntropyOpKernel
<
CPUCtx
,
float
>
,
ops
::
CrossEntropyOpKernel
<
double
>
);
ops
::
CrossEntropyOpKernel
<
CPUCtx
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
cross_entropy_grad
,
REGISTER_OP_CPU_KERNEL
(
cross_entropy_grad
,
ops
::
CrossEntropyGradientOpKernel
<
float
>
,
ops
::
CrossEntropyGradientOpKernel
<
CPUCtx
,
float
>
,
ops
::
CrossEntropyGradientOpKernel
<
double
>
);
ops
::
CrossEntropyGradientOpKernel
<
CPUCtx
,
double
>
);
paddle/fluid/operators/cross_entropy_op.cu
浏览文件 @
8a0c7e2e
...
@@ -14,98 +14,11 @@ limitations under the License. */
...
@@ -14,98 +14,11 @@ limitations under the License. */
#include "paddle/fluid/operators/cross_entropy_op.h"
#include "paddle/fluid/operators/cross_entropy_op.h"
namespace
paddle
{
namespace
operators
{
namespace
{
template
<
typename
T
>
__global__
void
CrossEntropyGradientKernel
(
T
*
dX
,
const
T
*
dY
,
const
T
*
X
,
const
int64_t
*
label
,
const
int
N
,
const
int
D
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
N
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
idx
=
i
*
D
+
label
[
i
];
dX
[
idx
]
=
-
dY
[
i
]
/
X
[
idx
];
}
}
template
<
typename
T
>
__global__
void
SoftCrossEntropyGradientKernel
(
T
*
dX
,
const
T
*
dY
,
const
T
*
X
,
const
T
*
label
,
const
int
N
,
const
int
D
)
{
int
ids
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
ids
<
N
*
D
)
{
int
row_ids
=
ids
/
D
;
dX
[
ids
]
=
-
label
[
ids
]
*
dY
[
row_ids
]
/
X
[
ids
];
}
}
}
// namespace
template
<
typename
T
>
class
CrossEntropyOpCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
"This kernel only runs on GPU device."
);
const
Tensor
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
const
Tensor
*
label
=
ctx
.
Input
<
Tensor
>
(
"Label"
);
Tensor
*
y
=
ctx
.
Output
<
Tensor
>
(
"Y"
);
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
math
::
CrossEntropyFunctor
<
platform
::
CUDADeviceContext
,
T
>
()(
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>(),
y
,
x
,
label
,
ctx
.
Attr
<
bool
>
(
"soft_label"
));
}
};
template
<
typename
T
>
class
CrossEntropyGradientOpCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
"This kernel only runs on GPU device."
);
const
Tensor
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
const
Tensor
*
label
=
ctx
.
Input
<
Tensor
>
(
"Label"
);
Tensor
*
dx
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
const
T
*
dy_data
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
))
->
data
<
T
>
();
T
*
dx_data
=
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
const
T
*
x_data
=
x
->
data
<
T
>
();
int64_t
batch_size
=
x
->
dims
()[
0
];
int64_t
class_num
=
x
->
dims
()[
1
];
int
block
=
512
;
int
grid
=
(
batch_size
*
class_num
+
block
-
1
)
/
block
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
auto
stream
=
dev_ctx
.
stream
();
if
(
ctx
.
Attr
<
bool
>
(
"soft_label"
))
{
auto
*
label_data
=
label
->
data
<
T
>
();
SoftCrossEntropyGradientKernel
<
T
><<<
grid
,
block
,
0
,
stream
>>>
(
dx_data
,
dy_data
,
x_data
,
label_data
,
batch_size
,
class_num
);
}
else
{
math
::
SetConstant
<
platform
::
CUDADeviceContext
,
T
>
functor
;
functor
(
dev_ctx
,
dx
,
0
);
auto
*
label_data
=
label
->
data
<
int64_t
>
();
grid
=
(
batch_size
+
block
-
1
)
/
block
;
CrossEntropyGradientKernel
<
T
><<<
grid
,
block
,
0
,
stream
>>>
(
dx_data
,
dy_data
,
x_data
,
label_data
,
batch_size
,
class_num
);
}
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
cross_entropy
,
ops
::
CrossEntropyOpCUDAKernel
<
float
>
,
using
CUDACtx
=
paddle
::
platform
::
CUDADeviceContext
;
ops
::
CrossEntropyOpCUDAKernel
<
double
>
);
REGISTER_OP_CUDA_KERNEL
(
cross_entropy
,
ops
::
CrossEntropyOpKernel
<
CUDACtx
,
float
>
,
ops
::
CrossEntropyOpKernel
<
CUDACtx
,
double
>
);
REGISTER_OP_CUDA_KERNEL
(
cross_entropy_grad
,
REGISTER_OP_CUDA_KERNEL
(
cross_entropy_grad
,
ops
::
CrossEntropyGradientOp
CUDAKernel
<
float
>
,
ops
::
CrossEntropyGradientOp
Kernel
<
CUDACtx
,
float
>
,
ops
::
CrossEntropyGradientOp
CUDAKernel
<
double
>
);
ops
::
CrossEntropyGradientOp
Kernel
<
CUDACtx
,
double
>
);
paddle/fluid/operators/cross_entropy_op.h
浏览文件 @
8a0c7e2e
...
@@ -17,69 +17,106 @@ limitations under the License. */
...
@@ -17,69 +17,106 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/cross_entropy.h"
#include "paddle/fluid/operators/math/cross_entropy.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/for_range.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
Tensor
=
framework
::
Tensor
;
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenMatrix
=
framework
::
EigenMatrix
<
T
,
MajorType
,
IndexType
>
;
template
<
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
class
CrossEntropyOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CrossEntropyOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
"This kernel only runs on CPU."
);
auto
*
labels
=
ctx
.
Input
<
Tensor
>
(
"Label"
);
const
Tensor
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
y
=
ctx
.
Output
<
Tensor
>
(
"Y"
);
const
Tensor
*
labels
=
ctx
.
Input
<
Tensor
>
(
"Label"
);
Tensor
*
y
=
ctx
.
Output
<
Tensor
>
(
"Y"
);
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
math
::
CrossEntropyFunctor
<
platform
::
CPU
DeviceContext
,
T
>
()(
math
::
CrossEntropyFunctor
<
DeviceContext
,
T
>
()(
ctx
.
template
device_context
<
platform
::
CPU
DeviceContext
>(),
y
,
x
,
labels
,
ctx
.
template
device_context
<
DeviceContext
>(),
y
,
x
,
labels
,
ctx
.
Attr
<
bool
>
(
"soft_label"
));
ctx
.
Attr
<
bool
>
(
"soft_label"
));
}
}
};
};
template
<
typename
T
>
template
<
typename
T
>
class
XeSoftlabelGradFunctor
{
public:
XeSoftlabelGradFunctor
(
T
*
dx
,
const
T
*
dy
,
// NOLINT
const
T
*
x
,
// NOLINT
const
T
*
label
,
// NOLINT
size_t
num_classes
)
:
dx_
(
dx
),
dy_
(
dy
),
x_
(
x
),
label_
(
label
),
num_classes_
(
num_classes
)
{}
HOSTDEVICE
void
operator
()(
size_t
i
)
{
auto
row_ids
=
i
/
num_classes_
;
dx_
[
i
]
=
-
label_
[
i
]
*
dy_
[
row_ids
]
/
x_
[
i
];
}
private:
T
*
dx_
;
const
T
*
dy_
;
const
T
*
x_
;
const
T
*
label_
;
size_t
num_classes_
;
};
template
<
typename
T
>
class
XeGradFunctor
{
public:
XeGradFunctor
(
T
*
dx
,
const
T
*
dy
,
// NOLINT
const
T
*
x
,
// NOLINT
const
int64_t
*
label
,
// NOLINT
size_t
num_classes
)
:
dx_
(
dx
),
dy_
(
dy
),
x_
(
x
),
label_
(
label
),
num_classes_
(
num_classes
)
{}
HOSTDEVICE
void
operator
()(
size_t
sample_id
)
{
auto
x_is_true_offset
=
sample_id
*
num_classes_
+
label_
[
sample_id
];
for
(
size_t
x_offset
=
sample_id
*
num_classes_
;
x_offset
<
(
sample_id
+
1
)
*
num_classes_
;
++
x_offset
)
{
dx_
[
x_offset
]
=
x_offset
!=
x_is_true_offset
?
static_cast
<
T
>
(
0
)
:
-
dy_
[
sample_id
]
/
x_
[
x_offset
];
}
}
private:
T
*
dx_
;
const
T
*
dy_
;
const
T
*
x_
;
const
int64_t
*
label_
;
size_t
num_classes_
;
};
template
<
typename
DeviceContext
,
typename
T
>
class
CrossEntropyGradientOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CrossEntropyGradientOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
"This kernel only runs on CPU."
);
auto
*
dy
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
const
Tensor
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
label
=
ctx
.
Input
<
Tensor
>
(
"Label"
);
const
Tensor
*
dy
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
auto
*
dx
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
const
Tensor
*
label
=
ctx
.
Input
<
Tensor
>
(
"Label"
);
auto
*
dx_data
=
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
Tensor
*
dx
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
T
*
dx_data
=
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int64_t
class_num
=
x
->
dims
()[
1
];
int64_t
class_num
=
x
->
dims
()[
1
];
if
(
ctx
.
Attr
<
bool
>
(
"soft_label"
))
{
if
(
ctx
.
Attr
<
bool
>
(
"soft_label"
))
{
auto
x_mat
=
EigenMatrix
<
T
>::
From
(
*
x
);
XeSoftlabelGradFunctor
<
T
>
functor
(
dx_data
,
dy
->
data
<
T
>
(),
x
->
data
<
T
>
(),
auto
dy_mat
=
EigenMatrix
<
T
>::
From
(
*
dy
);
label
->
data
<
T
>
(),
auto
lbl_mat
=
EigenMatrix
<
T
>::
From
(
*
label
);
static_cast
<
size_t
>
(
class_num
));
auto
dx_mat
=
EigenMatrix
<
T
>::
From
(
*
dx
);
platform
::
ForRange
<
DeviceContext
>
for_range
(
ctx
.
template
device_context
<
DeviceContext
>(),
dx_mat
.
device
(
*
ctx
.
template
device_context
<
platform
::
CPUDeviceContext
>()
static_cast
<
size_t
>
(
dx
->
numel
()));
.
eigen_device
())
=
for_range
(
functor
);
-
(
lbl_mat
*
dy_mat
.
broadcast
(
Eigen
::
DSizes
<
int64_t
,
2
>
(
1
,
class_num
))
/
x_mat
);
}
else
{
}
else
{
int64_t
batch_size
=
x
->
dims
()[
0
];
XeGradFunctor
<
T
>
functor
(
dx_data
,
dy
->
data
<
T
>
(),
x
->
data
<
T
>
(),
const
T
*
dy_data
=
dy
->
data
<
T
>
();
label
->
data
<
int64_t
>
(),
const
T
*
x_data
=
x
->
data
<
T
>
();
static_cast
<
size_t
>
(
class_num
));
const
int64_t
*
label_data
=
label
->
data
<
int64_t
>
();
platform
::
ForRange
<
DeviceContext
>
for_range
(
ctx
.
template
device_context
<
DeviceContext
>(),
math
::
SetConstant
<
platform
::
CPUDeviceContext
,
T
>
functor
;
static_cast
<
size_t
>
(
dy
->
numel
()));
functor
(
ctx
.
template
device_context
<
platform
::
CPUDeviceContext
>(),
dx
,
0
);
for_range
(
functor
);
for
(
int64_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
PADDLE_ASSERT
(
label_data
[
i
]
>=
0
||
label_data
[
i
]
<
class_num
);
int64_t
index
=
i
*
class_num
+
label_data
[
i
];
dx_data
[
index
]
=
math
::
TolerableValue
<
T
>
()(
-
dy_data
[
i
]
/
x_data
[
index
]);
}
}
}
}
}
};
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录