Commit 76174ec0
Authored on Apr 28, 2018 by Yu Yang

Clean cross entropy and add sync in executor

Parent: 3948b58b

Showing 5 changed files with 92 additions and 192 deletions (+92 −192)
paddle/fluid/framework/executor.cc                              +3   −0
paddle/fluid/operators/cross_entropy_op.cc                      +6   −4
paddle/fluid/operators/cross_entropy_op.cu                      +6   −93
paddle/fluid/operators/cross_entropy_op.h                       +77  −40
python/paddle/fluid/tests/unittests/test_cross_entropy_op.py    +0   −55
paddle/fluid/framework/executor.cc

```diff
@@ -348,6 +348,9 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
       }
     }
   }
+
+  platform::DeviceContextPool::Instance().Get(place_)->Wait();
+
   if (create_vars && create_local_scope) {
     scope->DeleteScope(local_scope);
  }
```
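This is the "add sync in executor" half of the commit: `RunPreparedContext` now drains the device context before tearing down `local_scope`, so asynchronously launched kernels cannot outlive the tensors they read. Below is a minimal standalone sketch of the same synchronize-before-free discipline; it uses `std::async` as a stand-in for a device stream and a toy `Scope`, not Paddle's actual classes.

```cpp
// Sketch (not Paddle code): wait for all queued "device" work before
// destroying the scope that owns the memory that work may still read.
#include <future>
#include <iostream>
#include <vector>

struct Scope {  // stand-in for framework::Scope holding a tensor buffer
  std::vector<float> tensor = std::vector<float>(1024, 1.0f);
};

int main() {
  auto* local_scope = new Scope;

  // Model of a kernel launched asynchronously on a device stream.
  std::future<float> pending = std::async(std::launch::async, [local_scope] {
    float sum = 0.f;
    for (float v : local_scope->tensor) sum += v;
    return sum;
  });

  // Equivalent of DeviceContextPool::Instance().Get(place_)->Wait():
  // drain the "stream" before freeing memory it touches.
  pending.wait();

  delete local_scope;  // Safe only because of the wait above.
  std::cout << pending.get() << "\n";
  return 0;
}
```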
paddle/fluid/operators/cross_entropy_op.cc

```diff
@@ -164,11 +164,13 @@ or not. But the output only shares the LoD information with input X.
 }  // namespace paddle
 
 namespace ops = paddle::operators;
+using CPUCtx = paddle::platform::CPUDeviceContext;
+
 REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker,
                   paddle::framework::DefaultGradOpDescMaker<true>);
 REGISTER_OPERATOR(cross_entropy_grad, ops::CrossEntropyGradientOp);
-REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel<float>,
-                       ops::CrossEntropyOpKernel<double>);
+REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel<CPUCtx, float>,
+                       ops::CrossEntropyOpKernel<CPUCtx, double>);
 REGISTER_OP_CPU_KERNEL(cross_entropy_grad,
-                       ops::CrossEntropyGradientOpKernel<float>,
-                       ops::CrossEntropyGradientOpKernel<double>);
+                       ops::CrossEntropyGradientOpKernel<CPUCtx, float>,
+                       ops::CrossEntropyGradientOpKernel<CPUCtx, double>);
```
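The registration change mirrors the new kernel signature: `CrossEntropyOpKernel` and `CrossEntropyGradientOpKernel` now take the device context as a template parameter, so one class template serves every backend and the hand-written `*CUDAKernel` classes can be deleted (see the .cu diff below). Here is a minimal sketch of that device-tag pattern, with illustrative names rather than Paddle's real types:

```cpp
#include <iostream>

// Illustrative device tags; Paddle's real types are
// platform::CPUDeviceContext and platform::CUDADeviceContext.
struct CPUCtx  { static const char* name() { return "CPU"; } };
struct CUDACtx { static const char* name() { return "CUDA"; } };

// One kernel template parameterized over the device context,
// mirroring CrossEntropyOpKernel<DeviceContext, T>.
template <typename DeviceContext, typename T>
struct CrossEntropyKernelSketch {
  void Compute() const {
    std::cout << "cross_entropy on " << DeviceContext::name() << " with "
              << sizeof(T) * 8 << "-bit floats\n";
  }
};

int main() {
  // The two CPU registrations in the diff correspond to:
  CrossEntropyKernelSketch<CPUCtx, float>{}.Compute();
  CrossEntropyKernelSketch<CPUCtx, double>{}.Compute();
  // ...and the CUDA file below reuses the same template with CUDACtx.
  CrossEntropyKernelSketch<CUDACtx, float>{}.Compute();
  return 0;
}
```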
paddle/fluid/operators/cross_entropy_op.cu

```diff
@@ -14,98 +14,11 @@ limitations under the License. */
 #include "paddle/fluid/operators/cross_entropy_op.h"
 
-namespace paddle {
-namespace operators {
-
-namespace {
-
-template <typename T>
-__global__ void CrossEntropyGradientKernel(T* dX, const T* dY, const T* X,
-                                           const int64_t* label, const int N,
-                                           const int D) {
-  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
-       i += blockDim.x * gridDim.x) {
-    int idx = i * D + label[i];
-    dX[idx] = -dY[i] / X[idx];
-  }
-}
-
-template <typename T>
-__global__ void SoftCrossEntropyGradientKernel(T* dX, const T* dY, const T* X,
-                                               const T* label, const int N,
-                                               const int D) {
-  int ids = blockIdx.x * blockDim.x + threadIdx.x;
-  if (ids < N * D) {
-    int row_ids = ids / D;
-    dX[ids] = -label[ids] * dY[row_ids] / X[ids];
-  }
-}
-}  // namespace
-
-template <typename T>
-class CrossEntropyOpCUDAKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
-                   "This kernel only runs on GPU device.");
-    const Tensor* x = ctx.Input<Tensor>("X");
-    const Tensor* label = ctx.Input<Tensor>("Label");
-    Tensor* y = ctx.Output<Tensor>("Y");
-    y->mutable_data<T>(ctx.GetPlace());
-
-    math::CrossEntropyFunctor<platform::CUDADeviceContext, T>()(
-        ctx.template device_context<platform::CUDADeviceContext>(), y, x,
-        label, ctx.Attr<bool>("soft_label"));
-  }
-};
-
-template <typename T>
-class CrossEntropyGradientOpCUDAKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
-                   "This kernel only runs on GPU device.");
-
-    const Tensor* x = ctx.Input<Tensor>("X");
-    const Tensor* label = ctx.Input<Tensor>("Label");
-    Tensor* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
-    dx->mutable_data<T>(ctx.GetPlace());
-
-    const T* dy_data =
-        ctx.Input<Tensor>(framework::GradVarName("Y"))->data<T>();
-    T* dx_data = dx->mutable_data<T>(ctx.GetPlace());
-    const T* x_data = x->data<T>();
-
-    int64_t batch_size = x->dims()[0];
-    int64_t class_num = x->dims()[1];
-
-    int block = 512;
-    int grid = (batch_size * class_num + block - 1) / block;
-
-    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
-    auto stream = dev_ctx.stream();
-
-    if (ctx.Attr<bool>("soft_label")) {
-      auto* label_data = label->data<T>();
-      SoftCrossEntropyGradientKernel<T><<<grid, block, 0, stream>>>(
-          dx_data, dy_data, x_data, label_data, batch_size, class_num);
-    } else {
-      math::SetConstant<platform::CUDADeviceContext, T> functor;
-      functor(dev_ctx, dx, 0);
-      auto* label_data = label->data<int64_t>();
-      grid = (batch_size + block - 1) / block;
-      CrossEntropyGradientKernel<T><<<grid, block, 0, stream>>>(
-          dx_data, dy_data, x_data, label_data, batch_size, class_num);
-    }
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(cross_entropy, ops::CrossEntropyOpCUDAKernel<float>,
-                        ops::CrossEntropyOpCUDAKernel<double>);
+using CUDACtx = paddle::platform::CUDADeviceContext;
+REGISTER_OP_CUDA_KERNEL(cross_entropy,
+                        ops::CrossEntropyOpKernel<CUDACtx, float>,
+                        ops::CrossEntropyOpKernel<CUDACtx, double>);
 REGISTER_OP_CUDA_KERNEL(cross_entropy_grad,
-                        ops::CrossEntropyGradientOpCUDAKernel<float>,
-                        ops::CrossEntropyGradientOpCUDAKernel<double>);
+                        ops::CrossEntropyGradientOpKernel<CUDACtx, float>,
+                        ops::CrossEntropyGradientOpKernel<CUDACtx, double>);
```
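Roughly ninety lines of hand-written CUDA disappear here because the gradient logic now lives in device-agnostic functors applied through `platform::ForRange` (see the header diff below). A serial sketch of the ForRange idea, with made-up names; the CUDA specialization would launch a grid-stride kernel over the same functor instead of looping on the host:

```cpp
#include <cstddef>
#include <cstdio>

// Serial stand-in for ForRange on a CPU context: apply a functor to every
// index in [0, limit). The CUDA specialization would launch a grid-stride
// __global__ kernel that calls the same functor once per index.
template <typename Functor>
void ForRangeSketch(std::size_t limit, Functor func) {
  for (std::size_t i = 0; i < limit; ++i) func(i);
}

// Any per-element functor works; HOSTDEVICE markers (dropped here) are what
// let the real functors compile for both host and device.
struct HalveFunctor {
  float* data;
  void operator()(std::size_t i) const { data[i] *= 0.5f; }
};

int main() {
  float buf[4] = {2.f, 4.f, 6.f, 8.f};
  ForRangeSketch(4, HalveFunctor{buf});           // one functor, any device
  for (float v : buf) std::printf("%g ", v);      // prints: 1 2 3 4
  std::printf("\n");
  return 0;
}
```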
paddle/fluid/operators/cross_entropy_op.h

```diff
@@ -17,69 +17,106 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/cross_entropy.h"
 #include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/platform/for_range.h"
 
 namespace paddle {
 namespace operators {
 
 using Tensor = framework::Tensor;
 template <typename T, int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
 using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
 
-template <typename T>
+template <typename DeviceContext, typename T>
 class CrossEntropyOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
-                   "This kernel only runs on CPU.");
-    const Tensor* x = ctx.Input<Tensor>("X");
-    const Tensor* labels = ctx.Input<Tensor>("Label");
-    Tensor* y = ctx.Output<Tensor>("Y");
+    auto* x = ctx.Input<Tensor>("X");
+    auto* labels = ctx.Input<Tensor>("Label");
+    auto* y = ctx.Output<Tensor>("Y");
     y->mutable_data<T>(ctx.GetPlace());
 
-    math::CrossEntropyFunctor<platform::CPUDeviceContext, T>()(
-        ctx.template device_context<platform::CPUDeviceContext>(), y, x,
-        labels, ctx.Attr<bool>("soft_label"));
+    math::CrossEntropyFunctor<DeviceContext, T>()(
+        ctx.template device_context<DeviceContext>(), y, x, labels,
+        ctx.Attr<bool>("soft_label"));
   }
 };
 
 template <typename T>
+class XeSoftlabelGradFunctor {
+ public:
+  XeSoftlabelGradFunctor(T* dx,
+                         const T* dy,     // NOLINT
+                         const T* x,      // NOLINT
+                         const T* label,  // NOLINT
+                         size_t num_classes)
+      : dx_(dx), dy_(dy), x_(x), label_(label), num_classes_(num_classes) {}
+
+  HOSTDEVICE void operator()(size_t i) {
+    auto row_ids = i / num_classes_;
+    dx_[i] = -label_[i] * dy_[row_ids] / x_[i];
+  }
+
+ private:
+  T* dx_;
+  const T* dy_;
+  const T* x_;
+  const T* label_;
+  size_t num_classes_;
+};
+
+template <typename T>
+class XeGradFunctor {
+ public:
+  XeGradFunctor(T* dx,
+                const T* dy,           // NOLINT
+                const T* x,            // NOLINT
+                const int64_t* label,  // NOLINT
+                size_t num_classes)
+      : dx_(dx), dy_(dy), x_(x), label_(label), num_classes_(num_classes) {}
+
+  HOSTDEVICE void operator()(size_t label_id) {
+    auto x_is_true_offset = label_id * num_classes_ + label_[label_id];
+    for (size_t x_offset = label_id * num_classes_;
+         x_offset < (label_id + 1) * num_classes_; ++x_offset) {
+      dx_[x_offset] = x_offset != x_is_true_offset
+                          ? static_cast<T>(0)
+                          : -dy_[label_id] / x_[x_offset];
+    }
+  }
+
+ private:
+  T* dx_;
+  const T* dy_;
+  const T* x_;
+  const int64_t* label_;
+  size_t num_classes_;
+};
+
+template <typename DeviceContext, typename T>
 class CrossEntropyGradientOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
-                   "This kernel only runs on CPU.");
-    const Tensor* x = ctx.Input<Tensor>("X");
-    const Tensor* dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
-    const Tensor* label = ctx.Input<Tensor>("Label");
-    Tensor* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
-    T* dx_data = dx->mutable_data<T>(ctx.GetPlace());
+    auto* x = ctx.Input<Tensor>("X");
+    auto* dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
+    auto* label = ctx.Input<Tensor>("Label");
+    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto* dx_data = dx->mutable_data<T>(ctx.GetPlace());
 
     int64_t class_num = x->dims()[1];
     if (ctx.Attr<bool>("soft_label")) {
-      auto x_mat = EigenMatrix<T>::From(*x);
-      auto dy_mat = EigenMatrix<T>::From(*dy);
-      auto lbl_mat = EigenMatrix<T>::From(*label);
-      auto dx_mat = EigenMatrix<T>::From(*dx);
-
-      dx_mat.device(*ctx.template device_context<platform::CPUDeviceContext>()
-                         .eigen_device()) =
-          -(lbl_mat *
-            dy_mat.broadcast(Eigen::DSizes<int64_t, 2>(1, class_num)) / x_mat);
+      XeSoftlabelGradFunctor<T> functor(dx_data, dy->data<T>(), x->data<T>(),
+                                        label->data<T>(),
+                                        static_cast<size_t>(class_num));
+      platform::ForRange<DeviceContext> for_range(
+          ctx.template device_context<DeviceContext>(),
+          static_cast<size_t>(dx->numel()));
+      for_range(functor);
     } else {
-      int64_t batch_size = x->dims()[0];
-      const T* dy_data = dy->data<T>();
-      const T* x_data = x->data<T>();
-      const int64_t* label_data = label->data<int64_t>();
-
-      math::SetConstant<platform::CPUDeviceContext, T> functor;
-      functor(ctx.template device_context<platform::CPUDeviceContext>(), dx, 0);
-
-      for (int64_t i = 0; i < batch_size; ++i) {
-        PADDLE_ASSERT(label_data[i] >= 0 || label_data[i] < class_num);
-        int64_t index = i * class_num + label_data[i];
-        dx_data[index] = math::TolerableValue<T>()(-dy_data[i] / x_data[index]);
-      }
+      XeGradFunctor<T> functor(dx_data, dy->data<T>(), x->data<T>(),
+                               label->data<int64_t>(),
+                               static_cast<size_t>(class_num));
+      platform::ForRange<DeviceContext> for_range(
+          ctx.template device_context<DeviceContext>(),
+          static_cast<size_t>(dy->numel()));
+      for_range(functor);
    }
  }
};
```
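For a concrete feel of what `XeGradFunctor` computes: with hard labels, the cross entropy of row i is -log X[i][label[i]], so the only nonzero gradient entry per row is dX[i][label[i]] = -dY[i] / X[i][label[i]]. A self-contained numeric check of that rule (the functor's inner loop, minus the HOSTDEVICE plumbing):

```cpp
// Worked check of the hard-label cross-entropy gradient:
// dX[i][j] = -dY[i] / X[i][j] when j == label[i], else 0.
#include <cstdint>
#include <cstdio>

int main() {
  const std::size_t batch = 2, classes = 3;
  float x[batch * classes]  = {0.2f, 0.5f, 0.3f,   // row 0 probabilities
                               0.1f, 0.1f, 0.8f};  // row 1 probabilities
  float dy[batch]           = {1.0f, 2.0f};
  std::int64_t label[batch] = {1, 2};
  float dx[batch * classes] = {};

  for (std::size_t i = 0; i < batch; ++i) {
    // Same index arithmetic as XeGradFunctor::operator().
    std::size_t true_offset = i * classes + label[i];
    for (std::size_t j = i * classes; j < (i + 1) * classes; ++j)
      dx[j] = (j == true_offset) ? -dy[i] / x[j] : 0.0f;
  }

  // Expect row 0: {0, -1/0.5, 0} = {0, -2, 0};
  // row 1: {0, 0, -2/0.8} = {0, 0, -2.5}.
  for (std::size_t j = 0; j < batch * classes; ++j) std::printf("%g ", dx[j]);
  std::printf("\n");
  return 0;
}
```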
python/paddle/fluid/tests/unittests/test_cross_entropy_op.py

```diff
@@ -106,60 +106,5 @@ class TestCrossEntropyOp3(OpTest):
             ["X"],
             "Y",
             max_relative_error=0.05,
             numeric_grad_delta=0.001)
 
 
-class TestCrossEntropyStable(unittest.TestCase):
-    def main(self, place):
-        if isinstance(
-                place,
-                fluid.CUDAPlace) and not fluid.core.is_compiled_with_cuda():
-            return
-
-        class DataRandom(object):
-            def __init__(self):
-                self.random = np.random.RandomState(seed=1)
-
-            def next(self):
-                return {
-                    'input': self.random.uniform(
-                        low=-1, high=1, size=(64, 200)).astype('float32'),
-                    'label': self.random.uniform(
-                        low=0, high=10000, size=(64, 1)).astype('int64'),
-                }
-
-        losses = []
-        for _ in xrange(2):
-            startup = fluid.Program()
-            startup.random_seed = 1
-            main = fluid.Program()
-            scope = fluid.core.Scope()
-            with fluid.scope_guard(scope):
-                with fluid.program_guard(main, startup):
-                    img = fluid.layers.data('input', shape=[200])
-                    label = fluid.layers.data('label', shape=[1], dtype='int64')
-                    prediction = fluid.layers.fc(input=img,
-                                                 size=10000,
-                                                 act='softmax')
-                    xe = fluid.layers.cross_entropy(
-                        input=prediction, label=label)
-                    loss = fluid.layers.mean(xe)
-                    adam = fluid.optimizer.Adam()
-                    adam.minimize(loss)
-
-                    exe = fluid.Executor(place)
-                    exe.run(startup)
-                    data = DataRandom()
-                    for i in xrange(1000):
-                        exe.run(feed=next(data))
-                    losses.append(
-                        exe.run(feed=next(data), fetch_list=[loss])[0])
-        print losses
-        self.assertAlmostEqual(losses[0][0], losses[1][0])
-
-    def test_cpu(self):
-        self.main(fluid.CPUPlace())
-
-    def test_cuda(self):
-        self.main(fluid.CUDAPlace(0))
-
-
 if __name__ == "__main__":
     unittest.main()
```