Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
7cc5ae99
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7cc5ae99
编写于
9月 28, 2017
作者:
C
Cao Ying
提交者:
GitHub
9月 28, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4492 from QiJune/refine_some_functors
Pass DeviceContext to functors not ExecutionContext.
上级
9fbf94b6
b611a479
变更
17
隐藏空白更改
内联
并排
Showing
17 changed file
with
91 addition
and
72 deletion
+91
-72
paddle/framework/operator.cc
paddle/framework/operator.cc
+2
-2
paddle/framework/operator.h
paddle/framework/operator.h
+2
-17
paddle/operators/cross_entropy_op.cu
paddle/operators/cross_entropy_op.cu
+2
-14
paddle/operators/cross_entropy_op.h
paddle/operators/cross_entropy_op.h
+3
-3
paddle/operators/math/CMakeLists.txt
paddle/operators/math/CMakeLists.txt
+2
-1
paddle/operators/math/cross_entropy.cc
paddle/operators/math/cross_entropy.cc
+3
-3
paddle/operators/math/cross_entropy.cu
paddle/operators/math/cross_entropy.cu
+9
-11
paddle/operators/math/cross_entropy.h
paddle/operators/math/cross_entropy.h
+1
-3
paddle/operators/math/math_function.h
paddle/operators/math/math_function.h
+8
-0
paddle/operators/math/math_function_test.cc
paddle/operators/math/math_function_test.cc
+21
-0
paddle/operators/math/softmax.h
paddle/operators/math/softmax.h
+3
-3
paddle/operators/softmax_op.h
paddle/operators/softmax_op.h
+1
-1
paddle/operators/softmax_with_cross_entropy_op.cu
paddle/operators/softmax_with_cross_entropy_op.cu
+4
-2
paddle/operators/softmax_with_cross_entropy_op.h
paddle/operators/softmax_with_cross_entropy_op.h
+4
-2
paddle/platform/device_context.cc
paddle/platform/device_context.cc
+8
-7
paddle/platform/device_context.h
paddle/platform/device_context.h
+17
-2
paddle/platform/device_context_test.cc
paddle/platform/device_context_test.cc
+1
-1
未找到文件。
paddle/framework/operator.cc
浏览文件 @
7cc5ae99
...
@@ -22,14 +22,14 @@ namespace framework {
...
@@ -22,14 +22,14 @@ namespace framework {
template
<
>
template
<
>
Eigen
::
DefaultDevice
&
ExecutionContext
::
GetEigenDevice
<
Eigen
::
DefaultDevice
&
ExecutionContext
::
GetEigenDevice
<
platform
::
CPUPlace
,
Eigen
::
DefaultDevice
>
()
const
{
platform
::
CPUPlace
,
Eigen
::
DefaultDevice
>
()
const
{
return
*
device_context_
.
get_eigen_device
<
Eigen
::
DefaultDevi
ce
>
();
return
*
device_context_
.
GetEigenDevice
<
platform
::
CPUPla
ce
>
();
}
}
#ifndef PADDLE_ONLY_CPU
#ifndef PADDLE_ONLY_CPU
template
<
>
template
<
>
Eigen
::
GpuDevice
&
Eigen
::
GpuDevice
&
ExecutionContext
::
GetEigenDevice
<
platform
::
GPUPlace
,
Eigen
::
GpuDevice
>
()
const
{
ExecutionContext
::
GetEigenDevice
<
platform
::
GPUPlace
,
Eigen
::
GpuDevice
>
()
const
{
return
*
device_context_
.
get_eigen_device
<
Eigen
::
GpuDevi
ce
>
();
return
*
device_context_
.
GetEigenDevice
<
platform
::
GPUPla
ce
>
();
}
}
#endif
#endif
...
...
paddle/framework/operator.h
浏览文件 @
7cc5ae99
...
@@ -296,21 +296,6 @@ template <>
...
@@ -296,21 +296,6 @@ template <>
std
::
vector
<
Tensor
*>
InferShapeContext
::
MultiOutput
<
Tensor
>
(
std
::
vector
<
Tensor
*>
InferShapeContext
::
MultiOutput
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
const
std
::
string
&
name
)
const
;
template
<
typename
T
>
struct
EigenDeviceConverter
;
template
<>
struct
EigenDeviceConverter
<
platform
::
CPUPlace
>
{
using
EigenDeviceType
=
Eigen
::
DefaultDevice
;
};
#ifndef PADDLE_ONLY_CPU
template
<>
struct
EigenDeviceConverter
<
platform
::
GPUPlace
>
{
using
EigenDeviceType
=
Eigen
::
GpuDevice
;
};
#endif
class
ExecutionContext
:
public
InferShapeContext
{
class
ExecutionContext
:
public
InferShapeContext
{
public:
public:
ExecutionContext
(
const
OperatorBase
&
op
,
const
Scope
&
scope
,
ExecutionContext
(
const
OperatorBase
&
op
,
const
Scope
&
scope
,
...
@@ -318,8 +303,8 @@ class ExecutionContext : public InferShapeContext {
...
@@ -318,8 +303,8 @@ class ExecutionContext : public InferShapeContext {
:
InferShapeContext
(
op
,
scope
),
device_context_
(
device_context
)
{}
:
InferShapeContext
(
op
,
scope
),
device_context_
(
device_context
)
{}
template
<
typename
PlaceType
,
template
<
typename
PlaceType
,
typename
DeviceType
=
typename
DeviceType
=
typename
platform
::
EigenDeviceConverter
<
typename
EigenDeviceConverter
<
PlaceType
>::
EigenDeviceType
>
PlaceType
>::
EigenDeviceType
>
DeviceType
&
GetEigenDevice
()
const
;
DeviceType
&
GetEigenDevice
()
const
;
platform
::
Place
GetPlace
()
const
{
return
device_context_
.
GetPlace
();
}
platform
::
Place
GetPlace
()
const
{
return
device_context_
.
GetPlace
();
}
...
...
paddle/operators/cross_entropy_op.cu
浏览文件 @
7cc5ae99
...
@@ -18,14 +18,6 @@ namespace paddle {
...
@@ -18,14 +18,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
namespace
{
namespace
{
// TODO(qingqing): make zero setting a common function.
template
<
typename
T
>
__global__
void
Zero
(
T
*
X
,
const
int
N
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
N
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
X
[
i
]
=
0.0
;
}
}
template
<
typename
T
>
template
<
typename
T
>
__global__
void
CrossEntropyGradientKernel
(
T
*
dX
,
const
T
*
dY
,
const
T
*
X
,
__global__
void
CrossEntropyGradientKernel
(
T
*
dX
,
const
T
*
dY
,
const
T
*
X
,
...
@@ -64,7 +56,7 @@ class CrossEntropyOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -64,7 +56,7 @@ class CrossEntropyOpCUDAKernel : public framework::OpKernel<T> {
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
math
::
CrossEntropyFunctor
<
platform
::
GPUPlace
,
T
>
()(
math
::
CrossEntropyFunctor
<
platform
::
GPUPlace
,
T
>
()(
ctx
,
y
,
x
,
label
,
ctx
.
Attr
<
bool
>
(
"softLabel"
));
ctx
.
device_context
()
,
y
,
x
,
label
,
ctx
.
Attr
<
bool
>
(
"softLabel"
));
}
}
};
};
...
@@ -99,11 +91,7 @@ class CrossEntropyGradientOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -99,11 +91,7 @@ class CrossEntropyGradientOpCUDAKernel : public framework::OpKernel<T> {
.
stream
()
>>>
(
dx_data
,
dy_data
,
x_data
,
label_data
,
.
stream
()
>>>
(
dx_data
,
dy_data
,
x_data
,
label_data
,
batch_size
,
class_num
);
batch_size
,
class_num
);
}
else
{
}
else
{
Zero
<
T
><<<
grid
,
block
,
0
,
math
::
SetConstant
<
platform
::
GPUPlace
,
T
>
(
ctx
.
device_context
(),
dx
,
0
);
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
ctx
.
device_context
())
.
stream
()
>>>
(
dx_data
,
batch_size
*
class_num
);
auto
*
label_data
=
label
->
data
<
int
>
();
auto
*
label_data
=
label
->
data
<
int
>
();
grid
=
(
batch_size
+
block
-
1
)
/
block
;
grid
=
(
batch_size
+
block
-
1
)
/
block
;
CrossEntropyGradientKernel
<
T
><<<
CrossEntropyGradientKernel
<
T
><<<
...
...
paddle/operators/cross_entropy_op.h
浏览文件 @
7cc5ae99
...
@@ -16,6 +16,7 @@ limitations under the License. */
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/framework/eigen.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/cross_entropy.h"
#include "paddle/operators/math/cross_entropy.h"
#include "paddle/operators/math/math_function.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -37,7 +38,7 @@ class CrossEntropyOpKernel : public framework::OpKernel<T> {
...
@@ -37,7 +38,7 @@ class CrossEntropyOpKernel : public framework::OpKernel<T> {
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
math
::
CrossEntropyFunctor
<
platform
::
CPUPlace
,
T
>
()(
math
::
CrossEntropyFunctor
<
platform
::
CPUPlace
,
T
>
()(
ctx
,
y
,
x
,
labels
,
ctx
.
Attr
<
bool
>
(
"softLabel"
));
ctx
.
device_context
()
,
y
,
x
,
labels
,
ctx
.
Attr
<
bool
>
(
"softLabel"
));
}
}
};
};
...
@@ -69,8 +70,7 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel<T> {
...
@@ -69,8 +70,7 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel<T> {
const
T
*
x_data
=
x
->
data
<
T
>
();
const
T
*
x_data
=
x
->
data
<
T
>
();
const
int
*
label_data
=
label
->
data
<
int
>
();
const
int
*
label_data
=
label
->
data
<
int
>
();
// TODO(qingqing): make zero setting a common function.
math
::
SetConstant
<
platform
::
CPUPlace
,
T
>
(
ctx
.
device_context
(),
dx
,
0
);
memset
(
dx_data
,
0
,
sizeof
(
T
)
*
batch_size
*
class_num
);
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
PADDLE_ASSERT
(
label_data
[
i
]
>=
0
||
label_data
[
i
]
<
class_num
);
PADDLE_ASSERT
(
label_data
[
i
]
>=
0
||
label_data
[
i
]
<
class_num
);
...
...
paddle/operators/math/CMakeLists.txt
浏览文件 @
7cc5ae99
if
(
WITH_GPU
)
if
(
WITH_GPU
)
nv_library
(
math_function SRCS math_function.cc math_function.cu im2col.cc
nv_library
(
math_function SRCS math_function.cc math_function.cu im2col.cc
im2col.cu DEPS cblas device_context operator
)
im2col.cu DEPS cblas device_context operator
)
nv_test
(
math_function_test SRCS math_function_test.cc DEPS math_function tensor
)
nv_library
(
softmax_function SRCS softmax.cc softmax.cu
nv_library
(
softmax_function SRCS softmax.cc softmax.cu
DEPS operator
)
DEPS operator
)
nv_library
(
cross_entropy_function SRCS cross_entropy.cc cross_entropy.cu
nv_library
(
cross_entropy_function SRCS cross_entropy.cc cross_entropy.cu
...
@@ -8,9 +9,9 @@ if(WITH_GPU)
...
@@ -8,9 +9,9 @@ if(WITH_GPU)
else
()
else
()
cc_library
(
math_function SRCS math_function.cc im2col.cc
cc_library
(
math_function SRCS math_function.cc im2col.cc
DEPS cblas device_context operator
)
DEPS cblas device_context operator
)
cc_test
(
math_function_test SRCS math_function_test.cc DEPS math_function tensor
)
cc_library
(
softmax_function SRCS softmax.cc DEPS operator
)
cc_library
(
softmax_function SRCS softmax.cc DEPS operator
)
cc_library
(
cross_entropy_function SRCS cross_entropy.cc DEPS operator
)
cc_library
(
cross_entropy_function SRCS cross_entropy.cc DEPS operator
)
endif
()
endif
()
nv_test
(
math_function_test SRCS math_function_test.cc DEPS math_function tensor
)
cc_test
(
im2col_test SRCS im2col_test.cc DEPS math_function tensor
)
cc_test
(
im2col_test SRCS im2col_test.cc DEPS math_function tensor
)
paddle/operators/math/cross_entropy.cc
浏览文件 @
7cc5ae99
...
@@ -26,8 +26,8 @@ using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
...
@@ -26,8 +26,8 @@ using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template
<
typename
T
>
template
<
typename
T
>
class
CrossEntropyFunctor
<
platform
::
CPUPlace
,
T
>
{
class
CrossEntropyFunctor
<
platform
::
CPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
framework
::
ExecutionContext
&
ctx
,
void
operator
()(
const
platform
::
DeviceContext
&
ctx
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
out
,
const
framework
::
Tensor
*
prob
,
const
framework
::
Tensor
*
prob
,
const
framework
::
Tensor
*
labels
,
const
bool
softLabel
)
{
const
framework
::
Tensor
*
labels
,
const
bool
softLabel
)
{
const
int
batch_size
=
prob
->
dims
()[
0
];
const
int
batch_size
=
prob
->
dims
()[
0
];
if
(
softLabel
)
{
if
(
softLabel
)
{
...
@@ -35,7 +35,7 @@ class CrossEntropyFunctor<platform::CPUPlace, T> {
...
@@ -35,7 +35,7 @@ class CrossEntropyFunctor<platform::CPUPlace, T> {
auto
lbl
=
EigenMatrix
<
T
>::
From
(
*
labels
);
auto
lbl
=
EigenMatrix
<
T
>::
From
(
*
labels
);
auto
loss
=
EigenMatrix
<
T
>::
From
(
*
out
);
auto
loss
=
EigenMatrix
<
T
>::
From
(
*
out
);
loss
.
device
(
ctx
.
GetEigenDevice
<
platform
::
CPUPlace
>
())
=
loss
.
device
(
*
ctx
.
GetEigenDevice
<
platform
::
CPUPlace
>
())
=
-
((
lbl
*
in
.
log
().
unaryExpr
(
math
::
TolerableValue
<
T
>
()))
-
((
lbl
*
in
.
log
().
unaryExpr
(
math
::
TolerableValue
<
T
>
()))
.
sum
(
Eigen
::
DSizes
<
int
,
1
>
(
1
))
.
sum
(
Eigen
::
DSizes
<
int
,
1
>
(
1
))
.
reshape
(
Eigen
::
DSizes
<
int
,
2
>
(
batch_size
,
1
)));
.
reshape
(
Eigen
::
DSizes
<
int
,
2
>
(
batch_size
,
1
)));
...
...
paddle/operators/math/cross_entropy.cu
浏览文件 @
7cc5ae99
...
@@ -74,8 +74,8 @@ using Tensor = framework::Tensor;
...
@@ -74,8 +74,8 @@ using Tensor = framework::Tensor;
template
<
typename
T
>
template
<
typename
T
>
class
CrossEntropyFunctor
<
platform
::
GPUPlace
,
T
>
{
class
CrossEntropyFunctor
<
platform
::
GPUPlace
,
T
>
{
public:
public:
void
operator
()(
const
framework
::
ExecutionContext
&
ctx
,
void
operator
()(
const
platform
::
DeviceContext
&
ctx
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
out
,
const
framework
::
Tensor
*
prob
,
const
framework
::
Tensor
*
prob
,
const
framework
::
Tensor
*
labels
,
bool
softLabel
)
{
const
framework
::
Tensor
*
labels
,
bool
softLabel
)
{
const
T
*
prob_data
=
prob
->
data
<
T
>
();
const
T
*
prob_data
=
prob
->
data
<
T
>
();
T
*
loss_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
loss_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
...
@@ -87,20 +87,18 @@ class CrossEntropyFunctor<platform::GPUPlace, T> {
...
@@ -87,20 +87,18 @@ class CrossEntropyFunctor<platform::GPUPlace, T> {
const
T
*
label_data
=
labels
->
data
<
T
>
();
const
T
*
label_data
=
labels
->
data
<
T
>
();
int
block
=
class_num
>
512
?
512
:
pow
(
2
,
int
(
std
::
log2
(
class_num
)));
int
block
=
class_num
>
512
?
512
:
pow
(
2
,
int
(
std
::
log2
(
class_num
)));
SoftCrossEntropyKernel
<
SoftCrossEntropyKernel
<
T
><<<
T
><<<
batch_size
,
block
,
block
*
sizeof
(
T
),
batch_size
,
block
,
block
*
sizeof
(
T
),
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
ctx
).
stream
()
>>>
(
ctx
.
device_context
())
loss_data
,
prob_data
,
label_data
,
class_num
);
.
stream
()
>>>
(
loss_data
,
prob_data
,
label_data
,
class_num
);
}
else
{
}
else
{
const
int
*
label_data
=
labels
->
data
<
int
>
();
const
int
*
label_data
=
labels
->
data
<
int
>
();
int
block
=
512
;
int
block
=
512
;
int
grid
=
(
batch_size
+
block
-
1
)
/
block
;
int
grid
=
(
batch_size
+
block
-
1
)
/
block
;
CrossEntropyKernel
<
T
><<<
CrossEntropyKernel
<
T
><<<
grid
,
block
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
grid
,
block
,
0
,
ctx
.
device_context
())
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
ctx
).
stream
()
>>>
(
.
stream
()
>>>
(
loss_data
,
prob_data
,
label_data
,
loss_data
,
prob_data
,
label_data
,
batch_size
,
class_num
);
batch_size
,
class_num
);
}
}
}
}
};
};
...
...
paddle/operators/math/cross_entropy.h
浏览文件 @
7cc5ae99
...
@@ -37,9 +37,7 @@ struct TolerableValue {
...
@@ -37,9 +37,7 @@ struct TolerableValue {
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
>
class
CrossEntropyFunctor
{
class
CrossEntropyFunctor
{
public:
public:
// (TODO caoying) it is much better to use DeviceContext as the first
void
operator
()(
const
platform
::
DeviceContext
&
context
,
// parameter.
void
operator
()(
const
framework
::
ExecutionContext
&
context
,
framework
::
Tensor
*
out
,
const
framework
::
Tensor
*
prob
,
framework
::
Tensor
*
out
,
const
framework
::
Tensor
*
prob
,
const
framework
::
Tensor
*
labels
,
const
bool
softLabel
);
const
framework
::
Tensor
*
labels
,
const
bool
softLabel
);
};
};
...
...
paddle/operators/math/math_function.h
浏览文件 @
7cc5ae99
...
@@ -52,6 +52,7 @@ int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda,
...
@@ -52,6 +52,7 @@ int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda,
#include <cmath>
#include <cmath>
#include "paddle/framework/eigen.h"
#include "paddle/framework/tensor.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/enforce.h"
...
@@ -84,6 +85,13 @@ void matmul(const platform::DeviceContext& context,
...
@@ -84,6 +85,13 @@ void matmul(const platform::DeviceContext& context,
const
framework
::
Tensor
&
matrix_b
,
bool
trans_b
,
T
alpha
,
const
framework
::
Tensor
&
matrix_b
,
bool
trans_b
,
T
alpha
,
framework
::
Tensor
*
matrix_out
,
T
beta
);
framework
::
Tensor
*
matrix_out
,
T
beta
);
template
<
typename
Place
,
typename
T
>
void
SetConstant
(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
*
tensor
,
T
num
)
{
auto
t
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
tensor
);
t
.
device
(
*
context
.
GetEigenDevice
<
Place
>
())
=
t
.
constant
(
static_cast
<
T
>
(
num
));
}
}
// namespace math
}
// namespace math
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
paddle/operators/math/math_function_test.cc
浏览文件 @
7cc5ae99
...
@@ -243,3 +243,24 @@ TEST(math_function, gemm_trans_clbas) {
...
@@ -243,3 +243,24 @@ TEST(math_function, gemm_trans_clbas) {
EXPECT_EQ
(
input3_ptr
[
6
],
86
);
EXPECT_EQ
(
input3_ptr
[
6
],
86
);
EXPECT_EQ
(
input3_ptr
[
7
],
99
);
EXPECT_EQ
(
input3_ptr
[
7
],
99
);
}
}
TEST
(
math_function
,
zero
)
{
paddle
::
framework
::
Tensor
tensor
;
auto
*
cpu_place
=
new
paddle
::
platform
::
CPUPlace
();
float
*
t
=
tensor
.
mutable_data
<
float
>
({
2
,
2
},
*
cpu_place
);
paddle
::
platform
::
CPUDeviceContext
context
(
*
cpu_place
);
paddle
::
operators
::
math
::
SetConstant
<
paddle
::
platform
::
CPUPlace
,
float
>
(
context
,
&
tensor
,
0
);
EXPECT_EQ
(
t
[
0
],
0
);
EXPECT_EQ
(
t
[
1
],
0
);
EXPECT_EQ
(
t
[
2
],
0
);
EXPECT_EQ
(
t
[
3
],
0
);
paddle
::
operators
::
math
::
SetConstant
<
paddle
::
platform
::
CPUPlace
,
float
>
(
context
,
&
tensor
,
1
);
EXPECT_EQ
(
t
[
0
],
1
);
EXPECT_EQ
(
t
[
1
],
1
);
EXPECT_EQ
(
t
[
2
],
1
);
EXPECT_EQ
(
t
[
3
],
1
);
}
paddle/operators/math/softmax.h
浏览文件 @
7cc5ae99
...
@@ -36,7 +36,7 @@ struct ValueClip {
...
@@ -36,7 +36,7 @@ struct ValueClip {
template
<
typename
Place
,
typename
T
>
template
<
typename
Place
,
typename
T
>
class
SoftmaxFunctor
{
class
SoftmaxFunctor
{
public:
public:
void
operator
()(
const
framework
::
Execution
Context
&
context
,
void
operator
()(
const
platform
::
Device
Context
&
context
,
const
framework
::
Tensor
*
X
,
framework
::
Tensor
*
Y
)
{
const
framework
::
Tensor
*
X
,
framework
::
Tensor
*
Y
)
{
auto
logits
=
EigenMatrix
<
T
>::
From
(
*
X
);
auto
logits
=
EigenMatrix
<
T
>::
From
(
*
X
);
auto
softmax
=
EigenMatrix
<
T
>::
From
(
*
Y
);
auto
softmax
=
EigenMatrix
<
T
>::
From
(
*
Y
);
...
@@ -58,8 +58,8 @@ class SoftmaxFunctor {
...
@@ -58,8 +58,8 @@ class SoftmaxFunctor {
.
broadcast
(
one_by_class
))
.
broadcast
(
one_by_class
))
.
unaryExpr
(
ValueClip
<
T
>
());
.
unaryExpr
(
ValueClip
<
T
>
());
softmax
.
device
(
context
.
GetEigenDevice
<
Place
>
())
=
shifted_logits
.
exp
();
softmax
.
device
(
*
context
.
GetEigenDevice
<
Place
>
())
=
shifted_logits
.
exp
();
softmax
.
device
(
context
.
GetEigenDevice
<
Place
>
())
=
softmax
.
device
(
*
context
.
GetEigenDevice
<
Place
>
())
=
(
softmax
*
(
softmax
*
softmax
.
sum
(
along_class
)
softmax
.
sum
(
along_class
)
.
inverse
()
.
inverse
()
...
...
paddle/operators/softmax_op.h
浏览文件 @
7cc5ae99
...
@@ -35,7 +35,7 @@ class SoftmaxKernel : public framework::OpKernel<T> {
...
@@ -35,7 +35,7 @@ class SoftmaxKernel : public framework::OpKernel<T> {
// allocate memory on device.
// allocate memory on device.
Y
->
mutable_data
<
T
>
(
context
.
GetPlace
());
Y
->
mutable_data
<
T
>
(
context
.
GetPlace
());
math
::
SoftmaxFunctor
<
Place
,
T
>
()(
context
,
X
,
Y
);
math
::
SoftmaxFunctor
<
Place
,
T
>
()(
context
.
device_context
()
,
X
,
Y
);
}
}
};
};
...
...
paddle/operators/softmax_with_cross_entropy_op.cu
浏览文件 @
7cc5ae99
...
@@ -66,9 +66,11 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel<T> {
...
@@ -66,9 +66,11 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel<T> {
softmax
->
mutable_data
<
T
>
(
context
.
GetPlace
());
softmax
->
mutable_data
<
T
>
(
context
.
GetPlace
());
loss
->
mutable_data
<
T
>
(
context
.
GetPlace
());
loss
->
mutable_data
<
T
>
(
context
.
GetPlace
());
math
::
SoftmaxFunctor
<
platform
::
GPUPlace
,
T
>
()(
context
,
logits
,
softmax
);
math
::
SoftmaxFunctor
<
platform
::
GPUPlace
,
T
>
()(
context
.
device_context
(),
logits
,
softmax
);
math
::
CrossEntropyFunctor
<
platform
::
GPUPlace
,
T
>
()(
math
::
CrossEntropyFunctor
<
platform
::
GPUPlace
,
T
>
()(
context
,
loss
,
softmax
,
labels
,
context
.
Attr
<
bool
>
(
"softLabel"
));
context
.
device_context
(),
loss
,
softmax
,
labels
,
context
.
Attr
<
bool
>
(
"softLabel"
));
}
}
};
};
...
...
paddle/operators/softmax_with_cross_entropy_op.h
浏览文件 @
7cc5ae99
...
@@ -40,9 +40,11 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> {
...
@@ -40,9 +40,11 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> {
softmax
->
mutable_data
<
T
>
(
context
.
GetPlace
());
softmax
->
mutable_data
<
T
>
(
context
.
GetPlace
());
loss
->
mutable_data
<
T
>
(
context
.
GetPlace
());
loss
->
mutable_data
<
T
>
(
context
.
GetPlace
());
math
::
SoftmaxFunctor
<
platform
::
CPUPlace
,
T
>
()(
context
,
logits
,
softmax
);
math
::
SoftmaxFunctor
<
platform
::
CPUPlace
,
T
>
()(
context
.
device_context
(),
logits
,
softmax
);
math
::
CrossEntropyFunctor
<
platform
::
CPUPlace
,
T
>
()(
math
::
CrossEntropyFunctor
<
platform
::
CPUPlace
,
T
>
()(
context
,
loss
,
softmax
,
labels
,
context
.
Attr
<
bool
>
(
"softLabel"
));
context
.
device_context
(),
loss
,
softmax
,
labels
,
context
.
Attr
<
bool
>
(
"softLabel"
));
}
}
};
};
...
...
paddle/platform/device_context.cc
浏览文件 @
7cc5ae99
...
@@ -16,8 +16,8 @@ namespace paddle {
...
@@ -16,8 +16,8 @@ namespace paddle {
namespace
platform
{
namespace
platform
{
template
<
>
template
<
>
Eigen
::
DefaultDevice
*
DeviceContext
::
get_eigen_device
<
Eigen
::
DefaultDevice
>
()
Eigen
::
DefaultDevice
*
DeviceContext
::
GetEigenDevice
<
const
{
platform
::
CPUPlace
,
Eigen
::
DefaultDevice
>
()
const
{
return
reinterpret_cast
<
const
CPUDeviceContext
*>
(
this
)
->
eigen_device
();
return
reinterpret_cast
<
const
CPUDeviceContext
*>
(
this
)
->
eigen_device
();
}
}
...
@@ -37,6 +37,12 @@ Place CPUDeviceContext::GetPlace() const { return CPUPlace(); }
...
@@ -37,6 +37,12 @@ Place CPUDeviceContext::GetPlace() const { return CPUPlace(); }
#ifndef PADDLE_ONLY_CPU
#ifndef PADDLE_ONLY_CPU
template
<
>
Eigen
::
GpuDevice
*
DeviceContext
::
GetEigenDevice
<
platform
::
GPUPlace
,
Eigen
::
GpuDevice
>
()
const
{
return
reinterpret_cast
<
const
CUDADeviceContext
*>
(
this
)
->
eigen_device
();
}
class
EigenCudaStreamDevice
:
public
Eigen
::
StreamInterface
{
class
EigenCudaStreamDevice
:
public
Eigen
::
StreamInterface
{
public:
public:
EigenCudaStreamDevice
()
:
scratch_
(
nullptr
),
semaphore_
(
nullptr
)
{
EigenCudaStreamDevice
()
:
scratch_
(
nullptr
),
semaphore_
(
nullptr
)
{
...
@@ -90,11 +96,6 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface {
...
@@ -90,11 +96,6 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface {
mutable
unsigned
int
*
semaphore_
;
mutable
unsigned
int
*
semaphore_
;
};
};
template
<
>
Eigen
::
GpuDevice
*
DeviceContext
::
get_eigen_device
<
Eigen
::
GpuDevice
>
()
const
{
return
reinterpret_cast
<
const
CUDADeviceContext
*>
(
this
)
->
eigen_device
();
}
CUDADeviceContext
::
CUDADeviceContext
(
GPUPlace
place
)
:
place_
(
place
)
{
CUDADeviceContext
::
CUDADeviceContext
(
GPUPlace
place
)
:
place_
(
place
)
{
SetDeviceId
(
place_
.
device
);
SetDeviceId
(
place_
.
device
);
PADDLE_ENFORCE
(
cudaStreamCreate
(
&
stream_
));
PADDLE_ENFORCE
(
cudaStreamCreate
(
&
stream_
));
...
...
paddle/platform/device_context.h
浏览文件 @
7cc5ae99
...
@@ -27,13 +27,23 @@ limitations under the License. */
...
@@ -27,13 +27,23 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
platform
{
namespace
platform
{
template
<
typename
T
>
struct
EigenDeviceConverter
;
template
<
>
struct
EigenDeviceConverter
<
platform
::
CPUPlace
>
{
using
EigenDeviceType
=
Eigen
::
DefaultDevice
;
};
class
DeviceContext
{
class
DeviceContext
{
public:
public:
virtual
~
DeviceContext
()
{}
virtual
~
DeviceContext
()
{}
virtual
Place
GetPlace
()
const
=
0
;
virtual
Place
GetPlace
()
const
=
0
;
template
<
typename
DeviceType
>
template
<
typename
PlaceType
,
DeviceType
*
get_eigen_device
()
const
;
typename
DeviceType
=
typename
EigenDeviceConverter
<
PlaceType
>
::
EigenDeviceType
>
DeviceType
*
GetEigenDevice
()
const
;
virtual
void
Wait
()
const
{}
virtual
void
Wait
()
const
{}
};
};
...
@@ -52,6 +62,11 @@ class CPUDeviceContext : public DeviceContext {
...
@@ -52,6 +62,11 @@ class CPUDeviceContext : public DeviceContext {
};
};
#ifndef PADDLE_ONLY_CPU
#ifndef PADDLE_ONLY_CPU
template
<
>
struct
EigenDeviceConverter
<
platform
::
GPUPlace
>
{
using
EigenDeviceType
=
Eigen
::
GpuDevice
;
};
class
EigenCudaStreamDevice
;
class
EigenCudaStreamDevice
;
class
CUDADeviceContext
:
public
DeviceContext
{
class
CUDADeviceContext
:
public
DeviceContext
{
...
...
paddle/platform/device_context_test.cc
浏览文件 @
7cc5ae99
...
@@ -24,7 +24,7 @@ TEST(Device, Init) {
...
@@ -24,7 +24,7 @@ TEST(Device, Init) {
for
(
int
i
=
0
;
i
<
count
;
i
++
)
{
for
(
int
i
=
0
;
i
<
count
;
i
++
)
{
DeviceContext
*
device_context
=
new
CUDADeviceContext
(
GPUPlace
(
i
));
DeviceContext
*
device_context
=
new
CUDADeviceContext
(
GPUPlace
(
i
));
Eigen
::
GpuDevice
*
gpu_device
=
Eigen
::
GpuDevice
*
gpu_device
=
device_context
->
template
get_eigen_device
<
Eigen
::
GpuDevi
ce
>();
device_context
->
template
GetEigenDevice
<
GPUPla
ce
>();
ASSERT_NE
(
nullptr
,
gpu_device
);
ASSERT_NE
(
nullptr
,
gpu_device
);
delete
device_context
;
delete
device_context
;
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录