Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
32d9beef
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
32d9beef
编写于
11月 23, 2021
作者:
Y
YuanRisheng
提交者:
GitHub
11月 23, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[PTen]Elementwise_div Kernel Refactor (#37418)
* elementwise_div refactor * fix compile bugs in windows ci
上级
c5ad3d06
变更
15
显示空白变更内容
内联
并排
Showing
15 changed file
with
326 addition
and
61 deletion
+326
-61
paddle/fluid/operators/elementwise/elementwise_div_op.cc
paddle/fluid/operators/elementwise/elementwise_div_op.cc
+0
-25
paddle/fluid/operators/elementwise/elementwise_div_op.cu
paddle/fluid/operators/elementwise/elementwise_div_op.cu
+0
-16
paddle/fluid/operators/elementwise/elementwise_div_op.h
paddle/fluid/operators/elementwise/elementwise_div_op.h
+13
-14
paddle/fluid/operators/elementwise/elementwise_op.h
paddle/fluid/operators/elementwise/elementwise_op.h
+6
-0
paddle/pten/api/include/math.h
paddle/pten/api/include/math.h
+2
-0
paddle/pten/api/lib/math.cc
paddle/pten/api/lib/math.cc
+35
-0
paddle/pten/include/math.h
paddle/pten/include/math.h
+17
-4
paddle/pten/kernels/cpu/math.cc
paddle/pten/kernels/cpu/math.cc
+34
-0
paddle/pten/kernels/cpu/math.h
paddle/pten/kernels/cpu/math.h
+6
-0
paddle/pten/kernels/cuda/math.cu
paddle/pten/kernels/cuda/math.cu
+28
-0
paddle/pten/kernels/cuda/math.h
paddle/pten/kernels/cuda/math.h
+7
-0
paddle/pten/kernels/functions/blas/elementwise.h
paddle/pten/kernels/functions/blas/elementwise.h
+9
-0
paddle/pten/kernels/functions/general/elementwise_functor.h
paddle/pten/kernels/functions/general/elementwise_functor.h
+60
-0
paddle/pten/tests/api/test_elementwise_api.cc
paddle/pten/tests/api/test_elementwise_api.cc
+54
-0
paddle/pten/tests/kernels/test_elementwise_dev_api.cc
paddle/pten/tests/kernels/test_elementwise_dev_api.cc
+55
-2
未找到文件。
paddle/fluid/operators/elementwise/elementwise_div_op.cc
浏览文件 @
32d9beef
...
...
@@ -22,31 +22,6 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
template
<
typename
T
>
struct
SameDimsElemwiseDiv
<
platform
::
CPUDeviceContext
,
T
,
typename
std
::
enable_if
<
std
::
is_floating_point
<
T
>::
value
>::
type
>
{
void
operator
()(
const
framework
::
ExecutionContext
&
ctx
,
const
framework
::
Tensor
*
x
,
const
framework
::
Tensor
*
y
,
framework
::
Tensor
*
z
)
{
auto
blas
=
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
ctx
);
blas
.
VDIV
(
x
->
numel
(),
x
->
data
<
T
>
(),
y
->
data
<
T
>
(),
z
->
data
<
T
>
());
}
};
// use default div function for int32/int64 type because of divison zero
// checking.
template
<
typename
T
>
struct
SameDimsElemwiseDiv
<
platform
::
CPUDeviceContext
,
T
,
typename
std
::
enable_if
<!
std
::
is_floating_point
<
T
>::
value
>::
type
>
{
void
operator
()(
const
framework
::
ExecutionContext
&
ctx
,
const
framework
::
Tensor
*
x
,
const
framework
::
Tensor
*
y
,
framework
::
Tensor
*
z
)
{
default_elementwise_div
<
platform
::
CPUDeviceContext
,
T
>
(
ctx
,
x
,
y
,
z
);
}
};
class
ElementwiseDivOpMaker
:
public
ElementwiseOpMaker
{
protected:
std
::
string
GetName
()
const
override
{
return
"Div"
;
}
...
...
paddle/fluid/operators/elementwise/elementwise_div_op.cu
浏览文件 @
32d9beef
...
...
@@ -23,22 +23,6 @@ namespace plat = paddle::platform;
namespace
paddle
{
namespace
operators
{
template
<
typename
T
>
class
ElementwiseDivKernel
<
platform
::
CUDADeviceContext
,
T
>
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
std
::
vector
<
const
framework
::
Tensor
*>
ins
;
std
::
vector
<
framework
::
Tensor
*>
outs
;
const
auto
&
cuda_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
int
axis
=
PackTensorsIntoVector
<
T
>
(
ctx
,
&
ins
,
&
outs
);
LaunchElementwiseCudaKernel
<
ElementwiseType
::
kBinary
,
T
,
T
>
(
cuda_ctx
,
ins
,
&
outs
,
axis
,
DivFunctor
<
T
>
());
}
};
template
<
typename
T
>
static
__global__
void
SimpleElemwiseDivGradCUDAKernel
(
const
T
*
x
,
const
T
*
y
,
const
T
*
out
,
...
...
paddle/fluid/operators/elementwise/elementwise_div_op.h
浏览文件 @
32d9beef
...
...
@@ -23,6 +23,12 @@ limitations under the License. */
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
#include "paddle/fluid/framework/pten_utils.h"
// only can include the headers in paddle/pten/include dirs
#include "paddle/pten/api/lib/utils/tensor_utils.h"
#include "paddle/pten/include/core.h"
#include "paddle/pten/include/math.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -42,13 +48,6 @@ void default_elementwise_div(const framework::ExecutionContext& ctx,
}
}
template
<
typename
DeviceContext
,
typename
T
,
class
Enable
=
void
>
struct
SameDimsElemwiseDiv
{
void
operator
()(
const
framework
::
ExecutionContext
&
ctx
,
const
framework
::
Tensor
*
x
,
const
framework
::
Tensor
*
y
,
framework
::
Tensor
*
z
);
};
template
<
typename
DeviceContext
,
typename
T
>
class
ElementwiseDivKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -58,13 +57,13 @@ class ElementwiseDivKernel : public framework::OpKernel<T> {
auto
*
z
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Out"
);
z
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
dims_equal
=
x
->
dims
()
==
y
->
dims
();
i
f
(
dims_equal
)
{
SameDimsElemwiseDiv
<
DeviceContext
,
T
>
same_dims_div
;
same_dims_div
(
ctx
,
x
,
y
,
z
);
}
else
{
default_elementwise_div
<
DeviceContext
,
T
>
(
ctx
,
x
,
y
,
z
);
}
auto
&
dev_ctx
=
ctx
.
device_context
<
DeviceContext
>
();
i
nt
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
auto
pt_x
=
paddle
::
experimental
::
MakePtenDenseTensor
(
*
x
)
;
auto
pt_y
=
paddle
::
experimental
::
MakePtenDenseTensor
(
*
y
);
auto
pt_z
=
paddle
::
experimental
::
MakePtenDenseTensor
(
*
z
);
pten
::
ElementwiseDiv
<
T
>
(
dev_ctx
,
*
pt_x
.
get
(),
*
pt_y
.
get
(),
axis
,
pt_z
.
get
());
}
};
...
...
paddle/fluid/operators/elementwise/elementwise_op.h
浏览文件 @
32d9beef
...
...
@@ -154,6 +154,12 @@ class ElementwiseOp : public framework::OperatorWithKernel {
{
"axis"
},
{
"Out"
});
}
}
if
(
Type
()
==
"elementwise_div"
)
{
if
(
ctx
.
InputVar
(
"X"
)
->
IsType
<
framework
::
LoDTensor
>
())
{
return
framework
::
KernelSignature
(
"elementwise_div"
,
{
"X"
,
"Y"
},
{
"axis"
},
{
"Out"
});
}
}
return
framework
::
KernelSignature
(
"None"
,
{
"X"
},
{},
{
"Out"
});
}
};
...
...
paddle/pten/api/include/math.h
浏览文件 @
32d9beef
...
...
@@ -26,5 +26,7 @@ PD_DLL_DECL Tensor mean(const Tensor& x);
PD_DLL_DECL
Tensor
add
(
const
Tensor
&
x
,
const
Tensor
&
y
);
PD_DLL_DECL
Tensor
subtract
(
const
Tensor
&
x
,
const
Tensor
&
y
);
PD_DLL_DECL
Tensor
divide
(
const
Tensor
&
x
,
const
Tensor
&
y
);
}
// namespace experimental
}
// namespace paddle
paddle/pten/api/lib/math.cc
浏览文件 @
32d9beef
...
...
@@ -137,6 +137,41 @@ PD_DLL_DECL Tensor subtract(const Tensor& x, const Tensor& y) {
return
out
;
}
PD_DLL_DECL
Tensor
divide
(
const
Tensor
&
x
,
const
Tensor
&
y
)
{
// 1. Get kernel signature and kernel
auto
kernel_key_set
=
ParseKernelKeyByInputArgs
(
x
);
auto
kernel_key
=
kernel_key_set
.
GetHigestPriorityKernelKey
();
auto
kernel
=
pten
::
KernelFactory
::
Instance
().
SelectKernelOrThrowError
(
"elementwise_div"
,
kernel_key
);
// 2. Get Device Context
auto
*
dev_ctx
=
GetDeviceContextByBackend
(
kernel_key
.
backend
());
auto
kernel_context
=
pten
::
KernelContext
(
dev_ctx
);
// 3. Auto data transform
auto
dense_x
=
std
::
dynamic_pointer_cast
<
pten
::
DenseTensor
>
(
x
.
impl
());
kernel_context
.
EmplaceBackInput
(
dense_x
);
auto
dense_y
=
std
::
dynamic_pointer_cast
<
pten
::
DenseTensor
>
(
y
.
impl
());
kernel_context
.
EmplaceBackInput
(
dense_y
);
kernel_context
.
EmplaceBackAttr
(
-
1
);
// 4. InferShape
auto
out_meta
=
ElementwiseInferShape
(
dense_x
->
meta
(),
dense_y
->
meta
(),
-
1
);
// 5. Prepare outputs
Tensor
out
;
const
auto
allocator
=
std
::
make_shared
<
DefaultAllocator
>
(
pten
::
TransToFluidPlace
(
kernel_key
.
backend
()));
auto
dense_out
=
std
::
make_shared
<
pten
::
DenseTensor
>
(
allocator
,
out_meta
);
kernel_context
.
EmplaceBackOutput
(
dense_out
);
out
.
set_impl
(
dense_out
);
// 6. Call kernel
kernel
(
&
kernel_context
);
return
out
;
}
}
// namespace experimental
}
// namespace paddle
...
...
paddle/pten/include/math.h
浏览文件 @
32d9beef
...
...
@@ -75,7 +75,7 @@ DenseTensor Scale(const ContextT& dev_ctx,
}
template
<
typename
T
,
typename
ContextT
>
DenseTensor
Elementwise
Add
(
const
ContextT
&
dev_ctx
,
DenseTensor
Add
(
const
ContextT
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
int
axis
)
{
...
...
@@ -102,4 +102,17 @@ DenseTensor Subtract(const ContextT& dev_ctx,
return
dense_out
;
}
template
<
typename
T
,
typename
ContextT
>
DenseTensor
Divide
(
const
ContextT
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
int
axis
)
{
auto
out_meta
=
ElementwiseInferShape
(
x
.
meta
(),
y
.
meta
(),
axis
);
const
auto
allocator
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
dev_ctx
.
GetPlace
());
pten
::
DenseTensor
dense_out
(
allocator
,
out_meta
);
ElementwiseDiv
<
T
>
(
dev_ctx
,
x
,
y
,
axis
,
&
dense_out
);
return
dense_out
;
}
}
// namespace pten
paddle/pten/kernels/cpu/math.cc
浏览文件 @
32d9beef
...
...
@@ -114,6 +114,30 @@ void ElementwiseSub(const CPUContext& dev_ctx,
}
}
template
<
typename
T
>
void
ElementwiseDiv
(
const
CPUContext
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
int
axis
,
DenseTensor
*
out
)
{
// allocate memory for out
out
->
mutable_data
<
T
>
();
if
(
x
.
dims
()
==
y
.
dims
()
&&
std
::
is_floating_point
<
T
>::
value
)
{
SameDimsElementwiseCompute
<
general
::
SameDimsDivFunctor
<
CPUContext
,
T
>>
()(
dev_ctx
,
x
,
y
,
out
);
}
else
{
auto
x_dims
=
x
.
dims
();
auto
y_dims
=
y
.
dims
();
if
(
x_dims
.
size
()
>=
y_dims
.
size
())
{
ElementwiseCompute
<
general
::
DivFunctor
<
T
>
,
T
>
(
dev_ctx
,
x
,
y
,
axis
,
general
::
DivFunctor
<
T
>
(),
out
);
}
else
{
ElementwiseCompute
<
general
::
InverseDivFunctor
<
T
>
,
T
>
(
dev_ctx
,
x
,
y
,
axis
,
general
::
InverseDivFunctor
<
T
>
(),
out
);
}
}
}
}
// namespace pten
// TODO(chenweihang): replace by better impl
...
...
@@ -174,3 +198,13 @@ PT_REGISTER_KERNEL("elementwise_sub",
int64_t
,
complex64
,
complex128
)
{}
PT_REGISTER_KERNEL
(
"elementwise_div"
,
CPU
,
ANY
,
pten
::
ElementwiseDiv
,
float
,
double
,
int
,
int64_t
,
complex64
,
complex128
)
{}
paddle/pten/kernels/cpu/math.h
浏览文件 @
32d9beef
...
...
@@ -60,4 +60,10 @@ void ElementwiseSub(const CPUContext& dev_ctx,
int
axis
,
DenseTensor
*
out
);
template
<
typename
T
>
void
ElementwiseDiv
(
const
CPUContext
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
int
axis
,
DenseTensor
*
out
);
}
// namespace pten
paddle/pten/kernels/cuda/math.cu
浏览文件 @
32d9beef
...
...
@@ -158,6 +158,23 @@ void ElementwiseSub(const CUDAContext& dev_ctx,
dev_ctx
,
inputs
,
&
outputs
,
axis
,
general
::
SubFunctor
<
T
>
());
}
template
<
typename
T
>
void
ElementwiseDiv
(
const
CUDAContext
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
int
axis
,
DenseTensor
*
out
)
{
std
::
vector
<
const
DenseTensor
*>
inputs
;
std
::
vector
<
DenseTensor
*>
outputs
;
inputs
.
emplace_back
(
&
x
);
inputs
.
emplace_back
(
&
y
);
// allocate memory for out
out
->
mutable_data
<
T
>
();
outputs
.
emplace_back
(
out
);
LaunchElementwiseCudaKernel
<
ElementwiseType
::
kBinary
,
T
,
T
>
(
dev_ctx
,
inputs
,
&
outputs
,
axis
,
general
::
DivFunctor
<
T
>
());
}
}
// namespace pten
// TODO(chenweihang): replace by better impl
...
...
@@ -217,3 +234,14 @@ PT_REGISTER_KERNEL("elementwise_sub",
float16
,
complex64
,
complex128
)
{}
PT_REGISTER_KERNEL
(
"elementwise_div"
,
CUDA
,
ANY
,
pten
::
ElementwiseDiv
,
float
,
double
,
int
,
int64_t
,
float16
,
complex64
,
complex128
)
{}
paddle/pten/kernels/cuda/math.h
浏览文件 @
32d9beef
...
...
@@ -62,6 +62,13 @@ void ElementwiseSub(const CUDAContext& dev_ctx,
int
axis
,
DenseTensor
*
out
);
template
<
typename
T
>
void
ElementwiseDiv
(
const
CUDAContext
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
int
axis
,
DenseTensor
*
out
);
}
// namespace pten
#endif
paddle/pten/kernels/functions/blas/elementwise.h
浏览文件 @
32d9beef
...
...
@@ -38,5 +38,14 @@ void ElementwiseSub(const DevCtx& dev_ctx,
blas
.
VSUB
(
x
.
numel
(),
x
.
data
<
T
>
(),
y
.
data
<
T
>
(),
out
->
mutable_data
<
T
>
());
}
template
<
typename
DevCtx
,
typename
T
>
void
ElementwiseDiv
(
const
DevCtx
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
DenseTensor
*
out
)
{
auto
blas
=
paddle
::
operators
::
math
::
GetBlas
<
DevCtx
,
T
>
(
dev_ctx
);
blas
.
VDIV
(
x
.
numel
(),
x
.
data
<
T
>
(),
y
.
data
<
T
>
(),
out
->
mutable_data
<
T
>
());
}
}
// namespace blas
}
// namespace pten
paddle/pten/kernels/functions/general/elementwise_functor.h
浏览文件 @
32d9beef
...
...
@@ -114,5 +114,65 @@ struct InverseSubFunctor {
inline
HOSTDEVICE
T
operator
()(
const
T
&
a
,
const
T
&
b
)
const
{
return
b
-
a
;
}
};
// Divide
template
<
typename
DevCtx
,
typename
T
,
class
Enable
=
void
>
struct
SameDimsDivFunctor
{
void
operator
()(
const
DevCtx
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
DenseTensor
*
z
);
};
template
<
typename
DevCtx
,
typename
T
>
struct
SameDimsDivFunctor
<
DevCtx
,
T
,
typename
std
::
enable_if
<!
std
::
is_floating_point
<
T
>::
value
>::
type
>
{
void
operator
()(
const
DevCtx
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
DenseTensor
*
z
)
{
paddle
::
platform
::
errors
::
InvalidArgument
(
"If use SameDimsDivFunctor, template args(T) must be floating point. "
);
}
};
template
<
typename
DevCtx
,
typename
T
>
struct
SameDimsDivFunctor
<
DevCtx
,
T
,
typename
std
::
enable_if
<
std
::
is_floating_point
<
T
>::
value
>::
type
>
{
void
operator
()(
const
DevCtx
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
DenseTensor
*
z
)
{
blas
::
ElementwiseDiv
<
DevCtx
,
T
>
(
dev_ctx
,
x
,
y
,
z
);
}
};
#define DIV_ERROR_INFO \
"InvalidArgumentError: Integer division by zero encountered in " \
"(floor) divide. Please check the input value."
template
<
typename
T
,
typename
Enable
=
void
>
struct
DivFunctor
{
inline
HOSTDEVICE
T
operator
()(
const
T
&
a
,
const
T
&
b
)
const
{
return
a
/
b
;
}
};
template
<
typename
T
>
struct
DivFunctor
<
T
,
typename
std
::
enable_if
<
std
::
is_integral
<
T
>::
value
>::
type
>
{
inline
HOSTDEVICE
T
operator
()(
const
T
&
a
,
const
T
&
b
)
const
{
// For int32/int64, need to check whether the divison is zero.
PADDLE_ENFORCE
(
b
!=
0
,
DIV_ERROR_INFO
);
return
a
/
b
;
}
};
template
<
typename
T
,
typename
Enable
=
void
>
struct
InverseDivFunctor
{
inline
HOSTDEVICE
T
operator
()(
const
T
&
a
,
const
T
&
b
)
const
{
return
b
/
a
;
}
};
}
// namespace general
}
// namespace pten
paddle/pten/tests/api/test_elementwise_api.cc
浏览文件 @
32d9beef
...
...
@@ -131,3 +131,57 @@ TEST(API, subtract) {
ASSERT_NEAR
(
expect_result
[
0
][
1
],
actual_result1
,
1e-6
f
);
ASSERT_NEAR
(
expect_result
[
1
][
0
],
actual_result2
,
1e-6
f
);
}
// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST
(
API
,
divide
)
{
// 1. create tensor
const
auto
alloc
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
paddle
::
platform
::
CPUPlace
());
auto
dense_x
=
std
::
make_shared
<
pten
::
DenseTensor
>
(
alloc
,
pten
::
DenseTensorMeta
(
pten
::
DataType
::
FLOAT32
,
framework
::
make_ddim
({
3
,
10
}),
pten
::
DataLayout
::
NCHW
));
auto
*
dense_x_data
=
dense_x
->
mutable_data
<
float
>
();
auto
dense_y
=
std
::
make_shared
<
pten
::
DenseTensor
>
(
alloc
,
pten
::
DenseTensorMeta
(
pten
::
DataType
::
FLOAT32
,
framework
::
make_ddim
({
10
}),
pten
::
DataLayout
::
NCHW
));
auto
*
dense_y_data
=
dense_y
->
mutable_data
<
float
>
();
float
div
[
3
][
10
]
=
{
0.0
};
for
(
size_t
i
=
0
;
i
<
3
;
++
i
)
{
for
(
size_t
j
=
0
;
j
<
10
;
++
j
)
{
dense_x_data
[
i
*
10
+
j
]
=
(
i
*
10
+
j
)
*
1.0
;
div
[
i
][
j
]
=
(
i
*
10
+
j
)
*
1.0
/
(
j
*
2.0
+
1
);
}
}
for
(
size_t
i
=
0
;
i
<
10
;
++
i
)
{
dense_y_data
[
i
]
=
i
*
2.0
+
1
;
}
paddle
::
experimental
::
Tensor
x
(
dense_x
);
paddle
::
experimental
::
Tensor
y
(
dense_y
);
// 2. test API
auto
out
=
paddle
::
experimental
::
divide
(
x
,
y
);
// 3. check result
ASSERT_EQ
(
out
.
shape
().
size
(),
2UL
);
ASSERT_EQ
(
out
.
shape
()[
0
],
3
);
ASSERT_EQ
(
out
.
numel
(),
30
);
ASSERT_EQ
(
out
.
is_cpu
(),
true
);
ASSERT_EQ
(
out
.
type
(),
pten
::
DataType
::
FLOAT32
);
ASSERT_EQ
(
out
.
layout
(),
pten
::
DataLayout
::
NCHW
);
ASSERT_EQ
(
out
.
initialized
(),
true
);
auto
expect_result
=
div
;
auto
dense_out
=
std
::
dynamic_pointer_cast
<
pten
::
DenseTensor
>
(
out
.
impl
());
auto
actual_result0
=
dense_out
->
data
<
float
>
()[
0
];
auto
actual_result1
=
dense_out
->
data
<
float
>
()[
1
];
auto
actual_result2
=
dense_out
->
data
<
float
>
()[
10
];
ASSERT_NEAR
(
expect_result
[
0
][
0
],
actual_result0
,
1e-6
f
);
ASSERT_NEAR
(
expect_result
[
0
][
1
],
actual_result1
,
1e-6
f
);
ASSERT_NEAR
(
expect_result
[
1
][
0
],
actual_result2
,
1e-6
f
);
}
paddle/pten/tests/kernels/test_elementwise_dev_api.cc
浏览文件 @
32d9beef
...
...
@@ -24,7 +24,7 @@ limitations under the License. */
namespace
framework
=
paddle
::
framework
;
using
DDim
=
paddle
::
framework
::
DDim
;
TEST
(
DEV_API
,
elementwise_
add
)
{
TEST
(
DEV_API
,
add
)
{
// 1. create tensor
const
auto
alloc
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
paddle
::
platform
::
CPUPlace
());
...
...
@@ -56,7 +56,7 @@ TEST(DEV_API, elementwise_add) {
auto
*
dev_ctx
=
pool
.
Get
(
paddle
::
platform
::
CPUPlace
());
// 2. test API
auto
dense_out
=
pten
::
Elementwise
Add
<
float
>
(
auto
dense_out
=
pten
::
Add
<
float
>
(
*
(
static_cast
<
paddle
::
platform
::
CPUDeviceContext
*>
(
dev_ctx
)),
dense_x
,
dense_y
,
...
...
@@ -129,3 +129,56 @@ TEST(DEV_API, subtract) {
ASSERT_NEAR
(
expect_result
[
0
][
1
],
actual_result1
,
1e-6
f
);
ASSERT_NEAR
(
expect_result
[
1
][
0
],
actual_result2
,
1e-6
f
);
}
TEST
(
DEV_API
,
divide
)
{
// 1. create tensor
const
auto
alloc
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
paddle
::
platform
::
CPUPlace
());
pten
::
DenseTensor
dense_x
(
alloc
,
pten
::
DenseTensorMeta
(
pten
::
DataType
::
FLOAT32
,
framework
::
make_ddim
({
3
,
10
}),
pten
::
DataLayout
::
NCHW
));
auto
*
dense_x_data
=
dense_x
.
mutable_data
<
float
>
();
pten
::
DenseTensor
dense_y
(
alloc
,
pten
::
DenseTensorMeta
(
pten
::
DataType
::
FLOAT32
,
framework
::
make_ddim
({
10
}),
pten
::
DataLayout
::
NCHW
));
auto
*
dense_y_data
=
dense_y
.
mutable_data
<
float
>
();
float
div
[
3
][
10
]
=
{
0.0
};
for
(
size_t
i
=
0
;
i
<
3
;
++
i
)
{
for
(
size_t
j
=
0
;
j
<
10
;
++
j
)
{
dense_x_data
[
i
*
10
+
j
]
=
(
i
*
10
+
j
)
*
1.0
;
div
[
i
][
j
]
=
(
i
*
10
+
j
)
*
1.0
/
(
j
*
2.0
+
1
);
}
}
for
(
size_t
i
=
0
;
i
<
10
;
++
i
)
{
dense_y_data
[
i
]
=
i
*
2.0
+
1
;
}
int
axis
=
1
;
paddle
::
platform
::
DeviceContextPool
&
pool
=
paddle
::
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
pool
.
Get
(
paddle
::
platform
::
CPUPlace
());
// 2. test API
auto
dense_out
=
pten
::
Divide
<
float
>
(
*
(
static_cast
<
paddle
::
platform
::
CPUDeviceContext
*>
(
dev_ctx
)),
dense_x
,
dense_y
,
axis
);
// 3. check result
ASSERT_EQ
(
dense_out
.
dims
().
size
(),
2
);
ASSERT_EQ
(
dense_out
.
dims
()[
0
],
3
);
ASSERT_EQ
(
dense_out
.
meta
().
dtype
,
pten
::
DataType
::
FLOAT32
);
ASSERT_EQ
(
dense_out
.
meta
().
layout
,
pten
::
DataLayout
::
NCHW
);
auto
expect_result
=
div
;
auto
actual_result0
=
dense_out
.
data
<
float
>
()[
0
];
auto
actual_result1
=
dense_out
.
data
<
float
>
()[
1
];
auto
actual_result2
=
dense_out
.
data
<
float
>
()[
10
];
ASSERT_NEAR
(
expect_result
[
0
][
0
],
actual_result0
,
1e-6
f
);
ASSERT_NEAR
(
expect_result
[
0
][
1
],
actual_result1
,
1e-6
f
);
ASSERT_NEAR
(
expect_result
[
1
][
0
],
actual_result2
,
1e-6
f
);
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录