Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
c48bd3ff
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c48bd3ff
编写于
1月 06, 2022
作者:
C
chentianyu03
提交者:
GitHub
1月 06, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[pten]move reduce files and dev_api (#38715)
* move eigen/reduce.h imple into cpu/reduce.h * ctx to dev_ctx
上级
4514f16d
变更
8
显示空白变更内容
内联
并排
Showing
8 changed file
with
250 addition
and
258 deletion
+250
-258
paddle/pten/include/math.h
paddle/pten/include/math.h
+0
-37
paddle/pten/kernels/cpu/math_kernel.cc
paddle/pten/kernels/cpu/math_kernel.cc
+3
-2
paddle/pten/kernels/cpu/reduce.h
paddle/pten/kernels/cpu/reduce.h
+177
-3
paddle/pten/kernels/funcs/reduce_functor.h
paddle/pten/kernels/funcs/reduce_functor.h
+37
-0
paddle/pten/kernels/hybird/eigen/reduce.h
paddle/pten/kernels/hybird/eigen/reduce.h
+0
-214
paddle/pten/kernels/math_kernel.h
paddle/pten/kernels/math_kernel.h
+31
-0
paddle/pten/tests/kernels/test_mean_dev_api.cc
paddle/pten/tests/kernels/test_mean_dev_api.cc
+1
-1
paddle/pten/tests/kernels/test_sum_dev_api.cc
paddle/pten/tests/kernels/test_sum_dev_api.cc
+1
-1
未找到文件。
paddle/pten/include/math.h
浏览文件 @
c48bd3ff
...
...
@@ -18,7 +18,6 @@ limitations under the License. */
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/complex_kernel.h"
#include "paddle/pten/kernels/math_kernel.h"
#include "paddle/pten/kernels/scale_kernel.h"
namespace
pten
{
...
...
@@ -34,42 +33,6 @@ DenseTensor Sign(const ContextT& dev_ctx, const DenseTensor& x) {
return
dense_out
;
}
// Convenience wrapper around MeanKernel: infers the output metadata with
// ReduceInferMeta, allocates the output tensor on dev_ctx.GetPlace(), runs
// the kernel and returns the tensor by value.
//
// - dev_ctx:  device context; supplies the place for the output storage.
// - x:        input tensor.
// - axis:     axes forwarded to ReduceInferMeta and MeanKernel.
// - keep_dim: forwarded unchanged to ReduceInferMeta and MeanKernel.
template <typename T, typename ContextT>
DenseTensor Mean(const ContextT& dev_ctx,
                 const DenseTensor& x,
                 const std::vector<int64_t>& axis,
                 bool keep_dim) {
  auto result_meta = ReduceInferMeta(x.meta(), axis, keep_dim);
  pten::DenseTensor result(
      pten::make_intrusive<paddle::experimental::SharedStorage>(
          dev_ctx.GetPlace()),
      std::move(result_meta));

  // reduce_all is always passed as false from this wrapper.
  MeanKernel<T, ContextT>(
      dev_ctx, x, axis, keep_dim, /*reduce_all=*/false, &result);
  return result;
}
// Convenience wrapper around SumKernel: infers the output metadata with
// ReduceInferMeta (including the requested dtype), allocates the output
// tensor on dev_ctx.GetPlace(), runs the kernel and returns the tensor.
//
// - dev_ctx:  device context; supplies the place for the output storage.
// - x:        input tensor.
// - axis:     axes forwarded to ReduceInferMeta and SumKernel.
// - dtype:    forwarded to ReduceInferMeta; the kernel receives the dtype
//             stored in the inferred metadata.
// - keep_dim: forwarded unchanged to ReduceInferMeta and SumKernel.
template <typename T, typename ContextT>
DenseTensor Sum(const ContextT& dev_ctx,
                const DenseTensor& x,
                const std::vector<int64_t>& axis,
                DataType dtype,
                bool keep_dim) {
  auto result_meta = ReduceInferMeta(x.meta(), axis, keep_dim, dtype);
  pten::DenseTensor result(
      pten::make_intrusive<paddle::experimental::SharedStorage>(
          dev_ctx.GetPlace()),
      result_meta);

  // Per the original author's note, the kernel works out the effective
  // reduce_all itself, so passing the default (false) here is fine.
  SumKernel<T, ContextT>(dev_ctx,
                         x,
                         axis,
                         keep_dim,
                         /*reduce_all=*/false,
                         result_meta.dtype,
                         &result);
  return result;
}
template
<
typename
T
,
typename
ContextT
>
DenseTensor
Scale
(
const
ContextT
&
dev_ctx
,
const
DenseTensor
&
x
,
...
...
paddle/pten/kernels/cpu/math_kernel.cc
浏览文件 @
c48bd3ff
...
...
@@ -21,6 +21,7 @@
#include "paddle/pten/kernels/cpu/elementwise.h"
#include "paddle/pten/kernels/cpu/reduce.h"
#include "paddle/pten/kernels/funcs/elementwise_functor.h"
#include "paddle/pten/kernels/funcs/reduce_functor.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/framework/eigen.h"
...
...
@@ -61,7 +62,7 @@ void MeanKernel(const Context& dev_ctx,
bool
reduce_all
,
DenseTensor
*
out
)
{
auto
out_dtype
=
x
.
dtype
();
pten
::
Reduce
<
CPUContext
,
T
,
pten
::
eigen
::
MeanFunctor
>
(
pten
::
Reduce
<
CPUContext
,
T
,
pten
::
funcs
::
MeanFunctor
>
(
dev_ctx
,
x
,
reduce_all
,
dims
,
keep_dim
,
out_dtype
,
out
);
}
...
...
@@ -97,7 +98,7 @@ void SumKernel(const Context& dev_ctx,
bool
reduce_all
,
DataType
out_dtype
,
DenseTensor
*
out
)
{
pten
::
Reduce
<
CPUContext
,
T
,
pten
::
eigen
::
SumFunctor
>
(
pten
::
Reduce
<
CPUContext
,
T
,
pten
::
funcs
::
SumFunctor
>
(
dev_ctx
,
x
,
reduce_all
,
dims
,
keep_dim
,
out_dtype
,
out
);
}
...
...
paddle/pten/kernels/cpu/reduce.h
浏览文件 @
c48bd3ff
...
...
@@ -19,10 +19,184 @@
#include "paddle/pten/api/ext/dispatch.h"
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/kernels/cast_kernel.h"
#include "paddle/pten/kernels/hybird/eigen/reduce.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/kernels/hybird/eigen/common.h"
#include "paddle/pten/kernels/hybird/transpose.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/operators/eigen/eigen_function.h"
namespace
pten
{
// Reduces `input`, viewed as a rank-D Eigen tensor, over the axes listed in
// `dims` by invoking `Functor`, and writes the result into `output`.
//
// - context:  supplies the Eigen device through eigen_device().
// - input:    tensor to reduce.
// - output:   destination tensor; viewed as an Eigen scalar when D == 1 and
//             as a rank-(D - R_D) Eigen tensor otherwise.
// - dims:     axes to reduce; a negative entry is offset by the input rank.
//             Every entry is written into an Eigen::array<int, R_D>, so the
//             caller must not pass more than R_D entries (not checked here).
// - keep_dim: when true and the input rank is > 1, the entries of
//             output->dims() at the reduced axes are dropped so the Eigen
//             view of the output uses the squeezed shape.
template <typename DeviceContext,
          typename T,
          size_t D,
          size_t R_D,
          typename Functor>
void ReduceFunctor(const DeviceContext& context,
                   const pten::DenseTensor& input,
                   pten::DenseTensor* output,
                   const std::vector<int64_t>& dims,
                   bool keep_dim) {
  auto x = EigenTensor<T, D>::From(input);
  auto x_rank = static_cast<int>(x.dimensions().size());

  // Normalize negative axes against the input rank and copy them into the
  // fixed-size Eigen::array that is handed to the functor.
  auto reduce_dim = Eigen::array<int, R_D>();
  std::vector<int64_t> dims_ref = dims;
  for (size_t i = 0; i < dims_ref.size(); ++i) {
    if (dims_ref[i] < 0) dims_ref[i] = x_rank + dims_ref[i];
    reduce_dim[i] = dims_ref[i];
  }

  // Construct the squeezed output dims: mark every reduced axis with a
  // sentinel value and erase the marked entries.
  DDim out_dims = output->dims();
  if (keep_dim && x_rank > 1) {
    const int kDelFlag = -2;
    auto dims_vector = paddle::framework::vectorize(out_dims);
    for (size_t i = 0; i < dims_ref.size(); ++i) {
      dims_vector[dims_ref[i]] = kDelFlag;
    }
    // std::remove is qualified explicitly; the unqualified call only
    // resolved through ADL on the iterator type.
    dims_vector.erase(
        std::remove(dims_vector.begin(), dims_vector.end(), kDelFlag),
        dims_vector.end());
    out_dims = paddle::framework::make_ddim(dims_vector);
  }

  auto& place = *context.eigen_device();
  Functor functor;

  if (D == 1) {
    // Rank-1 input: the output is viewed as a scalar.
    auto out = EigenScalar<T>::From(*output);
    functor(place, &x, &out, reduce_dim);
  } else {
    auto out = EigenTensor<T, (D - R_D)>::From(*output, out_dims);
    functor(place, &x, &out, reduce_dim);
  }
}
// Dispatch helper: when the runtime values `ndim` and `rdim` equal the
// compile-time constants NDIM and RDIM, calls
// ReduceFunctor<DeviceContext, OutT, NDIM, RDIM, Functor>.
//
// The macro reads names from its expansion site: the variables `ndim`,
// `rdim`, `dev_ctx`, `input`, `output`, `dims`, `keep_dim` and the types
// `DeviceContext`, `OutT`, `Functor` must all be in scope there.
// It expands to a bare `if` statement (no do/while wrapper).
#define HANDLE_REDUCE_DIM(NDIM, RDIM) \
if (ndim == NDIM && rdim == RDIM) { \
ReduceFunctor<DeviceContext, OutT, NDIM, RDIM, Functor>( \
dev_ctx, input, output, dims, keep_dim); \
}
//////////////// HandleLargeDim
inline
void
GetShuffledDim
(
const
DDim
&
src_dims
,
DDim
*
dst_dims
,
const
std
::
vector
<
int64_t
>&
reduced_dims
,
std
::
vector
<
int64_t
>*
perm_axis
)
{
// check if it's a reduced dim
std
::
vector
<
bool
>
src_dims_check
(
src_dims
.
size
(),
false
);
size_t
src_size
=
src_dims
.
size
();
size_t
reduce_size
=
reduced_dims
.
size
();
std
::
vector
<
int64_t
>
regular_reduced_dims
=
reduced_dims
;
for
(
size_t
i
=
0
;
i
<
regular_reduced_dims
.
size
();
i
++
)
{
if
(
regular_reduced_dims
[
i
]
<
0
)
{
regular_reduced_dims
[
i
]
=
src_size
+
regular_reduced_dims
[
i
];
}
}
for
(
size_t
i
=
0
;
i
<
reduce_size
;
++
i
)
{
dst_dims
->
at
(
src_size
-
reduce_size
+
i
)
=
src_dims
[
regular_reduced_dims
[
i
]];
(
*
perm_axis
)[
src_size
-
reduce_size
+
i
]
=
regular_reduced_dims
[
i
];
src_dims_check
[
regular_reduced_dims
[
i
]]
=
true
;
}
size_t
offset
=
0
;
for
(
size_t
i
=
0
;
i
<
src_dims_check
.
size
();
++
i
)
{
bool
is_reduced
=
src_dims_check
[
i
];
if
(
!
is_reduced
)
{
(
*
perm_axis
)[
offset
]
=
i
;
dst_dims
->
at
(
offset
++
)
=
src_dims
[
i
];
}
}
}
// Writes into `shuffled_input` a transposed copy of `input` in which the
// axes listed in `dims` have been moved to the end (the permutation comes
// from GetShuffledDim).
//
// - dev_ctx:        device context handed to the transpose functor.
// - input:          source tensor.
// - shuffled_input: destination; resized to the permuted dims and allocated
//                   as OutT before the transpose runs.
// - dims:           axes to move to the end.
template <typename DeviceContext, typename OutT>
void GetShuffledInput(const DeviceContext& dev_ctx,
                      const pten::DenseTensor& input,
                      pten::DenseTensor* shuffled_input,
                      const std::vector<int64_t>& dims) {
  const DDim& in_dims = input.dims();

  DDim permuted_dims(in_dims);
  std::vector<int64_t> permutation(in_dims.size());
  GetShuffledDim(in_dims, &permuted_dims, dims, &permutation);

  shuffled_input->Resize(permuted_dims);
  shuffled_input->mutable_data<OutT>();

  pten::math::TransposeNormal<DeviceContext, OutT> transpose;
  transpose(dev_ctx, input, shuffled_input, permutation);
}
// Reduction path that works on a 2-D view of the input: the reduced axes
// are transposed to the end, the tensor is viewed as {unreduced, reduced},
// and axis 1 is reduced with ReduceFunctor<..., 2, 1, Functor>.
//
// - dev_ctx:  device context.
// - input:    tensor to reduce.
// - output:   destination; temporarily resized to {unreduced} for the
//             reduction and restored to its original dims before returning.
// - dims:     axes to reduce.
// - keep_dim: forwarded to ReduceFunctor.
//
// NOTE(review): `reduced` is computed by dividing by output->numel();
// this assumes the output is non-empty, which is not checked here.
template <typename DeviceContext, typename OutT, typename Functor>
void HandleLargeDim(const DeviceContext& dev_ctx,
                    const pten::DenseTensor& input,
                    pten::DenseTensor* output,
                    const std::vector<int64_t>& dims,
                    bool keep_dim) {
  // Scratch tensor that receives the input with reduced axes moved last.
  pten::DenseTensor transposed(
      pten::make_intrusive<paddle::experimental::SharedStorage>(
          input.place()),
      input.meta());
  GetShuffledInput<DeviceContext, OutT>(dev_ctx, input, &transposed, dims);

  // View the transposed data as a 2-D tensor of shape {unreduced, reduced}.
  const int64_t unreduced = output->numel();
  const int64_t reduced = transposed.numel() / unreduced;
  transposed.Resize({unreduced, reduced});

  // Flatten the output for the reduction, then put its dims back.
  DDim saved_output_dims = output->dims();
  output->Resize({unreduced});
  const std::vector<int64_t> last_axis{1};
  ReduceFunctor<DeviceContext, OutT, 2, 1, Functor>(
      dev_ctx, transposed, output, last_axis, keep_dim);
  output->Resize(saved_output_dims);
}
////////////// ReduceKernel
// Entry point of the CPU reduction: allocates `output` as OutT and picks
// one of three paths.
//   1. reduce_all: flatten the input to a 1-D Eigen vector and reduce axis 0
//      into a scalar.
//   2. input rank > 6: delegate to HandleLargeDim.
//   3. otherwise: dispatch on (rank, number of reduced axes) through
//      HANDLE_REDUCE_DIM. Combinations that are not listed below fall
//      through without invoking a functor.
//
// The template parameter T is not used in the body; all views use OutT.
//
// - dev_ctx:    device context; supplies the Eigen device.
// - input:      tensor to reduce.
// - output:     destination tensor.
// - dims:       axes to reduce (ignored when reduce_all is true).
// - keep_dim:   forwarded to the selected reduction path.
// - reduce_all: selects path 1.
template <typename DeviceContext,
          typename T,
          typename OutT,
          typename Functor>
void ReduceKernelImpl(const DeviceContext& dev_ctx,
                      const pten::DenseTensor& input,
                      pten::DenseTensor* output,
                      const std::vector<int64_t>& dims,
                      bool keep_dim,
                      bool reduce_all) {
  output->mutable_data<OutT>();

  if (reduce_all) {
    // Flatten and reduce 1-D tensor
    auto flat_input = EigenVector<OutT>::Flatten(input);
    auto scalar_output = EigenScalar<OutT>::From(*output);
    auto& eigen_dev = *dev_ctx.eigen_device();
    auto first_axis = Eigen::array<int, 1>({{0}});

    Functor reducer;
    reducer(eigen_dev, &flat_input, &scalar_output, first_axis);
    return;
  }

  // `ndim` and `rdim` are read by HANDLE_REDUCE_DIM and must keep these names.
  int ndim = input.dims().size();
  int rdim = dims.size();

  if (ndim > 6) {
    HandleLargeDim<DeviceContext, OutT, Functor>(
        dev_ctx, input, output, dims, keep_dim);
    return;
  }

  HANDLE_REDUCE_DIM(6, 5);
  HANDLE_REDUCE_DIM(6, 4);
  HANDLE_REDUCE_DIM(6, 3);
  HANDLE_REDUCE_DIM(6, 2);
  HANDLE_REDUCE_DIM(6, 1);
  HANDLE_REDUCE_DIM(5, 4);
  HANDLE_REDUCE_DIM(5, 3);
  HANDLE_REDUCE_DIM(5, 2);
  HANDLE_REDUCE_DIM(5, 1);
  HANDLE_REDUCE_DIM(4, 3);
  HANDLE_REDUCE_DIM(4, 2);
  HANDLE_REDUCE_DIM(4, 1);
  HANDLE_REDUCE_DIM(3, 2);
  HANDLE_REDUCE_DIM(3, 1);
  HANDLE_REDUCE_DIM(2, 1);
  HANDLE_REDUCE_DIM(1, 1);
}
template
<
typename
DeviceContext
,
typename
T
,
typename
Functor
>
void
Reduce
(
const
DeviceContext
&
dev_ctx
,
const
DenseTensor
&
x
,
...
...
@@ -52,7 +226,7 @@ void Reduce(const DeviceContext& dev_ctx,
// do reduce sum
PD_VISIT_ALL_TYPES
(
out_dtype
,
"ReduceKernelImpl"
,
([
&
]
{
pten
::
eigen
::
ReduceKernelImpl
<
DeviceContext
,
T
,
data_t
,
Functor
>
(
pten
::
ReduceKernelImpl
<
DeviceContext
,
T
,
data_t
,
Functor
>
(
dev_ctx
,
x
,
out
,
dims
,
keep_dim
,
reduce_all
);
}));
}
else
{
...
...
@@ -66,7 +240,7 @@ void Reduce(const DeviceContext& dev_ctx,
// do reduce sum
PD_VISIT_ALL_TYPES
(
out_dtype
,
"ReduceKernelImpl"
,
([
&
]
{
pten
::
eigen
::
ReduceKernelImpl
<
DeviceContext
,
T
,
data_t
,
Functor
>
(
pten
::
ReduceKernelImpl
<
DeviceContext
,
T
,
data_t
,
Functor
>
(
dev_ctx
,
tmp_tensor
,
out
,
dims
,
keep_dim
,
reduce_all
);
}));
}
...
...
paddle/pten/kernels/funcs/reduce_functor.h
0 → 100644
浏览文件 @
c48bd3ff
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
namespace
pten
{
namespace
funcs
{
//////// Sum Functor ///////
// Reduction functor: assigns the result of x->sum(dim) to y->device(place).
//
// - place: forwarded to y->device(...).
// - x:     source expression; must provide sum(dim).
// - y:     destination; must provide device(place) returning something
//          assignable from the sum expression.
// - dim:   axes argument forwarded to sum().
struct SumFunctor {
  template <typename Device, typename In, typename Out, typename Axes>
  void operator()(const Device& place, In* x, Out* y, const Axes& dim) {
    auto& source = *x;
    auto& target = *y;
    target.device(place) = source.sum(dim);
  }
};
//////// Mean Functor ///////
// Reduction functor: assigns the result of x->mean(dim) to y->device(place).
//
// - place: forwarded to y->device(...).
// - x:     source expression; must provide mean(dim).
// - y:     destination; must provide device(place) returning something
//          assignable from the mean expression.
// - dim:   axes argument forwarded to mean().
struct MeanFunctor {
  template <typename Device, typename In, typename Out, typename Axes>
  void operator()(const Device& place, In* x, Out* y, const Axes& dim) {
    auto& source = *x;
    auto& target = *y;
    target.device(place) = source.mean(dim);
  }
};
}
// namespace funcs
}
// namespace pten
paddle/pten/kernels/hybird/eigen/reduce.h
已删除
100644 → 0
浏览文件 @
4514f16d
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/kernels/hybird/eigen/common.h"
#include "paddle/pten/kernels/hybird/transpose.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/operators/eigen/eigen_function.h"
namespace
pten
{
namespace
eigen
{
// Reduces `input`, viewed as a rank-D Eigen tensor, over the axes listed in
// `dims` by invoking `Functor`, and writes the result into `output`.
//
// - context:  supplies the Eigen device through eigen_device().
// - input:    tensor to reduce.
// - output:   destination tensor; viewed as an Eigen scalar when D == 1 and
//             as a rank-(D - R_D) Eigen tensor otherwise.
// - dims:     axes to reduce; a negative entry is offset by the input rank.
//             Every entry is written into an Eigen::array<int, R_D>, so the
//             caller must not pass more than R_D entries (not checked here).
// - keep_dim: when true and the input rank is > 1, the entries of
//             output->dims() at the reduced axes are dropped so the Eigen
//             view of the output uses the squeezed shape.
template <typename DeviceContext,
          typename T,
          size_t D,
          size_t R_D,
          typename Functor>
void ReduceFunctor(const DeviceContext& context,
                   const pten::DenseTensor& input,
                   pten::DenseTensor* output,
                   const std::vector<int64_t>& dims,
                   bool keep_dim) {
  auto x = EigenTensor<T, D>::From(input);
  auto x_rank = static_cast<int>(x.dimensions().size());

  // Normalize negative axes against the input rank and copy them into the
  // fixed-size Eigen::array that is handed to the functor.
  auto reduce_dim = Eigen::array<int, R_D>();
  std::vector<int64_t> dims_ref = dims;
  for (size_t i = 0; i < dims_ref.size(); ++i) {
    if (dims_ref[i] < 0) dims_ref[i] = x_rank + dims_ref[i];
    reduce_dim[i] = dims_ref[i];
  }

  // Construct the squeezed output dims: mark every reduced axis with a
  // sentinel value and erase the marked entries.
  DDim out_dims = output->dims();
  if (keep_dim && x_rank > 1) {
    const int kDelFlag = -2;
    auto dims_vector = paddle::framework::vectorize(out_dims);
    for (size_t i = 0; i < dims_ref.size(); ++i) {
      dims_vector[dims_ref[i]] = kDelFlag;
    }
    // std::remove is qualified explicitly; the unqualified call only
    // resolved through ADL on the iterator type.
    dims_vector.erase(
        std::remove(dims_vector.begin(), dims_vector.end(), kDelFlag),
        dims_vector.end());
    out_dims = paddle::framework::make_ddim(dims_vector);
  }

  auto& place = *context.eigen_device();
  Functor functor;

  if (D == 1) {
    // Rank-1 input: the output is viewed as a scalar.
    auto out = EigenScalar<T>::From(*output);
    functor(place, &x, &out, reduce_dim);
  } else {
    auto out = EigenTensor<T, (D - R_D)>::From(*output, out_dims);
    functor(place, &x, &out, reduce_dim);
  }
}
// Dispatch helper: when the runtime values `ndim` and `rdim` equal the
// compile-time constants NDIM and RDIM, calls
// ReduceFunctor<DeviceContext, OutT, NDIM, RDIM, Functor>.
//
// The macro reads names from its expansion site: the variables `ndim`,
// `rdim`, `dev_ctx`, `input`, `output`, `dims`, `keep_dim` and the types
// `DeviceContext`, `OutT`, `Functor` must all be in scope there.
// It expands to a bare `if` statement (no do/while wrapper).
#define HANDLE_REDUCE_DIM(NDIM, RDIM) \
if (ndim == NDIM && rdim == RDIM) { \
ReduceFunctor<DeviceContext, OutT, NDIM, RDIM, Functor>( \
dev_ctx, input, output, dims, keep_dim); \
}
//////////////// HandleLargeDim
inline
void
GetShuffledDim
(
const
DDim
&
src_dims
,
DDim
*
dst_dims
,
const
std
::
vector
<
int64_t
>&
reduced_dims
,
std
::
vector
<
int64_t
>*
perm_axis
)
{
// check if it's a reduced dim
std
::
vector
<
bool
>
src_dims_check
(
src_dims
.
size
(),
false
);
size_t
src_size
=
src_dims
.
size
();
size_t
reduce_size
=
reduced_dims
.
size
();
std
::
vector
<
int64_t
>
regular_reduced_dims
=
reduced_dims
;
for
(
size_t
i
=
0
;
i
<
regular_reduced_dims
.
size
();
i
++
)
{
if
(
regular_reduced_dims
[
i
]
<
0
)
{
regular_reduced_dims
[
i
]
=
src_size
+
regular_reduced_dims
[
i
];
}
}
for
(
size_t
i
=
0
;
i
<
reduce_size
;
++
i
)
{
dst_dims
->
at
(
src_size
-
reduce_size
+
i
)
=
src_dims
[
regular_reduced_dims
[
i
]];
(
*
perm_axis
)[
src_size
-
reduce_size
+
i
]
=
regular_reduced_dims
[
i
];
src_dims_check
[
regular_reduced_dims
[
i
]]
=
true
;
}
size_t
offset
=
0
;
for
(
size_t
i
=
0
;
i
<
src_dims_check
.
size
();
++
i
)
{
bool
is_reduced
=
src_dims_check
[
i
];
if
(
!
is_reduced
)
{
(
*
perm_axis
)[
offset
]
=
i
;
dst_dims
->
at
(
offset
++
)
=
src_dims
[
i
];
}
}
}
// Writes into `shuffled_input` a transposed copy of `input` in which the
// axes listed in `dims` have been moved to the end (the permutation comes
// from GetShuffledDim).
//
// - dev_ctx:        device context handed to the transpose functor.
// - input:          source tensor.
// - shuffled_input: destination; resized to the permuted dims and allocated
//                   as OutT before the transpose runs.
// - dims:           axes to move to the end.
template <typename DeviceContext, typename OutT>
void GetShuffledInput(const DeviceContext& dev_ctx,
                      const pten::DenseTensor& input,
                      pten::DenseTensor* shuffled_input,
                      const std::vector<int64_t>& dims) {
  const DDim& in_dims = input.dims();

  DDim permuted_dims(in_dims);
  std::vector<int64_t> permutation(in_dims.size());
  GetShuffledDim(in_dims, &permuted_dims, dims, &permutation);

  shuffled_input->Resize(permuted_dims);
  shuffled_input->mutable_data<OutT>();

  pten::math::TransposeNormal<DeviceContext, OutT> transpose;
  transpose(dev_ctx, input, shuffled_input, permutation);
}
// Reduction path that works on a 2-D view of the input: the reduced axes
// are transposed to the end, the tensor is viewed as {unreduced, reduced},
// and axis 1 is reduced with ReduceFunctor<..., 2, 1, Functor>.
//
// - dev_ctx:  device context.
// - input:    tensor to reduce.
// - output:   destination; temporarily resized to {unreduced} for the
//             reduction and restored to its original dims before returning.
// - dims:     axes to reduce.
// - keep_dim: forwarded to ReduceFunctor.
//
// NOTE(review): `reduced` is computed by dividing by output->numel();
// this assumes the output is non-empty, which is not checked here.
template <typename DeviceContext, typename OutT, typename Functor>
void HandleLargeDim(const DeviceContext& dev_ctx,
                    const pten::DenseTensor& input,
                    pten::DenseTensor* output,
                    const std::vector<int64_t>& dims,
                    bool keep_dim) {
  // Scratch tensor that receives the input with reduced axes moved last.
  pten::DenseTensor transposed(
      pten::make_intrusive<paddle::experimental::SharedStorage>(
          input.place()),
      input.meta());
  GetShuffledInput<DeviceContext, OutT>(dev_ctx, input, &transposed, dims);

  // View the transposed data as a 2-D tensor of shape {unreduced, reduced}.
  const int64_t unreduced = output->numel();
  const int64_t reduced = transposed.numel() / unreduced;
  transposed.Resize({unreduced, reduced});

  // Flatten the output for the reduction, then put its dims back.
  DDim saved_output_dims = output->dims();
  output->Resize({unreduced});
  const std::vector<int64_t> last_axis{1};
  ReduceFunctor<DeviceContext, OutT, 2, 1, Functor>(
      dev_ctx, transposed, output, last_axis, keep_dim);
  output->Resize(saved_output_dims);
}
////////////// ReduceKernel
// Entry point of the Eigen-based reduction: allocates `output` as OutT and
// picks one of three paths.
//   1. reduce_all: flatten the input to a 1-D Eigen vector and reduce axis 0
//      into a scalar.
//   2. input rank > 6: delegate to HandleLargeDim.
//   3. otherwise: dispatch on (rank, number of reduced axes) through
//      HANDLE_REDUCE_DIM. Combinations that are not listed below fall
//      through without invoking a functor.
//
// The template parameter T is not used in the body; all views use OutT.
//
// - dev_ctx:    device context; supplies the Eigen device.
// - input:      tensor to reduce.
// - output:     destination tensor.
// - dims:       axes to reduce (ignored when reduce_all is true).
// - keep_dim:   forwarded to the selected reduction path.
// - reduce_all: selects path 1.
template <typename DeviceContext,
          typename T,
          typename OutT,
          typename Functor>
void ReduceKernelImpl(const DeviceContext& dev_ctx,
                      const pten::DenseTensor& input,
                      pten::DenseTensor* output,
                      const std::vector<int64_t>& dims,
                      bool keep_dim,
                      bool reduce_all) {
  output->mutable_data<OutT>();

  if (reduce_all) {
    // Flatten and reduce 1-D tensor
    auto flat_input = EigenVector<OutT>::Flatten(input);
    auto scalar_output = EigenScalar<OutT>::From(*output);
    auto& eigen_dev = *dev_ctx.eigen_device();
    auto first_axis = Eigen::array<int, 1>({{0}});

    Functor reducer;
    reducer(eigen_dev, &flat_input, &scalar_output, first_axis);
    return;
  }

  // `ndim` and `rdim` are read by HANDLE_REDUCE_DIM and must keep these names.
  int ndim = input.dims().size();
  int rdim = dims.size();

  if (ndim > 6) {
    HandleLargeDim<DeviceContext, OutT, Functor>(
        dev_ctx, input, output, dims, keep_dim);
    return;
  }

  HANDLE_REDUCE_DIM(6, 5);
  HANDLE_REDUCE_DIM(6, 4);
  HANDLE_REDUCE_DIM(6, 3);
  HANDLE_REDUCE_DIM(6, 2);
  HANDLE_REDUCE_DIM(6, 1);
  HANDLE_REDUCE_DIM(5, 4);
  HANDLE_REDUCE_DIM(5, 3);
  HANDLE_REDUCE_DIM(5, 2);
  HANDLE_REDUCE_DIM(5, 1);
  HANDLE_REDUCE_DIM(4, 3);
  HANDLE_REDUCE_DIM(4, 2);
  HANDLE_REDUCE_DIM(4, 1);
  HANDLE_REDUCE_DIM(3, 2);
  HANDLE_REDUCE_DIM(3, 1);
  HANDLE_REDUCE_DIM(2, 1);
  HANDLE_REDUCE_DIM(1, 1);
}
//////// Sum Functor ///////
// Reduction functor: assigns the result of x->sum(dim) to y->device(place).
//
// - place: forwarded to y->device(...).
// - x:     source expression; must provide sum(dim).
// - y:     destination; must provide device(place) returning something
//          assignable from the sum expression.
// - dim:   axes argument forwarded to sum().
struct SumFunctor {
  template <typename Device, typename In, typename Out, typename Axes>
  void operator()(const Device& place, In* x, Out* y, const Axes& dim) {
    auto& source = *x;
    auto& target = *y;
    target.device(place) = source.sum(dim);
  }
};
//////// Mean Functor ///////
// Reduction functor: assigns the result of x->mean(dim) to y->device(place).
//
// - place: forwarded to y->device(...).
// - x:     source expression; must provide mean(dim).
// - y:     destination; must provide device(place) returning something
//          assignable from the mean expression.
// - dim:   axes argument forwarded to mean().
struct MeanFunctor {
  template <typename Device, typename In, typename Out, typename Axes>
  void operator()(const Device& place, In* x, Out* y, const Axes& dim) {
    auto& source = *x;
    auto& target = *y;
    target.device(place) = source.mean(dim);
  }
};
}
// namespace eigen
}
// namespace pten
paddle/pten/kernels/math_kernel.h
浏览文件 @
c48bd3ff
...
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/empty_kernel.h"
namespace
pten
{
...
...
@@ -121,4 +122,34 @@ DenseTensor Multiply(const ContextT& dev_ctx,
return
dense_out
;
}
// Convenience wrapper around MeanKernel: infers the output metadata with
// ReduceInferMeta, creates the output tensor through pten::Empty, runs the
// kernel and returns the tensor by value.
//
// - dev_ctx:  device context passed to pten::Empty and MeanKernel.
// - x:        input tensor.
// - axis:     axes forwarded to ReduceInferMeta and MeanKernel.
// - keep_dim: forwarded unchanged to ReduceInferMeta and MeanKernel.
template <typename T, typename Context>
DenseTensor Mean(const Context& dev_ctx,
                 const DenseTensor& x,
                 const std::vector<int64_t>& axis,
                 bool keep_dim) {
  auto result_meta = ReduceInferMeta(x.meta(), axis, keep_dim);
  auto result = pten::Empty<T, Context>(dev_ctx, std::move(result_meta));

  // reduce_all is always passed as false from this wrapper.
  MeanKernel<T, Context>(
      dev_ctx, x, axis, keep_dim, /*reduce_all=*/false, &result);
  return result;
}
// Convenience wrapper around SumKernel: infers the output metadata with
// ReduceInferMeta (including the requested dtype), creates the output
// tensor through pten::Empty, runs the kernel and returns the tensor.
//
// - dev_ctx:  device context passed to pten::Empty and SumKernel.
// - x:        input tensor.
// - axis:     axes forwarded to ReduceInferMeta and SumKernel.
// - dtype:    forwarded to ReduceInferMeta; the kernel receives the dtype
//             stored in the inferred metadata.
// - keep_dim: forwarded unchanged to ReduceInferMeta and SumKernel.
template <typename T, typename Context>
DenseTensor Sum(const Context& dev_ctx,
                const DenseTensor& x,
                const std::vector<int64_t>& axis,
                DataType dtype,
                bool keep_dim) {
  auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim, dtype);

  // Read the inferred dtype before out_meta is handed to pten::Empty via
  // std::move, so nothing is read from the moved-from object afterwards.
  const auto out_dtype = out_meta.dtype;
  auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));

  // Per the original author's note, the kernel works out the effective
  // reduce_all itself, so passing the default (false) here is fine.
  bool reduce_all = false;
  SumKernel<T, Context>(
      dev_ctx, x, axis, keep_dim, reduce_all, out_dtype, &dense_out);
  return dense_out;
}
}
// namespace pten
paddle/pten/tests/kernels/test_mean_dev_api.cc
浏览文件 @
c48bd3ff
...
...
@@ -15,7 +15,7 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/pten/
include/math
.h"
#include "paddle/pten/
kernels/math_kernel
.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
...
...
paddle/pten/tests/kernels/test_sum_dev_api.cc
浏览文件 @
c48bd3ff
...
...
@@ -15,7 +15,7 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/pten/
include/math
.h"
#include "paddle/pten/
kernels/math_kernel
.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录