Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
6b47507d
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
6b47507d
编写于
9月 20, 2022
作者:
Y
YuanRisheng
提交者:
GitHub
9月 20, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
move reduce func (#46248)
上级
aee4f8ab
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
199 addition
and
198 deletion
+199
-198
paddle/phi/kernels/cpu/dirichlet_kernel.cc
paddle/phi/kernels/cpu/dirichlet_kernel.cc
+2
-2
paddle/phi/kernels/cpu/reduce.h
paddle/phi/kernels/cpu/reduce.h
+5
-175
paddle/phi/kernels/funcs/matrix_reduce.cc
paddle/phi/kernels/funcs/matrix_reduce.cc
+2
-2
paddle/phi/kernels/funcs/reduce_function.h
paddle/phi/kernels/funcs/reduce_function.h
+179
-8
paddle/phi/kernels/gpu/dirichlet_kernel.cu
paddle/phi/kernels/gpu/dirichlet_kernel.cu
+2
-2
paddle/phi/kernels/impl/logsumexp_kernel_impl.h
paddle/phi/kernels/impl/logsumexp_kernel_impl.h
+5
-5
paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
+2
-2
paddle/phi/kernels/impl/solve_grad_kernel_impl.h
paddle/phi/kernels/impl/solve_grad_kernel_impl.h
+2
-2
未找到文件。
paddle/phi/kernels/cpu/dirichlet_kernel.cc
浏览文件 @
6b47507d
...
@@ -16,9 +16,9 @@
...
@@ -16,9 +16,9 @@
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cpu/elementwise.h"
#include "paddle/phi/kernels/cpu/elementwise.h"
#include "paddle/phi/kernels/cpu/reduce.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/impl/dirichlet_kernel_impl.h"
#include "paddle/phi/kernels/impl/dirichlet_kernel_impl.h"
...
@@ -83,7 +83,7 @@ struct DirichletSampler<CPUContext, T> {
...
@@ -83,7 +83,7 @@ struct DirichletSampler<CPUContext, T> {
gamma_sum
.
Resize
(
new_shape
);
gamma_sum
.
Resize
(
new_shape
);
dev_ctx
.
template
Alloc
<
T
>(
&
gamma_sum
);
dev_ctx
.
template
Alloc
<
T
>(
&
gamma_sum
);
ReduceKernelImpl
<
CPUContext
,
T
,
T
,
funcs
::
SumFunctor
>
(
funcs
::
ReduceKernelImpl
<
CPUContext
,
T
,
T
,
funcs
::
SumFunctor
>
(
dev_ctx
,
dev_ctx
,
gamma_samples
,
gamma_samples
,
&
gamma_sum
,
&
gamma_sum
,
...
...
paddle/phi/kernels/cpu/reduce.h
浏览文件 @
6b47507d
...
@@ -16,181 +16,11 @@
...
@@ -16,181 +16,11 @@
#include <set>
#include <set>
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/visit_type.h"
#include "paddle/phi/core/visit_type.h"
#include "paddle/phi/kernels/cast_kernel.h"
#include "paddle/phi/kernels/cast_kernel.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/operators/eigen/eigen_function.h"
namespace
phi
{
template
<
typename
DeviceContext
,
typename
T
,
size_t
D
,
size_t
R_D
,
typename
Functor
>
void
ReduceFunctor
(
const
DeviceContext
&
context
,
const
phi
::
DenseTensor
&
input
,
phi
::
DenseTensor
*
output
,
const
std
::
vector
<
int64_t
>&
dims
,
bool
keep_dim
)
{
auto
x
=
EigenTensor
<
T
,
D
>::
From
(
input
);
auto
x_rank
=
static_cast
<
int
>
(
x
.
dimensions
().
size
());
auto
reduce_dim
=
Eigen
::
array
<
int
,
R_D
>
();
std
::
vector
<
int64_t
>
dims_ref
=
dims
;
for
(
size_t
i
=
0
;
i
<
dims_ref
.
size
();
++
i
)
{
if
(
dims_ref
[
i
]
<
0
)
dims_ref
[
i
]
=
x_rank
+
dims_ref
[
i
];
reduce_dim
[
i
]
=
dims_ref
[
i
];
}
// construct the squeezed output tensor
DDim
out_dims
=
output
->
dims
();
if
(
keep_dim
&&
x_rank
>
1
)
{
const
int
kDelFlag
=
-
2
;
auto
dims_vector
=
phi
::
vectorize
(
out_dims
);
for
(
size_t
i
=
0
;
i
<
dims_ref
.
size
();
++
i
)
{
dims_vector
[
dims_ref
[
i
]]
=
kDelFlag
;
}
dims_vector
.
erase
(
remove
(
dims_vector
.
begin
(),
dims_vector
.
end
(),
kDelFlag
),
dims_vector
.
end
());
out_dims
=
phi
::
make_ddim
(
dims_vector
);
}
auto
&
place
=
*
context
.
eigen_device
();
Functor
functor
;
if
(
D
==
1
)
{
auto
out
=
EigenScalar
<
T
>::
From
(
*
output
);
functor
(
place
,
&
x
,
&
out
,
reduce_dim
);
}
else
{
auto
out
=
EigenTensor
<
T
,
(
D
-
R_D
)
>::
From
(
*
output
,
out_dims
);
functor
(
place
,
&
x
,
&
out
,
reduce_dim
);
}
}
#define HANDLE_REDUCE_DIM(NDIM, RDIM) \
if (ndim == NDIM && rdim == RDIM) { \
ReduceFunctor<DeviceContext, OutT, NDIM, RDIM, Functor>( \
dev_ctx, input, output, dims, keep_dim); \
}
//////////////// HandleLargeDim
inline
void
GetShuffledDim
(
const
DDim
&
src_dims
,
DDim
*
dst_dims
,
const
std
::
vector
<
int64_t
>&
reduced_dims
,
std
::
vector
<
int
>*
perm_axis
)
{
// check if it's a reduced dim
std
::
vector
<
bool
>
src_dims_check
(
src_dims
.
size
(),
false
);
size_t
src_size
=
src_dims
.
size
();
size_t
reduce_size
=
reduced_dims
.
size
();
std
::
vector
<
int64_t
>
regular_reduced_dims
=
reduced_dims
;
for
(
size_t
i
=
0
;
i
<
regular_reduced_dims
.
size
();
i
++
)
{
if
(
regular_reduced_dims
[
i
]
<
0
)
{
regular_reduced_dims
[
i
]
=
src_size
+
regular_reduced_dims
[
i
];
}
}
for
(
size_t
i
=
0
;
i
<
reduce_size
;
++
i
)
{
dst_dims
->
at
(
src_size
-
reduce_size
+
i
)
=
src_dims
[
regular_reduced_dims
[
i
]];
(
*
perm_axis
)[
src_size
-
reduce_size
+
i
]
=
regular_reduced_dims
[
i
];
src_dims_check
[
regular_reduced_dims
[
i
]]
=
true
;
}
size_t
offset
=
0
;
for
(
size_t
i
=
0
;
i
<
src_dims_check
.
size
();
++
i
)
{
bool
is_reduced
=
src_dims_check
[
i
];
if
(
!
is_reduced
)
{
(
*
perm_axis
)[
offset
]
=
i
;
dst_dims
->
at
(
offset
++
)
=
src_dims
[
i
];
}
}
}
template
<
typename
DeviceContext
,
typename
OutT
>
void
GetShuffledInput
(
const
DeviceContext
&
dev_ctx
,
const
phi
::
DenseTensor
&
input
,
phi
::
DenseTensor
*
shuffled_input
,
const
std
::
vector
<
int64_t
>&
dims
)
{
DDim
shuffled_dims
(
input
.
dims
());
std
::
vector
<
int
>
perm_axis
(
input
.
dims
().
size
());
GetShuffledDim
(
input
.
dims
(),
&
shuffled_dims
,
dims
,
&
perm_axis
);
shuffled_input
->
Resize
(
shuffled_dims
);
dev_ctx
.
template
Alloc
<
OutT
>(
shuffled_input
);
phi
::
funcs
::
TransposeNormal
<
DeviceContext
,
OutT
>
trans
;
trans
(
dev_ctx
,
input
,
shuffled_input
,
perm_axis
);
}
template
<
typename
DeviceContext
,
typename
OutT
,
typename
Functor
>
void
HandleLargeDim
(
const
DeviceContext
&
dev_ctx
,
const
phi
::
DenseTensor
&
input
,
phi
::
DenseTensor
*
output
,
const
std
::
vector
<
int64_t
>&
dims
,
bool
keep_dim
)
{
// shuffle the reduced dim to the end
phi
::
DenseTensor
shuffled_input
;
GetShuffledInput
<
DeviceContext
,
OutT
>
(
dev_ctx
,
input
,
&
shuffled_input
,
dims
);
// transpose to 2D tensor whose shape is {unreduced, reduced}.
namespace
phi
{
const
int64_t
unreduced
=
output
->
numel
();
const
int64_t
reduced
=
shuffled_input
.
numel
()
/
unreduced
;
shuffled_input
.
ResizeAndAllocate
({
unreduced
,
reduced
});
DDim
output_dim
=
output
->
dims
();
output
->
ResizeAndAllocate
({
unreduced
});
ReduceFunctor
<
DeviceContext
,
OutT
,
2
,
1
,
Functor
>
(
dev_ctx
,
shuffled_input
,
output
,
{
1
},
keep_dim
);
output
->
ResizeAndAllocate
(
output_dim
);
}
////////////// ReduceKernel
template
<
typename
DeviceContext
,
typename
T
,
typename
OutT
,
typename
Functor
>
void
ReduceKernelImpl
(
const
DeviceContext
&
dev_ctx
,
const
phi
::
DenseTensor
&
input
,
phi
::
DenseTensor
*
output
,
const
std
::
vector
<
int64_t
>&
dims
,
bool
keep_dim
,
bool
reduce_all
)
{
dev_ctx
.
template
Alloc
<
OutT
>(
output
);
if
(
reduce_all
)
{
// Flatten and reduce 1-D tensor
auto
x
=
EigenVector
<
OutT
>::
Flatten
(
input
);
auto
out
=
EigenScalar
<
OutT
>::
From
(
*
output
);
auto
&
dev
=
*
dev_ctx
.
eigen_device
();
auto
reduce_dim
=
Eigen
::
array
<
int
,
1
>
({{
0
}});
Functor
functor
;
functor
(
dev
,
&
x
,
&
out
,
reduce_dim
);
}
else
{
int
ndim
=
input
.
dims
().
size
();
int
rdim
=
dims
.
size
();
if
(
ndim
>
6
)
{
HandleLargeDim
<
DeviceContext
,
OutT
,
Functor
>
(
dev_ctx
,
input
,
output
,
dims
,
keep_dim
);
}
else
{
HANDLE_REDUCE_DIM
(
6
,
5
);
HANDLE_REDUCE_DIM
(
6
,
4
);
HANDLE_REDUCE_DIM
(
6
,
3
);
HANDLE_REDUCE_DIM
(
6
,
2
);
HANDLE_REDUCE_DIM
(
6
,
1
);
HANDLE_REDUCE_DIM
(
5
,
4
);
HANDLE_REDUCE_DIM
(
5
,
3
);
HANDLE_REDUCE_DIM
(
5
,
2
);
HANDLE_REDUCE_DIM
(
5
,
1
);
HANDLE_REDUCE_DIM
(
4
,
3
);
HANDLE_REDUCE_DIM
(
4
,
2
);
HANDLE_REDUCE_DIM
(
4
,
1
);
HANDLE_REDUCE_DIM
(
3
,
2
);
HANDLE_REDUCE_DIM
(
3
,
1
);
HANDLE_REDUCE_DIM
(
2
,
1
);
HANDLE_REDUCE_DIM
(
1
,
1
);
}
}
}
template
<
typename
DeviceContext
,
typename
T
,
typename
Functor
>
template
<
typename
DeviceContext
,
typename
T
,
typename
Functor
>
void
Reduce
(
const
DeviceContext
&
dev_ctx
,
void
Reduce
(
const
DeviceContext
&
dev_ctx
,
...
@@ -218,7 +48,7 @@ void Reduce(const DeviceContext& dev_ctx,
...
@@ -218,7 +48,7 @@ void Reduce(const DeviceContext& dev_ctx,
// do reduce sum
// do reduce sum
PD_VISIT_ALL_TYPES
(
PD_VISIT_ALL_TYPES
(
x
.
dtype
(),
"ReduceKernelImpl"
,
([
&
]
{
x
.
dtype
(),
"ReduceKernelImpl"
,
([
&
]
{
phi
::
ReduceKernelImpl
<
DeviceContext
,
T
,
data_t
,
Functor
>
(
phi
::
funcs
::
ReduceKernelImpl
<
DeviceContext
,
T
,
data_t
,
Functor
>
(
dev_ctx
,
x
,
out
,
dims
,
keep_dim
,
reduce_all
);
dev_ctx
,
x
,
out
,
dims
,
keep_dim
,
reduce_all
);
}));
}));
}
else
{
}
else
{
...
@@ -228,7 +58,7 @@ void Reduce(const DeviceContext& dev_ctx,
...
@@ -228,7 +58,7 @@ void Reduce(const DeviceContext& dev_ctx,
// do reduce sum
// do reduce sum
PD_VISIT_ALL_TYPES
(
PD_VISIT_ALL_TYPES
(
out_dtype
,
"ReduceKernelImpl"
,
([
&
]
{
out_dtype
,
"ReduceKernelImpl"
,
([
&
]
{
phi
::
ReduceKernelImpl
<
DeviceContext
,
T
,
data_t
,
Functor
>
(
phi
::
funcs
::
ReduceKernelImpl
<
DeviceContext
,
T
,
data_t
,
Functor
>
(
dev_ctx
,
tmp_tensor
,
out
,
dims
,
keep_dim
,
reduce_all
);
dev_ctx
,
tmp_tensor
,
out
,
dims
,
keep_dim
,
reduce_all
);
}));
}));
}
}
...
@@ -255,7 +85,7 @@ void BoolReduceKernel(const DeviceContext& dev_ctx,
...
@@ -255,7 +85,7 @@ void BoolReduceKernel(const DeviceContext& dev_ctx,
}
}
reduce_all
=
(
reduce_all
||
full_dim
);
reduce_all
=
(
reduce_all
||
full_dim
);
ReduceKernelImpl
<
DeviceContext
,
bool
,
OutT
,
Functor
>
(
funcs
::
ReduceKernelImpl
<
DeviceContext
,
bool
,
OutT
,
Functor
>
(
dev_ctx
,
input
,
output
,
dims
,
keep_dim
,
reduce_all
);
dev_ctx
,
input
,
output
,
dims
,
keep_dim
,
reduce_all
);
}
}
...
...
paddle/phi/kernels/funcs/matrix_reduce.cc
浏览文件 @
6b47507d
...
@@ -14,7 +14,7 @@
...
@@ -14,7 +14,7 @@
#include "paddle/phi/kernels/funcs/matrix_reduce.h"
#include "paddle/phi/kernels/funcs/matrix_reduce.h"
#include "paddle/phi/kernels/
cpu/reduce
.h"
#include "paddle/phi/kernels/
funcs/reduce_function
.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
namespace
phi
{
namespace
phi
{
...
@@ -47,7 +47,7 @@ class MatrixReduceSumFunctor<T, CPUContext> {
...
@@ -47,7 +47,7 @@ class MatrixReduceSumFunctor<T, CPUContext> {
out_reduce_dims
.
push_back
(
idx
);
out_reduce_dims
.
push_back
(
idx
);
}
}
}
}
phi
::
ReduceKernelImpl
<
CPUContext
,
T
,
T
,
phi
::
funcs
::
SumFunctor
>
(
ReduceKernelImpl
<
CPUContext
,
T
,
T
,
phi
::
funcs
::
SumFunctor
>
(
dev_ctx
,
in
,
out
,
out_reduce_dims
,
true
,
false
);
dev_ctx
,
in
,
out
,
out_reduce_dims
,
true
,
false
);
}
}
};
};
...
...
paddle/phi/kernels/funcs/reduce_function.h
浏览文件 @
6b47507d
...
@@ -15,8 +15,7 @@
...
@@ -15,8 +15,7 @@
#pragma once
#pragma once
// CUDA, XPU and HIP use same api
// CUDA, XPU and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
#if defined(__NVCC__) || defined(__HIPCC__) || defined(__xpu__)
defined(PADDLE_WITH_XPU_KP)
#include <algorithm>
#include <algorithm>
#include <cmath>
#include <cmath>
...
@@ -40,10 +39,6 @@ namespace cub = hipcub;
...
@@ -40,10 +39,6 @@ namespace cub = hipcub;
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#endif
#endif
#include "paddle/phi/api/ext/dispatch.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/utils/array.h"
#include "paddle/phi/kernels/cast_kernel.h"
#include "paddle/phi/kernels/cast_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h"
...
@@ -58,9 +53,19 @@ namespace kps = phi::kps;
...
@@ -58,9 +53,19 @@ namespace kps = phi::kps;
#ifdef PADDLE_WITH_XPU_KP
#ifdef PADDLE_WITH_XPU_KP
using
dim3
=
phi
::
kps
::
dim3
;
using
dim3
=
phi
::
kps
::
dim3
;
#endif
#endif
#endif
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/utils/array.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace
phi
{
namespace
phi
{
namespace
funcs
{
namespace
funcs
{
#if defined(__NVCC__) || defined(__HIPCC__) || defined(__xpu__)
namespace
details
{
namespace
details
{
static
inline
int
GetLastPow2
(
int
n
)
{
static
inline
int
GetLastPow2
(
int
n
)
{
...
@@ -1190,8 +1195,174 @@ void ReduceKernel(const KPDevice& dev_ctx,
...
@@ -1190,8 +1195,174 @@ void ReduceKernel(const KPDevice& dev_ctx,
is_mean
);
is_mean
);
}
}
#endif
template
<
typename
DeviceContext
,
typename
T
,
size_t
D
,
size_t
R_D
,
typename
Functor
>
void
ReduceFunctor
(
const
DeviceContext
&
context
,
const
phi
::
DenseTensor
&
input
,
phi
::
DenseTensor
*
output
,
const
std
::
vector
<
int64_t
>&
dims
,
bool
keep_dim
)
{
auto
x
=
EigenTensor
<
T
,
D
>::
From
(
input
);
auto
x_rank
=
static_cast
<
int
>
(
x
.
dimensions
().
size
());
auto
reduce_dim
=
Eigen
::
array
<
int
,
R_D
>
();
std
::
vector
<
int64_t
>
dims_ref
=
dims
;
for
(
size_t
i
=
0
;
i
<
dims_ref
.
size
();
++
i
)
{
if
(
dims_ref
[
i
]
<
0
)
dims_ref
[
i
]
=
x_rank
+
dims_ref
[
i
];
reduce_dim
[
i
]
=
dims_ref
[
i
];
}
// construct the squeezed output tensor
DDim
out_dims
=
output
->
dims
();
if
(
keep_dim
&&
x_rank
>
1
)
{
const
int
kDelFlag
=
-
2
;
auto
dims_vector
=
phi
::
vectorize
(
out_dims
);
for
(
size_t
i
=
0
;
i
<
dims_ref
.
size
();
++
i
)
{
dims_vector
[
dims_ref
[
i
]]
=
kDelFlag
;
}
dims_vector
.
erase
(
remove
(
dims_vector
.
begin
(),
dims_vector
.
end
(),
kDelFlag
),
dims_vector
.
end
());
out_dims
=
phi
::
make_ddim
(
dims_vector
);
}
auto
&
place
=
*
context
.
eigen_device
();
Functor
functor
;
if
(
D
==
1
)
{
auto
out
=
EigenScalar
<
T
>::
From
(
*
output
);
functor
(
place
,
&
x
,
&
out
,
reduce_dim
);
}
else
{
auto
out
=
EigenTensor
<
T
,
(
D
-
R_D
)
>::
From
(
*
output
,
out_dims
);
functor
(
place
,
&
x
,
&
out
,
reduce_dim
);
}
}
#define HANDLE_REDUCE_DIM(NDIM, RDIM) \
if (ndim == NDIM && rdim == RDIM) { \
ReduceFunctor<DeviceContext, OutT, NDIM, RDIM, Functor>( \
dev_ctx, input, output, dims, keep_dim); \
}
//////////////// HandleLargeDim
inline
void
GetShuffledDim
(
const
DDim
&
src_dims
,
DDim
*
dst_dims
,
const
std
::
vector
<
int64_t
>&
reduced_dims
,
std
::
vector
<
int
>*
perm_axis
)
{
// check if it's a reduced dim
std
::
vector
<
bool
>
src_dims_check
(
src_dims
.
size
(),
false
);
size_t
src_size
=
src_dims
.
size
();
size_t
reduce_size
=
reduced_dims
.
size
();
std
::
vector
<
int64_t
>
regular_reduced_dims
=
reduced_dims
;
for
(
size_t
i
=
0
;
i
<
regular_reduced_dims
.
size
();
i
++
)
{
if
(
regular_reduced_dims
[
i
]
<
0
)
{
regular_reduced_dims
[
i
]
=
src_size
+
regular_reduced_dims
[
i
];
}
}
for
(
size_t
i
=
0
;
i
<
reduce_size
;
++
i
)
{
dst_dims
->
at
(
src_size
-
reduce_size
+
i
)
=
src_dims
[
regular_reduced_dims
[
i
]];
(
*
perm_axis
)[
src_size
-
reduce_size
+
i
]
=
regular_reduced_dims
[
i
];
src_dims_check
[
regular_reduced_dims
[
i
]]
=
true
;
}
size_t
offset
=
0
;
for
(
size_t
i
=
0
;
i
<
src_dims_check
.
size
();
++
i
)
{
bool
is_reduced
=
src_dims_check
[
i
];
if
(
!
is_reduced
)
{
(
*
perm_axis
)[
offset
]
=
i
;
dst_dims
->
at
(
offset
++
)
=
src_dims
[
i
];
}
}
}
template
<
typename
DeviceContext
,
typename
OutT
>
void
GetShuffledInput
(
const
DeviceContext
&
dev_ctx
,
const
phi
::
DenseTensor
&
input
,
phi
::
DenseTensor
*
shuffled_input
,
const
std
::
vector
<
int64_t
>&
dims
)
{
DDim
shuffled_dims
(
input
.
dims
());
std
::
vector
<
int
>
perm_axis
(
input
.
dims
().
size
());
GetShuffledDim
(
input
.
dims
(),
&
shuffled_dims
,
dims
,
&
perm_axis
);
shuffled_input
->
Resize
(
shuffled_dims
);
dev_ctx
.
template
Alloc
<
OutT
>(
shuffled_input
);
phi
::
funcs
::
TransposeNormal
<
DeviceContext
,
OutT
>
trans
;
trans
(
dev_ctx
,
input
,
shuffled_input
,
perm_axis
);
}
template
<
typename
DeviceContext
,
typename
OutT
,
typename
Functor
>
void
HandleLargeDim
(
const
DeviceContext
&
dev_ctx
,
const
phi
::
DenseTensor
&
input
,
phi
::
DenseTensor
*
output
,
const
std
::
vector
<
int64_t
>&
dims
,
bool
keep_dim
)
{
// shuffle the reduced dim to the end
phi
::
DenseTensor
shuffled_input
;
GetShuffledInput
<
DeviceContext
,
OutT
>
(
dev_ctx
,
input
,
&
shuffled_input
,
dims
);
// transpose to 2D tensor whose shape is {unreduced, reduced}.
const
int64_t
unreduced
=
output
->
numel
();
const
int64_t
reduced
=
shuffled_input
.
numel
()
/
unreduced
;
shuffled_input
.
ResizeAndAllocate
({
unreduced
,
reduced
});
DDim
output_dim
=
output
->
dims
();
output
->
ResizeAndAllocate
({
unreduced
});
ReduceFunctor
<
DeviceContext
,
OutT
,
2
,
1
,
Functor
>
(
dev_ctx
,
shuffled_input
,
output
,
{
1
},
keep_dim
);
output
->
ResizeAndAllocate
(
output_dim
);
}
////////////// ReduceKernel
template
<
typename
DeviceContext
,
typename
T
,
typename
OutT
,
typename
Functor
>
void
ReduceKernelImpl
(
const
DeviceContext
&
dev_ctx
,
const
phi
::
DenseTensor
&
input
,
phi
::
DenseTensor
*
output
,
const
std
::
vector
<
int64_t
>&
dims
,
bool
keep_dim
,
bool
reduce_all
)
{
dev_ctx
.
template
Alloc
<
OutT
>(
output
);
if
(
reduce_all
)
{
// Flatten and reduce 1-D tensor
auto
x
=
EigenVector
<
OutT
>::
Flatten
(
input
);
auto
out
=
EigenScalar
<
OutT
>::
From
(
*
output
);
auto
&
dev
=
*
dev_ctx
.
eigen_device
();
auto
reduce_dim
=
Eigen
::
array
<
int
,
1
>
({{
0
}});
Functor
functor
;
functor
(
dev
,
&
x
,
&
out
,
reduce_dim
);
}
else
{
int
ndim
=
input
.
dims
().
size
();
int
rdim
=
dims
.
size
();
if
(
ndim
>
6
)
{
HandleLargeDim
<
DeviceContext
,
OutT
,
Functor
>
(
dev_ctx
,
input
,
output
,
dims
,
keep_dim
);
}
else
{
HANDLE_REDUCE_DIM
(
6
,
5
);
HANDLE_REDUCE_DIM
(
6
,
4
);
HANDLE_REDUCE_DIM
(
6
,
3
);
HANDLE_REDUCE_DIM
(
6
,
2
);
HANDLE_REDUCE_DIM
(
6
,
1
);
HANDLE_REDUCE_DIM
(
5
,
4
);
HANDLE_REDUCE_DIM
(
5
,
3
);
HANDLE_REDUCE_DIM
(
5
,
2
);
HANDLE_REDUCE_DIM
(
5
,
1
);
HANDLE_REDUCE_DIM
(
4
,
3
);
HANDLE_REDUCE_DIM
(
4
,
2
);
HANDLE_REDUCE_DIM
(
4
,
1
);
HANDLE_REDUCE_DIM
(
3
,
2
);
HANDLE_REDUCE_DIM
(
3
,
1
);
HANDLE_REDUCE_DIM
(
2
,
1
);
HANDLE_REDUCE_DIM
(
1
,
1
);
}
}
}
}
// namespace funcs
}
// namespace funcs
}
// namespace phi
}
// namespace phi
#endif
paddle/phi/kernels/gpu/dirichlet_kernel.cu
浏览文件 @
6b47507d
...
@@ -16,10 +16,10 @@
...
@@ -16,10 +16,10 @@
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cpu/reduce.h"
#include "paddle/phi/kernels/funcs/broadcast_function.h"
#include "paddle/phi/kernels/funcs/broadcast_function.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/impl/dirichlet_kernel_impl.h"
#include "paddle/phi/kernels/impl/dirichlet_kernel_impl.h"
...
@@ -99,7 +99,7 @@ struct DirichletSampler<GPUContext, T> {
...
@@ -99,7 +99,7 @@ struct DirichletSampler<GPUContext, T> {
gamma_sum
.
Resize
(
new_shape
);
gamma_sum
.
Resize
(
new_shape
);
dev_ctx
.
template
Alloc
<
T
>(
&
gamma_sum
);
dev_ctx
.
template
Alloc
<
T
>(
&
gamma_sum
);
ReduceKernelImpl
<
GPUContext
,
T
,
T
,
funcs
::
SumFunctor
>
(
funcs
::
ReduceKernelImpl
<
GPUContext
,
T
,
T
,
funcs
::
SumFunctor
>
(
dev_ctx
,
dev_ctx
,
gamma_samples
,
gamma_samples
,
&
gamma_sum
,
&
gamma_sum
,
...
...
paddle/phi/kernels/impl/logsumexp_kernel_impl.h
浏览文件 @
6b47507d
...
@@ -16,17 +16,17 @@
...
@@ -16,17 +16,17 @@
#include <type_traits>
#include <type_traits>
#include <vector>
#include <vector>
#include "paddle/phi/kernels/cpu/reduce.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
#include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/logsumexp_kernel.h"
#include "paddle/phi/kernels/logsumexp_kernel.h"
namespace
phi
{
namespace
phi
{
#define HANDLE_DIM(NDIM, RDIM) \
#define HANDLE_DIM(NDIM, RDIM)
\
if (ndim == NDIM && rdim == RDIM) { \
if (ndim == NDIM && rdim == RDIM) {
\
ReduceFunctor<Context, T, NDIM, RDIM, LogsumexpFunctor>( \
funcs::
ReduceFunctor<Context, T, NDIM, RDIM, LogsumexpFunctor>( \
dev_ctx, x, out, axis, keepdim); \
dev_ctx, x, out, axis, keepdim);
\
}
}
struct
LogsumexpFunctor
{
struct
LogsumexpFunctor
{
...
...
paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
浏览文件 @
6b47507d
...
@@ -17,8 +17,8 @@ limitations under the License. */
...
@@ -17,8 +17,8 @@ limitations under the License. */
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/kernels/complex_kernel.h"
#include "paddle/phi/kernels/complex_kernel.h"
#include "paddle/phi/kernels/cpu/reduce.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/impl/dot_grad_kernel_impl.h"
#include "paddle/phi/kernels/impl/dot_grad_kernel_impl.h"
#include "paddle/phi/kernels/impl/matmul_kernel_impl.h"
#include "paddle/phi/kernels/impl/matmul_kernel_impl.h"
...
@@ -45,7 +45,7 @@ struct ReduceSumForMatmulGrad<CPUContext, T> {
...
@@ -45,7 +45,7 @@ struct ReduceSumForMatmulGrad<CPUContext, T> {
const
std
::
vector
<
int
>&
reduce_dims
)
{
const
std
::
vector
<
int
>&
reduce_dims
)
{
std
::
vector
<
int64_t
>
reduce_dims_tmp
(
reduce_dims
.
begin
(),
std
::
vector
<
int64_t
>
reduce_dims_tmp
(
reduce_dims
.
begin
(),
reduce_dims
.
end
());
reduce_dims
.
end
());
ReduceKernelImpl
<
CPUContext
,
T
,
T
,
phi
::
funcs
::
SumFunctor
>
(
funcs
::
ReduceKernelImpl
<
CPUContext
,
T
,
T
,
phi
::
funcs
::
SumFunctor
>
(
dev_ctx
,
input
,
output
,
reduce_dims_tmp
,
true
,
false
);
dev_ctx
,
input
,
output
,
reduce_dims_tmp
,
true
,
false
);
}
}
};
};
...
...
paddle/phi/kernels/impl/solve_grad_kernel_impl.h
浏览文件 @
6b47507d
...
@@ -16,11 +16,11 @@ limitations under the License. */
...
@@ -16,11 +16,11 @@ limitations under the License. */
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/kernels/cpu/reduce.h"
#include "paddle/phi/kernels/expand_as_kernel.h"
#include "paddle/phi/kernels/expand_as_kernel.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/matrix_solve.h"
#include "paddle/phi/kernels/funcs/matrix_solve.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/impl/solve_kernel_impl.h"
#include "paddle/phi/kernels/impl/solve_kernel_impl.h"
#include "paddle/phi/kernels/squeeze_kernel.h"
#include "paddle/phi/kernels/squeeze_kernel.h"
...
@@ -50,7 +50,7 @@ struct ReduceSumForSolvelGrad<CPUContext, T> {
...
@@ -50,7 +50,7 @@ struct ReduceSumForSolvelGrad<CPUContext, T> {
bool
keep_dims
)
{
bool
keep_dims
)
{
std
::
vector
<
int64_t
>
reduce_dims_tmp
(
reduce_dims
.
begin
(),
std
::
vector
<
int64_t
>
reduce_dims_tmp
(
reduce_dims
.
begin
(),
reduce_dims
.
end
());
reduce_dims
.
end
());
phi
::
ReduceKernelImpl
<
CPUContext
,
T
,
T
,
phi
::
funcs
::
SumFunctor
>
(
funcs
::
ReduceKernelImpl
<
CPUContext
,
T
,
T
,
phi
::
funcs
::
SumFunctor
>
(
dev_ctx
,
input
,
output
,
reduce_dims_tmp
,
keep_dims
,
false
);
dev_ctx
,
input
,
output
,
reduce_dims_tmp
,
keep_dims
,
false
);
}
}
};
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录