Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
9c5d5665
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
9c5d5665
编写于
11月 17, 2021
作者:
N
niuliling123
提交者:
GitHub
11月 17, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Modify reduce_op.op.h for xpu2 with kernel primitive api (#36904)
* Modify reduce_op.op.h for xpu2 with kernel primitive api
上级
d08753df
变更
6
展开全部
隐藏空白更改
内联
并排
Showing
6 changed file
with
407 addition
and
239 deletion
+407
-239
paddle/fluid/operators/kernel_primitives/datamover_primitives.h
.../fluid/operators/kernel_primitives/datamover_primitives.h
+6
-6
paddle/fluid/operators/kernel_primitives/helper_primitives.h
paddle/fluid/operators/kernel_primitives/helper_primitives.h
+32
-47
paddle/fluid/operators/kernel_primitives/kernel_primitives.h
paddle/fluid/operators/kernel_primitives/kernel_primitives.h
+36
-2
paddle/fluid/operators/margin_cross_entropy_op.cu
paddle/fluid/operators/margin_cross_entropy_op.cu
+4
-4
paddle/fluid/operators/reduce_ops/reduce_functor_op.h
paddle/fluid/operators/reduce_ops/reduce_functor_op.h
+8
-8
paddle/fluid/operators/reduce_ops/reduce_op.cu.h
paddle/fluid/operators/reduce_ops/reduce_op.cu.h
+321
-172
未找到文件。
paddle/fluid/operators/kernel_primitives/datamover_primitives.h
浏览文件 @
9c5d5665
...
...
@@ -360,12 +360,12 @@ __device__ __forceinline__ void ReadDataBc(
* reduce_last_dim: Used to indicate whether the dimension of reduce contains
* the lowest dimension.
*/
template
<
typename
T
,
int
NX
,
int
NY
,
int
BlockSize
,
int
Rank
,
typename
IndexCal
,
bool
IsBoundary
=
false
>
template
<
typename
T
x
,
typename
Ty
,
int
NX
,
int
NY
,
int
BlockSize
,
int
Rank
,
typename
IndexCal
,
typename
Functor
,
bool
IsBoundary
=
false
>
__device__
__forceinline__
void
ReadDataReduce
(
T
*
dst
,
const
T
*
__restrict__
src
,
int
block_offset
,
T
y
*
dst
,
const
Tx
*
__restrict__
src
,
int
block_offset
,
const
IndexCal
&
index_cal
,
int
size_nx
,
int
size_ny
,
int
stride_nx
,
int
stride_ny
,
bool
reduce_last_dim
)
{
int
stride_ny
,
Functor
func
,
bool
reduce_last_dim
)
{
int
thread_offset
=
0
;
int
left_idx
=
0
;
if
(
reduce_last_dim
)
{
...
...
@@ -385,7 +385,7 @@ __device__ __forceinline__ void ReadDataReduce(
}
}
uint32_t
index_src
=
index_cal
(
thread_offset
+
block_offset
);
dst
[
ny
]
=
s
rc
[
index_src
]
;
dst
[
ny
]
=
s
tatic_cast
<
Ty
>
(
func
(
src
[
index_src
]))
;
thread_offset
+=
stride_ny
;
}
}
else
{
...
...
@@ -400,7 +400,7 @@ __device__ __forceinline__ void ReadDataReduce(
}
}
uint32_t
index_src
=
index_cal
(
thread_offset
+
block_offset
);
dst
[
nx
+
ny
*
NX
]
=
s
rc
[
index_src
]
;
dst
[
nx
+
ny
*
NX
]
=
s
tatic_cast
<
Ty
>
(
func
(
src
[
index_src
]))
;
thread_offset
+=
stride_ny
;
}
}
...
...
paddle/fluid/operators/kernel_primitives/helper_primitives.h
浏览文件 @
9c5d5665
...
...
@@ -17,64 +17,49 @@
namespace
paddle
{
namespace
operators
{
namespace
kernel_primitives
{
namespace
details
{
static
__device__
__forceinline__
platform
::
float16
ExpFunctor
(
platform
::
float16
x
)
{
return
::
Eigen
::
numext
::
exp
(
x
);
}
static
__device__
__forceinline__
float
ExpFunctor
(
float
x
)
{
return
expf
(
x
);
}
static
__device__
__forceinline__
double
ExpFunctor
(
double
x
)
{
return
exp
(
x
);
}
static
__device__
__forceinline__
platform
::
float16
LogFunctor
(
platform
::
float16
x
)
{
return
::
Eigen
::
numext
::
log
(
x
);
}
static
__device__
__forceinline__
float
LogFunctor
(
float
x
)
{
return
logf
(
x
);
}
static
__device__
__forceinline__
double
LogFunctor
(
double
x
)
{
return
log
(
x
);
}
#ifdef PADDLE_WITH_XPU2
struct
dim3
{
int
x
;
int
y
;
int
z
;
/*************************** Compute Functor****************************/
// for margin_cross_entropy
template
<
typename
Tx
,
typename
Ty
=
Tx
>
struct
ExpLogitTransformer
{
HOSTDEVICE
explicit
inline
ExpLogitTransformer
(
int
n
)
{}
HOSTDEVICE
inline
Ty
operator
()(
const
Tx
*
x
)
const
{
return
static_cast
<
Ty
>
(
details
::
ExpFunctor
(
x
[
0
]));
}
HOSTDEVICE
inline
Ty
operator
()(
const
Tx
&
x
)
const
{
return
static_cast
<
Ty
>
(
details
::
ExpFunctor
(
x
));
explicit
inline
dim3
(
int
split_x
,
int
split_y
=
1
,
int
split_z
=
1
)
{
x
=
split_x
;
y
=
split_y
;
z
=
split_z
;
}
};
#endif
// Post processing function for sum, max, min, prod, any
template
<
typename
Tx
,
typename
Ty
=
Tx
>
struct
IdentityFunctor
{
HOSTDEVICE
explicit
inline
IdentityFunctor
(
int
n
)
{}
struct
DimConfig
{
int
split_num_x
;
int
split_num_y
;
int
split_num_z
;
int
deal_size_x
;
int
deal_size_y
;
int
deal_size_z
;
int
rem_x
;
int
rem_y
;
int
rem_z
;
HOSTDEVICE
inline
Ty
operator
()(
const
Tx
*
x
)
const
{
return
static_cast
<
Ty
>
(
x
[
0
]);
HOSTDEVICE
explicit
inline
DimConfig
(
int
split_x
,
int
split_y
,
int
split_z
,
int
size_x
,
int
size_y
,
int
size_z
)
{
split_num_x
=
split_x
;
split_num_y
=
split_y
;
split_num_z
=
split_z
;
deal_size_x
=
size_x
;
deal_size_y
=
size_y
;
deal_size_z
=
size_z
;
}
HOSTDEVICE
inline
Ty
operator
()(
const
Tx
&
x
)
const
{
return
static_cast
<
Ty
>
(
x
);
HOSTDEVICE
void
SetRem
(
int
rem_nx
,
int
rem_ny
,
int
rem_nz
)
{
rem_x
=
rem_nx
;
rem_y
=
rem_ny
;
rem_z
=
rem_nz
;
}
};
// Post processing function for mean
template
<
typename
T
>
struct
DivideFunctor
{
HOSTDEVICE
explicit
inline
DivideFunctor
(
int
n
)
:
n_inv
((
T
)(
1.0
/
n
))
{}
HOSTDEVICE
inline
T
operator
()(
const
T
*
x
)
const
{
return
x
[
0
]
*
n_inv
;
}
HOSTDEVICE
inline
T
operator
()(
const
T
&
x
)
const
{
return
x
*
n_inv
;
}
private:
T
n_inv
;
};
}
// namespace details
}
// namespace kernel_primitives
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/kernel_primitives/kernel_primitives.h
浏览文件 @
9c5d5665
...
...
@@ -13,11 +13,45 @@
// limitations under the License.
#pragma once
#include "paddle/fluid/operators/kernel_primitives/functor_primitives.h"
#include "paddle/fluid/operators/kernel_primitives/helper_primitives.h"
#ifdef PADDLE_WITH_XPU2
#include "paddle/fluid/operators/kernel_primitives/compute_primitives_xpu2.h"
#include "paddle/fluid/operators/kernel_primitives/datamover_primitives_xpu2.h"
#define THREAD_ID_X core_id()
#define THREAD_ID_Y 0
#define THREAD_ID_Z 0
#define BLOCK_NUM_X core_num()
#define BLOCK_NUM_Y 0
#define BLOCK_NUM_Z 0
#define BLOCK_ID_X cluster_id()
#define BLOCK_ID_Y 0
#define BLOCK_ID_Z 0
#define GRID_NUM_X cluster_num()
#define GRID_NUM_Y 0
#define GRID_NUM_Z 0
#else
#include "paddle/fluid/operators/kernel_primitives/compute_primitives.h"
#include "paddle/fluid/operators/kernel_primitives/datamover_primitives.h"
#include "paddle/fluid/operators/kernel_primitives/functor_primitives.h"
#include "paddle/fluid/operators/kernel_primitives/helper_primitives.h"
#define THREAD_ID_X threadIdx.x
#define THREAD_ID_Y threadIdx.y
#define THREAD_ID_Z threadIdx.z
#define BLOCK_NUM_X blockDim.x
#define BLOCK_NUM_Y blockDim.y
#define BLOCK_NUM_Z blockDim.z
#define BLOCK_ID_X blockIdx.x
#define BLOCK_ID_Y blockIdx.y
#define BLOCK_ID_Z blockIdx.z
#define GRID_NUM_X gridDim.x
#define GRID_NUM_Y gridDim.y
#define GRID_NUM_Z gridDim.z
#endif
namespace
paddle
{
namespace
operators
{
...
...
paddle/fluid/operators/margin_cross_entropy_op.cu
浏览文件 @
9c5d5665
...
...
@@ -130,7 +130,7 @@ __global__ void AddMarginToPositiveLogitsKernel(
template
<
typename
Tx
,
typename
Ty
=
Tx
>
struct
ExpAndSum
{
using
Transformer
=
kp
ds
::
ExpLogitTransforme
r
<
Tx
>
;
using
Transformer
=
kp
s
::
ExpFuncto
r
<
Tx
>
;
inline
Ty
initial
()
{
return
static_cast
<
Ty
>
(
0.0
f
);
}
...
...
@@ -159,7 +159,7 @@ __global__ void LogitsMinusLogSumKernel(T* logits, const T* logits_sum_per_row,
const
int64_t
N
,
const
int64_t
D
)
{
CUDA_KERNEL_LOOP
(
i
,
N
*
D
)
{
auto
row
=
i
/
D
;
logits
[
i
]
-=
kp
ds
::
LogFunctor
(
logits_sum_per_row
[
row
]);
logits
[
i
]
-=
kp
s
::
details
::
Log
(
logits_sum_per_row
[
row
]);
}
}
...
...
@@ -174,9 +174,9 @@ __global__ void HardLabelSoftmaxWithCrossEntropyKernel(
if
((
col
+
start_index
)
==
labels
[
row
])
{
auto
softmax
=
log_softmax
[
i
];
loss
[
row
]
=
-
softmax
;
log_softmax
[
i
]
=
kp
ds
::
ExpFunctor
(
softmax
);
log_softmax
[
i
]
=
kp
s
::
details
::
Exp
(
softmax
);
}
else
{
log_softmax
[
i
]
=
kp
ds
::
ExpFunctor
(
log_softmax
[
i
]);
log_softmax
[
i
]
=
kp
s
::
details
::
Exp
(
log_softmax
[
i
]);
}
}
}
...
...
paddle/fluid/operators/reduce_ops/reduce_functor_op.h
浏览文件 @
9c5d5665
...
...
@@ -24,11 +24,11 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
namespace
kp
ds
=
paddle
::
operators
::
kernel_primitives
::
detail
s
;
namespace
kp
s
=
paddle
::
operators
::
kernel_primitive
s
;
template
<
typename
Tx
,
typename
Ty
=
Tx
>
struct
CustomMin
{
using
Transformer
=
kp
d
s
::
IdentityFunctor
<
Tx
>
;
using
Transformer
=
kps
::
IdentityFunctor
<
Tx
>
;
inline
Ty
initial
()
{
return
static_cast
<
Ty
>
(
std
::
numeric_limits
<
Ty
>::
max
());
...
...
@@ -41,7 +41,7 @@ struct CustomMin {
template
<
typename
Tx
,
typename
Ty
=
Tx
>
struct
CustomMax
{
using
Transformer
=
kp
d
s
::
IdentityFunctor
<
Tx
>
;
using
Transformer
=
kps
::
IdentityFunctor
<
Tx
>
;
inline
Ty
initial
()
{
return
static_cast
<
Ty
>
(
std
::
numeric_limits
<
Ty
>::
lowest
());
...
...
@@ -55,7 +55,7 @@ struct CustomMax {
// for cub::Reduce
template
<
typename
Tx
,
typename
Ty
=
Tx
>
struct
CustomSum
{
using
Transformer
=
kp
d
s
::
IdentityFunctor
<
Tx
,
Ty
>
;
using
Transformer
=
kps
::
IdentityFunctor
<
Tx
,
Ty
>
;
inline
Ty
initial
()
{
return
static_cast
<
Ty
>
(
0.0
f
);
}
...
...
@@ -66,7 +66,7 @@ struct CustomSum {
template
<
typename
Tx
,
typename
Ty
=
Tx
>
struct
CustomMean
{
using
Transformer
=
kp
d
s
::
DivideFunctor
<
Tx
>
;
using
Transformer
=
kps
::
DivideFunctor
<
Tx
>
;
inline
Ty
initial
()
{
return
static_cast
<
Ty
>
(
0.0
f
);
}
...
...
@@ -77,7 +77,7 @@ struct CustomMean {
template
<
typename
Tx
,
typename
Ty
=
Tx
>
struct
CustomMul
{
using
Transformer
=
kp
d
s
::
IdentityFunctor
<
Tx
>
;
using
Transformer
=
kps
::
IdentityFunctor
<
Tx
>
;
inline
Ty
initial
()
{
return
static_cast
<
Ty
>
(
1.0
f
);
}
...
...
@@ -88,7 +88,7 @@ struct CustomMul {
template
<
typename
Tx
,
typename
Ty
=
Tx
>
struct
CustomLogicalOr
{
using
Transformer
=
kp
d
s
::
IdentityFunctor
<
Tx
>
;
using
Transformer
=
kps
::
IdentityFunctor
<
Tx
>
;
inline
Ty
initial
()
{
return
static_cast
<
Ty
>
(
false
);
}
...
...
@@ -99,7 +99,7 @@ struct CustomLogicalOr {
template
<
typename
Tx
,
typename
Ty
=
Tx
>
struct
CustomLogicalAnd
{
using
Transformer
=
kp
d
s
::
IdentityFunctor
<
Tx
>
;
using
Transformer
=
kps
::
IdentityFunctor
<
Tx
>
;
inline
Ty
initial
()
{
return
static_cast
<
Ty
>
(
true
);
}
...
...
paddle/fluid/operators/reduce_ops/reduce_op.cu.h
浏览文件 @
9c5d5665
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录