PaddlePaddle / Paddle — commit 25ffe9c2 (unverified)
Authored on Nov 23, 2022 by zhangyikun02; committed via GitHub on Nov 23, 2022.
add warpctc kernel and change cast_v2 to cast for xpu, test=kunlun (#48134)
Parent: b07e6b45
Showing 16 changed files with 804 additions and 81 deletions (+804 / -81).
cmake/external/xpu.cmake                                          +1    -1
paddle/fluid/framework/data_type_transform.cc                     +2    -2
paddle/fluid/imperative/tests/test_gradient_accmulator.cc         +10   -4
paddle/fluid/operators/metrics/accuracy_op_xpu.cc                 +4    -4
paddle/fluid/operators/top_k_op_xpu.cc                            +5    -5
paddle/fluid/platform/device/xpu/xpu2_op_list.h                   +3    -0
paddle/phi/kernels/xpu/amp_kernel.cc                              +10   -10
paddle/phi/kernels/xpu/cast_kernel.cc                             +8    -8
paddle/phi/kernels/xpu/cross_entropy_grad_kernel.cc               +10   -10
paddle/phi/kernels/xpu/cross_entropy_kernel.cc                    +5    -5
paddle/phi/kernels/xpu/gather_grad_kernel.cc                      +6    -16
paddle/phi/kernels/xpu/sgd_kernel.cc                              +5    -6
paddle/phi/kernels/xpu/top_k_kernel.cc                            +10   -10
paddle/phi/kernels/xpu/warpctc_grad_kernel.cc                     +60   -0
paddle/phi/kernels/xpu/warpctc_kernel.cc                          +102  -0
python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py    +563  -0
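
Most of the C++ changes below are one mechanical substitution: every call to the XDNN primitive xpu::cast_v2 becomes xpu::cast, and the tag passed to PADDLE_ENFORCE_XDNN_SUCCESS is updated to match. A minimal sketch of the pattern, assuming the Kunlun XDNN declaration of xpu::cast and Paddle's enforce_xpu.h macros as used in the diffs (the helper name CastOnXPU is illustrative, not part of the commit):

// Illustrative sketch only. Assumes xpu::cast from the Kunlun XDNN library
// and PADDLE_ENFORCE_XDNN_SUCCESS from paddle/phi/backends/xpu/enforce_xpu.h,
// both used exactly as in the call sites changed by this commit.
template <typename InT, typename OutT>
void CastOnXPU(xpu::Context* ctx, const InT* in, OutT* out, int64_t numel) {
  // Old call:  int r = xpu::cast_v2<InT, OutT>(ctx, in, out, numel);
  // New call:  same arguments, but the primitive is now named xpu::cast.
  int r = xpu::cast<InT, OutT>(ctx, in, out, numel);
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");  // tag renamed from "cast_v2"
}
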
cmake/external/xpu.cmake
@@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
 if(NOT DEFINED XPU_BASE_URL)
   set(XPU_BASE_URL_WITHOUT_DATE
       "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
-  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221116")
+  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221120")
 else()
   set(XPU_BASE_URL "${XPU_BASE_URL}")
 endif()

paddle/fluid/framework/data_type_transform.cc
@@ -40,12 +40,12 @@ static void XPUCastData(const phi::DenseTensor& in,
                         const platform::XPUDeviceContext* dev_ctx) {
   using XPUInTDType = typename XPUTypeTrait<InType>::Type;
   using XPUOutTDType = typename XPUTypeTrait<OutType>::Type;
-  int r = xpu::cast_v2<XPUInTDType, XPUOutTDType>(
+  int r = xpu::cast<XPUInTDType, XPUOutTDType>(
       dev_ctx->x_context(),
       reinterpret_cast<const XPUInTDType*>(in.data<InType>()),
       reinterpret_cast<XPUOutTDType*>(out->mutable_data<OutType>(in.place())),
       in.numel());
-  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
   dev_ctx->Wait();
 }

paddle/fluid/imperative/tests/test_gradient_accmulator.cc
@@ -161,13 +161,10 @@ TEST(test_add_functor, add_functor) {
                          static_cast<platform::float16>(1.0),
                          static_cast<platform::float16>(2.0));
   EXPECT_EQ(cpu_res, 0);
-  // double
-#ifndef PADDLE_WITH_XPU
-  // does not support double when compiled using xpu
   cpu_res = TensorddTest(
       cpu_place, cpu_place, static_cast<double>(1.0), static_cast<double>(2.0));
   EXPECT_EQ(cpu_res, 0);
-#endif
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   int gpu_res = 1;
@@ -217,6 +214,9 @@ TEST(test_add_functor, add_functor) {
                          static_cast<platform::float16>(1.0),
                          static_cast<platform::float16>(2.0));
   EXPECT_EQ(xpu_res, 0);
+  xpu_res = TensorddTest(
+      xpu_place, xpu_place, static_cast<double>(1.0), static_cast<double>(2.0));
+  EXPECT_EQ(xpu_res, 0);
   // different places
   xpu_res = TensorddTest(
       cpu_place, xpu_place, static_cast<float>(1.0), static_cast<float>(2.0));
@@ -234,6 +234,12 @@ TEST(test_add_functor, add_functor) {
                          static_cast<platform::float16>(1.0),
                          static_cast<platform::float16>(2.0));
   EXPECT_EQ(xpu_res, 0);
+  xpu_res = TensorddTest(
+      cpu_place, xpu_place, static_cast<double>(1.0), static_cast<double>(2.0));
+  EXPECT_EQ(xpu_res, 0);
+  xpu_res = TensorddTest(
+      xpu_place, cpu_place, static_cast<double>(1.0), static_cast<double>(2.0));
+  EXPECT_EQ(xpu_res, 0);
 #endif
 }

paddle/fluid/operators/metrics/accuracy_op_xpu.cc
@@ -50,13 +50,13 @@ class AccuracyXPUKernel : public framework::OpKernel<T> {
     int* label_int32_ptr = RAII_GUARD.alloc_l3_or_gm<int>(size);
     PADDLE_ENFORCE_XDNN_NOT_NULL(label_int32_ptr);
-    int r = xpu::cast_v2<int64_t, int32_t>(
+    int r = xpu::cast<int64_t, int32_t>(
         dev_ctx.x_context(), indices_data, indices_int32_ptr, size);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
-    r = xpu::cast_v2<int64_t, int32_t>(
+    r = xpu::cast<int64_t, int32_t>(
         dev_ctx.x_context(), label_data, label_int32_ptr, size);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     r = xpu::accuracy(dev_ctx.x_context(),
                       indices_int32_ptr,

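The same staging idiom recurs in several of the XPU kernels below (top_k, cross_entropy, gather_grad): int64 indices or labels are cast down into int32 scratch memory before the XDNN math call. A condensed sketch of that idiom, assuming Paddle's xpu::ctx_guard and the enforce macros shown in the diffs (the helper name CastToInt32Scratch is illustrative, not part of the commit):

// Illustrative sketch of the recurring int64 -> int32 staging pattern.
// alloc_l3_or_gm places the scratch buffer in on-chip L3 memory when it fits
// and falls back to global memory otherwise, as in the kernels below.
template <typename Context>
int* CastToInt32Scratch(const Context& dev_ctx,
                        xpu::ctx_guard* RAII_GUARD,
                        const int64_t* src,
                        int64_t numel) {
  int* dst = RAII_GUARD->alloc_l3_or_gm<int32_t>(numel);
  PADDLE_ENFORCE_XDNN_NOT_NULL(dst);
  int r = xpu::cast<int64_t, int32_t>(dev_ctx.x_context(), src, dst, numel);
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
  return dst;
}
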
paddle/fluid/operators/top_k_op_xpu.cc
@@ -79,11 +79,11 @@ class TopkXPUKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "sorted_topk");
     // cast to int64 as final result
-    r = xpu::cast_v2<int32_t, int64_t>(dev_ctx.x_context(),
+    r = xpu::cast<int32_t, int64_t>(dev_ctx.x_context(),
                                     (const int32_t*)indices_int_data,
                                     indices_data,
                                     indices->numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
   }
 };

paddle/fluid/platform/device/xpu/xpu2_op_list.h
@@ -681,6 +681,9 @@ XPUOpMap& get_kl2_ops() {
                      pOpKernelType(vartype::INT8, XPUPlace()),
                      pOpKernelType(vartype::UINT8, XPUPlace()),
                      pOpKernelType(vartype::FP32, XPUPlace())})},
+    {"warpctc_grad",
+     XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+    {"warpctc", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"where_index",
      XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()),
                    pOpKernelType(vartype::BOOL, XPUPlace()),

paddle/phi/kernels/xpu/amp_kernel.cc
@@ -233,11 +233,11 @@ void CheckFiniteAndUnscaleKernel(const Context& dev_ctx,
       dev_ctx.template Alloc<MPDType>(&float_out,
                                       out->numel() * sizeof(MPDType));
-      int r = xpu::cast_v2(dev_ctx.x_context(),
+      int r = xpu::cast(dev_ctx.x_context(),
                         reinterpret_cast<const float16*>(x->data<T>()),
                         float_x.data<MPDType>(),
                         x->numel());
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+      PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
       r = xpu::scale(dev_ctx.x_context(),
                      float_x.data<MPDType>(),
@@ -248,11 +248,11 @@ void CheckFiniteAndUnscaleKernel(const Context& dev_ctx,
                      0.0);
       PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
-      r = xpu::cast_v2(dev_ctx.x_context(),
+      r = xpu::cast(dev_ctx.x_context(),
                     float_out.data<MPDType>(),
                     reinterpret_cast<float16*>(out->data<T>()),
                     out->numel());
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+      PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     } else {
       int r = xpu::scale(dev_ctx.x_context(),
                          reinterpret_cast<const XPUType*>(x->data<T>()),

paddle/phi/kernels/xpu/cast_kernel.cc
@@ -39,14 +39,14 @@ void CastKernel(const Context& dev_ctx,
   int r = -1;
   switch (out_dtype) {
     case phi::DataType::FLOAT32:
-      r = xpu::cast_v2<XPUInTDType, float>(
+      r = xpu::cast<XPUInTDType, float>(
           dev_ctx.x_context(),
           reinterpret_cast<const XPUInTDType*>(in_data),
           dev_ctx.template Alloc<float>(out),
           numel);
       break;
     case phi::DataType::FLOAT16:
-      r = xpu::cast_v2<XPUInTDType, float16>(
+      r = xpu::cast<XPUInTDType, float16>(
           dev_ctx.x_context(),
           reinterpret_cast<const XPUInTDType*>(in_data),
           reinterpret_cast<float16*>(
@@ -54,35 +54,35 @@ void CastKernel(const Context& dev_ctx,
           numel);
       break;
     case phi::DataType::INT64:
-      r = xpu::cast_v2<XPUInTDType, int64_t>(
+      r = xpu::cast<XPUInTDType, int64_t>(
           dev_ctx.x_context(),
           reinterpret_cast<const XPUInTDType*>(in_data),
           dev_ctx.template Alloc<int64_t>(out),
           numel);
       break;
     case phi::DataType::INT32:
-      r = xpu::cast_v2<XPUInTDType, int32_t>(
+      r = xpu::cast<XPUInTDType, int32_t>(
           dev_ctx.x_context(),
           reinterpret_cast<const XPUInTDType*>(in_data),
           dev_ctx.template Alloc<int>(out),
           numel);
       break;
     case phi::DataType::BOOL:
-      r = xpu::cast_v2<XPUInTDType, bool>(
+      r = xpu::cast<XPUInTDType, bool>(
           dev_ctx.x_context(),
           reinterpret_cast<const XPUInTDType*>(in_data),
           dev_ctx.template Alloc<bool>(out),
           numel);
       break;
     case phi::DataType::UINT8:
-      r = xpu::cast_v2<XPUInTDType, uint8_t>(
+      r = xpu::cast<XPUInTDType, uint8_t>(
           dev_ctx.x_context(),
           reinterpret_cast<const XPUInTDType*>(in_data),
           dev_ctx.template Alloc<uint8_t>(out),
           numel);
       break;
     case phi::DataType::FLOAT64:
-      r = xpu::cast_v2<XPUInTDType, double>(
+      r = xpu::cast<XPUInTDType, double>(
           dev_ctx.x_context(),
           reinterpret_cast<const XPUInTDType*>(in_data),
           dev_ctx.template Alloc<double>(out),
@@ -93,7 +93,7 @@ void CastKernel(const Context& dev_ctx,
           "Not supported cast %d -> %d", x.dtype(), out_dtype));
   }
-  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
 }
 }  // namespace phi

paddle/phi/kernels/xpu/cross_entropy_grad_kernel.cc
@@ -59,11 +59,11 @@ void CrossEntropyWithSoftmaxGradKernel(const Context& dev_ctx,
         RAII_GUARD.alloc_l3_or_gm<int32_t>(labels.numel());
     PADDLE_ENFORCE_XDNN_NOT_NULL(labels_int_ptr_l3);
-    r = xpu::cast_v2<int64_t, int32_t>(dev_ctx.x_context(),
+    r = xpu::cast<int64_t, int32_t>(dev_ctx.x_context(),
                                     labels.data<int64_t>(),
                                     labels_int_ptr_l3,
                                     labels.numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     r = xpu::hard_softmax_with_cross_entropy_grad<XPUType, int>(
         dev_ctx.x_context(),
@@ -117,11 +117,11 @@ void CrossEntropyWithSoftmaxGradKernel(const Context& dev_ctx,
         RAII_GUARD.alloc_l3_or_gm<int32_t>(labels.numel());
     PADDLE_ENFORCE_XDNN_NOT_NULL(labels_int_ptr_l3);
-    r = xpu::cast_v2<int64_t, int32_t>(dev_ctx.x_context(),
+    r = xpu::cast<int64_t, int32_t>(dev_ctx.x_context(),
                                     labels.data<int64_t>(),
                                     labels_int_ptr_l3,
                                     labels.numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "clip_v2");
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     r = xpu::hard_softmax_with_cross_entropy_grad<XPUType, int>(
         dev_ctx.x_context(),
         reinterpret_cast<const XPUType*>(loss_grad.data<T>()),

paddle/phi/kernels/xpu/cross_entropy_kernel.cc
@@ -132,11 +132,11 @@ void CrossEntropyWithSoftmaxKernel(const Context& dev_ctx,
     int* labels_int_ptr_l3 =
         RAII_GUARD.alloc_l3_or_gm<int32_t>(labels.numel());
     PADDLE_ENFORCE_XDNN_NOT_NULL(labels_int_ptr_l3);
-    r = xpu::cast_v2<int64_t, int32_t>(dev_ctx.x_context(),
+    r = xpu::cast<int64_t, int32_t>(dev_ctx.x_context(),
                                     labels.data<int64_t>(),
                                     labels_int_ptr_l3,
                                     labels.numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "clip_v2");
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     r = xpu::hard_cross_entropy<XPUType, int32_t>(
         dev_ctx.x_context(),

paddle/phi/kernels/xpu/gather_grad_kernel.cc
@@ -72,16 +72,11 @@ void GatherGradKernel(const Context& dev_ctx,
   } else {
     xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
     int* index_int_ptr_l3 = RAII_GUARD.alloc_l3_or_gm<int32_t>(index.numel());
-    r = xpu::cast_v2<int64_t, int32_t>(dev_ctx.x_context(),
+    r = xpu::cast<int64_t, int32_t>(dev_ctx.x_context(),
                                     index.data<int64_t>(),
                                     index_int_ptr_l3,
                                     index.numel());
-    PADDLE_ENFORCE_EQ(r,
-                      XPU_SUCCESS,
-                      phi::errors::External("XPU API(cast_v2) return wrong "
-                                            "value[%d %s]",
-                                            r,
-                                            XPUAPIErrorMsg[r]));
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     r = xpu::gather_grad<XPUType, int>(
         dev_ctx.x_context(),
@@ -93,12 +88,7 @@ void GatherGradKernel(const Context& dev_ctx,
         axis_v,
         overwrite);
   }
-  PADDLE_ENFORCE_EQ(
-      r,
-      xpu::Error_t::SUCCESS,
-      phi::errors::External("XPU gather grad kernel return wrong value[%d %s]",
-                            r,
-                            XPUAPIErrorMsg[r]));
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "gather_grad");
 }
 }  // namespace phi

paddle/phi/kernels/xpu/sgd_kernel.cc
@@ -54,12 +54,11 @@ void SGDDenseKernel(const Context &dev_ctx,
   const float *lr = nullptr;
   if (std::is_same<T, dtype::float16>::value) {
     float *lr_float = RAII_GUARD.alloc_l3_or_gm<float>(learning_rate.numel());
-    int r =
-        xpu::cast_v2<XPUType, float>(dev_ctx.x_context(),
-                                     reinterpret_cast<const XPUType *>(lr_t),
-                                     lr_float,
-                                     learning_rate.numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "clip_v2");
+    int r = xpu::cast<XPUType, float>(dev_ctx.x_context(),
+                                      reinterpret_cast<const XPUType *>(lr_t),
+                                      lr_float,
+                                      learning_rate.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     lr = lr_float;
   } else {
     lr = reinterpret_cast<const float *>(lr_t);

paddle/phi/kernels/xpu/top_k_kernel.cc
@@ -68,11 +68,11 @@ void TopkKernel(const Context& dev_ctx,
         k);
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "sorted_topk");
-    r = xpu::cast_v2<int32_t, int64_t>(dev_ctx.x_context(),
+    r = xpu::cast<int32_t, int64_t>(dev_ctx.x_context(),
                                     (const int32_t*)indices_int_data,
                                     indices_data,
                                     indices->numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
   } else {
     // do transpose if axis is not the last dim of input
     std::vector<int> trans_axes;
@@ -127,11 +127,11 @@ void TopkKernel(const Context& dev_ctx,
         k);
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "sorted_topk");
-    r = xpu::cast_v2<int32_t, int64_t>(dev_ctx.x_context(),
+    r = xpu::cast<int32_t, int64_t>(dev_ctx.x_context(),
                                     (const int32_t*)trans_idx_int32_data,
                                     trans_idx_data,
                                     indices->numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     // Transpose back to original dims
     std::vector<int> trans_back_axes;
     for (int i = 0; i < axis; i++) {

paddle/phi/kernels/xpu/warpctc_grad_kernel.cc (new file, mode 100644)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/warpctc_grad_kernel.h"

#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {

template <typename T, typename Context>
void WarpctcGradKernel(const Context& dev_ctx,
                       const DenseTensor& logits,
                       const paddle::optional<DenseTensor>& logits_length,
                       const DenseTensor& warpctcgrad,
                       const DenseTensor& loss_grad,
                       int blank,
                       bool norm_by_times,
                       DenseTensor* logits_grad) {
  dev_ctx.template Alloc<T>(logits_grad);

  bool has_logits_length = logits_length.is_initialized();
  if (!has_logits_length) {
    PADDLE_THROW(
        phi::errors::External("XPU only support logits_length is_initialized"));
  }

  int max_seq_length = warpctcgrad.dims()[0];  // Tmax
  int num_sequences = warpctcgrad.dims()[1];   // B
  int seq_width = warpctcgrad.dims()[2];       // D
  auto* logits_length_ptr = logits_length.get_ptr();

  int r = xpu::ctc_loss_grad<T, int64_t>(dev_ctx.x_context(),
                                         loss_grad.data<T>(),
                                         logits_grad->data<T>(),
                                         warpctcgrad.data<T>(),
                                         max_seq_length,
                                         num_sequences,
                                         seq_width,
                                         logits_length_ptr->data<int64_t>(),
                                         norm_by_times);
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "ctc_loss_grad");
}

}  // namespace phi

PD_REGISTER_KERNEL(
    warpctc_grad, XPU, ALL_LAYOUT, phi::WarpctcGradKernel, float) {}

paddle/phi/kernels/xpu/warpctc_kernel.cc (new file, mode 100644)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/warpctc_kernel.h"

#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {

template <typename T, typename Context>
void WarpctcKernel(const Context& dev_ctx,
                   const DenseTensor& logits,
                   const DenseTensor& label,
                   const paddle::optional<DenseTensor>& logits_length,
                   const paddle::optional<DenseTensor>& labels_length,
                   int blank,
                   bool norm_by_times,
                   DenseTensor* loss,
                   DenseTensor* warpctcgrad) {
  bool has_logits_length = logits_length.is_initialized();
  if (!has_logits_length) {
    PADDLE_THROW(
        phi::errors::External("XPU only support logits_length is_initialized"));
  }
  bool has_labels_length = labels_length.is_initialized();
  if (!has_labels_length) {
    PADDLE_THROW(
        phi::errors::External("XPU only support labels_length is_initialized"));
  }

  int max_sequence_length = logits.dims()[0];
  int num_sequences = logits.dims()[1];
  int sequence_width = logits.dims()[2];
  int max_target_seq_length = label.dims()[1];

  PADDLE_ENFORCE_GT(max_sequence_length,
                    0,
                    phi::errors::InvalidArgument(
                        "The first dimension of Input(Logits) should be "
                        "greater than zero "
                        "but received %d. ",
                        max_sequence_length));
  PADDLE_ENFORCE_GT(num_sequences,
                    0,
                    phi::errors::InvalidArgument(
                        "The second dimension of Input(Logits) should be "
                        "greater than zero "
                        "but received %d. ",
                        num_sequences));
  PADDLE_ENFORCE_GT(sequence_width,
                    0,
                    phi::errors::InvalidArgument(
                        "The third dimension of Input(Logits) should be "
                        "greater than zero "
                        "but received %d. ",
                        sequence_width));

  loss->Resize(phi::make_ddim({num_sequences, 1}));
  dev_ctx.template Alloc<T>(loss);

  warpctcgrad->Resize(
      phi::make_ddim({max_sequence_length, num_sequences, sequence_width}));
  dev_ctx.template Alloc<T>(warpctcgrad);

  const T* logits_data = logits.data<T>();
  const int* label_data = label.data<int>();
  auto logits_length_data = logits_length.get_ptr()->data<int64_t>();
  auto labels_length_data = labels_length.get_ptr()->data<int64_t>();
  T* loss_data = loss->data<T>();
  T* warpctcgrad_data = warpctcgrad->data<T>();

  int r = xpu::ctc_loss<T, int64_t>(dev_ctx.x_context(),
                                    logits_data,
                                    label_data,
                                    loss_data,
                                    warpctcgrad_data,
                                    logits_length_data,
                                    labels_length_data,
                                    max_sequence_length,
                                    num_sequences,
                                    sequence_width,
                                    max_target_seq_length,
                                    blank);
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "ctc_loss");
}

}  // namespace phi

PD_REGISTER_KERNEL(warpctc, XPU, ALL_LAYOUT, phi::WarpctcKernel, float) {}

python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py (new file, mode 100644)

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys

sys.path.append("..")

import unittest

import numpy as np
from test_softmax_op import stable_softmax

import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
import paddle
import paddle.nn.functional as F

from op_test_xpu import XPUOpTest
from xpu.get_test_cover_info import (
    create_test_class,
    get_xpu_op_support_types,
    XPUOpTestWrapper,
)

paddle.enable_static()

CUDA_BLOCK_SIZE = 32


class CTCForward(object):
    def __init__(
        self,
        softmax,
        softmax_lod,
        labels,
        labels_lod,
        num_classes,
        batch_size,
        blank,
        norm_by_times,
    ):
        self.softmax = softmax
        self.softmax_lod = softmax_lod
        self.labels = labels
        self.labels_lod = labels_lod
        self.blank = blank
        self.norm_by_times = norm_by_times

        self.level = 0
        self.num_classes = num_classes
        self.batch_size = batch_size

        self.loss = np.zeros([self.batch_size, 1], dtype=softmax.dtype)
        self.gradient = np.zeros(self.softmax.shape, dtype=softmax.dtype)

        # float64
        self.EXP_MAX = sys.float_info.max
        self.EXP_MIN = sys.float_info.min
        self.LOG_ZERO = np.log(self.EXP_MIN)
        self.LOG_INFINITY = np.log(self.EXP_MAX)

    def safe_exp(self, x):
        if x <= self.LOG_ZERO:
            return 0.0
        if x >= self.LOG_INFINITY:
            return self.EXP_MAX
        return np.exp(x)

    def safe_log(self, x):
        if x <= self.EXP_MIN:
            return self.LOG_ZERO
        return np.log(x)

    # x = lna and y = lnb are in log scale, ln(a / b) = lna - lnb
    def log_div(self, x, y):
        res = x - y
        if res <= self.LOG_ZERO:
            return self.LOG_ZERO
        if res >= self.LOG_INFINITY:
            return self.LOG_INFINITY
        return res

    # x = lna and y = lnb are in log scale, ln(a * b) = lna + lnb
    def log_mul(self, x, y):
        res = x + y
        if res <= self.LOG_ZERO:
            return self.LOG_ZERO
        if res >= self.LOG_INFINITY:
            return self.LOG_INFINITY
        return res

    # x = lna and y = lnb are in log scale,
    # ln(a + b) = lna + ln(1 + exp(lnb - lna)), where b > a
    def log_add(self, x, y):
        if x < y:
            t = y
            y = x
            x = t
        return x + self.safe_log(1 + self.safe_exp(y - x))

    def segment_range(self, time, total_times, total_segments):
        start = max(0, total_segments - (2 * (total_times - time)))
        end = min(total_segments, 2 * (time + 1))
        return start, end

    def forward_a_sequence(self, softmax_a_sequence, labels_a_sequence):
        total_times = softmax_a_sequence.shape[0]
        total_segments = labels_a_sequence.shape[0] * 2 + 1

        required_times = labels_a_sequence.shape[0]
        old_label = -1
        for i in range(labels_a_sequence.shape[0]):
            # two contingous labels with the same value
            if labels_a_sequence[i, 0] == old_label:
                required_times = required_times + 1
            old_label = labels_a_sequence[i, 0]

        if total_times < required_times:
            return 0

        # calculate the forward and backward variables,
        # reference Chapter 7.3 of "Alex Grave, Supervised Sequence
        # Labelling with Recurrent Neural Networks"
        log_acts = np.zeros(
            [total_times, self.num_classes], dtype=softmax_a_sequence.dtype
        )
        for i in range(total_times):
            for j in range(self.num_classes):
                log_acts[i, j] = self.safe_log(softmax_a_sequence[i, j])

        # calculate the forward variables
        forward_vars = np.zeros(
            [total_times, total_segments], dtype=softmax_a_sequence.dtype
        )
        for i in range(total_times):
            for j in range(total_segments):
                forward_vars[i, j] = self.LOG_ZERO

        for i in range(total_times):
            # dp initialization at t0
            if i == 0:
                forward_vars[i, 0] = log_acts[0, self.blank]
                if total_segments > 1:
                    forward_vars[i, 1] = log_acts[0, labels_a_sequence[i, 0]]
                continue

            # dp from t1
            start, end = self.segment_range(i, total_times, total_segments)
            for k in range(end - start):
                j = k + start
                if j & 1 == 1:
                    label_idx = j // 2
                    label_val = labels_a_sequence[label_idx, 0]
                    fv = self.log_add(
                        forward_vars[i - 1, j], forward_vars[i - 1, j - 1]
                    )
                    if (
                        j > 1
                        and label_val != labels_a_sequence[label_idx - 1, 0]
                    ):
                        fv = self.log_add(fv, forward_vars[i - 1, j - 2])
                    fv = self.log_mul(fv, log_acts[i, label_val])
                else:
                    fv = forward_vars[i - 1, j]
                    if j > 0:
                        fv = self.log_add(fv, forward_vars[i - 1, j - 1])
                    fv = self.log_mul(fv, log_acts[i, self.blank])
                forward_vars[i, j] = fv

        # sum the last two value as log_prob
        log_prob = forward_vars[total_times - 1, total_segments - 1]
        if total_segments > 1:
            log_prob = self.log_add(
                log_prob, forward_vars[total_times - 1, total_segments - 2]
            )

        return -log_prob

    def forward(self):
        softmax_offset = 0
        labels_offset = 0
        for i in range(self.batch_size):
            if self.labels.shape[1] == 1:
                softmax_start_i = softmax_offset
                softmax_end_i = softmax_offset + self.softmax_lod[self.level][i]
                labels_start_i = labels_offset
                labels_end_i = labels_offset + self.labels_lod[self.level][i]

                softmax_a_sequence = self.softmax[
                    softmax_start_i:softmax_end_i, :
                ]
                labels_a_sequence = self.labels[labels_start_i:labels_end_i, :]
                self.loss[i] = self.forward_a_sequence(
                    softmax_a_sequence, labels_a_sequence
                )
                softmax_offset += self.softmax_lod[self.level][i]
                labels_offset += self.labels_lod[self.level][i]
            else:
                softmax_a_sequence = self.softmax[: self.softmax_lod[i], i, :]
                labels_a_sequence = self.labels[: self.labels_lod[i], :]
                self.loss[i] = self.forward_a_sequence(
                    softmax_a_sequence, labels_a_sequence
                )
        return self.loss


def python_api(
    logits,
    label,
    logits_length=None,
    labels_length=None,
    blank=0,
    norm_by_times=False,
):
    return paddle.fluid.layers.warpctc(
        logits, label, blank, norm_by_times, logits_length, labels_length
    )


class XPUTestWarpCTCOp(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'warpctc'

    class TestWarpCTCOpWithPadding(XPUOpTest):
        def config(self):
            self.batch_size = 4
            self.num_classes = 8
            self.logits_lod = [[4, 1, 3, 3]]
            self.labels_lod = [[3, 1, 4, 4]]
            self.logits_length = np.array([4, 1, 3, 3], dtype=np.int64)
            self.labels_length = np.array([3, 1, 4, 4], dtype=np.int64)
            self.blank = self.num_classes - 1
            self.norm_by_times = False

        def setUp(self):
            self.op_type = "warpctc"
            self.dtype = self.in_type
            self.place = paddle.XPUPlace(0)
            self.python_api = python_api
            self.python_out_sig = ["Loss"]
            self.config()

            logits = np.random.uniform(
                0.1, 1.0, [sum(self.logits_length), self.num_classes]
            ).astype(self.dtype)
            print("logits.shape = ", logits.shape)
            softmax = np.apply_along_axis(stable_softmax, 1, logits)
            # labels should not be blank
            labels = np.random.randint(
                0,
                self.num_classes - 1,
                [sum(self.labels_length), 1],
                dtype="int32",
            )

            ctc = CTCForward(
                softmax,
                self.logits_lod,
                labels,
                self.labels_lod,
                self.num_classes,
                self.batch_size,
                self.blank,
                self.norm_by_times,
            )
            loss = ctc.forward()

            max_sequence_length = 0
            for i in range(self.batch_size):
                max_sequence_length = max(
                    max_sequence_length, self.logits_length[i]
                )
            # reshape logits to T*N*S
            new_logits = np.zeros(
                [max_sequence_length, self.batch_size, self.num_classes],
                dtype=logits.dtype,
            )

            cur = 0
            for batch_id in range(self.batch_size):
                for i in range(self.logits_length[batch_id]):
                    for j in range(self.num_classes):
                        new_logits[i, batch_id, j] = logits[cur + i, j]
                cur = cur + self.logits_length[batch_id]

            # reshape labels to N*S
            max_target_seq_length = 0
            for i in range(self.batch_size):
                max_target_seq_length = max(
                    max_target_seq_length, self.labels_length[i]
                )
            new_labels = np.zeros(
                [self.batch_size, max_target_seq_length], dtype="int32"
            )

            cur = 0
            for batch_id in range(self.batch_size):
                for i in range(self.labels_length[batch_id]):
                    new_labels[batch_id, i] = labels[cur + i]
                cur = cur + self.labels_length[batch_id]

            self.gradient = np.zeros(
                [max_sequence_length, self.batch_size, self.num_classes],
                dtype=logits.dtype,
            )

            self.inputs = {
                "Logits": new_logits,
                "Label": new_labels,
                "LogitsLength": self.logits_length,
                "LabelLength": self.labels_length,
            }
            self.outputs = {"Loss": loss}
            self.attrs = {
                "blank": self.blank,
                "norm_by_times": self.norm_by_times,
            }

        def test_check_output(self):
            self.check_output(check_eager=True)

        def test_check_grad(self):
            self.outputs['WarpCTCGrad'] = self.gradient
            place = paddle.XPUPlace(0)
            self.check_grad_with_place(
                place,
                ["Logits"],
                "Loss",
                max_relative_error=0.007,
                check_dygraph=False,
            )

    class TestWarpCTCOpWithPaddingCase1(TestWarpCTCOpWithPadding):
        def config(self):
            self.batch_size = 4
            self.num_classes = CUDA_BLOCK_SIZE + 2
            self.logits_lod = [[4, 1, 3, 3]]
            self.labels_lod = [[3, 1, 4, 4]]
            self.logits_length = np.array([4, 1, 3, 3], dtype=np.int64)
            self.labels_length = np.array([3, 1, 4, 4], dtype=np.int64)
            self.blank = self.num_classes - 1
            self.norm_by_times = False

    class TestWarpCTCOpError(unittest.TestCase):
        def test_errors(self):
            self.dtype = self.in_type
            self.place = paddle.XPUPlace(0)
            with program_guard(Program(), Program()):
                logits = fluid.data(
                    name='logits', shape=[5, 16, 6], dtype=self.dtype
                )
                logits_length = fluid.data(
                    name='logits_length', shape=[None], dtype='int64'
                )
                label = fluid.data(name='label', shape=[16, 3], dtype='int32')
                label_length = fluid.data(
                    name='labels_length', shape=[None], dtype='int64'
                )

                def test_logits_Variable():
                    logits_data = np.random.rand(5, 16, 6).astype(logits.dtype)
                    fluid.layers.warpctc(
                        input=logits_data,
                        label=label,
                        input_length=logits_length,
                        label_length=label_length,
                    )

                self.assertRaises(TypeError, test_logits_Variable)

                def test_label_Variable():
                    label_data = np.random.randint(0, 5, [5, 1]).astype("int32")
                    fluid.layers.warpctc(
                        input=logits,
                        label=label_data,
                        input_length=logits_length,
                        label_length=label_length,
                    )

                self.assertRaises(TypeError, test_label_Variable)

                def test_logits_len_Variable():
                    logits_length_data = np.array([5] * 16).astype("int64")
                    fluid.layers.warpctc(
                        input=logits,
                        label=label,
                        input_length=logits_length_data,
                        label_length=label_length,
                    )

                self.assertRaises(TypeError, test_logits_len_Variable)

                def test_label_len_Variable():
                    label_length_data = np.array([3] * 16).astype("int64")
                    fluid.layers.warpctc(
                        input=logits,
                        label=label,
                        input_length=logits_length,
                        label_length=label_length_data,
                    )

                self.assertRaises(TypeError, test_label_len_Variable)

        def test_dygraph_errors(self):
            def test_dygraph_with_lod():
                self.dtype = self.in_type
                self.place = paddle.XPUPlace(0)
                logits = np.random.uniform(0.1, 1.0, [20, 15]).astype(
                    self.dtype
                )
                # labels should not be blank
                labels = np.random.randint(0, 15 - 1, [15, 1], dtype="int32")
                softmax = paddle.to_tensor(logits)
                labels = paddle.to_tensor(labels)

                fluid.layers.warpctc(input=softmax, label=labels)

            paddle.disable_static()
            self.assertRaises(ValueError, test_dygraph_with_lod)
            paddle.enable_static()

    class TestCTCLossAPICase(unittest.TestCase):
        def test_functinal_api(self):
            self.dtype = self.in_type
            self.place = paddle.XPUPlace(0)
            self.batch_size = 4
            self.num_classes = CUDA_BLOCK_SIZE + 2
            self.logits_length = np.array([4, 1, 3, 3], dtype=np.int64)
            self.labels_length = np.array([3, 1, 4, 4], dtype=np.int64)
            self.blank = self.num_classes - 1
            self.norm_by_times = False

            logits = np.random.uniform(
                0.1,
                1.0,
                [max(self.logits_length), self.batch_size, self.num_classes],
            ).astype(self.dtype)
            softmax = np.apply_along_axis(stable_softmax, -1, logits)
            # labels should not be blank
            labels = np.random.randint(
                0,
                self.num_classes - 1,
                [self.batch_size, max(self.labels_length)],
                dtype="int32",
            )

            ctc = CTCForward(
                softmax,
                self.logits_length,
                labels,
                self.labels_length,
                self.num_classes,
                self.batch_size,
                self.blank,
                self.norm_by_times,
            )
            loss_np = ctc.forward()

            paddle.disable_static()
            softmax = paddle.to_tensor(logits)
            labels = paddle.to_tensor(labels)
            logits_length = paddle.to_tensor(self.logits_length)
            labels_length = paddle.to_tensor(self.labels_length)
            loss_pd_mean = F.ctc_loss(
                softmax,
                labels,
                logits_length,
                labels_length,
                blank=self.blank,
                reduction='mean',
            )
            loss_pd_mean = loss_pd_mean.numpy()

            loss_pd_sum = F.ctc_loss(
                softmax,
                labels,
                logits_length,
                labels_length,
                blank=self.blank,
                reduction='sum',
            )
            loss_pd_sum = loss_pd_sum.numpy()

            paddle.enable_static()
            loss_np = np.squeeze(loss_np, axis=-1)
            loss_np_mean = (loss_np / labels_length.numpy()).mean()
            loss_np_sum = loss_np.sum()

            np.testing.assert_allclose(
                loss_pd_mean, loss_np_mean, rtol=1e-05, atol=1
            )
            np.testing.assert_allclose(
                loss_pd_sum, loss_np_sum, rtol=1e-05, atol=1
            )

        def test_class_api(self):
            self.dtype = self.in_type
            self.place = paddle.XPUPlace(0)
            self.batch_size = 3
            self.num_classes = 15
            self.logits_length = np.array([3, 3, 3], dtype=np.int64)
            self.labels_length = np.array([0, 1, 2], dtype=np.int64)
            self.blank = 0
            self.norm_by_times = False

            logits = np.random.uniform(
                0.1,
                1.0,
                [max(self.logits_length), self.batch_size, self.num_classes],
            ).astype(self.dtype)
            softmax = np.apply_along_axis(stable_softmax, -1, logits)
            # labels should not be blank
            labels = np.random.randint(
                1,
                self.num_classes,
                [self.batch_size, max(self.labels_length)],
                dtype="int32",
            )

            ctc = CTCForward(
                softmax,
                self.logits_length,
                labels,
                self.labels_length,
                self.num_classes,
                self.batch_size,
                self.blank,
                self.norm_by_times,
            )
            loss_np = ctc.forward()

            paddle.disable_static()
            softmax = paddle.to_tensor(logits)
            labels = paddle.to_tensor(labels)
            logits_length = paddle.to_tensor(self.logits_length)
            labels_length = paddle.to_tensor(self.labels_length)

            loss_pd = paddle.nn.CTCLoss(self.blank, 'none')(
                softmax, labels, logits_length, labels_length
            )
            loss_pd = loss_pd.numpy()
            paddle.enable_static()
            loss_np = np.squeeze(loss_np, axis=-1)

            np.testing.assert_allclose(loss_pd, loss_np, rtol=1e-05, atol=1)


support_types = get_xpu_op_support_types('warpctc')
for stype in support_types:
    create_test_class(globals(), XPUTestWarpCTCOp, stype)

if __name__ == "__main__":
    unittest.main()