BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)

Commit 25ffe9c2 (unverified)
Authored by zhangyikun02 on Nov 23, 2022; committed via GitHub on Nov 23, 2022.
add warpctc kernel and change cast_v2 to cast for xpu, test=kunlun (#48134)
Parent: b07e6b45

Showing 16 changed files with 804 additions and 81 deletions (+804, -81).
cmake/external/xpu.cmake                                          +1    -1
paddle/fluid/framework/data_type_transform.cc                     +2    -2
paddle/fluid/imperative/tests/test_gradient_accmulator.cc        +10    -4
paddle/fluid/operators/metrics/accuracy_op_xpu.cc                 +4    -4
paddle/fluid/operators/top_k_op_xpu.cc                            +5    -5
paddle/fluid/platform/device/xpu/xpu2_op_list.h                   +3    -0
paddle/phi/kernels/xpu/amp_kernel.cc                             +10   -10
paddle/phi/kernels/xpu/cast_kernel.cc                             +8    -8
paddle/phi/kernels/xpu/cross_entropy_grad_kernel.cc              +10   -10
paddle/phi/kernels/xpu/cross_entropy_kernel.cc                    +5    -5
paddle/phi/kernels/xpu/gather_grad_kernel.cc                      +6   -16
paddle/phi/kernels/xpu/sgd_kernel.cc                              +5    -6
paddle/phi/kernels/xpu/top_k_kernel.cc                           +10   -10
paddle/phi/kernels/xpu/warpctc_grad_kernel.cc                    +60    -0
paddle/phi/kernels/xpu/warpctc_kernel.cc                        +102    -0
python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py  +563    -0
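Nearly all of the C++ hunks below apply one mechanical rewrite: the legacy XDNN primitive xpu::cast_v2 is replaced by xpu::cast (same template arguments and same argument order: context, source pointer, destination pointer, element count), and the label passed to PADDLE_ENFORCE_XDNN_SUCCESS is updated to match. A minimal sketch of the pattern, assuming an already-obtained xpu::Context* ctx and device buffers src/dst of n elements (placeholder names, not identifiers from this commit):

// Before (removed in this commit):
//   int r = xpu::cast_v2<int64_t, int32_t>(ctx, src, dst, n);
//   PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
// After:
int r = xpu::cast<int64_t, int32_t>(ctx, src, dst, n);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");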
cmake/external/xpu.cmake (view file @ 25ffe9c2)

@@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
 if(NOT DEFINED XPU_BASE_URL)
   set(XPU_BASE_URL_WITHOUT_DATE
       "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
-  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221116")
+  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221120")
 else()
   set(XPU_BASE_URL "${XPU_BASE_URL}")
 endif()
paddle/fluid/framework/data_type_transform.cc (view file @ 25ffe9c2)

@@ -40,12 +40,12 @@ static void XPUCastData(const phi::DenseTensor& in,
                         const platform::XPUDeviceContext* dev_ctx) {
   using XPUInTDType = typename XPUTypeTrait<InType>::Type;
   using XPUOutTDType = typename XPUTypeTrait<OutType>::Type;
-  int r = xpu::cast_v2<XPUInTDType, XPUOutTDType>(
+  int r = xpu::cast<XPUInTDType, XPUOutTDType>(
       dev_ctx->x_context(),
       reinterpret_cast<const XPUInTDType*>(in.data<InType>()),
       reinterpret_cast<XPUOutTDType*>(out->mutable_data<OutType>(in.place())),
       in.numel());
-  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
   dev_ctx->Wait();
 }
paddle/fluid/imperative/tests/test_gradient_accmulator.cc (view file @ 25ffe9c2)

@@ -161,13 +161,10 @@ TEST(test_add_functor, add_functor) {
                          static_cast<platform::float16>(1.0),
                          static_cast<platform::float16>(2.0));
   EXPECT_EQ(cpu_res, 0);
-#ifndef PADDLE_WITH_XPU
-  // does not support double when compiled using xpu
   // double
   cpu_res = TensorddTest(
       cpu_place, cpu_place, static_cast<double>(1.0), static_cast<double>(2.0));
   EXPECT_EQ(cpu_res, 0);
-#endif
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   int gpu_res = 1;

@@ -217,6 +214,9 @@ TEST(test_add_functor, add_functor) {
                          static_cast<platform::float16>(1.0),
                          static_cast<platform::float16>(2.0));
   EXPECT_EQ(xpu_res, 0);
+  xpu_res = TensorddTest(
+      xpu_place, xpu_place, static_cast<double>(1.0), static_cast<double>(2.0));
+  EXPECT_EQ(xpu_res, 0);
   // different places
   xpu_res = TensorddTest(
       cpu_place, xpu_place, static_cast<float>(1.0), static_cast<float>(2.0));

@@ -234,6 +234,12 @@ TEST(test_add_functor, add_functor) {
                          static_cast<platform::float16>(1.0),
                          static_cast<platform::float16>(2.0));
   EXPECT_EQ(xpu_res, 0);
+  xpu_res = TensorddTest(
+      cpu_place, xpu_place, static_cast<double>(1.0), static_cast<double>(2.0));
+  EXPECT_EQ(xpu_res, 0);
+  xpu_res = TensorddTest(
+      xpu_place, cpu_place, static_cast<double>(1.0), static_cast<double>(2.0));
+  EXPECT_EQ(xpu_res, 0);
 #endif
 }
paddle/fluid/operators/metrics/accuracy_op_xpu.cc (view file @ 25ffe9c2)

@@ -50,13 +50,13 @@ class AccuracyXPUKernel : public framework::OpKernel<T> {
     int* label_int32_ptr = RAII_GUARD.alloc_l3_or_gm<int>(size);
     PADDLE_ENFORCE_XDNN_NOT_NULL(label_int32_ptr);
-    int r = xpu::cast_v2<int64_t, int32_t>(
+    int r = xpu::cast<int64_t, int32_t>(
         dev_ctx.x_context(), indices_data, indices_int32_ptr, size);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
-    r = xpu::cast_v2<int64_t, int32_t>(
+    r = xpu::cast<int64_t, int32_t>(
         dev_ctx.x_context(), label_data, label_int32_ptr, size);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     r = xpu::accuracy(dev_ctx.x_context(),
                       indices_int32_ptr,
paddle/fluid/operators/top_k_op_xpu.cc (view file @ 25ffe9c2)

@@ -79,11 +79,11 @@ class TopkXPUKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "sorted_topk");
     // cast to int64 as final result
-    r = xpu::cast_v2<int32_t, int64_t>(dev_ctx.x_context(),
-                                       (const int32_t*)indices_int_data,
-                                       indices_data,
-                                       indices->numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+    r = xpu::cast<int32_t, int64_t>(dev_ctx.x_context(),
+                                    (const int32_t*)indices_int_data,
+                                    indices_data,
+                                    indices->numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
   }
 };
paddle/fluid/platform/device/xpu/xpu2_op_list.h (view file @ 25ffe9c2)

@@ -681,6 +681,9 @@ XPUOpMap& get_kl2_ops() {
                      pOpKernelType(vartype::INT8, XPUPlace()),
                      pOpKernelType(vartype::UINT8, XPUPlace()),
                      pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"warpctc_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"warpctc", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"where_index",
        XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()),
                      pOpKernelType(vartype::BOOL, XPUPlace()),
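The two entries added here are what make the new phi warpctc kernels visible to the KL2 op dispatcher; both ops are registered for FP32 only, which matches the PD_REGISTER_KERNEL(..., float) registrations in the new kernel files further down. As a purely hypothetical sketch (FP16 support is not part of this commit), an entry that also advertised float16 would simply list a second kernel type:

// Hypothetical extension, for illustration only -- not added by this commit:
{"warpctc",
 XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
               pOpKernelType(vartype::FP16, XPUPlace())})},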
paddle/phi/kernels/xpu/amp_kernel.cc (view file @ 25ffe9c2)

@@ -233,11 +233,11 @@ void CheckFiniteAndUnscaleKernel(const Context& dev_ctx,
       dev_ctx.template Alloc<MPDType>(&float_out,
                                       out->numel() * sizeof(MPDType));
-      int r = xpu::cast_v2(dev_ctx.x_context(),
-                           reinterpret_cast<const float16*>(x->data<T>()),
-                           float_x.data<MPDType>(),
-                           x->numel());
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+      int r = xpu::cast(dev_ctx.x_context(),
+                        reinterpret_cast<const float16*>(x->data<T>()),
+                        float_x.data<MPDType>(),
+                        x->numel());
+      PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
       r = xpu::scale(dev_ctx.x_context(),
                      float_x.data<MPDType>(),

@@ -248,11 +248,11 @@ void CheckFiniteAndUnscaleKernel(const Context& dev_ctx,
                      0.0);
       PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
-      r = xpu::cast_v2(dev_ctx.x_context(),
-                       float_out.data<MPDType>(),
-                       reinterpret_cast<float16*>(out->data<T>()),
-                       out->numel());
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+      r = xpu::cast(dev_ctx.x_context(),
+                    float_out.data<MPDType>(),
+                    reinterpret_cast<float16*>(out->data<T>()),
+                    out->numel());
+      PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     } else {
       int r = xpu::scale(dev_ctx.x_context(),
                          reinterpret_cast<const XPUType*>(x->data<T>()),
paddle/phi/kernels/xpu/cast_kernel.cc (view file @ 25ffe9c2)

@@ -39,14 +39,14 @@ void CastKernel(const Context& dev_ctx,
   int r = -1;
   switch (out_dtype) {
     case phi::DataType::FLOAT32:
-      r = xpu::cast_v2<XPUInTDType, float>(
+      r = xpu::cast<XPUInTDType, float>(
           dev_ctx.x_context(),
           reinterpret_cast<const XPUInTDType*>(in_data),
           dev_ctx.template Alloc<float>(out),
           numel);
       break;
     case phi::DataType::FLOAT16:
-      r = xpu::cast_v2<XPUInTDType, float16>(
+      r = xpu::cast<XPUInTDType, float16>(
           dev_ctx.x_context(),
           reinterpret_cast<const XPUInTDType*>(in_data),
           reinterpret_cast<float16*>(

@@ -54,35 +54,35 @@ void CastKernel(const Context& dev_ctx,
           numel);
       break;
     case phi::DataType::INT64:
-      r = xpu::cast_v2<XPUInTDType, int64_t>(
+      r = xpu::cast<XPUInTDType, int64_t>(
           dev_ctx.x_context(),
           reinterpret_cast<const XPUInTDType*>(in_data),
           dev_ctx.template Alloc<int64_t>(out),
           numel);
       break;
     case phi::DataType::INT32:
-      r = xpu::cast_v2<XPUInTDType, int32_t>(
+      r = xpu::cast<XPUInTDType, int32_t>(
           dev_ctx.x_context(),
           reinterpret_cast<const XPUInTDType*>(in_data),
           dev_ctx.template Alloc<int>(out),
           numel);
       break;
     case phi::DataType::BOOL:
-      r = xpu::cast_v2<XPUInTDType, bool>(
+      r = xpu::cast<XPUInTDType, bool>(
           dev_ctx.x_context(),
           reinterpret_cast<const XPUInTDType*>(in_data),
           dev_ctx.template Alloc<bool>(out),
           numel);
       break;
     case phi::DataType::UINT8:
-      r = xpu::cast_v2<XPUInTDType, uint8_t>(
+      r = xpu::cast<XPUInTDType, uint8_t>(
           dev_ctx.x_context(),
           reinterpret_cast<const XPUInTDType*>(in_data),
           dev_ctx.template Alloc<uint8_t>(out),
           numel);
       break;
     case phi::DataType::FLOAT64:
-      r = xpu::cast_v2<XPUInTDType, double>(
+      r = xpu::cast<XPUInTDType, double>(
           dev_ctx.x_context(),
           reinterpret_cast<const XPUInTDType*>(in_data),
           dev_ctx.template Alloc<double>(out),

@@ -93,7 +93,7 @@ void CastKernel(const Context& dev_ctx,
               "Not supported cast %d -> %d", x.dtype(), out_dtype));
   }
-  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
 }
 }  // namespace phi
paddle/phi/kernels/xpu/cross_entropy_grad_kernel.cc (view file @ 25ffe9c2)

@@ -59,11 +59,11 @@ void CrossEntropyWithSoftmaxGradKernel(const Context& dev_ctx,
         RAII_GUARD.alloc_l3_or_gm<int32_t>(labels.numel());
     PADDLE_ENFORCE_XDNN_NOT_NULL(labels_int_ptr_l3);
-    r = xpu::cast_v2<int64_t, int32_t>(dev_ctx.x_context(),
-                                       labels.data<int64_t>(),
-                                       labels_int_ptr_l3,
-                                       labels.numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+    r = xpu::cast<int64_t, int32_t>(dev_ctx.x_context(),
+                                    labels.data<int64_t>(),
+                                    labels_int_ptr_l3,
+                                    labels.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     r = xpu::hard_softmax_with_cross_entropy_grad<XPUType, int>(
         dev_ctx.x_context(),

@@ -117,11 +117,11 @@ void CrossEntropyWithSoftmaxGradKernel(const Context& dev_ctx,
         RAII_GUARD.alloc_l3_or_gm<int32_t>(labels.numel());
     PADDLE_ENFORCE_XDNN_NOT_NULL(labels_int_ptr_l3);
-    r = xpu::cast_v2<int64_t, int32_t>(dev_ctx.x_context(),
-                                       labels.data<int64_t>(),
-                                       labels_int_ptr_l3,
-                                       labels.numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "clip_v2");
+    r = xpu::cast<int64_t, int32_t>(dev_ctx.x_context(),
+                                    labels.data<int64_t>(),
+                                    labels_int_ptr_l3,
+                                    labels.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     r = xpu::hard_softmax_with_cross_entropy_grad<XPUType, int>(
         dev_ctx.x_context(),
         reinterpret_cast<const XPUType*>(loss_grad.data<T>()),
paddle/phi/kernels/xpu/cross_entropy_kernel.cc (view file @ 25ffe9c2)

@@ -132,11 +132,11 @@ void CrossEntropyWithSoftmaxKernel(const Context& dev_ctx,
     int* labels_int_ptr_l3 =
         RAII_GUARD.alloc_l3_or_gm<int32_t>(labels.numel());
     PADDLE_ENFORCE_XDNN_NOT_NULL(labels_int_ptr_l3);
-    r = xpu::cast_v2<int64_t, int32_t>(dev_ctx.x_context(),
-                                       labels.data<int64_t>(),
-                                       labels_int_ptr_l3,
-                                       labels.numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "clip_v2");
+    r = xpu::cast<int64_t, int32_t>(dev_ctx.x_context(),
+                                    labels.data<int64_t>(),
+                                    labels_int_ptr_l3,
+                                    labels.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     r = xpu::hard_cross_entropy<XPUType, int32_t>(
         dev_ctx.x_context(),
paddle/phi/kernels/xpu/gather_grad_kernel.cc (view file @ 25ffe9c2)

@@ -72,16 +72,11 @@ void GatherGradKernel(const Context& dev_ctx,
   } else {
     xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
     int* index_int_ptr_l3 = RAII_GUARD.alloc_l3_or_gm<int32_t>(index.numel());
-    r = xpu::cast_v2<int64_t, int32_t>(dev_ctx.x_context(),
-                                       index.data<int64_t>(),
-                                       index_int_ptr_l3,
-                                       index.numel());
-    PADDLE_ENFORCE_EQ(r,
-                      XPU_SUCCESS,
-                      phi::errors::External("XPU API(cast_v2) return wrong "
-                                            "value[%d %s]",
-                                            r,
-                                            XPUAPIErrorMsg[r]));
+    r = xpu::cast<int64_t, int32_t>(dev_ctx.x_context(),
+                                    index.data<int64_t>(),
+                                    index_int_ptr_l3,
+                                    index.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     r = xpu::gather_grad<XPUType, int>(dev_ctx.x_context(),

@@ -93,12 +88,7 @@ void GatherGradKernel(const Context& dev_ctx,
                                        axis_v,
                                        overwrite);
   }
-  PADDLE_ENFORCE_EQ(
-      r,
-      xpu::Error_t::SUCCESS,
-      phi::errors::External("XPU gather grad kernel return wrong value[%d %s]",
-                            r,
-                            XPUAPIErrorMsg[r]));
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "gather_grad");
 }
 }  // namespace phi
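Besides the cast_v2 -> cast rename, this file also drops its hand-written PADDLE_ENFORCE_EQ checks in favour of PADDLE_ENFORCE_XDNN_SUCCESS, the macro already used by the other XPU kernels touched in this commit. The shape of that simplification, reusing the names from the hunk above:

// Before: compare r against the success code and format the message by hand.
//   PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
//                     phi::errors::External(
//                         "XPU gather grad kernel return wrong value[%d %s]",
//                         r, XPUAPIErrorMsg[r]));
// After: one macro that checks r and raises an error naming the failed XDNN op.
PADDLE_ENFORCE_XDNN_SUCCESS(r, "gather_grad");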
paddle/phi/kernels/xpu/sgd_kernel.cc (view file @ 25ffe9c2)

@@ -54,12 +54,11 @@ void SGDDenseKernel(const Context &dev_ctx,
   const float* lr = nullptr;
   if (std::is_same<T, dtype::float16>::value) {
     float* lr_float = RAII_GUARD.alloc_l3_or_gm<float>(learning_rate.numel());
-    int r = xpu::cast_v2<XPUType, float>(dev_ctx.x_context(),
-                                         reinterpret_cast<const XPUType*>(lr_t),
-                                         lr_float,
-                                         learning_rate.numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "clip_v2");
+    int r = xpu::cast<XPUType, float>(dev_ctx.x_context(),
+                                      reinterpret_cast<const XPUType*>(lr_t),
+                                      lr_float,
+                                      learning_rate.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     lr = lr_float;
   } else {
     lr = reinterpret_cast<const float*>(lr_t);
paddle/phi/kernels/xpu/top_k_kernel.cc (view file @ 25ffe9c2)

@@ -68,11 +68,11 @@ void TopkKernel(const Context& dev_ctx,
                            k);
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "sorted_topk");
-    r = xpu::cast_v2<int32_t, int64_t>(dev_ctx.x_context(),
-                                       (const int32_t*)indices_int_data,
-                                       indices_data,
-                                       indices->numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+    r = xpu::cast<int32_t, int64_t>(dev_ctx.x_context(),
+                                    (const int32_t*)indices_int_data,
+                                    indices_data,
+                                    indices->numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
   } else {
     // do transpose if axis is not the last dim of input
     std::vector<int> trans_axes;

@@ -127,11 +127,11 @@ void TopkKernel(const Context& dev_ctx,
                            k);
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "sorted_topk");
-    r = xpu::cast_v2<int32_t, int64_t>(dev_ctx.x_context(),
-                                       (const int32_t*)trans_idx_int32_data,
-                                       trans_idx_data,
-                                       indices->numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+    r = xpu::cast<int32_t, int64_t>(dev_ctx.x_context(),
+                                    (const int32_t*)trans_idx_int32_data,
+                                    trans_idx_data,
+                                    indices->numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
     // Transpose back to original dims
     std::vector<int> trans_back_axes;
     for (int i = 0; i < axis; i++) {
paddle/phi/kernels/xpu/warpctc_grad_kernel.cc (new file, mode 100644; view file @ 25ffe9c2)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/warpctc_grad_kernel.h"

#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {

template <typename T, typename Context>
void WarpctcGradKernel(const Context& dev_ctx,
                       const DenseTensor& logits,
                       const paddle::optional<DenseTensor>& logits_length,
                       const DenseTensor& warpctcgrad,
                       const DenseTensor& loss_grad,
                       int blank,
                       bool norm_by_times,
                       DenseTensor* logits_grad) {
  dev_ctx.template Alloc<T>(logits_grad);

  bool has_logits_length = logits_length.is_initialized();
  if (!has_logits_length) {
    PADDLE_THROW(
        phi::errors::External("XPU only support logits_length is_initialized"));
  }

  int max_seq_length = warpctcgrad.dims()[0];  // Tmax
  int num_sequences = warpctcgrad.dims()[1];   // B
  int seq_width = warpctcgrad.dims()[2];       // D
  auto* logits_length_ptr = logits_length.get_ptr();

  int r = xpu::ctc_loss_grad<T, int64_t>(dev_ctx.x_context(),
                                         loss_grad.data<T>(),
                                         logits_grad->data<T>(),
                                         warpctcgrad.data<T>(),
                                         max_seq_length,
                                         num_sequences,
                                         seq_width,
                                         logits_length_ptr->data<int64_t>(),
                                         norm_by_times);
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "ctc_loss_grad");
}

}  // namespace phi

PD_REGISTER_KERNEL(
    warpctc_grad, XPU, ALL_LAYOUT, phi::WarpctcGradKernel, float) {}
paddle/phi/kernels/xpu/warpctc_kernel.cc (new file, mode 100644; view file @ 25ffe9c2)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/warpctc_kernel.h"

#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {

template <typename T, typename Context>
void WarpctcKernel(const Context& dev_ctx,
                   const DenseTensor& logits,
                   const DenseTensor& label,
                   const paddle::optional<DenseTensor>& logits_length,
                   const paddle::optional<DenseTensor>& labels_length,
                   int blank,
                   bool norm_by_times,
                   DenseTensor* loss,
                   DenseTensor* warpctcgrad) {
  bool has_logits_length = logits_length.is_initialized();
  if (!has_logits_length) {
    PADDLE_THROW(
        phi::errors::External("XPU only support logits_length is_initialized"));
  }
  bool has_labels_length = labels_length.is_initialized();
  if (!has_labels_length) {
    PADDLE_THROW(
        phi::errors::External("XPU only support labels_length is_initialized"));
  }

  int max_sequence_length = logits.dims()[0];
  int num_sequences = logits.dims()[1];
  int sequence_width = logits.dims()[2];
  int max_target_seq_length = label.dims()[1];

  PADDLE_ENFORCE_GT(max_sequence_length,
                    0,
                    phi::errors::InvalidArgument(
                        "The first dimension of Input(Logits) should be "
                        "greater than zero "
                        "but received %d. ",
                        max_sequence_length));
  PADDLE_ENFORCE_GT(num_sequences,
                    0,
                    phi::errors::InvalidArgument(
                        "The second dimension of Input(Logits) should be "
                        "greater than zero "
                        "but received %d. ",
                        num_sequences));
  PADDLE_ENFORCE_GT(sequence_width,
                    0,
                    phi::errors::InvalidArgument(
                        "The third dimension of Input(Logits) should be "
                        "greater than zero "
                        "but received %d. ",
                        sequence_width));

  loss->Resize(phi::make_ddim({num_sequences, 1}));
  dev_ctx.template Alloc<T>(loss);

  warpctcgrad->Resize(
      phi::make_ddim({max_sequence_length, num_sequences, sequence_width}));
  dev_ctx.template Alloc<T>(warpctcgrad);

  const T* logits_data = logits.data<T>();
  const int* label_data = label.data<int>();
  auto logits_length_data = logits_length.get_ptr()->data<int64_t>();
  auto labels_length_data = labels_length.get_ptr()->data<int64_t>();
  T* loss_data = loss->data<T>();
  T* warpctcgrad_data = warpctcgrad->data<T>();

  int r = xpu::ctc_loss<T, int64_t>(dev_ctx.x_context(),
                                    logits_data,
                                    label_data,
                                    loss_data,
                                    warpctcgrad_data,
                                    logits_length_data,
                                    labels_length_data,
                                    max_sequence_length,
                                    num_sequences,
                                    sequence_width,
                                    max_target_seq_length,
                                    blank);
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "ctc_loss");
}

}  // namespace phi

PD_REGISTER_KERNEL(warpctc, XPU, ALL_LAYOUT, phi::WarpctcKernel, float) {}
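A reading aid for the two new kernels (derived from the Resize calls and dims() accesses above, not code from the commit): the forward kernel computes the per-sequence loss and, as a by-product, the full gradient tensor, so the backward kernel only has to combine that cached gradient with the incoming loss gradient.

// Shape summary, with Tmax = max logits length, B = batch size,
// D = number of classes, Lmax = max label length:
//
//   WarpctcKernel (forward), via xpu::ctc_loss:
//     logits      : [Tmax, B, D]   (time-major, padded)
//     label       : [B, Lmax]      (int32)
//     loss        : [B, 1]         (output)
//     warpctcgrad : [Tmax, B, D]   (output, cached for the backward pass)
//
//   WarpctcGradKernel (backward), via xpu::ctc_loss_grad:
//     logits_grad : [Tmax, B, D]   computed from loss_grad and warpctcgrad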
python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py (new file, mode 100644; view file @ 25ffe9c2)

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys

sys.path.append("..")

import unittest
import numpy as np
from test_softmax_op import stable_softmax
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
import paddle
import paddle.nn.functional as F
from op_test_xpu import XPUOpTest
from xpu.get_test_cover_info import (
    create_test_class,
    get_xpu_op_support_types,
    XPUOpTestWrapper,
)

paddle.enable_static()

CUDA_BLOCK_SIZE = 32


class CTCForward(object):
    def __init__(
        self,
        softmax,
        softmax_lod,
        labels,
        labels_lod,
        num_classes,
        batch_size,
        blank,
        norm_by_times,
    ):
        self.softmax = softmax
        self.softmax_lod = softmax_lod
        self.labels = labels
        self.labels_lod = labels_lod
        self.blank = blank
        self.norm_by_times = norm_by_times

        self.level = 0
        self.num_classes = num_classes
        self.batch_size = batch_size

        self.loss = np.zeros([self.batch_size, 1], dtype=softmax.dtype)
        self.gradient = np.zeros(self.softmax.shape, dtype=softmax.dtype)

        # float64
        self.EXP_MAX = sys.float_info.max
        self.EXP_MIN = sys.float_info.min
        self.LOG_ZERO = np.log(self.EXP_MIN)
        self.LOG_INFINITY = np.log(self.EXP_MAX)

    def safe_exp(self, x):
        if x <= self.LOG_ZERO:
            return 0.0
        if x >= self.LOG_INFINITY:
            return self.EXP_MAX
        return np.exp(x)

    def safe_log(self, x):
        if x <= self.EXP_MIN:
            return self.LOG_ZERO
        return np.log(x)

    # x = lna and y = lnb are in log scale, ln(a / b) = lna - lnb
    def log_div(self, x, y):
        res = x - y
        if res <= self.LOG_ZERO:
            return self.LOG_ZERO
        if res >= self.LOG_INFINITY:
            return self.LOG_INFINITY
        return res

    # x = lna and y = lnb are in log scale, ln(a * b) = lna + lnb
    def log_mul(self, x, y):
        res = x + y
        if res <= self.LOG_ZERO:
            return self.LOG_ZERO
        if res >= self.LOG_INFINITY:
            return self.LOG_INFINITY
        return res

    # x = lna and y = lnb are in log scale,
    # ln(a + b) = lna + ln(1 + exp(lnb - lna)), where b > a
    def log_add(self, x, y):
        if x < y:
            t = y
            y = x
            x = t
        return x + self.safe_log(1 + self.safe_exp(y - x))

    def segment_range(self, time, total_times, total_segments):
        start = max(0, total_segments - (2 * (total_times - time)))
        end = min(total_segments, 2 * (time + 1))
        return start, end

    def forward_a_sequence(self, softmax_a_sequence, labels_a_sequence):
        total_times = softmax_a_sequence.shape[0]
        total_segments = labels_a_sequence.shape[0] * 2 + 1

        required_times = labels_a_sequence.shape[0]
        old_label = -1
        for i in range(labels_a_sequence.shape[0]):
            # two contingous labels with the same value
            if labels_a_sequence[i, 0] == old_label:
                required_times = required_times + 1
            old_label = labels_a_sequence[i, 0]

        if total_times < required_times:
            return 0

        # calculate the forward and backward variables,
        # reference Chapter 7.3 of "Alex Grave, Supervised Sequence
        # Labelling with Recurrent Neural Networks"
        log_acts = np.zeros(
            [total_times, self.num_classes], dtype=softmax_a_sequence.dtype
        )
        for i in range(total_times):
            for j in range(self.num_classes):
                log_acts[i, j] = self.safe_log(softmax_a_sequence[i, j])

        # calculate the forward variables
        forward_vars = np.zeros(
            [total_times, total_segments], dtype=softmax_a_sequence.dtype
        )
        for i in range(total_times):
            for j in range(total_segments):
                forward_vars[i, j] = self.LOG_ZERO

        for i in range(total_times):
            # dp initialization at t0
            if i == 0:
                forward_vars[i, 0] = log_acts[0, self.blank]
                if total_segments > 1:
                    forward_vars[i, 1] = log_acts[0, labels_a_sequence[i, 0]]
                continue

            # dp from t1
            start, end = self.segment_range(i, total_times, total_segments)
            for k in range(end - start):
                j = k + start
                if j & 1 == 1:
                    label_idx = j // 2
                    label_val = labels_a_sequence[label_idx, 0]
                    fv = self.log_add(
                        forward_vars[i - 1, j], forward_vars[i - 1, j - 1]
                    )
                    if (
                        j > 1
                        and label_val != labels_a_sequence[label_idx - 1, 0]
                    ):
                        fv = self.log_add(fv, forward_vars[i - 1, j - 2])
                    fv = self.log_mul(fv, log_acts[i, label_val])
                else:
                    fv = forward_vars[i - 1, j]
                    if j > 0:
                        fv = self.log_add(fv, forward_vars[i - 1, j - 1])
                    fv = self.log_mul(fv, log_acts[i, self.blank])
                forward_vars[i, j] = fv

        # sum the last two value as log_prob
        log_prob = forward_vars[total_times - 1, total_segments - 1]
        if total_segments > 1:
            log_prob = self.log_add(
                log_prob, forward_vars[total_times - 1, total_segments - 2]
            )

        return -log_prob

    def forward(self):
        softmax_offset = 0
        labels_offset = 0
        for i in range(self.batch_size):
            if self.labels.shape[1] == 1:
                softmax_start_i = softmax_offset
                softmax_end_i = softmax_offset + self.softmax_lod[self.level][i]
                labels_start_i = labels_offset
                labels_end_i = labels_offset + self.labels_lod[self.level][i]

                softmax_a_sequence = self.softmax[
                    softmax_start_i:softmax_end_i, :
                ]
                labels_a_sequence = self.labels[labels_start_i:labels_end_i, :]
                self.loss[i] = self.forward_a_sequence(
                    softmax_a_sequence, labels_a_sequence
                )
                softmax_offset += self.softmax_lod[self.level][i]
                labels_offset += self.labels_lod[self.level][i]
            else:
                softmax_a_sequence = self.softmax[: self.softmax_lod[i], i, :]
                labels_a_sequence = self.labels[: self.labels_lod[i], :]
                self.loss[i] = self.forward_a_sequence(
                    softmax_a_sequence, labels_a_sequence
                )
        return self.loss


def python_api(
    logits,
    label,
    logits_length=None,
    labels_length=None,
    blank=0,
    norm_by_times=False,
):
    return paddle.fluid.layers.warpctc(
        logits, label, blank, norm_by_times, logits_length, labels_length
    )


class XPUTestWarpCTCOp(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'warpctc'

    class TestWarpCTCOpWithPadding(XPUOpTest):
        def config(self):
            self.batch_size = 4
            self.num_classes = 8
            self.logits_lod = [[4, 1, 3, 3]]
            self.labels_lod = [[3, 1, 4, 4]]
            self.logits_length = np.array([4, 1, 3, 3], dtype=np.int64)
            self.labels_length = np.array([3, 1, 4, 4], dtype=np.int64)
            self.blank = self.num_classes - 1
            self.norm_by_times = False

        def setUp(self):
            self.op_type = "warpctc"
            self.dtype = self.in_type
            self.place = paddle.XPUPlace(0)
            self.python_api = python_api
            self.python_out_sig = ["Loss"]
            self.config()

            logits = np.random.uniform(
                0.1, 1.0, [sum(self.logits_length), self.num_classes]
            ).astype(self.dtype)
            print("logits.shape = ", logits.shape)
            softmax = np.apply_along_axis(stable_softmax, 1, logits)
            # labels should not be blank
            labels = np.random.randint(
                0,
                self.num_classes - 1,
                [sum(self.labels_length), 1],
                dtype="int32",
            )

            ctc = CTCForward(
                softmax,
                self.logits_lod,
                labels,
                self.labels_lod,
                self.num_classes,
                self.batch_size,
                self.blank,
                self.norm_by_times,
            )
            loss = ctc.forward()

            max_sequence_length = 0
            for i in range(self.batch_size):
                max_sequence_length = max(
                    max_sequence_length, self.logits_length[i]
                )
            # reshape logits to T*N*S
            new_logits = np.zeros(
                [max_sequence_length, self.batch_size, self.num_classes],
                dtype=logits.dtype,
            )

            cur = 0
            for batch_id in range(self.batch_size):
                for i in range(self.logits_length[batch_id]):
                    for j in range(self.num_classes):
                        new_logits[i, batch_id, j] = logits[cur + i, j]
                cur = cur + self.logits_length[batch_id]

            # reshape labels to N*S
            max_target_seq_length = 0
            for i in range(self.batch_size):
                max_target_seq_length = max(
                    max_target_seq_length, self.labels_length[i]
                )
            new_labels = np.zeros(
                [self.batch_size, max_target_seq_length], dtype="int32"
            )

            cur = 0
            for batch_id in range(self.batch_size):
                for i in range(self.labels_length[batch_id]):
                    new_labels[batch_id, i] = labels[cur + i]
                cur = cur + self.labels_length[batch_id]

            self.gradient = np.zeros(
                [max_sequence_length, self.batch_size, self.num_classes],
                dtype=logits.dtype,
            )

            self.inputs = {
                "Logits": new_logits,
                "Label": new_labels,
                "LogitsLength": self.logits_length,
                "LabelLength": self.labels_length,
            }
            self.outputs = {"Loss": loss}
            self.attrs = {
                "blank": self.blank,
                "norm_by_times": self.norm_by_times,
            }

        def test_check_output(self):
            self.check_output(check_eager=True)

        def test_check_grad(self):
            self.outputs['WarpCTCGrad'] = self.gradient
            place = paddle.XPUPlace(0)
            self.check_grad_with_place(
                place,
                ["Logits"],
                "Loss",
                max_relative_error=0.007,
                check_dygraph=False,
            )

    class TestWarpCTCOpWithPaddingCase1(TestWarpCTCOpWithPadding):
        def config(self):
            self.batch_size = 4
            self.num_classes = CUDA_BLOCK_SIZE + 2
            self.logits_lod = [[4, 1, 3, 3]]
            self.labels_lod = [[3, 1, 4, 4]]
            self.logits_length = np.array([4, 1, 3, 3], dtype=np.int64)
            self.labels_length = np.array([3, 1, 4, 4], dtype=np.int64)
            self.blank = self.num_classes - 1
            self.norm_by_times = False

    class TestWarpCTCOpError(unittest.TestCase):
        def test_errors(self):
            self.dtype = self.in_type
            self.place = paddle.XPUPlace(0)
            with program_guard(Program(), Program()):
                logits = fluid.data(
                    name='logits', shape=[5, 16, 6], dtype=self.dtype
                )
                logits_length = fluid.data(
                    name='logits_length', shape=[None], dtype='int64'
                )
                label = fluid.data(name='label', shape=[16, 3], dtype='int32')
                label_length = fluid.data(
                    name='labels_length', shape=[None], dtype='int64'
                )

                def test_logits_Variable():
                    logits_data = np.random.rand(5, 16, 6).astype(logits.dtype)
                    fluid.layers.warpctc(
                        input=logits_data,
                        label=label,
                        input_length=logits_length,
                        label_length=label_length,
                    )

                self.assertRaises(TypeError, test_logits_Variable)

                def test_label_Variable():
                    label_data = np.random.randint(0, 5, [5, 1]).astype("int32")
                    fluid.layers.warpctc(
                        input=logits,
                        label=label_data,
                        input_length=logits_length,
                        label_length=label_length,
                    )

                self.assertRaises(TypeError, test_label_Variable)

                def test_logits_len_Variable():
                    logits_length_data = np.array([5] * 16).astype("int64")
                    fluid.layers.warpctc(
                        input=logits,
                        label=label,
                        input_length=logits_length_data,
                        label_length=label_length,
                    )

                self.assertRaises(TypeError, test_logits_len_Variable)

                def test_label_len_Variable():
                    label_length_data = np.array([3] * 16).astype("int64")
                    fluid.layers.warpctc(
                        input=logits,
                        label=label,
                        input_length=logits_length,
                        label_length=label_length_data,
                    )

                self.assertRaises(TypeError, test_label_len_Variable)

        def test_dygraph_errors(self):
            def test_dygraph_with_lod():
                self.dtype = self.in_type
                self.place = paddle.XPUPlace(0)
                logits = np.random.uniform(0.1, 1.0, [20, 15]).astype(
                    self.dtype
                )
                # labels should not be blank
                labels = np.random.randint(0, 15 - 1, [15, 1], dtype="int32")
                softmax = paddle.to_tensor(logits)
                labels = paddle.to_tensor(labels)
                fluid.layers.warpctc(input=softmax, label=labels)

            paddle.disable_static()
            self.assertRaises(ValueError, test_dygraph_with_lod)
            paddle.enable_static()

    class TestCTCLossAPICase(unittest.TestCase):
        def test_functinal_api(self):
            self.dtype = self.in_type
            self.place = paddle.XPUPlace(0)
            self.batch_size = 4
            self.num_classes = CUDA_BLOCK_SIZE + 2
            self.logits_length = np.array([4, 1, 3, 3], dtype=np.int64)
            self.labels_length = np.array([3, 1, 4, 4], dtype=np.int64)
            self.blank = self.num_classes - 1
            self.norm_by_times = False

            logits = np.random.uniform(
                0.1,
                1.0,
                [max(self.logits_length), self.batch_size, self.num_classes],
            ).astype(self.dtype)
            softmax = np.apply_along_axis(stable_softmax, -1, logits)
            # labels should not be blank
            labels = np.random.randint(
                0,
                self.num_classes - 1,
                [self.batch_size, max(self.labels_length)],
                dtype="int32",
            )

            ctc = CTCForward(
                softmax,
                self.logits_length,
                labels,
                self.labels_length,
                self.num_classes,
                self.batch_size,
                self.blank,
                self.norm_by_times,
            )
            loss_np = ctc.forward()

            paddle.disable_static()
            softmax = paddle.to_tensor(logits)
            labels = paddle.to_tensor(labels)
            logits_length = paddle.to_tensor(self.logits_length)
            labels_length = paddle.to_tensor(self.labels_length)
            loss_pd_mean = F.ctc_loss(
                softmax,
                labels,
                logits_length,
                labels_length,
                blank=self.blank,
                reduction='mean',
            )
            loss_pd_mean = loss_pd_mean.numpy()

            loss_pd_sum = F.ctc_loss(
                softmax,
                labels,
                logits_length,
                labels_length,
                blank=self.blank,
                reduction='sum',
            )
            loss_pd_sum = loss_pd_sum.numpy()

            paddle.enable_static()
            loss_np = np.squeeze(loss_np, axis=-1)
            loss_np_mean = (loss_np / labels_length.numpy()).mean()
            loss_np_sum = loss_np.sum()

            np.testing.assert_allclose(
                loss_pd_mean, loss_np_mean, rtol=1e-05, atol=1
            )
            np.testing.assert_allclose(
                loss_pd_sum, loss_np_sum, rtol=1e-05, atol=1
            )

        def test_class_api(self):
            self.dtype = self.in_type
            self.place = paddle.XPUPlace(0)
            self.batch_size = 3
            self.num_classes = 15
            self.logits_length = np.array([3, 3, 3], dtype=np.int64)
            self.labels_length = np.array([0, 1, 2], dtype=np.int64)
            self.blank = 0
            self.norm_by_times = False

            logits = np.random.uniform(
                0.1,
                1.0,
                [max(self.logits_length), self.batch_size, self.num_classes],
            ).astype(self.dtype)
            softmax = np.apply_along_axis(stable_softmax, -1, logits)
            # labels should not be blank
            labels = np.random.randint(
                1,
                self.num_classes,
                [self.batch_size, max(self.labels_length)],
                dtype="int32",
            )

            ctc = CTCForward(
                softmax,
                self.logits_length,
                labels,
                self.labels_length,
                self.num_classes,
                self.batch_size,
                self.blank,
                self.norm_by_times,
            )
            loss_np = ctc.forward()

            paddle.disable_static()
            softmax = paddle.to_tensor(logits)
            labels = paddle.to_tensor(labels)
            logits_length = paddle.to_tensor(self.logits_length)
            labels_length = paddle.to_tensor(self.labels_length)

            loss_pd = paddle.nn.CTCLoss(self.blank, 'none')(
                softmax, labels, logits_length, labels_length
            )
            loss_pd = loss_pd.numpy()
            paddle.enable_static()
            loss_np = np.squeeze(loss_np, axis=-1)

            np.testing.assert_allclose(loss_pd, loss_np, rtol=1e-05, atol=1)


support_types = get_xpu_op_support_types('warpctc')
for stype in support_types:
    create_test_class(globals(), XPUTestWarpCTCOp, stype)

if __name__ == "__main__":
    unittest.main()