Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
10ec329b
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
10ec329b
编写于
5月 23, 2018
作者:
Y
yangyaming
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refine code.
上级
07972467
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
183 addition
and
169 deletion
+183
-169
paddle/fluid/operators/math/sequence_padding.cc
paddle/fluid/operators/math/sequence_padding.cc
+63
-60
paddle/fluid/operators/math/sequence_padding.cu
paddle/fluid/operators/math/sequence_padding.cu
+72
-64
paddle/fluid/operators/math/sequence_padding.h
paddle/fluid/operators/math/sequence_padding.h
+34
-35
paddle/fluid/operators/math/sequence_padding_test.cc
paddle/fluid/operators/math/sequence_padding_test.cc
+7
-3
paddle/fluid/operators/sequence_pad_op.cc
paddle/fluid/operators/sequence_pad_op.cc
+1
-1
paddle/fluid/operators/warpctc_op.h
paddle/fluid/operators/warpctc_op.h
+6
-6
未找到文件。
paddle/fluid/operators/math/sequence_padding.cc
浏览文件 @
10ec329b
...
...
@@ -18,111 +18,114 @@ namespace paddle {
namespace
operators
{
namespace
math
{
template
<
typename
T
,
PaddingLayout
padding_layout
>
template
<
typename
T
>
void
CopyDataCPU
(
framework
::
LoDTensor
*
seq_tensor
,
framework
::
Tensor
*
pad
ding
_tensor
,
const
framework
::
Vector
<
size_t
>&
abs
_offset
,
framework
::
Tensor
*
pad_tensor
,
const
framework
::
Vector
<
size_t
>&
seq
_offset
,
const
int64_t
&
max_seq_len
,
const
int64_t
&
seq_width
,
bool
seq_to_padding
,
bool
norm_by_len
)
{
bool
seq_to_pad
,
bool
norm_by_len
,
OutputLayout
output_layout
)
{
T
*
seq_data
=
seq_tensor
->
data
<
T
>
();
T
*
pad
ding_data
=
padding
_tensor
->
data
<
T
>
();
T
*
pad
_data
=
pad
_tensor
->
data
<
T
>
();
int64_t
seq_num
=
abs
_offset
.
size
()
-
1
;
int64_t
seq_num
=
seq
_offset
.
size
()
-
1
;
for
(
int64_t
i
=
0
;
i
<
seq_num
;
++
i
)
{
int64_t
seq_start
=
abs_offset
[
i
];
int64_t
seq_len
=
abs_offset
[
i
+
1
]
-
seq_start
;
int64_t
seq_start
=
seq_offset
[
i
];
int64_t
seq_len
=
seq_offset
[
i
+
1
]
-
seq_start
;
T
scale
=
norm_by_len
?
(
1.0
f
/
static_cast
<
T
>
(
seq_len
))
:
1.0
f
;
for
(
int64_t
j
=
0
;
j
<
seq_len
;
++
j
)
{
for
(
int64_t
k
=
0
;
k
<
seq_width
;
++
k
)
{
size_t
padding_offset
=
0
;
if
(
padding_layout
==
BATCH_LENGTH_WIDTH
)
{
padding_offset
=
(
i
*
max_seq_len
*
seq_width
)
+
j
*
seq_width
+
k
;
size_t
pad_data_idx
=
0
;
size_t
seq_data_idx
=
(
seq_start
+
j
)
*
seq_width
+
k
;
if
(
output_layout
==
kBatchLengthWidth
)
{
pad_data_idx
=
(
i
*
max_seq_len
+
j
)
*
seq_width
+
k
;
}
else
{
pad
ding_offset
=
(
j
*
seq_num
*
seq_width
)
+
i
*
seq_width
+
k
;
pad
_data_idx
=
(
j
*
seq_num
+
i
)
*
seq_width
+
k
;
}
if
(
seq_to_padding
)
{
padding_data
[
padding_offset
]
=
seq_data
[(
seq_start
+
j
)
*
seq_width
+
k
]
*
scale
;
if
(
seq_to_pad
)
{
pad_data
[
pad_data_idx
]
=
seq_data
[
seq_data_idx
]
*
scale
;
}
else
{
seq_data
[(
seq_start
+
j
)
*
seq_width
+
k
]
=
padding_data
[
padding_offset
]
*
scale
;
seq_data
[
seq_data_idx
]
=
pad_data
[
pad_data_idx
]
*
scale
;
}
}
}
}
}
template
<
typename
T
,
PaddingLayout
padding_layout
>
class
PaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
T
,
padding_layout
>
{
template
<
typename
T
>
class
PaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
T
>
{
public:
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
const
framework
::
LoDTensor
&
seq_tensor
,
framework
::
Tensor
*
padding_tensor
,
T
padding_value
=
static_cast
<
T
>
(
0
),
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
)
{
ValidateLoD
(
seq_tensor
,
lod_level
);
framework
::
Tensor
*
pad_tensor
,
T
pad_value
=
static_cast
<
T
>
(
0
),
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
,
OutputLayout
output_layout
=
kBatchLengthWidth
)
{
CheckLoD
(
seq_tensor
,
lod_level
);
auto
&
lod
=
seq_tensor
.
lod
();
auto
&
abs
_offset
=
framework
::
ToAbsOffset
(
lod
)[
lod_level
];
auto
&
seq
_offset
=
framework
::
ToAbsOffset
(
lod
)[
lod_level
];
auto
seq_dims
=
seq_tensor
.
dims
();
auto
padding_dims
=
padding_tensor
->
dims
();
int64_t
max_seq_len
=
MaximumSequenceLength
(
lod
,
lod_level
);
int64_t
seq_num
=
abs_offset
.
size
()
-
1
;
int64_t
seq_width
=
seq_tensor
.
numel
()
/
seq_dims
[
0
];
int64_t
numel
=
max_seq_len
*
seq_num
*
seq_width
;
auto
seq_tensor_dims
=
seq_tensor
.
dims
();
auto
pad_tensor_dims
=
pad_tensor
->
dims
();
int64_t
max_seq_len
=
MaximumSequenceLength
(
seq_offset
);
int64_t
seq_num
=
seq_offset
.
size
()
-
1
;
int64_t
seq_width
=
seq_tensor
.
numel
()
/
seq_tensor_dims
[
0
];
ValidateShape
(
seq_dims
,
abs_offset
.
back
(),
padding
_dims
,
max_seq_len
,
seq_num
,
seq_width
,
padding
_layout
);
CheckDims
(
seq_tensor_dims
,
seq_offset
.
back
(),
pad_tensor
_dims
,
max_seq_len
,
seq_num
,
seq_width
,
output
_layout
);
T
*
pad
ding_data
=
padding
_tensor
->
data
<
T
>
();
T
*
pad
_data
=
pad
_tensor
->
data
<
T
>
();
memset
(
pad
ding_data
,
padding_value
,
numel
*
sizeof
(
T
));
memset
(
pad
_data
,
pad_value
,
max_seq_len
*
seq_num
*
seq_width
*
sizeof
(
T
));
CopyDataCPU
<
T
,
padding_layout
>
(
const_cast
<
framework
::
LoDTensor
*>
(
&
seq_tensor
),
padding_tensor
,
abs_offset
,
max_seq_len
,
seq_width
,
true
/* seq_to_padding */
,
norm_by_times
);
CopyDataCPU
<
T
>
(
const_cast
<
framework
::
LoDTensor
*>
(
&
seq_tensor
),
pad_tensor
,
seq_offset
,
max_seq_len
,
seq_width
,
true
/* seq_to_pad */
,
norm_by_times
,
output_layout
);
}
};
template
<
typename
T
,
PaddingLayout
padding_layout
>
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
T
,
padding_layout
>
{
template
<
typename
T
>
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
T
>
{
public:
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
framework
::
LoDTensor
*
seq_tensor
,
const
framework
::
Tensor
&
padding_tensor
,
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
)
{
ValidateLoD
(
*
seq_tensor
,
lod_level
);
const
framework
::
Tensor
&
pad_tensor
,
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
,
OutputLayout
output_layout
=
kBatchLengthWidth
)
{
CheckLoD
(
*
seq_tensor
,
lod_level
);
auto
&
lod
=
seq_tensor
->
lod
();
auto
&
abs
_offset
=
framework
::
ToAbsOffset
(
lod
)[
lod_level
];
auto
&
seq
_offset
=
framework
::
ToAbsOffset
(
lod
)[
lod_level
];
auto
&
seq_dims
=
seq_tensor
->
dims
();
auto
&
pad
ding_dims
=
padding
_tensor
.
dims
();
int64_t
max_seq_len
=
MaximumSequenceLength
(
lod
,
lod_level
);
int64_t
seq_num
=
abs
_offset
.
size
()
-
1
;
int64_t
seq_width
=
seq_tensor
->
numel
()
/
seq_dims
[
0
];
auto
&
seq_
tensor_
dims
=
seq_tensor
->
dims
();
auto
&
pad
_tensor_dims
=
pad
_tensor
.
dims
();
int64_t
max_seq_len
=
MaximumSequenceLength
(
seq_offset
);
int64_t
seq_num
=
seq
_offset
.
size
()
-
1
;
int64_t
seq_width
=
seq_tensor
->
numel
()
/
seq_
tensor_
dims
[
0
];
ValidateShape
(
seq_dims
,
abs_offset
.
back
(),
padding
_dims
,
max_seq_len
,
seq_num
,
seq_width
,
padding
_layout
);
CheckDims
(
seq_tensor_dims
,
seq_offset
.
back
(),
pad_tensor
_dims
,
max_seq_len
,
seq_num
,
seq_width
,
output
_layout
);
T
*
seq_data
=
seq_tensor
->
data
<
T
>
();
memset
(
seq_data
,
static_cast
<
T
>
(
0
),
seq_tensor
->
numel
()
*
sizeof
(
T
));
CopyDataCPU
<
T
,
padding_layout
>
(
seq_tensor
,
const_cast
<
framework
::
Tensor
*>
(
&
padding_tensor
),
abs_offset
,
max_seq_len
,
seq_width
,
false
/* seq_to_padding */
,
norm_by_times
);
CopyDataCPU
<
T
>
(
seq_tensor
,
const_cast
<
framework
::
Tensor
*>
(
&
pad_tensor
),
seq_offset
,
max_seq_len
,
seq_width
,
false
/* seq_to_pad */
,
norm_by_times
,
output_layout
);
}
};
template
class
PaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
float
,
LENGTH_BATCH_WIDTH
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
float
,
LENGTH_BATCH_WIDTH
>;
template
class
PaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
int
>;
template
class
PaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
int64_t
>;
template
class
PaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
PaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
double
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
int
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
int64_t
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
double
>;
}
// namespace math
}
// namespace operators
...
...
paddle/fluid/operators/math/sequence_padding.cu
浏览文件 @
10ec329b
...
...
@@ -21,74 +21,74 @@ namespace math {
template
<
typename
T
,
bool
Padding
>
__global__
void
SequencePaddingKernel
(
T
*
padding_data
,
T
*
seq_data
,
const
size_t
*
abs_offset
,
const
size_t
&
seq_num
,
const
size_t
&
max_seq_len
,
const
size_t
&
seq_width
,
const
PaddingLayout
&
padding_layout
,
bool
norm_by_times
=
false
,
const
T
&
padding_value
=
0
)
{
size_t
padding_idx
=
blockIdx
.
y
;
size_t
seq_start
=
abs_offset
[
padding_idx
];
size_t
seq_len
=
abs_offset
[
padding_idx
+
1
]
-
seq_start
;
T
*
pad_data
,
T
*
seq_data
,
const
size_t
*
seq_offset
,
const
size_t
&
seq_num
,
const
size_t
&
max_seq_len
,
const
size_t
&
seq_width
,
bool
norm_by_times
,
const
T
&
pad_value
,
const
OutputLayout
&
output_layout
)
{
size_t
seq_idx
=
blockIdx
.
y
;
size_t
seq_start
=
seq_offset
[
seq_idx
];
size_t
seq_len
=
seq_offset
[
seq_idx
+
1
]
-
seq_start
;
size_t
seq_idx
=
blockIdx
.
x
*
blockDim
.
y
+
threadIdx
.
y
;
size_t
seq_
step_
idx
=
blockIdx
.
x
*
blockDim
.
y
+
threadIdx
.
y
;
size_t
seq_
offset
=
(
seq_start
+
seq
_idx
)
*
seq_width
;
size_t
seq_
data_offset
=
(
seq_start
+
seq_step
_idx
)
*
seq_width
;
size_t
pad
ding
_offset
=
0
;
size_t
pad
_data
_offset
=
0
;
if
(
padding_layout
==
LENGTH_BATCH_WIDTH
)
{
pad
ding_offset
=
(
seq_idx
*
seq_num
+
padding
_idx
)
*
seq_width
;
if
(
output_layout
==
kLengthBatchWidth
)
{
pad
_data_offset
=
(
seq_step_idx
*
seq_num
+
seq
_idx
)
*
seq_width
;
}
else
{
pad
ding_offset
=
(
padding_idx
*
max_seq_len
+
seq
_idx
)
*
seq_width
;
pad
_data_offset
=
(
seq_idx
*
max_seq_len
+
seq_step
_idx
)
*
seq_width
;
}
if
(
seq_idx
<
seq_len
)
{
if
(
seq_
step_
idx
<
seq_len
)
{
T
scale
=
norm_by_times
?
(
1.0
f
/
static_cast
<
T
>
(
seq_len
))
:
1.0
f
;
if
(
Padding
)
{
/* seq
uence -> padding
*/
/* seq
-> pad
*/
for
(
size_t
i
=
threadIdx
.
x
;
i
<
seq_width
;
i
+=
blockDim
.
x
)
{
pad
ding_data
[
padding_offset
+
i
]
=
scale
*
seq_data
[
seq
_offset
+
i
];
pad
_data
[
pad_data_offset
+
i
]
=
scale
*
seq_data
[
seq_data
_offset
+
i
];
}
}
else
{
/* pad
ding -> sequence
*/
/* pad
-> seq
*/
for
(
size_t
i
=
threadIdx
.
x
;
i
<
seq_width
;
i
+=
blockDim
.
x
)
{
seq_data
[
seq_
offset
+
i
]
=
scale
*
padding_data
[
padding
_offset
+
i
];
seq_data
[
seq_
data_offset
+
i
]
=
scale
*
pad_data
[
pad_data
_offset
+
i
];
}
}
}
else
if
(
seq_idx
<
max_seq_len
)
{
}
else
if
(
seq_
step_
idx
<
max_seq_len
)
{
if
(
Padding
)
{
/* seq
uence -> padding
*/
/* seq
-> pad
*/
for
(
size_t
i
=
threadIdx
.
x
;
i
<
seq_width
;
i
+=
blockDim
.
x
)
{
pad
ding_data
[
padding_offset
+
i
]
=
padding
_value
;
pad
_data
[
pad_data_offset
+
i
]
=
pad
_value
;
}
}
}
}
template
<
typename
T
,
PaddingLayout
padding_layout
>
class
PaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
T
,
padding_layout
>
{
template
<
typename
T
>
class
PaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
T
>
{
public:
void
operator
()(
const
platform
::
CUDADeviceContext
&
context
,
const
framework
::
LoDTensor
&
seq_tensor
,
framework
::
Tensor
*
padding_tensor
,
T
padding_value
=
static_cast
<
T
>
(
0
),
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
)
{
ValidateLoD
(
seq_tensor
,
lod_level
);
framework
::
Tensor
*
pad_tensor
,
T
pad_value
=
static_cast
<
T
>
(
0
),
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
,
OutputLayout
output_layout
=
kBatchLengthWidth
)
{
CheckLoD
(
seq_tensor
,
lod_level
);
auto
&
lod
=
seq_tensor
.
lod
();
auto
&
abs
_offset
=
framework
::
ToAbsOffset
(
lod
)[
lod_level
];
auto
&
seq
_offset
=
framework
::
ToAbsOffset
(
lod
)[
lod_level
];
auto
seq_dims
=
seq_tensor
.
dims
();
auto
pad
ding_dims
=
padding
_tensor
->
dims
();
int64_t
max_seq_len
=
MaximumSequenceLength
(
lod
,
lod_level
);
const
int64_t
seq_num
=
abs
_offset
.
size
()
-
1
;
const
int64_t
seq_width
=
seq_tensor
.
numel
()
/
seq
_dims
[
0
];
auto
seq_
tensor_
dims
=
seq_tensor
.
dims
();
auto
pad
_tensor_dims
=
pad
_tensor
->
dims
();
int64_t
max_seq_len
=
MaximumSequenceLength
(
seq_offset
);
int64_t
seq_num
=
seq
_offset
.
size
()
-
1
;
int64_t
seq_width
=
seq_tensor
.
numel
()
/
seq_tensor
_dims
[
0
];
ValidateShape
(
seq_dims
,
abs_offset
.
back
(),
padding
_dims
,
max_seq_len
,
seq_num
,
seq_width
,
padding
_layout
);
CheckDims
(
seq_tensor_dims
,
seq_offset
.
back
(),
pad_tensor
_dims
,
max_seq_len
,
seq_num
,
seq_width
,
output
_layout
);
if
(
!
norm_by_times
&&
seq_num
==
1UL
)
{
TensorCopy
(
seq_tensor
,
context
.
GetPlace
(),
context
,
pad
ding
_tensor
);
pad
ding_tensor
->
Resize
(
padding
_dims
);
TensorCopy
(
seq_tensor
,
context
.
GetPlace
(),
context
,
pad_tensor
);
pad
_tensor
->
Resize
(
pad_tensor
_dims
);
return
;
}
...
...
@@ -107,37 +107,40 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T, padding_layout> {
dim3
grid
(
grid_dim_x
,
grid_dim_y
);
const
T
*
seq_data
=
seq_tensor
.
data
<
T
>
();
T
*
pad
ding_data
=
padding
_tensor
->
data
<
T
>
();
T
*
pad
_data
=
pad
_tensor
->
data
<
T
>
();
SequencePaddingKernel
<
T
,
1
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
pad
ding
_data
,
const_cast
<
T
*>
(
seq_data
),
abs
_offset
.
CUDAData
(
context
.
GetPlace
()),
seq_num
,
max_seq_len
,
seq_width
,
padding_layout
,
norm_by_times
,
padding_value
);
pad_data
,
const_cast
<
T
*>
(
seq_data
),
seq
_offset
.
CUDAData
(
context
.
GetPlace
()),
seq_num
,
max_seq_len
,
seq_width
,
norm_by_times
,
pad_value
,
output_layout
);
}
};
template
<
typename
T
,
PaddingLayout
padding_layout
>
class
UnpaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
T
,
padding_layout
>
{
template
<
typename
T
>
class
UnpaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
T
>
{
public:
void
operator
()(
const
platform
::
CUDADeviceContext
&
context
,
framework
::
LoDTensor
*
seq_tensor
,
const
framework
::
Tensor
&
padding_tensor
,
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
)
{
ValidateLoD
(
*
seq_tensor
,
lod_level
);
const
framework
::
Tensor
&
pad_tensor
,
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
,
OutputLayout
output_layout
=
kBatchLengthWidth
)
{
CheckLoD
(
*
seq_tensor
,
lod_level
);
auto
&
lod
=
seq_tensor
->
lod
();
auto
&
abs
_offset
=
framework
::
ToAbsOffset
(
lod
)[
lod_level
];
auto
&
seq
_offset
=
framework
::
ToAbsOffset
(
lod
)[
lod_level
];
auto
seq_dims
=
seq_tensor
->
dims
();
auto
padding_dims
=
padding_tensor
.
dims
();
int64_t
max_seq_len
=
MaximumSequenceLength
(
lod
,
lod_level
);
int64_t
seq_num
=
abs_offset
.
size
()
-
1
;
int64_t
seq_width
=
seq_tensor
->
numel
()
/
seq_dims
[
0
];
auto
seq_tensor_dims
=
seq_tensor
->
dims
();
auto
pad_tensor_dims
=
pad_tensor
.
dims
();
int64_t
max_seq_len
=
MaximumSequenceLength
(
seq_offset
);
int64_t
seq_num
=
seq_offset
.
size
()
-
1
;
int64_t
seq_width
=
seq_tensor
->
numel
()
/
seq_tensor_dims
[
0
];
CheckDims
(
seq_tensor_dims
,
seq_offset
.
back
(),
pad_tensor_dims
,
max_seq_len
,
seq_num
,
seq_width
,
output_layout
);
if
(
!
norm_by_times
&&
seq_num
==
1UL
)
{
TensorCopy
(
pad
ding
_tensor
,
context
.
GetPlace
(),
context
,
seq_tensor
);
seq_tensor
->
Resize
(
seq_dims
);
TensorCopy
(
pad_tensor
,
context
.
GetPlace
(),
context
,
seq_tensor
);
seq_tensor
->
Resize
(
seq_
tensor_
dims
);
return
;
}
...
...
@@ -155,20 +158,25 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T,
size_t
grid_dim_y
=
seq_num
;
dim3
grid
(
grid_dim_x
,
grid_dim_y
);
const
T
*
pad
ding_data
=
padding
_tensor
.
data
<
T
>
();
const
T
*
pad
_data
=
pad
_tensor
.
data
<
T
>
();
T
*
seq_data
=
seq_tensor
->
data
<
T
>
();
SequencePaddingKernel
<
T
,
1
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
const_cast
<
T
*>
(
pad
ding
_data
),
seq_data
,
abs
_offset
.
CUDAData
(
context
.
GetPlace
()),
seq_num
,
max_seq_len
,
seq_width
,
padding_layout
,
norm_by_times
);
SequencePaddingKernel
<
T
,
0
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
const_cast
<
T
*>
(
pad_data
),
seq_data
,
seq
_offset
.
CUDAData
(
context
.
GetPlace
()),
seq_num
,
max_seq_len
,
seq_width
,
norm_by_times
,
static_cast
<
T
>
(
0
),
output_layout
);
}
};
template
class
PaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
float
,
LENGTH_BATCH_WIDTH
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
float
,
LENGTH_BATCH_WIDTH
>;
template
class
PaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
int
>;
template
class
PaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
int64_t
>;
template
class
PaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
class
PaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
double
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
int
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
int64_t
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
double
>;
}
// namespace math
}
// namespace operators
...
...
paddle/fluid/operators/math/sequence_padding.h
浏览文件 @
10ec329b
...
...
@@ -22,49 +22,46 @@ namespace paddle {
namespace
operators
{
namespace
math
{
enum
PaddingLayout
{
BATCH_LENGTH_WIDTH
,
LENGTH_BATCH_WIDTH
};
enum
OutputLayout
{
kBatchLengthWidth
=
0
,
kLengthBatchWidth
};
inline
static
size_t
MaximumSequenceLength
(
const
framework
::
LoD
&
lod
,
const
size_t
level
)
{
const
size_t
seq_num
=
lod
[
level
]
.
size
()
-
1
;
inline
static
size_t
MaximumSequenceLength
(
const
framework
::
Vector
<
size_t
>&
seq_offset
)
{
size_t
seq_num
=
seq_offset
.
size
()
-
1
;
size_t
max_seq_len
=
0
;
auto
abs_offset
=
framework
::
ToAbsOffset
(
lod
)[
level
];
for
(
size_t
i
=
0
;
i
<
seq_num
;
++
i
)
{
max_seq_len
=
std
::
max
(
max_seq_len
,
abs_offset
[
i
+
1
]
-
abs
_offset
[
i
]);
max_seq_len
=
std
::
max
(
max_seq_len
,
seq_offset
[
i
+
1
]
-
seq
_offset
[
i
]);
}
return
max_seq_len
;
}
inline
static
void
Validate
LoD
(
const
framework
::
LoDTensor
&
seq_tensor
,
const
size_t
&
lod_level
)
{
inline
static
void
Check
LoD
(
const
framework
::
LoDTensor
&
seq_tensor
,
const
size_t
&
lod_level
)
{
PADDLE_ENFORCE
(
lod_level
<
seq_tensor
.
lod
().
size
(),
"Invalid
`lod_level`
which should be at least 0 and less "
"than maximum lod level of
`seq_tensor`
."
);
"Invalid
lod level
which should be at least 0 and less "
"than maximum lod level of
sequence tensor
."
);
}
inline
static
void
ValidateShape
(
const
framework
::
DDim
&
seq_tensor_dims
,
const
size_t
&
abs_offset_back_value
,
const
framework
::
DDim
&
padding_tensor_dims
,
const
int64_t
&
max_seq_len
,
const
int64_t
&
seq_num
,
const
int64_t
&
seq_width
,
const
PaddingLayout
&
padding_layout
)
{
PADDLE_ENFORCE_EQ
(
static_cast
<
size_t
>
(
seq_tensor_dims
[
0
]),
abs_offset_back_value
,
"The 1st dimension of `seq_tensor` should be equal to "
"sum of lengths of all sequences."
);
inline
static
void
CheckDims
(
const
framework
::
DDim
&
seq_tensor_dims
,
const
size_t
&
last_offset
,
const
framework
::
DDim
&
pad_tensor_dims
,
const
int64_t
&
max_seq_len
,
const
int64_t
&
seq_num
,
const
int64_t
&
seq_width
,
const
OutputLayout
&
output_layout
)
{
PADDLE_ENFORCE_EQ
(
static_cast
<
size_t
>
(
seq_tensor_dims
[
0
]),
last_offset
,
"Value of 1st dimension of the sequence tensor should be "
"equal to sum of lengths of all sequences."
);
PADDLE_ENFORCE_EQ
(
pad
ding
_tensor_dims
.
size
(),
3UL
,
"
`padding_tensor`
should be a 3-D tensor."
);
PADDLE_ENFORCE_EQ
(
pad_tensor_dims
.
size
(),
3UL
,
"
Padded tensor
should be a 3-D tensor."
);
if
(
padding_layout
==
BATCH_LENGTH_WIDTH
)
{
PADDLE_ENFORCE_EQ
(
pad
ding
_tensor_dims
,
if
(
output_layout
==
kBatchLengthWidth
)
{
PADDLE_ENFORCE_EQ
(
pad_tensor_dims
,
framework
::
make_ddim
({
seq_num
,
max_seq_len
,
seq_width
}));
}
else
if
(
padding_layout
==
LENGTH_BATCH_WIDTH
)
{
PADDLE_ENFORCE_EQ
(
pad
ding
_tensor_dims
,
}
else
if
(
output_layout
==
kLengthBatchWidth
)
{
PADDLE_ENFORCE_EQ
(
pad_tensor_dims
,
framework
::
make_ddim
({
max_seq_len
,
seq_num
,
seq_width
}));
}
else
{
PADDLE_THROW
(
"Unsupported
padding
layout."
);
PADDLE_THROW
(
"Unsupported
output
layout."
);
}
}
...
...
@@ -94,23 +91,25 @@ inline static void ValidateShape(const framework::DDim& seq_tensor_dims,
*
* \note transposition is also done in this functor.
*/
template
<
typename
DeviceContext
,
typename
T
,
PaddingLayout
padding_layout
>
template
<
typename
DeviceContext
,
typename
T
>
class
PaddingLoDTensorFunctor
{
public:
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
LoDTensor
&
seq_tensor
,
framework
::
Tensor
*
padding_tensor
,
T
padding_value
=
static_cast
<
T
>
(
0
),
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
);
framework
::
Tensor
*
pad_tensor
,
T
pad_value
=
static_cast
<
T
>
(
0
),
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
,
OutputLayout
output_layout
=
kBatchLengthWidth
);
};
template
<
typename
DeviceContext
,
typename
T
,
PaddingLayout
padding_layout
>
template
<
typename
DeviceContext
,
typename
T
>
class
UnpaddingLoDTensorFunctor
{
public:
void
operator
()(
const
DeviceContext
&
context
,
framework
::
LoDTensor
*
seq_tensor
,
const
framework
::
Tensor
&
padding_tensor
,
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
);
const
framework
::
Tensor
&
pad_tensor
,
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
,
OutputLayout
output_layout
=
kBatchLengthWidth
);
};
}
// namespace math
...
...
paddle/fluid/operators/math/sequence_padding_test.cc
浏览文件 @
10ec329b
...
...
@@ -46,20 +46,24 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
}
const
size_t
max_sequence_length
=
paddle
::
operators
::
math
::
MaximumSequenceLength
(
lod
,
level
);
paddle
::
operators
::
math
::
MaximumSequenceLength
(
lod
[
level
]
);
const
size_t
num_sequences
=
lod
[
level
].
size
()
-
1
;
auto
padding_dims
=
paddle
::
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
max_sequence_length
),
static_cast
<
int64_t
>
(
num_sequences
),
static_cast
<
int64_t
>
(
sequence_width
)});
padding
.
mutable_data
<
T
>
(
padding_dims
,
*
place
);
paddle
::
operators
::
math
::
PaddingLoDTensorFunctor
<
DeviceContext
,
T
>
()(
*
context
,
seq
,
&
padding
,
false
);
*
context
,
seq
,
&
padding
,
0
,
false
,
0
,
paddle
::
operators
::
math
::
kLengthBatchWidth
);
seq_back
.
set_lod
(
lod
);
seq_back
.
mutable_data
<
T
>
(
seq_dims
,
*
place
);
paddle
::
operators
::
math
::
UnpaddingLoDTensorFunctor
<
DeviceContext
,
T
>
()(
*
context
,
&
seq_back
,
padding
,
false
);
*
context
,
&
seq_back
,
padding
,
false
,
0
,
paddle
::
operators
::
math
::
kLengthBatchWidth
);
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
cpu_seq_back
=
seq_back
;
...
...
paddle/fluid/operators/sequence_pad_op.cc
浏览文件 @
10ec329b
...
...
@@ -54,7 +54,7 @@ class SequencePadOp : public framework::OperatorWithKernel {
seq_num
=
x_abs_offset
.
size
()
-
1
;
for
(
size
_t
i
=
1
;
i
<=
seq_num
;
++
i
)
{
for
(
int64
_t
i
=
1
;
i
<=
seq_num
;
++
i
)
{
int64_t
seq_len
=
x_abs_offset
[
i
]
-
x_abs_offset
[
i
-
1
];
max_len
=
max_len
<
seq_len
?
seq_len
:
max_len
;
}
...
...
paddle/fluid/operators/warpctc_op.h
浏览文件 @
10ec329b
...
...
@@ -155,15 +155,16 @@ class WarpCTCKernel : public framework::OpKernel<T> {
// warpctc needs sequences data stored in transposed padding format
Tensor
warpctc_logits
;
const
size_t
max_sequence_length
=
math
::
MaximumSequenceLength
(
logits_lod
,
level
);
math
::
MaximumSequenceLength
(
logits_lod
[
level
]
);
auto
warpctc_logits_dims
=
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
max_sequence_length
),
static_cast
<
int64_t
>
(
num_sequences
),
static_cast
<
int64_t
>
(
sequence_width
)});
warpctc_logits
.
mutable_data
<
T
>
(
warpctc_logits_dims
,
ctx
.
GetPlace
());
math
::
PaddingLoDTensorFunctor
<
DeviceContext
,
T
,
math
::
LENGTH_BATCH_WIDTH
>
()(
math
::
PaddingLoDTensorFunctor
<
DeviceContext
,
T
>
()(
ctx
.
template
device_context
<
DeviceContext
>(),
*
logits
,
&
warpctc_logits
,
false
);
static_cast
<
T
>
(
0
),
false
/* norm_by_times */
,
0
,
math
::
kLengthBatchWidth
);
const
T
*
warpctc_logits_data
=
warpctc_logits
.
data
<
T
>
();
std
::
vector
<
int
>
warpctc_label_lengths
(
num_sequences
);
...
...
@@ -216,10 +217,9 @@ class WarpCTCGradKernel : public framework::OpKernel<T> {
logits_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
bool
norm_by_times
=
ctx
.
Attr
<
bool
>
(
"norm_by_times"
);
math
::
UnpaddingLoDTensorFunctor
<
DeviceContext
,
T
,
math
::
LENGTH_BATCH_WIDTH
>
()(
math
::
UnpaddingLoDTensorFunctor
<
DeviceContext
,
T
>
()(
ctx
.
template
device_context
<
DeviceContext
>(),
logits_grad
,
*
warpctc_grad
,
norm_by_times
);
*
warpctc_grad
,
norm_by_times
,
0
,
math
::
kLengthBatchWidth
);
const
T
*
loss_grad_data
=
loss_grad
->
data
<
T
>
();
math
::
ScaleLoDTensorFunctor
<
DeviceContext
,
T
>
()(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录