Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
5d901416
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5d901416
编写于
5月 11, 2018
作者:
Y
yangyaming
提交者:
fengjiayi
8月 31, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Enhance sequence_padding functor (CPU and GPU).
上级
fe70c69f
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
330 addition
and
316 deletion
+330
-316
paddle/fluid/operators/math/sequence_padding.cc
paddle/fluid/operators/math/sequence_padding.cc
+93
-110
paddle/fluid/operators/math/sequence_padding.cu
paddle/fluid/operators/math/sequence_padding.cu
+95
-136
paddle/fluid/operators/math/sequence_padding.h
paddle/fluid/operators/math/sequence_padding.h
+52
-14
paddle/fluid/operators/sequence_pad_op.cc
paddle/fluid/operators/sequence_pad_op.cc
+25
-15
paddle/fluid/operators/sequence_pad_op.h
paddle/fluid/operators/sequence_pad_op.h
+62
-39
paddle/fluid/operators/warpctc_op.h
paddle/fluid/operators/warpctc_op.h
+3
-2
未找到文件。
paddle/fluid/operators/math/sequence_padding.cc
浏览文件 @
5d901416
...
...
@@ -18,128 +18,111 @@ namespace paddle {
namespace
operators
{
namespace
math
{
template
<
typename
T
>
class
PaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
T
>
{
public:
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
const
framework
::
LoDTensor
&
seq
,
framework
::
Tensor
*
padding
,
bool
norm_by_times
)
{
auto
lod
=
seq
.
lod
();
PADDLE_ENFORCE_GT
(
lod
.
size
(),
0UL
,
"The LoD of LoDTensor seq should not be null."
);
const
size_t
level
=
0
;
framework
::
LoD
abs_offset_lod
=
framework
::
ToAbsOffset
(
lod
);
auto
seq_dims
=
seq
.
dims
();
PADDLE_ENFORCE_EQ
(
seq_dims
[
0
],
static_cast
<
int64_t
>
(
abs_offset_lod
[
level
].
back
()),
"The first dimension of LoDTensor seq should be "
"equal to the sum of all sequences's length."
);
auto
padding_dims
=
padding
->
dims
();
PADDLE_ENFORCE_EQ
(
padding_dims
.
size
(),
3UL
,
"The input padding should be a 3-D Tensor of shape "
"[max_sequence_length, num_sequences, sequence_width]."
);
const
int64_t
max_sequence_length
=
MaximumSequenceLength
(
lod
,
level
);
PADDLE_ENFORCE_EQ
(
padding_dims
[
0
],
max_sequence_length
,
"The first dimension of Tensor padding should be the "
"maximum length of all sequences in LoDTensor seq."
);
const
int64_t
num_sequences
=
abs_offset_lod
[
level
].
size
()
-
1
;
PADDLE_ENFORCE_EQ
(
padding_dims
[
1
],
num_sequences
,
"The second dimension of Tensor padding should be the "
"number of sequences in LoDTensor seq."
);
const
int64_t
sequence_width
=
seq
.
numel
()
/
seq_dims
[
0
];
PADDLE_ENFORCE_EQ
(
padding_dims
[
2
],
sequence_width
,
"The third dimension of Tensor padding should be the "
"width of sequence in LoDTensor seq."
);
const
T
*
seq_data
=
seq
.
data
<
T
>
();
T
*
padding_data
=
padding
->
data
<
T
>
();
for
(
int64_t
i
=
0
;
i
<
max_sequence_length
;
++
i
)
{
for
(
int64_t
j
=
0
;
j
<
num_sequences
;
++
j
)
{
int64_t
start_pos
=
abs_offset_lod
[
level
][
j
];
int64_t
sequence_length
=
abs_offset_lod
[
level
][
j
+
1
]
-
start_pos
;
if
(
i
<
sequence_length
)
{
// i > 0 => sequence_length > 0
T
scale
=
norm_by_times
?
(
1.0
f
/
static_cast
<
T
>
(
sequence_length
))
:
1.0
f
;
for
(
int64_t
k
=
0
;
k
<
sequence_width
;
++
k
)
{
padding_data
[(
i
*
num_sequences
+
j
)
*
sequence_width
+
k
]
=
seq_data
[(
start_pos
+
i
)
*
sequence_width
+
k
]
*
scale
;
}
template
<
typename
T
,
PaddingLayout
padding_layout
>
void
CopyDataCPU
(
framework
::
LoDTensor
*
seq_tensor
,
framework
::
Tensor
*
padding_tensor
,
const
framework
::
Vector
<
size_t
>&
abs_offset
,
const
int64_t
&
max_seq_len
,
const
int64_t
&
seq_width
,
bool
seq_to_padding
,
bool
norm_by_len
)
{
T
*
seq_data
=
seq_tensor
->
data
<
T
>
();
T
*
padding_data
=
padding_tensor
->
data
<
T
>
();
int64_t
seq_num
=
abs_offset
.
size
()
-
1
;
for
(
int64_t
i
=
0
;
i
<
seq_num
;
++
i
)
{
int64_t
seq_start
=
abs_offset
[
i
];
int64_t
seq_len
=
abs_offset
[
i
+
1
]
-
seq_start
;
T
scale
=
norm_by_len
?
(
1.0
f
/
static_cast
<
T
>
(
seq_len
))
:
1.0
f
;
for
(
int64_t
j
=
0
;
j
<
seq_len
;
++
j
)
{
for
(
int64_t
k
=
0
;
k
<
seq_width
;
++
k
)
{
size_t
padding_offset
=
0
;
if
(
padding_layout
==
BATCH_LENGTH_WIDTH
)
{
padding_offset
=
(
i
*
max_seq_len
*
seq_width
)
+
j
*
seq_width
+
k
;
}
else
{
padding_offset
=
(
j
*
seq_num
*
seq_width
)
+
i
*
seq_width
+
k
;
}
if
(
seq_to_padding
)
{
padding_data
[
padding_offset
]
=
seq_data
[(
seq_start
+
j
)
*
seq_width
+
k
]
*
scale
;
}
else
{
memset
(
padding_data
+
(
i
*
num_sequences
+
j
)
*
sequence_width
,
0
,
sequence_width
*
sizeof
(
T
))
;
seq_data
[(
seq_start
+
j
)
*
seq_width
+
k
]
=
padding_data
[
padding_offset
]
*
scale
;
}
}
}
}
}
template
<
typename
T
,
PaddingLayout
padding_layout
>
class
PaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
T
,
padding_layout
>
{
public:
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
const
framework
::
LoDTensor
&
seq_tensor
,
framework
::
Tensor
*
padding_tensor
,
T
padding_value
=
static_cast
<
T
>
(
0
),
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
)
{
ValidateLoD
(
seq_tensor
,
lod_level
);
auto
&
lod
=
seq_tensor
.
lod
();
auto
&
abs_offset
=
framework
::
ToAbsOffset
(
lod
)[
lod_level
];
auto
seq_dims
=
seq_tensor
.
dims
();
auto
padding_dims
=
padding_tensor
->
dims
();
int64_t
max_seq_len
=
MaximumSequenceLength
(
lod
,
lod_level
);
int64_t
seq_num
=
abs_offset
.
size
()
-
1
;
int64_t
seq_width
=
seq_tensor
.
numel
()
/
seq_dims
[
0
];
int64_t
numel
=
max_seq_len
*
seq_num
*
seq_width
;
ValidateShape
(
seq_dims
,
abs_offset
.
back
(),
padding_dims
,
max_seq_len
,
seq_num
,
seq_width
,
padding_layout
);
T
*
padding_data
=
padding_tensor
->
data
<
T
>
();
memset
(
padding_data
,
padding_value
,
numel
*
sizeof
(
T
));
CopyDataCPU
<
T
,
padding_layout
>
(
const_cast
<
framework
::
LoDTensor
*>
(
&
seq_tensor
),
padding_tensor
,
abs_offset
,
max_seq_len
,
seq_width
,
true
/* seq_to_padding */
,
norm_by_times
);
}
};
template
<
typename
T
>
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
T
>
{
template
<
typename
T
,
PaddingLayout
padding_layout
>
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
T
,
padding_layout
>
{
public:
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
framework
::
LoDTensor
*
seq
,
const
framework
::
Tensor
&
padding
,
bool
norm_by_times
)
{
auto
lod
=
seq
->
lod
();
PADDLE_ENFORCE_GT
(
lod
.
size
(),
0UL
,
"The LoD of LoDTensor seq should not be null."
);
const
size_t
level
=
0
;
framework
::
LoD
abs_offset_lod
=
framework
::
ToAbsOffset
(
lod
);
auto
seq_dims
=
seq
->
dims
();
PADDLE_ENFORCE_EQ
(
seq_dims
[
0
],
static_cast
<
int64_t
>
(
abs_offset_lod
[
level
].
back
()),
"The first dimension of LoDTensor seq should be "
"equal to the sum of all sequences's length."
);
auto
padding_dims
=
padding
.
dims
();
PADDLE_ENFORCE_EQ
(
padding_dims
.
size
(),
3UL
,
"The input padding should be a 3-D Tensor of shape "
"[max_sequnece_length, num_sequences, sequence_width]."
);
const
int64_t
max_sequence_length
=
MaximumSequenceLength
(
lod
,
level
);
PADDLE_ENFORCE_EQ
(
padding_dims
[
0
],
max_sequence_length
,
"The first dimension of Tensor padding should be "
"the maximum length of all sequences in LoDTensor seq."
);
const
int64_t
num_sequences
=
abs_offset_lod
[
level
].
size
()
-
1
;
PADDLE_ENFORCE_EQ
(
padding_dims
[
1
],
num_sequences
,
"The second dimension of Tensor padding should be "
"the number of sequences in LoDTensor seq."
);
const
int64_t
sequence_width
=
seq
->
numel
()
/
seq_dims
[
0
];
PADDLE_ENFORCE_EQ
(
padding_dims
[
2
],
sequence_width
,
"The third dimension of Tensor padding should be the "
"width of sequence in LoDTensor seq."
);
const
T
*
padding_data
=
padding
.
data
<
T
>
();
T
*
seq_data
=
seq
->
data
<
T
>
();
for
(
int64_t
i
=
0
;
i
<
num_sequences
;
++
i
)
{
int64_t
start_pos
=
abs_offset_lod
[
level
][
i
];
int64_t
sequence_length
=
abs_offset_lod
[
level
][
i
+
1
]
-
start_pos
;
for
(
int64_t
j
=
0
;
j
<
sequence_length
;
++
j
)
{
// sequence_width > j > 0
T
scale
=
norm_by_times
?
(
1.0
f
/
static_cast
<
T
>
(
sequence_length
))
:
1.0
f
;
for
(
int64_t
k
=
0
;
k
<
sequence_width
;
++
k
)
{
seq_data
[(
start_pos
+
j
)
*
sequence_width
+
k
]
=
padding_data
[(
j
*
num_sequences
+
i
)
*
sequence_width
+
k
]
*
scale
;
}
}
}
framework
::
LoDTensor
*
seq_tensor
,
const
framework
::
Tensor
&
padding_tensor
,
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
)
{
ValidateLoD
(
*
seq_tensor
,
lod_level
);
auto
&
lod
=
seq_tensor
->
lod
();
auto
&
abs_offset
=
framework
::
ToAbsOffset
(
lod
)[
lod_level
];
auto
&
seq_dims
=
seq_tensor
->
dims
();
auto
&
padding_dims
=
padding_tensor
.
dims
();
int64_t
max_seq_len
=
MaximumSequenceLength
(
lod
,
lod_level
);
int64_t
seq_num
=
abs_offset
.
size
()
-
1
;
int64_t
seq_width
=
seq_tensor
->
numel
()
/
seq_dims
[
0
];
ValidateShape
(
seq_dims
,
abs_offset
.
back
(),
padding_dims
,
max_seq_len
,
seq_num
,
seq_width
,
padding_layout
);
T
*
seq_data
=
seq_tensor
->
data
<
T
>
();
memset
(
seq_data
,
static_cast
<
T
>
(
0
),
seq_tensor
->
numel
()
*
sizeof
(
T
));
CopyDataCPU
<
T
,
padding_layout
>
(
seq_tensor
,
const_cast
<
framework
::
Tensor
*>
(
&
padding_tensor
),
abs_offset
,
max_seq_len
,
seq_width
,
false
/* seq_to_padding */
,
norm_by_times
);
}
};
template
class
PaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
PaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
float
,
LENGTH_BATCH_WIDTH
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
float
,
LENGTH_BATCH_WIDTH
>;
}
// namespace math
}
// namespace operators
...
...
paddle/fluid/operators/math/sequence_padding.cu
浏览文件 @
5d901416
...
...
@@ -19,87 +19,76 @@ namespace paddle {
namespace
operators
{
namespace
math
{
template
<
typename
T
,
bool
NormByTimes
,
bool
Padding
>
__global__
void
SequencePaddingKernel
(
T
*
padding
,
T
*
sequence
,
const
size_t
*
sequence_start_positions
,
const
size_t
sequence
_width
,
const
size_t
max_sequence_length
,
const
size_t
num_sequences
)
{
template
<
typename
T
,
bool
Padding
>
__global__
void
SequencePaddingKernel
(
T
*
padding_data
,
T
*
seq_data
,
const
size_t
*
abs_offset
,
const
size_t
&
seq_num
,
const
size_t
&
max_seq_len
,
const
size_t
&
seq
_width
,
const
PaddingLayout
&
padding_layout
,
bool
norm_by_times
=
false
,
const
T
&
padding_value
=
0
)
{
size_t
padding_idx
=
blockIdx
.
y
;
size_t
start_pos
=
sequence_start_positions
[
padding_idx
];
size_t
sequence_length
=
sequence_start_positions
[
padding_idx
+
1
]
-
start_pos
;
size_t
seq_start
=
abs_offset
[
padding_idx
];
size_t
seq_len
=
abs_offset
[
padding_idx
+
1
]
-
seq_start
;
size_t
sequence_idx
=
blockIdx
.
x
*
blockDim
.
y
+
threadIdx
.
y
;
size_t
padding_base_idx
=
(
sequence_idx
*
num_sequences
+
padding_idx
)
*
sequence_width
;
size_t
sequence_base_idx
=
(
start_pos
+
sequence_idx
)
*
sequence_width
;
size_t
seq_idx
=
blockIdx
.
x
*
blockDim
.
y
+
threadIdx
.
y
;
if
(
sequence_idx
<
sequence_length
)
{
T
scale
=
NormByTimes
?
(
1.0
f
/
static_cast
<
T
>
(
sequence_length
))
:
1.0
f
;
size_t
seq_offset
=
(
seq_start
+
seq_idx
)
*
seq_width
;
size_t
padding_offset
=
0
;
if
(
padding_layout
==
LENGTH_BATCH_WIDTH
)
{
padding_offset
=
(
seq_idx
*
seq_num
+
padding_idx
)
*
seq_width
;
}
else
{
padding_offset
=
(
padding_idx
*
max_seq_len
+
seq_idx
)
*
seq_width
;
}
if
(
seq_idx
<
seq_len
)
{
T
scale
=
norm_by_times
?
(
1.0
f
/
static_cast
<
T
>
(
seq_len
))
:
1.0
f
;
if
(
Padding
)
{
/* sequence -> padding */
for
(
size_t
i
=
threadIdx
.
x
;
i
<
seq
uence
_width
;
i
+=
blockDim
.
x
)
{
padding
[
padding_base_idx
+
i
]
=
scale
*
sequence
[
sequence_base_idx
+
i
];
for
(
size_t
i
=
threadIdx
.
x
;
i
<
seq_width
;
i
+=
blockDim
.
x
)
{
padding
_data
[
padding_offset
+
i
]
=
scale
*
seq_data
[
seq_offset
+
i
];
}
}
else
{
/* padding -> sequence */
for
(
size_t
i
=
threadIdx
.
x
;
i
<
seq
uence
_width
;
i
+=
blockDim
.
x
)
{
seq
uence
[
sequence_base_idx
+
i
]
=
scale
*
padding
[
padding_base_idx
+
i
];
for
(
size_t
i
=
threadIdx
.
x
;
i
<
seq_width
;
i
+=
blockDim
.
x
)
{
seq
_data
[
seq_offset
+
i
]
=
scale
*
padding_data
[
padding_offset
+
i
];
}
}
}
else
if
(
seq
uence_idx
<
max_sequence_length
)
{
}
else
if
(
seq
_idx
<
max_seq_len
)
{
if
(
Padding
)
{
/* sequence -> padding */
for
(
size_t
i
=
threadIdx
.
x
;
i
<
seq
uence
_width
;
i
+=
blockDim
.
x
)
{
padding
[
padding_base_idx
+
i
]
=
0
;
for
(
size_t
i
=
threadIdx
.
x
;
i
<
seq_width
;
i
+=
blockDim
.
x
)
{
padding
_data
[
padding_offset
+
i
]
=
padding_value
;
}
}
}
}
template
<
typename
T
>
class
PaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
T
>
{
template
<
typename
T
,
PaddingLayout
padding_layout
>
class
PaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
T
,
padding_layout
>
{
public:
void
operator
()(
const
platform
::
CUDADeviceContext
&
context
,
const
framework
::
LoDTensor
&
seq
,
framework
::
Tensor
*
padding
,
bool
norm_by_times
)
{
auto
lod
=
seq
.
lod
();
PADDLE_ENFORCE_GT
(
lod
.
size
(),
0UL
,
"The lod of LoDTensor seq should not be null."
);
const
size_t
level
=
0
;
framework
::
LoD
abs_offset_lod
=
framework
::
ToAbsOffset
(
lod
);
auto
seq_dims
=
seq
.
dims
();
PADDLE_ENFORCE_EQ
(
seq_dims
[
0
],
static_cast
<
int64_t
>
(
abs_offset_lod
[
level
].
back
()),
"The first dimension of LoDTensor seq should be "
"equal to the sum of all sequences's length."
);
auto
padding_dims
=
padding
->
dims
();
PADDLE_ENFORCE_EQ
(
padding_dims
.
size
(),
3UL
,
"The input padding should be a 3-D Tensor of shape "
"[max_sequence_length, num_sequences, sequence_width]."
);
int64_t
max_sequence_length
=
MaximumSequenceLength
(
lod
,
level
);
PADDLE_ENFORCE_EQ
(
padding_dims
[
0
],
max_sequence_length
,
"The first dimension of Tensor padding should be the "
"maximum length of all sequences in LoDTensor seq."
);
const
int64_t
num_sequences
=
abs_offset_lod
[
level
].
size
()
-
1
;
PADDLE_ENFORCE_EQ
(
padding_dims
[
1
],
num_sequences
,
"The second dimension of Tensor padding should be the "
"number of sequences in LoDTensor seq."
);
const
int64_t
sequence_width
=
seq
.
numel
()
/
seq_dims
[
0
];
PADDLE_ENFORCE_EQ
(
padding_dims
[
2
],
sequence_width
,
"The third dimension of Tensor padding should be the "
"width of sequence in LoDTensor seq."
);
if
(
!
norm_by_times
&&
num_sequences
==
1UL
)
{
TensorCopy
(
seq
,
context
.
GetPlace
(),
context
,
padding
);
padding
->
Resize
(
padding_dims
);
const
framework
::
LoDTensor
&
seq_tensor
,
framework
::
Tensor
*
padding_tensor
,
T
padding_value
=
static_cast
<
T
>
(
0
),
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
)
{
ValidateLoD
(
seq_tensor
,
lod_level
);
auto
&
lod
=
seq_tensor
.
lod
();
auto
&
abs_offset
=
framework
::
ToAbsOffset
(
lod
)[
lod_level
];
auto
seq_dims
=
seq_tensor
.
dims
();
auto
padding_dims
=
padding_tensor
->
dims
();
int64_t
max_seq_len
=
MaximumSequenceLength
(
lod
,
lod_level
);
const
int64_t
seq_num
=
abs_offset
.
size
()
-
1
;
const
int64_t
seq_width
=
seq_tensor
.
numel
()
/
seq_dims
[
0
];
ValidateShape
(
seq_dims
,
abs_offset
.
back
(),
padding_dims
,
max_seq_len
,
seq_num
,
seq_width
,
padding_layout
);
if
(
!
norm_by_times
&&
seq_num
==
1UL
)
{
TensorCopy
(
seq_tensor
,
context
.
GetPlace
(),
context
,
padding_tensor
);
padding_tensor
->
Resize
(
padding_dims
);
return
;
}
...
...
@@ -109,72 +98,46 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
* and at least 8 elements for each thread.
*/
size_t
block_dim_x
=
std
::
min
(((((
seq
uence
_width
+
7
)
>>
3
)
+
31
)
>>
5
)
<<
5
,
kBlockSize
);
std
::
min
(((((
seq_width
+
7
)
>>
3
)
+
31
)
>>
5
)
<<
5
,
kBlockSize
);
size_t
block_dim_y
=
kBlockSize
/
block_dim_x
;
dim3
threads
(
block_dim_x
,
block_dim_y
);
size_t
grid_dim_x
=
(
max_seq
uence_length
+
block_dim_y
-
1
)
/
block_dim_y
;
size_t
grid_dim_y
=
num_sequences
;
size_t
grid_dim_x
=
(
max_seq
_len
+
block_dim_y
-
1
)
/
block_dim_y
;
size_t
grid_dim_y
=
seq_num
;
dim3
grid
(
grid_dim_x
,
grid_dim_y
);
const
T
*
seq_data
=
seq
.
data
<
T
>
();
T
*
padding_data
=
padding
->
data
<
T
>
();
if
(
norm_by_times
)
{
SequencePaddingKernel
<
T
,
1
,
1
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
padding_data
,
const_cast
<
T
*>
(
seq_data
),
abs_offset_lod
[
level
].
CUDAData
(
context
.
GetPlace
()),
sequence_width
,
max_sequence_length
,
num_sequences
);
}
else
{
SequencePaddingKernel
<
T
,
0
,
1
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
padding_data
,
const_cast
<
T
*>
(
seq_data
),
abs_offset_lod
[
level
].
CUDAData
(
context
.
GetPlace
()),
sequence_width
,
max_sequence_length
,
num_sequences
);
}
const
T
*
seq_data
=
seq_tensor
.
data
<
T
>
();
T
*
padding_data
=
padding_tensor
->
data
<
T
>
();
SequencePaddingKernel
<
T
,
1
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
padding_data
,
const_cast
<
T
*>
(
seq_data
),
abs_offset
.
CUDAData
(
context
.
GetPlace
()),
seq_num
,
max_seq_len
,
seq_width
,
padding_layout
,
norm_by_times
,
padding_value
);
}
};
template
<
typename
T
>
class
UnpaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
T
>
{
template
<
typename
T
,
PaddingLayout
padding_layout
>
class
UnpaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
T
,
padding_layout
>
{
public:
void
operator
()(
const
platform
::
CUDADeviceContext
&
context
,
framework
::
LoDTensor
*
seq
,
const
framework
::
Tensor
&
padding
,
bool
norm_by_times
)
{
auto
lod
=
seq
->
lod
();
PADDLE_ENFORCE_GT
(
lod
.
size
(),
0UL
,
"The lod of LoDTensor seq should not be null."
);
const
size_t
level
=
0
;
framework
::
LoD
abs_offset_lod
=
framework
::
ToAbsOffset
(
lod
);
auto
seq_dims
=
seq
->
dims
();
PADDLE_ENFORCE_EQ
(
seq_dims
[
0
],
static_cast
<
int64_t
>
(
abs_offset_lod
[
level
].
back
()),
"The first dimension of LoDTensor seq should be "
"equal to the sum of all sequences's length."
);
auto
padding_dims
=
padding
.
dims
();
PADDLE_ENFORCE_EQ
(
padding_dims
.
size
(),
3UL
,
"The input padding should be a 3-D Tensor of shape "
"[max_sequnece_length, num_sequences, sequence_width]."
);
int64_t
max_sequence_length
=
MaximumSequenceLength
(
lod
,
level
);
PADDLE_ENFORCE_EQ
(
padding_dims
[
0
],
max_sequence_length
,
"The first dimension of Tensor padding should be "
"the maximum length of all sequences in LoDTensor seq."
);
const
int64_t
num_sequences
=
abs_offset_lod
[
level
].
size
()
-
1
;
PADDLE_ENFORCE_EQ
(
padding_dims
[
1
],
num_sequences
,
"The second dimension of Tensor padding should be "
"the number of sequences in LoDTensor seq."
);
const
int64_t
sequence_width
=
seq
->
numel
()
/
seq_dims
[
0
];
PADDLE_ENFORCE_EQ
(
padding_dims
[
2
],
sequence_width
,
"The third dimension of Tensor padding should be the "
"width of sequence in LoDTensor seq."
);
if
(
!
norm_by_times
&&
num_sequences
==
1UL
)
{
TensorCopy
(
padding
,
context
.
GetPlace
(),
context
,
seq
);
seq
->
Resize
(
seq_dims
);
framework
::
LoDTensor
*
seq_tensor
,
const
framework
::
Tensor
&
padding_tensor
,
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
)
{
ValidateLoD
(
*
seq_tensor
,
lod_level
);
auto
&
lod
=
seq_tensor
->
lod
();
auto
&
abs_offset
=
framework
::
ToAbsOffset
(
lod
)[
lod_level
];
auto
seq_dims
=
seq_tensor
->
dims
();
auto
padding_dims
=
padding_tensor
.
dims
();
int64_t
max_seq_len
=
MaximumSequenceLength
(
lod
,
lod_level
);
int64_t
seq_num
=
abs_offset
.
size
()
-
1
;
int64_t
seq_width
=
seq_tensor
->
numel
()
/
seq_dims
[
0
];
if
(
!
norm_by_times
&&
seq_num
==
1UL
)
{
TensorCopy
(
padding_tensor
,
context
.
GetPlace
(),
context
,
seq_tensor
);
seq_tensor
->
Resize
(
seq_dims
);
return
;
}
...
...
@@ -184,32 +147,28 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
* and at least 8 elements for each thread.
*/
size_t
block_dim_x
=
std
::
min
(((((
seq
uence
_width
+
7
)
>>
3
)
+
31
)
>>
5
)
<<
5
,
kBlockSize
);
std
::
min
(((((
seq_width
+
7
)
>>
3
)
+
31
)
>>
5
)
<<
5
,
kBlockSize
);
size_t
block_dim_y
=
kBlockSize
/
block_dim_x
;
dim3
threads
(
block_dim_x
,
block_dim_y
);
size_t
grid_dim_x
=
(
max_seq
uence_length
+
block_dim_y
-
1
)
/
block_dim_y
;
size_t
grid_dim_y
=
num_sequences
;
size_t
grid_dim_x
=
(
max_seq
_len
+
block_dim_y
-
1
)
/
block_dim_y
;
size_t
grid_dim_y
=
seq_num
;
dim3
grid
(
grid_dim_x
,
grid_dim_y
);
const
T
*
padding_data
=
padding
.
data
<
T
>
();
T
*
seq_data
=
seq
->
data
<
T
>
();
if
(
norm_by_times
)
{
SequencePaddingKernel
<
T
,
1
,
0
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
const_cast
<
T
*>
(
padding_data
),
seq_data
,
abs_offset_lod
[
level
].
CUDAData
(
context
.
GetPlace
()),
sequence_width
,
max_sequence_length
,
num_sequences
);
}
else
{
SequencePaddingKernel
<
T
,
0
,
0
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
const_cast
<
T
*>
(
padding_data
),
seq_data
,
abs_offset_lod
[
level
].
CUDAData
(
context
.
GetPlace
()),
sequence_width
,
max_sequence_length
,
num_sequences
);
}
const
T
*
padding_data
=
padding_tensor
.
data
<
T
>
();
T
*
seq_data
=
seq_tensor
->
data
<
T
>
();
SequencePaddingKernel
<
T
,
1
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
const_cast
<
T
*>
(
padding_data
),
seq_data
,
abs_offset
.
CUDAData
(
context
.
GetPlace
()),
seq_num
,
max_seq_len
,
seq_width
,
padding_layout
,
norm_by_times
);
}
};
template
class
PaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
class
PaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
float
,
LENGTH_BATCH_WIDTH
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CUDADeviceContext
,
float
,
LENGTH_BATCH_WIDTH
>;
}
// namespace math
}
// namespace operators
...
...
paddle/fluid/operators/math/sequence_padding.h
浏览文件 @
5d901416
...
...
@@ -22,17 +22,50 @@ namespace paddle {
namespace
operators
{
namespace
math
{
enum
PaddingLayout
{
BATCH_LENGTH_WIDTH
,
LENGTH_BATCH_WIDTH
};
inline
static
size_t
MaximumSequenceLength
(
const
framework
::
LoD
&
lod
,
const
size_t
level
)
{
const
size_t
num_sequences
=
lod
[
level
].
size
()
-
1
;
size_t
max_sequence_length
=
0
;
framework
::
LoD
abs_offset_lod
=
framework
::
ToAbsOffset
(
lod
);
for
(
size_t
i
=
0
;
i
<
num_sequences
;
++
i
)
{
max_sequence_length
=
std
::
max
(
max_sequence_length
,
abs_offset_lod
[
level
][
i
+
1
]
-
abs_offset_lod
[
level
][
i
]);
const
size_t
seq_num
=
lod
[
level
].
size
()
-
1
;
size_t
max_seq_len
=
0
;
auto
abs_offset
=
framework
::
ToAbsOffset
(
lod
)[
level
];
for
(
size_t
i
=
0
;
i
<
seq_num
;
++
i
)
{
max_seq_len
=
std
::
max
(
max_seq_len
,
abs_offset
[
i
+
1
]
-
abs_offset
[
i
]);
}
return
max_seq_len
;
}
inline
static
void
ValidateLoD
(
const
framework
::
LoDTensor
&
seq_tensor
,
const
size_t
&
lod_level
)
{
PADDLE_ENFORCE
(
lod_level
<
seq_tensor
.
lod
().
size
(),
"Invalid `lod_level` which should be at least 0 and less "
"than maximum lod level of `seq_tensor`."
);
}
inline
static
void
ValidateShape
(
const
framework
::
DDim
&
seq_tensor_dims
,
const
size_t
&
abs_offset_back_value
,
const
framework
::
DDim
&
padding_tensor_dims
,
const
int64_t
&
max_seq_len
,
const
int64_t
&
seq_num
,
const
int64_t
&
seq_width
,
const
PaddingLayout
&
padding_layout
)
{
PADDLE_ENFORCE_EQ
(
static_cast
<
size_t
>
(
seq_tensor_dims
[
0
]),
abs_offset_back_value
,
"The 1st dimension of `seq_tensor` should be equal to "
"sum of lengths of all sequences."
);
PADDLE_ENFORCE_EQ
(
padding_tensor_dims
.
size
(),
3UL
,
"`padding_tensor` should be a 3-D tensor."
);
if
(
padding_layout
==
BATCH_LENGTH_WIDTH
)
{
PADDLE_ENFORCE_EQ
(
padding_tensor_dims
,
framework
::
make_ddim
({
seq_num
,
max_seq_len
,
seq_width
}));
}
else
if
(
padding_layout
==
LENGTH_BATCH_WIDTH
)
{
PADDLE_ENFORCE_EQ
(
padding_tensor_dims
,
framework
::
make_ddim
({
max_seq_len
,
seq_num
,
seq_width
}));
}
else
{
PADDLE_THROW
(
"Unsupported padding layout."
);
}
return
max_sequence_length
;
}
/*
...
...
@@ -61,18 +94,23 @@ inline static size_t MaximumSequenceLength(const framework::LoD& lod,
*
* \note transposition is also done in this functor.
*/
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
,
PaddingLayout
padding_layout
>
class
PaddingLoDTensorFunctor
{
public:
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
LoDTensor
&
seq
,
framework
::
Tensor
*
padding
,
bool
norm_by_times
);
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
LoDTensor
&
seq_tensor
,
framework
::
Tensor
*
padding_tensor
,
T
padding_value
=
static_cast
<
T
>
(
0
),
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
);
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
,
PaddingLayout
padding_layout
>
class
UnpaddingLoDTensorFunctor
{
public:
void
operator
()(
const
DeviceContext
&
context
,
framework
::
LoDTensor
*
seq
,
const
framework
::
Tensor
&
padding
,
bool
norm_by_times
);
void
operator
()(
const
DeviceContext
&
context
,
framework
::
LoDTensor
*
seq_tensor
,
const
framework
::
Tensor
&
padding_tensor
,
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
);
};
}
// namespace math
...
...
paddle/fluid/operators/sequence_pad_op.cc
浏览文件 @
5d901416
...
...
@@ -32,7 +32,11 @@ class SequencePadOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ
(
x_dims
.
size
(),
2
,
"Only support 2-D tensor, rank of Input(X) should be 2."
);
auto
out_dims
=
x_dims
;
int
lod_level
=
ctx
->
Attrs
().
Get
<
int
>
(
"lod_level"
);
int64_t
max_len
=
-
1
;
int64_t
seq_num
=
-
1
;
int
x_lod_size
=
-
1
;
if
(
ctx
->
IsRuntime
())
{
framework
::
Variable
*
x_var
=
...
...
@@ -40,27 +44,31 @@ class SequencePadOp : public framework::OperatorWithKernel {
auto
&
x_lod
=
x_var
->
Get
<
LoDTensor
>
().
lod
();
PADDLE_ENFORCE_GE
(
x_lod
.
size
(),
1
,
"Input(X) should be sequences containing lod."
);
x_lod_size
=
x_lod
.
size
();
auto
x_abs_offset
=
framework
::
ToAbsOffset
(
x_lod
)[
lod_level
];
PADDLE_ENFORCE_EQ
(
x_dims
[
0
],
static_cast
<
int64_t
>
(
x_abs_offset
.
back
()),
"The first dimension of `X` should be equal to sum "
"of all sequences' length."
);
auto
last_level_lod
=
x_lod
[
x_lod
.
size
()
-
1
];
size_t
max_len
=
0
;
seq_num
=
x_abs_offset
.
size
()
-
1
;
for
(
size_t
i
=
1
;
i
<
last_level_lod
.
size
()
;
++
i
)
{
auto
seq_len
=
last_level_lod
[
i
]
-
last_level_lod
[
i
-
1
];
for
(
size_t
i
=
1
;
i
<
=
seq_num
;
++
i
)
{
int64_t
seq_len
=
x_abs_offset
[
i
]
-
x_abs_offset
[
i
-
1
];
max_len
=
max_len
<
seq_len
?
seq_len
:
max_len
;
}
out_dims
[
0
]
=
max_len
*
(
last_level_lod
.
size
()
-
1
);
}
else
{
framework
::
VarDesc
*
x_desc
=
boost
::
get
<
framework
::
VarDesc
*>
(
ctx
->
GetInputVarPtrs
(
"X"
)[
0
]);
PADDLE_ENFORCE_GE
(
x_desc
->
GetLoDLevel
(),
1
,
"Input(X) should be sequences containing lod."
);
out_dims
[
0
]
=
-
1
;
x_lod_size
=
x_desc
->
GetLoDLevel
();
}
ctx
->
SetOutputDim
(
"Out"
,
out_dims
);
PADDLE_ENFORCE
(
lod_level
>=
0
&&
lod_level
<
x_lod_size
,
"Invalid `lod_level` which should be at least 0 and less "
"than maximum lod level of `X`"
);
ctx
->
SetOutputDim
(
"Out"
,
{
seq_num
,
max_len
,
x_dims
[
1
]});
}
protected:
...
...
@@ -84,9 +92,11 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor) Output variable which would be a common tensor "
"without lod. Each sequence would be padded to the maximum "
"length."
);
AddAttr
<
float
>
(
"lod_level"
,
"(int, default 0) Specify which level lod to referred to."
);
AddAttr
<
float
>
(
"pad_value"
,
"(float, default 0.0)
Value to be padded
"
"t
o t
he end of each sequence."
);
"(float, default 0.0)
Specify which value to be padded to
"
"the end of each sequence."
);
AddComment
(
R"DOC(
)DOC"
);
...
...
paddle/fluid/operators/sequence_pad_op.h
浏览文件 @
5d901416
...
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/sequence_padding.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -23,39 +24,68 @@ namespace operators {
using
LoDTensor
=
framework
::
LoDTensor
;
using
LoD
=
framework
::
LoD
;
// @TODO clean code
template
<
typename
DeviceContext
,
typename
T
>
struct
CopyFunctor
{
LoDTensor
*
lod_tensor_
;
LoDTensor
*
pad_tensor_
;
const
LoD
&
ref_lod_
;
const
DeviceContext
&
ctx_
;
bool
is_lod_to_pad_
;
CopyFunctor
(
LoDTensor
*
lod_tensor
,
const
LoD
&
ref_lod
,
LoDTensor
*
pad_tensor
,
const
DeviceContext
&
ctx
,
bool
is_lod_to_pad
)
:
lod_tensor_
(
lod_tensor
),
pad_tensor_
(
pad_tensor
),
ref_lod_
(
ref_lod
),
ctx_
(
ctx
),
is_lod_to_pad_
(
is_lod_to_pad
)
{}
void
operator
()()
const
{
/*
auto seq_num = ref_lod_.size() - 1;
auto max_len = pad_tensor_->dims()[0] / seq_num;
PADDLE_ENFORCE_EQ(max_len * seq_num, pad_tensor_->dims()[0],
"First dimension of padded tensor should be equal to "
"maximum sequence length mulplied by sequence number.");
for (size_t i = 1; i < ref_lod_.size(); ++i) {
auto seq_start = ref_lod_[i - 1];
auto seq_end = ref_lod_[i];
auto pad_start = (i - 1) * max_len;
auto pad_end = pad_start + (seq_end - seq_start);
auto sub_lod_tensor = lod_tensor_->Slice(seq_start, seq_end);
auto sub_pad_tensor = pad_tensor_->Slice(pad_start, pad_end);
if (is_lod_to_pad_) {
framework::TensorCopy(sub_lod_tensor, ctx.GetPlace(), &sub_pad_tensor);
} else {
framework::TensorCopy(sub_pad_tensor, ctx.GetPlace(), &sub_lod_tensor);
}
}
*/
}
};
template
<
typename
DeviceContext
,
typename
T
>
class
SequencePadOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
x_ptr
=
ctx
.
Input
<
LoDTensor
>
(
"X"
);
/*
auto* x = ctx.Input<LoDTensor>("X");
auto* out_ptr = ctx.Output<LoDTensor>("Out");
out_ptr->mutable_data<T>(ctx.GetPlace());
// Resize();
T pad_value = static_cast<T>(ctx.Attr<float>("pad_value"));
math::PaddingLoDTensorFunctor<DeviceContext, T>()(
ctx.template device_context<DeviceContext>(), *x, *, false);
math::SetConstant<DeviceContext, T> set_func;
set_func(ctx.template device_context<DeviceContext>(), out_ptr, pad_value);
auto
&
x_lod
=
x_ptr
->
lod
();
auto
&
x_last_level_lod
=
x_lod
[
x_lod
.
size
()
-
1
];
auto
seq_num
=
x_last_level_lod
.
size
()
-
1
;
auto
max_len
=
out_ptr
->
dims
()[
0
]
/
seq_num
;
PADDLE_ENFORCE_EQ
(
max_len
*
seq_num
,
out_ptr
->
dims
()[
0
],
"First dimension of `Out` should be equal to "
"maximum length mulplied by sequence number."
);
for
(
size_t
i
=
1
;
i
<
x_last_level_lod
.
size
();
++
i
)
{
auto
x_start
=
x_last_level_lod
[
i
-
1
];
auto
x_end
=
x_last_level_lod
[
i
];
auto
out_start
=
(
i
-
1
)
*
max_len
;
auto
out_end
=
out_start
+
(
x_end
-
x_start
);
auto
x_sub_tensor
=
x_ptr
->
Slice
(
x_start
,
x_end
);
auto
out_sub_tensor
=
out_ptr
->
Slice
(
out_start
,
out_end
);
framework
::
TensorCopy
(
x_sub_tensor
,
ctx
.
GetPlace
(),
&
out_sub_tensor
);
}
*/
}
};
...
...
@@ -63,33 +93,26 @@ template <typename DeviceContext, typename T>
class
SequencePadGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
/*
auto* x_ptr = ctx.Input<LoDTensor>("X");
auto* g_out_ptr = ctx.Input<LoDTensor>(framework::GradVarName("Out"));
auto* g_x_ptr = ctx.Output<LoDTensor>(framework::GradVarName("X"));
math::SetConstant<DeviceContext, T> set_func;
set_func
(
ctx
.
template
device_context
<
DeviceContext
>(),
g_x_ptr
,
set_func(ctx.template device_context<DeviceContext>(),
g_x_ptr,
static_cast<T>(0));
auto& x_lod = x_ptr->lod();
auto& x_last_level_lod = x_lod[x_lod.size() - 1];
auto
seq_num
=
x_last_level_lod
.
size
()
-
1
;
int64_t
max_len
=
g_out_ptr
->
dims
()[
0
]
/
seq_num
;
PADDLE_ENFORCE_EQ
(
max_len
*
seq_num
,
g_out_ptr
->
dims
()[
0
],
"First dimension of `Out` should be equal to "
"maximum length mulplied by sequence number."
);
for
(
size_t
i
=
1
;
i
<
x_last_level_lod
.
size
();
++
i
)
{
auto
x_start
=
x_last_level_lod
[
i
-
1
];
auto
x_end
=
x_last_level_lod
[
i
];
auto
out_start
=
(
i
-
1
)
*
max_len
;
auto
out_end
=
out_start
+
(
x_end
-
x_start
);
auto
g_out_sub
=
g_out_ptr
->
Slice
(
out_start
,
out_end
);
auto
g_x_sub
=
g_x_ptr
->
Slice
(
x_start
,
x_end
);
framework
::
TensorCopy
(
g_x_sub
,
ctx
.
GetPlace
(),
&
g_out_sub
);
}
CopyFunctor copy_func<DeviceContext, T>(g_out_ptr,
x_last_level_lod,
g_x_ptr,
ctx,
false);
copy_func();
*/
}
};
...
...
paddle/fluid/operators/warpctc_op.h
浏览文件 @
5d901416
...
...
@@ -161,7 +161,7 @@ class WarpCTCKernel : public framework::OpKernel<T> {
static_cast
<
int64_t
>
(
num_sequences
),
static_cast
<
int64_t
>
(
sequence_width
)});
warpctc_logits
.
mutable_data
<
T
>
(
warpctc_logits_dims
,
ctx
.
GetPlace
());
math
::
PaddingLoDTensorFunctor
<
DeviceContext
,
T
>
()(
math
::
PaddingLoDTensorFunctor
<
DeviceContext
,
T
,
math
::
LENGTH_BATCH_WIDTH
>
()(
ctx
.
template
device_context
<
DeviceContext
>(),
*
logits
,
&
warpctc_logits
,
false
);
const
T
*
warpctc_logits_data
=
warpctc_logits
.
data
<
T
>
();
...
...
@@ -215,7 +215,8 @@ class WarpCTCGradKernel : public framework::OpKernel<T> {
logits_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
bool
norm_by_times
=
ctx
.
Attr
<
bool
>
(
"norm_by_times"
);
math
::
UnpaddingLoDTensorFunctor
<
DeviceContext
,
T
>
()(
math
::
UnpaddingLoDTensorFunctor
<
DeviceContext
,
T
,
math
::
LENGTH_BATCH_WIDTH
>
()(
ctx
.
template
device_context
<
DeviceContext
>(),
logits_grad
,
*
warpctc_grad
,
norm_by_times
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录