Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
17152510
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
17152510
编写于
8月 16, 2018
作者:
F
fengjiayi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update CPU sequence_padding functor
上级
8b9938ac
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
108 addition
and
113 deletion
+108
-113
paddle/fluid/operators/math/sequence_padding.cc
paddle/fluid/operators/math/sequence_padding.cc
+78
-71
paddle/fluid/operators/math/sequence_padding.h
paddle/fluid/operators/math/sequence_padding.h
+22
-34
paddle/fluid/operators/math/sequence_padding_test.cc
paddle/fluid/operators/math/sequence_padding_test.cc
+3
-3
paddle/fluid/operators/warpctc_op.h
paddle/fluid/operators/warpctc_op.h
+5
-5
未找到文件。
paddle/fluid/operators/math/sequence_padding.cc
浏览文件 @
17152510
...
...
@@ -18,37 +18,45 @@ namespace paddle {
namespace
operators
{
namespace
math
{
enum
CopyType
{
kSeqToPad
,
kPadToSeq
};
template
<
typename
T
>
void
CopyDataCPU
(
framework
::
LoDTensor
*
seq_tensor
,
framework
::
Tensor
*
pad_tensor
,
const
framework
::
Vector
<
size_t
>&
seq_offset
,
const
int64_t
&
max_seq_len
,
const
int64_t
&
seq_width
,
bool
seq_to_pad
,
bool
norm_by_len
,
OutputLayout
output_layout
)
{
T
*
seq_data
=
seq_tensor
->
data
<
T
>
();
T
*
pad_data
=
pad_tensor
->
data
<
T
>
();
int64_t
seq_num
=
seq_offset
.
size
()
-
1
;
for
(
int64_t
i
=
0
;
i
<
seq_num
;
++
i
)
{
int64_t
seq_start
=
seq_offset
[
i
];
int64_t
seq_len
=
seq_offset
[
i
+
1
]
-
seq_start
;
T
scale
=
norm_by_len
?
(
1.0
f
/
static_cast
<
T
>
(
seq_len
))
:
1.0
f
;
for
(
int64_t
j
=
0
;
j
<
seq_len
;
++
j
)
{
for
(
int64_t
k
=
0
;
k
<
seq_width
;
++
k
)
{
size_t
pad_data_idx
=
0
;
size_t
seq_data_idx
=
(
seq_start
+
j
)
*
seq_width
+
k
;
if
(
output_layout
==
kBatchLengthWidth
)
{
pad_data_idx
=
(
i
*
max_seq_len
+
j
)
*
seq_width
+
k
;
}
else
{
pad_data_idx
=
(
j
*
seq_num
+
i
)
*
seq_width
+
k
;
}
if
(
seq_to_pad
)
{
pad_data
[
pad_data_idx
]
=
seq_data
[
seq_data_idx
]
*
scale
;
}
else
{
seq_data
[
seq_data_idx
]
=
pad_data
[
pad_data_idx
]
*
scale
;
void
CopyValidData
(
framework
::
Tensor
*
dst_tensor
,
const
framework
::
Tensor
*
src_tensor
,
const
framework
::
Vector
<
size_t
>&
seq_offsets
,
int
pad_seq_len
,
int
step_width
,
bool
norm_by_len
,
CopyType
type
,
PadLayout
layout
)
{
int
seq_num
=
seq_offsets
.
size
()
-
1
;
const
T
*
src_data
=
src_tensor
->
data
<
T
>
();
T
*
dst_data
=
dst_tensor
->
data
<
T
>
();
int
seq_cpy_gap
=
step_width
;
int
pad_cpy_gap
=
layout
==
kBatchLengthWidth
?
step_width
:
seq_num
*
step_width
;
for
(
int
seq_idx
=
0
;
seq_idx
<
seq_num
;
++
seq_idx
)
{
int
valid_seq_len
=
seq_offsets
[
seq_idx
+
1
]
-
seq_offsets
[
seq_idx
];
PADDLE_ENFORCE_GE
(
pad_seq_len
,
valid_seq_len
,
"The padded sequence length can not be less than its original length."
);
int
seq_data_offset
=
seq_offsets
[
seq_idx
]
*
step_width
;
int
pad_data_offset
=
layout
==
kBatchLengthWidth
?
seq_idx
*
pad_seq_len
*
step_width
:
seq_idx
*
step_width
;
float
scale
=
1.0
f
/
static_cast
<
float
>
(
valid_seq_len
);
for
(
int
step_idx
=
0
;
step_idx
<
valid_seq_len
;
++
step_idx
)
{
const
T
*
src
=
src_data
+
(
type
==
kSeqToPad
?
seq_data_offset
:
pad_data_offset
);
T
*
dst
=
dst_data
+
(
type
==
kSeqToPad
?
pad_data_offset
:
seq_data_offset
);
memcpy
(
dst
,
src
,
step_width
*
sizeof
(
T
));
if
(
norm_by_len
)
{
for
(
int
i
=
0
;
i
<
step_width
;
++
i
)
{
*
(
dst
+
i
)
*=
scale
;
}
}
seq_data_offset
+=
seq_cpy_gap
;
pad_data_offset
+=
pad_cpy_gap
;
}
}
}
...
...
@@ -58,31 +66,37 @@ class PaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
public:
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
const
framework
::
LoDTensor
&
seq_tensor
,
framework
::
Tensor
*
pad_tensor
,
T
pad_value
=
static_cast
<
T
>
(
0
),
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
,
OutputLayout
output_layout
=
kBatchLengthWidth
)
{
CheckLoD
(
seq_tensor
,
lod_level
);
auto
&
lod
=
seq_tensor
.
lod
();
auto
&
seq_offset
=
framework
::
ToAbsOffset
(
lod
)[
lod_level
];
framework
::
LoDTensor
*
pad_tensor
,
std
::
vector
<
T
>
pad_value
=
{
0
},
int
pad_seq_len
=
-
1
,
int
lod_level
=
0
,
bool
norm_by_times
=
false
,
const
PadLayout
layout
=
kBatchLengthWidth
)
{
auto
seq_offsets
=
framework
::
ToAbsOffset
(
seq_tensor
.
lod
())[
lod_level
];
auto
seq_tensor_dims
=
seq_tensor
.
dims
();
auto
pad_tensor_dims
=
pad_tensor
->
dims
();
int64_t
max_seq_len
=
MaximumSequenceLength
(
seq_offset
);
int64_t
seq_num
=
seq_offset
.
size
()
-
1
;
int64_t
seq_width
=
seq_tensor
.
numel
()
/
seq_tensor_dims
[
0
];
if
(
pad_seq_len
==
-
1
)
{
pad_seq_len
=
MaximumSequenceLength
(
seq_offsets
);
}
int
step_width
=
seq_tensor
.
numel
()
/
seq_tensor_dims
[
0
];
CheckDims
(
seq_tensor_dims
,
seq_offset
.
back
(),
pad_tensor_dims
,
max_seq_len
,
seq_num
,
seq_width
,
output_layout
);
CheckDims
(
seq_tensor_dims
,
pad_tensor_dims
,
seq_offsets
,
pad_seq_len
,
step_width
,
layout
);
PADDLE_ENFORCE
(
pad_value
.
size
()
==
1
||
static_cast
<
int
>
(
pad_value
.
size
())
==
step_width
,
"The size of 'pad_value' can only be 1 or be equal to the "
"'step_width'."
);
T
*
pad_data
=
pad_tensor
->
data
<
T
>
();
if
(
pad_value
.
size
()
==
1
)
{
pad_value
=
std
::
vector
<
T
>
(
step_width
,
pad_value
[
0
]);
}
memset
(
pad_data
,
pad_value
,
max_seq_len
*
seq_num
*
seq_width
*
sizeof
(
T
));
// fill padding value
T
*
pad_data
=
pad_tensor
->
data
<
T
>
();
for
(
int
i
=
0
;
i
<
pad_tensor
->
numel
()
/
step_width
;
++
i
)
{
memcpy
(
pad_data
,
pad_value
.
data
(),
step_width
*
sizeof
(
T
));
}
CopyDataCPU
<
T
>
(
const_cast
<
framework
::
LoDTensor
*>
(
&
seq_tensor
),
pad_tensor
,
seq_offset
,
max_seq_len
,
seq_width
,
true
/* seq_to_pad */
,
norm_by_times
,
output_layout
);
CopyValidData
<
T
>
(
pad_tensor
,
&
seq_tensor
,
seq_offsets
,
pad_seq_len
,
step_width
,
norm_by_times
,
kSeqToPad
,
layout
);
}
};
...
...
@@ -90,30 +104,23 @@ template <typename T>
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
T
>
{
public:
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
framework
::
LoDTensor
*
seq_tensor
,
const
framework
::
Tensor
&
pad_tensor
,
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
,
OutputLayout
output_layout
=
kBatchLengthWidth
)
{
CheckLoD
(
*
seq_tensor
,
lod_level
);
auto
&
lod
=
seq_tensor
->
lod
();
auto
&
seq_offset
=
framework
::
ToAbsOffset
(
lod
)[
lod_level
];
auto
&
seq_tensor_dims
=
seq_tensor
->
dims
();
auto
&
pad_tensor_dims
=
pad_tensor
.
dims
();
int64_t
max_seq_len
=
MaximumSequenceLength
(
seq_offset
);
int64_t
seq_num
=
seq_offset
.
size
()
-
1
;
int64_t
seq_width
=
seq_tensor
->
numel
()
/
seq_tensor_dims
[
0
];
CheckDims
(
seq_tensor_dims
,
seq_offset
.
back
(),
pad_tensor_dims
,
max_seq_len
,
seq_num
,
seq_width
,
output_layout
);
T
*
seq_data
=
seq_tensor
->
data
<
T
>
();
memset
(
seq_data
,
static_cast
<
T
>
(
0
),
seq_tensor
->
numel
()
*
sizeof
(
T
));
CopyDataCPU
<
T
>
(
seq_tensor
,
const_cast
<
framework
::
Tensor
*>
(
&
pad_tensor
),
seq_offset
,
max_seq_len
,
seq_width
,
false
/* seq_to_pad */
,
norm_by_times
,
output_layout
);
const
framework
::
LoDTensor
&
pad_tensor
,
framework
::
LoDTensor
*
seq_tensor
,
int
pad_seq_len
=
-
1
,
int
lod_level
=
0
,
bool
norm_by_times
=
false
,
const
PadLayout
&
layout
=
kBatchLengthWidth
)
{
auto
seq_offsets
=
framework
::
ToAbsOffset
(
seq_tensor
->
lod
())[
lod_level
];
auto
seq_tensor_dims
=
seq_tensor
->
dims
();
auto
pad_tensor_dims
=
pad_tensor
.
dims
();
if
(
pad_seq_len
==
-
1
)
{
pad_seq_len
=
MaximumSequenceLength
(
seq_offsets
);
}
int
step_width
=
seq_tensor
->
numel
()
/
seq_tensor_dims
[
0
];
CheckDims
(
seq_tensor_dims
,
pad_tensor_dims
,
seq_offsets
,
pad_seq_len
,
step_width
,
layout
);
CopyValidData
<
T
>
(
seq_tensor
,
&
pad_tensor
,
seq_offsets
,
pad_seq_len
,
step_width
,
norm_by_times
,
kPadToSeq
,
layout
);
}
};
...
...
paddle/fluid/operators/math/sequence_padding.h
浏览文件 @
17152510
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <algorithm>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/device_context.h"
...
...
@@ -22,7 +23,7 @@ namespace paddle {
namespace
operators
{
namespace
math
{
enum
Output
Layout
{
kBatchLengthWidth
=
0
,
kLengthBatchWidth
};
enum
Pad
Layout
{
kBatchLengthWidth
=
0
,
kLengthBatchWidth
};
inline
static
size_t
MaximumSequenceLength
(
const
framework
::
Vector
<
size_t
>&
seq_offset
)
{
...
...
@@ -34,35 +35,22 @@ inline static size_t MaximumSequenceLength(
return
max_seq_len
;
}
inline
static
void
CheckLoD
(
const
framework
::
LoDTensor
&
seq_tensor
,
const
size_t
&
lod_level
)
{
PADDLE_ENFORCE
(
lod_level
<
seq_tensor
.
lod
().
size
(),
"Invalid lod level which should be at least 0 and less "
"than maximum lod level of sequence tensor."
);
}
inline
static
void
CheckDims
(
const
framework
::
DDim
&
seq_tensor_dims
,
const
size_t
&
last_offset
,
const
framework
::
DDim
&
pad_tensor_dims
,
const
int64_t
&
max_seq_len
,
const
int64_t
&
seq_num
,
const
int64_t
&
seq
_width
,
const
OutputLayout
&
output_
layout
)
{
PADDLE_ENFORCE_EQ
(
static_cast
<
size_t
>
(
seq_tensor_dims
[
0
]),
last_offset
,
const
framework
::
Vector
<
size_t
>&
seq_offset
,
int64_t
padded_seq_len
,
int64_t
step
_width
,
const
PadLayout
&
layout
)
{
PADDLE_ENFORCE_EQ
(
static_cast
<
size_t
>
(
seq_tensor_dims
[
0
]),
seq_offset
.
back
()
,
"Value of 1st dimension of the sequence tensor should be "
"equal to sum of lengths of all sequences."
);
PADDLE_ENFORCE
_EQ
(
pad_tensor_dims
.
size
(),
3UL
,
"Padded tensor should be a 3-D tensor
."
);
PADDLE_ENFORCE
(
seq_tensor_dims
.
size
()
==
1
||
seq_tensor_dims
.
size
()
==
2
,
"seq_tensor's rank should be 1 or 2
."
);
if
(
output_layout
==
kBatchLengthWidth
)
{
PADDLE_ENFORCE_EQ
(
pad_tensor_dims
,
framework
::
make_ddim
({
seq_num
,
max_seq_len
,
seq_width
}));
}
else
if
(
output_layout
==
kLengthBatchWidth
)
{
PADDLE_ENFORCE_EQ
(
pad_tensor_dims
,
framework
::
make_ddim
({
max_seq_len
,
seq_num
,
seq_width
}));
}
else
{
PADDLE_THROW
(
"Unsupported output layout."
);
}
PADDLE_ENFORCE
(
seq_tensor_dims
.
size
()
+
1
==
pad_tensor_dims
.
size
()
||
seq_tensor_dims
.
size
()
==
pad_tensor_dims
.
size
(),
"pad_tensor's rank should be 1 greater than seq_tensor's "
"rank, or be equal with it."
);
}
/*
...
...
@@ -94,22 +82,22 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims,
template
<
typename
DeviceContext
,
typename
T
>
class
PaddingLoDTensorFunctor
{
public:
void
operator
()(
const
DeviceContext
&
context
,
void
operator
()(
const
platform
::
CPU
DeviceContext
&
context
,
const
framework
::
LoDTensor
&
seq_tensor
,
framework
::
Tensor
*
pad_tensor
,
T
pad_value
=
static_cast
<
T
>
(
0
),
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
,
OutputLayout
output_
layout
=
kBatchLengthWidth
);
framework
::
LoD
Tensor
*
pad_tensor
,
std
::
vector
<
T
>
pad_value
=
{
0
},
int
pad_seq_len
=
-
1
,
int
lod_level
=
0
,
bool
norm_by_times
=
false
,
const
PadLayout
layout
=
kBatchLengthWidth
);
};
template
<
typename
DeviceContext
,
typename
T
>
class
UnpaddingLoDTensorFunctor
{
public:
void
operator
()(
const
DeviceContext
&
context
,
framework
::
LoDTensor
*
seq
_tensor
,
const
framework
::
Tensor
&
pad_tensor
,
bool
norm_by_times
=
false
,
size_t
lod_level
=
0
,
OutputLayout
output_
layout
=
kBatchLengthWidth
);
void
operator
()(
const
platform
::
CPU
DeviceContext
&
context
,
const
framework
::
LoDTensor
&
pad
_tensor
,
framework
::
LoDTensor
*
seq_tensor
,
int
pad_seq_len
=
-
1
,
int
lod_level
=
0
,
bool
norm_by_times
=
false
,
const
PadLayout
&
layout
=
kBatchLengthWidth
);
};
}
// namespace math
...
...
paddle/fluid/operators/math/sequence_padding_test.cc
浏览文件 @
17152510
...
...
@@ -23,7 +23,7 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
paddle
::
framework
::
LoDTensor
cpu_seq_back
;
paddle
::
framework
::
LoDTensor
seq
;
paddle
::
framework
::
LoDTensor
seq_back
;
paddle
::
framework
::
Tensor
padding
;
paddle
::
framework
::
LoD
Tensor
padding
;
const
size_t
level
=
lod
.
size
()
-
1
;
auto
seq_dims
=
...
...
@@ -56,13 +56,13 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
padding
.
mutable_data
<
T
>
(
padding_dims
,
*
place
);
paddle
::
operators
::
math
::
PaddingLoDTensorFunctor
<
DeviceContext
,
T
>
()(
*
context
,
seq
,
&
padding
,
0
,
false
,
0
,
*
context
,
seq
,
&
padding
,
{
0
},
-
1
,
0
,
false
,
paddle
::
operators
::
math
::
kLengthBatchWidth
);
seq_back
.
set_lod
(
lod
);
seq_back
.
mutable_data
<
T
>
(
seq_dims
,
*
place
);
paddle
::
operators
::
math
::
UnpaddingLoDTensorFunctor
<
DeviceContext
,
T
>
()(
*
context
,
&
seq_back
,
padding
,
false
,
0
,
*
context
,
padding
,
&
seq_back
,
-
1
,
0
,
false
,
paddle
::
operators
::
math
::
kLengthBatchWidth
);
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
...
...
paddle/fluid/operators/warpctc_op.h
浏览文件 @
17152510
...
...
@@ -153,7 +153,7 @@ class WarpCTCKernel : public framework::OpKernel<T> {
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
num_sequences
),
1
});
// warpctc needs sequences data stored in transposed padding format
Tensor
warpctc_logits
;
LoD
Tensor
warpctc_logits
;
const
size_t
max_sequence_length
=
math
::
MaximumSequenceLength
(
logits_lod
[
level
]);
auto
warpctc_logits_dims
=
...
...
@@ -163,7 +163,7 @@ class WarpCTCKernel : public framework::OpKernel<T> {
warpctc_logits
.
mutable_data
<
T
>
(
warpctc_logits_dims
,
ctx
.
GetPlace
());
math
::
PaddingLoDTensorFunctor
<
DeviceContext
,
T
>
()(
ctx
.
template
device_context
<
DeviceContext
>(),
*
logits
,
&
warpctc_logits
,
static_cast
<
T
>
(
0
),
false
/* norm_by_times */
,
0
,
{
static_cast
<
T
>
(
0
)},
-
1
,
0
,
false
/* norm_by_times */
,
math
::
kLengthBatchWidth
);
const
T
*
warpctc_logits_data
=
warpctc_logits
.
data
<
T
>
();
...
...
@@ -210,15 +210,15 @@ template <typename DeviceContext, typename T>
class
WarpCTCGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
warpctc_grad
=
ctx
.
Input
<
Tensor
>
(
"WarpCTCGrad"
);
auto
*
warpctc_grad
=
ctx
.
Input
<
LoD
Tensor
>
(
"WarpCTCGrad"
);
auto
*
logits_grad
=
ctx
.
Output
<
LoDTensor
>
(
framework
::
GradVarName
(
"Logits"
));
const
Tensor
*
loss_grad
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Loss"
));
logits_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
bool
norm_by_times
=
ctx
.
Attr
<
bool
>
(
"norm_by_times"
);
math
::
UnpaddingLoDTensorFunctor
<
DeviceContext
,
T
>
()(
ctx
.
template
device_context
<
DeviceContext
>(),
logits
_grad
,
*
warpctc_grad
,
norm_by_times
,
0
,
math
::
kLengthBatchWidth
);
ctx
.
template
device_context
<
DeviceContext
>(),
*
warpctc
_grad
,
logits_grad
,
-
1
,
0
,
norm_by_times
,
math
::
kLengthBatchWidth
);
const
T
*
loss_grad_data
=
loss_grad
->
data
<
T
>
();
math
::
ScaleLoDTensorFunctor
<
DeviceContext
,
T
>
()(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录