PaddlePaddle / Paddle

Commit ec9bc1bd (unverified)
Authored Sep 11, 2019 by Tao Luo; committed via GitHub on Sep 11, 2019.
paddle::framework::vectorize() templatization (#19730)
remove unused accuracy-diff warpctc-cudnn implementation test=develop
Parent: aa63d5ac

Showing 7 changed files with 2 additions and 251 deletions (+2 -251):

- paddle/fluid/API.spec (+1 -1)
- paddle/fluid/framework/ddim.cc (+0 -9)
- paddle/fluid/framework/ddim.h (+0 -1)
- paddle/fluid/operators/warpctc_cudnn_op.cu.cc (+0 -197)
- paddle/fluid/operators/warpctc_op.cc (+0 -18)
- python/paddle/fluid/layers/nn.py (+0 -3)
- python/paddle/fluid/tests/unittests/test_warpctc_op.py (+1 -22)
paddle/fluid/API.spec

```diff
@@ -159,7 +159,7 @@ paddle.fluid.layers.edit_distance (ArgSpec(args=['input', 'label', 'normalized',
 paddle.fluid.layers.l2_normalize (ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None)), ('document', 'c1df110ea65998984f564c5c10abc54a'))
 paddle.fluid.layers.matmul (ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None)), ('document', '3720b4a386585094435993deb028b592'))
 paddle.fluid.layers.topk (ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e50940f3ce5a08cc477b72f517491bf3'))
-paddle.fluid.layers.warpctc (ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'use_cudnn', 'input_length', 'label_length'], varargs=None, keywords=None, defaults=(0, False, False, None, None)), ('document', 'ba27f25141adf24706536d179fabdf17'))
+paddle.fluid.layers.warpctc (ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'input_length', 'label_length'], varargs=None, keywords=None, defaults=(0, False, None, None)), ('document', 'a5be881ada816e47ea7a6ee4396da357'))
 paddle.fluid.layers.sequence_reshape (ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None), ('document', 'f568714a876425004aca4ea2d4a27701'))
 paddle.fluid.layers.transpose (ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '8e72db173d4c082e27cb11f31d8c9bfa'))
 paddle.fluid.layers.im2sequence (ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None)), ('document', '33134416fc27dd65a767e5f15116ee16'))
```
paddle/fluid/framework/ddim.cc

```diff
@@ -48,15 +48,6 @@ bool DDim::operator==(const DDim& d) const {
 bool DDim::operator!=(const DDim& d) const { return !(*this == d); }
 
-// NOTE: framework::vectorize converts to type int64_t
-// which does not fit cudnn inputs.
-std::vector<int> vectorize2int(const DDim& ddim) {
-  std::vector<int> result(DDim::kMaxRank);
-  dynamic_dim_assign(ddim.Get(), result.data(), ddim.size());
-  result.resize(ddim.size());
-  return result;
-}
-
 struct ProductVisitor {
   template <int D>
   inline int64_t operator()(const Dim<D>& dim) {
```
paddle/fluid/framework/ddim.h

```diff
@@ -177,7 +177,6 @@ std::vector<T> vectorize(const DDim& ddim) {
   result.resize(ddim.size());
   return result;
 }
 
-std::vector<int> vectorize2int(const DDim& ddim);
 
 int64_t product(const DDim& ddim);
```
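The header hunk above keeps only the templated `std::vector<T> vectorize(const DDim& ddim)`, which is the "templatization" the commit title refers to. As an illustrative sketch of the call-site migration this implies (my example, not code from this commit; it assumes the template parameter defaults to `int64_t`, which is what the deleted NOTE in ddim.cc suggests):

```cpp
// Sketch only: how a caller that needs int dims (e.g. for building a cuDNN
// tensor descriptor) migrates off the removed vectorize2int() helper.
#include <cstdint>
#include <vector>

#include "paddle/fluid/framework/ddim.h"

std::vector<int> DimsForCudnn(const paddle::framework::DDim& dims) {
  // Before this commit:
  //   return paddle::framework::vectorize2int(dims);
  // After: one templated entry point covers both element types.
  return paddle::framework::vectorize<int>(dims);
}

std::vector<int64_t> DimsDefault(const paddle::framework::DDim& dims) {
  // Assumed: T defaults to int64_t, so existing callers stay unchanged.
  return paddle::framework::vectorize(dims);
}
```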
paddle/fluid/operators/warpctc_cudnn_op.cu.cc (deleted; 100644 → 0)
```cpp
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/operators/warpctc_op.h"
#include "paddle/fluid/platform/cudnn_helper.h"

namespace paddle {
namespace operators {

#if CUDNN_VERSION >= 7001
using ScopedTensorDescriptor = platform::ScopedTensorDescriptor;
using ScopedCTCLossDescriptor = platform::ScopedCTCLossDescriptor;
using DataLayout = platform::DataLayout;

template <typename DeviceContext, typename T>
class CudnnCTCKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // =====================Copied code from warpctc===========================
    auto* logits = ctx.Input<LoDTensor>("Logits");
    auto* label = ctx.Input<LoDTensor>("Label");
    auto* warpctc_grad = ctx.Output<LoDTensor>("WarpCTCGrad");
    auto* loss = ctx.Output<LoDTensor>("Loss");

    const size_t level = 0;

    auto logits_lod = framework::ToAbsOffset(logits->lod());
    auto logits_dims = logits->dims();
    PADDLE_ENFORCE_EQ(logits_dims[0],
                      static_cast<int64_t>(logits_lod[level].back()),
                      "The first dimension of Input(Logits) should be equal to "
                      "the sum of all sequences' lengths.");

    auto label_lod = framework::ToAbsOffset(label->lod());
    auto label_dims = label->dims();
    PADDLE_ENFORCE_EQ(
        label_dims[0], label->numel(),
        "The width of each timestep in Input(Label) should be 1.");

    const size_t num_sequences = logits_lod[level].size() - 1;
    PADDLE_ENFORCE_EQ(num_sequences, label_lod[level].size() - 1,
                      "The number of sequences of Input(Logits) should be "
                      "equal to that of Input(Label).");
    PADDLE_ENFORCE_LE(num_sequences, 256,
                      "The labelLengths must less than 256 for cudnn call.");

    const size_t sequence_width = logits->numel() / logits_dims[0];
    auto loss_dims =
        framework::make_ddim({static_cast<int64_t>(num_sequences), 1});

    // NOTE: cudnn takes softmax input, calculate softmax first, then do padding
    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
    LoDTensor softmax_logits;
    softmax_logits.mutable_data<T>(logits->dims(), ctx.GetPlace());
    softmax_logits.set_lod(logits_lod);
    int rank = logits->dims().size();
    int axis_dim = logits->dims()[rank - 1];
    Tensor in_2d = framework::ReshapeToMatrix(*logits, rank - 1);
    Tensor out_2d = framework::ReshapeToMatrix(softmax_logits, rank - 1);
    math::SoftmaxFunctor<DeviceContext, T, false>()(dev_ctx, axis_dim, &in_2d,
                                                    &out_2d);

    // ctc needs sequences data stored in transposed padding format
    // logits and grad using padding data of layout 'TNC'
    // T: max_sequence_length
    // N: batch_size (num_sequences)
    // C: width
    LoDTensor warpctc_logits;
    const size_t max_sequence_length =
        math::MaximumSequenceLength(logits_lod[level]);
    auto warpctc_logits_dims =
        framework::make_ddim({static_cast<int64_t>(max_sequence_length),
                              static_cast<int64_t>(num_sequences),
                              static_cast<int64_t>(sequence_width)});
    warpctc_logits.mutable_data<T>(warpctc_logits_dims, ctx.GetPlace());

    LoDTensor cpu_pad_value;
    T* pad_value_data =
        cpu_pad_value.mutable_data<T>({1}, platform::CPUPlace());
    *pad_value_data = static_cast<T>(0);
    LoDTensor pad_value;
    if (platform::is_cpu_place(ctx.GetPlace())) {
      pad_value = cpu_pad_value;
    } else {
      TensorCopySync(cpu_pad_value, ctx.GetPlace(), &pad_value);
    }

    math::PaddingLoDTensorFunctor<DeviceContext, T>()(
        ctx.template device_context<DeviceContext>(), softmax_logits,
        &warpctc_logits, pad_value, -1, 0, false /* norm_by_times */,
        math::kLengthBatchWidth);
    const T* warpctc_logits_data = warpctc_logits.data<T>();

    std::vector<int> warpctc_label_lengths(num_sequences);
    std::vector<int> warpctc_logits_lengths(num_sequences);

    for (size_t i = 0; i < num_sequences; ++i) {
      warpctc_label_lengths[i] = label_lod[level][i + 1] - label_lod[level][i];
      warpctc_logits_lengths[i] =
          logits_lod[level][i + 1] - logits_lod[level][i];
    }

    T* warpctc_grad_data =
        warpctc_grad->mutable_data<T>(warpctc_logits.dims(), ctx.GetPlace());

    math::SetConstant<DeviceContext, T>()(
        ctx.template device_context<DeviceContext>(), warpctc_grad,
        static_cast<T>(0));

    Tensor warpctc_label;
    TensorCopySync(*label, platform::CPUPlace(), &warpctc_label);
    const int* warpctc_label_data = warpctc_label.data<int>();
    // ========================================================================

    ScopedTensorDescriptor logits_desc;
    ScopedTensorDescriptor grad_desc;
    ScopedCTCLossDescriptor ctcloss_desc;
    // layout here doesn't have effect.
    DataLayout layout = DataLayout::kNCHW;

    auto cu_logits_desc = logits_desc.descriptor<T>(
        layout, framework::vectorize2int(warpctc_logits.dims()));
    auto cu_grad_desc = grad_desc.descriptor<T>(
        layout, framework::vectorize2int(warpctc_grad->dims()));
    auto cu_ctcloss_desc = ctcloss_desc.descriptor<T>();

    auto handle = dev_ctx.cudnn_handle();
    size_t workspace_size;

    CUDNN_ENFORCE(platform::dynload::cudnnGetCTCLossWorkspaceSize(
        handle, cu_logits_desc, cu_grad_desc, warpctc_label_data,
        warpctc_label_lengths.data(), warpctc_logits_lengths.data(),
        CUDNN_CTC_LOSS_ALGO_DETERMINISTIC, cu_ctcloss_desc, &workspace_size));

    T* loss_data = loss->mutable_data<T>(loss_dims, ctx.GetPlace());

    auto workspace_handle = dev_ctx.cudnn_workspace_handle();
    auto cudnn_func = [&](void* cudnn_workspace) {
      CUDNN_ENFORCE(platform::dynload::cudnnCTCLoss(
          handle, cu_logits_desc, warpctc_logits_data, warpctc_label_data,
          warpctc_label_lengths.data(), warpctc_logits_lengths.data(),
          loss_data, cu_grad_desc, warpctc_grad_data,
          CUDNN_CTC_LOSS_ALGO_DETERMINISTIC, cu_ctcloss_desc, cudnn_workspace,
          workspace_size));
    };
    workspace_handle.RunFunc(cudnn_func, workspace_size);
  }
};

template <typename DeviceContext, typename T>
class CudnnCTCGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* warpctc_grad = ctx.Input<LoDTensor>("WarpCTCGrad");
    auto* logits_grad =
        ctx.Output<LoDTensor>(framework::GradVarName("Logits"));
    const Tensor* loss_grad =
        ctx.Input<Tensor>(framework::GradVarName("Loss"));

    logits_grad->mutable_data<T>(ctx.GetPlace());
    bool norm_by_times = ctx.Attr<bool>("norm_by_times");
    math::UnpaddingLoDTensorFunctor<DeviceContext, T>()(
        ctx.template device_context<DeviceContext>(), *warpctc_grad,
        logits_grad, -1, 0, norm_by_times, math::kLengthBatchWidth);

    const T* loss_grad_data = loss_grad->data<T>();
    math::ScaleLoDTensorFunctor<DeviceContext, T>()(
        ctx.template device_context<DeviceContext>(), loss_grad_data,
        logits_grad);
  }
};

#endif

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;
#if CUDNN_VERSION >= 7001
REGISTER_OP_KERNEL(
    warpctc, CUDNN, plat::CUDAPlace,
    ops::CudnnCTCKernel<paddle::platform::CUDADeviceContext, float>);
REGISTER_OP_KERNEL(
    warpctc_grad, CUDNN, plat::CUDAPlace,
    ops::CudnnCTCGradKernel<paddle::platform::CUDADeviceContext, float>);
#endif
```
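The deleted kernel's comment documents the 'TNC' padded layout that cuDNN's CTC loss expects (T: max_sequence_length, N: batch_size, C: width). A tiny standalone sketch of the flat-offset arithmetic that layout implies; `TncOffset` is a hypothetical illustration of the storage order, not a Paddle API:

```cpp
#include <cstddef>

// Time-major ('TNC') padded storage: element (t, n, c) of the padded logits
// lives at this flat offset, with T varying slowest and C fastest.
inline std::size_t TncOffset(std::size_t t, std::size_t n, std::size_t c,
                             std::size_t num_sequences,     // N
                             std::size_t sequence_width) {  // C
  return (t * num_sequences + n) * sequence_width + c;
}
```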
paddle/fluid/operators/warpctc_op.cc

```diff
@@ -59,20 +59,6 @@ class WarpCTCOp : public framework::OperatorWithKernel {
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     framework::LibraryType library_{framework::LibraryType::kPlain};
-#ifdef PADDLE_WITH_CUDA
-    if (platform::CanCUDNNBeUsed(ctx)) {
-#if CUDA_VERSION >= 9000
-      LOG(WARNING)
-          << "The cudnnCTCLoss of CUDNN7 have some diff between "
-             "CUDA9/CUDA10 and CUDA8. You can close use_cudnn option to "
-             "use "
-             "baidu-research/warp-ctc(https://github.com/baidu-research/"
-             "warp-ctc)";
-#endif
-      library_ = framework::LibraryType::kCUDNN;
-    }
-#endif
     framework::DataLayout layout_ = framework::DataLayout::kAnyLayout;
     return framework::OpKernelType(ctx.Input<Tensor>("Logits")->type(),
                                    ctx.device_context(), layout_, library_);
@@ -129,10 +115,6 @@ class WarpCTCOpMaker : public framework::OpProtoAndCheckerMaker {
                   "normalize the gradients by the number of time-step, "
                   "which is also the sequence's length.")
         .SetDefault(false);
-    AddAttr<bool>("use_cudnn",
-                  "(bool, default: false), whether to "
-                  "use cudnn kernel.")
-        .SetDefault(false);
     AddComment(R"DOC(
 An operator integrating the open-source
 [warp-ctc](https://github.com/baidu-research/warp-ctc) library, which is used in
```
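For reference, roughly what `GetExpectedKernelType` reduces to with the cuDNN branch gone, assembled from the surviving context lines above (a sketch of my reading of the diff, not text copied from the updated file):

```cpp
framework::OpKernelType GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const override {
  // With the cuDNN path removed, the library type is always kPlain.
  framework::LibraryType library_{framework::LibraryType::kPlain};
  framework::DataLayout layout_ = framework::DataLayout::kAnyLayout;
  return framework::OpKernelType(ctx.Input<Tensor>("Logits")->type(),
                                 ctx.device_context(), layout_, library_);
}
```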
python/paddle/fluid/layers/nn.py

```diff
@@ -5793,7 +5793,6 @@ def warpctc(input,
             label,
             blank=0,
             norm_by_times=False,
-            use_cudnn=False,
             input_length=None,
             label_length=None):
     """
@@ -5825,7 +5824,6 @@ def warpctc(input,
             by the number of time-step, which is also the sequence's length.
             There is no need to normalize the gradients if warpctc layer was
             follewed by a mean_op.
-        use_cudnn (bool, default false): Whether to use cudnn.
         input_length(Variable): The length for each input sequence if it is
             of Tensor type, it should have shape `[batch_size]` and dtype int64.
         label_length(Variable): The length for each label sequence if it is
@@ -5885,7 +5883,6 @@ def warpctc(input,
         attrs={
             'blank': blank,
             'norm_by_times': norm_by_times,
-            'use_cudnn': use_cudnn
         })
     return loss_out
```
python/paddle/fluid/tests/unittests/test_warpctc_op.py

```diff
@@ -183,7 +183,6 @@ class TestWarpCTCOp(OpTest):
         self.labels_lod = [[3, 1, 4, 4]]
         self.blank = self.num_classes - 1
         self.norm_by_times = False
-        self.use_cudnn = False
 
     def setUp(self):
         self.op_type = "warpctc"
@@ -219,7 +218,6 @@ class TestWarpCTCOp(OpTest):
         self.attrs = {
             "blank": self.blank,
             "norm_by_times": self.norm_by_times,
-            "use_cudnn": self.use_cudnn
         }
 
     def test_check_output(self):
@@ -238,7 +236,6 @@ class TestWarpCTCOpCase1(TestWarpCTCOp):
         self.labels_lod = [[3, 1, 4, 4]]
         self.blank = 0
         self.norm_by_times = False
-        self.use_cudnn = False
 
 class TestWarpCTCOpWithPadding(OpTest):
@@ -251,7 +248,6 @@ class TestWarpCTCOpWithPadding(OpTest):
         self.labels_length = np.array([3, 1, 4, 4], dtype=np.int64)
         self.blank = self.num_classes - 1
         self.norm_by_times = False
-        self.use_cudnn = False
 
     def setUp(self):
         self.op_type = "warpctc"
@@ -315,7 +311,6 @@ class TestWarpCTCOpWithPadding(OpTest):
         self.attrs = {
             "blank": self.blank,
             "norm_by_times": self.norm_by_times,
-            "use_cudnn": self.use_cudnn
         }
 
     def test_check_output(self):
@@ -336,23 +331,7 @@ class TestWarpCTCOpWithPaddingCase1(TestWarpCTCOpWithPadding):
         self.labels_length = np.array([3, 1, 4, 4], dtype=np.int64)
         self.blank = 0
         self.norm_by_times = False
-        self.use_cudnn = False
-
-# TODO: fix this test failed cuda9/10 manylinux images
-# class TestCudnnCTCOp(TestWarpCTCOp):
-#     def config(self):
-#         self.batch_size = 4
-#         self.num_classes = 8
-#         self.logits_lod = [[4, 1, 3, 3]]
-#         self.labels_lod = [[3, 1, 4, 4]]
-#         self.blank = 0
-#         self.norm_by_times = False
-#         self.use_cudnn = True
-
-#     def test_check_grad(self):
-#         if sys.version_info < (3, 0):
-#             self.outputs['WarpCTCGrad'] = self.gradient
-#         self.check_grad(["Logits"], "Loss", max_relative_error=0.01)
 
 if __name__ == "__main__":
     unittest.main()
```
登录