Unverified commit 33cc0f7a authored by Zhong Hui and committed by GitHub

[Eager] Add warpctc yaml (#44617)

Parent b6e84806
......@@ -2403,6 +2403,18 @@
    func : viterbi_decode
    data_type : input

- api : warpctc
  args : (Tensor logits, Tensor label, Tensor logits_length, Tensor labels_length, int blank, bool norm_by_times)
  output : Tensor(loss), Tensor(warpctcgrad)
  infer_meta :
    func : WarpctcInferMeta
  kernel :
    func : warpctc
    data_type: logits
    optional: logits_length, labels_length
  intermediate: warpctcgrad
  backward : warpctc_grad

- api : where
  args : (Tensor condition, Tensor x, Tensor y)
  output : Tensor
......
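With this entry in place, the op code generator emits an eager-mode binding that the Python layer calls as _C_ops.final_state_warpctc (see the dygraph branch added further down in this diff). A minimal sketch of that call follows; the shapes, dtypes, random data and the log_softmax preprocessing are illustrative assumptions, not part of this change:

# Sketch only: shapes, dtypes and the log_softmax step are assumptions.
import numpy as np
import paddle
from paddle import _C_ops

Tmax, B, D, Lmax = 5, 2, 4, 3  # time steps, batch size, classes (incl. blank), max label length
logits = paddle.nn.functional.log_softmax(
    paddle.randn([Tmax, B, D], dtype='float32'), axis=-1)
logits.stop_gradient = False
label = paddle.to_tensor(np.random.randint(1, D, (B, Lmax)).astype('int32'))
logits_length = paddle.full([B], Tmax, dtype='int64')
labels_length = paddle.full([B], Lmax, dtype='int64')

# Argument order follows the `args` line of the yaml entry; because `warpctcgrad`
# is marked `intermediate`, only `loss` is returned to Python.
loss = _C_ops.final_state_warpctc(logits, label, logits_length, labels_length,
                                  0, False)  # blank=0, norm_by_times=False
print(loss.shape)  # [B, 1]: one CTC loss value per sequence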
......@@ -2376,6 +2376,18 @@
  inplace : (out_grad -> x_grad)
  backward : unsqueeze_double_grad

- backward_api : warpctc_grad
  forward : warpctc (Tensor logits, Tensor label, Tensor logits_length, Tensor labels_length, int blank, bool norm_by_times) -> Tensor(loss), Tensor(warpctcgrad)
  args : (Tensor logits, Tensor logits_length, Tensor warpctcgrad, Tensor loss_grad, int blank, bool norm_by_times)
  output : Tensor(logits_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [logits]
  kernel :
    func : warpctc_grad
    optional : logits_length
  no_need_buffer : logits

- backward_api : where_grad
  forward : where (Tensor condition, Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor condition, Tensor x, Tensor y, Tensor out_grad)
......
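The backward entry consumes the `warpctcgrad` intermediate saved by the forward pass together with the incoming `loss_grad`. Continuing the sketch shown after the forward yaml entry above (again an illustrative assumption, not part of this diff), backpropagating the loss is what dispatches to this warpctc_grad kernel:

# Continues the forward sketch above; `loss` and `logits` are the tensors created there.
loss.mean().backward()
print(logits.grad.shape)  # [Tmax, B, D], same shape as `logits`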
......@@ -2049,7 +2049,7 @@ void WarpctcInferMeta(const MetaTensor& logits,
const MetaTensor& labels_length,
int blank,
bool norm_by_times,
MetaTensor* warpctc_grad,
MetaTensor* warpctcgrad,
MetaTensor* loss) {
auto logits_dims = logits.dims();
int sequence_width = 0;
......
......@@ -358,7 +358,7 @@ void WarpctcInferMeta(const MetaTensor& logits,
const MetaTensor& labels_length,
int blank,
bool norm_by_times,
MetaTensor* warpctc_grad,
MetaTensor* warpctcgrad,
MetaTensor* loss);
void WhereInferMeta(const MetaTensor& condition,
......
......@@ -29,33 +29,33 @@ namespace phi {
template <typename T, typename Context>
void WarpctcGradKernel(const Context& dev_ctx,
const DenseTensor& warpctc_grad,
const DenseTensor& logits,
const DenseTensor& loss_grad,
const paddle::optional<DenseTensor>& logits_length,
const DenseTensor& warpctcgrad,
const DenseTensor& loss_grad,
int blank,
bool norm_by_times,
DenseTensor* logits_grad) {
dev_ctx.template Alloc<T>(logits_grad);
if (logits_length.is_initialized()) {
int max_seq_length = warpctc_grad.dims()[0]; // Tmax
int num_sequences = warpctc_grad.dims()[1]; // B
int seq_width = warpctc_grad.dims()[2]; // D
int max_seq_length = warpctcgrad.dims()[0]; // Tmax
int num_sequences = warpctcgrad.dims()[1]; // B
int seq_width = warpctcgrad.dims()[2]; // D
// B
auto logits_len_e = EigenTensor<int64_t, 1>::From(*logits_length);
// (B, 1)
auto loss_grad_e = EigenTensor<T, 2>::From(loss_grad);
// (T, B, D)
auto warpctc_grad_e = EigenTensor<T, 3>::From(warpctc_grad);
auto warpctcgrad_e = EigenTensor<T, 3>::From(warpctcgrad);
auto logits_grad_e = EigenTensor<T, 3>::From(*logits_grad);
Eigen::DSizes<int, 3> grad_shape(1, num_sequences, 1);
Eigen::DSizes<int, 3> bcast(max_seq_length, 1, seq_width);
auto logits_g = warpctc_grad_e *
loss_grad_e.reshape(grad_shape).broadcast(bcast).eval();
auto logits_g =
warpctcgrad_e * loss_grad_e.reshape(grad_shape).broadcast(bcast).eval();
auto* place = dev_ctx.eigen_device();
if (norm_by_times) {
......@@ -71,7 +71,7 @@ void WarpctcGradKernel(const Context& dev_ctx,
} else {
paddle::operators::math::UnpaddingLoDTensorFunctor<Context, T>()(
dev_ctx,
warpctc_grad,
warpctcgrad,
logits_grad,
-1,
0,
......
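For reference, the Eigen expression above reshapes loss_grad from (B, 1) to (1, B, 1), broadcasts it to (Tmax, B, D), and multiplies it elementwise with the cached warpctcgrad. A NumPy sketch of the same computation (shapes and random data here are illustrative assumptions):

import numpy as np

Tmax, B, D = 5, 2, 4
warpctcgrad = np.random.randn(Tmax, B, D).astype('float32')  # gradient cached by the forward kernel
loss_grad = np.random.randn(B, 1).astype('float32')          # upstream gradient w.r.t. the loss

# Eigen's reshape(1, B, 1) + broadcast(Tmax, 1, D) is ordinary NumPy broadcasting:
logits_grad = warpctcgrad * loss_grad.reshape(1, B, 1)
assert logits_grad.shape == (Tmax, B, D)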
......@@ -233,8 +233,8 @@ void WarpctcKernel(const Context& dev_ctx,
const paddle::optional<DenseTensor>& labels_length,
int blank,
bool norm_by_times,
DenseTensor* warpctc_grad,
DenseTensor* loss) {
DenseTensor* loss,
DenseTensor* warpctcgrad) {
size_t num_sequences, sequence_width, max_sequence_length;
paddle::framework::Vector<size_t> logits_lod;
paddle::framework::Vector<size_t> label_lod;
......@@ -383,11 +383,11 @@ void WarpctcKernel(const Context& dev_ctx,
// warpctc computes loss and gradient in one call, gradient data also stored
// in batch format
warpctc_grad->Resize(warpctc_logits.dims());
T* warpctc_grad_data = dev_ctx.template Alloc<T>(warpctc_grad);
warpctcgrad->Resize(warpctc_logits.dims());
T* warpctcgrad_data = dev_ctx.template Alloc<T>(warpctcgrad);
phi::funcs::SetConstant<Context, T>()(
dev_ctx, warpctc_grad, static_cast<T>(0));
dev_ctx, warpctcgrad, static_cast<T>(0));
// warpctc accesses labels in CPU memory
DenseTensor warpctc_label;
......@@ -439,7 +439,7 @@ void WarpctcKernel(const Context& dev_ctx,
T* warpctc_loss_data = dev_ctx.template HostAlloc<T>(&warpctc_loss);
WarpCTCFunctor<Context, T>()(dev_ctx,
warpctc_logits_data,
warpctc_grad_data,
warpctcgrad_data,
warpctc_label_data,
warpctc_label_lengths.data(),
warpctc_logits_lengths.data(),
......
......@@ -21,10 +21,10 @@ namespace phi {
template <typename T, typename Context>
void WarpctcGradKernel(const Context& dev_ctx,
const DenseTensor& warpctc_grad,
const DenseTensor& logits,
const DenseTensor& loss_grad,
const paddle::optional<DenseTensor>& logits_length,
const DenseTensor& warpctcgrad,
const DenseTensor& loss_grad,
int blank,
bool norm_by_times,
DenseTensor* logits_grad);
......
......@@ -27,7 +27,7 @@ void WarpctcKernel(const Context& dev_ctx,
const paddle::optional<DenseTensor>& labels_length,
int blank,
bool norm_by_times,
DenseTensor* warpctc_grad,
DenseTensor* loss);
DenseTensor* loss,
DenseTensor* warpctcgrad);
} // namespace phi
......@@ -20,13 +20,13 @@ KernelSignature WarpctcOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("warpctc",
{"Logits", "Label", "LogitsLength", "LabelLength"},
{"blank", "norm_by_times"},
{"WarpCTCGrad", "Loss"});
{"Loss", "WarpCTCGrad"});
}
KernelSignature WarpctcGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature("warpctc_grad",
{"WarpCTCGrad", "Logits", "Loss@GRAD", "LogitsLength"},
{"Logits", "LogitsLength", "WarpCTCGrad", "Loss@GRAD"},
{"blank", "norm_by_times"},
{"Logits@GRAD"});
}
......
......@@ -546,6 +546,15 @@ def warpctc(input,
                            fetch_list=[cost.name])
            print(output)
    """
    if in_dygraph_mode():
        if input_length is None or label_length is None:
            raise ValueError(
                "input_length and label_length must not be None in dygraph mode!"
            )
        loss_out = _C_ops.final_state_warpctc(input, label, input_length,
                                              label_length, blank,
                                              norm_by_times)
        return loss_out
    if _non_static_mode():
        if input_length is None or label_length is None:
            raise ValueError(
......
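In the new eager branch, input_length and label_length are mandatory and the public wrapper routes straight to the generated eager op. A sketch of calling it in dygraph mode (tensor shapes and values are illustrative assumptions):

import paddle

Tmax, B, D, Lmax = 5, 2, 4, 3
logits = paddle.nn.functional.log_softmax(
    paddle.randn([Tmax, B, D], dtype='float32'), axis=-1)
label = paddle.randint(1, D, [B, Lmax], dtype='int32')
input_length = paddle.full([B], Tmax, dtype='int64')
label_length = paddle.full([B], Lmax, dtype='int64')

# Omitting input_length / label_length here would hit the ValueError added above.
cost = paddle.fluid.layers.warpctc(input=logits, label=label, blank=0,
                                   norm_by_times=False,
                                   input_length=input_length,
                                   label_length=label_length)
print(cost.shape)  # [B, 1]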
......@@ -191,6 +191,16 @@ class CTCForward(object):
        return self.loss


def python_api(logits,
               label,
               logits_length=None,
               labels_length=None,
               blank=0,
               norm_by_times=False):
    return paddle.fluid.layers.warpctc(logits, label, blank, norm_by_times,
                                       logits_length, labels_length)


class TestWarpCTCOp(OpTest):
    def config(self):
......@@ -280,6 +290,8 @@ class TestWarpCTCOpWithPadding(OpTest):
    def setUp(self):
        self.op_type = "warpctc"
        self.python_api = python_api
        self.python_out_sig = ["Loss"]
        self.config()
        logits = np.random.uniform(
......@@ -344,7 +356,7 @@ class TestWarpCTCOpWithPadding(OpTest):
        }

    def test_check_output(self):
        self.check_output()
        self.check_output(check_eager=True)

    def test_check_grad(self):
        self.outputs['WarpCTCGrad'] = self.gradient
......@@ -387,6 +399,8 @@ class TestWarpCTCOpFp64(OpTest):
    def setUp(self):
        self.op_type = "warpctc"
        self.python_api = python_api
        self.python_out_sig = ["Loss"]
        self.config()
        logits = np.random.uniform(
......@@ -451,11 +465,11 @@ class TestWarpCTCOpFp64(OpTest):
        }

    def test_check_output(self):
        self.check_output()
        self.check_output(check_eager=True)

    def test_check_grad(self):
        self.outputs['WarpCTCGrad'] = self.gradient
        self.check_grad(["Logits"], "Loss")
        self.check_grad(["Logits"], "Loss", check_eager=True)


class TestWarpCTCOpError(unittest.TestCase):
......