Unverified commit 33cc0f7a, authored by Zhong Hui, committed by GitHub

[Eager] Add warpctc yaml (#44617)

Parent: b6e84806
@@ -2403,6 +2403,18 @@
   func : viterbi_decode
   data_type : input
 
+- api : warpctc
+  args : (Tensor logits, Tensor label, Tensor logits_length, Tensor labels_length, int blank, bool norm_by_times)
+  output : Tensor(loss), Tensor(warpctcgrad)
+  infer_meta :
+    func : WarpctcInferMeta
+  kernel :
+    func : warpctc
+    data_type: logits
+    optional: logits_length, labels_length
+  intermediate: warpctcgrad
+  backward : warpctc_grad
+
 - api : where
   args : (Tensor condition, Tensor x, Tensor y)
   output : Tensor
...
@@ -2376,6 +2376,18 @@
   inplace : (out_grad -> x_grad)
   backward : unsqueeze_double_grad
 
+- backward_api : warpctc_grad
+  forward : warpctc (Tensor logits, Tensor label, Tensor logits_length, Tensor labels_length, int blank, bool norm_by_times) -> Tensor(loss), Tensor(warpctcgrad)
+  args : (Tensor logits, Tensor logits_length, Tensor warpctcgrad, Tensor loss_grad, int blank, bool norm_by_times)
+  output : Tensor(logits_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [logits]
+  kernel :
+    func : warpctc_grad
+    optional : logits_length
+  no_need_buffer : logits
+
 - backward_api : where_grad
   forward : where (Tensor condition, Tensor x, Tensor y) -> Tensor(out)
   args : (Tensor condition, Tensor x, Tensor y, Tensor out_grad)
...
@@ -2049,7 +2049,7 @@ void WarpctcInferMeta(const MetaTensor& logits,
                       const MetaTensor& labels_length,
                       int blank,
                       bool norm_by_times,
-                      MetaTensor* warpctc_grad,
+                      MetaTensor* warpctcgrad,
                       MetaTensor* loss) {
   auto logits_dims = logits.dims();
   int sequence_width = 0;
...
@@ -358,7 +358,7 @@ void WarpctcInferMeta(const MetaTensor& logits,
                       const MetaTensor& labels_length,
                       int blank,
                       bool norm_by_times,
-                      MetaTensor* warpctc_grad,
+                      MetaTensor* warpctcgrad,
                       MetaTensor* loss);
 
 void WhereInferMeta(const MetaTensor& condition,
...
@@ -29,33 +29,33 @@ namespace phi {
 
 template <typename T, typename Context>
 void WarpctcGradKernel(const Context& dev_ctx,
-                       const DenseTensor& warpctc_grad,
                        const DenseTensor& logits,
-                       const DenseTensor& loss_grad,
                        const paddle::optional<DenseTensor>& logits_length,
+                       const DenseTensor& warpctcgrad,
+                       const DenseTensor& loss_grad,
                        int blank,
                        bool norm_by_times,
                        DenseTensor* logits_grad) {
   dev_ctx.template Alloc<T>(logits_grad);
 
   if (logits_length.is_initialized()) {
-    int max_seq_length = warpctc_grad.dims()[0];  // Tmax
-    int num_sequences = warpctc_grad.dims()[1];   // B
-    int seq_width = warpctc_grad.dims()[2];       // D
+    int max_seq_length = warpctcgrad.dims()[0];  // Tmax
+    int num_sequences = warpctcgrad.dims()[1];   // B
+    int seq_width = warpctcgrad.dims()[2];       // D
 
     // B
     auto logits_len_e = EigenTensor<int64_t, 1>::From(*logits_length);
     // (B, 1)
     auto loss_grad_e = EigenTensor<T, 2>::From(loss_grad);
     // (T, B, D)
-    auto warpctc_grad_e = EigenTensor<T, 3>::From(warpctc_grad);
+    auto warpctcgrad_e = EigenTensor<T, 3>::From(warpctcgrad);
 
     auto logits_grad_e = EigenTensor<T, 3>::From(*logits_grad);
 
     Eigen::DSizes<int, 3> grad_shape(1, num_sequences, 1);
     Eigen::DSizes<int, 3> bcast(max_seq_length, 1, seq_width);
-    auto logits_g = warpctc_grad_e *
-                    loss_grad_e.reshape(grad_shape).broadcast(bcast).eval();
+    auto logits_g =
+        warpctcgrad_e * loss_grad_e.reshape(grad_shape).broadcast(bcast).eval();
 
     auto* place = dev_ctx.eigen_device();
     if (norm_by_times) {
@@ -71,7 +71,7 @@ void WarpctcGradKernel(const Context& dev_ctx,
   } else {
     paddle::operators::math::UnpaddingLoDTensorFunctor<Context, T>()(
         dev_ctx,
-        warpctc_grad,
+        warpctcgrad,
        logits_grad,
         -1,
         0,
...
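The Eigen expression in the padded branch above reduces to an elementwise scale with broadcasting: the cached CTC gradient of shape (Tmax, B, D) is multiplied by the upstream loss gradient broadcast from (B, 1). A minimal NumPy sketch of that step, with toy shapes chosen purely for illustration (not part of the PR):

```python
import numpy as np

# Sketch of the padded-input branch of WarpctcGradKernel: the gradient cached by
# the forward kernel, shaped (Tmax, B, D), is scaled by the upstream loss
# gradient, which is broadcast from (B, 1) over the time and class dimensions.
T, B, D = 4, 2, 3                      # toy sizes: Tmax, batch, sequence width
warpctcgrad = np.random.rand(T, B, D)  # gradient stored by the forward pass
loss_grad = np.random.rand(B, 1)       # d(loss) coming from upstream

logits_grad = warpctcgrad * loss_grad.reshape(1, B, 1)  # same broadcast as the Eigen code
assert logits_grad.shape == (T, B, D)
```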
@@ -233,8 +233,8 @@ void WarpctcKernel(const Context& dev_ctx,
                    const paddle::optional<DenseTensor>& labels_length,
                    int blank,
                    bool norm_by_times,
-                   DenseTensor* warpctc_grad,
-                   DenseTensor* loss) {
+                   DenseTensor* loss,
+                   DenseTensor* warpctcgrad) {
   size_t num_sequences, sequence_width, max_sequence_length;
   paddle::framework::Vector<size_t> logits_lod;
   paddle::framework::Vector<size_t> label_lod;
@@ -383,11 +383,11 @@ void WarpctcKernel(const Context& dev_ctx,
   // warpctc computes loss and gradient in one call, gradient data also stored
   // in batch format
-  warpctc_grad->Resize(warpctc_logits.dims());
-  T* warpctc_grad_data = dev_ctx.template Alloc<T>(warpctc_grad);
+  warpctcgrad->Resize(warpctc_logits.dims());
+  T* warpctcgrad_data = dev_ctx.template Alloc<T>(warpctcgrad);
   phi::funcs::SetConstant<Context, T>()(
-      dev_ctx, warpctc_grad, static_cast<T>(0));
+      dev_ctx, warpctcgrad, static_cast<T>(0));
 
   // warpctc accesses labels in CPU memory
   DenseTensor warpctc_label;
@@ -439,7 +439,7 @@ void WarpctcKernel(const Context& dev_ctx,
   T* warpctc_loss_data = dev_ctx.template HostAlloc<T>(&warpctc_loss);
   WarpCTCFunctor<Context, T>()(dev_ctx,
                                warpctc_logits_data,
-                               warpctc_grad_data,
+                               warpctcgrad_data,
                                warpctc_label_data,
                                warpctc_label_lengths.data(),
                                warpctc_logits_lengths.data(),
...
@@ -21,10 +21,10 @@ namespace phi {
 
 template <typename T, typename Context>
 void WarpctcGradKernel(const Context& dev_ctx,
-                       const DenseTensor& warpctc_grad,
                        const DenseTensor& logits,
-                       const DenseTensor& loss_grad,
                        const paddle::optional<DenseTensor>& logits_length,
+                       const DenseTensor& warpctcgrad,
+                       const DenseTensor& loss_grad,
                        int blank,
                        bool norm_by_times,
                        DenseTensor* logits_grad);
...
@@ -27,7 +27,7 @@ void WarpctcKernel(const Context& dev_ctx,
                    const paddle::optional<DenseTensor>& labels_length,
                    int blank,
                    bool norm_by_times,
-                   DenseTensor* warpctc_grad,
-                   DenseTensor* loss);
+                   DenseTensor* loss,
+                   DenseTensor* warpctcgrad);
 
 }  // namespace phi
...
@@ -20,13 +20,13 @@ KernelSignature WarpctcOpArgumentMapping(const ArgumentMappingContext& ctx) {
   return KernelSignature("warpctc",
                          {"Logits", "Label", "LogitsLength", "LabelLength"},
                          {"blank", "norm_by_times"},
-                         {"WarpCTCGrad", "Loss"});
+                         {"Loss", "WarpCTCGrad"});
 }
 
 KernelSignature WarpctcGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature("warpctc_grad",
-                         {"WarpCTCGrad", "Logits", "Loss@GRAD", "LogitsLength"},
+                         {"Logits", "LogitsLength", "WarpCTCGrad", "Loss@GRAD"},
                          {"blank", "norm_by_times"},
                          {"Logits@GRAD"});
 }
...
@@ -546,6 +546,15 @@ def warpctc(input,
                               fetch_list=[cost.name])
             print(output)
     """
+    if in_dygraph_mode():
+        if input_length is None or label_length is None:
+            raise ValueError(
+                "input_length and label_length must not be None in dygraph mode!"
+            )
+        loss_out = _C_ops.final_state_warpctc(input, label, input_length,
+                                              label_length, blank,
+                                              norm_by_times)
+        return loss_out
     if _non_static_mode():
         if input_length is None or label_length is None:
             raise ValueError(
...
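For context, a rough dygraph usage sketch of the wrapper being modified; the tensor shapes, values, and variable names here are illustrative assumptions, not taken from the PR. With both `input_length` and `label_length` supplied, the call takes the new eager branch above and dispatches to `_C_ops.final_state_warpctc`:

```python
import paddle
import paddle.fluid as fluid

# Illustrative dygraph call to fluid.layers.warpctc with padded inputs.
paddle.disable_static()
Tmax, B, C = 6, 2, 8                      # max time steps, batch size, classes incl. blank
logits = paddle.randn([Tmax, B, C])       # padded logits, (Tmax, B, num_classes + 1)
labels = paddle.randint(1, C, [B, 3], dtype='int32')   # padded labels; blank index is 0
input_length = paddle.full([B], Tmax, dtype='int64')
label_length = paddle.full([B], 3, dtype='int64')

loss = fluid.layers.warpctc(logits, labels, blank=0, norm_by_times=False,
                            input_length=input_length, label_length=label_length)
print(loss.shape)  # one CTC loss per sequence, expected shape [B, 1]
```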
@@ -191,6 +191,16 @@ class CTCForward(object):
         return self.loss
 
 
+def python_api(logits,
+               label,
+               logits_length=None,
+               labels_length=None,
+               blank=0,
+               norm_by_times=False):
+    return paddle.fluid.layers.warpctc(logits, label, blank, norm_by_times,
+                                       logits_length, labels_length)
+
+
 class TestWarpCTCOp(OpTest):
 
     def config(self):
@@ -280,6 +290,8 @@ class TestWarpCTCOpWithPadding(OpTest):
 
     def setUp(self):
         self.op_type = "warpctc"
+        self.python_api = python_api
+        self.python_out_sig = ["Loss"]
         self.config()
         logits = np.random.uniform(
@@ -344,7 +356,7 @@ class TestWarpCTCOpWithPadding(OpTest):
         }
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_eager=True)
 
     def test_check_grad(self):
         self.outputs['WarpCTCGrad'] = self.gradient
@@ -387,6 +399,8 @@ class TestWarpCTCOpFp64(OpTest):
 
     def setUp(self):
         self.op_type = "warpctc"
+        self.python_api = python_api
+        self.python_out_sig = ["Loss"]
         self.config()
         logits = np.random.uniform(
@@ -451,11 +465,11 @@ class TestWarpCTCOpFp64(OpTest):
         }
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_eager=True)
 
     def test_check_grad(self):
         self.outputs['WarpCTCGrad'] = self.gradient
-        self.check_grad(["Logits"], "Loss")
+        self.check_grad(["Logits"], "Loss", check_eager=True)
 
 
 class TestWarpCTCOpError(unittest.TestCase):
...