Fixed warpctc, test=develop (#20011)

Use AllocateTmpTensor() for creating temporary tensors in warpctc.

Fixed warpctc, test=develop (#20011)
Use AllocateTmpTensor() for creating temporary tensors in warpctc.
c8e12587 · Li Fuchen · GitHub · 63dd3183 · c8e12587
隐藏空白更改
内联并排

Showing 1 changed file with 8 additions and 5 deletions

paddle/fluid/operators/warpctc_op.h +8 -5

未找到文件。
--- a/paddle/fluid/operators/warpctc_op.h
+++ b/paddle/fluid/operators/warpctc_op.h
@@ -73,11 +73,12 @@ class WarpCTCFunctor {
                      "Bytes of workspace got by warp-ctc function, "
                      "get_workspace_size(), should be larger than 0.");

-    Tensor workspace;
+    auto& dev_ctx = ctx.template device_context<DeviceContext>();
    size_t workspace_elements = workspace_bytes / sizeof(float) + 1UL;
-    float* workspace_data = workspace.mutable_data<float>(
+    Tensor workspace = ctx.AllocateTmpTensor<float, DeviceContext>(
        framework::make_ddim({static_cast<int64_t>(workspace_elements)}),
-        ctx.GetPlace());
+        dev_ctx);
+    float* workspace_data = workspace.data<float>();
    math::SetConstant<DeviceContext, float>()(
        ctx.template device_context<DeviceContext>(), &workspace,
        static_cast<float>(0));
@@ -186,8 +187,10 @@ class WarpCTCKernel : public framework::OpKernel<T> {
        framework::make_ddim({static_cast<int64_t>(max_sequence_length),
                              static_cast<int64_t>(num_sequences),
                              static_cast<int64_t>(sequence_width)});
-    warpctc_logits.mutable_data<T>(warpctc_logits_dims, ctx.GetPlace());
-
+    auto& dev_ctx = ctx.template device_context<DeviceContext>();
+    Tensor warpctc_logits_tmp =
+        ctx.AllocateTmpTensor<T, DeviceContext>(warpctc_logits_dims, dev_ctx);
+    warpctc_logits.ShareDataWith(warpctc_logits_tmp);
    if (ctx.HasInput("LogitsLength")) {
      TensorCopySync(*logits, ctx.GetPlace(), &warpctc_logits);
    } else {