diff --git a/paddle/fluid/platform/for_range.h b/paddle/fluid/platform/for_range.h
index f3f7064efeeb2e1121c09a29473a4a81a063f849..abc427a3ca8815ecf193e4f9213223aa79069ea5 100644
--- a/paddle/fluid/platform/for_range.h
+++ b/paddle/fluid/platform/for_range.h
@@ -13,136 +13,15 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
+
 #include "paddle/fluid/platform/device_context.h"
-#include "paddle/phi/backends/cpu/cpu_context.h"
-#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/kernels/funcs/for_range.h"
 
 namespace paddle {
 namespace platform {
 
 template <typename DeviceContext>
-struct ForRange {
-  ForRange(const DeviceContext& dev_ctx, size_t limit);
-
-  template <typename Function>
-  void operator()(Function func) const;
-};
-
-// NOTE: After the pten kernel is migrated, it needs to be deleted.
-template <>
-struct ForRange<CPUDeviceContext> {
-  ForRange(const CPUDeviceContext& dev_ctx, size_t limit) : limit_(limit) {}
-
-  template <typename Function>
-  void operator()(Function func) const {
-    for (size_t i = 0; i < limit_; ++i) {
-      func(i);
-    }
-  }
-
-  size_t limit_;
-};
-
-template <>
-struct ForRange<phi::CPUContext> {
-  ForRange(const phi::CPUContext& dev_ctx, size_t limit) : limit_(limit) {}
-
-  template <typename Function>
-  void operator()(Function func) const {
-    for (size_t i = 0; i < limit_; ++i) {
-      func(i);
-    }
-  }
-
-  size_t limit_;
-};
-
-#if defined(__NVCC__) || defined(__HIPCC__)
-template <typename Function>
-__global__ static void ForRangeElemwiseOpGridIsOne(Function func) {
-  size_t idx = static_cast<size_t>(threadIdx.x);
-  func(idx);
-}
-
-template <typename Function>
-__global__ static void ForRangeElemwiseOp(Function func, size_t limit) {
-  size_t idx = static_cast<size_t>(blockIdx.x * blockDim.x + threadIdx.x);
-  if (idx < limit) {
-    func(idx);
-  }
-}
-
-// NOTE: After the pten kernel is migrated, it needs to be deleted.
-template <>
-struct ForRange<CUDADeviceContext> {
-  ForRange(const CUDADeviceContext& dev_ctx, size_t limit)
-      : dev_ctx_(dev_ctx), limit_(static_cast<size_t>(limit)) {}
-
-  template <typename Function>
-  inline void operator()(Function func) const {
-#ifdef __HIPCC__
-    // HIP will throw core dump when threads > 256
-    constexpr int num_threads = 256;
-#elif WITH_NV_JETSON
-    // JETSON_NANO will throw core dump when threads > 128
-    int num_thread = 256;
-    platform::ChangeThreadNum(dev_ctx_, &num_thread, 128);
-    const int num_threads = num_thread;
-#else
-    constexpr int num_threads = 1024;
-#endif
-    size_t block_size = limit_ <= num_threads ? limit_ : num_threads;
-    size_t grid_size = (limit_ + num_threads - 1) / num_threads;
-
-    if (grid_size == 1) {
-      ForRangeElemwiseOpGridIsOne<<<1, block_size, 0, dev_ctx_.stream()>>>(
-          func);
-    } else {
-      ForRangeElemwiseOp<<<grid_size, block_size, 0, dev_ctx_.stream()>>>(
-          func, limit_);
-    }
-  }
-
-  const CUDADeviceContext& dev_ctx_;
-  size_t limit_;
-};
-
-template <>
-struct ForRange<phi::GPUContext> {
-  ForRange(const phi::GPUContext& dev_ctx, size_t limit)
-      : dev_ctx_(dev_ctx), limit_(static_cast<size_t>(limit)) {}
-
-  template <typename Function>
-  inline void operator()(Function func) const {
-#ifdef __HIPCC__
-    // HIP will throw core dump when threads > 256
-    constexpr int num_threads = 256;
-#elif WITH_NV_JETSON
-    // JETSON_NANO will throw core dump when threads > 128
-    int num_thread = 256;
-    platform::ChangeThreadNum(dev_ctx_, &num_thread, 128);
-    const int num_threads = num_thread;
-#else
-    constexpr int num_threads = 1024;
-#endif
-    size_t block_size = limit_ <= num_threads ? limit_ : num_threads;
-    size_t grid_size = (limit_ + num_threads - 1) / num_threads;
-
-    if (grid_size == 1) {
-      ForRangeElemwiseOpGridIsOne<<<1, block_size, 0, dev_ctx_.stream()>>>(
-          func);
-    } else {
-      ForRangeElemwiseOp<<<grid_size, block_size, 0, dev_ctx_.stream()>>>(
-          func, limit_);
-    }
-  }
-
-  const phi::GPUContext& dev_ctx_;
-  size_t limit_;
-};
-
-#endif
+using ForRange = phi::funcs::ForRange<DeviceContext>;
 
 }  // namespace platform
 }  // namespace paddle
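Note (not part of the diff): with the fluid header reduced to an alias template, every existing `paddle::platform::ForRange<DeviceContext>` spelling now names the phi type directly, so legacy fluid call sites keep compiling unchanged. A minimal sketch of that equivalence, assuming only the alias introduced above:

```cpp
#include <type_traits>

#include "paddle/fluid/platform/for_range.h"

// Both spellings denote the same specialization, because
// paddle::platform::ForRange is an alias template for phi::funcs::ForRange.
static_assert(
    std::is_same<paddle::platform::ForRange<phi::CPUContext>,
                 phi::funcs::ForRange<phi::CPUContext>>::value,
    "fluid ForRange must be the phi implementation");
```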
diff --git a/paddle/phi/kernels/cpu/abs_kernel.cc b/paddle/phi/kernels/cpu/abs_kernel.cc
index 71d818c45e6f3f28697d3496cc9ae8a0d209ce6e..efe7d090405df72ce07b2b2bb7f045977d982eff 100644
--- a/paddle/phi/kernels/cpu/abs_kernel.cc
+++ b/paddle/phi/kernels/cpu/abs_kernel.cc
@@ -13,11 +13,11 @@
 // limitations under the License.
 
 #include "paddle/phi/kernels/abs_kernel.h"
-#include "paddle/fluid/platform/for_range.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/common/complex.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/complex_functors.h"
+#include "paddle/phi/kernels/funcs/for_range.h"
 
 namespace phi {
@@ -29,7 +29,7 @@ void AbsKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) {
       out, size_t(x.numel() * sizeof(phi::funcs::Real<T>)));
   auto* out_data = out->data<phi::funcs::Real<T>>();
 
-  paddle::platform::ForRange<Context> for_range(ctx, numel);
+  phi::funcs::ForRange<Context> for_range(ctx, numel);
   phi::funcs::AbsFunctor<T> functor(x_data, out_data, numel);
   for_range(functor);
 }
diff --git a/paddle/phi/kernels/funcs/diagonal.h b/paddle/phi/kernels/funcs/diagonal.h
index a82c4f66d010273f0f09fa71a38c3081fd1bc2ee..19a93970d090af060b888f512782975b073fff72 100644
--- a/paddle/phi/kernels/funcs/diagonal.h
+++ b/paddle/phi/kernels/funcs/diagonal.h
@@ -22,8 +22,8 @@
 
 #include <algorithm>
 
-#include "paddle/fluid/platform/for_range.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/kernels/funcs/for_range.h"
 
 namespace phi {
 namespace funcs {
@@ -118,7 +118,7 @@ DenseTensor Diagonal(const DeviceContext& context,
 #endif
 
   // auto& dev_ctx = context.template device_context<DeviceContext>();
-  paddle::platform::ForRange<DeviceContext> for_range(context, diag.numel());
+  phi::funcs::ForRange<DeviceContext> for_range(context, diag.numel());
   DiagonalFunctor<T> functor(
       input_data, diag_arr, ret_arr, pos, dim_size, diag_data);
   for_range(functor);
diff --git a/paddle/phi/kernels/funcs/elementwise_base.h b/paddle/phi/kernels/funcs/elementwise_base.h
index 47f1593a11eb9e29cc90b7db36650826734ac27f..d369781f845eb0887817f83be761b1027fc0bab0 100644
--- a/paddle/phi/kernels/funcs/elementwise_base.h
+++ b/paddle/phi/kernels/funcs/elementwise_base.h
@@ -14,11 +14,11 @@
 limitations under the License. */
 
 #pragma once
-#include "paddle/fluid/platform/for_range.h"
 #include "paddle/fluid/platform/transform.h"
 #include "paddle/phi/backends/all_context.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/kernels/empty_kernel.h"
+#include "paddle/phi/kernels/funcs/for_range.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 #if defined(__NVCC__) || defined(__HIPCC__) || defined(__xpu__)
@@ -418,7 +418,7 @@ void ElemwiseGradComputeNoBroadcast(const DeviceContext &dev_ctx,
                                     DX_OP dx_op,
                                     DY_OP dy_op) {
   size_t N = static_cast<size_t>(phi::product(x_dim));
-  paddle::platform::ForRange<DeviceContext> for_range(dev_ctx, N);
+  phi::funcs::ForRange<DeviceContext> for_range(dev_ctx, N);
   for_range(ElemwiseGradNoBroadcast<T, DX_OP, DY_OP, Tout>{
       x.data<T>(),
       y.data<T>(),
diff --git a/paddle/phi/kernels/funcs/for_range.h b/paddle/phi/kernels/funcs/for_range.h
new file mode 100644
index 0000000000000000000000000000000000000000..bf0888c301fe739994089b8e05357bd810455756
--- /dev/null
+++ b/paddle/phi/kernels/funcs/for_range.h
@@ -0,0 +1,129 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "paddle/phi/backends/all_context.h"
+#include "paddle/phi/backends/gpu/gpu_launch_config.h"
+
+namespace phi {
+namespace funcs {
+
+template <typename Context>
+struct ForRange {
+  ForRange(const Context& dev_ctx, size_t limit);
+
+  template <typename Function>
+  void operator()(Function func) const;
+};
+
+template <>
+struct ForRange<phi::CPUContext> {
+  ForRange(const phi::CPUContext& dev_ctx, size_t limit) : limit_(limit) {}
+
+  template <typename Function>
+  void operator()(Function func) const {
+    for (size_t i = 0; i < limit_; ++i) {
+      func(i);
+    }
+  }
+
+  size_t limit_;
+};
+
+// NOTE: After the pten kernel is migrated, it needs to be deleted.
+template <>
+struct ForRange<paddle::platform::CPUDeviceContext> {
+  ForRange(const paddle::platform::CPUDeviceContext& dev_ctx, size_t limit)
+      : dev_ctx_(dev_ctx), limit_(limit) {}
+
+  template <typename Function>
+  void operator()(Function func) const {
+    phi::funcs::ForRange<phi::CPUContext> for_range(dev_ctx_, limit_);
+    for_range(func);
+  }
+
+  const paddle::platform::CPUDeviceContext& dev_ctx_;
+  size_t limit_;
+};
+
+#if defined(__NVCC__) || defined(__HIPCC__)
+
+template <typename Function>
+__global__ static void ForRangeElemwiseOpGridIsOne(Function func) {
+  size_t idx = static_cast<size_t>(threadIdx.x);
+  func(idx);
+}
+
+template <typename Function>
+__global__ static void ForRangeElemwiseOp(Function func, size_t limit) {
+  size_t idx = static_cast<size_t>(blockIdx.x * blockDim.x + threadIdx.x);
+  if (idx < limit) {
+    func(idx);
+  }
+}
+
+template <>
+struct ForRange<phi::GPUContext> {
+  ForRange(const phi::GPUContext& dev_ctx, size_t limit)
+      : dev_ctx_(dev_ctx), limit_(limit) {}
+
+  template <typename Function>
+  inline void operator()(Function func) const {
+#ifdef __HIPCC__
+    // HIP will throw core dump when threads > 256
+    constexpr int num_threads = 256;
+#elif WITH_NV_JETSON
+    // JETSON_NANO will throw core dump when threads > 128
+    int num_thread = 256;
+    backends::gpu::ChangeThreadNum(dev_ctx_, &num_thread, 128);
+    const int num_threads = num_thread;
+#else
+    constexpr int num_threads = 1024;
+#endif
+    size_t block_size = limit_ <= num_threads ? limit_ : num_threads;
+    size_t grid_size = (limit_ + num_threads - 1) / num_threads;
+
+    if (grid_size == 1) {
+      ForRangeElemwiseOpGridIsOne<<<1, block_size, 0, dev_ctx_.stream()>>>(
+          func);
+    } else {
+      ForRangeElemwiseOp<<<grid_size, block_size, 0, dev_ctx_.stream()>>>(
+          func, limit_);
+    }
+  }
+
+  const phi::GPUContext& dev_ctx_;
+  size_t limit_;
+};
+
+// NOTE: After the pten kernel is migrated, it needs to be deleted.
+template <>
+struct ForRange<paddle::platform::CUDADeviceContext> {
+  ForRange(const paddle::platform::CUDADeviceContext& dev_ctx, size_t limit)
+      : dev_ctx_(dev_ctx), limit_(limit) {}
+
+  template <typename Function>
+  inline void operator()(Function func) const {
+    phi::funcs::ForRange<phi::GPUContext> for_range(dev_ctx_, limit_);
+    for_range(func);
+  }
+
+  const paddle::platform::CUDADeviceContext& dev_ctx_;
+  size_t limit_;
+};
+
+#endif
+
+}  // namespace funcs
+}  // namespace phi
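Note (not part of the diff): the launch configuration in `ForRange<phi::GPUContext>::operator()` caps the block at `num_threads` and takes the ceiling of `limit / num_threads` for the grid; the single-block kernel skips the bounds check because every thread index is then in range. A standalone sketch of that arithmetic (the values are illustrative, and `num_threads` is the non-HIP, non-Jetson default):

```cpp
#include <cstddef>
#include <cstdio>

int main() {
  constexpr std::size_t num_threads = 1024;  // default CUDA path above
  const std::size_t limits[] = {300, 1024, 5000};
  for (std::size_t limit : limits) {
    // Same expressions as in ForRange<phi::GPUContext>::operator().
    std::size_t block_size = limit <= num_threads ? limit : num_threads;
    std::size_t grid_size = (limit + num_threads - 1) / num_threads;  // ceil
    std::printf("limit=%zu -> grid=%zu block=%zu\n", limit, grid_size, block_size);
  }
  // Prints:
  //   limit=300  -> grid=1 block=300   (ForRangeElemwiseOpGridIsOne, no bound check)
  //   limit=1024 -> grid=1 block=1024
  //   limit=5000 -> grid=5 block=1024  (ForRangeElemwiseOp, checks idx < limit)
  return 0;
}
```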
"paddle/phi/kernels/funcs/for_range.h" namespace phi { @@ -85,7 +85,7 @@ void Atan2GradKernel(const Context& ctx, auto* y_grad_data = ctx.template Alloc(y_grad, size_t(y.numel() * sizeof(T))); - paddle::platform::ForRange for_range(ctx, numel); + phi::funcs::ForRange for_range(ctx, numel); phi::Atan2GradFunctor functor( x_data, y_data, out_grad_data, x_grad_data, y_grad_data, numel); for_range(functor); diff --git a/paddle/phi/kernels/impl/atan2_kernel_impl.h b/paddle/phi/kernels/impl/atan2_kernel_impl.h index c29449a27e0b5603c4e6f50c8ed676677c29796a..2cae914e2f61555377f7a41b3d89cdbb2b589247 100644 --- a/paddle/phi/kernels/impl/atan2_kernel_impl.h +++ b/paddle/phi/kernels/impl/atan2_kernel_impl.h @@ -14,9 +14,9 @@ #pragma once -#include "paddle/fluid/platform/for_range.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/kernels/atan2_kernel.h" +#include "paddle/phi/kernels/funcs/for_range.h" namespace phi { template @@ -80,7 +80,7 @@ void Atan2Kernel(const Context& ctx, auto* out_data = ctx.template Alloc::type>( out, size_t(x.numel() * sizeof(typename Atan2Out::type))); - paddle::platform::ForRange for_range(ctx, numel); + phi::funcs::ForRange for_range(ctx, numel); phi::Atan2Functor functor(x_data, y_data, out_data, numel); for_range(functor); } diff --git a/paddle/phi/kernels/impl/complex_grad_kernel_impl.h b/paddle/phi/kernels/impl/complex_grad_kernel_impl.h index febc464e6a1f5780ac6a25f0baa55449014a4f66..a10481284b17fbc21865ab8aa3b5ebad4e0a7d95 100644 --- a/paddle/phi/kernels/impl/complex_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/complex_grad_kernel_impl.h @@ -14,8 +14,8 @@ #pragma once -#include "paddle/fluid/platform/for_range.h" #include "paddle/phi/kernels/funcs/complex_functors.h" +#include "paddle/phi/kernels/funcs/for_range.h" namespace phi { @@ -28,7 +28,7 @@ void RealGradKernel(const Context& dev_ctx, auto* dx_data = dev_ctx.template Alloc(dx, static_cast(numel * sizeof(T))); - paddle::platform::ForRange for_range(dev_ctx, numel); + phi::funcs::ForRange for_range(dev_ctx, numel); phi::funcs::RealToComplexFunctor functor(dout_data, dx_data, numel); for_range(functor); } @@ -42,7 +42,7 @@ void ImagGradKernel(const Context& dev_ctx, auto* dx_data = dev_ctx.template Alloc(dx, static_cast(numel * sizeof(T))); - paddle::platform::ForRange for_range(dev_ctx, numel); + phi::funcs::ForRange for_range(dev_ctx, numel); phi::funcs::ImagToComplexFunctor functor(dout_data, dx_data, numel); for_range(functor); } diff --git a/paddle/phi/kernels/impl/complex_kernel_impl.h b/paddle/phi/kernels/impl/complex_kernel_impl.h index 2f9b1ad04665378307b099f0fc3a0c75f487e41a..ff5cf86ed2ea240747f70f4410b339a135a49d3a 100644 --- a/paddle/phi/kernels/impl/complex_kernel_impl.h +++ b/paddle/phi/kernels/impl/complex_kernel_impl.h @@ -15,8 +15,8 @@ #pragma once // See Note [ Why still include the fluid headers? 
] -#include "paddle/fluid/platform/for_range.h" #include "paddle/phi/kernels/funcs/complex_functors.h" +#include "paddle/phi/kernels/funcs/for_range.h" namespace phi { @@ -28,7 +28,7 @@ void ConjKernel(const Context& dev_ctx, auto* x_data = x.data(); auto* out_data = dev_ctx.template Alloc(out); - paddle::platform::ForRange for_range(dev_ctx, numel); + phi::funcs::ForRange for_range(dev_ctx, numel); phi::funcs::ConjFunctor functor(x_data, numel, out_data); for_range(functor); } @@ -42,7 +42,7 @@ void RealKernel(const Context& dev_ctx, auto* out_data = dev_ctx.template Alloc>( out, static_cast(numel * sizeof(phi::funcs::Real))); - paddle::platform::ForRange for_range(dev_ctx, numel); + phi::funcs::ForRange for_range(dev_ctx, numel); phi::funcs::RealFunctor functor(x_data, out_data, numel); for_range(functor); } @@ -56,7 +56,7 @@ void ImagKernel(const Context& dev_ctx, auto* out_data = dev_ctx.template Alloc>( out, static_cast(numel * sizeof(phi::funcs::Real))); - paddle::platform::ForRange for_range(dev_ctx, numel); + phi::funcs::ForRange for_range(dev_ctx, numel); phi::funcs::ImagFunctor functor(x_data, out_data, numel); for_range(functor); } diff --git a/paddle/phi/kernels/impl/digamma_grad_kernel_impl.h b/paddle/phi/kernels/impl/digamma_grad_kernel_impl.h index f94fe7168b2a5cb338f5fdc741d9be56b810f7c6..74ded1569eb5804950898bc1b824367b56480cda 100644 --- a/paddle/phi/kernels/impl/digamma_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/digamma_grad_kernel_impl.h @@ -15,8 +15,8 @@ #pragma once #include -#include "paddle/fluid/platform/for_range.h" #include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/kernels/funcs/for_range.h" namespace phi { @@ -47,7 +47,7 @@ void DigammaGradKernel(const Context& ctx, auto* x_data = x.data(); auto* dx_data = x_grad->data(); auto numel = out_grad.numel(); - paddle::platform::ForRange for_range(ctx, numel); + phi::funcs::ForRange for_range(ctx, numel); DigammaGradFunctor functor(dout_data, x_data, dx_data, numel); for_range(functor); } diff --git a/paddle/phi/kernels/impl/digamma_kernel_impl.h b/paddle/phi/kernels/impl/digamma_kernel_impl.h index 5a924a322d6e9941475854dbc01bc4b1d0084bb5..8994979e64d70753ba7b0a6a4debc5e48a95f243 100644 --- a/paddle/phi/kernels/impl/digamma_kernel_impl.h +++ b/paddle/phi/kernels/impl/digamma_kernel_impl.h @@ -15,8 +15,8 @@ #pragma once #include -#include "paddle/fluid/platform/for_range.h" #include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/kernels/funcs/for_range.h" namespace phi { @@ -41,7 +41,7 @@ void DigammaKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) { auto* x_data = x.data(); auto* out_data = out->data(); auto numel = x.numel(); - paddle::platform::ForRange for_range(ctx, numel); + phi::funcs::ForRange for_range(ctx, numel); DigammaFunctor functor(x_data, out_data, numel); for_range(functor); } diff --git a/paddle/phi/kernels/impl/trace_grad_kernel_impl.h b/paddle/phi/kernels/impl/trace_grad_kernel_impl.h index 5263f92cb578b9cda612e7bfa4edb2b425876b20..b0878d779462a9c351caa038af2ac017bbf4a14f 100644 --- a/paddle/phi/kernels/impl/trace_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/trace_grad_kernel_impl.h @@ -21,7 +21,7 @@ #include -#include "paddle/fluid/platform/for_range.h" +#include "paddle/phi/kernels/funcs/for_range.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace phi { @@ -130,7 +130,7 @@ void TraceGradKernel(const Context& ctx, const auto* input_arr = input_stride.Get(); #endif - paddle::platform::ForRange for_range(ctx, in_grad->numel()); + 
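Note (not part of the diff): every call-site hunk above follows the same three-step pattern: allocate the output, construct an element-wise functor, and hand it to `phi::funcs::ForRange<Context>`. A sketch of that pattern as a self-contained phi-style kernel; the functor and kernel names are invented for illustration and do not appear in this diff:

```cpp
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/hostdevice.h"
#include "paddle/phi/kernels/funcs/for_range.h"

namespace phi {

// Hypothetical functor: writes 2 * in[idx] to out[idx] for each index.
template <typename T>
struct DoubleFunctor {
  const T* in;
  T* out;
  HOSTDEVICE void operator()(size_t idx) const { out[idx] = in[idx] + in[idx]; }
};

// Hypothetical kernel showing the migrated pattern shared by the hunks above.
template <typename T, typename Context>
void DoubleKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) {
  auto numel = x.numel();
  const T* x_data = x.data<T>();
  T* out_data = ctx.template Alloc<T>(out);

  // Serial loop on phi::CPUContext; a CUDA grid launch on phi::GPUContext.
  phi::funcs::ForRange<Context> for_range(ctx, numel);
  DoubleFunctor<T> functor{x_data, out_data};
  for_range(functor);
}

}  // namespace phi
```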
diff --git a/paddle/phi/kernels/impl/trace_grad_kernel_impl.h b/paddle/phi/kernels/impl/trace_grad_kernel_impl.h
index 5263f92cb578b9cda612e7bfa4edb2b425876b20..b0878d779462a9c351caa038af2ac017bbf4a14f 100644
--- a/paddle/phi/kernels/impl/trace_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/trace_grad_kernel_impl.h
@@ -21,7 +21,7 @@
 
 #include <array>
 
-#include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/kernels/funcs/for_range.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 namespace phi {
@@ -130,7 +130,7 @@ void TraceGradKernel(const Context& ctx,
   const auto* input_arr = input_stride.Get();
 #endif
 
-  paddle::platform::ForRange<Context> for_range(ctx, in_grad->numel());
+  phi::funcs::ForRange<Context> for_range(ctx, in_grad->numel());
   TraceGradFunctor<T> functor(out_data,
                               output_arr,
                               input_arr,