[PTen]Migrate Dim and DDim from paddle::framework into pten namespace (#39053)

* Migrate Dim and DDim from paddle::framework into pten namespace
* fix paddle::framework::Array
* fix framework::Array

[PTen]Migrate Dim and DDim from paddle::framework into pten namespace (#39053)
* Migrate Dim and DDim from paddle::framework into pten namespace
* fix paddle::framework::Array
* fix framework::Array
4e23ba32 · Aurelius84 · GitHub · cf6516ff · 4e23ba32 · 4e23ba32
106 changed files
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -55,14 +55,6 @@ proto_library(trainer_desc_proto SRCS trainer_desc.proto DEPS framework_proto

 cc_library(string_array SRCS string_array.cc DEPS utf8proc)

-cc_library(ddim SRCS ddim.cc DEPS eigen3 boost enforce)
-cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
-if(WITH_GPU)
-  nv_test(dim_test SRCS dim_test.cu DEPS ddim)
-elseif(WITH_ROCM)
-  hip_test(dim_test SRCS dim_test.cu DEPS ddim)
-endif()
-cc_test(unroll_array_ops_test SRCS unroll_array_ops_test.cc)
 cc_library(data_type SRCS data_type.cc DEPS framework_proto ddim device_context)
 cc_test(data_type_test SRCS data_type_test.cc DEPS data_type place tensor)
 if(WITH_GPU)

--- a/paddle/fluid/framework/ddim.h
+++ b/paddle/fluid/framework/ddim.h
@@ -14,237 +14,13 @@ limitations under the License. */

 #pragma once

-#include <initializer_list>
-#include <stdexcept>
-#include <string>
-#include <vector>
-
-#include "paddle/fluid/framework/dim.h"
+#include "paddle/pten/core/ddim.h"

 namespace paddle {
 namespace framework {

-#define PADDLE_VISIT_DDIM_BASE(rank, callback) \
-  case (rank): {                               \
-    constexpr auto kRank = (rank);             \
-    return (callback);                         \
-  }
-
-#define PADDLE_VISIT_DDIM(rank, callback)                                  \
-  switch (rank) {                                                          \
-    PADDLE_VISIT_DDIM_BASE(0, callback);                                   \
-    PADDLE_VISIT_DDIM_BASE(1, callback);                                   \
-    PADDLE_VISIT_DDIM_BASE(2, callback);                                   \
-    PADDLE_VISIT_DDIM_BASE(3, callback);                                   \
-    PADDLE_VISIT_DDIM_BASE(4, callback);                                   \
-    PADDLE_VISIT_DDIM_BASE(5, callback);                                   \
-    PADDLE_VISIT_DDIM_BASE(6, callback);                                   \
-    PADDLE_VISIT_DDIM_BASE(7, callback);                                   \
-    PADDLE_VISIT_DDIM_BASE(8, callback);                                   \
-    PADDLE_VISIT_DDIM_BASE(9, callback);                                   \
-    default:                                                               \
-      PADDLE_THROW(platform::errors::Unimplemented(                        \
-          "Invalid dimension to be accessed. Now only supports access to " \
-          "dimension 0 to 9, but received dimension is %d.",               \
-          rank));                                                          \
-  }
-
-template <typename T1, typename T2>
-inline void dynamic_dim_assign(const T1* in, T2* out, int n) {
-  PADDLE_VISIT_DDIM(n, (static_dim_assign<kRank, T1, T2>(in, out)));
-}
-
-/**
- * \brief A dynamically sized dimension.
- *
- * The number of dimensions must be between [1, 9].
- */
-class DDim {
- public:
-  constexpr static int kMaxRank = 9;
-
-  DDim() : rank_(1) { dim_[0] = 0; }
-
-  DDim(const DDim& ddim) : dim_() { CopyFrom(ddim); }
-
-  DDim(const int* d, int n) : rank_(n) {
-    dynamic_dim_assign(d, dim_.GetMutable(), n);
-  }
-
-  DDim(const int64_t* d, int n) : rank_(n) {
-    dynamic_dim_assign(d, dim_.GetMutable(), n);
-  }
-
-  template <int D>
-  /*implicit*/ DDim(const Dim<D>& in) : rank_(D) {  // NOLINT
-    UnsafeCast<D>() = in;
-  }
-
-  /*implicit*/ DDim(std::initializer_list<int64_t> init_list)
-      : DDim(init_list.begin(), init_list.size()) {}
-
-  inline DDim& operator=(const DDim& ddim) { return CopyFrom(ddim); }
-
-  template <int D>
-  inline DDim& operator=(const Dim<D>& dim) {
-    rank_ = D;
-    UnsafeCast<D>() = dim;
-    return *this;
-  }
-
-  inline int64_t& operator[](int idx) { return dim_[idx]; }
-
-  inline int64_t operator[](int idx) const { return dim_[idx]; }
-
-  int64_t& at(int idx) {
-    PADDLE_ENFORCE_GE(idx, 0,
-                      platform::errors::InvalidArgument(
-                          "Invalid DDim index to be accessed. The valid index "
-                          "is between 0 and %d, but received index is %d.",
-                          rank_, idx));
-    PADDLE_ENFORCE_LT(idx, rank_,
-                      platform::errors::InvalidArgument(
-                          "Invalid DDim index to be accessed. The valid index "
-                          "is between 0 and %d, but received index is %d.",
-                          rank_, idx));
-    return dim_[idx];
-  }
-
-  int64_t at(int idx) const {
-    PADDLE_ENFORCE_GE(idx, 0,
-                      platform::errors::InvalidArgument(
-                          "Invalid DDim index to be accessed. The valid index "
-                          "is between 0 and %d, but received index is %d.",
-                          rank_, idx));
-    PADDLE_ENFORCE_LT(idx, rank_,
-                      platform::errors::InvalidArgument(
-                          "Invalid DDim index to be accessed. The valid index "
-                          "is between 0 and %d, but received index is %d.",
-                          rank_, idx));
-    return dim_[idx];
-  }
-
-  template <typename Visitor>
-  typename std::result_of<Visitor(Dim<0>&)>::type apply_visitor(
-      Visitor&& visitor) {
-    PADDLE_VISIT_DDIM(rank_, visitor(UnsafeCast<kRank>()));
-  }
-
-  template <typename Visitor>
-  typename std::result_of<Visitor(const Dim<0>&)>::type apply_visitor(
-      Visitor&& visitor) const {
-    PADDLE_VISIT_DDIM(rank_, visitor(UnsafeCast<kRank>()));
-  }
-
-  bool operator==(const DDim& d) const;
-
-  bool operator!=(const DDim& d) const;
-
-  inline const int64_t* Get() const { return dim_.Get(); }
-
-  inline int64_t* GetMutable() { return dim_.GetMutable(); }
-
-  inline int size() const { return rank_; }
-
-  std::string to_str() const;
-
-  DDim reshape(const std::vector<int>& shape) const;
-
-  DDim transpose(const std::vector<int>& axis) const;
-
- private:
-  template <int D>
-  inline Dim<D>& UnsafeCast() {
-    static_assert(D >= 0 && D <= kMaxRank, "Invalid rank");
-    auto* p = static_cast<void*>(&dim_);
-    return *reinterpret_cast<Dim<D>*>(p);
-  }
-
-  template <int D>
-  inline const Dim<D>& UnsafeCast() const {
-    static_assert(D >= 0 && D <= kMaxRank, "Invalid rank");
-    auto* p = static_cast<const void*>(&dim_);
-    return *reinterpret_cast<const Dim<D>*>(p);
-  }
-
-  inline DDim& CopyFrom(const DDim& ddim) {
-    PADDLE_VISIT_DDIM(ddim.rank_, (*this = ddim.UnsafeCast<kRank>()));
-  }
-
-  friend DDim stride(const DDim& ddim);
-  friend DDim stride_numel(const DDim& ddim);
-
- private:
-  Dim<kMaxRank> dim_;
-  int rank_;
-};
-
-#undef PADDLE_VISIT_DDIM_BASE
-#undef PADDLE_VISIT_DDIM
-
-/**
- * \brief Make a DDim from std::vector<int64_t>
- *
- * \param dims An vector of ints. Must be sized between [1, 9]
- */
-DDim make_ddim(const std::vector<int64_t>& dims);
-
-DDim make_ddim(const std::vector<int>& dims);
-
-/**
- * \brief Make a DDim from an initializer list
- *
- * \param dims An initializer list of ints. Must be sized between [1, 9]
- *
- */
-DDim make_ddim(std::initializer_list<int64_t> dims);
-
-template <typename T = int64_t>
-std::vector<T> vectorize(const DDim& ddim) {
-  std::vector<T> result(DDim::kMaxRank);
-  dynamic_dim_assign(ddim.Get(), result.data(), ddim.size());
-  result.resize(ddim.size());
-  return result;
-}
-
-int64_t product(const DDim& ddim);
-
-bool contain_unknown_dim(const DDim& ddim);
-
-/**
- * \brief Slice a ddim
- *
- * Slice dim with [begin, end).
- * e.g.  DDim d = make_ddim({1,2,3,4,5});
- *       slice_ddim(d, 1, 3); ====> {2,3}
- */
-DDim slice_ddim(const DDim& dim, int begin, int end);
-
-/**
- * \brief What is the length of this dimension?
- *
- * \param Dynamic dimension to inspect
- */
-
-int arity(const DDim& ddim);
-
-std::ostream& operator<<(std::ostream&, const DDim&);
-
-/**
-* \brief Flatten dim to 3d
-* e.g., DDim d = mak_ddim({1, 2, 3, 4, 5, 6})
-*       flatten_to_3d(d, 2, 4); ===> {1*2, 3*4, 5*6} ===> {2, 12, 30}
-*/
-DDim flatten_to_3d(const DDim& src, int num_row_dims, int num_col_dims);
-
-// Reshape a tensor to a matrix. The matrix's first dimension(column length)
-// will be the product of tensor's first `num_col_dims` dimensions.
-DDim flatten_to_2d(const DDim& src, int num_col_dims);
-
-DDim flatten_to_1d(const DDim& src);
-
-DDim stride(const DDim& ddim);
+using DDim = pten::framework::DDim;
+using namespace pten::framework;  // NOLINT

-DDim stride_numel(const DDim& ddim);
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/dim.h
+++ b/paddle/fluid/framework/dim.h
@@ -12,89 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
-
-#include <iostream>
-#include <sstream>
-#include <stdexcept>
-#include <string>
-#include <type_traits>
-
-#include "paddle/fluid/framework/array.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/dim.h"

 namespace paddle {
 namespace framework {
-
-// Statically sized, statically indexed dimension
 template <int D>
-class Dim : public Array<int64_t, D> {
- public:
-  static_assert(D >= 0, "D must be not less than 0");
-
-  static constexpr int kRank = D;
-  using BaseClass = Array<int64_t, D>;
-
-  inline Dim(int64_t head, const Dim<D - 1>& tail) {
-    (*this)[0] = head;
-    new (this->GetMutable() + 1) Dim<D - 1>(tail);
-  }
-
-  template <typename... Args>
-  HOSTDEVICE explicit Dim(int64_t head, Args... args)
-      : BaseClass(head, args...) {}
-
-  /** Construct a Dim with each dimension set to the given index */
-  HOSTDEVICE explicit Dim(int64_t idx) { this->Fill(idx); }
-
-  HOSTDEVICE Dim() = default;
-
-  HOST std::string to_string() const;
-};
-
-// Product of a Dim
-template <int D>
-HOSTDEVICE inline int64_t product(const Dim<D>& a) {
-  return UnrollProduct<D>::Run(a.Get());
-}
-
-/**
- * Helper function to create a Dim
- *
- * \param idxes The type of Dim constructed depends on the number of params
- *
- */
-
-template <typename... Args>
-HOSTDEVICE inline Dim<sizeof...(Args)> make_dim(Args... idxes) {
-  return Dim<sizeof...(Args)>(idxes...);
-}
-
-// Allows us to output a Dim
-template <int D>
-inline std::ostream& operator<<(std::ostream& os, const Dim<D>& d) {
-  os << d[0];
-  for (int i = 1; i < D; ++i) {
-    os << ", " << d[i];
-  }
-  return os;
-}
-
-inline std::ostream& operator<<(std::ostream& os, const Dim<0>& d) {
-  return os;
-}
-
-template <int D>
-HOST std::string Dim<D>::to_string() const {
-  std::stringstream stream;
-  stream << *this;
-  return stream.str();
-}
-
-template <int D, typename T1, typename T2>
-inline void static_dim_assign(const T1* in, T2* out) {
-  UnrollAssign<D>::Run(in, out);
-}
+using Dim = pten::framework::Dim<D>;
+using namespace pten::framework;  // NOLINT

 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/operators/amp/check_finite_and_unscale_op.h
+++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op.h
@@ -18,7 +18,7 @@ limitations under the License. */
 #include <vector>

 #include "paddle/fluid/operators/isfinite_op.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/amp/update_loss_scaling_op.h
+++ b/paddle/fluid/operators/amp/update_loss_scaling_op.h
@@ -24,7 +24,7 @@
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/errors.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/bce_loss_op.cu
+++ b/paddle/fluid/operators/bce_loss_op.cu
@@ -17,7 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/math.h"
 #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/bernoulli_op.h
+++ b/paddle/fluid/operators/bernoulli_op.h
@@ -14,7 +14,7 @@ limitations under the License. */

 #pragma once
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/bilateral_slice_op.h
+++ b/paddle/fluid/operators/bilateral_slice_op.h
@@ -13,7 +13,7 @@
 #include <string>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/bincount_op.cu
+++ b/paddle/fluid/operators/bincount_op.cu
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/bincount_op.h"
 #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/deformable_conv_func.h
+++ b/paddle/fluid/operators/deformable_conv_func.h
@@ -24,7 +24,7 @@
 #pragma once
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/math_function.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 template <typename T>
 HOSTDEVICE T DmcnGetGradientWeight(T argmax_h, T argmax_w, const int h,

--- a/paddle/fluid/operators/dequantize_log_op.cu
+++ b/paddle/fluid/operators/dequantize_log_op.cu
@@ -15,7 +15,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/dequantize_log_op.h"
 #include "paddle/fluid/operators/math.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/detection/box_clip_op.cu
+++ b/paddle/fluid/operators/detection/box_clip_op.cu
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/detection/box_clip_op.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu
+++ b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu
@@ -14,7 +14,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/detection/sigmoid_focal_loss_op.h"
 #include "paddle/fluid/operators/math.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/detection/yolo_box_op.h
+++ b/paddle/fluid/operators/detection/yolo_box_op.h
@@ -14,7 +14,7 @@
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/math_function.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/distribution_helper.h
+++ b/paddle/fluid/operators/distribution_helper.h
@@ -26,7 +26,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/for_range.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 #if !defined(_WIN32)
 #define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)

--- a/paddle/fluid/operators/elementwise/elementwise_functor.h
+++ b/paddle/fluid/operators/elementwise/elementwise_functor.h
@@ -14,8 +14,8 @@ limitations under the License. */

 #pragma once

-#include "paddle/fluid/framework/array.h"
 #include "paddle/fluid/platform/complex.h"
+#include "paddle/pten/core/array.h"
 #include "paddle/pten/kernels/funcs/elementwise_functor.h"

 namespace paddle {
@@ -92,12 +92,12 @@ using Complex = paddle::platform::complex<T>;

 template <typename InT, typename OutT>
 struct DivGradXYFunctor {
-  inline HOSTDEVICE paddle::framework::Array<OutT, 2> operator()(const InT a,
-                                                                 const InT b,
-                                                                 const InT c) {
+  inline HOSTDEVICE pten::framework::Array<OutT, 2> operator()(const InT a,
+                                                               const InT b,
+                                                               const InT c) {
    // dx = dout / y
    // dy = - dout * out / y
-    paddle::framework::Array<OutT, 2> outs;
+    pten::framework::Array<OutT, 2> outs;
    outs[0] = a / c;
    outs[1] = -a * b / c;
    return outs;
@@ -106,9 +106,9 @@ struct DivGradXYFunctor {

 template <typename InT, typename OutT>
 struct DivGradXYFunctor<Complex<InT>, Complex<OutT>> {
-  inline HOSTDEVICE paddle::framework::Array<Complex<OutT>, 2> operator()(
+  inline HOSTDEVICE pten::framework::Array<Complex<OutT>, 2> operator()(
      const Complex<InT> a, const Complex<InT> b, const Complex<InT> c) {
-    paddle::framework::Array<Complex<OutT>, 2> outs;
+    pten::framework::Array<Complex<OutT>, 2> outs;
    Complex<InT> c_conj(c.real, -c.imag);
    Complex<InT> out_div_c_conj((b / c).real, -(b / c).imag);
    outs[0] = a / c_conj;
@@ -247,9 +247,9 @@ struct MinGradYFunctor {

 template <typename InT, typename OutT>
 struct MinGradXYFunctor {
-  inline HOSTDEVICE paddle::framework::Array<OutT, 2> operator()(
+  inline HOSTDEVICE pten::framework::Array<OutT, 2> operator()(
      const InT& x, const InT& y, const InT& dout) {
-    paddle::framework::Array<OutT, 2> outs;
+    pten::framework::Array<OutT, 2> outs;
    // dx = dout * (x < y)
    outs[0] = static_cast<OutT>(dout * static_cast<InT>(x < y));
    // dy = dout * (x >= y)
@@ -273,10 +273,10 @@ struct MulGradFunctor<Complex<T>> {

 template <typename InT, typename OutT>
 struct MulGradXYFunctor {
-  inline HOSTDEVICE paddle::framework::Array<OutT, 2> operator()(const InT a,
-                                                                 const InT b,
-                                                                 const InT c) {
-    paddle::framework::Array<OutT, 2> outs;
+  inline HOSTDEVICE pten::framework::Array<OutT, 2> operator()(const InT a,
+                                                               const InT b,
+                                                               const InT c) {
+    pten::framework::Array<OutT, 2> outs;
    // dx = dout * y
    outs[0] = a * b;
    // dy = dout * x
@@ -287,9 +287,9 @@ struct MulGradXYFunctor {

 template <typename InT, typename OutT>
 struct MulGradXYFunctor<Complex<InT>, Complex<OutT>> {
-  inline HOSTDEVICE paddle::framework::Array<Complex<OutT>, 2> operator()(
+  inline HOSTDEVICE pten::framework::Array<Complex<OutT>, 2> operator()(
      const Complex<InT> a, const Complex<InT> b, const Complex<InT> c) {
-    paddle::framework::Array<Complex<OutT>, 2> outs;
+    pten::framework::Array<Complex<OutT>, 2> outs;
    // dx = dout * y
    Complex<InT> b_conj(b.real, -b.imag);
    outs[0] = a * b_conj;
@@ -316,9 +316,9 @@ struct MaxGradYFunctor {

 template <typename InT, typename OutT>
 struct MaxGradXYFunctor {
-  inline HOSTDEVICE paddle::framework::Array<OutT, 2> operator()(
+  inline HOSTDEVICE pten::framework::Array<OutT, 2> operator()(
      const InT& x, const InT& y, const InT& dout) {
-    paddle::framework::Array<OutT, 2> outs;
+    pten::framework::Array<OutT, 2> outs;
    // dx = dout * (x > y)
    outs[0] = static_cast<OutT>(dout * static_cast<InT>(x > y));
    // dy = dout * (x <= y)

--- a/paddle/fluid/operators/fake_quantize_op.h
+++ b/paddle/fluid/operators/fake_quantize_op.h
@@ -20,8 +20,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/platform/hostdevice.h"
 #include "paddle/fluid/platform/transform.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/grid_sampler_op.h
+++ b/paddle/fluid/operators/grid_sampler_op.h
@@ -20,7 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/gather.h"
 #include "paddle/fluid/operators/math/math_function.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/histogram_op.cu
+++ b/paddle/fluid/operators/histogram_op.cu
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/histogram_op.h"
 #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/huber_loss_op.h
+++ b/paddle/fluid/operators/huber_loss_op.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/interpolate_op.h
+++ b/paddle/fluid/operators/interpolate_op.h
@@ -15,7 +15,7 @@
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/math_function.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/interpolate_v2_op.h
+++ b/paddle/fluid/operators/interpolate_v2_op.h
@@ -15,7 +15,7 @@
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/math_function.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/kernel_primitives/datamover_primitives.h
+++ b/paddle/fluid/operators/kernel_primitives/datamover_primitives.h
@@ -20,6 +20,7 @@
 #ifdef PADDLE_WITH_HIP
 #include <hip/hip_fp16.h>
 #endif
+#include "paddle/pten/core/ddim.h"

 namespace paddle {
 namespace operators {
@@ -85,7 +86,7 @@ struct FastDivMod {
 template <int kDims>
 struct BroadcastConfig {
  FastDivMod divmoders[kDims];
-  uint32_t strides[framework::DDim::kMaxRank];
+  uint32_t strides[pten::framework::DDim::kMaxRank];
  HOSTDEVICE BroadcastConfig() {}

  HOSTDEVICE BroadcastConfig(const std::vector<int64_t>& out_dims,

--- a/paddle/fluid/operators/kldiv_loss_op.h
+++ b/paddle/fluid/operators/kldiv_loss_op.h
@@ -13,7 +13,7 @@
 #include <string>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/lstm_unit_op.cu
+++ b/paddle/fluid/operators/lstm_unit_op.cu
@@ -19,7 +19,7 @@ https://github.com/caffe2/caffe2/blob/master/caffe2/operators/lstm_unit_op_gpu.c
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/cross_entropy_op.h"
 #include "paddle/fluid/operators/lstm_unit_op.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/math.h
+++ b/paddle/fluid/operators/math.h
@@ -15,7 +15,7 @@
 #pragma once

 #include "paddle/fluid/platform/float16.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 #include "math.h"  // NOLINT


--- a/paddle/fluid/operators/math/algorithm.h
+++ b/paddle/fluid/operators/math/algorithm.h
@@ -18,7 +18,7 @@
 #include <cstdint>  // for int64_t
 #include <numeric>

-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/math/complex_functors.h
+++ b/paddle/fluid/operators/math/complex_functors.h
@@ -17,7 +17,7 @@ limitations under the License. */
 #include <type_traits>

 #include "paddle/fluid/platform/complex.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/math/cos_sim_functor.h
+++ b/paddle/fluid/operators/math/cos_sim_functor.h
@@ -17,7 +17,7 @@ limitations under the License. */
 #include <stdlib.h>

 #include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/math/cross_entropy.h
+++ b/paddle/fluid/operators/math/cross_entropy.h
@@ -16,7 +16,7 @@ limitations under the License. */
 #include <limits>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/math/depthwise_conv.h
+++ b/paddle/fluid/operators/math/depthwise_conv.h
@@ -16,7 +16,7 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/math/detail/activation_functions.h
+++ b/paddle/fluid/operators/math/detail/activation_functions.h
@@ -17,7 +17,7 @@ limitations under the License. */
 #include <stdexcept>
 #include <string>
 #include "paddle/fluid/platform/cpu_info.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/math/detail/gru_kernel.h
+++ b/paddle/fluid/operators/math/detail/gru_kernel.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 #include <type_traits>
 #include "paddle/fluid/operators/math/detail/activation_functions.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 // TODO(guosheng): refine code style in gru_kernel
 namespace paddle {

--- a/paddle/fluid/operators/math/detail/lstm_kernel.h
+++ b/paddle/fluid/operators/math/detail/lstm_kernel.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 #include <type_traits>
 #include "paddle/fluid/operators/math/detail/activation_functions.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/math/maxouting.h
+++ b/paddle/fluid/operators/math/maxouting.h
@@ -15,8 +15,8 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/hostdevice.h"
 #include "paddle/fluid/platform/macros.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/math/pooling.h
+++ b/paddle/fluid/operators/math/pooling.h
@@ -20,8 +20,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
 #include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/hostdevice.h"
 #include "paddle/fluid/platform/macros.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/modified_huber_loss_op.cu
+++ b/paddle/fluid/operators/modified_huber_loss_op.cu
@@ -17,7 +17,7 @@ limitations under the License. */
 #include <thrust/tuple.h>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/modified_huber_loss_op.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/modified_huber_loss_op.h
+++ b/paddle/fluid/operators/modified_huber_loss_op.h
@@ -16,7 +16,7 @@ limitations under the License. */

 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/multinomial_op.h
+++ b/paddle/fluid/operators/multinomial_op.h
@@ -18,7 +18,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/nll_loss_op.cu
+++ b/paddle/fluid/operators/nll_loss_op.cu
@@ -14,7 +14,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/math.h"
 #include "paddle/fluid/operators/nll_loss_op.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/roll_op.cu
+++ b/paddle/fluid/operators/roll_op.cu
@@ -13,11 +13,11 @@
 // limitations under the License.

 #pragma once
-#include "paddle/fluid/framework/array.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/roll_op.h"
 #include "paddle/fluid/platform/complex.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
+#include "paddle/pten/core/array.h"

 namespace paddle {
 namespace operators {
@@ -28,9 +28,9 @@ using LoDTensor = framework::LoDTensor;

 template <typename T, size_t Rank>
 __global__ void RollCudaKernel(const T* input, T* output, int64_t N,
-                               paddle::framework::Array<int64_t, Rank> shifts,
-                               paddle::framework::Array<int64_t, Rank> strides,
-                               paddle::framework::Array<int64_t, Rank> sizes) {
+                               pten::framework::Array<int64_t, Rank> shifts,
+                               pten::framework::Array<int64_t, Rank> strides,
+                               pten::framework::Array<int64_t, Rank> sizes) {
  int64_t idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx >= N) {
    return;
@@ -101,9 +101,9 @@ class RollKernel<platform::CUDADeviceContext, T>

 #define CALL_ROLL_CUDA_KERNEL(N)                                               \
  case N: {                                                                    \
-    paddle::framework::Array<int64_t, N> _strides;                             \
-    paddle::framework::Array<int64_t, N> _shifts;                              \
-    paddle::framework::Array<int64_t, N> _sizes;                               \
+    pten::framework::Array<int64_t, N> _strides;                               \
+    pten::framework::Array<int64_t, N> _shifts;                                \
+    pten::framework::Array<int64_t, N> _sizes;                                 \
    for (size_t idx = 0; idx < N; ++idx) {                                     \
      _strides[idx] = strides[idx];                                            \
      _shifts[idx] = shifts[idx];                                              \

--- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu
+++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu
@@ -22,7 +22,7 @@ namespace cub = hipcub;
 #include "paddle/fluid/operators/math.h"
 #include "paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/smooth_l1_loss_op.h
+++ b/paddle/fluid/operators/smooth_l1_loss_op.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/unstack_op.h
+++ b/paddle/fluid/operators/unstack_op.h
@@ -20,7 +20,6 @@ limitations under the License. */

 #if defined(__NVCC__) || defined(__HIPCC__)
 #include <thrust/device_vector.h>
-#include "paddle/fluid/framework/array.h"
 #endif

 namespace paddle {

--- a/paddle/fluid/platform/aligned_vector.h
+++ b/paddle/fluid/platform/aligned_vector.h
@@ -14,7 +14,7 @@ limitations under the License. */

 #pragma once

-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace paddle {
 namespace platform {

--- a/paddle/fluid/platform/eigen_ext.h
+++ b/paddle/fluid/platform/eigen_ext.h
@@ -17,7 +17,7 @@
 #include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/complex.h"
 #include "paddle/fluid/platform/float16.h"
-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

 #include "unsupported/Eigen/CXX11/Tensor"


--- a/paddle/fluid/platform/transform.h
+++ b/paddle/fluid/platform/transform.h
@@ -19,8 +19,8 @@ limitations under the License. */

 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/hostdevice.h"
 #include "paddle/fluid/platform/place.h"
+#include "paddle/pten/core/hostdevice.h"

 #if defined(__NVCC__) || defined(__HIPCC__)
 #include <thrust/execution_policy.h>

--- a/paddle/fluid/platform/transform_test.cu
+++ b/paddle/fluid/platform/transform_test.cu
@@ -15,8 +15,8 @@ limitations under the License. */
 #include <gtest/gtest.h>
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/memory/memory.h"
-#include "paddle/fluid/platform/hostdevice.h"
 #include "paddle/fluid/platform/transform.h"
+#include "paddle/pten/core/hostdevice.h"

 template <typename T>
 class Scale {

--- a/paddle/pten/api/include/tensor.h
+++ b/paddle/pten/api/include/tensor.h
@@ -42,12 +42,12 @@ class DenseTensor;

 namespace pten {
 class TensorBase;
+namespace framework {
+class DDim;
+}  // namespace framework
 }  // namespace pten

 namespace paddle {
-namespace framework {
-class DDim;
-}

 namespace experimental {

@@ -159,9 +159,9 @@ class PADDLE_API Tensor final {
  /**
   * @brief Return the dimensions of Tensor.
   *
-   * @return paddle::framework::DDim
+   * @return pten::framework::DDim
   */
-  paddle::framework::DDim dims() const;
+  pten::framework::DDim dims() const;

  /**
   * @brief Return the shape (dimensions) of Tensor.

--- a/paddle/pten/api/lib/tensor.cc
+++ b/paddle/pten/api/lib/tensor.cc
@@ -47,13 +47,13 @@ limitations under the License. */
 * In the future, the necessary components will be moved to the this library,
 * or the corresponding components will be re-implemented.
 */
-#include "paddle/fluid/framework/ddim.h"
 #include "paddle/fluid/memory/memory.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/stream/cuda_stream.h"
 #include "paddle/pten/common/complex.h"
 #include "paddle/pten/common/float16.h"
+#include "paddle/pten/core/ddim.h"

 namespace paddle {
 namespace experimental {
@@ -94,10 +94,10 @@ int64_t Tensor::numel() const { return impl_->numel(); }

 int64_t Tensor::size() const { return impl_->numel(); }

-paddle::framework::DDim Tensor::dims() const { return impl_->dims(); }
+pten::framework::DDim Tensor::dims() const { return impl_->dims(); }

 std::vector<int64_t> Tensor::shape() const {
-  return paddle::framework::vectorize<int64_t>(impl_->dims());
+  return pten::framework::vectorize<int64_t>(impl_->dims());
 }

 void Tensor::reshape(const std::vector<int64_t> &shape) {

--- a/paddle/pten/core/CMakeLists.txt
+++ b/paddle/pten/core/CMakeLists.txt
@@ -15,6 +15,15 @@ cc_library(tensor_meta SRCS tensor_meta.cc DEPS enforce mixed_vector)
 cc_library(dense_tensor SRCS dense_tensor.cc DEPS convert_utils tensor_meta tensor_base)
 cc_library(pten_device_context SRCS device_context.cc DEPS tensor_base )

+cc_test(unroll_array_ops_test SRCS unroll_array_ops_test.cc)
+cc_library(ddim SRCS ddim.cc DEPS eigen3 boost enforce)
+cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
+if(WITH_GPU)
+  nv_test(dim_test SRCS dim_test.cu DEPS ddim)
+elseif(WITH_ROCM)
+  hip_test(dim_test SRCS dim_test.cu DEPS ddim)
+endif()
+
 # Will remove once we implemented MKLDNN_Tensor
 if(WITH_MKLDNN)
    add_dependencies(dense_tensor mkldnn)

--- a/paddle/fluid/framework/array.h
+++ b/paddle/fluid/framework/array.h
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -15,10 +15,12 @@
 #pragma once

 #include <cstdint>
-#include "paddle/fluid/framework/unroll_array_ops.h"
+#include "paddle/pten/core/unroll_array_ops.h"
+// TODO(paddle-dev): Need to modify into pten/core/enforce.h
 #include "paddle/fluid/platform/enforce.h"

-namespace paddle {
+namespace pten {
+namespace platform = paddle::platform;
 namespace framework {

 template <typename T, size_t N>
@@ -146,4 +148,4 @@ class Array<T, 0> {
 };

 }  // namespace framework
-}  // namespace paddle
+}  // namespace pten
--- a/paddle/fluid/framework/ddim.cc
+++ b/paddle/fluid/framework/ddim.cc
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/framework/ddim.h"
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/core/ddim.h"
 #include <set>
-#include "paddle/fluid/platform/enforce.h"

-namespace paddle {
+namespace pten {
+namespace platform = paddle::platform;
 namespace framework {

 DDim make_ddim(std::initializer_list<int64_t> dims) {
@@ -82,10 +82,13 @@ bool contain_unknown_dim(const DDim& ddim) {

 DDim slice_ddim(const DDim& dim, int begin, int end) {
  PADDLE_ENFORCE_EQ(
-      (begin >= 0 && end <= dim.size()), true,
+      (begin >= 0 && end <= dim.size()),
+      true,
      platform::errors::InvalidArgument(
-          "[begin(%d), end(%d)) must be inside [0, %d) in ddim slice.", begin,
-          end, dim.size()));
+          "[begin(%d), end(%d)) must be inside [0, %d) in ddim slice.",
+          begin,
+          end,
+          dim.size()));
  // Constructor of DDim would check whether end - begin is valid
  return DDim(dim.Get() + begin, end - begin);
 }
@@ -108,27 +111,34 @@ std::ostream& operator<<(std::ostream& os, const DDim& ddim) {
 }

 DDim flatten_to_3d(const DDim& src, int num_row_dims, int num_col_dims) {
-  PADDLE_ENFORCE_GE(src.size(), 3,
+  PADDLE_ENFORCE_GE(src.size(),
+                    3,
                    platform::errors::InvalidArgument(
                        "The rank of src dim should be at least 3 "
                        "in flatten_to_3d, but received %d.",
                        src.size()));
-  PADDLE_ENFORCE_EQ((num_row_dims >= 1 && num_row_dims < src.size()), true,
+  PADDLE_ENFORCE_EQ((num_row_dims >= 1 && num_row_dims < src.size()),
+                    true,
                    platform::errors::InvalidArgument(
                        "The num_row_dims should be inside [1, %d] "
                        "in flatten_to_3d, but received %d.",
-                        src.size() - 1, num_row_dims));
-  PADDLE_ENFORCE_EQ((num_col_dims >= 2 && num_col_dims <= src.size()), true,
+                        src.size() - 1,
+                        num_row_dims));
+  PADDLE_ENFORCE_EQ((num_col_dims >= 2 && num_col_dims <= src.size()),
+                    true,
                    platform::errors::InvalidArgument(
                        "The num_col_dims should be inside [2, %d] "
                        "in flatten_to_3d, but received %d.",
-                        src.size(), num_col_dims));
+                        src.size(),
+                        num_col_dims));
  PADDLE_ENFORCE_GE(
-      num_col_dims, num_row_dims,
+      num_col_dims,
+      num_row_dims,
      platform::errors::InvalidArgument(
          "The num_row_dims should be less than num_col_dims in flatten_to_3d,"
          "but received num_row_dims = %d, num_col_dims = %d.",
-          num_row_dims, num_col_dims));
+          num_row_dims,
+          num_col_dims));

  return DDim({product(slice_ddim(src, 0, num_row_dims)),
               product(slice_ddim(src, num_row_dims, num_col_dims)),
@@ -169,13 +179,16 @@ DDim DDim::reshape(const std::vector<int>& shape) const {
  out_dims.rank_ = shape.size();
  for (size_t i = 0; i < shape.size(); ++i) {
    if (shape[i] == copy_dim_val) {
-      PADDLE_ENFORCE_LT(static_cast<int>(i), in_dims.size(),
+      PADDLE_ENFORCE_LT(static_cast<int>(i),
+                        in_dims.size(),
                        platform::errors::InvalidArgument(
                            "Index %d of shape under which the value of 0 "
                            "is stored, must be lower than the number of "
                            "old dimensions. But received shape[%d] = 0, "
                            "dimensions = %d, shape = [%s].",
-                            i, in_dims.size(), in_dims));
+                            i,
+                            in_dims.size(),
+                            in_dims));
      out_dims[i] = in_dims[i];
    } else {
      out_dims[i] = shape[i];
@@ -190,19 +203,23 @@ DDim DDim::transpose(const std::vector<int>& axis) const {
  size_t axis_size = axis.size();

  auto axis_set = std::set<int>(axis.begin(), axis.end());
-  PADDLE_ENFORCE_EQ(axis_set.size(), axis_size,
+  PADDLE_ENFORCE_EQ(axis_set.size(),
+                    axis_size,
                    platform::errors::InvalidArgument(
                        "In an axis array, elements must be unique."));

  PADDLE_ENFORCE_EQ(
-      in_rank, axis_size,
+      in_rank,
+      axis_size,
      platform::errors::InvalidArgument("The input dimension's size "
                                        "should be equal to the axis's size. "
                                        "But received dimension is %d, "
                                        "axis's size is %d",
-                                        in_rank, axis_size));
+                                        in_rank,
+                                        axis_size));

-  PADDLE_ENFORCE_LT(*std::max_element(axis.begin(), axis.end()), axis_size,
+  PADDLE_ENFORCE_LT(*std::max_element(axis.begin(), axis.end()),
+                    axis_size,
                    platform::errors::InvalidArgument(
                        "Axis values must be ranging from 0 to (dims - 1)."));

@@ -214,4 +231,4 @@ DDim DDim::transpose(const std::vector<int>& axis) const {
 }

 }  // namespace framework
-}  // namespace paddle
+}  // namespace pten
\ No newline at end of file
--- a/paddle/pten/core/ddim.h
+++ b/paddle/pten/core/ddim.h
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include <initializer_list>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "paddle/pten/core/dim.h"
+
+namespace pten {
+namespace platform = paddle::platform;
+namespace framework {
+
+// Expands to a single `case` label for the rank switch below. Inside the
+// case, `kRank` is a constexpr copy of `rank`, and the value of `callback`
+// (which may refer to `kRank`) is returned from the enclosing function.
+#define PADDLE_VISIT_DDIM_BASE(rank, callback) \
+  case (rank): {                               \
+    constexpr auto kRank = (rank);             \
+    return (callback);                         \
+  }
+
+// Dispatches a runtime `rank` to `callback` with a matching compile-time
+// `kRank`. Ranks 0 through 9 are handled; any other value reaches the
+// `default` branch and raises an Unimplemented error via PADDLE_THROW.
+// NOTE: every handled case expands to a `return` statement, so this macro is
+// only usable as the tail of a function.
+#define PADDLE_VISIT_DDIM(rank, callback)                                  \
+  switch (rank) {                                                          \
+    PADDLE_VISIT_DDIM_BASE(0, callback);                                   \
+    PADDLE_VISIT_DDIM_BASE(1, callback);                                   \
+    PADDLE_VISIT_DDIM_BASE(2, callback);                                   \
+    PADDLE_VISIT_DDIM_BASE(3, callback);                                   \
+    PADDLE_VISIT_DDIM_BASE(4, callback);                                   \
+    PADDLE_VISIT_DDIM_BASE(5, callback);                                   \
+    PADDLE_VISIT_DDIM_BASE(6, callback);                                   \
+    PADDLE_VISIT_DDIM_BASE(7, callback);                                   \
+    PADDLE_VISIT_DDIM_BASE(8, callback);                                   \
+    PADDLE_VISIT_DDIM_BASE(9, callback);                                   \
+    default:                                                               \
+      PADDLE_THROW(platform::errors::Unimplemented(                        \
+          "Invalid dimension to be accessed. Now only supports access to " \
+          "dimension 0 to 9, but received dimension is %d.",               \
+          rank));                                                          \
+  }
+
+// Runtime-rank front end for static_dim_assign: selects the instantiation
+// whose compile-time rank equals `n` and forwards `in` / `out` to it.
+// Ranks outside the range handled by PADDLE_VISIT_DDIM raise an error.
+template <typename InT, typename OutT>
+inline void dynamic_dim_assign(const InT* in, OutT* out, int n) {
+  PADDLE_VISIT_DDIM(n, (static_dim_assign<kRank, InT, OutT>(in, out)));
+}
+
+/**
+ * \brief A dynamically sized dimension.
+ *
+ * Stores up to kMaxRank int64_t entries in a fixed-size Dim<kMaxRank> together
+ * with the number of entries currently in use (rank_). Rank-dependent
+ * operations are dispatched through PADDLE_VISIT_DDIM, which handles ranks 0
+ * through 9 and raises an Unimplemented error for any other rank.
+ */
+class DDim {
+ public:
+  constexpr static int kMaxRank = 9;
+
+  // Default state: rank 1 with the single entry set to 0.
+  DDim() : rank_(1) { dim_[0] = 0; }
+
+  // Copy constructor; rank_ is assigned inside CopyFrom.
+  DDim(const DDim& ddim) : dim_() { CopyFrom(ddim); }
+
+  // Builds a DDim of rank `n` from the int array `d`; the copy is performed by
+  // dynamic_dim_assign, which rejects ranks outside the supported range.
+  DDim(const int* d, int n) : rank_(n) {
+    dynamic_dim_assign(d, dim_.GetMutable(), n);
+  }
+
+  // Same as above for an int64_t source array.
+  DDim(const int64_t* d, int n) : rank_(n) {
+    dynamic_dim_assign(d, dim_.GetMutable(), n);
+  }
+
+  // Implicit conversion from a statically sized Dim<D>; the rank becomes D.
+  template <int D>
+  /*implicit*/ DDim(const Dim<D>& in) : rank_(D) {  // NOLINT
+    UnsafeCast<D>() = in;
+  }
+
+  // Implicit conversion from a braced list; delegates to the int64_t pointer
+  // constructor with the list's size as the rank.
+  /*implicit*/ DDim(std::initializer_list<int64_t> init_list)
+      : DDim(init_list.begin(), init_list.size()) {}
+
+  inline DDim& operator=(const DDim& ddim) { return CopyFrom(ddim); }
+
+  // Assigns from a statically sized Dim<D>, updating the rank to D.
+  template <int D>
+  inline DDim& operator=(const Dim<D>& dim) {
+    rank_ = D;
+    UnsafeCast<D>() = dim;
+    return *this;
+  }
+
+  // Element access without any comparison against rank_; see at() for the
+  // checked variant.
+  inline int64_t& operator[](int idx) { return dim_[idx]; }
+
+  inline int64_t operator[](int idx) const { return dim_[idx]; }
+
+  // Checked element access: enforces 0 <= idx < rank_ before indexing and
+  // reports an InvalidArgument error otherwise.
+  int64_t& at(int idx) {
+    PADDLE_ENFORCE_GE(idx,
+                      0,
+                      platform::errors::InvalidArgument(
+                          "Invalid DDim index to be accessed. The valid index "
+                          "is in range [0, %d), but received index is %d.",
+                          rank_,
+                          idx));
+    PADDLE_ENFORCE_LT(idx,
+                      rank_,
+                      platform::errors::InvalidArgument(
+                          "Invalid DDim index to be accessed. The valid index "
+                          "is in range [0, %d), but received index is %d.",
+                          rank_,
+                          idx));
+    return dim_[idx];
+  }
+
+  // Const overload of the checked access above; returns the entry by value.
+  int64_t at(int idx) const {
+    PADDLE_ENFORCE_GE(idx,
+                      0,
+                      platform::errors::InvalidArgument(
+                          "Invalid DDim index to be accessed. The valid index "
+                          "is in range [0, %d), but received index is %d.",
+                          rank_,
+                          idx));
+    PADDLE_ENFORCE_LT(idx,
+                      rank_,
+                      platform::errors::InvalidArgument(
+                          "Invalid DDim index to be accessed. The valid index "
+                          "is in range [0, %d), but received index is %d.",
+                          rank_,
+                          idx));
+    return dim_[idx];
+  }
+
+  // Calls `visitor` with the stored entries viewed as a Dim whose static rank
+  // equals rank_. The declared return type is derived from visitor(Dim<0>&).
+  template <typename Visitor>
+  typename std::result_of<Visitor(Dim<0>&)>::type apply_visitor(
+      Visitor&& visitor) {
+    PADDLE_VISIT_DDIM(rank_, visitor(UnsafeCast<kRank>()));
+  }
+
+  // Const overload: the visitor receives a const Dim reference.
+  template <typename Visitor>
+  typename std::result_of<Visitor(const Dim<0>&)>::type apply_visitor(
+      Visitor&& visitor) const {
+    PADDLE_VISIT_DDIM(rank_, visitor(UnsafeCast<kRank>()));
+  }
+
+  bool operator==(const DDim& d) const;
+
+  bool operator!=(const DDim& d) const;
+
+  // Raw pointer to the first stored entry.
+  inline const int64_t* Get() const { return dim_.Get(); }
+
+  inline int64_t* GetMutable() { return dim_.GetMutable(); }
+
+  // Number of entries currently in use (the rank).
+  inline int size() const { return rank_; }
+
+  std::string to_str() const;
+
+  DDim reshape(const std::vector<int>& shape) const;
+
+  DDim transpose(const std::vector<int>& axis) const;
+
+ private:
+  // Reinterprets the Dim<kMaxRank> storage as a Dim<D>. Only the compile-time
+  // bound on D is checked; callers are responsible for D matching rank_.
+  template <int D>
+  inline Dim<D>& UnsafeCast() {
+    static_assert(D >= 0 && D <= kMaxRank, "Invalid rank");
+    auto* p = static_cast<void*>(&dim_);
+    return *reinterpret_cast<Dim<D>*>(p);
+  }
+
+  template <int D>
+  inline const Dim<D>& UnsafeCast() const {
+    static_assert(D >= 0 && D <= kMaxRank, "Invalid rank");
+    auto* p = static_cast<const void*>(&dim_);
+    return *reinterpret_cast<const Dim<D>*>(p);
+  }
+
+  // Dispatches on the source rank to operator=(const Dim<D>&), which sets
+  // rank_ and copies the entries.
+  inline DDim& CopyFrom(const DDim& ddim) {
+    PADDLE_VISIT_DDIM(ddim.rank_, (*this = ddim.UnsafeCast<kRank>()));
+  }
+
+  friend DDim stride(const DDim& ddim);
+  friend DDim stride_numel(const DDim& ddim);
+
+ private:
+  Dim<kMaxRank> dim_;
+  int rank_;
+};
+
+#undef PADDLE_VISIT_DDIM_BASE
+#undef PADDLE_VISIT_DDIM
+
+/**
+ * \brief Make a DDim from std::vector<int64_t>
+ *
+ * \param dims A vector of ints. Must be sized between [1, 9]
+ */
+DDim make_ddim(const std::vector<int64_t>& dims);
+
+DDim make_ddim(const std::vector<int>& dims);
+
+/**
+ * \brief Make a DDim from an initializer list
+ *
+ * \param dims An initializer list of ints. Must be sized between [1, 9]
+ *
+ */
+DDim make_ddim(std::initializer_list<int64_t> dims);
+
+// Copies the entries of `ddim` into a std::vector<T> holding ddim.size()
+// elements.
+template <typename T = int64_t>
+std::vector<T> vectorize(const DDim& ddim) {
+  const int rank = ddim.size();
+  // Allocate kMaxRank slots up front, then shrink to the actual rank once the
+  // copy is done.
+  std::vector<T> dims(DDim::kMaxRank);
+  dynamic_dim_assign(ddim.Get(), dims.data(), rank);
+  dims.resize(rank);
+  return dims;
+}
+
+int64_t product(const DDim& ddim);
+
+bool contain_unknown_dim(const DDim& ddim);
+
+/**
+ * \brief Slice a ddim
+ *
+ * Slice dim with [begin, end).
+ * e.g.  DDim d = make_ddim({1,2,3,4,5});
+ *       slice_ddim(d, 1, 3); ====> {2,3}
+ */
+DDim slice_ddim(const DDim& dim, int begin, int end);
+
+/**
+ * \brief What is the length of this dimension?
+ *
+ * \param ddim Dynamic dimension to inspect
+ */
+
+int arity(const DDim& ddim);
+
+std::ostream& operator<<(std::ostream&, const DDim&);
+
+/**
+* \brief Flatten dim to 3d
+* e.g., DDim d = make_ddim({1, 2, 3, 4, 5, 6})
+*       flatten_to_3d(d, 2, 4); ===> {1*2, 3*4, 5*6} ===> {2, 12, 30}
+*/
+DDim flatten_to_3d(const DDim& src, int num_row_dims, int num_col_dims);
+
+// Reshape a tensor to a matrix. The matrix's first dimension(column length)
+// will be the product of tensor's first `num_col_dims` dimensions.
+DDim flatten_to_2d(const DDim& src, int num_col_dims);
+
+DDim flatten_to_1d(const DDim& src);
+
+DDim stride(const DDim& ddim);
+
+DDim stride_numel(const DDim& ddim);
+}  // namespace framework
+}  // namespace pten
--- a/paddle/fluid/framework/ddim_test.cc
+++ b/paddle/fluid/framework/ddim_test.cc
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.

-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
 #include <sstream>

 #include "gtest/gtest.h"
-#include "paddle/fluid/framework/ddim.h"
+#include "paddle/pten/core/ddim.h"

 TEST(DDim, Equality) {
  // construct a DDim from an initialization list
-  paddle::framework::DDim ddim = paddle::framework::make_ddim({9, 1, 5});
+  pten::framework::DDim ddim = pten::framework::make_ddim({9, 1, 5});
  EXPECT_EQ(ddim[0], 9);
  EXPECT_EQ(ddim[1], 1);
  EXPECT_EQ(ddim[2], 5);

  // construct a DDim from a vector
  std::vector<int64_t> vec({9, 1, 5});
-  paddle::framework::DDim vddim = paddle::framework::make_ddim(vec);
+  pten::framework::DDim vddim = pten::framework::make_ddim(vec);
  EXPECT_EQ(ddim[0], 9);
  EXPECT_EQ(ddim[1], 1);
  EXPECT_EQ(ddim[2], 5);
@@ -37,35 +38,33 @@ TEST(DDim, Equality) {
  EXPECT_EQ(ddim[0], 6);

  // vectorize a DDim
-  std::vector<int64_t> res_vec = paddle::framework::vectorize(vddim);
+  std::vector<int64_t> res_vec = pten::framework::vectorize(vddim);
  EXPECT_EQ(res_vec[0], 9);
  EXPECT_EQ(res_vec[1], 1);
  EXPECT_EQ(res_vec[2], 5);
-  paddle::framework::Dim<3> d(3, 2, 1);
-  res_vec = paddle::framework::vectorize(paddle::framework::DDim(d));
+  pten::framework::Dim<3> d(3, 2, 1);
+  res_vec = pten::framework::vectorize(pten::framework::DDim(d));
  EXPECT_EQ(res_vec[0], 3);
  EXPECT_EQ(res_vec[1], 2);
  EXPECT_EQ(res_vec[2], 1);

  // arity of a DDim
-  EXPECT_EQ(paddle::framework::arity(ddim), 3);
+  EXPECT_EQ(pten::framework::arity(ddim), 3);
  EXPECT_EQ(ddim.size(), 3);

  // product of a DDim
-  EXPECT_EQ(paddle::framework::product(vddim), 45);
-  EXPECT_EQ(
-      paddle::framework::product(paddle::framework::make_ddim({3, 2, 5, 3})),
-      90);
+  EXPECT_EQ(pten::framework::product(vddim), 45);
+  EXPECT_EQ(pten::framework::product(pten::framework::make_ddim({3, 2, 5, 3})),
+            90);

  // slice a DDim
-  paddle::framework::DDim ddim2 =
-      paddle::framework::make_ddim({1, 2, 3, 4, 5, 6});
-  paddle::framework::DDim ss = paddle::framework::slice_ddim(ddim2, 2, 5);
+  pten::framework::DDim ddim2 = pten::framework::make_ddim({1, 2, 3, 4, 5, 6});
+  pten::framework::DDim ss = pten::framework::slice_ddim(ddim2, 2, 5);
  EXPECT_EQ(arity(ss), 3);
  EXPECT_EQ(ss[0], 3);
  EXPECT_EQ(ss[1], 4);
  EXPECT_EQ(ss[2], 5);
-  paddle::framework::DDim ss2 = paddle::framework::slice_ddim(ddim2, 0, 6);
+  pten::framework::DDim ss2 = pten::framework::slice_ddim(ddim2, 0, 6);
  EXPECT_EQ(arity(ss2), 6);
  EXPECT_EQ(ss2[0], 1);
  EXPECT_EQ(ss2[1], 2);
@@ -78,7 +77,7 @@ TEST(DDim, Equality) {
 TEST(DDim, Print) {
  // print a DDim
  std::stringstream ss;
-  paddle::framework::DDim ddim = paddle::framework::make_ddim({2, 3, 4});
+  pten::framework::DDim ddim = pten::framework::make_ddim({2, 3, 4});
  ss << ddim;
  EXPECT_EQ("2, 3, 4", ss.str());
 }
--- a/paddle/pten/core/dim.h
+++ b/paddle/pten/core/dim.h
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+
+#include "paddle/pten/core/array.h"
+#include "paddle/pten/core/hostdevice.h"
+
+namespace pten {
+namespace framework {
+
+// Statically sized, statically indexed dimension
+//
+// Dim<D> is an Array<int64_t, D> with extra constructors and a string
+// conversion. D is fixed at compile time and must be non-negative.
+template <int D>
+class Dim : public Array<int64_t, D> {
+ public:
+  static_assert(D >= 0, "D must be not less than 0");
+
+  // Compile-time rank of this Dim.
+  static constexpr int kRank = D;
+  using BaseClass = Array<int64_t, D>;
+
+  // Builds a Dim<D> from a leading entry plus a Dim<D - 1>: `head` is stored
+  // at index 0 and a copy of `tail` is placement-constructed starting at the
+  // second element of the underlying storage.
+  inline Dim(int64_t head, const Dim<D - 1>& tail) {
+    (*this)[0] = head;
+    new (this->GetMutable() + 1) Dim<D - 1>(tail);
+  }
+
+  // Forwards `head` and the remaining arguments to the Array constructor.
+  template <typename... Args>
+  HOSTDEVICE explicit Dim(int64_t head, Args... args)
+      : BaseClass(head, args...) {}
+
+  /** Construct a Dim with each dimension set to the given index */
+  HOSTDEVICE explicit Dim(int64_t idx) { this->Fill(idx); }
+
+  HOSTDEVICE Dim() = default;
+
+  // Host-only: returns the text produced by streaming this Dim to an ostream.
+  HOST std::string to_string() const;
+};
+
+// Returns the result of UnrollProduct<D>::Run over the entries of `a`.
+template <int D>
+HOSTDEVICE inline int64_t product(const Dim<D>& a) {
+  const int64_t* entries = a.Get();
+  return UnrollProduct<D>::Run(entries);
+}
+
+/**
+ * Builds a Dim from a list of indices.
+ *
+ * \param idxes Entries of the new Dim; the rank of the result equals the
+ *              number of arguments supplied.
+ */
+template <typename... Args>
+HOSTDEVICE inline Dim<sizeof...(Args)> make_dim(Args... idxes) {
+  using ResultDim = Dim<sizeof...(Args)>;
+  return ResultDim(idxes...);
+}
+
+// Streams the entries of a Dim in index order, separated by ", ".
+template <int D>
+inline std::ostream& operator<<(std::ostream& os, const Dim<D>& d) {
+  os << d[0];
+  int pos = 1;
+  while (pos < D) {
+    os << ", ";
+    os << d[pos];
+    ++pos;
+  }
+  return os;
+}
+
+// Overload for the rank-0 Dim: writes nothing and returns the stream as is.
+inline std::ostream& operator<<(std::ostream& os, const Dim<0>& d) {
+  return os;
+}
+
+// Renders this Dim through operator<< and returns the resulting text.
+template <int D>
+HOST std::string Dim<D>::to_string() const {
+  std::stringstream buffer;
+  const Dim<D>& self = *this;
+  buffer << self;
+  return buffer.str();
+}
+
+// Compile-time-rank assignment helper: hands `in` and `out` to
+// UnrollAssign<D>::Run.
+template <int D, typename InT, typename OutT>
+inline void static_dim_assign(const InT* in, OutT* out) {
+  UnrollAssign<D>::Run(in, out);
+}
+
+}  // namespace framework
+}  // namespace pten
--- a/paddle/fluid/framework/dim_test.cu
+++ b/paddle/fluid/framework/dim_test.cu
-//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
-//    http://www.apache.org/licenses/LICENSE-2.0
+//     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+
 #include <thrust/device_vector.h>
 #include <sstream>

 #include "gtest/gtest.h"
-#include "paddle/fluid/framework/dim.h"
+#include "paddle/pten/core/dim.h"

-__global__ void test(paddle::framework::Dim<2>* o) {
-  o[0] = paddle::framework::make_dim(5, 6);
+__global__ void test(pten::framework::Dim<2>* o) {
+  o[0] = pten::framework::make_dim(5, 6);
 }

 __global__ void dyn_idx_gpu(int64_t* o) {
-  auto d = paddle::framework::make_dim(5, 6);
+  auto d = pten::framework::make_dim(5, 6);
  o[0] = d[1];
 }

 TEST(Dim, Equality) {
  // construct a Dim on the CPU
-  auto a = paddle::framework::make_dim(3, 4);
+  auto a = pten::framework::make_dim(3, 4);
  EXPECT_EQ(a[0], 3);
  EXPECT_EQ(a[1], 4);

  // construct a Dim on the GPU
-  thrust::device_vector<paddle::framework::Dim<2>> t(2);
+  thrust::device_vector<pten::framework::Dim<2>> t(2);
 #ifdef PADDLE_WITH_HIP
-  hipLaunchKernelGGL(test, dim3(1), dim3(1), 0, 0,
-                     thrust::raw_pointer_cast(t.data()));
+  hipLaunchKernelGGL(
+      test, dim3(1), dim3(1), 0, 0, thrust::raw_pointer_cast(t.data()));
 #else
  test<<<1, 1>>>(thrust::raw_pointer_cast(t.data()));
 #endif
@@ -45,10 +46,10 @@ TEST(Dim, Equality) {
  EXPECT_EQ(a[1], 6);

  // product
-  EXPECT_EQ(paddle::framework::product(a), 30);
+  EXPECT_EQ(pten::framework::product(a), 30);

  // mutate a Dim
-  auto b = paddle::framework::make_dim(7, 8);
+  auto b = pten::framework::make_dim(7, 8);
  b[1] = 10;
  EXPECT_EQ(b[0], 7);
  EXPECT_EQ(b[1], 10);
@@ -61,8 +62,8 @@ TEST(Dim, Equality) {
  // dynamic access on GPU
  thrust::device_vector<int64_t> r(1);
 #ifdef PADDLE_WITH_HIP
-  hipLaunchKernelGGL(dyn_idx_gpu, dim3(1), dim3(1), 0, 0,
-                     thrust::raw_pointer_cast(r.data()));
+  hipLaunchKernelGGL(
+      dyn_idx_gpu, dim3(1), dim3(1), 0, 0, thrust::raw_pointer_cast(r.data()));
 #else
  dyn_idx_gpu<<<1, 1>>>(thrust::raw_pointer_cast(r.data()));
 #endif
@@ -71,9 +72,9 @@ TEST(Dim, Equality) {
 }

 TEST(Dim, Bool) {
-  auto a = paddle::framework::make_dim(3, 4);
-  auto b = paddle::framework::make_dim(5, 6);
-  auto c = paddle::framework::make_dim(3, 4);
+  auto a = pten::framework::make_dim(3, 4);
+  auto b = pten::framework::make_dim(5, 6);
+  auto c = pten::framework::make_dim(3, 4);

  // comparison
  EXPECT_TRUE(a == a);
@@ -84,13 +85,13 @@ TEST(Dim, Bool) {
 TEST(Dim, Print) {
  {
    std::stringstream ss;
-    auto a = paddle::framework::make_dim(2, 3);
+    auto a = pten::framework::make_dim(2, 3);
    ss << a;
    EXPECT_EQ(ss.str(), "2, 3");
  }
  {
    std::stringstream ss;
-    ss << paddle::framework::make_dim(8);
+    ss << pten::framework::make_dim(8);
    EXPECT_EQ(ss.str(), "8");
  }
-}
+}
\ No newline at end of file
--- a/paddle/fluid/platform/hostdevice.h
+++ b/paddle/fluid/platform/hostdevice.h
-//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
-//    http://www.apache.org/licenses/LICENSE-2.0
+//     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+
 #pragma once

 #ifdef __HIPCC__

--- a/paddle/pten/core/tensor_base.h
+++ b/paddle/pten/core/tensor_base.h
@@ -14,11 +14,11 @@ limitations under the License. */

 #pragma once

-#include "paddle/fluid/framework/ddim.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/pten/common/backend.h"
 #include "paddle/pten/common/data_type.h"
 #include "paddle/pten/common/layout.h"
+#include "paddle/pten/core/ddim.h"
 #include "paddle/pten/core/storage.h"
 #include "paddle/pten/core/utils/type_registry.h"

@@ -28,7 +28,7 @@ class TensorBase {
 public:
  using DataType = paddle::experimental::DataType;
  using DataLayout = paddle::experimental::DataLayout;
-  using DDim = paddle::framework::DDim;
+  using DDim = pten::framework::DDim;
  using Place = paddle::platform::Place;

  virtual ~TensorBase() = default;

--- a/paddle/pten/core/tensor_meta.h
+++ b/paddle/pten/core/tensor_meta.h
@@ -21,7 +21,7 @@ limitations under the License. */
 #include "paddle/pten/common/layout.h"

 // See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/framework/ddim.h"
+#include "paddle/pten/core/ddim.h"

 // Note: mixed_vector include many header now, LoD will be
 // used on CUDA device? Can we use small_vector here?
@@ -30,7 +30,7 @@ limitations under the License. */

 namespace pten {

-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;
 using LoD = std::vector<paddle::framework::Vector<size_t>>;
 /// \brief The meta data of dense tensor. Take the structure type
 /// and use all default operations.

--- a/paddle/fluid/framework/unroll_array_ops.h
+++ b/paddle/fluid/framework/unroll_array_ops.h
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -16,9 +16,9 @@
 #include <cstddef>
 #include <type_traits>

-#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/pten/core/hostdevice.h"

-namespace paddle {
+namespace pten {
 namespace framework {

 namespace detail {
@@ -130,4 +130,4 @@ template <size_t N>
 using UnrollProduct = detail::UnrollProduct<0, N, N == 0>;

 }  // namespace framework
-}  // namespace paddle
+}  // namespace pten
--- a/paddle/fluid/framework/unroll_array_ops_test.cc
+++ b/paddle/fluid/framework/unroll_array_ops_test.cc
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -12,12 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/fluid/framework/unroll_array_ops.h"
+#include "paddle/pten/core/unroll_array_ops.h"

 #include <gtest/gtest.h>
 #include <array>

-namespace paddle {
+namespace pten {
 namespace framework {

 template <typename T>
@@ -79,4 +79,4 @@ TEST(unroll_ops, product) {
 }

 }  // namespace framework
-}  // namespace paddle
+}  // namespace pten
\ No newline at end of file
--- a/paddle/pten/infermeta/binary.cc
+++ b/paddle/pten/infermeta/binary.cc
@@ -64,8 +64,8 @@ DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta,
                                const DenseTensorMeta& y_meta,
                                bool trans_x,
                                bool trans_y) {
-  std::vector<int64_t> dims_x = paddle::framework::vectorize(x_meta.dims);
-  std::vector<int64_t> dims_y = paddle::framework::vectorize(y_meta.dims);
+  std::vector<int64_t> dims_x = pten::framework::vectorize(x_meta.dims);
+  std::vector<int64_t> dims_y = pten::framework::vectorize(y_meta.dims);
  auto ndims_x = dims_x.size();
  auto ndims_y = dims_y.size();
  PADDLE_ENFORCE_GT(ndims_x,
@@ -125,7 +125,7 @@ DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta,
    new_dims.push_back(1);
  }

-  auto ddim_out = paddle::framework::make_ddim(new_dims);
+  auto ddim_out = pten::framework::make_ddim(new_dims);

  return {x_meta.dtype, ddim_out, x_meta.layout};
 }
@@ -169,7 +169,7 @@ DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
                                  out_dims_array.data(),
                                  max_dim,
                                  axis);
-    return_meta.dims = paddle::framework::make_ddim(out_dims_array);
+    return_meta.dims = pten::framework::make_ddim(out_dims_array);
  }
  return_meta.lod = x_meta.lod;
  return return_meta;

--- a/paddle/pten/infermeta/nullary.cc
+++ b/paddle/pten/infermeta/nullary.cc
@@ -20,14 +20,14 @@ namespace pten {
 DenseTensorMeta CreateInferMeta(const std::vector<int64_t>& shape,
                                DataType dtype,
                                DataLayout layout) {
-  const auto& out_dims = paddle::framework::make_ddim(shape);
+  const auto& out_dims = pten::framework::make_ddim(shape);
  return {dtype, out_dims, layout};
 }

 DenseTensorMeta CreateInferMeta(const ScalarArray& shape,
                                DataType dtype,
                                DataLayout layout) {
-  const auto& out_dims = paddle::framework::make_ddim(shape.GetData());
+  const auto& out_dims = pten::framework::make_ddim(shape.GetData());
  return {dtype, out_dims, layout};
 }


--- a/paddle/pten/infermeta/unary.cc
+++ b/paddle/pten/infermeta/unary.cc
@@ -23,7 +23,7 @@ DenseTensorMeta UnchangedInferMeta(const DenseTensorMeta& x_meta) {
 }

 DenseTensorMeta ReductionInferMeta(const DenseTensorMeta& x_meta) {
-  const auto& out_dims = paddle::framework::make_ddim({1});
+  const auto& out_dims = pten::framework::make_ddim({1});
  DenseTensorMeta return_meta(x_meta.dtype, out_dims, x_meta.layout);
  return return_meta;
 }
@@ -63,7 +63,7 @@ DenseTensorMeta FlattenInferMeta(const DenseTensorMeta& x_meta,
  for (int i = stop_axis + 1; i < in_dims_size; i++) {
    out_shape.push_back(x_dims[i]);
  }
-  const auto& out_dims = paddle::framework::make_ddim(out_shape);
+  const auto& out_dims = pten::framework::make_ddim(out_shape);
  DenseTensorMeta return_meta(x_meta.dtype, out_dims, x_meta.layout);

  if (x_dims[0] == return_meta.dims[0]) {
@@ -89,10 +89,10 @@ DenseTensorMeta CreateLikeInferMeta(const DenseTensorMeta& x_meta,
          layout == DataLayout::UNDEFINED ? x_meta.layout : layout};
 }

-static paddle::framework::DDim ValidateShape(
-    const std::vector<int64_t> shape, const paddle::framework::DDim& in_dims) {
-  const int64_t in_size = paddle::framework::product(in_dims);
-  auto in_dims_vec = paddle::framework::vectorize(in_dims);
+static pten::framework::DDim ValidateShape(
+    const std::vector<int64_t> shape, const pten::framework::DDim& in_dims) {
+  const int64_t in_size = pten::framework::product(in_dims);
+  auto in_dims_vec = pten::framework::vectorize(in_dims);
  bool all_positive = std::all_of(in_dims_vec.cbegin(),
                                  in_dims_vec.cend(),
                                  [](int64_t i) { return i > 0; });
@@ -112,7 +112,7 @@ static paddle::framework::DDim ValidateShape(
          paddle::platform::errors::InvalidArgument(
              "Only one dimension value of 'shape' in ReshapeOp can "
              "be -1. But received shape = [%s], shape[%d] is also -1.",
-              paddle::framework::make_ddim(shape),
+              pten::framework::make_ddim(shape),
              i));
      unk_dim_idx = i;
    } else if (shape[i] == copy_dim_val) {
@@ -124,7 +124,7 @@ static paddle::framework::DDim ValidateShape(
              "the input tensor X's dimensions. "
              "But received shape = [%s], shape[%d] = 0, X's shape = [%s], "
              "X's dimensions = %d.",
-              paddle::framework::make_ddim(shape),
+              pten::framework::make_ddim(shape),
              i,
              in_dims,
              in_dims.size()));
@@ -136,7 +136,7 @@ static paddle::framework::DDim ValidateShape(
              "Each dimension value of 'shape' in ReshapeOp must not "
              "be negative except one unknown dimension. "
              "But received  shape = [%s], shape[%d] = %d.",
-              paddle::framework::make_ddim(shape),
+              pten::framework::make_ddim(shape),
              i,
              shape[i]));
    }
@@ -165,7 +165,7 @@ static paddle::framework::DDim ValidateShape(
              "'shape' is [%s], known capacity of 'shape' is %d.",
              in_dims,
              in_size,
-              paddle::framework::make_ddim(shape),
+              pten::framework::make_ddim(shape),
              capacity));
    } else {
      output_shape[unk_dim_idx] = -1;
@@ -183,7 +183,7 @@ static paddle::framework::DDim ValidateShape(
              "[%s], the capacity of 'shape' is %d.",
              in_dims,
              in_size,
-              paddle::framework::make_ddim(shape),
+              pten::framework::make_ddim(shape),
              capacity));
    }
  }
@@ -202,11 +202,11 @@ static paddle::framework::DDim ValidateShape(
            "capacity of 'Out' is %d.",
            in_dims,
            in_size,
-            paddle::framework::make_ddim(shape),
+            pten::framework::make_ddim(shape),
            capacity));
  }

-  return paddle::framework::make_ddim(output_shape);
+  return pten::framework::make_ddim(output_shape);
 }

 DenseTensorMeta InferMetaFromVecValue(const DenseTensorMeta& x_meta,
@@ -267,7 +267,7 @@ DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta,
      out_dim_vector.push_back(1);
    }
  }
-  DDim out_dim = paddle::framework::make_ddim(out_dim_vector);
+  DDim out_dim = pten::framework::make_ddim(out_dim_vector);

  DataType out_dtype;
  if (dtype != DataType::UNDEFINED) {

--- a/paddle/pten/kernels/cpu/elementwise.h
+++ b/paddle/pten/kernels/cpu/elementwise.h
@@ -583,8 +583,8 @@ void CommonElementwiseBroadcastBackward(const CPUContext& ctx,
  }

  VLOG(3) << "CommonElementwiseBroadcastBackward xdims:"
-          << paddle::framework::make_ddim(x_dims_array)
-          << " ydim:" << paddle::framework::make_ddim(y_dims_array);
+          << pten::framework::make_ddim(x_dims_array)
+          << " ydim:" << pten::framework::make_ddim(y_dims_array);

  CommonGradBroadcastCPU<T, DX_OP, DY_OP, Tout>(x,
                                                y,

--- a/paddle/pten/kernels/cpu/reduce.h
+++ b/paddle/pten/kernels/cpu/reduce.h
@@ -50,13 +50,13 @@ void ReduceFunctor(const DeviceContext& context,
  DDim out_dims = output->dims();
  if (keep_dim && x_rank > 1) {
    const int kDelFlag = -2;
-    auto dims_vector = paddle::framework::vectorize(out_dims);
+    auto dims_vector = pten::framework::vectorize(out_dims);
    for (size_t i = 0; i < dims_ref.size(); ++i) {
      dims_vector[dims_ref[i]] = kDelFlag;
    }
    dims_vector.erase(remove(dims_vector.begin(), dims_vector.end(), kDelFlag),
                      dims_vector.end());
-    out_dims = paddle::framework::make_ddim(dims_vector);
+    out_dims = pten::framework::make_ddim(dims_vector);
  }
  auto& place = *context.eigen_device();
  Functor functor;

--- a/paddle/pten/kernels/empty_kernel.cc
+++ b/paddle/pten/kernels/empty_kernel.cc
@@ -24,7 +24,7 @@ template <typename T, typename Context>
 void EmptyKernel(const Context& dev_ctx,
                 const ScalarArray& shape,
                 DenseTensor* out) {
-  out->ResizeAndAllocate(paddle::framework::make_ddim(shape.GetData()));
+  out->ResizeAndAllocate(pten::framework::make_ddim(shape.GetData()));
 }

 template <typename T, typename Context>

--- a/paddle/pten/kernels/flatten_grad_kernel.cc
+++ b/paddle/pten/kernels/flatten_grad_kernel.cc
@@ -25,8 +25,7 @@ void FlattenGradKernel(const Context& dev_ctx,
                       const DenseTensor& xshape,
                       DenseTensor* x_grad) {
  auto xshape_dims = xshape.dims();
-  auto x_dims =
-      paddle::framework::slice_ddim(xshape_dims, 1, xshape_dims.size());
+  auto x_dims = pten::framework::slice_ddim(xshape_dims, 1, xshape_dims.size());
  pten::Copy(dev_ctx, out_grad, false, x_grad);
  x_grad->ResizeAndAllocate(x_dims);
 }

--- a/paddle/pten/kernels/funcs/common_shape.h
+++ b/paddle/pten/kernels/funcs/common_shape.h
@@ -26,7 +26,7 @@ inline void SetXShape(const DenseTensor &x, DenseTensor *xshape) {
  for (int i = 0; i < in_dims.size(); ++i) {
    xshape_dims[i + 1] = in_dims[i];
  }
-  xshape->ResizeAndAllocate(paddle::framework::make_ddim(xshape_dims));
+  xshape->ResizeAndAllocate(pten::framework::make_ddim(xshape_dims));
  xshape->ResetLoD(x.meta().lod);
 }


--- a/paddle/pten/kernels/funcs/elementwise_base.h
+++ b/paddle/pten/kernels/funcs/elementwise_base.h
@@ -36,10 +36,10 @@ enum ElementwiseType { kUnary = 1, kBinary = 2, kTernary = 3, kAny = -1 };
   for supporting multiple-output feature in elementwise system.*/
 template <class T, int Num>
 using ConditionalT =
-    typename std::conditional_t<Num == 1, T, paddle::framework::Array<T, Num>>;
+    typename std::conditional_t<Num == 1, T, pten::framework::Array<T, Num>>;

 namespace funcs {
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 template <typename T, typename DX_OP, typename DY_OP, typename Tout = T>
 struct ElemwiseGradNoBroadcast {
@@ -303,9 +303,9 @@ inline DDim trim_trailing_singular_dims(const DDim &dims) {
    trim_dims[i] = dims[i];
  }
  if (trim_dims.size() == 0) {
-    return DDim(paddle::framework::make_dim());
+    return DDim(pten::framework::make_dim());
  }
-  DDim actual_dims = paddle::framework::make_ddim(trim_dims);
+  DDim actual_dims = pten::framework::make_ddim(trim_dims);
  return actual_dims;
 }

@@ -377,7 +377,7 @@ void ElemwiseGradComputeNoBroadcast(const DeviceContext &dev_ctx,
                                    DenseTensor *dy,
                                    DX_OP dx_op,
                                    DY_OP dy_op) {
-  size_t N = static_cast<size_t>(paddle::framework::product(x_dim));
+  size_t N = static_cast<size_t>(pten::framework::product(x_dim));
  paddle::platform::ForRange<DeviceContext> for_range(dev_ctx, N);
  for_range(ElemwiseGradNoBroadcast<T, DX_OP, DY_OP, Tout>{
      x.data<T>(),
@@ -462,7 +462,7 @@ struct ElementwisePrimitiveCaller<InT, OutT, VecSize, Functor, 3, false> {
 template <typename OutT, int VecSize, bool IsBoundary, int NumOuts>
 struct ElementwiseWriteDataCaller {
  __device__ __forceinline__ void operator()(
-      paddle::framework::Array<_ptr_ OutT *, NumOuts> outs,
+      pten::framework::Array<_ptr_ OutT *, NumOuts> outs,
      ConditionalT<OutT, NumOuts> src[VecSize],
      int block_offset,
      int num) {
@@ -485,7 +485,7 @@ struct ElementwiseWriteDataCaller {
 template <typename OutT, int VecSize, bool IsBoundary>
 struct ElementwiseWriteDataCaller<OutT, VecSize, IsBoundary, 1> {
  __device__ __forceinline__ void operator()(
-      paddle::framework::Array<_ptr_ OutT *, 1> outs,
+      pten::framework::Array<_ptr_ OutT *, 1> outs,
      OutT src[VecSize],
      int block_offset,
      int num) {
@@ -502,8 +502,8 @@ template <typename InT,
          int VecSize,
          bool IsBoundary>
 __device__ void VectorizedElementwiseKernelImpl(
-    const paddle::framework::Array<const _ptr_ InT *__restrict__, Arity> &in,
-    paddle::framework::Array<_ptr_ OutT *, NumOuts> outs,
+    const pten::framework::Array<const _ptr_ InT *__restrict__, Arity> &in,
+    pten::framework::Array<_ptr_ OutT *, NumOuts> outs,
    int num,
    int data_offset,
    Functor func) {
@@ -537,8 +537,8 @@ template <typename InT,
          int NumOuts,
          int VecSize>
 __global__ void VectorizedElementwiseKernel(
-    paddle::framework::Array<const _ptr_ InT *__restrict__, Arity> ins,
-    paddle::framework::Array<_ptr_ OutT *, NumOuts> outs,
+    pten::framework::Array<const _ptr_ InT *__restrict__, Arity> ins,
+    pten::framework::Array<_ptr_ OutT *, NumOuts> outs,
    int size,
    int main_offset,
    Functor func) {
@@ -578,8 +578,8 @@ void ElementwiseCudaKernel(const KPDevice &ctx,
                           std::vector<DenseTensor *> *outs,
                           Functor func) {
  auto numel = ins[0]->numel();
-  paddle::framework::Array<const _ptr_ InT *__restrict__, Arity> ins_data;
-  paddle::framework::Array<_ptr_ OutT *, NumOuts> outs_data;
+  pten::framework::Array<const _ptr_ InT *__restrict__, Arity> ins_data;
+  pten::framework::Array<_ptr_ OutT *, NumOuts> outs_data;

  for (int i = 0; i < Arity; ++i) {
    ins_data[i] = ins[i]->data<InT>();

--- a/paddle/pten/kernels/funcs/elementwise_functor.h
+++ b/paddle/pten/kernels/funcs/elementwise_functor.h
@@ -15,8 +15,8 @@ limitations under the License. */
 #pragma once

 #include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/hostdevice.h"
 #include "paddle/pten/common/float16.h"
+#include "paddle/pten/core/hostdevice.h"

 namespace pten {
 namespace funcs {

--- a/paddle/pten/kernels/funcs/transpose.cc
+++ b/paddle/pten/kernels/funcs/transpose.cc
@@ -13,8 +13,8 @@
 // limitations under the License.

 #include "paddle/pten/kernels/funcs/transpose.h"
-#include "paddle/fluid/framework/ddim.h"
 #include "paddle/pten/backends/cpu/cpu_context.h"
+#include "paddle/pten/core/ddim.h"
 #include "paddle/pten/core/dense_tensor.h"

 // See Note [ Why still include the fluid headers? ]
@@ -33,8 +33,8 @@ struct TransposeNormal<CPUContext, T> {
                  pten::DenseTensor* out,
                  const std::vector<int64_t>& axis) {
    const int rank = axis.size();
-    auto in_stride = paddle::framework::stride(in.dims());
-    auto out_stride = paddle::framework::stride(out->dims());
+    auto in_stride = pten::framework::stride(in.dims());
+    auto out_stride = pten::framework::stride(out->dims());
    const T* in_ptr = in.data<T>();
    T* out_ptr = out->mutable_data<T>();


--- a/paddle/pten/kernels/funcs/transpose.cu
+++ b/paddle/pten/kernels/funcs/transpose.cu
@@ -12,9 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/fluid/framework/ddim.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/pten/backends/gpu/gpu_context.h"
+#include "paddle/pten/core/ddim.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/kernels/funcs/transpose.h"

@@ -58,8 +58,8 @@ struct TransposeNormal<GPUContext, T> {
                  pten::DenseTensor* out,
                  const std::vector<int64_t>& axis) {
    const int rank = axis.size();
-    auto in_stride = paddle::framework::stride(in.dims());
-    auto out_stride = paddle::framework::stride(out->dims());
+    auto in_stride = pten::framework::stride(in.dims());
+    auto out_stride = pten::framework::stride(out->dims());
    auto* in_ptr = in.data<T>();
    auto* out_ptr = out->mutable_data<T>();


--- a/paddle/pten/kernels/funcs/transpose.h
+++ b/paddle/pten/kernels/funcs/transpose.h
@@ -14,7 +14,7 @@

 #pragma once

-#include "paddle/fluid/framework/ddim.h"
+#include "paddle/pten/core/ddim.h"
 #include "paddle/pten/core/dense_tensor.h"

 #include "paddle/fluid/operators/eigen/eigen_function.h"

--- a/paddle/pten/kernels/gpu/elementwise.h
+++ b/paddle/pten/kernels/gpu/elementwise.h
@@ -130,14 +130,14 @@ struct DimensionsTransform {

 public:
  explicit DimensionsTransform(const std::vector<const DenseTensor *> &ins,
-                               const paddle::framework::DDim &dims,
+                               const pten::framework::DDim &dims,
                               int axis) {
    const int N = ins.size();
    dim_size = dims.size();
-    out_dims = paddle::framework::vectorize<int64_t>(dims);
+    out_dims = pten::framework::vectorize<int64_t>(dims);
    in_dims.resize(N);
    for (int j = 0; j < N; ++j) {
-      in_dims[j] = paddle::framework::vectorize<int64_t>(ins[j]->dims());
+      in_dims[j] = pten::framework::vectorize<int64_t>(ins[j]->dims());
    }
    InputDimensionsExtend(N, axis);

@@ -214,11 +214,11 @@ template <typename InT,
          int Rank,
          bool IsBoundary = false>
 __device__ void ElementwiseBroadcastKernelImpl(
-    const paddle::framework::Array<const _ptr_ InT *__restrict__, Arity> &ins,
-    paddle::framework::Array<_ptr_ OutT *, NumOuts> outs,
-    const paddle::framework::Array<int, Arity> &use_broadcast,
+    const pten::framework::Array<const _ptr_ InT *__restrict__, Arity> &ins,
+    pten::framework::Array<_ptr_ OutT *, NumOuts> outs,
+    const pten::framework::Array<int, Arity> &use_broadcast,
    uint32_t numel,
-    const paddle::framework::Array<kps::details::BroadcastConfig<Rank>, Arity>
+    const pten::framework::Array<kps::details::BroadcastConfig<Rank>, Arity>
        &configs,
    int num,
    int block_offset,
@@ -259,12 +259,11 @@ template <typename InT,
          int VecSize,
          int Rank>
 __global__ void ElementwiseBroadcastKernel(
-    paddle::framework::Array<const _ptr_ InT *__restrict__, Arity> ins,
-    paddle::framework::Array<_ptr_ OutT *, NumOuts> outs,
-    paddle::framework::Array<int, Arity> use_broadcast,
+    pten::framework::Array<const _ptr_ InT *__restrict__, Arity> ins,
+    pten::framework::Array<_ptr_ OutT *, NumOuts> outs,
+    pten::framework::Array<int, Arity> use_broadcast,
    uint32_t numel,
-    paddle::framework::Array<kps::details::BroadcastConfig<Rank>, Arity>
-        configs,
+    pten::framework::Array<kps::details::BroadcastConfig<Rank>, Arity> configs,
    int main_offset,
    int tail_tid,
    Functor func) {
@@ -345,10 +344,10 @@ void LaunchKernel(const KPDevice &ctx,
                  Functor func,
                  DimensionsTransform merge_dims) {
  int numel = (*outs)[0]->numel();
-  paddle::framework::Array<kps::details::BroadcastConfig<Rank>, Arity> configs;
-  paddle::framework::Array<int, Arity> use_broadcast;
-  paddle::framework::Array<const _ptr_ InT *__restrict__, Arity> ins_data;
-  paddle::framework::Array<_ptr_ OutT *, NumOuts> outs_data;
+  pten::framework::Array<kps::details::BroadcastConfig<Rank>, Arity> configs;
+  pten::framework::Array<int, Arity> use_broadcast;
+  pten::framework::Array<const _ptr_ InT *__restrict__, Arity> ins_data;
+  pten::framework::Array<_ptr_ OutT *, NumOuts> outs_data;

  for (int i = 0; i < NumOuts; ++i) {
    outs_data[i] = (*outs)[i]->mutable_data<OutT>();
@@ -444,7 +443,7 @@ void LaunchBroadcastKernelForDifferentVecSize(
          "The maximum dimension of input tensor is expected to be less than "
          "%d, but recieved %d.\n",
          merge_dims.dim_size,
-          paddle::framework::DDim::kMaxRank));
+          pten::framework::DDim::kMaxRank));
    }
  }
 #undef CALL_BROADCAST_FOR_DIM_SIZE
@@ -1826,8 +1825,8 @@ void CommonElementwiseBroadcastBackward(const GPUContext &ctx,
  }

  VLOG(3) << "CommonElementwiseBroadcastBackward xdims:"
-          << paddle::framework::make_ddim(x_dims_array)
-          << " ydim:" << paddle::framework::make_ddim(y_dims_array);
+          << pten::framework::make_ddim(x_dims_array)
+          << " ydim:" << pten::framework::make_ddim(y_dims_array);

  CommonGradBroadcastCUDA<T, DX_OP, DY_OP, Tout>(x,
                                                 y,

--- a/paddle/pten/kernels/gpu/reduce.h
+++ b/paddle/pten/kernels/gpu/reduce.h
@@ -32,7 +32,6 @@
 namespace cub = hipcub;
 #endif

-#include "paddle/fluid/framework/array.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
 #include "paddle/fluid/operators/kernel_primitives/kernel_primitives.h"
@@ -41,6 +40,7 @@ namespace cub = hipcub;
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/fast_divmod.h"
 #include "paddle/fluid/string/string_helper.h"
+#include "paddle/pten/core/array.h"

 #include "paddle/pten/api/ext/dispatch.h"
 #include "paddle/pten/backends/gpu/gpu_context.h"
@@ -118,7 +118,7 @@ static inline void CheckReduceRank(int reduce_rank, int rank) {

 // convert dims from vector to array
 template <typename T, size_t ElementCount, typename VectorLikeType>
-static inline paddle::framework::Array<T, ElementCount> VectorToArray(
+static inline pten::framework::Array<T, ElementCount> VectorToArray(
    const VectorLikeType& vec) {
  PADDLE_ENFORCE_LE(vec.size(),
                    ElementCount,
@@ -128,7 +128,7 @@ static inline paddle::framework::Array<T, ElementCount> VectorToArray(
                        vec.size(),
                        ElementCount));
  size_t n = static_cast<size_t>(vec.size());
-  paddle::framework::Array<T, ElementCount> ret;
+  pten::framework::Array<T, ElementCount> ret;
  for (size_t i = 0; i < n; ++i) {
    ret[i] = vec[i];
  }
@@ -162,7 +162,7 @@ static inline std::vector<int> GetReduceDim(const std::vector<int64_t>& dims,

 }  // namespace details

-constexpr int kMaxRank = paddle::framework::DDim::kMaxRank;
+constexpr int kMaxRank = pten::framework::DDim::kMaxRank;

 enum ReduceType {
  kReduceLastDim = 0x01,    // when reduce_dim[0] == x_dim.size() - 1;
@@ -202,9 +202,9 @@ struct IndexCalculator {
  }

  int dim;
-  paddle::framework::Array<int, kMaxRank> dims;
-  paddle::framework::Array<int, kMaxRank> strides;
-  paddle::framework::Array<paddle::platform::FastDivMod, kMaxRank> divmoders;
+  pten::framework::Array<int, kMaxRank> dims;
+  pten::framework::Array<int, kMaxRank> strides;
+  pten::framework::Array<paddle::platform::FastDivMod, kMaxRank> divmoders;
 };

 template <bool ReduceLastDim = false>
@@ -326,7 +326,7 @@ struct ReduceConfig {
                     const paddle::platform::Place& place,
                     pten::DenseTensor* tmp) {
    if (should_reduce_again) {
-      tmp->ResizeAndAllocate(paddle::framework::make_ddim(
+      tmp->ResizeAndAllocate(pten::framework::make_ddim(
          {static_cast<int64_t>(left_num * grid.z * grid.y * sizeof(Ty))}));
      output_data = tmp->mutable_data<Ty>();
    } else {
@@ -1029,7 +1029,7 @@ static
  pten::DenseTensor tmp = pten::DenseTensor(
      pten::make_intrusive<paddle::experimental::SharedStorage>(place),
      pten::DenseTensorMeta(pten::DataType::UINT8,
-                            paddle::framework::make_ddim(
+                            pten::framework::make_ddim(
                                {static_cast<int64_t>(temp_storage_bytes)})));

  auto* temp_storage = tmp.mutable_data<uint8_t>();
@@ -1073,7 +1073,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
  // Allocate memory
  y->mutable_data<Ty>();

-  auto x_dim = paddle::framework::vectorize<int>(x.dims());
+  auto x_dim = pten::framework::vectorize<int>(x.dims());
  auto config = ReduceConfig<Ty>(origin_reduce_dims, x_dim);
  config.Run();
  int numel = x.numel();

--- a/paddle/pten/kernels/impl/dot_grad_kernel_impl.h
+++ b/paddle/pten/kernels/impl/dot_grad_kernel_impl.h
@@ -103,7 +103,7 @@ struct DotGradFunction<DeviceContext,
      auto* data_dx = tensor_dx->mutable_data<T>();
      const auto* data_y = tensor_y->data<T>();
      const DDim& dim = tensor_x->dims();
-      size_t N = static_cast<size_t>(paddle::framework::product(dim));
+      size_t N = static_cast<size_t>(pten::framework::product(dim));

      auto step = dim[dim.size() - 1];

@@ -118,7 +118,7 @@ struct DotGradFunction<DeviceContext,
      auto* data_dy = tensor_dy->mutable_data<T>();
      const auto* data_x = tensor_x->data<T>();
      const DDim& dim = tensor_y->dims();
-      size_t N = static_cast<size_t>(paddle::framework::product(dim));
+      size_t N = static_cast<size_t>(pten::framework::product(dim));

      auto step = dim[dim.size() - 1];


--- a/paddle/pten/kernels/impl/full_kernel_impl.h
+++ b/paddle/pten/kernels/impl/full_kernel_impl.h
@@ -36,7 +36,7 @@ void FullKernel(const Context& dev_ctx,
                const ScalarArray& shape,
                const Scalar& val,
                DenseTensor* out) {
-  out->ResizeAndAllocate(paddle::framework::make_ddim(shape.GetData()));
+  out->ResizeAndAllocate(pten::framework::make_ddim(shape.GetData()));
  FullValue<T>(dev_ctx, out, val.to<T>());
 }


--- a/paddle/pten/kernels/impl/matmul_grad_kernel_impl.h
+++ b/paddle/pten/kernels/impl/matmul_grad_kernel_impl.h
@@ -135,7 +135,7 @@ static DDim RowMatrixFromVector(const DDim& x_dim) {
  if (x_dim.size() > 1) {
    return x_dim;
  }
-  return paddle::framework::make_ddim({1, x_dim[0]});
+  return pten::framework::make_ddim({1, x_dim[0]});
 }

 /**
@@ -146,7 +146,7 @@ static DDim ColumnMatrixFromVector(const DDim& y_dim) {
  if (y_dim.size() > 1) {
    return y_dim;
  }
-  return paddle::framework::make_ddim({y_dim[0], 1});
+  return pten::framework::make_ddim({y_dim[0], 1});
 }

 /**

--- a/paddle/pten/kernels/impl/matmul_kernel_impl.h
+++ b/paddle/pten/kernels/impl/matmul_kernel_impl.h
@@ -164,7 +164,7 @@ void MatMulFunction(const Context& dev_ctx,
      std::copy_n(y_dims.cbegin(), y_ndim - 2, out_dims.begin());
      out_dims.back() = y_dims.back();
    }
-    Out->ResizeAndAllocate(paddle::framework::make_ddim(out_dims));
+    Out->ResizeAndAllocate(pten::framework::make_ddim(out_dims));
    Out->mutable_data<T>();
    if (trans_y) {
      const int M = Y.numel() / N;
@@ -242,7 +242,7 @@ void MatMulFunction(const Context& dev_ctx,
    } else {
      std::copy_n(x_dims.cbegin(), x_ndim - 1, out_dims.begin());
    }
-    Out->ResizeAndAllocate(paddle::framework::make_ddim(out_dims));
+    Out->ResizeAndAllocate(pten::framework::make_ddim(out_dims));
    Out->mutable_data<T>();

    if (trans_x) {
@@ -330,7 +330,7 @@ void MatMulFunction(const Context& dev_ctx,
  out_broadcast_dims[ndim - 2] = M;
  out_broadcast_dims[ndim - 1] = N;

-  Out->ResizeAndAllocate(paddle::framework::make_ddim(out_broadcast_dims));
+  Out->ResizeAndAllocate(pten::framework::make_ddim(out_broadcast_dims));
  Out->mutable_data<T>();

  const int batch_dim = ndim - 2;
@@ -493,12 +493,12 @@ void MatmulKernel(const Context& dev_ctx,
                  bool transpose_x,
                  bool transpose_y,
                  DenseTensor* out) {
-  PADDLE_ENFORCE_NE(paddle::framework::product(x.dims()),
+  PADDLE_ENFORCE_NE(pten::framework::product(x.dims()),
                    0,
                    paddle::platform::errors::InvalidArgument(
                        "The Input(X) dims size must not be equal 0,"
                        " but reviced dims size is 0. "));
-  PADDLE_ENFORCE_NE(paddle::framework::product(y.dims()),
+  PADDLE_ENFORCE_NE(pten::framework::product(y.dims()),
                    0,
                    paddle::platform::errors::InvalidArgument(
                        "The Input(Y) dims size must not be equal 0,"

--- a/paddle/pten/tests/api/test_cast_api.cc
+++ b/paddle/pten/tests/api/test_cast_api.cc
@@ -25,7 +25,7 @@ namespace paddle {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 // TODO(chenweihang): Remove this test after the API is used in the dygraph
 TEST(API, cast) {

--- a/paddle/pten/tests/api/test_conj_api.cc
+++ b/paddle/pten/tests/api/test_conj_api.cc
@@ -25,7 +25,7 @@ namespace paddle {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 // TODO(chenweihang): Remove this test after the API is used in the dygraph
 TEST(API, conj) {

--- a/paddle/pten/tests/api/test_dot_api.cc
+++ b/paddle/pten/tests/api/test_dot_api.cc
@@ -25,7 +25,7 @@ namespace paddle {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 // TODO(chenweihang): Remove this test after the API is used in the dygraph
 TEST(API, dot) {

--- a/paddle/pten/tests/api/test_elementwise_api.cc
+++ b/paddle/pten/tests/api/test_elementwise_api.cc
@@ -25,7 +25,7 @@ namespace paddle {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 // TODO(chenweihang): Remove this test after the API is used in the dygraph
 TEST(API, add) {

--- a/paddle/pten/tests/api/test_empty_api.cc
+++ b/paddle/pten/tests/api/test_empty_api.cc
@@ -25,7 +25,7 @@ namespace paddle {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 // TODO(chenweihang): Remove this test after the API is used in the dygraph
 TEST(API, empty_like) {

--- a/paddle/pten/tests/api/test_fill_api.cc
+++ b/paddle/pten/tests/api/test_fill_api.cc
@@ -25,7 +25,7 @@ namespace paddle {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 // TODO(chenweihang): Remove this test after the API is used in the dygraph
 TEST(API, full_like) {

--- a/paddle/pten/tests/api/test_flatten_api.cc
+++ b/paddle/pten/tests/api/test_flatten_api.cc
@@ -25,7 +25,7 @@ namespace paddle {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 // TODO(chenweihang): Remove this test after the API is used in the dygraph
 TEST(API, flatten) {

--- a/paddle/pten/tests/api/test_matmul_api.cc
+++ b/paddle/pten/tests/api/test_matmul_api.cc
@@ -26,7 +26,7 @@ namespace paddle {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 TEST(API, matmul_cpu) {
  // 1. create tensor

--- a/paddle/pten/tests/api/test_mean_api.cc
+++ b/paddle/pten/tests/api/test_mean_api.cc
@@ -25,7 +25,7 @@ namespace paddle {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 // TODO(chenweihang): Remove this test after the API is used in the dygraph
 TEST(API, mean) {

--- a/paddle/pten/tests/api/test_reshape_api.cc
+++ b/paddle/pten/tests/api/test_reshape_api.cc
@@ -25,7 +25,7 @@ namespace paddle {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 // TODO(chenweihang): Remove this test after the API is used in the dygraph
 TEST(API, reshape) {

--- a/paddle/pten/tests/api/test_scale_api.cc
+++ b/paddle/pten/tests/api/test_scale_api.cc
@@ -24,7 +24,7 @@ namespace paddle {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 void CheckScaleResult(experimental::Tensor* out) {
  ASSERT_EQ(out->dims().size(), 2);

--- a/paddle/pten/tests/api/test_sum_api.cc
+++ b/paddle/pten/tests/api/test_sum_api.cc
@@ -25,7 +25,7 @@ namespace paddle {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 // TODO(chenweihang): Remove this test after the API is used in the dygraph
 TEST(API, sum) {

--- a/paddle/pten/tests/api/test_to_api.cc
+++ b/paddle/pten/tests/api/test_to_api.cc
@@ -25,7 +25,7 @@ namespace paddle {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 paddle::experimental::Tensor CreateInputTensor() {
  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(

--- a/paddle/pten/tests/kernels/test_cast_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_cast_dev_api.cc
@@ -28,7 +28,7 @@ namespace pten {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 TEST(DEV_API, cast) {
  // 1. create tensor

--- a/paddle/pten/tests/kernels/test_conj_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_conj_dev_api.cc
@@ -26,7 +26,7 @@ namespace pten {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 TEST(DEV_API, conj) {
  // 1. create tensor

--- a/paddle/pten/tests/kernels/test_copy_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_copy_dev_api.cc
@@ -26,7 +26,7 @@ namespace pten {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 // TODO(YuanRisheng): This TEST file need to be refactored after 'copy' realized
 // in 'paddle/api'

--- a/paddle/pten/tests/kernels/test_creation_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_creation_dev_api.cc
@@ -27,7 +27,7 @@ namespace pten {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 TEST(DEV_API, empty) {
  // 1. create input

--- a/paddle/pten/tests/kernels/test_dot_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_dot_dev_api.cc
@@ -26,7 +26,7 @@ namespace pten {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 TEST(DEV_API, dot) {
  // 1. create tensor

--- a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc
@@ -26,7 +26,7 @@ namespace pten {
 namespace tests {

 namespace framework = paddle::framework;
-using DDim = paddle::framework::DDim;
+using DDim = pten::framework::DDim;

 TEST(DEV_API, add) {
  // 1. create tensor

--- a/paddle/pten/tests/kernels/test_flatten_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_flatten_dev_api.cc
--- a/paddle/pten/tests/kernels/test_matmul_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_matmul_dev_api.cc
--- a/paddle/pten/tests/kernels/test_mean_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_mean_dev_api.cc
--- a/paddle/pten/tests/kernels/test_reshape_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_reshape_dev_api.cc
--- a/paddle/pten/tests/kernels/test_scale_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_scale_dev_api.cc
--- a/paddle/pten/tests/kernels/test_sum_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_sum_dev_api.cc