[XPU] add context_gm_size in XpuConfig, don't alloc gm in pass. (#54674)

52ad918b · AlbertVan · GitHub · f38e126e · 52ad918b · 52ad918b
14 changed files
--- a/cmake/external/xpu.cmake
+++ b/cmake/external/xpu.cmake
@@ -9,9 +9,13 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
 set(XPU_XFT_LIB_NAME "libxft.so")
 set(XPU_XPTI_LIB_NAME "libxpti.so")

-set(XPU_BASE_DATE "20230602")
+if(NOT DEFINED XPU_BASE_DATE)
+  set(XPU_BASE_DATE "20230602")
+endif()
 set(XPU_XCCL_BASE_VERSION "1.0.49.2")
-set(XPU_XFT_BASE_VERSION "latest")
+if(NOT DEFINED XPU_XFT_BASE_VERSION)
+  set(XPU_XFT_BASE_VERSION "20230602")
+endif()
 set(XPU_XPTI_BASE_VERSION "0.0.1")

 if(NOT DEFINED XPU_BASE_URL)

--- a/paddle/fluid/framework/ir/xpu/quant_utils.cc
+++ b/paddle/fluid/framework/ir/xpu/quant_utils.cc
@@ -265,7 +265,7 @@ void PrepareWeight(phi::DenseTensor* weight,
  }

  // Find max
-  int max_ptr_size = phi::backends::xpu::get_xpu_max_ptr_size(0);
+  int max_ptr_size = phi::backends::xpu::get_xpu_max_ptr_size(-1);
  int size = weight_fp32.numel();
  auto* weight_data = weight_fp32.data<float>();
  float max_val = FindMaxAbs(weight_data, size);

--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -303,6 +303,7 @@ struct Argument {
  DECL_ARGUMENT_FIELD(xpu_l3_size, XpuL3Size, size_t);
  DECL_POINTER_ARGUMENT_FIELD(xpu_l3_ptr, XpuL3Ptr, void*);
  DECL_ARGUMENT_FIELD(xpu_l3_autotune_size, XpuL3AutotuneSize, size_t);
+  DECL_ARGUMENT_FIELD(xpu_context_gm_size, XpuContextGmSize, int);
  DECL_POINTER_ARGUMENT_FIELD(xpu_context, XpuContext, void*);
  DECL_POINTER_ARGUMENT_FIELD(xpu_stream, XpuStream, void*);
  DECL_ARGUMENT_FIELD(xpu_conv_autotune_level, XpuConvAutotuneLevel, int);

--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -272,6 +272,8 @@ void IRPassManager::CreatePasses(Argument *argument,
      pass->Set("xpu_l3_ptr", new void *(argument->xpu_l3_ptr()));
      pass->Set("xpu_l3_autotune_size",
                new size_t(argument->xpu_l3_autotune_size()));
+      pass->Set("xpu_context_gm_size",
+                new int(argument->xpu_context_gm_size()));
      pass->Set("xpu_context", new void *(argument->xpu_context()));
      pass->Set("xpu_stream", new void *(argument->xpu_stream()));
      pass->Set("xpu_conv_autotune_level",

--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -1096,6 +1096,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
  ss << xpu_config_.l3_size;
  ss << xpu_config_.l3_ptr;
  ss << xpu_config_.l3_autotune_size;
+  ss << xpu_config_.context_gm_size;
  ss << xpu_config_.context;
  ss << xpu_config_.stream;
  ss << xpu_config_.conv_autotune_level;
@@ -1347,6 +1348,8 @@ std::string AnalysisConfig::Summary() {
         std::to_string(reinterpret_cast<int64_t>(xpu_config_.l3_ptr))});
    os.InsertRow(
        {"xpu_l3_autotune_size", std::to_string(xpu_config_.l3_autotune_size)});
+    os.InsertRow(
+        {"xpu_context_gm_size", std::to_string(xpu_config_.context_gm_size)});
    os.InsertRow(
        {"xpu_context",
         std::to_string(reinterpret_cast<int64_t>(xpu_config_.context))});

--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -93,6 +93,10 @@
 #include "paddle/fluid/platform/device/ipu/paddle_ipu_handler.h"
 #endif

+#ifdef PADDLE_WITH_XPU
+#include "paddle/phi/backends/xpu/xpu_info.h"
+#endif
+
 namespace paddle {
 namespace {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
@@ -436,6 +440,7 @@ void AnalysisPredictor::InitPlace() {
 #endif  // LITE_SUBGRAPH_WITH_XPU
    } else {
 #ifdef PADDLE_WITH_XPU
+      phi::backends::xpu::SetXPUDeviceId(config_.xpu_device_id());
      place_ = paddle::platform::XPUPlace(config_.xpu_device_id());
 #else
      PADDLE_THROW(platform::errors::Unavailable(
@@ -509,7 +514,8 @@ void AnalysisPredictor::InitDeviceContexts() {
    device_contexts_.emplace(
        place_, std::async(std::launch::deferred, [=] {
          auto &instance = memory::allocation::AllocatorFacade::Instance();
-          auto *xpu_context = new InferXPUContext(place_);
+          auto *xpu_context =
+              new InferXPUContext(place_, config_.xpu_config().context_gm_size);
          xpu_context->SetAllocator(instance.GetAllocator(place_).get());
          xpu_context->SetGenerator(
              phi::DefaultXPUGenerator(place_.GetDeviceId()).get());
@@ -1504,6 +1510,7 @@ void AnalysisPredictor::PrepareArgument() {
  argument_->SetXpuL3Size(config_.xpu_config_.l3_size);
  argument_->SetXpuL3Ptr(config_.xpu_config_.l3_ptr);
  argument_->SetXpuL3AutotuneSize(config_.xpu_config_.l3_autotune_size);
+  argument_->SetXpuContextGmSize(config_.xpu_config_.context_gm_size);
  argument_->SetXpuContext(config_.xpu_config_.context);
  argument_->SetXpuStream(config_.xpu_config_.stream);
  argument_->SetXpuConvAutotuneLevel(config_.xpu_config_.conv_autotune_level);

--- a/paddle/fluid/inference/api/infer_context.cc
+++ b/paddle/fluid/inference/api/infer_context.cc
@@ -27,8 +27,15 @@ InferGPUContext::InferGPUContext(const phi::Place& place)
 #endif

 #ifdef PADDLE_WITH_XPU
-InferXPUContext::InferXPUContext(const phi::Place& place)
-    : phi::XPUContext(place) {}
+InferXPUContext::InferXPUContext(const phi::Place& place, int context_gm_size)
+    : phi::XPUContext(place) {
+  if (context_gm_size >= 0) {  // non-negative: forward the requested size
+    x_context()->set_option("XPUAPI_DEFAULT_SIZE",
+                            std::to_string(context_gm_size).c_str());
+  } else {  // negative (the declared default is -1): pass an empty value
+    x_context()->set_option("XPUAPI_DEFAULT_SIZE", "");
+  }
+}

 void* InferXPUContext::Alloc(phi::TensorBase* tensor,
                             phi::DataType dtype,

--- a/paddle/fluid/inference/api/infer_context.h
+++ b/paddle/fluid/inference/api/infer_context.h
@@ -52,7 +52,7 @@ class InferGPUContext : public phi::GPUContext {
 #ifdef PADDLE_WITH_XPU
 class InferXPUContext : public phi::XPUContext {
 public:
-  explicit InferXPUContext(const phi::Place& place);
+  explicit InferXPUContext(const phi::Place& place, int context_gm_size = -1);

  void* Alloc(phi::TensorBase* tensor,
              phi::DataType dtype,

--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -93,6 +93,10 @@ struct PD_INFER_DECL XpuConfig {
  // kernels (both paddle/xdnn kernels)
  size_t l3_autotune_size{0};

+  // Reserved xpu global memory size for xpu_context;
+  // If not set (-1), default memory size for xpu_context is 128MB in XPU2 or
+  // 64MB in XPU1. If set 1*1024*1024, memory size for xpu_context will be 1MB;
+  int context_gm_size{-1};
  // xpu_context(from baidu::xpu::api::create_context) for execution.
  // If context is nullptr, new context will be created by default.
  void* context{nullptr};

--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -1009,6 +1009,7 @@ void BindXpuConfig(py::module *m) {
      .def_readwrite("l3_ptr", &XpuConfig::l3_ptr)
      .def_readwrite("l3_size", &XpuConfig::l3_size)
      .def_readwrite("l3_autotune_size", &XpuConfig::l3_autotune_size)
+      .def_readwrite("context_gm_size", &XpuConfig::context_gm_size)
      .def_readwrite("context", &XpuConfig::context)
      .def_readwrite("stream", &XpuConfig::stream)
      .def_readwrite("conv_autotune_level", &XpuConfig::conv_autotune_level)

--- a/paddle/ir/core/type_name.h
+++ b/paddle/ir/core/type_name.h
@@ -27,7 +27,7 @@ inline std::string get_type_name() {
  name = name.substr(name.find(key));
  assert(!name.empty() && "Unable to find the template parameter!");
  name = name.substr(key.size());
-  assert(name.back() == "]" && "Name doesn't end in the substitution key!");
+  assert(name.back() == ']' && "Name doesn't end in the substitution key!");
  auto sem_pos = name.find_first_of(";");
  if (sem_pos == std::string::npos)
    name.pop_back();

--- a/paddle/phi/backends/xpu/xpu_context.cc
+++ b/paddle/phi/backends/xpu/xpu_context.cc
@@ -151,6 +151,7 @@ struct XPUContext::Impl {
    LOG_FIRST_N(WARNING, 1)
        << "Please NOTE: xpu device: " << static_cast<int>(place_.device);
    context_ = xpu::create_context();
+    context_->set_option("XPUAPI_DEFAULT_SIZE", "1");
    xpu_version_ = backends::xpu::get_xpu_version(place_.device);
    SetL3Cache();
  }

--- a/paddle/phi/backends/xpu/xpu_info.cc
+++ b/paddle/phi/backends/xpu/xpu_info.cc
@@ -187,6 +187,9 @@ void MemcpySyncD2D(void* dst,

 XPUVersion get_xpu_version(int dev_id) {
  uint64_t v = 0;
+  if (dev_id == -1) {
+    dev_id = GetXPUCurrentDeviceId();
+  }
  PADDLE_ENFORCE_XPU_SUCCESS(xpu_device_get_attr(&v, XPUATTR_MODEL, dev_id));

  if (v == K100 || v == K200) {

--- a/paddle/phi/backends/xpu/xpu_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu_op_list.cc
@@ -68,7 +68,7 @@ bool is_in_xpu_black_list(const std::string& fluid_op_name) {
 bool is_xpu_kp_support_op(const std::string& fluid_op_name,
                          const phi::DataType type) {
  if (is_in_xpu_black_list(fluid_op_name)) return false;
-  auto v = get_xpu_version(0);
+  auto v = get_xpu_version(-1);
  auto& ops = (v == phi::backends::xpu::XPUVersion::XPU1)
                  ? phi::backends::xpu::get_kl1_ops()
                  : phi::backends::xpu::get_kp_ops();
@@ -84,7 +84,7 @@ bool is_xpu_kp_support_op(const std::string& fluid_op_name,
 bool is_xpu_support_op(const std::string& fluid_op_name,
                       const phi::DataType type) {
  if (is_in_xpu_black_list(fluid_op_name)) return false;
-  auto v = get_xpu_version(0);
+  auto v = get_xpu_version(-1);
  auto& ops = (v == phi::backends::xpu::XPUVersion::XPU1) ? get_kl1_ops()
                                                          : get_kl2_ops();
  if (ops.find(fluid_op_name) != ops.end() &&