diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 7abb5ef84e301729a97a0c0c88213643a5dd40f0..18c036a1ebe18a14d17cb0fb1f7e49cd80a898fa 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2180,7 +2180,7 @@ bool AnalysisPredictor::ExpRunWithRuntimeConfig(void *config) {
           "l3_autotune_size(%zu) should be less than or equal to l3_size(%zu).",
           l3_autotune_size,
           l3_size));
-  dev_ctx->SetL3Info(l3_size, l3_ptr, l3_autotune_size);
+  dev_ctx->SetL3Info(l3_size, l3_ptr, l3_autotune_size, place_);
   bool ret = ZeroCopyRun();
   dev_ctx->L3CacheAutotune();
diff --git a/paddle/fluid/inference/api/infer_context.cc b/paddle/fluid/inference/api/infer_context.cc
index 6c963f49363e92b428832602a51e70a5337c4579..bda0eecec0b0d5bf77051fb3b325ac13a908ffed 100644
--- a/paddle/fluid/inference/api/infer_context.cc
+++ b/paddle/fluid/inference/api/infer_context.cc
@@ -77,7 +77,9 @@ void* InferXPUContext::Alloc(phi::TensorBase* tensor,
 void InferXPUContext::SetL3Info(size_t l3_size,
                                 void* l3_ptr,
-                                size_t l3_autotune_size) {
+                                size_t l3_autotune_size,
+                                const phi::Place& place) {
+  phi::backends::xpu::XPUDeviceGuard guard(place.GetDeviceId());
   if (l3_ptr == nullptr) {
     if (l3_size_ != l3_size) {
       if (l3_owned_) {
diff --git a/paddle/fluid/inference/api/infer_context.h b/paddle/fluid/inference/api/infer_context.h
index ebc55098c9705a14168b6c7892d601181a3e6a53..121399dca5a4c240f864c9c3fb54db6902945883 100644
--- a/paddle/fluid/inference/api/infer_context.h
+++ b/paddle/fluid/inference/api/infer_context.h
@@ -60,7 +60,10 @@ class InferXPUContext : public phi::XPUContext {
                bool pinned = false,
                bool fake_alloc = false) const override;
-  void SetL3Info(size_t l3_size, void* l3_ptr, size_t l3_autotune_size);
+  void SetL3Info(size_t l3_size,
+                 void* l3_ptr,
+                 size_t l3_autotune_size,
+                 const phi::Place& place);
   void L3CacheAutotune();
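The change threads the predictor's `place_` into `SetL3Info` and wraps the body in a `phi::backends::xpu::XPUDeviceGuard`, so the L3 buffer is allocated and freed on the device identified by `place`, not on whichever XPU device happens to be current on the calling thread. Below is a minimal, self-contained sketch of that RAII device-guard pattern: the guard records the current device, switches to the target device for the lifetime of the scope, and restores the original device on exit. Note that `set_device`/`get_device` here are hypothetical stand-ins for the XPU runtime calls the real guard wraps; this is an illustration of the pattern, not Paddle's implementation.

```cpp
#include <cstdio>

// Hypothetical stand-ins for the device-runtime calls
// (e.g. what XPUDeviceGuard delegates to internally).
static int g_current_device = 0;
static void set_device(int id) { g_current_device = id; }
static int get_device() { return g_current_device; }

// RAII guard: switch to `target` for the enclosing scope,
// restore the previous device when the scope exits.
class DeviceGuard {
 public:
  explicit DeviceGuard(int target) : prev_(get_device()) {
    if (target != prev_) set_device(target);
  }
  ~DeviceGuard() { set_device(prev_); }
  DeviceGuard(const DeviceGuard&) = delete;
  DeviceGuard& operator=(const DeviceGuard&) = delete;

 private:
  int prev_;
};

int main() {
  set_device(0);
  {
    DeviceGuard guard(1);  // analogous to XPUDeviceGuard(place.GetDeviceId())
    std::printf("inside guard: device %d\n", get_device());  // prints 1
    // ... L3 setup would run here, targeting the right device ...
  }
  std::printf("after guard: device %d\n", get_device());  // prints 0
  return 0;
}
```

The guard matters in multi-device setups: without it, a `SetL3Info` call made while another device is current would allocate or free the L3 buffer on the wrong device. Because restoration happens in the destructor, the previous device is re-selected even on early returns from the guarded function.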