diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 49248edd322d29e015d4b2e4f8ec20cc592c4a22..6af07caaf88b2a907807b84e63d4ed5499ca98d1 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -2176,6 +2176,16 @@ Scope* OperatorWithKernel::PreparePhiData(
       if (!new_scope) {
        new_scope = &scope.NewScope();
       }
+      // For inference, if a gpu model has an op which could only run on CPU,
+      // each result of different input will be the same with the first one.
+      // The reason is that if a gpu tensor is the input of a cpu kernel,
+      // we will create a new cpu tensor in new scope.
+      // However, if enable_cache_runtime_context_, we get the cpu tensor each
+      // time, not the gpu tensor. Thus, we set pre_scope_ = nullptr
+      // to trigger `new RuntimeContext()` in RunImpl().
+      if (enable_cache_runtime_context_) {
+        pre_scope_ = nullptr;
+      }
 
       // Create new var with the same name in transfer scopes
       auto* trans_var = new_scope->Var(name_vec[offset]);
diff --git a/paddle/phi/kernels/gpu/arange_kernel.cu b/paddle/phi/kernels/gpu/arange_kernel.cu
index 916f6aa5537a6fd0d3e6a95d7a2ab40dd2115186..9ea0d7c5393c37cf51bd37be86a45c4b3432cc64 100644
--- a/paddle/phi/kernels/gpu/arange_kernel.cu
+++ b/paddle/phi/kernels/gpu/arange_kernel.cu
@@ -64,7 +64,7 @@ void ArangeKernel(const Context& dev_ctx,
 
 PD_REGISTER_KERNEL(
     arange, GPU, ALL_LAYOUT, phi::ArangeKernel, float, double, int64_t, int) {
-  kernel->InputAt(0).SetBackend(phi::Backend::CPU);
-  kernel->InputAt(1).SetBackend(phi::Backend::CPU);
-  kernel->InputAt(2).SetBackend(phi::Backend::CPU);
+  kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND);
+  kernel->InputAt(1).SetBackend(phi::Backend::ALL_BACKEND);
+  kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND);
 }
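
The first hunk, in plain terms: when `PreparePhiData` creates a transfer scope (a GPU tensor feeding a CPU-only kernel), a cached `RuntimeContext` keeps pointing at the CPU copy made on the first run, so every later inference run reproduces the first result; resetting `pre_scope_` forces `RunImpl()` to rebuild the context. The standalone sketch below is only an illustration of that failure mode and the fix, not Paddle code: `Tensor`, `Scope`, `Op`, and `Run` are assumed stand-ins for the real framework classes.

```cpp
// Minimal sketch of the caching pitfall: a cached "runtime context" that
// points at the first run's transferred CPU copy keeps serving stale data.
#include <iostream>
#include <string>
#include <unordered_map>

struct Tensor { int value = 0; };

// A scope maps variable names to tensors (loosely modeled on framework::Scope).
struct Scope {
  std::unordered_map<std::string, Tensor> vars;
};

struct Op {
  // Cached "runtime context": here just a pointer to the CPU copy resolved on
  // the first run.
  Tensor* cached_input = nullptr;
  bool enable_cache_runtime_context = true;

  int Run(Scope* scope, const Tensor& gpu_input, bool invalidate_cache) {
    // Mimics `pre_scope_ = nullptr`: force the context to be rebuilt.
    if (invalidate_cache) cached_input = nullptr;
    if (!(enable_cache_runtime_context && cached_input != nullptr)) {
      // "Data transfer": copy the GPU input into a CPU tensor in the transfer
      // scope and cache a pointer to it.
      scope->vars["x@cpu"] = gpu_input;
      cached_input = &scope->vars["x@cpu"];
    }
    // The kernel reads the (possibly stale) CPU copy.
    return cached_input->value;
  }
};

int main() {
  Scope scope_buggy, scope_fixed;
  Op op_buggy, op_fixed;

  int inputs[] = {1, 2, 3};
  for (int v : inputs) {
    Tensor gpu_input{v};
    std::cout << "input=" << v
              << "  cached context (buggy): "
              << op_buggy.Run(&scope_buggy, gpu_input, /*invalidate_cache=*/false)
              << "  rebuilt context (fixed): "
              << op_fixed.Run(&scope_fixed, gpu_input, /*invalidate_cache=*/true)
              << "\n";
  }
  // Expected: the buggy column prints 1, 1, 1 (first result repeated),
  // while the fixed column tracks the actual inputs 1, 2, 3.
  return 0;
}
```

The second hunk is complementary: registering the GPU `arange` kernel's start/end/step inputs as `ALL_BACKEND` instead of pinning them to `CPU` means the framework no longer has to move GPU-resident start/end/step tensors into a host-side transfer scope before the kernel runs, which is the very path that triggered the stale-cache problem above.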