diff --git a/paddle/fluid/framework/new_executor/data_transfer.cc b/paddle/fluid/framework/new_executor/data_transfer.cc
index b856bbec4b0c47f387487a79388013ed91b1fc32..3cf16266baf088dd029c7f9f2fe3b08171459195 100644
--- a/paddle/fluid/framework/new_executor/data_transfer.cc
+++ b/paddle/fluid/framework/new_executor/data_transfer.cc
@@ -315,6 +315,7 @@ std::shared_ptr<OperatorBase> TransferDevice(const std::string& var_name,
     op_type = kMemcpyH2D;
     int dst_place_type = platform::is_gpu_place(dst_place)   ? 0
                          : platform::is_npu_place(dst_place) ? 1
+                         : platform::is_ipu_place(dst_place) ? 3
                          : platform::is_xpu_place(dst_place) ? 2
                                                              : -1;
     attr_map = {{"dst_place_type", dst_place_type}};
diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc
index c321069537c8974af6a231a6e46fe3e8f0dc16d9..3680f0aa900c6905075f918b622ee6f82c96a73e 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore.cc
@@ -25,6 +25,7 @@
 #include "paddle/fluid/platform/os_info.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
 #include "paddle/fluid/platform/profiler/supplement_tracing.h"
+#include "paddle/phi/common/place.h"
 #include "paddle/phi/core/kernel_context.h"
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
@@ -475,8 +476,13 @@ void InterpreterCore::Convert(
   BuildSkipShareLoDInfo();

   for (size_t i = 0; i < vec_instruction_.size(); ++i) {
+#ifdef PADDLE_WITH_IPU
+    gc_event_.emplace_back(phi::CPUPlace(), 0);
+#else
     gc_event_.emplace_back(vec_instruction_[i].DeviceContext().GetPlace(),
                            platform::GenerateDeviceEventFlag());
+
+#endif
   }
   bool inplaced = false;
   for (auto inst : vec_instruction_) {
diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h
index af3951f4538f12f035fdc0e5944c75ff33fb63f8..31e27a07c665d4a47b548d6ad9421fb7c382ea03 100644
--- a/paddle/fluid/framework/new_executor/new_executor_defs.h
+++ b/paddle/fluid/framework/new_executor/new_executor_defs.h
@@ -390,7 +390,7 @@ static bool IsCpuOp(const Instruction& instr) {
 // is supported heterogeneous place
 static bool IsSupportedHetePlace(const phi::Place& place) {
   return platform::is_gpu_place(place) || platform::is_npu_place(place) ||
-         platform::is_xpu_place(place);
+         platform::is_xpu_place(place) || platform::is_ipu_place(place);
 }

 }  // namespace interpreter
diff --git a/paddle/fluid/framework/new_executor/stream_analyzer.cc b/paddle/fluid/framework/new_executor/stream_analyzer.cc
index 086dac8dac1fbf2ce82cc31089ceb57933b4415e..760a852baee68f3c3f53386ce28611e923d80342 100644
--- a/paddle/fluid/framework/new_executor/stream_analyzer.cc
+++ b/paddle/fluid/framework/new_executor/stream_analyzer.cc
@@ -204,8 +204,9 @@ bool StreamAnalyzer::IsDirectRun(Instruction& cur_instr,
                                  const Instruction& next_instr) {
   if (&cur_instr.DeviceContext() == &next_instr.DeviceContext()) return true;

-  // xpu memcpy kerenl is synchronous.
-  if (platform::is_xpu_place(place_)) return true;
+  // xpu & ipu memcpy kernels are synchronous.
+  if (platform::is_ipu_place(place_) || platform::is_xpu_place(place_))
+    return true;

   // npu d2h kernel is asynchronous.
   if (platform::is_npu_place(place_)) {
diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h
index d38efbff3165c86997df4a93eb9a290b3792b49f..53b77d538b3ed1db1509895de7561d17695ad1e0 100644
--- a/paddle/fluid/framework/op_registry.h
+++ b/paddle/fluid/framework/op_registry.h
@@ -408,6 +408,12 @@ struct OpKernelRegistrarFunctorEx
+                               paddle::platform::complex<float>,
+                               ops::MemcpyD2HKernel,
+                               paddle::platform::complex<double>,
+                               ops::MemcpyD2HKernel,
+                               plat::float16,
+                               ops::MemcpyD2HKernel,
+                               int16_t,
+                               ops::MemcpyD2HKernel);
+#endif
diff --git a/paddle/fluid/operators/memcpy_h2d_op.cc b/paddle/fluid/operators/memcpy_h2d_op.cc
index 98ed68cf84f87dc5cdda4318549d6d353679502e..ff7b786d04018e0961832a5fe914c58687aa8450 100644
--- a/paddle/fluid/operators/memcpy_h2d_op.cc
+++ b/paddle/fluid/operators/memcpy_h2d_op.cc
@@ -100,6 +100,7 @@ class MemcpyH2DOpProtoMaker : public framework::OpProtoAndCheckerMaker {
              "0. CUDAPinnedPlace/CPU <->CUDAPlace"
              "1. NPUPinnedPlace/CPU <-> NPUPlace"
              "2. CPU <->XPUPlace"
+             "3. CPU <->IPUPlace"
              "Other place type is Unimplemented and will cause ERROR.");
     AddComment(R"DOC(
     MemcpyD2H Operator.
@@ -233,3 +234,31 @@ REGISTER_OP_NPU_KERNEL_FUNCTOR(memcpy_h2d,
                                int16_t,
                                ops::MemcpyH2DKernel);
 #endif
+
+#ifdef PADDLE_WITH_IPU
+REGISTER_OP_IPU_KERNEL_FUNCTOR(memcpy_h2d,
+                               float,
+                               ops::MemcpyH2DKernel,
+                               double,
+                               ops::MemcpyH2DKernel,
+                               int8_t,
+                               ops::MemcpyH2DKernel,
+                               uint8_t,
+                               ops::MemcpyH2DKernel,
+                               int,
+                               ops::MemcpyH2DKernel,
+                               int64_t,
+                               ops::MemcpyH2DKernel,
+                               bool,
+                               ops::MemcpyH2DKernel,
+                               paddle::platform::bfloat16,
+                               ops::MemcpyH2DKernel,
+                               paddle::platform::complex<float>,
+                               ops::MemcpyH2DKernel,
+                               paddle::platform::complex<double>,
+                               ops::MemcpyH2DKernel,
+                               plat::float16,
+                               ops::MemcpyH2DKernel,
+                               int16_t,
+                               ops::MemcpyH2DKernel);
+#endif
diff --git a/paddle/fluid/operators/memcpy_h2d_op.h b/paddle/fluid/operators/memcpy_h2d_op.h
index 3fcc4b89eefe8d5e931bddea3d18899ab96f6b2c..8cd84f4b59e8ceae1e65db38e202a98ef2635920 100644
--- a/paddle/fluid/operators/memcpy_h2d_op.h
+++ b/paddle/fluid/operators/memcpy_h2d_op.h
@@ -50,7 +50,7 @@ class MemcpyH2DFunctor {
                    lod_tensor.dtype(),
                    phi::Stream(reinterpret_cast<phi::StreamId>(stream)));

-    if (dst_place_type_ == 0 || dst_place_type_ == 1 || dst_place_type_ == 2) {
+    if (dst_place_type_ >= 0 && dst_place_type_ <= 3) {
       framework::TensorCopy(
           lod_tensor, dev_ctx_.GetPlace(), dev_ctx_, &out_tensor);
     } else {
diff --git a/paddle/fluid/platform/device_event_base.h b/paddle/fluid/platform/device_event_base.h
index 51df0fd4f40adc42780333cc7fd90bb39634ac6e..a2d3fc1dc38185b3ddeb080d5155a6dfa04813a5 100644
--- a/paddle/fluid/platform/device_event_base.h
+++ b/paddle/fluid/platform/device_event_base.h
@@ -64,7 +64,7 @@ class DeviceEvent {
                           "Required type < %d, but received type = %d",
                           MaxDeviceTypes,
                           type_id_));
-    // TODO(Aurelius84): only support CPU/CUDA, need consider XPU/NPU later
+    // TODO(Aurelius84): only support CPU/CUDA/XPU/NPU.
     PADDLE_ENFORCE_LT(type_id_,
                       4,
                       platform::errors::Unavailable(
diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py
index cf00075edcf8615116242175d23f9b63d0709ed8..c7bfd19e5a9d0234a1d8340d9d17540643d46f29 100755
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -1388,8 +1388,8 @@ class Executor(object):
             program = pruned_program

         def _can_use_interpreter_core(program, place):
-            if core.is_compiled_with_mlu() or core.is_compiled_with_ipu(
-            ) or isinstance(place, core.CustomPlace):
+            if core.is_compiled_with_mlu() or isinstance(
+                    place, core.CustomPlace):
                 return False

             compiled = isinstance(program, compiler.CompiledProgram)
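
For reference, the sketch below summarizes the dst_place_type convention that this patch extends: data_transfer.cc now emits 3 for IPU destinations, the MemcpyH2DOpProtoMaker doc string documents it as "3. CPU <->IPUPlace", and memcpy_h2d_op.h accepts any value in [0, 3]. This is an illustrative sketch only; the helper name DstPlaceTypeOf is hypothetical and does not appear in the patch or the codebase.

// Illustrative only: mirrors the ternary chain in TransferDevice() and the
// dst_place_type attribute consumed by MemcpyH2DFunctor.
// 0 = CUDAPlace, 1 = NPUPlace, 2 = XPUPlace, 3 = IPUPlace, -1 = unsupported.
#include "paddle/fluid/platform/place.h"

namespace paddle {
namespace framework {

// Hypothetical helper, not part of this patch.
inline int DstPlaceTypeOf(const platform::Place& dst_place) {
  return platform::is_gpu_place(dst_place)   ? 0
         : platform::is_npu_place(dst_place) ? 1
         : platform::is_ipu_place(dst_place) ? 3
         : platform::is_xpu_place(dst_place) ? 2
                                             : -1;
}

}  // namespace framework
}  // namespace paddle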