From 4efbebea53711e73a4f0ce07ad48795f3715b581 Mon Sep 17 00:00:00 2001 From: Allen Guo Date: Wed, 26 Jan 2022 17:05:52 +0800 Subject: [PATCH] [IPU] sync misc changes 01 (#38876) * sync misc changes * apply comments 01 * fix compile error * remove is_ipu_place check * add authors Co-authored-by: Xiaobing Wang Co-authored-by: Allen Guo Co-authored-by: Zhixin Yao Co-authored-by: Haicheng Jiang Co-authored-by: Han Zhao * sync changes * restore cmake * update ir cmake and setup.py * update inference_lib cmake * split PR Co-authored-by: Xiaobing Wang Co-authored-by: Zhixin Yao Co-authored-by: Haicheng Jiang Co-authored-by: Han Zhao --- paddle/fluid/framework/ir/CMakeLists.txt | 16 +++++++ paddle/fluid/framework/operator.cc | 10 +++++ paddle/fluid/memory/memcpy.cc | 45 ++++++++----------- paddle/fluid/platform/CMakeLists.txt | 2 +- paddle/fluid/platform/device/device_wrapper.h | 4 ++ .../fluid/platform/device/ipu/CMakeLists.txt | 15 ++++--- .../fluid/platform/device/ipu/ipu_device.cc | 16 ++++++- paddle/fluid/platform/device/ipu/ipu_device.h | 1 - paddle/fluid/platform/device/ipu/ipu_info.cc | 6 +-- .../ipu/popart_canonicalization/math_ops.cc | 4 +- .../ipu/popart_canonicalization/nn_ops.cc | 2 +- .../ipu/popart_canonicalization/search_ops.cc | 2 +- .../ipu/popart_canonicalization/tensor_ops.cc | 11 +++-- paddle/fluid/platform/device_context.cc | 11 +---- paddle/fluid/platform/device_context.h | 5 --- 15 files changed, 86 insertions(+), 64 deletions(-) diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 0d9c460628..323e743087 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -132,6 +132,22 @@ if(WITH_MKLDNN) pass_library(multi_gru_seq_fuse_pass inference DIR mkldnn) endif() +if(WITH_IPU) + pass_library(forward_graph_extract_pass base DIR ipu) + pass_library(optimizer_extract_pass base DIR ipu) + pass_library(optimizer_state_align_pass base DIR ipu) + pass_library(ipu_graph_builder_pass base DIR ipu) + pass_library(ipu_runtime_replacer_pass base DIR ipu) + pass_library(inference_process_pass base DIR ipu) + pass_library(inference_postprocess_pass base DIR ipu) + pass_library(popart_canonicalization_pass base DIR ipu) + pass_library(ipu_inplace_pass base DIR ipu) + pass_library(infer_shape_pass base DIR ipu) + pass_library(delete_scale_op_pass base DIR ipu) + pass_library(avg_shard_pass base DIR ipu) + pass_library(transfer_cast_op_pass base DIR ipu) +endif() + cc_library(fuse_bn_act_pass SRCS fuse_bn_act_pass.cc DEPS pass graph_pattern_detector ) cc_library(fuse_bn_add_act_pass SRCS fuse_bn_add_act_pass.cc DEPS pass graph_pattern_detector ) cc_library(fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass graph_pattern_detector ) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index ae61b7388d..087a817d03 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -1350,6 +1350,16 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const { kernel_iter = kernels.find(expected_kernel_key); } #endif +#ifdef PADDLE_WITH_IPU + if (kernel_iter == kernels.end() && + platform::is_ipu_place(expected_kernel_key.place_)) { + VLOG(3) << "missing IPU kernel: " << type_ + << ", expected_kernel_key:" << expected_kernel_key + << ", fallbacking to CPU one!"; + expected_kernel_key.place_ = platform::CPUPlace(); + kernel_iter = kernels.find(expected_kernel_key); + } +#endif #ifdef PADDLE_WITH_ASCEND_CL if (kernel_iter == kernels.end() && platform::is_npu_place(expected_kernel_key.place_)) { diff --git a/paddle/fluid/memory/memcpy.cc b/paddle/fluid/memory/memcpy.cc index 6d348ceb87..d2ab438fd2 100644 --- a/paddle/fluid/memory/memcpy.cc +++ b/paddle/fluid/memory/memcpy.cc @@ -57,33 +57,6 @@ void Copy(platform::IPUPlace dst_place, std::memcpy(dst, src, num); } -// NOTE: only for CPUPlace and IPUPlace. -template <> -void Copy(pten::Place dst_place, void* dst, - pten::Place src_place, const void* src, - size_t num) { - if (src_place.GetType() == pten::AllocationType::CPU && - dst_place.GetType() == pten::AllocationType::CPU) { - platform::CPUPlace place_dst, place_src; - return Copy(place_dst, dst, place_src, src, num); - } else if (src_place.GetType() == pten::AllocationType::CPU && - dst_place.GetType() == pten::AllocationType::IPU) { - platform::IPUPlace place_dst(dst_place.GetDeviceId()); - platform::CPUPlace place_src; - return Copy(place_dst, dst, place_src, src, num); - } else if (src_place.GetType() == pten::AllocationType::IPU && - dst_place.GetType() == pten::AllocationType::CPU) { - platform::IPUPlace place_src(src_place.GetDeviceId()); - platform::CPUPlace place_dst; - return Copy(place_dst, dst, place_src, src, num); - } else if (src_place.GetType() == pten::AllocationType::IPU && - dst_place.GetType() == pten::AllocationType::IPU) { - platform::IPUPlace place_src(src_place.GetDeviceId()); - platform::IPUPlace place_dst(dst_place.GetDeviceId()); - return Copy(place_dst, dst, place_src, src, num); - } -} - // NOTE: only for (CPUPlace and IPUPlace) -> (IPUPlace). template <> void Copy(pten::IPUPlace dst_place, void* dst, @@ -1039,6 +1012,24 @@ void Copy(pten::Place dst_place, void* dst, return Copy(place_dst, dst, place_src, src, num); } #endif +#ifdef PADDLE_WITH_IPU + else if (src_place.GetType() == pten::AllocationType::CPU && + dst_place.GetType() == pten::AllocationType::IPU) { + platform::IPUPlace place_dst(dst_place.GetDeviceId()); + platform::CPUPlace place_src; + return Copy(place_dst, dst, place_src, src, num); + } else if (src_place.GetType() == pten::AllocationType::IPU && + dst_place.GetType() == pten::AllocationType::CPU) { + platform::IPUPlace place_src(src_place.GetDeviceId()); + platform::CPUPlace place_dst; + return Copy(place_dst, dst, place_src, src, num); + } else if (src_place.GetType() == pten::AllocationType::IPU && + dst_place.GetType() == pten::AllocationType::IPU) { + platform::IPUPlace place_src(src_place.GetDeviceId()); + platform::IPUPlace place_dst(dst_place.GetDeviceId()); + return Copy(place_dst, dst, place_src, src, num); + } +#endif } // NOTE: Only for (CPUPlace) -> (CPUPlace and PinnedPlace). diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 550278950c..eb7057bcd5 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -72,7 +72,7 @@ IF(WITH_GPU OR WITH_ROCM) ENDIF() IF(WITH_IPU) - set(IPU_CTX_DEPS ipu_backend) + set(IPU_CTX_DEPS ipu_info) ELSE() set(IPU_CTX_DEPS) ENDIF(WITH_IPU) diff --git a/paddle/fluid/platform/device/device_wrapper.h b/paddle/fluid/platform/device/device_wrapper.h index 43408ca207..4f8bbb2d26 100644 --- a/paddle/fluid/platform/device/device_wrapper.h +++ b/paddle/fluid/platform/device/device_wrapper.h @@ -34,3 +34,7 @@ limitations under the License. */ #include "paddle/fluid/platform/device/mlu/enforce.h" #include "paddle/fluid/platform/device/mlu/mlu_info.h" #endif + +#ifdef PADDLE_WITH_IPU +#include "paddle/fluid/platform/device/ipu/ipu_info.h" +#endif diff --git a/paddle/fluid/platform/device/ipu/CMakeLists.txt b/paddle/fluid/platform/device/ipu/CMakeLists.txt index 5f711937a8..d54c6a33ec 100644 --- a/paddle/fluid/platform/device/ipu/CMakeLists.txt +++ b/paddle/fluid/platform/device/ipu/CMakeLists.txt @@ -1,19 +1,22 @@ IF(WITH_IPU) FILE(GLOB POPART_CANONICALIZATION_SRC ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/device/ipu/popart_canonicalization/*.cc) list(APPEND PADDLE_IPU_SRC ${POPART_CANONICALIZATION_SRC}) - set(PADDLE_IPU_LIB "${CMAKE_CURRENT_BINARY_DIR}/libpaddle_ipu.so" CACHE STRING "") - set(PADDLE_IPU_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}" CACHE STRING "") set(IPU_BACKEND_SRC - "ipu_device.cc" "ipu_strategy.cc" "ipu_executor.cc" "ipu_compiler.cc" "ipu_backend.cc" "ipu_utils.cc" ) + set(IPU_INFO_SRC + "ipu_info.cc" + "ipu_device.cc" + ) - cc_library(ipu_backend SRCS ${IPU_BACKEND_SRC} DEPS popart graph framework_proto enforce graph_helper timer) - cc_library(ipu_info SRCS ipu_info.cc DEPS ipu_backend) - cc_library(paddle_ipu SHARED SRCS ${PADDLE_IPU_SRC} DEPS popart) + cc_library(ipu_backend SRCS ${IPU_BACKEND_SRC} DEPS popart graph graph_helper) + cc_library(ipu_info SRCS ${IPU_INFO_SRC} DEPS popart enforce) + cc_library(paddle_ipu SHARED SRCS ${PADDLE_IPU_SRC} DEPS popart graph_helper) add_dependencies(paddle_ipu ipu_backend) + set(PADDLE_IPU_LIB "${CMAKE_CURRENT_BINARY_DIR}/libpaddle_ipu.so" CACHE STRING "") + set(PADDLE_IPU_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}" CACHE STRING "") ENDIF() diff --git a/paddle/fluid/platform/device/ipu/ipu_device.cc b/paddle/fluid/platform/device/ipu/ipu_device.cc index cd2a628c9a..2459f5140e 100644 --- a/paddle/fluid/platform/device/ipu/ipu_device.cc +++ b/paddle/fluid/platform/device/ipu/ipu_device.cc @@ -13,12 +13,26 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/device/ipu/ipu_device.h" -#include "paddle/fluid/platform/device/ipu/ipu_utils.h" +#include "paddle/fluid/platform/enforce.h" namespace paddle { namespace platform { namespace ipu { +// TODO(alleng) merge with ipu_utils +static bool GetBoolEnv(std::string str) { + char* str_val = getenv(str.c_str()); + if (str_val == NULL) { + return false; + } else { + bool val = false; + if (strcmp(str_val, "1") == 0 || strcmp(str_val, "true") == 0 || + strcmp(str_val, "True") == 0 || strcmp(str_val, "TRUE") == 0) + val = true; + return val; + } +} + int GetNumDevices() { bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL"); if (ipu_model) { diff --git a/paddle/fluid/platform/device/ipu/ipu_device.h b/paddle/fluid/platform/device/ipu/ipu_device.h index 3da13a522e..d39feffc92 100644 --- a/paddle/fluid/platform/device/ipu/ipu_device.h +++ b/paddle/fluid/platform/device/ipu/ipu_device.h @@ -15,7 +15,6 @@ limitations under the License. */ #pragma once #include -#include "paddle/fluid/platform/enforce.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/device/ipu/ipu_info.cc b/paddle/fluid/platform/device/ipu/ipu_info.cc index 4506bfbf97..9e6951c371 100644 --- a/paddle/fluid/platform/device/ipu/ipu_info.cc +++ b/paddle/fluid/platform/device/ipu/ipu_info.cc @@ -16,12 +16,10 @@ namespace paddle { namespace platform { //! Get a list of device ids from environment variable or use all. -std::vector GetSelectedIPUDevices() { - return platform::ipu::GetDeviceIds(); -} +std::vector GetSelectedIPUDevices() { return ipu::GetDeviceIds(); } //! Get the total number of IPU devices in system. -int GetIPUDeviceCount() { return platform::ipu::GetNumDevices(); } +int GetIPUDeviceCount() { return ipu::GetNumDevices(); } } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc index 67012e8d4b..d4a14a6d84 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc @@ -32,7 +32,7 @@ Node *mean_handler(Graph *graph, Node *node) { Node *pow_handler(Graph *graph, Node *node) { auto *op = node->Op(); - if (op->HasInput("FactorTensor") && !op->Input("FactorTensor").empty()) { + if (!op->Input("FactorTensor").empty()) { return CreateBaseOp( graph, node, "popart_pow", {GetInputVarNode("X", node), GetInputVarNode("FactorTensor", node)}, @@ -161,7 +161,7 @@ Node *scale_handler(Graph *graph, Node *node) { static_cast(framework::proto::VarType::FP32)); Node *result = nullptr; - if (op->HasInput("ScaleTensor") && !op->Input("ScaleTensor").empty()) { + if (!op->Input("ScaleTensor").empty()) { auto scale = GetInputVarNode("ScaleTensor", node); if (is_float_equal(bias_, 0.0)) { result = CreateBaseOp(graph, node, "popart_mul", diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc index b741200010..b731ba532d 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc @@ -34,7 +34,7 @@ Node *conv2d_handler(Graph *graph, Node *node) { auto pads = std::vector{pads_.begin(), pads_.end()}; auto stride_ = BOOST_GET_CONST(std::vector, op->GetAttr("strides")); auto stride = std::vector{stride_.begin(), stride_.end()}; - if (op->HasInput("Bias") && !op->Input("Bias").empty()) { + if (!op->Input("Bias").empty()) { return CreateConv( graph, node, { diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/search_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/search_ops.cc index 662660c23b..539053f2fb 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/search_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/search_ops.cc @@ -65,7 +65,7 @@ Node *topk_handler(Graph *graph, Node *node) { Node *var_x = GetInputVarNode("X", node); Node *var_k = nullptr; - if (op->HasInput("K") && !op->Input("K").empty()) { + if (!op->Input("K").empty()) { var_k = GetInputVarNode("K", node); } else { auto k = BOOST_GET_CONST(int, op->GetAttr("k")); diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc index 296668890e..db429d2f62 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc @@ -23,7 +23,7 @@ namespace { Node *fill_constant_handler(Graph *graph, Node *node) { auto *op = node->Op(); - if (op->HasInput("ShapeTensor") && !op->Input("ShapeTensor").empty()) { + if (!op->Input("ShapeTensor").empty()) { PADDLE_THROW( platform::errors::Unimplemented("op fill_constant with ShapeTensor")); } @@ -328,7 +328,7 @@ Node *shape_handler(Graph *graph, Node *node) { Node *slice_handler(Graph *graph, Node *node) { auto *op = node->Op(); Node *starts = nullptr; - if (op->HasInput("StartsTensor") && !op->Input("StartsTensor").empty()) { + if (!op->Input("StartsTensor").empty()) { starts = GetInputVarNode("StartsTensor", node); } else { auto starts_ = BOOST_GET_CONST(std::vector, op->GetAttr("starts")); @@ -338,7 +338,7 @@ Node *slice_handler(Graph *graph, Node *node) { starts = starts->outputs[0]; } Node *ends = nullptr; - if (op->HasInput("EndsTensor") && !op->Input("EndsTensor").empty()) { + if (!op->Input("EndsTensor").empty()) { ends = GetInputVarNode("EndsTensor", node); } else { auto ends_ = BOOST_GET_CONST(std::vector, op->GetAttr("ends")); @@ -384,14 +384,13 @@ Node *slice_handler(Graph *graph, Node *node) { Node *expand_handler(Graph *graph, Node *node) { auto *op = node->Op(); - if (op->HasInput("expand_times_tensor") && - !op->Input("expand_times_tensor").empty()) { + if (!op->Input("expand_times_tensor").empty()) { PADDLE_THROW( platform::errors::Unimplemented("Expand op with expand_times_tensor")); } Node *expand_times = nullptr; - if (op->HasInput("ExpandTimes") && !op->Input("ExpandTimes").empty()) { + if (!op->Input("ExpandTimes").empty()) { // cast to int64 expand_times = CreateCast(graph, node, {GetInputVarNode("ExpandTimes", node)}, {}, diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index bfb1f57206..142e30d161 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -21,9 +21,6 @@ limitations under the License. */ #include "paddle/fluid/platform/device/mlu/device_context.h" #include "paddle/fluid/platform/device/mlu/device_context_allocator.h" #endif -#ifdef PADDLE_WITH_IPU -#include "paddle/fluid/platform/ipu/ipu_backend.h" -#endif #include "glog/logging.h" #include "paddle/fluid/framework/expect.h" #include "paddle/fluid/platform/profiler.h" @@ -230,14 +227,10 @@ CPUDeviceContext::CPUDeviceContext() : pten::CPUContext() {} CPUDeviceContext::CPUDeviceContext(CPUPlace place) : pten::CPUContext() {} #ifdef PADDLE_WITH_IPU -IPUDeviceContext::IPUDeviceContext(IPUPlace place) : place_(place) { - int id = place.GetDeviceId(); - std::shared_ptr ipu_backend = - platform::ipu::IpuBackend::GetInstance(); - device_ = ipu_backend->GetDevice(id); -} +IPUDeviceContext::IPUDeviceContext(IPUPlace place) : place_(place) {} Place IPUDeviceContext::GetPlace() const { return place_; } + void IPUDeviceContext::Wait() const { /*! \brief Wait for all operations completion in the stream. */ } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 52f17cd986..17b22907b1 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -65,9 +65,6 @@ limitations under the License. */ #include "paddle/fluid/platform/device/npu/enforce_npu.h" #include "paddle/fluid/platform/device/npu/npu_stream.h" #endif -#ifdef PADDLE_WITH_IPU -#include "paddle/fluid/platform/device/ipu/device.h" -#endif #include "unsupported/Eigen/CXX11/Tensor" namespace Eigen { @@ -151,11 +148,9 @@ class IPUDeviceContext : public DeviceContext { Place GetPlace() const override; /*! \brief Wait for all operations completion in the stream. */ void Wait() const override; - int DeviceId() const { return device_.getId(); } private: IPUPlace place_; - platform::ipu::Device device_; }; template <> struct DefaultDeviceContextType { -- GitLab