未验证 提交 4efbebea 编写于 作者: A Allen Guo 提交者: GitHub

[IPU] sync misc changes 01 (#38876)

* sync misc changes

* apply comments 01

* fix compile error

* remove is_ipu_place check

* add authors
Co-authored-by: Xiaobing Wang <xiaobingw@graphcore.ai>
Co-authored-by: Allen Guo <alleng@graphcore.ai>
Co-authored-by: Zhixin Yao <zhixiny@graphcore.ai>
Co-authored-by: Haicheng Jiang <haichengj@graphcore.ai>
Co-authored-by: Han Zhao <hanzhao@graphcore.ai>

* sync changes

* restore cmake

* update ir cmake and setup.py

* update inference_lib cmake

* split PR
Co-authored-by: Xiaobing Wang <xiaobingw@graphcore.ai>
Co-authored-by: Zhixin Yao <zhixiny@graphcore.ai>
Co-authored-by: Haicheng Jiang <haichengj@graphcore.ai>
Co-authored-by: Han Zhao <hanzhao@graphcore.ai>
上级 83d0d853
...@@ -132,6 +132,22 @@ if(WITH_MKLDNN) ...@@ -132,6 +132,22 @@ if(WITH_MKLDNN)
pass_library(multi_gru_seq_fuse_pass inference DIR mkldnn) pass_library(multi_gru_seq_fuse_pass inference DIR mkldnn)
endif() endif()
# Register the Graphcore IPU graph-transformation passes (sources live in
# the ipu/ subdirectory). All are "base" passes: they run during graph
# construction/compilation for IPU targets rather than at inference-opt time.
if(WITH_IPU)
  pass_library(forward_graph_extract_pass base DIR ipu)
  pass_library(optimizer_extract_pass base DIR ipu)
  pass_library(optimizer_state_align_pass base DIR ipu)
  pass_library(ipu_graph_builder_pass base DIR ipu)
  pass_library(ipu_runtime_replacer_pass base DIR ipu)
  pass_library(inference_process_pass base DIR ipu)
  pass_library(inference_postprocess_pass base DIR ipu)
  pass_library(popart_canonicalization_pass base DIR ipu)
  pass_library(ipu_inplace_pass base DIR ipu)
  pass_library(infer_shape_pass base DIR ipu)
  pass_library(delete_scale_op_pass base DIR ipu)
  pass_library(avg_shard_pass base DIR ipu)
  pass_library(transfer_cast_op_pass base DIR ipu)
endif()
cc_library(fuse_bn_act_pass SRCS fuse_bn_act_pass.cc DEPS pass graph_pattern_detector ) cc_library(fuse_bn_act_pass SRCS fuse_bn_act_pass.cc DEPS pass graph_pattern_detector )
cc_library(fuse_bn_add_act_pass SRCS fuse_bn_add_act_pass.cc DEPS pass graph_pattern_detector ) cc_library(fuse_bn_add_act_pass SRCS fuse_bn_add_act_pass.cc DEPS pass graph_pattern_detector )
cc_library(fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass graph_pattern_detector ) cc_library(fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass graph_pattern_detector )
......
...@@ -1350,6 +1350,16 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const { ...@@ -1350,6 +1350,16 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const {
kernel_iter = kernels.find(expected_kernel_key); kernel_iter = kernels.find(expected_kernel_key);
} }
#endif #endif
#ifdef PADDLE_WITH_IPU
if (kernel_iter == kernels.end() &&
platform::is_ipu_place(expected_kernel_key.place_)) {
VLOG(3) << "missing IPU kernel: " << type_
<< ", expected_kernel_key:" << expected_kernel_key
<< ", fallbacking to CPU one!";
expected_kernel_key.place_ = platform::CPUPlace();
kernel_iter = kernels.find(expected_kernel_key);
}
#endif
#ifdef PADDLE_WITH_ASCEND_CL #ifdef PADDLE_WITH_ASCEND_CL
if (kernel_iter == kernels.end() && if (kernel_iter == kernels.end() &&
platform::is_npu_place(expected_kernel_key.place_)) { platform::is_npu_place(expected_kernel_key.place_)) {
......
...@@ -57,33 +57,6 @@ void Copy<platform::IPUPlace, platform::IPUPlace>(platform::IPUPlace dst_place, ...@@ -57,33 +57,6 @@ void Copy<platform::IPUPlace, platform::IPUPlace>(platform::IPUPlace dst_place,
std::memcpy(dst, src, num); std::memcpy(dst, src, num);
} }
// NOTE: only for CPUPlace and IPUPlace.
template <>
void Copy<pten::Place, pten::Place>(pten::Place dst_place, void* dst,
pten::Place src_place, const void* src,
size_t num) {
if (src_place.GetType() == pten::AllocationType::CPU &&
dst_place.GetType() == pten::AllocationType::CPU) {
platform::CPUPlace place_dst, place_src;
return Copy(place_dst, dst, place_src, src, num);
} else if (src_place.GetType() == pten::AllocationType::CPU &&
dst_place.GetType() == pten::AllocationType::IPU) {
platform::IPUPlace place_dst(dst_place.GetDeviceId());
platform::CPUPlace place_src;
return Copy(place_dst, dst, place_src, src, num);
} else if (src_place.GetType() == pten::AllocationType::IPU &&
dst_place.GetType() == pten::AllocationType::CPU) {
platform::IPUPlace place_src(src_place.GetDeviceId());
platform::CPUPlace place_dst;
return Copy(place_dst, dst, place_src, src, num);
} else if (src_place.GetType() == pten::AllocationType::IPU &&
dst_place.GetType() == pten::AllocationType::IPU) {
platform::IPUPlace place_src(src_place.GetDeviceId());
platform::IPUPlace place_dst(dst_place.GetDeviceId());
return Copy(place_dst, dst, place_src, src, num);
}
}
// NOTE: only for (CPUPlace and IPUPlace) -> (IPUPlace). // NOTE: only for (CPUPlace and IPUPlace) -> (IPUPlace).
template <> template <>
void Copy<pten::IPUPlace, pten::Place>(pten::IPUPlace dst_place, void* dst, void Copy<pten::IPUPlace, pten::Place>(pten::IPUPlace dst_place, void* dst,
...@@ -1039,6 +1012,24 @@ void Copy<pten::Place, pten::Place>(pten::Place dst_place, void* dst, ...@@ -1039,6 +1012,24 @@ void Copy<pten::Place, pten::Place>(pten::Place dst_place, void* dst,
return Copy(place_dst, dst, place_src, src, num); return Copy(place_dst, dst, place_src, src, num);
} }
#endif #endif
#ifdef PADDLE_WITH_IPU
else if (src_place.GetType() == pten::AllocationType::CPU &&
dst_place.GetType() == pten::AllocationType::IPU) {
platform::IPUPlace place_dst(dst_place.GetDeviceId());
platform::CPUPlace place_src;
return Copy(place_dst, dst, place_src, src, num);
} else if (src_place.GetType() == pten::AllocationType::IPU &&
dst_place.GetType() == pten::AllocationType::CPU) {
platform::IPUPlace place_src(src_place.GetDeviceId());
platform::CPUPlace place_dst;
return Copy(place_dst, dst, place_src, src, num);
} else if (src_place.GetType() == pten::AllocationType::IPU &&
dst_place.GetType() == pten::AllocationType::IPU) {
platform::IPUPlace place_src(src_place.GetDeviceId());
platform::IPUPlace place_dst(dst_place.GetDeviceId());
return Copy(place_dst, dst, place_src, src, num);
}
#endif
} }
// NOTE: Only for (CPUPlace) -> (CPUPlace and PinnedPlace). // NOTE: Only for (CPUPlace) -> (CPUPlace and PinnedPlace).
......
...@@ -72,7 +72,7 @@ IF(WITH_GPU OR WITH_ROCM) ...@@ -72,7 +72,7 @@ IF(WITH_GPU OR WITH_ROCM)
ENDIF() ENDIF()
IF(WITH_IPU) IF(WITH_IPU)
set(IPU_CTX_DEPS ipu_backend) set(IPU_CTX_DEPS ipu_info)
ELSE() ELSE()
set(IPU_CTX_DEPS) set(IPU_CTX_DEPS)
ENDIF(WITH_IPU) ENDIF(WITH_IPU)
......
...@@ -34,3 +34,7 @@ limitations under the License. */ ...@@ -34,3 +34,7 @@ limitations under the License. */
#include "paddle/fluid/platform/device/mlu/enforce.h" #include "paddle/fluid/platform/device/mlu/enforce.h"
#include "paddle/fluid/platform/device/mlu/mlu_info.h" #include "paddle/fluid/platform/device/mlu/mlu_info.h"
#endif #endif
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/device/ipu/ipu_info.h"
#endif
IF(WITH_IPU) IF(WITH_IPU)
FILE(GLOB POPART_CANONICALIZATION_SRC ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/device/ipu/popart_canonicalization/*.cc) FILE(GLOB POPART_CANONICALIZATION_SRC ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/device/ipu/popart_canonicalization/*.cc)
list(APPEND PADDLE_IPU_SRC ${POPART_CANONICALIZATION_SRC}) list(APPEND PADDLE_IPU_SRC ${POPART_CANONICALIZATION_SRC})
set(PADDLE_IPU_LIB "${CMAKE_CURRENT_BINARY_DIR}/libpaddle_ipu.so" CACHE STRING "")
set(PADDLE_IPU_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}" CACHE STRING "")
set(IPU_BACKEND_SRC set(IPU_BACKEND_SRC
"ipu_device.cc"
"ipu_strategy.cc" "ipu_strategy.cc"
"ipu_executor.cc" "ipu_executor.cc"
"ipu_compiler.cc" "ipu_compiler.cc"
"ipu_backend.cc" "ipu_backend.cc"
"ipu_utils.cc" "ipu_utils.cc"
) )
set(IPU_INFO_SRC
"ipu_info.cc"
"ipu_device.cc"
)
cc_library(ipu_backend SRCS ${IPU_BACKEND_SRC} DEPS popart graph framework_proto enforce graph_helper timer) cc_library(ipu_backend SRCS ${IPU_BACKEND_SRC} DEPS popart graph graph_helper)
cc_library(ipu_info SRCS ipu_info.cc DEPS ipu_backend) cc_library(ipu_info SRCS ${IPU_INFO_SRC} DEPS popart enforce)
cc_library(paddle_ipu SHARED SRCS ${PADDLE_IPU_SRC} DEPS popart) cc_library(paddle_ipu SHARED SRCS ${PADDLE_IPU_SRC} DEPS popart graph_helper)
add_dependencies(paddle_ipu ipu_backend) add_dependencies(paddle_ipu ipu_backend)
set(PADDLE_IPU_LIB "${CMAKE_CURRENT_BINARY_DIR}/libpaddle_ipu.so" CACHE STRING "")
set(PADDLE_IPU_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}" CACHE STRING "")
ENDIF() ENDIF()
...@@ -13,12 +13,26 @@ See the License for the specific language governing permissions and ...@@ -13,12 +13,26 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/device/ipu/ipu_device.h" #include "paddle/fluid/platform/device/ipu/ipu_device.h"
#include "paddle/fluid/platform/device/ipu/ipu_utils.h" #include "paddle/fluid/platform/enforce.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace ipu { namespace ipu {
// TODO(alleng) merge with ipu_utils
// Returns true iff the environment variable `str` is set to one of
// "1", "true", "True" or "TRUE"; returns false when the variable is
// unset or holds any other value.
static bool GetBoolEnv(const std::string& str) {
  const char* str_val = getenv(str.c_str());
  if (str_val == nullptr) {
    return false;
  }
  // Accept the truthy spellings used elsewhere in Paddle env parsing.
  return strcmp(str_val, "1") == 0 || strcmp(str_val, "true") == 0 ||
         strcmp(str_val, "True") == 0 || strcmp(str_val, "TRUE") == 0;
}
int GetNumDevices() { int GetNumDevices() {
bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL"); bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
if (ipu_model) { if (ipu_model) {
......
...@@ -15,7 +15,6 @@ limitations under the License. */ ...@@ -15,7 +15,6 @@ limitations under the License. */
#pragma once #pragma once
#include <popart/devicemanager.hpp> #include <popart/devicemanager.hpp>
#include "paddle/fluid/platform/enforce.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
...@@ -16,12 +16,10 @@ namespace paddle { ...@@ -16,12 +16,10 @@ namespace paddle {
namespace platform { namespace platform {
//! Get a list of device ids from environment variable or use all. //! Get a list of device ids from environment variable or use all.
std::vector<int> GetSelectedIPUDevices() { std::vector<int> GetSelectedIPUDevices() { return ipu::GetDeviceIds(); }
return platform::ipu::GetDeviceIds();
}
//! Get the total number of IPU devices in system. //! Get the total number of IPU devices in system.
int GetIPUDeviceCount() { return platform::ipu::GetNumDevices(); } int GetIPUDeviceCount() { return ipu::GetNumDevices(); }
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -32,7 +32,7 @@ Node *mean_handler(Graph *graph, Node *node) { ...@@ -32,7 +32,7 @@ Node *mean_handler(Graph *graph, Node *node) {
Node *pow_handler(Graph *graph, Node *node) { Node *pow_handler(Graph *graph, Node *node) {
auto *op = node->Op(); auto *op = node->Op();
if (op->HasInput("FactorTensor") && !op->Input("FactorTensor").empty()) { if (!op->Input("FactorTensor").empty()) {
return CreateBaseOp( return CreateBaseOp(
graph, node, "popart_pow", graph, node, "popart_pow",
{GetInputVarNode("X", node), GetInputVarNode("FactorTensor", node)}, {GetInputVarNode("X", node), GetInputVarNode("FactorTensor", node)},
...@@ -161,7 +161,7 @@ Node *scale_handler(Graph *graph, Node *node) { ...@@ -161,7 +161,7 @@ Node *scale_handler(Graph *graph, Node *node) {
static_cast<int>(framework::proto::VarType::FP32)); static_cast<int>(framework::proto::VarType::FP32));
Node *result = nullptr; Node *result = nullptr;
if (op->HasInput("ScaleTensor") && !op->Input("ScaleTensor").empty()) { if (!op->Input("ScaleTensor").empty()) {
auto scale = GetInputVarNode("ScaleTensor", node); auto scale = GetInputVarNode("ScaleTensor", node);
if (is_float_equal(bias_, 0.0)) { if (is_float_equal(bias_, 0.0)) {
result = CreateBaseOp(graph, node, "popart_mul", result = CreateBaseOp(graph, node, "popart_mul",
......
...@@ -34,7 +34,7 @@ Node *conv2d_handler(Graph *graph, Node *node) { ...@@ -34,7 +34,7 @@ Node *conv2d_handler(Graph *graph, Node *node) {
auto pads = std::vector<int64_t>{pads_.begin(), pads_.end()}; auto pads = std::vector<int64_t>{pads_.begin(), pads_.end()};
auto stride_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("strides")); auto stride_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("strides"));
auto stride = std::vector<int64_t>{stride_.begin(), stride_.end()}; auto stride = std::vector<int64_t>{stride_.begin(), stride_.end()};
if (op->HasInput("Bias") && !op->Input("Bias").empty()) { if (!op->Input("Bias").empty()) {
return CreateConv( return CreateConv(
graph, node, graph, node,
{ {
......
...@@ -65,7 +65,7 @@ Node *topk_handler(Graph *graph, Node *node) { ...@@ -65,7 +65,7 @@ Node *topk_handler(Graph *graph, Node *node) {
Node *var_x = GetInputVarNode("X", node); Node *var_x = GetInputVarNode("X", node);
Node *var_k = nullptr; Node *var_k = nullptr;
if (op->HasInput("K") && !op->Input("K").empty()) { if (!op->Input("K").empty()) {
var_k = GetInputVarNode("K", node); var_k = GetInputVarNode("K", node);
} else { } else {
auto k = BOOST_GET_CONST(int, op->GetAttr("k")); auto k = BOOST_GET_CONST(int, op->GetAttr("k"));
......
...@@ -23,7 +23,7 @@ namespace { ...@@ -23,7 +23,7 @@ namespace {
Node *fill_constant_handler(Graph *graph, Node *node) { Node *fill_constant_handler(Graph *graph, Node *node) {
auto *op = node->Op(); auto *op = node->Op();
if (op->HasInput("ShapeTensor") && !op->Input("ShapeTensor").empty()) { if (!op->Input("ShapeTensor").empty()) {
PADDLE_THROW( PADDLE_THROW(
platform::errors::Unimplemented("op fill_constant with ShapeTensor")); platform::errors::Unimplemented("op fill_constant with ShapeTensor"));
} }
...@@ -328,7 +328,7 @@ Node *shape_handler(Graph *graph, Node *node) { ...@@ -328,7 +328,7 @@ Node *shape_handler(Graph *graph, Node *node) {
Node *slice_handler(Graph *graph, Node *node) { Node *slice_handler(Graph *graph, Node *node) {
auto *op = node->Op(); auto *op = node->Op();
Node *starts = nullptr; Node *starts = nullptr;
if (op->HasInput("StartsTensor") && !op->Input("StartsTensor").empty()) { if (!op->Input("StartsTensor").empty()) {
starts = GetInputVarNode("StartsTensor", node); starts = GetInputVarNode("StartsTensor", node);
} else { } else {
auto starts_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("starts")); auto starts_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("starts"));
...@@ -338,7 +338,7 @@ Node *slice_handler(Graph *graph, Node *node) { ...@@ -338,7 +338,7 @@ Node *slice_handler(Graph *graph, Node *node) {
starts = starts->outputs[0]; starts = starts->outputs[0];
} }
Node *ends = nullptr; Node *ends = nullptr;
if (op->HasInput("EndsTensor") && !op->Input("EndsTensor").empty()) { if (!op->Input("EndsTensor").empty()) {
ends = GetInputVarNode("EndsTensor", node); ends = GetInputVarNode("EndsTensor", node);
} else { } else {
auto ends_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("ends")); auto ends_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("ends"));
...@@ -384,14 +384,13 @@ Node *slice_handler(Graph *graph, Node *node) { ...@@ -384,14 +384,13 @@ Node *slice_handler(Graph *graph, Node *node) {
Node *expand_handler(Graph *graph, Node *node) { Node *expand_handler(Graph *graph, Node *node) {
auto *op = node->Op(); auto *op = node->Op();
if (op->HasInput("expand_times_tensor") && if (!op->Input("expand_times_tensor").empty()) {
!op->Input("expand_times_tensor").empty()) {
PADDLE_THROW( PADDLE_THROW(
platform::errors::Unimplemented("Expand op with expand_times_tensor")); platform::errors::Unimplemented("Expand op with expand_times_tensor"));
} }
Node *expand_times = nullptr; Node *expand_times = nullptr;
if (op->HasInput("ExpandTimes") && !op->Input("ExpandTimes").empty()) { if (!op->Input("ExpandTimes").empty()) {
// cast to int64 // cast to int64
expand_times = expand_times =
CreateCast(graph, node, {GetInputVarNode("ExpandTimes", node)}, {}, CreateCast(graph, node, {GetInputVarNode("ExpandTimes", node)}, {},
......
...@@ -21,9 +21,6 @@ limitations under the License. */ ...@@ -21,9 +21,6 @@ limitations under the License. */
#include "paddle/fluid/platform/device/mlu/device_context.h" #include "paddle/fluid/platform/device/mlu/device_context.h"
#include "paddle/fluid/platform/device/mlu/device_context_allocator.h" #include "paddle/fluid/platform/device/mlu/device_context_allocator.h"
#endif #endif
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/ipu/ipu_backend.h"
#endif
#include "glog/logging.h" #include "glog/logging.h"
#include "paddle/fluid/framework/expect.h" #include "paddle/fluid/framework/expect.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
...@@ -230,14 +227,10 @@ CPUDeviceContext::CPUDeviceContext() : pten::CPUContext() {} ...@@ -230,14 +227,10 @@ CPUDeviceContext::CPUDeviceContext() : pten::CPUContext() {}
CPUDeviceContext::CPUDeviceContext(CPUPlace place) : pten::CPUContext() {} CPUDeviceContext::CPUDeviceContext(CPUPlace place) : pten::CPUContext() {}
#ifdef PADDLE_WITH_IPU #ifdef PADDLE_WITH_IPU
IPUDeviceContext::IPUDeviceContext(IPUPlace place) : place_(place) { IPUDeviceContext::IPUDeviceContext(IPUPlace place) : place_(place) {}
int id = place.GetDeviceId();
std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
platform::ipu::IpuBackend::GetInstance();
device_ = ipu_backend->GetDevice(id);
}
Place IPUDeviceContext::GetPlace() const { return place_; } Place IPUDeviceContext::GetPlace() const { return place_; }
void IPUDeviceContext::Wait() const { void IPUDeviceContext::Wait() const {
/*! \brief Wait for all operations completion in the stream. */ /*! \brief Wait for all operations completion in the stream. */
} }
......
...@@ -65,9 +65,6 @@ limitations under the License. */ ...@@ -65,9 +65,6 @@ limitations under the License. */
#include "paddle/fluid/platform/device/npu/enforce_npu.h" #include "paddle/fluid/platform/device/npu/enforce_npu.h"
#include "paddle/fluid/platform/device/npu/npu_stream.h" #include "paddle/fluid/platform/device/npu/npu_stream.h"
#endif #endif
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/device/ipu/device.h"
#endif
#include "unsupported/Eigen/CXX11/Tensor" #include "unsupported/Eigen/CXX11/Tensor"
namespace Eigen { namespace Eigen {
...@@ -151,11 +148,9 @@ class IPUDeviceContext : public DeviceContext { ...@@ -151,11 +148,9 @@ class IPUDeviceContext : public DeviceContext {
Place GetPlace() const override; Place GetPlace() const override;
/*! \brief Wait for all operations completion in the stream. */ /*! \brief Wait for all operations completion in the stream. */
void Wait() const override; void Wait() const override;
int DeviceId() const { return device_.getId(); }
private: private:
IPUPlace place_; IPUPlace place_;
platform::ipu::Device device_;
}; };
template <> template <>
struct DefaultDeviceContextType<platform::IPUPlace> { struct DefaultDeviceContextType<platform::IPUPlace> {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册