Unverified commit b2aee3e3, authored by Allen Guo, committed by GitHub

[IPU] update ipu_backend p0 (#38854)

* update ipu_backend

* sync with paddle internal
Co-authored-by: Xiaobing Wang <xiaobingw@graphcore.ai>
Co-authored-by: Allen Guo <alleng@graphcore.ai>
Co-authored-by: Zhixin Yao <zhixiny@graphcore.ai>
Co-authored-by: Haicheng Jiang <haichengj@graphcore.ai>
Co-authored-by: Han Zhao <hanzhao@graphcore.ai>

* apply comments 01

* update error message

* restore ipu_executor and ipu_optimizer

* add clang-format on
Co-authored-by: Xiaobing Wang <xiaobingw@graphcore.ai>
Co-authored-by: Zhixin Yao <zhixiny@graphcore.ai>
Co-authored-by: Haicheng Jiang <haichengj@graphcore.ai>
Co-authored-by: Han Zhao <hanzhao@graphcore.ai>
Parent b4cb3589
 IF(WITH_IPU)
   FILE(GLOB POPART_CANONICALIZATION_SRC ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/device/ipu/popart_canonicalization/*.cc)
-  cc_library(ipu_device SRCS device.cc DEPS enforce popart)
-  cc_library(ipu_utils SRCS ipu_utils.cc DEPS memory framework_proto popart)
-  cc_library(ipu_strategy SRCS ipu_strategy.cc DEPS popart graph framework_proto enforce)
-  cc_library(ipu_optimizer SRCS ipu_optimizer.cc DEPS popart enforce)
-  cc_library(ipu_executor SRCS ipu_executor.cc DEPS ipu_optimizer ipu_utils popart graph framework_proto)
-  cc_library(popart_canonicalization_utils SRCS ${POPART_CANONICALIZATION_SRC} DEPS framework_proto enforce ipu_utils)
-  cc_library(ipu_compiler SRCS ipu_compiler.cc DEPS popart graph ipu_utils graph_helper)
-  cc_library(ipu_backend SRCS ipu_backend.cc DEPS popart ipu_compiler graph framework_proto enforce ipu_utils ipu_strategy ipu_device ipu_executor graph_helper)
+  list(APPEND PADDLE_IPU_SRC ${POPART_CANONICALIZATION_SRC})
+  set(PADDLE_IPU_LIB "${CMAKE_CURRENT_BINARY_DIR}/libpaddle_ipu.so" CACHE STRING "")
+  set(PADDLE_IPU_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}" CACHE STRING "")
+  set(IPU_BACKEND_SRC
+    "ipu_device.cc"
+    "ipu_strategy.cc"
+    "ipu_executor.cc"
+    "ipu_compiler.cc"
+    "ipu_backend.cc"
+    "ipu_utils.cc"
+  )
+  cc_library(ipu_backend SRCS ${IPU_BACKEND_SRC} DEPS popart graph framework_proto enforce graph_helper timer)
   cc_library(ipu_info SRCS ipu_info.cc DEPS ipu_backend)
+  cc_library(paddle_ipu SHARED SRCS ${PADDLE_IPU_SRC} DEPS popart)
+  add_dependencies(paddle_ipu ipu_backend)
 ENDIF()
@@ -12,8 +12,8 @@
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/platform/ipu/ipu_backend.h"
-#include "paddle/fluid/platform/ipu/ipu_utils.h"
+#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
+#include "paddle/fluid/platform/device/ipu/ipu_utils.h"

 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/ir/graph.h"
@@ -24,170 +24,92 @@
 namespace paddle {
 namespace platform {
 namespace ipu {

-std::shared_ptr<IpuBackend> IpuBackend::instance_ = nullptr;
+IpuBackend* IpuBackend::GetInstance() {
+  static IpuBackend instance;
+  return &instance;
+}

 IpuBackend::IpuBackend() {
-  compiler_ = std::make_shared<Compiler>();
+  compiler_ = std::make_unique<Compiler>();
   executor_ = std::make_unique<Executor>();
 }

-void IpuBackend::Clear() {
+IpuBackend::~IpuBackend() {
+  compiler_.reset();
   executor_.reset();
-  // detach device
-  if (device_ != nullptr && device_->isAttached()) {
-    device_->detach();
-    device_.reset();
-    device_ = nullptr;
-  }
-}
-
-IpuBackend::~IpuBackend() { Clear(); }
-
-std::shared_ptr<IpuBackend> IpuBackend::GetInstance() {
-  if (!instance_) {
-    instance_.reset(new IpuBackend());
-  }
-  return instance_;
-}
-
-// This api should only call from python, always return a new object
-std::shared_ptr<IpuBackend> IpuBackend::GetNewInstance() {
-  instance_.reset(new IpuBackend());
-  return instance_;
 }

-void IpuBackend::Compile(framework::ir::Graph* graph,
+void IpuBackend::Compile(Graph* graph,
                          const std::vector<std::string>& feed_list,
                          const std::vector<std::string>& fetch_list) {
   VLOG(10) << "enter IpuBackend::Compile";
+  compiler_->Prepare();
+  executor_->SetCompilerResources(compiler_->GetResources());
   compiler_->InitInputs(graph, feed_list);
+  compiler_->LowerConstants(graph, scope_);
   compiler_->LowerWeights(graph, scope_);
   compiler_->LowerBody(graph);
   compiler_->InitOutputs(fetch_list);
-  executor_->SetWeights(compiler_->GetWeights());
+  if (ipu_strategy_->is_training) {
+    compiler_->LowerOptimier(graph, scope_);
+  }
+  is_compiled_ = true;
+  // when call compile, means a new graph
+  is_prepared_ = false;
   VLOG(10) << "leave IpuBackend::Compile";
 }

-void IpuBackend::Run(const std::vector<const framework::Tensor*>& inputs,
-                     const std::vector<framework::Tensor*>& outputs,
+void IpuBackend::Run(const std::vector<const Tensor*>& inputs,
+                     const std::vector<Tensor*>& outputs,
                      const framework::ExecutionContext& ctx) {
   Prepare();
-  auto inputs_id = compiler_->GetInputs();
-  auto outputs_id = compiler_->GetOutputs();
-  executor_->Run(inputs_id, inputs, outputs_id, outputs, ctx);
+  timer_->Start();
+  executor_->Run(inputs, outputs, ctx);
+  timer_->Pause();
+  VLOG(10) << "[IPU Run]: " << timer_->ElapsedMS() << " (ms)";
 }

 void IpuBackend::Prepare() {
-  if (is_prepared_) {
-    return;
-  } else {
+  if (!is_prepared_) {
+    executor_->Prepare(compiler_->GetModelProto());
+    timer_.reset(new platform::Timer());
     is_prepared_ = true;
   }
-  // convert Model to fp16
-  if (ipu_strategy_->enable_fp16) {
-    compiler_->ConvertProtoToFp16();
-  }
-  auto proto = compiler_->GetModelProto();
-  auto tensors = compiler_->GetTensors();
-  auto outputs = compiler_->GetOutputs();
-  executor_->Prepare(proto, tensors, outputs, device_);
 }

-void IpuBackend::SetScope(const framework::Scope& scope) {
+void IpuBackend::Detach() { executor_->Detach(); }
+
+void IpuBackend::Reset() {
+  executor_->Detach();
+  compiler_.reset();
+  executor_.reset();
+}
+
+void IpuBackend::SetScope(const Scope& scope) {
   scope_ = &scope;
   executor_->SetScope(&scope);
 }

 void IpuBackend::SetIpuStrategy(const IpuStrategy& strategy) {
   ipu_strategy_ = &strategy;
-  executor_->SetIpuStrategy(strategy);
   compiler_->SetIpuStrategy(strategy);
+  executor_->SetIpuStrategy(strategy);
 }

-size_t IpuBackend::GetNumDevices() {
-  // IpuModel
-  bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
-  if (ipu_model) return 1;
-  // Real dev
-  size_t num_devices =
-      popart::DeviceManager::createDeviceManager().enumerateDevices().size();
-  PADDLE_ENFORCE_GT(
-      num_devices, 0,
-      platform::errors::Unavailable(
-          "Do not found any IPU devices, please make "
-          "sure Poplar sdk is enabled or enable ENV \"POPLAR_IPUMODEL=1\""));
-  return num_devices;
-}
-
-std::vector<int> IpuBackend::GetDeviceIds() {
-  bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
-  if (ipu_model) {
-    return {0};
-  }
-  std::vector<int> device_ids;
-  auto devices =
-      popart::DeviceManager::createDeviceManager().enumerateDevices();
-  PADDLE_ENFORCE_GT(
-      devices.size(), 0,
-      platform::errors::Unavailable("Do not found any IPU devices, please make "
-                                    "sure Poplar sdk is enabled."));
-  for (auto device : devices) {
-    device_ids.push_back(device->getId());
-  }
-  return device_ids;
-}
-
-Device IpuBackend::GetDevice(int id) {
-  bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
-  if (ipu_model) {
-    std::map<std::string, std::string> deviceOpts{{"numIPUs", "1 "}};
-    device_ = popart::DeviceManager::createDeviceManager().createIpuModelDevice(
-        deviceOpts);
-    Device device(*device_.get());
-    return device;
-  }
-  size_t num_devices = GetNumDevices();
-  if (id < 0 || id >= num_devices) {
-    PADDLE_THROW(platform::errors::InvalidArgument(
-        "device id %d is invalid, number devices is %d", id, num_devices));
-  }
-  std::shared_ptr<popart::DeviceInfo> popart_device_info =
-      popart::DeviceManager::createDeviceManager().getDevice(
-          popart::SyncPattern::Full, id);
-  Device device(*popart_device_info.get());
-  return device;
-}
-
-void IpuBackend::AttachDevice(int id) {
-  // trick here
-  // Compiler ipu is not same as the runtime ipu.
-  VLOG(10) << "comile ipu id = " << id;
-  bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
-  if (ipu_model) {
-    return;
-  }
-  device_ = popart::DeviceManager::createDeviceManager().acquireAvailableDevice(
-      UpperIpuNum());
-  PADDLE_ENFORCE_NOT_NULL(
-      device_, platform::errors::Unavailable("Can't attach IPU, ipu_num = %d.",
-                                             UpperIpuNum()));
+void IpuBackend::SetCustomOps(
+    const std::vector<IpuCustomOpIdentifier>& custom_ops) {
+  compiler_->SetCustomOps(custom_ops);
 }

-bool IpuBackend::DeviceIsAttached() { return device_ != nullptr; }
-
-// num_ipus must be pow(2,n);
-int IpuBackend::UpperIpuNum() {
-  PADDLE_ENFORCE_GT(ipu_strategy_->num_ipus, 0,
-                    platform::errors::Unavailable(
-                        "The ipu num get is wrong, please make sure the "
-                        "sharding or pipline parameter is right."));
-  int i = 0;
-  while (std::pow(2, i) < ipu_strategy_->num_ipus) {
-    i++;
-  }
-  return std::pow(2, i);
+void IpuBackend::SaveMoldeProto(const std::string& path) {
+  if (ipu_strategy_->is_training && is_prepared_) {
+    executor_->SaveModelToHost(path);
+  } else if (is_compiled_) {
+    compiler_->SaveModelProtoNoCheck(path);
+  } else {
+    LOG(WARNING) << "Model is empty";
+  }
 }

 }  // namespace ipu
...
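For orientation, here is a minimal sketch (not part of this commit) of how the reworked IpuBackend API is driven. The Graph, Scope, and strategy come from the IPU graph passes and the executing op; the feed/fetch names are illustrative:

// Sketch only: names and surrounding setup are assumptions, not commit code.
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"

void CompileAndRunOnIpu(paddle::framework::ir::Graph* graph,
                        const paddle::framework::Scope& scope,
                        const paddle::platform::ipu::IpuStrategy& strategy) {
  auto* backend = paddle::platform::ipu::IpuBackend::GetInstance();
  backend->SetScope(scope);
  // Must precede Compile(): is_training gates LowerOptimier inside Compile.
  backend->SetIpuStrategy(strategy);
  backend->Compile(graph, /*feed_list=*/{"x"}, /*fetch_list=*/{"loss"});
  // backend->Run(inputs, outputs, ctx);  // the first Run() triggers Prepare()
  backend->Detach();  // release the IPU explicitly; the static singleton never dies
}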
@@ -14,88 +14,86 @@
 limitations under the License. */

 #pragma once

-#include <cmath>
 #include <popart/devicemanager.hpp>
 #include <popart/names.hpp>
-#include <popart/tensorinfo.hpp>

-#include "paddle/fluid/framework/feed_fetch_type.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/device/ipu/ipu_compiler.h"
+#include "paddle/fluid/platform/device/ipu/ipu_device.h"
+#include "paddle/fluid/platform/device/ipu/ipu_executor.h"
+#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
+#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/ipu/device.h"
-#include "paddle/fluid/platform/ipu/ipu_compiler.h"
-#include "paddle/fluid/platform/ipu/ipu_executor.h"
-#include "paddle/fluid/platform/ipu/ipu_strategy.h"
+#include "paddle/fluid/platform/timer.h"

 namespace paddle {
 namespace platform {
 namespace ipu {

+// IpuBackend is the center of paddle-ipu, its functions include:
+// 1. Compile paddle model to popart model
+// 2. Run popart model, inference or training
+// 3. Request and release device
+// 4. Other helper functions
 class IpuBackend {
-  // IpuBackend is the center of paddle-ipu, its functions include:
-  // 1. Compile paddle model to popart model
-  // 2. Run popart model, inference or training
-  // 3. Request and release device
-  // 4. Other helper functions
+ public:
+  static IpuBackend *GetInstance();
+
  public:
   IpuBackend();
   ~IpuBackend();

-  void Clear();
-
-  // return if exists, else create and return
-  static std::shared_ptr<IpuBackend> GetInstance();
-
-  // always return a new instance_
-  static std::shared_ptr<IpuBackend> GetNewInstance();
-
   // what compile does include (call compiler_):
   // 1. map paddle-op -> popart op
   // 2. construct popart onnx compute graph
-  void Compile(framework::ir::Graph *graph,
-               const std::vector<std::string> &feed_list,
+  void Compile(Graph *graph, const std::vector<std::string> &feed_list,
               const std::vector<std::string> &fetch_list);

   // what run does include:
   // 1. construct forward onnx graph
   // 2. graph-level optimization
   // 3. autodiff
-  void Run(const std::vector<const framework::Tensor *> &inputs,
-           const std::vector<framework::Tensor *> &outputs,
+  void Run(const std::vector<const Tensor *> &inputs,
+           const std::vector<Tensor *> &outputs,
            const framework::ExecutionContext &ctx);

-  Executor &GetExecutor() { return *executor_; }
+  // detach IPU manually
+  void Detach();
+
+  // reset manually; call it before destruction
+  void Reset();

-  void SetScope(const framework::Scope &scope);
-  const framework::Scope *GetScope() { return scope_; }
+  void SetScope(const Scope &scope);
+  const Scope *GetScope() { return scope_; }
   void SetIpuStrategy(const IpuStrategy &strategy);
   const IpuStrategy *GetIpuStrategy() { return ipu_strategy_; }
+  void SetCustomOps(const std::vector<IpuCustomOpIdentifier> &custom_ops);

-  // Device
-  size_t GetNumDevices();
-  std::vector<int> GetDeviceIds();
-  Device GetDevice(int id);
-  void AttachDevice(int id);
-  bool DeviceIsAttached();
+  // save compiled model to onnx
+  void SaveMoldeProto(const std::string &path);

  private:
-  int UpperIpuNum();
   void Prepare();

  private:
-  std::shared_ptr<Compiler> compiler_;
+  std::unique_ptr<Compiler> compiler_;
   std::unique_ptr<Executor> executor_;
-  std::shared_ptr<popart::DeviceInfo> device_;
+  bool is_compiled_ = false;
   bool is_prepared_ = false;

   // not owned
-  const framework::Scope *scope_ = nullptr;
+  const Scope *scope_ = nullptr;
   const IpuStrategy *ipu_strategy_ = nullptr;

  private:
-  static std::shared_ptr<IpuBackend> instance_;
+  // time record for IpuBackend::Run
+  std::unique_ptr<platform::Timer> timer_;
+
+  DISABLE_COPY_AND_ASSIGN(IpuBackend);
 };

 }  // namespace ipu
...
@@ -16,76 +16,119 @@

 #include <popart/builder.hpp>
 #include <popart/graphtransformer.hpp>
+#include <popart/optimizer.hpp>

 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/scope.h"
-#include "paddle/fluid/platform/ipu/common.h"
-#include "paddle/fluid/platform/ipu/ipu_strategy.h"
+#include "paddle/fluid/platform/device/ipu/ipu_names.h"
+#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
+#include "paddle/fluid/platform/device/ipu/ipu_utils.h"

 namespace paddle {
 namespace platform {
 namespace ipu {

+struct CompilerResources {
+  // popart input tensor_ids
+  std::vector<popart::TensorId> inputs;
+  // popart output tensor_ids
+  std::vector<popart::TensorId> outputs;
+  // <paddle_var_name, popart_tensor_ids>
+  std::map<std::string, popart::TensorId> tensors;
+  // popart_weight_ids
+  std::vector<popart::TensorId> weights;
+  // popart loss tensor_id
+  popart::TensorId loss_var;
+  // paddle lr var_name
+  std::string lr_var;
+  // lr value
+  float lr;
+  // flag for lr is constant or scheduling
+  bool with_lr_sched = false;
+  // paddle optimizer type, eg: momentum, lamb
+  std::string optimizer_type;
+
+  using OptimizerFn =
+      std::function<std::unique_ptr<popart::Optimizer>(float lr)>;
+  OptimizerFn optimizer_fn;
+
+ public:
+  popart::Optimizer *Optimizer() { return optimizer.get(); }
+
+  popart::Optimizer *NewOptimizer() {
+    optimizer = optimizer_fn(lr);
+    return optimizer.get();
+  }
+
+  popart::Optimizer *UpdateOptimizer(float lr_new) {
+    optimizer = optimizer_fn(lr_new);
+    return optimizer.get();
+  }
+
+ private:
+  std::unique_ptr<popart::Optimizer> optimizer;
+};
+
 class Compiler {
  public:
   Compiler();
   ~Compiler();
   void RegisterOpFunc();
-  void LowerBody(const framework::ir::Graph *graph);
-  void InitInputs(framework::ir::Graph *graph,
-                  const std::vector<std::string> &feed_list);
+  void Prepare();
+  void LowerBody(const Graph *graph);
+  void InitInputs(Graph *graph, const std::vector<std::string> &feed_list);
   void InitOutputs(const std::vector<std::string> &fetch_list);
-  void LowerWeights(const framework::ir::Graph *graph,
-                    const framework::Scope *scope_);
+  void LowerConstants(const Graph *graph, const Scope *scope);
+  void LowerWeights(const Graph *graph, const Scope *scope);
+  void LowerOptimier(const Graph *graph, const Scope *scope);

   void InsertTensors(const std::vector<std::string> &output_names,
                      const std::vector<std::string> &tensor_ids);
   void InsertTensors(const std::vector<std::string> &output_names,
                      const std::string &tensor_id);
   void SetIpuIndexStage(const std::vector<std::string> &tensor_ids,
-                        const framework::OpDesc *op_desc);
-  void SetIpuIndexStage(const std::string &tensor_id,
-                        const framework::OpDesc *op_desc);
+                        const OpDesc *op_desc);
+  void SetIpuIndexStage(const std::string &tensor_id, const OpDesc *op_desc);
+  void SetAMPAttributes(const std::vector<std::string> &tensor_ids,
+                        const OpDesc *op_desc);
+  void SetAMPAttributes(const std::string &tensor_id, const OpDesc *op_desc);
+  void SetSerializeAttributes(const std::vector<std::string> &tensor_ids,
+                              const OpDesc *op_desc);
+  void SetSerializeAttributes(const std::string &tensor_id,
+                              const OpDesc *op_desc);

-  std::vector<popart::TensorId> GetInputs() { return inputs_; }
-  std::vector<popart::TensorId> GetOutputs() { return outputs_; }
-  std::map<std::string, popart::TensorId> GetTensors() { return tensors_; }
-  std::vector<popart::TensorId> &GetWeights();
+  std::string GetModelProto();

   void SetIpuStrategy(const IpuStrategy &strategy) {
     ipu_strategy_ = &strategy;
-  };
+  }
+
+  void SetCustomOps(const std::vector<IpuCustomOpIdentifier> &custom_ops);

-  std::string GetModelProto();
-  std::string GetFP16ModelProto();
+  CompilerResources *GetResources() { return resources_.get(); }

   void SaveModelProto(const std::string &path);
   void SaveModelProtoNoCheck(const std::string &path);
-  void ConvertProtoToFp16();

  private:
-  std::vector<std::string> GetOpInputs(const framework::OpDesc *op);
-  const std::vector<std::string> &GetOpOutputs(const framework::OpDesc *op);
-  popart::DebugContext BuildDebugContext(const framework::OpDesc *op);
+  std::vector<std::string> GetOpInputs(const OpDesc *op);
+  const std::vector<std::string> &GetOpOutputs(const OpDesc *op);
+  popart::DebugContext BuildDebugContext(const OpDesc *op);

  private:
   std::unique_ptr<popart::Builder> builder_;
+  std::unique_ptr<CompilerResources> resources_;

-  using OpFunc = std::function<void(framework::OpDesc *op_desc)>;
+  using OpFunc = std::function<void(OpDesc *op_desc)>;
   std::unordered_map<std::string, OpFunc> name_function_;

-  // stateful variable
-  std::map<std::string, popart::TensorId> tensors_;
-
   // feed_list_ & fetch_list_ save paddle tensor ids
   std::vector<std::string> feed_list_;
   std::vector<std::string> fetch_list_;

-  // inputs_ & outputs_ save popart tensor ids
-  std::vector<popart::TensorId> inputs_;
-  std::vector<popart::TensorId> outputs_;
-
-  // weights info map
-  std::vector<popart::TensorId> weights_;
-
-  std::string converted_proto_ = "";
   const IpuStrategy *ipu_strategy_ = nullptr;
+  std::map<std::string, IpuCustomOpIdentifier> custom_ops_;
 };

 }  // namespace ipu
...
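The new CompilerResources struct is how optimizer state is handed from the compiler to the executor. A hedged sketch of the intended flow follows; popart::ConstSGD is used purely for illustration, since the real factory is built by LowerOptimier from the paddle optimizer op:

// Illustrative only; not code from this commit.
CompilerResources res;
res.lr = 0.01f;
res.optimizer_type = "sgd";
res.optimizer_fn = [](float lr) -> std::unique_ptr<popart::Optimizer> {
  // ConstSGD is the simplest popart optimizer; the real code picks the
  // optimizer matching res.optimizer_type.
  return std::make_unique<popart::ConstSGD>(lr);
};

popart::Optimizer* opt = res.NewOptimizer();  // build from the initial lr
opt = res.UpdateOptimizer(0.001f);            // rebuild after an lr-schedule step

Keeping a factory rather than a ready-made popart::Optimizer is what makes the with_lr_sched case cheap: each learning-rate change just re-invokes optimizer_fn.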
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,26 +12,42 @@
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/platform/device/ipu/device.h"
+#include "paddle/fluid/platform/device/ipu/ipu_device.h"
+#include "paddle/fluid/platform/device/ipu/ipu_utils.h"

 namespace paddle {
 namespace platform {
 namespace ipu {

-Device::Device(const popart::DeviceInfo& device_info)
-    : id_(device_info.getId()), is_attached_(device_info.isAttached()) {
-  popart::DeviceType popart_device_type = device_info.getType();
-  switch (popart_device_type) {
-    case popart::DeviceType::IpuModel:
-      device_type_ = DeviceType::IpuModel;
-      break;
-    case popart::DeviceType::Ipu:
-      device_type_ = DeviceType::Ipu;
-      break;
-    default:
-      PADDLE_THROW(platform::errors::InvalidArgument(
-          "popart::DeviceType:Unsupported type %d", popart_device_type));
+int GetNumDevices() {
+  bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
+  if (ipu_model) {
+    return 1;
   }
+  int num_devices =
+      popart::DeviceManager::createDeviceManager().enumerateDevices().size();
+  PADDLE_ENFORCE_GT(num_devices, 0,
+                    platform::errors::Unavailable(
+                        "Did not find any IPU devices, please "
+                        "make sure the Poplar SDK is enabled"));
+  return num_devices;
+}
+
+std::vector<int> GetDeviceIds() {
+  bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
+  if (ipu_model) {
+    return {0};
+  }
+  std::vector<int> device_ids;
+  auto devices =
+      popart::DeviceManager::createDeviceManager().enumerateDevices();
+  PADDLE_ENFORCE_GT(
+      devices.size(), 0,
+      platform::errors::Unavailable("Did not find any IPU devices, please "
+                                    "make sure the Poplar SDK is enabled."));
+  for (auto device : devices) {
+    device_ids.push_back(device->getId());
+  }
+  return device_ids;
 }

 }  // namespace ipu
...
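With the Device class gone, device discovery is reduced to two free functions. A small usage sketch (assuming a host with the Poplar SDK environment sourced):

// Sketch; with POPLAR_IPUMODEL=1 both calls report a single simulated device.
#include "paddle/fluid/platform/device/ipu/ipu_device.h"

#include <iostream>

int main() {
  std::cout << "IPU count: " << paddle::platform::ipu::GetNumDevices() << "\n";
  for (int id : paddle::platform::ipu::GetDeviceIds()) {
    std::cout << "IPU id: " << id << "\n";
  }
  return 0;
}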
@@ -21,23 +21,11 @@
 namespace paddle {
 namespace platform {
 namespace ipu {

-enum class DeviceType { IpuModel = 0, Cpu, Ipu, OfflineIpu, Sim };
-
-class Device {
- public:
-  Device() {}
-  explicit Device(const popart::DeviceInfo& device_info);
-
-  int getId() const { return id_; }
-  bool isAttached() const { return is_attached_; }
-  DeviceType getType() const { return device_type_; }
-
- private:
-  int id_;
-  bool is_attached_;
-  DeviceType device_type_;
-  /* TODO:: Add more elements in the future */
-};
+// get the number of all available IPUs
+int GetNumDevices();
+
+// get the device ids of all available IPUs
+std::vector<int> GetDeviceIds();

 }  // namespace ipu
 }  // namespace platform
...
@@ -10,23 +10,18 @@
 See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/platform/device/ipu/ipu_info.h"
-#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
+#include "paddle/fluid/platform/device/ipu/ipu_device.h"

 namespace paddle {
 namespace platform {

 //! Get a list of device ids from environment variable or use all.
 std::vector<int> GetSelectedIPUDevices() {
-  std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
-      platform::ipu::IpuBackend::GetInstance();
-  return ipu_backend->GetDeviceIds();
+  return platform::ipu::GetDeviceIds();
 }

 //! Get the total number of IPU devices in system.
-int GetIPUDeviceCount() {
-  std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
-      platform::ipu::IpuBackend::GetInstance();
-  return ipu_backend->GetNumDevices();
-}
+int GetIPUDeviceCount() { return platform::ipu::GetNumDevices(); }

 }  // namespace platform
 }  // namespace paddle
@@ -17,8 +17,10 @@
 limitations under the License. */

 namespace paddle {
 namespace platform {

 std::vector<int> GetSelectedIPUDevices();
 int GetIPUDeviceCount();

 }  // namespace platform
 }  // namespace paddle

 #endif
@@ -22,6 +22,8 @@
 namespace ipu {

 static constexpr const char *sIpuIndexAttr = "ipu_index";
 static constexpr const char *sIpuStageAttr = "ipu_stage";
+static constexpr const char *sMatmulSerializeFactor = "serialize_factor";
+static constexpr const char *sMatmulSerializeMode = "serialize_mode";
 static constexpr const char *sOpIdentifyIdAttr = "op_identify_id";
 static constexpr const char *sDebugInfoId = "__debug_info_id";
@@ -29,6 +31,7 @@
 static constexpr const char *sBeta1 = "beta1";
 static constexpr const char *sBeta2 = "beta2";
 static constexpr const char *sBeta1Pow = "Beta1Pow";
 static constexpr const char *sBeta2Pow = "Beta2Pow";
+static constexpr const char *sLossScaling = "LossScaling";

 }  // namespace ipu
 }  // namespace platform
...
@@ -12,10 +12,27 @@
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/platform/ipu/ipu_strategy.h"
+#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
+
+#include <glog/logging.h>

 namespace paddle {
 namespace platform {
-namespace ipu {}  // namespace ipu
+namespace ipu {
+
+void IpuStrategy::enablePattern(const std::string& t) {
+  VLOG(10) << "enable popart pattern: " << t;
+  popart_patterns.enablePattern(t, true);
+}
+
+void IpuStrategy::disablePattern(const std::string& t) {
+  VLOG(10) << "disable popart pattern: " << t;
+  popart_patterns.enablePattern(t, false);
+}
+
+const bool IpuStrategy::isPatternEnabled(const std::string& t) {
+  return popart_patterns.isPatternEnabled(t);
+}
+
+}  // namespace ipu
 }  // namespace platform
 }  // namespace paddle
@@ -14,24 +14,86 @@
 limitations under the License. */

 #pragma once

+#include <popart/op.hpp>
 #include <popart/sessionoptions.hpp>
+#include <popart/tensorlocation.hpp>
+#include "popart/patterns/patterns.hpp"

 namespace paddle {
 namespace platform {
 namespace ipu {

 using VirtualGraphMode = popart::VirtualGraphMode;
+using RecomputationType = popart::RecomputationType;

 struct IpuStrategy {
+  IpuStrategy() {
+    // we always save optimizer state to OffChip and enable rts for saving
+    // memory
+    auto storage = popart::TensorLocation(popart::TensorStorage::OffChip,
+                                          popart::ReplicatedTensorSharding::On);
+    popart_options.optimizerStateTensorLocationSettings =
+        popart::TensorLocationSettings(storage);
+
+    // We divide the accumulationFactor and replicatedGraphCount after all
+    // reduce
+    popart_options.accumulationAndReplicationReductionType =
+        popart::ReductionType::Mean;
+    popart_options.meanAccumulationAndReplicationReductionStrategy =
+        popart::MeanReductionStrategy::Post;
+
+    popart_options.enableFloatingPointChecks = false;
+
+    // A directory for log traces to be written into.
+    popart_options.logDir = "popart_log";
+  }
+  ~IpuStrategy() {}
+
+  // Number of ipus needed in total: replica * ipu_per_replica
   int num_ipus = 1;
+
+  // batches per step
   int batches_per_step = 1;
-  int batch_size = 1;
+
+  // micro batch-size
+  int micro_batch_size = 1;
+
+  // training flag, true for training
   bool is_training = true;
+
+  // save the onnx model lowered from the paddle program description
   bool save_init_onnx = false;
-  bool save_last_onnx = true;
-  popart::SessionOptions popart_options_;
+
+  // save the trained model
+  bool save_onnx_checkpoint = false;
+
+  // save paddle model every n steps
+  int save_per_n_step = 1;
+
+  // average sharding, used for debugging
   bool need_avg_shard = false;
+
+  // flag for fp16, true for pure fp16
   bool enable_fp16 = false;
+
+  // available memory proportion, 0.0f to disable
+  float available_memory_proportion = 0.0f;
+
+  // loss scaling; currently we can't get loss scaling from
+  // optimizer_extract_pass, so we have to set it here
+  float loss_scaling = 1.0f;
+
+  // defaultMaxWeightNorm for the adam optimizer
+  float max_weight_norm = 65504.0f;
+
+  // popart session options
+  popart::SessionOptions popart_options;
+  popart::Patterns popart_patterns;
+
+ public:
+  void enablePattern(const std::string& t);
+  void disablePattern(const std::string& t);
+  const bool isPatternEnabled(const std::string& t);
 };

 }  // namespace ipu
...
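A sketch of filling in the extended strategy; the field values below are illustrative, and pattern names come from popart itself:

// Illustrative configuration, not from the commit.
paddle::platform::ipu::IpuStrategy strategy;
strategy.num_ipus = 4;              // replica * ipu_per_replica
strategy.micro_batch_size = 2;      // replaces the old batch_size field
strategy.batches_per_step = 16;
strategy.is_training = true;
strategy.available_memory_proportion = 0.3f;
strategy.loss_scaling = 8.0f;
// popart session options and patterns are now exposed directly:
strategy.popart_options.enableReplicatedGraphs = true;
strategy.disablePattern("PostNRepl");  // any popart pattern name works here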
@@ -12,22 +12,23 @@
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/platform/ipu/ipu_utils.h"
+#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
+
+#include <cmath>

 namespace paddle {
 namespace platform {
 namespace ipu {

-void* PaddleIArray::data() { return tensor_->data(); }
+void* PaddleIArray::data() { return tensor_.data(); }

 popart::DataType PaddleIArray::dataType() const {
-  return VarType2PopartType(tensor_->type());
+  return VarType2PopartType(tensor_.type());
 }

-std::size_t PaddleIArray::rank() const { return tensor_->dims().size(); }
+std::size_t PaddleIArray::rank() const { return tensor_.dims().size(); }

 int64_t PaddleIArray::dim(size_t index) const {
-  return tensor_->dims().at(index);
+  return tensor_.dims().at(index);
 }

 std::size_t PaddleIArray::nelms() const {
@@ -150,6 +151,32 @@ bool GetBoolEnv(std::string str) {
   }
 }

+std::vector<std::pair<std::string, std::string>> GetOptPrePostfix(
+    const std::string& opt_type) {
+  // format: {popart_tensor_id, paddle_tensor_id}, ...
+  std::vector<std::pair<std::string, std::string>> pre_post_fix;
+  if (opt_type == "adam" || opt_type == "lamb") {
+    pre_post_fix.push_back(std::make_pair("", ""));
+    pre_post_fix.push_back(std::make_pair("Accl1___", "_moment1_0"));
+    pre_post_fix.push_back(std::make_pair("Accl2___", "_moment2_0"));
+    pre_post_fix.push_back(std::make_pair("Step___", "_beta1_pow_acc_0"));
+  } else if (opt_type == "sgd" || opt_type == "momentum") {
+    // sgd
+    pre_post_fix.push_back(std::make_pair("", ""));
+  } else {
+    pre_post_fix.push_back(std::make_pair("", ""));
+  }
+  return pre_post_fix;
+}
+
+int RequestIpus(const int num_ipus) {
+  // num_ipus must be pow(2, n)
+  return std::pow(2, ceil(log2(num_ipus)));
+}
+
 }  // namespace ipu
 }  // namespace platform
 }  // namespace paddle
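RequestIpus replaces the old IpuBackend::UpperIpuNum: popart can only acquire IPUs in powers of two, so the requested count is rounded up. Note that, unlike UpperIpuNum, the new helper no longer validates num_ipus > 0. GetOptPrePostfix records how popart optimizer-state tensor ids map back to paddle variable names, e.g. for adam the popart tensor Accl1___<w> corresponds to the paddle var <w>_moment1_0. A quick check of the rounding behaviour:

// Illustrative assertions, assuming the function above is visible.
#include <cassert>

void CheckRequestIpus() {
  using paddle::platform::ipu::RequestIpus;
  assert(RequestIpus(1) == 1);
  assert(RequestIpus(3) == 4);  // rounded up to the next power of two
  assert(RequestIpus(8) == 8);  // powers of two pass through unchanged
}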
@@ -17,14 +17,27 @@
 limitations under the License. */

 #include <popart/ndarraywrapper.hpp>
 #include <popart/tensordata.hpp>
 #include <popart/tensorinfo.hpp>
+#include <popart/vendored/any.hpp>

-#include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/platform/float16.h"

 namespace paddle {
 namespace platform {
 namespace ipu {

+using float16 = platform::float16;
+using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
+using Scope = framework::Scope;
+using OpDesc = framework::OpDesc;
+using Graph = framework::ir::Graph;
+using Node = framework::ir::Node;
+using BlockDesc = framework::BlockDesc;
+
 // onnx dtype
 // https://github.com/onnx/onnx/blob/master/onnx/onnx-ml.proto3
 enum ONNXDataType : int {
@@ -49,14 +62,15 @@
 class PaddleIArray final : public popart::IArray {
  public:
-  explicit PaddleIArray(framework::Tensor *tensor) : tensor_(tensor) {
+  explicit PaddleIArray(const Tensor* tensor) {
+    tensor_.ShareDataWith(*tensor);
     for (int i = 0; i < tensor->dims().size(); ++i) {
       shape_.push_back(tensor->dims().at(i));
     }
   }

  public:
-  void *data();
+  void* data();
   popart::DataType dataType() const;
   std::size_t rank() const;
   int64_t dim(size_t index) const;
@@ -64,7 +78,7 @@
   const popart::Shape shape() const;

  private:
-  framework::Tensor *tensor_;
+  Tensor tensor_;
   std::vector<int64_t> shape_;
 };

@@ -74,8 +88,7 @@
 popart::DataType OnnxDtype2PopartType(const int type);
 bool GetBoolEnv(std::string str);

 template <typename T>
-std::unique_ptr<popart::NDArrayWrapper<T>> Tensor2IArray(
-    const framework::Tensor &tensor) {
+std::unique_ptr<popart::NDArrayWrapper<T>> Tensor2IArray(const Tensor& tensor) {
   auto dtype = VarType2PopartType(tensor.type());
   auto shape = std::vector<int64_t>();
   for (size_t i = 0; i < tensor.dims().size(); ++i) {
@@ -84,18 +97,140 @@
   popart::TensorInfo tensor_info(dtype, shape);

   return std::make_unique<popart::NDArrayWrapper<T>>(
-      reinterpret_cast<T *>(tensor.data()), tensor_info);
+      reinterpret_cast<T*>(tensor.data()), tensor_info);
 }

 template <typename T>
 std::unique_ptr<popart::NDArrayWrapper<T>> LoDTensor2IArray(
-    framework::LoDTensor const &lod_tensor) {
-  PADDLE_ENFORCE_EQ(
-      lod_tensor.lod().size(), 0UL,
-      platform::errors::InvalidArgument("LoDTensor2IArray is Unimplemented"));
-  return Tensor2IArray<T>(lod_tensor);
+    LoDTensor const& lod_tensor) {
+  if (lod_tensor.lod().size() == 0) {
+    return Tensor2IArray<T>(lod_tensor);
+  } else {
+    PADDLE_THROW(
+        platform::errors::Unimplemented("LoDTensor2IArray is Unimplemented"));
+  }
+}
+
+template <typename T>
+T GetSingleVarFromScope(const Scope* scope, const std::string& var_name) {
+  auto var = scope->GetVar(var_name);
+  auto tensor = var->Get<framework::LoDTensor>();
+  // check dtype is ?
+  return tensor.data<T>()[0];
+}
+
+struct CustomOpAttrVisitor : public boost::static_visitor<void> {
+  explicit CustomOpAttrVisitor(std::map<std::string, popart::any>* attr,
+                               const std::string& attr_name)
+      : attrs_(attr), attr_name_(attr_name) {}
+  mutable std::map<std::string, popart::any>* attrs_;
+  std::string attr_name_;
+
+  void operator()(int v) const { attrs_->emplace(attr_name_, v); }
+  void operator()(float v) const { attrs_->emplace(attr_name_, v); }
+  void operator()(const std::string& v) const {
+    attrs_->emplace(attr_name_, v);
+  }
+  void operator()(const std::vector<int>& v) const {
+    attrs_->emplace(attr_name_, v);
+  }
+  void operator()(const std::vector<float>& v) const {
+    attrs_->emplace(attr_name_, v);
+  }
+  void operator()(const std::vector<std::string>& v) const {
+    attrs_->emplace(attr_name_, v);
+  }
+  void operator()(bool v) const { attrs_->emplace(attr_name_, v); }
+  void operator()(const std::vector<bool>& v) const {
+    attrs_->emplace(attr_name_, v);
+  }
+  void operator()(BlockDesc* desc) const {
+    PADDLE_THROW(platform::errors::Unavailable(
+        "Unsupported calling method for `BlockDesc` type."));
+  }
+  void operator()(const std::vector<BlockDesc*>& v) const {
+    PADDLE_THROW(platform::errors::Unavailable(
+        "Unsupported calling method for `BlockDesc` type."));
+  }
+  void operator()(int64_t v) const { attrs_->emplace(attr_name_, v); }
+  void operator()(const std::vector<int64_t>& v) const {
+    attrs_->emplace(attr_name_, v);
+  }
+  void operator()(const std::vector<double>& v) const {
+    attrs_->emplace(attr_name_, v);
+  }
+  void operator()(boost::blank) const {
+    PADDLE_THROW(platform::errors::Unavailable(
+        "Unsupported calling method for `boost::blank` type."));
+  }
+};
+
+struct IpuCustomOpIdentifier {
+  IpuCustomOpIdentifier(const std::string& _paddle_op,
+                        const std::string& _popart_op,
+                        const std::string& _domain, unsigned int _version)
+      : paddle_op(_paddle_op), popart_op(_domain, _popart_op, _version) {}
+
+  std::string repr() {
+    std::ostringstream os;
+    os << "paddle_op: " << paddle_op << ", domain: " << popart_op.domain
+       << ", type: " << popart_op.type << ", version: " << popart_op.version;
+    return os.str();
+  }
+
+  std::string paddle_op;
+  popart::OperatorIdentifier popart_op;
+};
+
+struct ConstantOpAttrVisitor : public boost::static_visitor<void> {
+  explicit ConstantOpAttrVisitor(framework::LoDTensor* tensor,
+                                 framework::proto::VarType::Type dtype)
+      : tensor_(tensor), dtype_(dtype) {}
+  framework::LoDTensor* tensor_;
+  framework::proto::VarType::Type dtype_;
+
+  void operator()(const std::vector<int>& vec) const {
+    framework::TensorFromVector<int>(vec, tensor_);
+  }
+  void operator()(const std::vector<float>& vec) const {
+    if (dtype_ == framework::proto::VarType::FP16) {
+      std::vector<float16> vec_fp16;
+      std::transform(vec.begin(), vec.end(), std::back_inserter(vec_fp16),
+                     [](float f) -> float16 { return float16(f); });
+      framework::TensorFromVector<float16>(vec_fp16, tensor_);
+    } else {
+      framework::TensorFromVector<float>(vec, tensor_);
+    }
+  }
+  void operator()(const std::vector<bool>& vec) const {
+    framework::TensorFromVector<bool>(vec, tensor_);
+  }
+  void operator()(const std::vector<int64_t>& vec) const {
+    framework::TensorFromVector<int64_t>(vec, tensor_);
+  }
+  void operator()(const std::vector<double>& vec) const {
+    framework::TensorFromVector<double>(vec, tensor_);
+  }
+  void RaiseError() const {
+    PADDLE_THROW(
+        platform::errors::InvalidArgument("Constant value must be a vector"));
+  }
+  void operator()(int v) const { RaiseError(); }
+  void operator()(float v) const { RaiseError(); }
+  void operator()(const std::string& v) const { RaiseError(); }
+  void operator()(const std::vector<std::string>& v) const { RaiseError(); }
+  void operator()(bool v) const { RaiseError(); }
+  void operator()(BlockDesc* desc) const { RaiseError(); }
+  void operator()(const std::vector<BlockDesc*>& v) const { RaiseError(); }
+  void operator()(int64_t v) const { RaiseError(); }
+  void operator()(boost::blank) const { RaiseError(); }
+};
+
+std::vector<std::pair<std::string, std::string>> GetOptPrePostfix(
+    const std::string& opt_type);
+
+int RequestIpus(const int num_ipus);
+
 }  // namespace ipu
 }  // namespace platform
 }  // namespace paddle
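A sketch of the host-side wrapping path (CPUPlace and the fill are illustrative, not from the commit). PaddleIArray now holds a Tensor by value and shares the buffer via ShareDataWith, so the view stays valid even if the caller's Tensor object goes away, while Tensor2IArray wraps the raw buffer directly:

// Illustrative only.
paddle::framework::Tensor t;
t.Resize({2, 3});
float* buf = t.mutable_data<float>(paddle::platform::CPUPlace());
std::fill(buf, buf + t.numel(), 1.0f);

// Zero-copy view for popart I/O; shares t's allocation.
paddle::platform::ipu::PaddleIArray iarray(&t);

// Direct popart NDArrayWrapper over the same buffer.
auto ndarray = paddle::platform::ipu::Tensor2IArray<float>(t);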
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// clang-format off
#pragma once
OP_DECL(popart_nllloss_v2, aiGraphcoreOpset.nllloss, SIG_ARG(INT32,popart::ReductionType,reduction) OPT_ARG(INT32,ignoreIndex) ARG(BOOL,inputIsLogProbability) ) // NOLINT
// clang-format on
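The new header is an X-macro table: OP_DECL, ARG, OPT_ARG and SIG_ARG are defined by whoever includes it, so one table can generate declarations, registrations, or just a name list. A generic consumer sketch follows; the real macro definitions live elsewhere in the tree, and the include path below is hypothetical:

// Hypothetical consumer that collects op names from the table.
#define ARG(dtype, name)
#define OPT_ARG(dtype, name)
#define SIG_ARG(dtype, type, name)
#define OP_DECL(op_name, onnx_call, args) #op_name,
static const char* kIpuSupportedOps[] = {
#include "supported_ops_table.h"  // hypothetical path to the OP_DECL table
};
#undef OP_DECL
#undef SIG_ARG
#undef OPT_ARG
#undef ARG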