未验证 提交 b2aee3e3 编写于 作者: A Allen Guo 提交者: GitHub

[IPU] update ipu_backend p0 (#38854)

* update ipu_backend

* sync with paddle internal
Co-authored-by: NXiaobing Wang <xiaobingw@graphcore.ai>
Co-authored-by: NAllen Guo <alleng@graphcore.ai>
Co-authored-by: NZhixin Yao <zhixiny@graphcore.ai>
Co-authored-by: NHaicheng Jiang <haichengj@graphcore.ai>
Co-authored-by: NHan Zhao <hanzhao@graphcore.ai>

* apply comments 01

* update error message

* restore ipu_executor and ipu_optimizer

* add clang-format on
Co-authored-by: NXiaobing Wang <xiaobingw@graphcore.ai>
Co-authored-by: NZhixin Yao <zhixiny@graphcore.ai>
Co-authored-by: NHaicheng Jiang <haichengj@graphcore.ai>
Co-authored-by: NHan Zhao <hanzhao@graphcore.ai>
上级 b4cb3589
IF(WITH_IPU)
FILE(GLOB POPART_CANONICALIZATION_SRC ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/device/ipu/popart_canonicalization/*.cc)
cc_library(ipu_device SRCS device.cc DEPS enforce popart)
cc_library(ipu_utils SRCS ipu_utils.cc DEPS memory framework_proto popart)
cc_library(ipu_strategy SRCS ipu_strategy.cc DEPS popart graph framework_proto enforce)
cc_library(ipu_optimizer SRCS ipu_optimizer.cc DEPS popart enforce)
cc_library(ipu_executor SRCS ipu_executor.cc DEPS ipu_optimizer ipu_utils popart graph framework_proto)
cc_library(popart_canonicalization_utils SRCS ${POPART_CANONICALIZATION_SRC} DEPS framework_proto enforce ipu_utils)
cc_library(ipu_compiler SRCS ipu_compiler.cc DEPS popart graph ipu_utils graph_helper)
cc_library(ipu_backend SRCS ipu_backend.cc DEPS popart ipu_compiler graph framework_proto enforce ipu_utils ipu_strategy ipu_device ipu_executor graph_helper)
list(APPEND PADDLE_IPU_SRC ${POPART_CANONICALIZATION_SRC})
set(PADDLE_IPU_LIB "${CMAKE_CURRENT_BINARY_DIR}/libpaddle_ipu.so" CACHE STRING "")
set(PADDLE_IPU_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}" CACHE STRING "")
set(IPU_BACKEND_SRC
"ipu_device.cc"
"ipu_strategy.cc"
"ipu_executor.cc"
"ipu_compiler.cc"
"ipu_backend.cc"
"ipu_utils.cc"
)
cc_library(ipu_backend SRCS ${IPU_BACKEND_SRC} DEPS popart graph framework_proto enforce graph_helper timer)
cc_library(ipu_info SRCS ipu_info.cc DEPS ipu_backend)
cc_library(paddle_ipu SHARED SRCS ${PADDLE_IPU_SRC} DEPS popart)
add_dependencies(paddle_ipu ipu_backend)
ENDIF()
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/ipu/ipu_backend.h"
#include "paddle/fluid/platform/ipu/ipu_utils.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/ir/graph.h"
......@@ -24,170 +24,92 @@ namespace paddle {
namespace platform {
namespace ipu {
std::shared_ptr<IpuBackend> IpuBackend::instance_ = nullptr;
// Returns a pointer to the single IpuBackend object shared by all callers.
// The object is a function-local static: it is constructed on the first call
// and the same address is handed back on every call. Callers do not own it.
IpuBackend* IpuBackend::GetInstance() {
  static IpuBackend backend;
  IpuBackend* const singleton = &backend;
  return singleton;
}
IpuBackend::IpuBackend() {
compiler_ = std::make_shared<Compiler>();
compiler_ = std::make_unique<Compiler>();
executor_ = std::make_unique<Executor>();
}
void IpuBackend::Clear() {
IpuBackend::~IpuBackend() {
compiler_.reset();
executor_.reset();
// detach device
if (device_ != nullptr && device_->isAttached()) {
device_->detach();
device_.reset();
device_ = nullptr;
}
}
IpuBackend::~IpuBackend() { Clear(); }
std::shared_ptr<IpuBackend> IpuBackend::GetInstance() {
if (!instance_) {
instance_.reset(new IpuBackend());
}
return instance_;
}
// This api should only call from python, always return a new object
std::shared_ptr<IpuBackend> IpuBackend::GetNewInstance() {
instance_.reset(new IpuBackend());
return instance_;
}
void IpuBackend::Compile(framework::ir::Graph* graph,
void IpuBackend::Compile(Graph* graph,
const std::vector<std::string>& feed_list,
const std::vector<std::string>& fetch_list) {
VLOG(10) << "enter IpuBackend::Compile";
compiler_->Prepare();
executor_->SetCompilerResources(compiler_->GetResources());
compiler_->InitInputs(graph, feed_list);
compiler_->LowerConstants(graph, scope_);
compiler_->LowerWeights(graph, scope_);
compiler_->LowerBody(graph);
compiler_->InitOutputs(fetch_list);
executor_->SetWeights(compiler_->GetWeights());
if (ipu_strategy_->is_training) {
compiler_->LowerOptimier(graph, scope_);
}
is_compiled_ = true;
// when call compile, means a new graph
is_prepared_ = false;
VLOG(10) << "leave IpuBackend::Compile";
}
void IpuBackend::Run(const std::vector<const framework::Tensor*>& inputs,
const std::vector<framework::Tensor*>& outputs,
void IpuBackend::Run(const std::vector<const Tensor*>& inputs,
const std::vector<Tensor*>& outputs,
const framework::ExecutionContext& ctx) {
Prepare();
auto inputs_id = compiler_->GetInputs();
auto outputs_id = compiler_->GetOutputs();
executor_->Run(inputs_id, inputs, outputs_id, outputs, ctx);
timer_->Start();
executor_->Run(inputs, outputs, ctx);
timer_->Pause();
VLOG(10) << "[IPU Run]: " << timer_->ElapsedMS() << " (ms)";
}
void IpuBackend::Prepare() {
if (is_prepared_) {
return;
} else {
if (!is_prepared_) {
executor_->Prepare(compiler_->GetModelProto());
timer_.reset(new platform::Timer());
is_prepared_ = true;
}
// convert Model to fp16
if (ipu_strategy_->enable_fp16) {
compiler_->ConvertProtoToFp16();
}
auto proto = compiler_->GetModelProto();
auto tensors = compiler_->GetTensors();
auto outputs = compiler_->GetOutputs();
executor_->Prepare(proto, tensors, outputs, device_);
}
void IpuBackend::SetScope(const framework::Scope& scope) {
// Asks the executor to detach from its IPU.
// Reset() releases executor_, so the pointer may be null by the time this is
// called; in that case there is nothing left to detach and the call is a
// no-op instead of a null dereference.
void IpuBackend::Detach() {
  if (executor_ != nullptr) {
    executor_->Detach();
  }
}
// Detaches the executor and then releases both the compiler and the executor.
// After this call compiler_ and executor_ are null. The detach step is
// guarded so that calling Reset() more than once is harmless rather than a
// null dereference.
void IpuBackend::Reset() {
  if (executor_ != nullptr) {
    executor_->Detach();
  }
  compiler_.reset();
  executor_.reset();
}
void IpuBackend::SetScope(const Scope& scope) {
scope_ = &scope;
executor_->SetScope(&scope);
}
void IpuBackend::SetIpuStrategy(const IpuStrategy& strategy) {
ipu_strategy_ = &strategy;
executor_->SetIpuStrategy(strategy);
compiler_->SetIpuStrategy(strategy);
executor_->SetIpuStrategy(strategy);
}
size_t IpuBackend::GetNumDevices() {
// IpuModel
bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
if (ipu_model) return 1;
// Real dev
size_t num_devices =
popart::DeviceManager::createDeviceManager().enumerateDevices().size();
PADDLE_ENFORCE_GT(
num_devices, 0,
platform::errors::Unavailable(
"Do not found any IPU devices, please make "
"sure Poplar sdk is enabled or enable ENV \"POPLAR_IPUMODEL=1\""));
return num_devices;
}
std::vector<int> IpuBackend::GetDeviceIds() {
bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
if (ipu_model) {
return {0};
}
std::vector<int> device_ids;
auto devices =
popart::DeviceManager::createDeviceManager().enumerateDevices();
PADDLE_ENFORCE_GT(
devices.size(), 0,
platform::errors::Unavailable("Do not found any IPU devices, please make "
"sure Poplar sdk is enabled."));
for (auto device : devices) {
device_ids.push_back(device->getId());
}
return device_ids;
}
Device IpuBackend::GetDevice(int id) {
bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
if (ipu_model) {
std::map<std::string, std::string> deviceOpts{{"numIPUs", "1 "}};
device_ = popart::DeviceManager::createDeviceManager().createIpuModelDevice(
deviceOpts);
Device device(*device_.get());
return device;
}
size_t num_devices = GetNumDevices();
if (id < 0 || id >= num_devices) {
PADDLE_THROW(platform::errors::InvalidArgument(
"device id %d is invalid, number devices is %d", id, num_devices));
}
std::shared_ptr<popart::DeviceInfo> popart_device_info =
popart::DeviceManager::createDeviceManager().getDevice(
popart::SyncPattern::Full, id);
Device device(*popart_device_info.get());
return device;
}
void IpuBackend::AttachDevice(int id) {
// trick here
// Compiler ipu is not same as the runtime ipu.
VLOG(10) << "comile ipu id = " << id;
bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
if (ipu_model) {
return;
}
device_ = popart::DeviceManager::createDeviceManager().acquireAvailableDevice(
UpperIpuNum());
PADDLE_ENFORCE_NOT_NULL(
device_, platform::errors::Unavailable("Can't attach IPU, ipu_num = %d.",
UpperIpuNum()));
void IpuBackend::SetCustomOps(
const std::vector<IpuCustomOpIdentifier>& custom_ops) {
compiler_->SetCustomOps(custom_ops);
}
bool IpuBackend::DeviceIsAttached() { return device_ != nullptr; }
// num_ipus must be pow(2,n);
int IpuBackend::UpperIpuNum() {
PADDLE_ENFORCE_GT(ipu_strategy_->num_ipus, 0,
platform::errors::Unavailable(
"The ipu num get is wrong, please make sure the "
"sharding or pipline parameter is right."));
int i = 0;
while (std::pow(2, i) < ipu_strategy_->num_ipus) {
i++;
void IpuBackend::SaveMoldeProto(const std::string& path) {
if (ipu_strategy_->is_training && is_prepared_) {
executor_->SaveModelToHost(path);
} else if (is_compiled_) {
compiler_->SaveModelProtoNoCheck(path);
} else {
LOG(WARNING) << "Model is empty";
}
return std::pow(2, i);
}
} // namespace ipu
......
......@@ -14,88 +14,86 @@ limitations under the License. */
#pragma once
#include <cmath>
#include <popart/devicemanager.hpp>
#include <popart/names.hpp>
#include <popart/tensorinfo.hpp>
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device/ipu/ipu_compiler.h"
#include "paddle/fluid/platform/device/ipu/ipu_device.h"
#include "paddle/fluid/platform/device/ipu/ipu_executor.h"
#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/ipu/device.h"
#include "paddle/fluid/platform/ipu/ipu_compiler.h"
#include "paddle/fluid/platform/ipu/ipu_executor.h"
#include "paddle/fluid/platform/ipu/ipu_strategy.h"
#include "paddle/fluid/platform/timer.h"
namespace paddle {
namespace platform {
namespace ipu {
// IpuBackend is the center of paddle-ipu, its function include:
// 1. Compile paddle model to popart model
// 2. Run popart model, inference or training
// 3. Request and release device
// 4. Other helper function
class IpuBackend {
// IpuBackend is the center of paddle-ipu, its function include:
// 1. Compile paddle model to popart model
// 2. Run popart model, inference or training
// 3. Request and release device
// 4. Other helper function
public:
static IpuBackend *GetInstance();
public:
IpuBackend();
~IpuBackend();
void Clear();
// return the instance if it exists, else create and return it
static std::shared_ptr<IpuBackend> GetInstance();
// always return a new instance_
static std::shared_ptr<IpuBackend> GetNewInstance();
// what compile does include(call compiler_):
// 1. map paddle-op -> popart op
// 2. construct popart onnx compute graph
void Compile(framework::ir::Graph *graph,
const std::vector<std::string> &feed_list,
void Compile(Graph *graph, const std::vector<std::string> &feed_list,
const std::vector<std::string> &fetch_list);
// what run does include:
// 1. construct forward onnx graph
// 2. graph-level optimization
// 3. autodiff
void Run(const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs,
void Run(const std::vector<const Tensor *> &inputs,
const std::vector<Tensor *> &outputs,
const framework::ExecutionContext &ctx);
Executor &GetExecutor() { return *executor_; }
// detach IPU manually
void Detach();
// reset manually
// call it before destruct works
void Reset();
void SetScope(const framework::Scope &scope);
const framework::Scope *GetScope() { return scope_; }
void SetScope(const Scope &scope);
const Scope *GetScope() { return scope_; }
void SetIpuStrategy(const IpuStrategy &strategy);
const IpuStrategy *GetIpuStrategy() { return ipu_strategy_; }
void SetCustomOps(const std::vector<IpuCustomOpIdentifier> &custom_ops);
// Device
size_t GetNumDevices();
std::vector<int> GetDeviceIds();
Device GetDevice(int id);
void AttachDevice(int id);
bool DeviceIsAttached();
// save compiled model to onnx
void SaveMoldeProto(const std::string &path);
private:
int UpperIpuNum();
void Prepare();
private:
std::shared_ptr<Compiler> compiler_;
std::unique_ptr<Compiler> compiler_;
std::unique_ptr<Executor> executor_;
std::shared_ptr<popart::DeviceInfo> device_;
bool is_compiled_ = false;
bool is_prepared_ = false;
// not own
const framework::Scope *scope_ = nullptr;
const Scope *scope_ = nullptr;
const IpuStrategy *ipu_strategy_ = nullptr;
private:
static std::shared_ptr<IpuBackend> instance_;
// time record for IpuBackend::Run
std::unique_ptr<platform::Timer> timer_;
DISABLE_COPY_AND_ASSIGN(IpuBackend);
};
} // namespace ipu
......
......@@ -16,76 +16,119 @@
#include <popart/builder.hpp>
#include <popart/graphtransformer.hpp>
#include <popart/optimizer.hpp>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/ipu/common.h"
#include "paddle/fluid/platform/ipu/ipu_strategy.h"
#include "paddle/fluid/platform/device/ipu/ipu_names.h"
#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
namespace paddle {
namespace platform {
namespace ipu {
// State shared out of the Compiler via Compiler::GetResources(): the popart
// tensor ids for inputs/outputs/weights, the paddle-name -> popart-id map,
// and everything needed to (re)build the popart optimizer.
struct CompilerResources {
  // popart input tensor_ids
  std::vector<popart::TensorId> inputs;
  // popart output tensor_ids
  std::vector<popart::TensorId> outputs;
  // <paddle_var_name, popart_tensor_ids>
  std::map<std::string, popart::TensorId> tensors;
  // popart_weight_ids
  std::vector<popart::TensorId> weights;
  // popart loss tensor_id
  popart::TensorId loss_var;
  // paddle lr var_name
  std::string lr_var;
  // lr value. Initialised so that NewOptimizer() never reads an
  // indeterminate float when no learning rate has been stored yet.
  // NOTE(review): 0.0f is a placeholder default — confirm the value the
  // optimizer-lowering code is expected to overwrite it with.
  float lr = 0.0f;
  // flag for lr is constant or scheduling
  bool with_lr_sched = false;
  // paddle optimizer type, eg: momentum, lamb
  std::string optimizer_type;

  // Factory that builds a popart optimizer for a given learning rate.
  using OptimizerFn =
      std::function<std::unique_ptr<popart::Optimizer>(float lr)>;
  OptimizerFn optimizer_fn;

 public:
  // Returns the most recently built optimizer, or nullptr if none was built.
  popart::Optimizer *Optimizer() { return optimizer.get(); }

  // Builds an optimizer from the stored `lr`, keeps ownership, and returns a
  // non-owning pointer. `optimizer_fn` must have been set beforehand; calling
  // an empty std::function throws std::bad_function_call.
  popart::Optimizer *NewOptimizer() {
    optimizer = optimizer_fn(lr);
    return optimizer.get();
  }

  // Same as NewOptimizer() but uses `lr_new` instead of the stored `lr`.
  // The stored `lr` itself is left unchanged.
  popart::Optimizer *UpdateOptimizer(float lr_new) {
    optimizer = optimizer_fn(lr_new);
    return optimizer.get();
  }

 private:
  // Owns the optimizer returned by the accessors above.
  std::unique_ptr<popart::Optimizer> optimizer;
};
class Compiler {
public:
Compiler();
~Compiler();
void RegisterOpFunc();
void LowerBody(const framework::ir::Graph *graph);
void InitInputs(framework::ir::Graph *graph,
const std::vector<std::string> &feed_list);
void Prepare();
void LowerBody(const Graph *graph);
void InitInputs(Graph *graph, const std::vector<std::string> &feed_list);
void InitOutputs(const std::vector<std::string> &fetch_list);
void LowerWeights(const framework::ir::Graph *graph,
const framework::Scope *scope_);
void LowerConstants(const Graph *graph, const Scope *scope);
void LowerWeights(const Graph *graph, const Scope *scope);
void LowerOptimier(const Graph *graph, const Scope *scope);
void InsertTensors(const std::vector<std::string> &output_names,
const std::vector<std::string> &tensor_ids);
void InsertTensors(const std::vector<std::string> &output_names,
const std::string &tensor_id);
void SetIpuIndexStage(const std::vector<std::string> &tensor_ids,
const framework::OpDesc *op_desc);
void SetIpuIndexStage(const std::string &tensor_id,
const framework::OpDesc *op_desc);
std::vector<popart::TensorId> GetInputs() { return inputs_; }
std::vector<popart::TensorId> GetOutputs() { return outputs_; }
std::map<std::string, popart::TensorId> GetTensors() { return tensors_; }
std::vector<popart::TensorId> &GetWeights();
const OpDesc *op_desc);
void SetIpuIndexStage(const std::string &tensor_id, const OpDesc *op_desc);
void SetAMPAttributes(const std::vector<std::string> &tensor_ids,
const OpDesc *op_desc);
void SetAMPAttributes(const std::string &tensor_id, const OpDesc *op_desc);
void SetSerializeAttributes(const std::vector<std::string> &tensor_ids,
const OpDesc *op_desc);
void SetSerializeAttributes(const std::string &tensor_id,
const OpDesc *op_desc);
std::string GetModelProto();
void SetIpuStrategy(const IpuStrategy &strategy) {
ipu_strategy_ = &strategy;
};
}
void SetCustomOps(const std::vector<IpuCustomOpIdentifier> &custom_ops);
CompilerResources *GetResources() { return resources_.get(); }
std::string GetModelProto();
std::string GetFP16ModelProto();
void SaveModelProto(const std::string &path);
void SaveModelProtoNoCheck(const std::string &path);
void ConvertProtoToFp16();
private:
std::vector<std::string> GetOpInputs(const framework::OpDesc *op);
const std::vector<std::string> &GetOpOutputs(const framework::OpDesc *op);
popart::DebugContext BuildDebugContext(const framework::OpDesc *op);
std::vector<std::string> GetOpInputs(const OpDesc *op);
const std::vector<std::string> &GetOpOutputs(const OpDesc *op);
popart::DebugContext BuildDebugContext(const OpDesc *op);
private:
std::unique_ptr<popart::Builder> builder_;
std::unique_ptr<CompilerResources> resources_;
using OpFunc = std::function<void(framework::OpDesc *op_desc)>;
using OpFunc = std::function<void(OpDesc *op_desc)>;
std::unordered_map<std::string, OpFunc> name_function_;
// stateful variable
std::map<std::string, popart::TensorId> tensors_;
// feed_list_ & fetch_list save paddle tensor id
std::vector<std::string> feed_list_;
std::vector<std::string> fetch_list_;
// inputs_ & outputs_ save popart tensor id
std::vector<popart::TensorId> inputs_;
std::vector<popart::TensorId> outputs_;
// weights info map
std::vector<popart::TensorId> weights_;
std::string converted_proto_ = "";
const IpuStrategy *ipu_strategy_ = nullptr;
std::map<std::string, IpuCustomOpIdentifier> custom_ops_;
};
} // namespace ipu
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......@@ -12,26 +12,42 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/device/ipu/device.h"
#include "paddle/fluid/platform/device/ipu/ipu_device.h"
#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
namespace paddle {
namespace platform {
namespace ipu {
Device::Device(const popart::DeviceInfo& device_info)
: id_(device_info.getId()), is_attached_(device_info.isAttached()) {
popart::DeviceType popart_device_type = device_info.getType();
switch (popart_device_type) {
case popart::DeviceType::IpuModel:
device_type_ = DeviceType::IpuModel;
break;
case popart::DeviceType::Ipu:
device_type_ = DeviceType::Ipu;
break;
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"popart::DeviceType:Unsupported type %d", popart_device_type));
// Returns how many IPU devices are available.
// When GetBoolEnv("POPLAR_IPUMODEL") is true the answer is always 1 and no
// device enumeration takes place. Otherwise the popart device manager is
// asked to enumerate devices, and an Unavailable error is raised if the list
// is empty.
int GetNumDevices() {
  if (GetBoolEnv("POPLAR_IPUMODEL")) {
    return 1;
  }

  const int detected = static_cast<int>(
      popart::DeviceManager::createDeviceManager().enumerateDevices().size());
  PADDLE_ENFORCE_GT(detected, 0,
                    platform::errors::Unavailable(
                        "Do not found any IPU devices, please "
                        "make sure Poplar sdk is enabled"));
  return detected;
}
// Returns the ids of all IPU devices reported by the popart device manager.
// When GetBoolEnv("POPLAR_IPUMODEL") is true the result is the single id 0
// and no enumeration takes place. Raises an Unavailable error if enumeration
// yields no devices.
std::vector<int> GetDeviceIds() {
  if (GetBoolEnv("POPLAR_IPUMODEL")) {
    return {0};
  }

  auto devices =
      popart::DeviceManager::createDeviceManager().enumerateDevices();
  PADDLE_ENFORCE_GT(
      devices.size(), 0,
      platform::errors::Unavailable("Do not found any IPU devices, please make "
                                    "sure Poplar sdk is enabled."));

  std::vector<int> device_ids;
  // The final size is known up front, so allocate once.
  device_ids.reserve(devices.size());
  // Iterate by const reference: copying each element would copy a smart
  // pointer (and touch its reference count) just to read the id.
  for (const auto& device : devices) {
    device_ids.push_back(device->getId());
  }
  return device_ids;
}
} // namespace ipu
......
......@@ -21,23 +21,11 @@ namespace paddle {
namespace platform {
namespace ipu {
enum class DeviceType { IpuModel = 0, Cpu, Ipu, OfflineIpu, Sim };
class Device {
public:
Device() {}
explicit Device(const popart::DeviceInfo& device_info);
int getId() const { return id_; }
bool isAttached() const { return is_attached_; }
DeviceType getType() const { return device_type_; }
private:
int id_;
bool is_attached_;
DeviceType device_type_;
/* TODO:: Add more elements in the future */
};
// get the number of all available IPUs
int GetNumDevices();
// get the device ids of all available IPUs
std::vector<int> GetDeviceIds();
} // namespace ipu
} // namespace platform
......
......@@ -10,23 +10,18 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/device/ipu/ipu_info.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
#include "paddle/fluid/platform/device/ipu/ipu_device.h"
namespace paddle {
namespace platform {
//! Get a list of device ids from environment variable or use all.
std::vector<int> GetSelectedIPUDevices() {
std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
platform::ipu::IpuBackend::GetInstance();
return ipu_backend->GetDeviceIds();
return platform::ipu::GetDeviceIds();
}
//! Get the total number of IPU devices in system.
int GetIPUDeviceCount() {
std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
platform::ipu::IpuBackend::GetInstance();
return ipu_backend->GetNumDevices();
}
int GetIPUDeviceCount() { return platform::ipu::GetNumDevices(); }
} // namespace platform
} // namespace paddle
......@@ -17,8 +17,10 @@ limitations under the License. */
namespace paddle {
namespace platform {
std::vector<int> GetSelectedIPUDevices();
int GetIPUDeviceCount();
} // namespace platform
} // namespace paddle
#endif
......@@ -22,6 +22,8 @@ namespace ipu {
static constexpr const char *sIpuIndexAttr = "ipu_index";
static constexpr const char *sIpuStageAttr = "ipu_stage";
static constexpr const char *sMatmulSerializeFactor = "serialize_factor";
static constexpr const char *sMatmulSerializeMode = "serialize_mode";
static constexpr const char *sOpIdentifyIdAttr = "op_identify_id";
static constexpr const char *sDebugInfoId = "__debug_info_id";
......@@ -29,6 +31,7 @@ static constexpr const char *sBeta1 = "beta1";
static constexpr const char *sBeta2 = "beta2";
static constexpr const char *sBeta1Pow = "Beta1Pow";
static constexpr const char *sBeta2Pow = "Beta2Pow";
static constexpr const char *sLossScaling = "LossScaling";
} // namespace ipu
} // namespace platform
......
......@@ -12,10 +12,27 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/ipu/ipu_strategy.h"
#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
#include <glog/logging.h>
namespace paddle {
namespace platform {
namespace ipu {} // namespace ipu
namespace ipu {
// Switches on the popart pattern named `t` in this strategy's pattern set
// and logs the request at verbosity 10.
void IpuStrategy::enablePattern(const std::string& t) {
  constexpr bool kEnable = true;
  VLOG(10) << "enable popart pattern: " << t;
  popart_patterns.enablePattern(t, kEnable);
}
// Switches off the popart pattern named `t` in this strategy's pattern set
// and logs the request at verbosity 10. popart exposes a single setter, so
// disabling is expressed as enablePattern(name, false).
void IpuStrategy::disablePattern(const std::string& t) {
  constexpr bool kEnable = false;
  VLOG(10) << "disable popart pattern: " << t;
  popart_patterns.enablePattern(t, kEnable);
}
// Reports whether the popart pattern named `t` is currently enabled in this
// strategy's pattern set.
const bool IpuStrategy::isPatternEnabled(const std::string& t) {
  const bool enabled = popart_patterns.isPatternEnabled(t);
  return enabled;
}
} // namespace ipu
} // namespace platform
} // namespace paddle
......@@ -14,24 +14,86 @@ limitations under the License. */
#pragma once
#include <popart/op.hpp>
#include <popart/sessionoptions.hpp>
#include <popart/tensorlocation.hpp>
#include "popart/patterns/patterns.hpp"
namespace paddle {
namespace platform {
namespace ipu {
using VirtualGraphMode = popart::VirtualGraphMode;
using RecomputationType = popart::RecomputationType;
struct IpuStrategy {
IpuStrategy() {
// we always save optimizer state to OffChip and enable rts for saving
// memory
auto storage = popart::TensorLocation(popart::TensorStorage::OffChip,
popart::ReplicatedTensorSharding::On);
popart_options.optimizerStateTensorLocationSettings =
popart::TensorLocationSettings(storage);
// We divide the accumulationFactor and replicatedGraphCount after all
// reduce
popart_options.accumulationAndReplicationReductionType =
popart::ReductionType::Mean;
popart_options.meanAccumulationAndReplicationReductionStrategy =
popart::MeanReductionStrategy::Post;
popart_options.enableFloatingPointChecks = false;
// A directory for log traces to be written into.
popart_options.logDir = "popart_log";
}
~IpuStrategy() {}
// Number ipus total needed, replica * ipu_per_replica
int num_ipus = 1;
// batches per step
int batches_per_step = 1;
int batch_size = 1;
// micro batch-size
int micro_batch_size = 1;
// training flag, true for training
bool is_training = true;
// save the onnx model lowered by paddle program description
bool save_init_onnx = false;
bool save_last_onnx = true;
popart::SessionOptions popart_options_;
// save the trained model
bool save_onnx_checkpoint = false;
// save paddle model per n steps
int save_per_n_step = 1;
// average sharding, debugging used
bool need_avg_shard = false;
// flag for fp16, true for pure fp16
bool enable_fp16 = false;
// available memory proportion, 0.0f for disable
float available_memory_proportion = 0.0f;
// loss scaling, currently we can't get loss scaling from
// optimizer_extract_pass, so we have to set it here
float loss_scaling = 1.0f;
// defaultMaxWeightNorm for adam optimizer
float max_weight_norm = 65504.0f;
// popart session option
popart::SessionOptions popart_options;
popart::Patterns popart_patterns;
public:
void enablePattern(const std::string& t);
void disablePattern(const std::string& t);
const bool isPatternEnabled(const std::string& t);
};
} // namespace ipu
......
......@@ -12,22 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/ipu/ipu_utils.h"
#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
#include <cmath>
namespace paddle {
namespace platform {
namespace ipu {
void* PaddleIArray::data() { return tensor_->data(); }
void* PaddleIArray::data() { return tensor_.data(); }
popart::DataType PaddleIArray::dataType() const {
return VarType2PopartType(tensor_->type());
return VarType2PopartType(tensor_.type());
}
std::size_t PaddleIArray::rank() const { return tensor_->dims().size(); }
std::size_t PaddleIArray::rank() const { return tensor_.dims().size(); }
int64_t PaddleIArray::dim(size_t index) const {
return tensor_->dims().at(index);
return tensor_.dims().at(index);
}
std::size_t PaddleIArray::nelms() const {
......@@ -150,6 +151,32 @@ bool GetBoolEnv(std::string str) {
}
}
// Returns the (prefix, postfix) pairs associated with an optimizer type.
// Per the original note, each pair is laid out as
// {popart_tensor_id part, paddle_tensor_id part}.
//
// Every optimizer type yields the empty pair ("", "") first. "adam" and
// "lamb" additionally yield the pairs for the two moment accumulators and
// the step counter. "sgd", "momentum" and any unrecognised type yield only
// the empty pair.
std::vector<std::pair<std::string, std::string>> GetOptPrePostfix(
    const std::string& opt_type) {
  using NamePair = std::pair<std::string, std::string>;

  std::vector<NamePair> mapping;
  mapping.emplace_back("", "");

  const bool has_accumulators = (opt_type == "adam") || (opt_type == "lamb");
  if (has_accumulators) {
    mapping.emplace_back("Accl1___", "_moment1_0");
    mapping.emplace_back("Accl2___", "_moment2_0");
    mapping.emplace_back("Step___", "_beta1_pow_acc_0");
  }
  return mapping;
}
// Rounds `num_ipus` up to the nearest power of two, because the number of
// IPUs requested must be pow(2, n).
//
// Uses integer doubling rather than pow/log2 so the result never depends on
// floating-point rounding and is defined for every input:
//   * num_ipus <= 0 returns 0 (for 0 this matches what the floating-point
//     formula produced; for negatives that formula had no defined result).
//   * num_ipus above 2^30 saturates at 2^30, the largest power of two that
//     fits in a 32-bit int, instead of overflowing.
int RequestIpus(const int num_ipus) {
  if (num_ipus <= 0) {
    return 0;
  }
  constexpr int kLargestPow2 = 1 << 30;
  int granted = 1;
  while (granted < num_ipus && granted < kLargestPow2) {
    granted <<= 1;
  }
  return granted;
}
} // namespace ipu
} // namespace platform
} // namespace paddle
......@@ -17,14 +17,27 @@ limitations under the License. */
#include <popart/ndarraywrapper.hpp>
#include <popart/tensordata.hpp>
#include <popart/tensorinfo.hpp>
#include <popart/vendored/any.hpp>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
namespace platform {
namespace ipu {
using float16 = platform::float16;
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using Scope = framework::Scope;
using OpDesc = framework::OpDesc;
using Graph = framework::ir::Graph;
using Node = framework::ir::Node;
using BlockDesc = framework::BlockDesc;
// onnx dtype
// https://github.com/onnx/onnx/blob/master/onnx/onnx-ml.proto3
enum ONNXDataType : int {
......@@ -49,14 +62,15 @@ enum ONNXDataType : int {
class PaddleIArray final : public popart::IArray {
public:
explicit PaddleIArray(framework::Tensor *tensor) : tensor_(tensor) {
explicit PaddleIArray(const Tensor* tensor) {
tensor_.ShareDataWith(*tensor);
for (int i = 0; i < tensor->dims().size(); ++i) {
shape_.push_back(tensor->dims().at(i));
}
}
public:
void *data();
void* data();
popart::DataType dataType() const;
std::size_t rank() const;
int64_t dim(size_t index) const;
......@@ -64,7 +78,7 @@ class PaddleIArray final : public popart::IArray {
const popart::Shape shape() const;
private:
framework::Tensor *tensor_;
Tensor tensor_;
std::vector<int64_t> shape_;
};
......@@ -74,8 +88,7 @@ popart::DataType OnnxDtype2PopartType(const int type);
bool GetBoolEnv(std::string str);
template <typename T>
std::unique_ptr<popart::NDArrayWrapper<T>> Tensor2IArray(
const framework::Tensor &tensor) {
std::unique_ptr<popart::NDArrayWrapper<T>> Tensor2IArray(const Tensor& tensor) {
auto dtype = VarType2PopartType(tensor.type());
auto shape = std::vector<int64_t>();
for (size_t i = 0; i < tensor.dims().size(); ++i) {
......@@ -84,18 +97,140 @@ std::unique_ptr<popart::NDArrayWrapper<T>> Tensor2IArray(
popart::TensorInfo tensor_info(dtype, shape);
return std::make_unique<popart::NDArrayWrapper<T>>(
reinterpret_cast<T *>(tensor.data()), tensor_info);
reinterpret_cast<T*>(tensor.data()), tensor_info);
}
template <typename T>
std::unique_ptr<popart::NDArrayWrapper<T>> LoDTensor2IArray(
framework::LoDTensor const &lod_tensor) {
PADDLE_ENFORCE_EQ(
lod_tensor.lod().size(), 0UL,
platform::errors::InvalidArgument("LoDTensor2IArray is Unimplemented"));
LoDTensor const& lod_tensor) {
if (lod_tensor.lod().size() == 0) {
return Tensor2IArray<T>(lod_tensor);
} else {
PADDLE_THROW(
platform::errors::Unimplemented("LoDTensor2IArray is Unimplemented"));
}
}
// Reads the first element of the LoDTensor stored in variable `var_name` of
// `scope` and returns it as a T.
//
// The tensor is bound by const reference so that looking up one scalar does
// not copy the tensor object.
// NOTE(review): the variable is assumed to exist, to hold a LoDTensor with at
// least one element, and to have element type T — none of this is checked
// here; confirm callers guarantee it.
template <typename T>
T GetSingleVarFromScope(const Scope* scope, const std::string& var_name) {
  auto var = scope->GetVar(var_name);
  const auto& tensor = var->Get<framework::LoDTensor>();
  // check dtype is ?
  return tensor.data<T>()[0];
}
// Visitor over a Paddle attribute value that stores the value into a
// name -> popart::any map under a fixed attribute name.
//
// Scalar and vector values are inserted with map::emplace, so an entry that
// already exists under `attr_name_` is left untouched. BlockDesc-based
// attributes and boost::blank are rejected with an Unavailable error.
struct CustomOpAttrVisitor : public boost::static_visitor<void> {
  // `attr` is the destination map (not owned); `attr_name` is the key used
  // for whichever value is visited.
  explicit CustomOpAttrVisitor(std::map<std::string, popart::any>* attr,
                               const std::string& attr_name)
      : attrs_(attr), attr_name_(attr_name) {}

  mutable std::map<std::string, popart::any>* attrs_;
  std::string attr_name_;

  // --- scalar attributes ---
  void operator()(int v) const { Store(v); }
  void operator()(float v) const { Store(v); }
  void operator()(bool v) const { Store(v); }
  void operator()(int64_t v) const { Store(v); }
  void operator()(const std::string& v) const { Store(v); }

  // --- vector attributes ---
  void operator()(const std::vector<int>& v) const { Store(v); }
  void operator()(const std::vector<float>& v) const { Store(v); }
  void operator()(const std::vector<std::string>& v) const { Store(v); }
  void operator()(const std::vector<bool>& v) const { Store(v); }
  void operator()(const std::vector<int64_t>& v) const { Store(v); }
  void operator()(const std::vector<double>& v) const { Store(v); }

  // --- unsupported attribute kinds ---
  void operator()(BlockDesc*) const {
    PADDLE_THROW(platform::errors::Unavailable(
        "Unsupported calling method for `BlockDesc` type."));
  }
  void operator()(const std::vector<BlockDesc*>&) const {
    PADDLE_THROW(platform::errors::Unavailable(
        "Unsupported calling method for `BlockDesc` type."));
  }
  void operator()(boost::blank) const {
    PADDLE_THROW(platform::errors::Unavailable(
        "Unsupported calling method for `boost::blank` type."));
  }

 private:
  // Inserts `value` under attr_name_; shared by all supported overloads.
  template <typename Value>
  void Store(const Value& value) const {
    attrs_->emplace(attr_name_, value);
  }
};
// Pairs a Paddle op name with the popart operator (domain, type, version)
// that implements it.
struct IpuCustomOpIdentifier {
  // `_paddle_op` is the Paddle-side op name; `_popart_op`, `_domain` and
  // `_version` are passed to popart::OperatorIdentifier as
  // (domain, type, version).
  IpuCustomOpIdentifier(const std::string& _paddle_op,
                        const std::string& _popart_op,
                        const std::string& _domain, unsigned int _version)
      : paddle_op(_paddle_op), popart_op(_domain, _popart_op, _version) {}

  // Returns a human-readable one-line description of this identifier.
  // Declared const because it only reads members; this lets it be called on
  // elements reached through a const reference or a const container.
  std::string repr() const {
    std::ostringstream os;
    os << "paddle_op: " << paddle_op << ", domain: " << popart_op.domain
       << ", type: " << popart_op.type << ", version: " << popart_op.version;
    return os.str();
  }

  std::string paddle_op;
  popart::OperatorIdentifier popart_op;
};
// Visitor over a Paddle attribute value that fills a LoDTensor from a
// vector-valued attribute via framework::TensorFromVector.
//
// Only vector<int>, vector<float>, vector<bool>, vector<int64_t> and
// vector<double> are accepted; every other attribute kind raises an
// InvalidArgument error. A vector<float> is converted element-wise to
// float16 first when the requested dtype is FP16.
struct ConstantOpAttrVisitor : public boost::static_visitor<void> {
  // `tensor` is the destination (not owned); `dtype` is consulted only to
  // decide whether float data must be narrowed to float16.
  explicit ConstantOpAttrVisitor(framework::LoDTensor* tensor,
                                 framework::proto::VarType::Type dtype)
      : tensor_(tensor), dtype_(dtype) {}

  framework::LoDTensor* tensor_;
  framework::proto::VarType::Type dtype_;

  void operator()(const std::vector<int>& vec) const {
    framework::TensorFromVector<int>(vec, tensor_);
  }
  void operator()(const std::vector<float>& vec) const {
    if (dtype_ == framework::proto::VarType::FP16) {
      std::vector<float16> vec_fp16;
      // Output size equals input size; reserve once instead of letting
      // back_inserter grow the buffer repeatedly.
      vec_fp16.reserve(vec.size());
      std::transform(vec.begin(), vec.end(), std::back_inserter(vec_fp16),
                     [](float f) -> float16 { return float16(f); });
      framework::TensorFromVector<float16>(vec_fp16, tensor_);
    } else {
      framework::TensorFromVector<float>(vec, tensor_);
    }
  }
  void operator()(const std::vector<bool>& vec) const {
    framework::TensorFromVector<bool>(vec, tensor_);
  }
  void operator()(const std::vector<int64_t>& vec) const {
    framework::TensorFromVector<int64_t>(vec, tensor_);
  }
  void operator()(const std::vector<double>& vec) const {
    framework::TensorFromVector<double>(vec, tensor_);
  }

  // Common failure path for every attribute kind that is not a supported
  // vector.
  void RaiseError() const {
    PADDLE_THROW(
        platform::errors::InvalidArgument("Constant value must be a vector"));
  }
  void operator()(int v) const { RaiseError(); }
  void operator()(float v) const { RaiseError(); }
  void operator()(const std::string& v) const { RaiseError(); }
  void operator()(const std::vector<std::string>& v) const { RaiseError(); }
  void operator()(bool v) const { RaiseError(); }
  void operator()(BlockDesc* desc) const { RaiseError(); }
  void operator()(const std::vector<BlockDesc*>& v) const { RaiseError(); }
  void operator()(int64_t v) const { RaiseError(); }
  void operator()(boost::blank) const { RaiseError(); }
};
std::vector<std::pair<std::string, std::string>> GetOptPrePostfix(
const std::string& opt_type);
int RequestIpus(const int num_ipus);
} // namespace ipu
} // namespace platform
} // namespace paddle
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// clang-format off
#pragma once
OP_DECL(popart_nllloss_v2, aiGraphcoreOpset.nllloss, SIG_ARG(INT32,popart::ReductionType,reduction) OPT_ARG(INT32,ignoreIndex) ARG(BOOL,inputIsLogProbability) ) // NOLINT
// clang-format on
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册