未验证 提交 b2aee3e3 编写于 作者: A Allen Guo 提交者: GitHub

[IPU] update ipu_backend p0 (#38854)

* update ipu_backend

* sync with paddle internal
Co-authored-by: Xiaobing Wang <xiaobingw@graphcore.ai>
Co-authored-by: Allen Guo <alleng@graphcore.ai>
Co-authored-by: Zhixin Yao <zhixiny@graphcore.ai>
Co-authored-by: Haicheng Jiang <haichengj@graphcore.ai>
Co-authored-by: Han Zhao <hanzhao@graphcore.ai>

* apply comments 01

* update error message

* restore ipu_executor and ipu_optimizer

* add clang-format on
Co-authored-by: Xiaobing Wang <xiaobingw@graphcore.ai>
Co-authored-by: Zhixin Yao <zhixiny@graphcore.ai>
Co-authored-by: Haicheng Jiang <haichengj@graphcore.ai>
Co-authored-by: Han Zhao <hanzhao@graphcore.ai>
上级 b4cb3589
# Build rules for the Paddle IPU (Graphcore) backend; only active when
# configured with -DWITH_IPU=ON.
IF(WITH_IPU)
  # Collect every popart canonicalization pass (paddle-op -> popart-op rewrites).
  FILE(GLOB POPART_CANONICALIZATION_SRC ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/device/ipu/popart_canonicalization/*.cc)
  # NOTE(review): two generations of build rules appear side by side below —
  # the old per-component libraries (ipu_device, ipu_utils, ipu_strategy, ...)
  # and the new merged library built from IPU_BACKEND_SRC — and the target
  # `ipu_backend` is declared twice. This looks like pre-/post-refactor
  # variants captured together; confirm only the merged target should remain.
  cc_library(ipu_device SRCS device.cc DEPS enforce popart)
  cc_library(ipu_utils SRCS ipu_utils.cc DEPS memory framework_proto popart)
  cc_library(ipu_strategy SRCS ipu_strategy.cc DEPS popart graph framework_proto enforce)
  cc_library(ipu_optimizer SRCS ipu_optimizer.cc DEPS popart enforce)
  cc_library(ipu_executor SRCS ipu_executor.cc DEPS ipu_optimizer ipu_utils popart graph framework_proto)
  cc_library(popart_canonicalization_utils SRCS ${POPART_CANONICALIZATION_SRC} DEPS framework_proto enforce ipu_utils)
  cc_library(ipu_compiler SRCS ipu_compiler.cc DEPS popart graph ipu_utils graph_helper)
  cc_library(ipu_backend SRCS ipu_backend.cc DEPS popart ipu_compiler graph framework_proto enforce ipu_utils ipu_strategy ipu_device ipu_executor graph_helper)
  # Canonicalization passes are compiled into the shared paddle_ipu plugin.
  list(APPEND PADDLE_IPU_SRC ${POPART_CANONICALIZATION_SRC})
  # Cached paths so dependent code can locate the plugin at runtime.
  set(PADDLE_IPU_LIB "${CMAKE_CURRENT_BINARY_DIR}/libpaddle_ipu.so" CACHE STRING "")
  set(PADDLE_IPU_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}" CACHE STRING "")
  set(IPU_BACKEND_SRC
    "ipu_device.cc"
    "ipu_strategy.cc"
    "ipu_executor.cc"
    "ipu_compiler.cc"
    "ipu_backend.cc"
    "ipu_utils.cc"
  )
  # Merged backend library (new layout) plus device-info helper.
  cc_library(ipu_backend SRCS ${IPU_BACKEND_SRC} DEPS popart graph framework_proto enforce graph_helper timer)
  cc_library(ipu_info SRCS ipu_info.cc DEPS ipu_backend)
  cc_library(paddle_ipu SHARED SRCS ${PADDLE_IPU_SRC} DEPS popart)
  # Ensure the backend is built before the plugin that links against it.
  add_dependencies(paddle_ipu ipu_backend)
ENDIF()
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/ipu/ipu_backend.h"
#include "paddle/fluid/platform/ipu/ipu_utils.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/ir/graph.h"
......@@ -24,170 +24,92 @@ namespace paddle {
namespace platform {
namespace ipu {
std::shared_ptr<IpuBackend> IpuBackend::instance_ = nullptr;
IpuBackend* IpuBackend::GetInstance() {
static IpuBackend instance;
return &instance;
}
IpuBackend::IpuBackend() {
compiler_ = std::make_shared<Compiler>();
compiler_ = std::make_unique<Compiler>();
executor_ = std::make_unique<Executor>();
}
void IpuBackend::Clear() {
IpuBackend::~IpuBackend() {
compiler_.reset();
executor_.reset();
// detach device
if (device_ != nullptr && device_->isAttached()) {
device_->detach();
device_.reset();
device_ = nullptr;
}
}
IpuBackend::~IpuBackend() { Clear(); }
std::shared_ptr<IpuBackend> IpuBackend::GetInstance() {
if (!instance_) {
instance_.reset(new IpuBackend());
}
return instance_;
}
// This api should only call from python, always return a new object
std::shared_ptr<IpuBackend> IpuBackend::GetNewInstance() {
instance_.reset(new IpuBackend());
return instance_;
}
void IpuBackend::Compile(framework::ir::Graph* graph,
void IpuBackend::Compile(Graph* graph,
const std::vector<std::string>& feed_list,
const std::vector<std::string>& fetch_list) {
VLOG(10) << "enter IpuBackend::Compile";
compiler_->Prepare();
executor_->SetCompilerResources(compiler_->GetResources());
compiler_->InitInputs(graph, feed_list);
compiler_->LowerConstants(graph, scope_);
compiler_->LowerWeights(graph, scope_);
compiler_->LowerBody(graph);
compiler_->InitOutputs(fetch_list);
executor_->SetWeights(compiler_->GetWeights());
if (ipu_strategy_->is_training) {
compiler_->LowerOptimier(graph, scope_);
}
is_compiled_ = true;
// when call compile, means a new graph
is_prepared_ = false;
VLOG(10) << "leave IpuBackend::Compile";
}
void IpuBackend::Run(const std::vector<const framework::Tensor*>& inputs,
const std::vector<framework::Tensor*>& outputs,
void IpuBackend::Run(const std::vector<const Tensor*>& inputs,
const std::vector<Tensor*>& outputs,
const framework::ExecutionContext& ctx) {
Prepare();
auto inputs_id = compiler_->GetInputs();
auto outputs_id = compiler_->GetOutputs();
executor_->Run(inputs_id, inputs, outputs_id, outputs, ctx);
timer_->Start();
executor_->Run(inputs, outputs, ctx);
timer_->Pause();
VLOG(10) << "[IPU Run]: " << timer_->ElapsedMS() << " (ms)";
}
void IpuBackend::Prepare() {
if (is_prepared_) {
return;
} else {
if (!is_prepared_) {
executor_->Prepare(compiler_->GetModelProto());
timer_.reset(new platform::Timer());
is_prepared_ = true;
}
// convert Model to fp16
if (ipu_strategy_->enable_fp16) {
compiler_->ConvertProtoToFp16();
}
auto proto = compiler_->GetModelProto();
auto tensors = compiler_->GetTensors();
auto outputs = compiler_->GetOutputs();
executor_->Prepare(proto, tensors, outputs, device_);
}
void IpuBackend::SetScope(const framework::Scope& scope) {
void IpuBackend::Detach() { executor_->Detach(); }
void IpuBackend::Reset() {
executor_->Detach();
compiler_.reset();
executor_.reset();
}
void IpuBackend::SetScope(const Scope& scope) {
scope_ = &scope;
executor_->SetScope(&scope);
}
void IpuBackend::SetIpuStrategy(const IpuStrategy& strategy) {
ipu_strategy_ = &strategy;
executor_->SetIpuStrategy(strategy);
compiler_->SetIpuStrategy(strategy);
executor_->SetIpuStrategy(strategy);
}
size_t IpuBackend::GetNumDevices() {
// IpuModel
bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
if (ipu_model) return 1;
// Real dev
size_t num_devices =
popart::DeviceManager::createDeviceManager().enumerateDevices().size();
PADDLE_ENFORCE_GT(
num_devices, 0,
platform::errors::Unavailable(
"Do not found any IPU devices, please make "
"sure Poplar sdk is enabled or enable ENV \"POPLAR_IPUMODEL=1\""));
return num_devices;
}
std::vector<int> IpuBackend::GetDeviceIds() {
bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
if (ipu_model) {
return {0};
}
std::vector<int> device_ids;
auto devices =
popart::DeviceManager::createDeviceManager().enumerateDevices();
PADDLE_ENFORCE_GT(
devices.size(), 0,
platform::errors::Unavailable("Do not found any IPU devices, please make "
"sure Poplar sdk is enabled."));
for (auto device : devices) {
device_ids.push_back(device->getId());
}
return device_ids;
}
Device IpuBackend::GetDevice(int id) {
bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
if (ipu_model) {
std::map<std::string, std::string> deviceOpts{{"numIPUs", "1 "}};
device_ = popart::DeviceManager::createDeviceManager().createIpuModelDevice(
deviceOpts);
Device device(*device_.get());
return device;
}
size_t num_devices = GetNumDevices();
if (id < 0 || id >= num_devices) {
PADDLE_THROW(platform::errors::InvalidArgument(
"device id %d is invalid, number devices is %d", id, num_devices));
}
std::shared_ptr<popart::DeviceInfo> popart_device_info =
popart::DeviceManager::createDeviceManager().getDevice(
popart::SyncPattern::Full, id);
Device device(*popart_device_info.get());
return device;
}
void IpuBackend::AttachDevice(int id) {
// trick here
// Compiler ipu is not same as the runtime ipu.
VLOG(10) << "comile ipu id = " << id;
bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
if (ipu_model) {
return;
}
device_ = popart::DeviceManager::createDeviceManager().acquireAvailableDevice(
UpperIpuNum());
PADDLE_ENFORCE_NOT_NULL(
device_, platform::errors::Unavailable("Can't attach IPU, ipu_num = %d.",
UpperIpuNum()));
void IpuBackend::SetCustomOps(
const std::vector<IpuCustomOpIdentifier>& custom_ops) {
compiler_->SetCustomOps(custom_ops);
}
bool IpuBackend::DeviceIsAttached() { return device_ != nullptr; }
// num_ipus must be pow(2,n);
int IpuBackend::UpperIpuNum() {
PADDLE_ENFORCE_GT(ipu_strategy_->num_ipus, 0,
platform::errors::Unavailable(
"The ipu num get is wrong, please make sure the "
"sharding or pipline parameter is right."));
int i = 0;
while (std::pow(2, i) < ipu_strategy_->num_ipus) {
i++;
void IpuBackend::SaveMoldeProto(const std::string& path) {
if (ipu_strategy_->is_training && is_prepared_) {
executor_->SaveModelToHost(path);
} else if (is_compiled_) {
compiler_->SaveModelProtoNoCheck(path);
} else {
LOG(WARNING) << "Model is empty";
}
return std::pow(2, i);
}
} // namespace ipu
......
......@@ -14,88 +14,86 @@ limitations under the License. */
#pragma once
#include <cmath>
#include <popart/devicemanager.hpp>
#include <popart/names.hpp>
#include <popart/tensorinfo.hpp>
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device/ipu/ipu_compiler.h"
#include "paddle/fluid/platform/device/ipu/ipu_device.h"
#include "paddle/fluid/platform/device/ipu/ipu_executor.h"
#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/ipu/device.h"
#include "paddle/fluid/platform/ipu/ipu_compiler.h"
#include "paddle/fluid/platform/ipu/ipu_executor.h"
#include "paddle/fluid/platform/ipu/ipu_strategy.h"
#include "paddle/fluid/platform/timer.h"
namespace paddle {
namespace platform {
namespace ipu {
// IpuBackend is the center of paddle-ipu, its function include:
// 1. Compile paddle model to popart model
// 2. Run popart model, inference or training
// 3. Request and release device
// 4. Other helper function
class IpuBackend {
// IpuBackend is the center of paddle-ipu, its function include:
// 1. Compile paddle model to popart model
// 2. Run popart model, inference or training
// 3. Request and release device
// 4. Other helper function
public:
static IpuBackend *GetInstance();
public:
IpuBackend();
~IpuBackend();
void Clear();
// return if exsits, else create and return
static std::shared_ptr<IpuBackend> GetInstance();
// always return a new instance_
static std::shared_ptr<IpuBackend> GetNewInstance();
// what compile does include(call compiler_):
// 1. map paddle-op -> poart op
// 2. construct popart onnx compute graph
void Compile(framework::ir::Graph *graph,
const std::vector<std::string> &feed_list,
void Compile(Graph *graph, const std::vector<std::string> &feed_list,
const std::vector<std::string> &fetch_list);
// what run does include:
// 1. construct forward onnx graph
// 2. graph-level optimization
// 3. autodiff
void Run(const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs,
void Run(const std::vector<const Tensor *> &inputs,
const std::vector<Tensor *> &outputs,
const framework::ExecutionContext &ctx);
Executor &GetExecutor() { return *executor_; }
// detach IPU manually
void Detach();
// reset manually
// call it before destruct works
void Reset();
void SetScope(const framework::Scope &scope);
const framework::Scope *GetScope() { return scope_; }
void SetScope(const Scope &scope);
const Scope *GetScope() { return scope_; }
void SetIpuStrategy(const IpuStrategy &strategy);
const IpuStrategy *GetIpuStrategy() { return ipu_strategy_; }
void SetCustomOps(const std::vector<IpuCustomOpIdentifier> &custom_ops);
// Device
size_t GetNumDevices();
std::vector<int> GetDeviceIds();
Device GetDevice(int id);
void AttachDevice(int id);
bool DeviceIsAttached();
// save compiled model to onnx
void SaveMoldeProto(const std::string &path);
private:
int UpperIpuNum();
void Prepare();
private:
std::shared_ptr<Compiler> compiler_;
std::unique_ptr<Compiler> compiler_;
std::unique_ptr<Executor> executor_;
std::shared_ptr<popart::DeviceInfo> device_;
bool is_compiled_ = false;
bool is_prepared_ = false;
// not own
const framework::Scope *scope_ = nullptr;
const Scope *scope_ = nullptr;
const IpuStrategy *ipu_strategy_ = nullptr;
private:
static std::shared_ptr<IpuBackend> instance_;
// time record for IpuBackend::Run
std::unique_ptr<platform::Timer> timer_;
DISABLE_COPY_AND_ASSIGN(IpuBackend);
};
} // namespace ipu
......
......@@ -12,17 +12,66 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/ipu/ipu_compiler.h"
#include "paddle/fluid/platform/device/ipu/ipu_compiler.h"
#include <popart/adam.hpp>
#include <popart/adaptive.hpp>
#include <popart/optimizer.hpp>
#include <popart/sgd.hpp>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/platform/ipu/ipu_utils.h"
#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
namespace paddle {
namespace platform {
namespace ipu {
// Map an optimizer-config string to the corresponding popart::AdamMode.
// @param str one of "adam", "adamax" or "lamb".
// @throws platform::errors::InvalidArgument for any other value.
popart::AdamMode AdamModeFromStr(const std::string& str) {
  if (str == "adam") {
    return popart::AdamMode::Adam;
  } else if (str == "adamax") {
    return popart::AdamMode::AdaMax;
  } else if (str == "lamb") {
    return popart::AdamMode::Lamb;
  } else {
    // Fixed typo in the user-facing message ("Uknown" -> "Unknown").
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Unknown AdamMode: %s, AdamMode must be one of these values: adam, "
        "adamax or lamb",
        str));
  }
}
// Map an optimizer-config string to the corresponding popart::AdaptiveMode.
// @param str one of "adadelta", "adagrad", "rmsprop" or "centered_rmsprop".
// @throws platform::errors::InvalidArgument for any other value.
popart::AdaptiveMode AdaptiveModeFromStr(const std::string& str) {
  if (str == "adadelta") {
    return popart::AdaptiveMode::AdaDelta;
  } else if (str == "adagrad") {
    return popart::AdaptiveMode::AdaGrad;
  } else if (str == "rmsprop") {
    return popart::AdaptiveMode::RMSProp;
  } else if (str == "centered_rmsprop") {
    return popart::AdaptiveMode::CenteredRMSProp;
  } else {
    // Fixed typo in the user-facing message ("Uknown" -> "Unknown").
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Unknown AdaptiveMode: %s, AdaptiveMode must be one of these values: "
        "adadelta, adagrad, rmsprop or centered_rmsprop",
        str));
  }
}
// Map an optimizer-config string to the corresponding popart::WeightDecayMode.
// @param str either "decay" or "l2_regularization".
// @throws platform::errors::InvalidArgument for any other value.
popart::WeightDecayMode WeightDecayModeFromStr(const std::string& str) {
  if (str == "decay") {
    return popart::WeightDecayMode::Decay;
  } else if (str == "l2_regularization") {
    return popart::WeightDecayMode::L2Regularization;
  } else {
    // Fixed typo in the user-facing message ("Uknown" -> "Unknown").
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Unknown WeightDecayMode: %s, WeightDecayMode must be decay or "
        "l2_regularization",
        str));
  }
}
template <typename T>
T GetAttrAllowNull(std::string attr, framework::OpDesc* op_desc) {
T GetAttrAllowNull(std::string attr, OpDesc* op_desc) {
if (op_desc->HasAttr(attr)) {
return BOOST_GET_CONST(T, op_desc->GetAttr(attr));
} else {
......@@ -31,8 +80,7 @@ T GetAttrAllowNull(std::string attr, framework::OpDesc* op_desc) {
}
template <typename T>
nonstd::optional<T> GetOptAttrAllowNull(std::string attr,
framework::OpDesc* op_desc) {
nonstd::optional<T> GetOptAttrAllowNull(std::string attr, OpDesc* op_desc) {
if (op_desc->HasAttr(attr)) {
return BOOST_GET_CONST(T, op_desc->GetAttr(attr));
} else {
......@@ -40,19 +88,36 @@ nonstd::optional<T> GetOptAttrAllowNull(std::string attr,
}
}
Compiler::Compiler() {
builder_ = popart::Builder::create();
RegisterOpFunc();
// Fetch attribute `attr` from `op_desc` as type TI and cast it to TO.
// Returns a value-initialized TO when the attribute is absent.
template <typename TI, typename TO>
TO GetCastSigAttrAllowNull(std::string attr, OpDesc* op_desc) {
  if (!op_desc->HasAttr(attr)) {
    return {};
  }
  const auto raw_value = BOOST_GET_CONST(TI, op_desc->GetAttr(attr));
  return static_cast<TO>(raw_value);
}
// Populate the op-name -> lowering-function dispatch table. The popart
// builder itself is created later, in Prepare().
Compiler::Compiler() { RegisterOpFunc(); }
// Release the popart builder and the per-graph compiler resources. The
// explicit resets make the teardown order deliberate: builder first, then
// resources.
Compiler::~Compiler() {
  builder_.reset();
  resources_.reset();
}
Compiler::~Compiler() {}
// Start a fresh compilation: create a new popart graph builder and an empty
// CompilerResources. Called from IpuBackend::Compile before lowering, so a
// subsequent Compile() discards all state from the previous graph.
void Compiler::Prepare() {
  builder_ = popart::Builder::create();
  resources_ = std::make_unique<CompilerResources>();
}
void Compiler::RegisterOpFunc() {
VLOG(10) << "enter Compiler::RegisterOpFunc";
#define INT_VEC std::vector<std::int64_t>
#define INT32_VEC std::vector<std::int32_t>
#define FLOAT_VEC std::vector<float>
#define FLOAT float
#define INT std::int64_t
#define INT32 std::int32_t
#define BOOL bool
#define STRING std::string
#define STRING_VEC std::vector<std::string*>
......@@ -60,6 +125,7 @@ void Compiler::RegisterOpFunc() {
#define ARG(Type, Name) , GetAttrAllowNull<Type>(#Name, op_desc)
#define OPT_ARG(Type, Name) , GetOptAttrAllowNull<Type>(#Name, op_desc)
#define SIG_ARG(TI, TO, Name) , GetCastSigAttrAllowNull<TI, TO>(#Name, op_desc)
#define POPART_CONST_ARG(Name) , const PopartConstant& Name
#define HOST_SIDE_CONST_ARG(Name) , const HostSideConstant& Name
#define POPART_ATTRIB_VEC_ARG(Name)
......@@ -67,7 +133,7 @@ void Compiler::RegisterOpFunc() {
name_function_ = {
#define OP_DECL(FuncName, OnnxImpl, Args) \
{#FuncName, [&](framework::OpDesc* op_desc) { \
{#FuncName, [&](OpDesc* op_desc) { \
auto op_type = op_desc->Type(); \
VLOG(10) << "build op:" << op_type << " args " << #Args; \
auto inputs = GetOpInputs(op_desc); \
......@@ -77,9 +143,12 @@ void Compiler::RegisterOpFunc() {
auto aiOnnxOpset = builder_->aiOnnxOpset11(); \
auto output_ids = OnnxImpl(inputs Args, debug_context); \
SetIpuIndexStage(output_ids, op_desc); \
SetAMPAttributes(output_ids, op_desc); \
SetSerializeAttributes(output_ids, op_desc); \
InsertTensors(output_names, output_ids); \
}}, // NOLINT
#include "paddle/fluid/platform/ipu/supported_ops_autogen.h"
#include "paddle/fluid/platform/device/ipu/supported_ops_autogen.h"
#include "paddle/fluid/platform/device/ipu/supported_ops_custom.h"
};
#undef OP_DECL
......@@ -87,146 +156,99 @@ void Compiler::RegisterOpFunc() {
#undef POPART_ATTRIB_VEC_ARG
#undef HOST_SIDE_CONST_ARG
#undef POPART_CONST_ARG
#undef SIG_ARG
#undef OPT_ARG
#undef ARG
#undef NONE
#undef STRING_VEC
#undef STRING
#undef BOOL
#undef INT32
#undef INT
#undef FLOAT
#undef FLOAT_VEC
#undef INT32_VEC
#undef INT_VEC
}
void Compiler::LowerBody(const framework::ir::Graph* graph) {
void Compiler::LowerBody(const Graph* graph) {
VLOG(10) << "enter Compiler::LowerBody";
auto nodes = framework::ir::TopologySortOperations(*graph);
for (auto* node : nodes) {
auto* op_desc = node->Op();
auto op_type = op_desc->Type();
VLOG(10) << "node->type: " << op_type;
VLOG(10) << "lowering op: " << op_type;
if (op_type == "popart_constant") {
auto dims =
BOOST_GET_CONST(std::vector<int64_t>, op_desc->GetAttr("dims"));
auto dtype_ = BOOST_GET_CONST(int, op_desc->GetAttr("dtype"));
auto dtype = OnnxDtype2PopartType(dtype_);
popart::TensorInfo tensor_info{dtype, dims};
auto value_attr = op_desc->GetAttr("value");
auto const_data = std::unique_ptr<popart::ConstVoidData>{};
switch (dtype) {
case popart::DataType::FLOAT:
const_data.reset(new popart::ConstVoidData(
BOOST_GET_CONST(std::vector<float>, value_attr).data(),
tensor_info));
break;
case popart::DataType::INT32:
const_data.reset(new popart::ConstVoidData(
BOOST_GET_CONST(std::vector<int>, value_attr).data(),
tensor_info));
break;
case popart::DataType::DOUBLE:
const_data.reset(new popart::ConstVoidData(
BOOST_GET_CONST(std::vector<double>, value_attr).data(),
tensor_info));
break;
case popart::DataType::INT64:
const_data.reset(new popart::ConstVoidData(
BOOST_GET_CONST(std::vector<int64_t>, value_attr).data(),
tensor_info));
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"The popart datatype is not supported, popart::DataType is %d",
dtype));
}
popart::TensorId result = builder_->aiOnnxOpset11().constant(*const_data);
SetIpuIndexStage(result, op_desc);
InsertTensors(GetOpOutputs(op_desc), result);
} else if (op_type == "popart_batchnormalization") {
// pass
} else if (op_type == "popart_optimizer") {
// pass
} else if (op_type == "popart_checkpointoutput") {
auto inputs = GetOpInputs(op_desc);
auto outputs = GetOpOutputs(op_desc);
auto num_outputs = outputs.size();
auto epsilon = BOOST_GET_CONST(float, op_desc->GetAttr("epsilon"));
auto momentum = BOOST_GET_CONST(float, op_desc->GetAttr("momentum"));
auto result = builder_->aiOnnxOpset11().batchnormalization(
inputs, num_outputs, epsilon, momentum);
SetIpuIndexStage(result, op_desc);
InsertTensors(GetOpOutputs(op_desc), result);
} else if (op_type == "popart_nllloss") {
auto output_ids = builder_->checkpointOutput(inputs);
InsertTensors(outputs, output_ids);
} else if (op_type == "popart_custom_op") {
auto inputs = GetOpInputs(op_desc);
auto ignoreIndex = BOOST_GET_CONST(int, op_desc->GetAttr("ignoreIndex"));
auto result = builder_->aiGraphcoreOpset1().nllloss(
inputs, popart::ReductionType::NoReduction, ignoreIndex);
SetIpuIndexStage(result, op_desc);
InsertTensors(GetOpOutputs(op_desc), result);
} else if (op_type == "popart_topk") {
auto outputs = GetOpOutputs(op_desc);
auto debug_context = BuildDebugContext(op_desc);
auto attributes = std::map<std::string, popart::any>{};
for (auto& attr : op_desc->GetAttrMap()) {
CustomOpAttrVisitor visitor(&attributes, attr.first);
boost::apply_visitor(visitor, attr.second);
}
auto __op_type =
BOOST_GET_CONST(std::string, op_desc->GetAttr("__op_type"));
VLOG(10) << "Build graph from custom op: " << __op_type;
auto it = custom_ops_.find(__op_type);
auto output_ids =
builder_->customOp(it->second.popart_op, it->second.popart_op.version,
inputs, outputs.size(), attributes, debug_context);
SetIpuIndexStage(output_ids, op_desc);
InsertTensors(outputs, output_ids);
} else if (op_type == "popart_printtensor") {
auto inputs = GetOpInputs(op_desc);
auto outputs = GetOpOutputs(op_desc);
int64_t axis = BOOST_GET_CONST(int64_t, op_desc->GetAttr("axis"));
int sorted_INT32 = BOOST_GET_CONST(int, op_desc->GetAttr("sorted"));
int64_t sorted = int64_t{sorted_INT32};
auto aiOnnxOpset = builder_->aiOnnxOpset11();
popart::ConvInputs result;
if (inputs.size() == 2) {
VLOG(10)
<< "[Compiler::LowerBody] size of inputs for <popart_topk> is 2";
result = aiOnnxOpset.topk(inputs, axis, sorted);
} else if (inputs.size() == 1) {
VLOG(10)
<< "[Compiler::LowerBody] size of inputs for <popart_topk> is 1";
int64_t k = BOOST_GET_CONST(int64_t, op_desc->GetAttr("k"));
popart::TensorInfo kShape{"INT64", std::vector<int64_t>{1}};
popart::ConstVoidData kData = {&k, kShape};
auto K_t = aiOnnxOpset.constant(kData);
result = aiOnnxOpset.topk({inputs[0], K_t}, axis, sorted);
}
result[1] = aiOnnxOpset.cast({result[1]}, "INT32");
SetIpuIndexStage(result, op_desc);
VLOG(10) << "[Compiler::LowerBody] output[1]: " << outputs[1];
VLOG(10) << "[Compiler::LowerBody] output[1]: "
<< GetOpOutputs(op_desc)[1] << " -> " << result[1];
tensors_.emplace(GetOpOutputs(op_desc)[1], result[1]); // topk indices
VLOG(10) << "[Compiler::LowerBody] output[0]: " << outputs[0];
VLOG(10) << "[Compiler::LowerBody] output[0]: "
<< GetOpOutputs(op_desc)[0] << " -> " << result[0];
tensors_.emplace(GetOpOutputs(op_desc)[0], result[0]); // topk values
auto debug_context = BuildDebugContext(op_desc);
auto print_gradient =
BOOST_GET_CONST(int64_t, op_desc->GetAttr("print_gradient"));
auto title = BOOST_GET_CONST(std::string, op_desc->GetAttr("title"));
auto output_ids = builder_->aiGraphcoreOpset1().printtensor(
inputs, print_gradient, debug_context, title);
SetIpuIndexStage(output_ids, op_desc);
InsertTensors(outputs, output_ids);
} else {
auto itr = name_function_.find(op_type);
if (itr != name_function_.end()) {
itr->second(node->Op());
} else {
PADDLE_THROW(platform::errors::NotFound(
"Op %s is not registered in popart canonicalization", op_type));
"%s is not registered, please check for unsupported operators for "
"running on IPU",
op_type));
}
}
}
VLOG(10) << "leave Compiler::LowerBody";
}
void Compiler::InitInputs(framework::ir::Graph* graph,
void Compiler::InitInputs(Graph* graph,
const std::vector<std::string>& feed_list) {
for (const auto& feed_name : feed_list) {
feed_list_.push_back(feed_name);
for (const framework::ir::Node* n : graph->Nodes()) {
for (const Node* n : graph->Nodes()) {
if (n->IsVar()) {
auto* var_desc = n->Var();
if (feed_name == var_desc->Name()) {
VLOG(10) << "feed_name= " << var_desc->Name();
auto data_type = VarType2PopartType(var_desc->GetDataType());
if (ipu_strategy_->enable_fp16) {
data_type = popart::DataType::FLOAT16;
}
popart::TensorInfo input_info{data_type, var_desc->GetShape()};
VLOG(10) << "popart input_info = " << input_info;
popart::TensorId tensor_id =
builder_->addInputTensor(input_info, feed_name);
VLOG(10) << "popart input tensor id = " << tensor_id;
inputs_.push_back(tensor_id);
tensors_.emplace(var_desc->Name(), tensor_id);
resources_->inputs.push_back(tensor_id);
resources_->tensors.emplace(var_desc->Name(), tensor_id);
}
}
}
......@@ -236,20 +258,58 @@ void Compiler::InitInputs(framework::ir::Graph* graph,
void Compiler::InitOutputs(const std::vector<std::string>& fetch_list) {
for (const auto& fetch_name : fetch_list) {
fetch_list_.push_back(fetch_name);
auto tensor = tensors_.find(fetch_name);
PADDLE_ENFORCE_NE(tensor, tensors_.end(),
auto tensor = resources_->tensors.find(fetch_name);
PADDLE_ENFORCE_NE(
tensor, resources_->tensors.end(),
platform::errors::NotFound(
"output tensor %s does not exist.", fetch_name));
"Output tensor %s is not found, please check the model.",
fetch_name));
VLOG(10) << "fetch_name= " << fetch_name;
VLOG(10) << "popart output tensor id = " << tensor->second;
builder_->addOutputTensor(tensor->second);
outputs_.push_back(tensor->second);
resources_->outputs.push_back(tensor->second);
}
}
void Compiler::LowerWeights(const framework::ir::Graph* graph,
const framework::Scope* scope_) {
PADDLE_ENFORCE_NOT_NULL(scope_,
// Lower every `popart_constant` node in `graph` into a popart constant
// tensor. The constant's host-side data is materialized into a LoDTensor
// placed in a child scope of `scope`, because popart receives a raw pointer
// into that tensor's buffer (tensor->data() below) — the scope keeps the
// buffer alive while the popart graph is being built.
void Compiler::LowerConstants(const Graph* graph, const Scope* scope) {
  auto& kid_scope = scope->NewScope();
  VLOG(10) << "enter Compiler::LowerConstants";
  for (auto* node : graph->Nodes()) {
    if (!node->IsOp()) {
      continue;
    }
    auto* op_desc = node->Op();
    auto op_type = op_desc->Type();
    if (op_type == "popart_constant") {
      // Decode the constant's shape and dtype from op attributes.
      auto shape =
          BOOST_GET_CONST(std::vector<int64_t>, op_desc->GetAttr("dims"));
      auto dtype_ = BOOST_GET_CONST(int, op_desc->GetAttr("dtype"));
      auto dtype = PopartType2VarType(OnnxDtype2PopartType(dtype_));
      // The op's single output name becomes the tensor's name in the scope.
      auto tensor_name = op_desc->Output("__outputs__")[0];
      auto* var = kid_scope.Var(tensor_name);
      VLOG(10) << "lowering constant: " << tensor_name;
      auto* tensor = var->GetMutable<framework::LoDTensor>();
      // Copy the "value" attribute's payload into the tensor via a visitor
      // (the attribute is a boost::variant over several element types).
      ConstantOpAttrVisitor visitor(tensor, dtype);
      auto value = op_desc->GetAttr("value");
      boost::apply_visitor(visitor, value);
      auto ddim = framework::make_ddim(shape);
      tensor->Resize(ddim);
      // Wrap the tensor's buffer as popart ConstVoidData and emit an ONNX
      // constant node; popart does not copy the data here.
      auto const_data = std::unique_ptr<popart::ConstVoidData>();
      popart::TensorInfo tensor_info(VarType2PopartType(tensor->type()), shape);
      const_data.reset(new popart::ConstVoidData(tensor->data(), tensor_info));
      popart::TensorId result = builder_->aiOnnxOpset11().constant(*const_data);
      SetIpuIndexStage(result, op_desc);
      // Register the popart tensor id so later ops can reference it by name.
      resources_->tensors.emplace(tensor_name, result);
    }
  }
  VLOG(10) << "leave Compiler::LowerConstants";
}
void Compiler::LowerWeights(const Graph* graph, const Scope* scope) {
VLOG(10) << "enter Compiler::LowerWeights";
PADDLE_ENFORCE_NOT_NULL(scope,
platform::errors::PreconditionNotMet(
"You should call set_scope before LowerWeights"));
// at this step, the graph doesn't contains optimizer related states
......@@ -257,12 +317,12 @@ void Compiler::LowerWeights(const framework::ir::Graph* graph,
if (node->IsVar() && !node->IsCtrlVar() && node->Var()) {
if (node->Var()->Persistable() && node->inputs.empty()) {
auto var_name = node->Var()->Name();
// workround: https://github.com/graphcore/Paddle/issues/151
if (tensors_.count(var_name) != 0) {
if (resources_->tensors.count(var_name) != 0) {
continue;
}
VLOG(10) << "lowering weight: " << var_name;
auto var = scope_->FindVar(var_name);
auto var = scope->FindVar(var_name);
if (var) {
auto tensor = var->Get<framework::LoDTensor>();
auto dtype = VarType2PopartType(tensor.type());
......@@ -274,12 +334,113 @@ void Compiler::LowerWeights(const framework::ir::Graph* graph,
popart::ConstVoidData const_data{tensor.data(), tensor_info};
popart::TensorId result =
builder_->addInitializedInputTensor(const_data, var_name);
tensors_.emplace(var_name, result);
weights_.push_back(result);
resources_->tensors.emplace(var_name, result);
resources_->weights.push_back(result);
}
}
}
}
VLOG(10) << "leave Compiler::LowerWeights";
}
// Lower the `popart_optimizer` node of `graph` into CompilerResources:
// records the raw optimizer type, the popart id of the loss tensor, the
// initial learning rate (read from `scope` when an "lr_var" attribute
// exists), and an `optimizer_fn` factory that constructs the concrete
// popart optimizer (SGD / Adam / Adaptive) for a given learning rate.
// NOTE(review): the name carries a typo ("Optimier"); kept unchanged here
// since callers (IpuBackend::Compile) use this exact spelling.
void Compiler::LowerOptimier(const Graph* graph, const Scope* scope) {
  for (auto* node : graph->Nodes()) {
    if (!node->IsOp()) {
      continue;
    }
    auto* op_desc = node->Op();
    auto op_type = op_desc->Type();
    if (op_type == "popart_optimizer") {
      auto raw_type =
          BOOST_GET_CONST(std::string, op_desc->GetAttr("raw_type"));
      resources_->optimizer_type = raw_type;
      // Translate the paddle loss variable name into its popart tensor id.
      auto loss_var =
          BOOST_GET_CONST(std::string, op_desc->GetAttr("loss_var"));
      resources_->loss_var = resources_->tensors[loss_var];
      resources_->with_lr_sched =
          BOOST_GET_CONST(bool, op_desc->GetAttr("with_lr_sched"));
      if (op_desc->HasAttr("lr_var")) {
        // Read the initial lr value out of the scope variable.
        auto lr_var = BOOST_GET_CONST(std::string, op_desc->GetAttr("lr_var"));
        resources_->lr_var = lr_var;
        resources_->lr = GetSingleVarFromScope<float>(scope, lr_var);
      } else {
        // adadelta has no lr
        resources_->lr = 0.01f;
        resources_->with_lr_sched = false;
      }
      VLOG(10) << "Set initial lr: " << resources_->lr;
      auto loss_scaling = ipu_strategy_->loss_scaling;
      auto type = BOOST_GET_CONST(std::string, op_desc->GetAttr("type"));
      if (type == "sgd") {
        auto weight_decay =
            BOOST_GET_CONST(float, op_desc->GetAttr("weight_decay"));
        auto momentum = BOOST_GET_CONST(float, op_desc->GetAttr("momentum"));
        // The lr OptimizerValue is created with isConst=false — presumably so
        // the lr scheduler can update it between steps; TODO confirm against
        // popart's OptimizerValue semantics.
        resources_->optimizer_fn = [=](float lr) {
          return std::make_unique<popart::SGD>(
              popart::OptimizerValue(lr, false),
              popart::OptimizerValue(weight_decay, true),
              popart::OptimizerValue(momentum, true),
              popart::SGD::getUnsetDampening(),
              popart::SGD::getUnsetVelocityScaling(),
              popart::OptimizerValue(loss_scaling, true));
        };
      } else if (type == "adam") {
        auto weight_decay =
            BOOST_GET_CONST(float, op_desc->GetAttr("weight_decay"));
        auto beta1 = BOOST_GET_CONST(float, op_desc->GetAttr("beta1"));
        auto beta2 = BOOST_GET_CONST(float, op_desc->GetAttr("beta2"));
        auto eps = BOOST_GET_CONST(float, op_desc->GetAttr("eps"));
        // max_weight_norm comes from the strategy, not from op attributes.
        auto mwn = ipu_strategy_->max_weight_norm;
        VLOG(10) << "set max_weight_norm: " << mwn;
        auto adam_mode_ =
            BOOST_GET_CONST(std::string, op_desc->GetAttr("adam_mode"));
        auto adam_mode = AdamModeFromStr(adam_mode_);
        auto weight_decay_mode_ =
            BOOST_GET_CONST(std::string, op_desc->GetAttr("weight_decay_mode"));
        auto weight_decay_mode = WeightDecayModeFromStr(weight_decay_mode_);
        resources_->optimizer_fn = [=](float lr) {
          return std::make_unique<popart::Adam>(
              popart::OptimizerValue(lr, false),
              popart::OptimizerValue(weight_decay, true),
              popart::OptimizerValue(beta1, true),
              popart::OptimizerValue(beta2, true),
              popart::OptimizerValue(eps, true),
              popart::OptimizerValue(loss_scaling, true),
              popart::OptimizerValue(mwn, true), adam_mode, weight_decay_mode,
              popart::DataType::UNDEFINED, popart::DataType::FLOAT,
              popart::DataType::FLOAT);
        };
      } else if (type == "adaptive") {
        auto alpha = BOOST_GET_CONST(float, op_desc->GetAttr("alpha"));
        auto momentum = BOOST_GET_CONST(float, op_desc->GetAttr("momentum"));
        auto eps = BOOST_GET_CONST(float, op_desc->GetAttr("eps"));
        auto weight_decay =
            BOOST_GET_CONST(float, op_desc->GetAttr("weight_decay"));
        auto adaptive_mode_ =
            BOOST_GET_CONST(std::string, op_desc->GetAttr("adaptive_mode"));
        auto adaptive_mode = AdaptiveModeFromStr(adaptive_mode_);
        auto weight_decay_mode_ =
            BOOST_GET_CONST(std::string, op_desc->GetAttr("weight_decay_mode"));
        auto weight_decay_mode = WeightDecayModeFromStr(weight_decay_mode_);
        resources_->optimizer_fn = [=](float lr) {
          return std::make_unique<popart::Adaptive>(
              popart::OptimizerValue(lr, false),
              popart::OptimizerValue(weight_decay, true),
              popart::OptimizerValue(alpha, true),
              popart::OptimizerValue(momentum, true),
              popart::OptimizerValue(eps, true),
              popart::OptimizerValue(loss_scaling, true), adaptive_mode,
              weight_decay_mode, popart::DataType::UNDEFINED,
              popart::DataType::FLOAT, popart::DataType::FLOAT,
              popart::DataType::FLOAT);
        };
      } else {
        PADDLE_THROW(platform::errors::Unimplemented(
            "optimizer %s is not implemented", type));
      }
    }
  }
}
void Compiler::InsertTensors(const std::vector<std::string>& output_names,
......@@ -288,7 +449,7 @@ void Compiler::InsertTensors(const std::vector<std::string>& output_names,
platform::errors::Fatal("InsertTensors size mismatch"));
for (int i = 0; i < tensor_ids.size(); i++) {
std::string tensor_id = tensor_ids[i];
tensors_.emplace(output_names[i], tensor_ids[i]);
resources_->tensors.emplace(output_names[i], tensor_ids[i]);
}
}
......@@ -296,11 +457,11 @@ void Compiler::InsertTensors(const std::vector<std::string>& output_names,
const std::string& tensor_id) {
PADDLE_ENFORCE_EQ(output_names.size(), 1,
platform::errors::Fatal("InsertTensors size mismatch"));
tensors_.emplace(output_names[0], tensor_id);
resources_->tensors.emplace(output_names[0], tensor_id);
}
void Compiler::SetIpuIndexStage(const std::vector<std::string>& tensor_ids,
const framework::OpDesc* op_desc) {
const OpDesc* op_desc) {
VLOG(10) << "enter Compiler::SetIpuIndexStage";
auto tensor_ids_set =
std::set<std::string>(tensor_ids.begin(), tensor_ids.end());
......@@ -321,7 +482,7 @@ void Compiler::SetIpuIndexStage(const std::vector<std::string>& tensor_ids,
}
void Compiler::SetIpuIndexStage(const std::string& tensor_id,
const framework::OpDesc* op_desc) {
const OpDesc* op_desc) {
VLOG(10) << "enter Compiler::SetIpuIndexStage";
if (op_desc->HasAttr(sIpuIndexAttr)) {
......@@ -339,20 +500,73 @@ void Compiler::SetIpuIndexStage(const std::string& tensor_id,
VLOG(10) << "leave Compiler::SetIpuIndexStage";
}
std::vector<popart::TensorId>& Compiler::GetWeights() { return weights_; }
// Apply the available-memory-proportion (AMP) setting to every output
// tensor of a matmul op; non-matmul ops are left untouched.
void Compiler::SetAMPAttributes(const std::vector<std::string>& tensor_ids,
                                const OpDesc* op_desc) {
  if (op_desc->Type() != "popart_matmul") {
    return;
  }
  for (const auto& id : tensor_ids) {
    SetAMPAttributes(id, op_desc);
  }
}
// Apply the strategy's available-memory-proportion to a single matmul
// output tensor. Values outside (0, 1] are treated as "disabled".
void Compiler::SetAMPAttributes(const std::string& tensor_id,
                                const OpDesc* op_desc) {
  VLOG(10) << "enter Compiler::SetAMPAttributes";
  if (op_desc->Type() == "popart_matmul") {
    const auto proportion = ipu_strategy_->available_memory_proportion;
    const bool in_valid_range = proportion > 0.0f && proportion <= 1.0;
    if (in_valid_range) {
      builder_->setAvailableMemoryProportion(tensor_id, proportion);
    }
  }
  VLOG(10) << "leave Compiler::SetAMPAttributes";
}
void Compiler::SetSerializeAttributes(
const std::vector<std::string>& tensor_ids, const OpDesc* op_desc) {
VLOG(10) << "enter Compiler::SetSerializeAttributes";
auto tensor_ids_set =
std::set<std::string>(tensor_ids.begin(), tensor_ids.end());
// convertFloatsToHalfs
void Compiler::ConvertProtoToFp16() {
if (op_desc->Type() == "popart_matmul") {
if (op_desc->HasAttr(sMatmulSerializeFactor)) {
auto factor =
BOOST_GET_CONST(int, op_desc->GetAttr(sMatmulSerializeFactor));
std::string mode = "output_channels";
if (op_desc->HasAttr(sMatmulSerializeMode)) {
mode = BOOST_GET_CONST(std::string,
op_desc->GetAttr(sMatmulSerializeMode));
}
builder_->setSerializeMatMul(tensor_ids_set, mode, (int64_t)factor, true);
}
}
VLOG(10) << "leave Compiler::SetSerializeAttributes";
}
// Single-tensor convenience overload: forwards to the vector overload.
void Compiler::SetSerializeAttributes(const std::string& tensor_id,
                                      const OpDesc* op_desc) {
  SetSerializeAttributes(std::vector<std::string>{tensor_id}, op_desc);
}
// Register the paddle-op -> popart-custom-op mappings used during lowering.
// Existing entries win: std::map::emplace does not overwrite a duplicate key.
void Compiler::SetCustomOps(
    const std::vector<IpuCustomOpIdentifier>& custom_ops) {
  // Iterate by const reference: IpuCustomOpIdentifier holds strings and an
  // OperatorIdentifier, so a by-value loop copied every element needlessly.
  for (const auto& x : custom_ops) {
    custom_ops_.emplace(x.paddle_op, x);
  }
}
// Convert the built ONNX proto to fp16 (floats -> halfs), cache the result
// in converted_proto_, and return it.
std::string Compiler::GetFP16ModelProto() {
  popart::GraphTransformer graph_transformer(builder_->getModelProto());
  graph_transformer.convertFloatsToHalfs();
  converted_proto_ = graph_transformer.getModelProto();
  // Return the cached copy instead of re-serializing the model a second
  // time via graph_transformer.getModelProto().
  return converted_proto_;
}
// Return the serialized ONNX model. Prefers a previously cached fp16
// conversion; otherwise converts on demand when fp16 is enabled.
std::string Compiler::GetModelProto() {
  // Idiomatic emptiness check (was `converted_proto_.length()`).
  if (!converted_proto_.empty()) {
    return converted_proto_;
  }
  if (ipu_strategy_->enable_fp16) {
    return GetFP16ModelProto();
  }
  return builder_->getModelProto();
}
void Compiler::SaveModelProto(const std::string& path) {
......@@ -366,12 +580,12 @@ void Compiler::SaveModelProtoNoCheck(const std::string& path) {
onnxfile.close();
}
std::vector<std::string> Compiler::GetOpInputs(const framework::OpDesc* op) {
std::vector<std::string> Compiler::GetOpInputs(const OpDesc* op) {
auto ins = op->Input("__inputs__");
std::vector<std::string> inputs;
for (const auto& in : ins) {
if (tensors_.find(in) != tensors_.end()) {
inputs.push_back(tensors_[in]);
if (resources_->tensors.find(in) != resources_->tensors.end()) {
inputs.push_back(resources_->tensors[in]);
} else {
inputs.push_back(in);
}
......@@ -379,12 +593,11 @@ std::vector<std::string> Compiler::GetOpInputs(const framework::OpDesc* op) {
return inputs;
}
const std::vector<std::string>& Compiler::GetOpOutputs(
const framework::OpDesc* op) {
const std::vector<std::string>& Compiler::GetOpOutputs(const OpDesc* op) {
return op->Output("__outputs__");
}
popart::DebugContext Compiler::BuildDebugContext(const framework::OpDesc* op) {
popart::DebugContext Compiler::BuildDebugContext(const OpDesc* op) {
auto op_identify_id =
BOOST_GET_CONST(std::string, op->GetAttr(sOpIdentifyIdAttr));
VLOG(10) << "op_identify_id of op: " << op->Type() << " is "
......
......@@ -16,76 +16,119 @@
#include <popart/builder.hpp>
#include <popart/graphtransformer.hpp>
#include <popart/optimizer.hpp>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/ipu/common.h"
#include "paddle/fluid/platform/ipu/ipu_strategy.h"
#include "paddle/fluid/platform/device/ipu/ipu_names.h"
#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
namespace paddle {
namespace platform {
namespace ipu {
// Everything the Compiler produces that later stages (the executor) consume:
// the popart tensor ids of inputs/outputs/weights, the loss tensor, and the
// state needed to (re)build a popart::Optimizer for training.
struct CompilerResources {
  // popart input tensor_ids
  std::vector<popart::TensorId> inputs;
  // popart output tensor_ids
  std::vector<popart::TensorId> outputs;
  // <paddle_var_name, popart_tensor_ids>
  std::map<std::string, popart::TensorId> tensors;
  // popart_weight_ids
  std::vector<popart::TensorId> weights;
  // popart loss tensor_id
  popart::TensorId loss_var;
  // paddle lr var_name
  std::string lr_var;
  // lr value
  float lr;
  // flag for lr is constant or scheduling
  bool with_lr_sched = false;
  // paddle optimizer type, eg: momentum, lamb
  std::string optimizer_type;

  // Factory that builds a popart optimizer for a given learning rate;
  // invoked by NewOptimizer/UpdateOptimizer below.
  using OptimizerFn =
      std::function<std::unique_ptr<popart::Optimizer>(float lr)>;
  OptimizerFn optimizer_fn;

 public:
  // Currently cached optimizer; null until NewOptimizer/UpdateOptimizer runs.
  popart::Optimizer *Optimizer() { return optimizer.get(); }

  // Build and cache an optimizer using the stored learning rate `lr`.
  popart::Optimizer *NewOptimizer() {
    optimizer = optimizer_fn(lr);
    return optimizer.get();
  }

  // Rebuild the optimizer with a new learning rate (lr scheduling).
  // NOTE(review): this does not update the `lr` member — presumably callers
  // keep it in sync themselves; confirm before relying on `lr` afterwards.
  popart::Optimizer *UpdateOptimizer(float lr_new) {
    optimizer = optimizer_fn(lr_new);
    return optimizer.get();
  }

 private:
  std::unique_ptr<popart::Optimizer> optimizer;
};
class Compiler {
public:
Compiler();
~Compiler();
void RegisterOpFunc();
void LowerBody(const framework::ir::Graph *graph);
void InitInputs(framework::ir::Graph *graph,
const std::vector<std::string> &feed_list);
void Prepare();
void LowerBody(const Graph *graph);
void InitInputs(Graph *graph, const std::vector<std::string> &feed_list);
void InitOutputs(const std::vector<std::string> &fetch_list);
void LowerWeights(const framework::ir::Graph *graph,
const framework::Scope *scope_);
void LowerConstants(const Graph *graph, const Scope *scope);
void LowerWeights(const Graph *graph, const Scope *scope);
void LowerOptimier(const Graph *graph, const Scope *scope);
void InsertTensors(const std::vector<std::string> &output_names,
const std::vector<std::string> &tensor_ids);
void InsertTensors(const std::vector<std::string> &output_names,
const std::string &tensor_id);
void SetIpuIndexStage(const std::vector<std::string> &tensor_ids,
const framework::OpDesc *op_desc);
void SetIpuIndexStage(const std::string &tensor_id,
const framework::OpDesc *op_desc);
std::vector<popart::TensorId> GetInputs() { return inputs_; }
std::vector<popart::TensorId> GetOutputs() { return outputs_; }
std::map<std::string, popart::TensorId> GetTensors() { return tensors_; }
std::vector<popart::TensorId> &GetWeights();
const OpDesc *op_desc);
void SetIpuIndexStage(const std::string &tensor_id, const OpDesc *op_desc);
void SetAMPAttributes(const std::vector<std::string> &tensor_ids,
const OpDesc *op_desc);
void SetAMPAttributes(const std::string &tensor_id, const OpDesc *op_desc);
void SetSerializeAttributes(const std::vector<std::string> &tensor_ids,
const OpDesc *op_desc);
void SetSerializeAttributes(const std::string &tensor_id,
const OpDesc *op_desc);
std::string GetModelProto();
void SetIpuStrategy(const IpuStrategy &strategy) {
ipu_strategy_ = &strategy;
};
}
void SetCustomOps(const std::vector<IpuCustomOpIdentifier> &custom_ops);
CompilerResources *GetResources() { return resources_.get(); }
std::string GetModelProto();
std::string GetFP16ModelProto();
void SaveModelProto(const std::string &path);
void SaveModelProtoNoCheck(const std::string &path);
void ConvertProtoToFp16();
private:
std::vector<std::string> GetOpInputs(const framework::OpDesc *op);
const std::vector<std::string> &GetOpOutputs(const framework::OpDesc *op);
popart::DebugContext BuildDebugContext(const framework::OpDesc *op);
std::vector<std::string> GetOpInputs(const OpDesc *op);
const std::vector<std::string> &GetOpOutputs(const OpDesc *op);
popart::DebugContext BuildDebugContext(const OpDesc *op);
private:
std::unique_ptr<popart::Builder> builder_;
std::unique_ptr<CompilerResources> resources_;
using OpFunc = std::function<void(framework::OpDesc *op_desc)>;
using OpFunc = std::function<void(OpDesc *op_desc)>;
std::unordered_map<std::string, OpFunc> name_function_;
// stateful variable
std::map<std::string, popart::TensorId> tensors_;
// feed_list_ & fetch_list save paddle tensor id
std::vector<std::string> feed_list_;
std::vector<std::string> fetch_list_;
// inputs_ & outputs_ save popart tensor id
std::vector<popart::TensorId> inputs_;
std::vector<popart::TensorId> outputs_;
// weights info map
std::vector<popart::TensorId> weights_;
std::string converted_proto_ = "";
const IpuStrategy *ipu_strategy_ = nullptr;
std::map<std::string, IpuCustomOpIdentifier> custom_ops_;
};
} // namespace ipu
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......@@ -12,26 +12,42 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/device/ipu/device.h"
#include "paddle/fluid/platform/device/ipu/ipu_device.h"
#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
namespace paddle {
namespace platform {
namespace ipu {
Device::Device(const popart::DeviceInfo& device_info)
: id_(device_info.getId()), is_attached_(device_info.isAttached()) {
popart::DeviceType popart_device_type = device_info.getType();
switch (popart_device_type) {
case popart::DeviceType::IpuModel:
device_type_ = DeviceType::IpuModel;
break;
case popart::DeviceType::Ipu:
device_type_ = DeviceType::Ipu;
break;
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"popart::DeviceType:Unsupported type %d", popart_device_type));
// Return the number of IPU devices visible to Poplar. When the IpuModel
// simulator is requested via the POPLAR_IPUMODEL env var, report one device.
// Throws Unavailable if enumeration finds no device.
int GetNumDevices() {
  bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
  if (ipu_model) {
    return 1;
  }
  // enumerateDevices() returns a container; cast its size_t size explicitly
  // to avoid an implicit narrowing conversion.
  int num_devices = static_cast<int>(
      popart::DeviceManager::createDeviceManager().enumerateDevices().size());
  PADDLE_ENFORCE_GT(num_devices, 0,
                    platform::errors::Unavailable(
                        "Did not find any IPU devices, please "
                        "make sure the Poplar SDK is enabled."));
  return num_devices;
}
// Return the ids of all IPU devices visible to Poplar. With the IpuModel
// simulator (POPLAR_IPUMODEL env var), a single fake device id {0} is used.
// Throws Unavailable if enumeration finds no device.
std::vector<int> GetDeviceIds() {
  bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
  if (ipu_model) {
    return {0};
  }
  auto devices =
      popart::DeviceManager::createDeviceManager().enumerateDevices();
  PADDLE_ENFORCE_GT(
      devices.size(), 0,
      platform::errors::Unavailable("Did not find any IPU devices, please "
                                    "make sure the Poplar SDK is enabled."));
  std::vector<int> device_ids;
  device_ids.reserve(devices.size());
  // Iterate by const reference: each element is a shared_ptr<DeviceInfo>,
  // and copying it per iteration incurs an atomic refcount bump.
  for (const auto& device : devices) {
    device_ids.push_back(device->getId());
  }
  return device_ids;
}
} // namespace ipu
......
......@@ -21,23 +21,11 @@ namespace paddle {
namespace platform {
namespace ipu {
enum class DeviceType { IpuModel = 0, Cpu, Ipu, OfflineIpu, Sim };
class Device {
public:
Device() {}
explicit Device(const popart::DeviceInfo& device_info);
int getId() const { return id_; }
bool isAttached() const { return is_attached_; }
DeviceType getType() const { return device_type_; }
private:
int id_;
bool is_attached_;
DeviceType device_type_;
/* TODO:: Add more elements in the future */
};
// get the number of all available IPUs
int GetNumDevices();
// get the device ids of all available IPUs
std::vector<int> GetDeviceIds();
} // namespace ipu
} // namespace platform
......
......@@ -10,23 +10,18 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/device/ipu/ipu_info.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
#include "paddle/fluid/platform/device/ipu/ipu_device.h"
namespace paddle {
namespace platform {
//! Get a list of device ids from environment variable or use all.
std::vector<int> GetSelectedIPUDevices() {
std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
platform::ipu::IpuBackend::GetInstance();
return ipu_backend->GetDeviceIds();
return platform::ipu::GetDeviceIds();
}
//! Get the total number of IPU devices in system.
int GetIPUDeviceCount() {
std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
platform::ipu::IpuBackend::GetInstance();
return ipu_backend->GetNumDevices();
}
// Total number of IPU devices in the system; thin wrapper over ipu::GetNumDevices.
int GetIPUDeviceCount() { return platform::ipu::GetNumDevices(); }
} // namespace platform
} // namespace paddle
......@@ -17,8 +17,10 @@ limitations under the License. */
namespace paddle {
namespace platform {
std::vector<int> GetSelectedIPUDevices();
int GetIPUDeviceCount();
} // namespace platform
} // namespace paddle
#endif
......@@ -22,6 +22,8 @@ namespace ipu {
static constexpr const char *sIpuIndexAttr = "ipu_index";
static constexpr const char *sIpuStageAttr = "ipu_stage";
static constexpr const char *sMatmulSerializeFactor = "serialize_factor";
static constexpr const char *sMatmulSerializeMode = "serialize_mode";
static constexpr const char *sOpIdentifyIdAttr = "op_identify_id";
static constexpr const char *sDebugInfoId = "__debug_info_id";
......@@ -29,6 +31,7 @@ static constexpr const char *sBeta1 = "beta1";
static constexpr const char *sBeta2 = "beta2";
static constexpr const char *sBeta1Pow = "Beta1Pow";
static constexpr const char *sBeta2Pow = "Beta2Pow";
static constexpr const char *sLossScaling = "LossScaling";
} // namespace ipu
} // namespace platform
......
......@@ -12,10 +12,27 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/ipu/ipu_strategy.h"
#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
#include <glog/logging.h>
namespace paddle {
namespace platform {
namespace ipu {} // namespace ipu
namespace ipu {
// Turn on the named popart graph-transformation pattern.
void IpuStrategy::enablePattern(const std::string& pattern) {
  VLOG(10) << "enable popart pattern: " << pattern;
  popart_patterns.enablePattern(pattern, /*enable=*/true);
}
// Turn off the named popart graph-transformation pattern.
void IpuStrategy::disablePattern(const std::string& pattern) {
  VLOG(10) << "disable popart pattern: " << pattern;
  popart_patterns.enablePattern(pattern, /*enable=*/false);
}
// Whether the named popart pattern is currently enabled.
// (The `const` on the by-value return is meaningless but kept to match the
// declaration in the header.)
const bool IpuStrategy::isPatternEnabled(const std::string& pattern) {
  return popart_patterns.isPatternEnabled(pattern);
}
} // namespace ipu
} // namespace platform
} // namespace paddle
......@@ -14,24 +14,86 @@ limitations under the License. */
#pragma once
#include <popart/op.hpp>
#include <popart/sessionoptions.hpp>
#include <popart/tensorlocation.hpp>
#include "popart/patterns/patterns.hpp"
namespace paddle {
namespace platform {
namespace ipu {
using VirtualGraphMode = popart::VirtualGraphMode;
using RecomputationType = popart::RecomputationType;
// Aggregates all user-tunable knobs for compiling/running a paddle program
// on IPU, plus the underlying popart session options and pattern set.
struct IpuStrategy {
  IpuStrategy() {
    // we always save optimizer state to OffChip and enable rts for saving
    // memory
    auto storage = popart::TensorLocation(popart::TensorStorage::OffChip,
                                          popart::ReplicatedTensorSharding::On);
    popart_options.optimizerStateTensorLocationSettings =
        popart::TensorLocationSettings(storage);

    // We divide the accumulationFactor and replicatedGraphCount after all
    // reduce
    popart_options.accumulationAndReplicationReductionType =
        popart::ReductionType::Mean;
    popart_options.meanAccumulationAndReplicationReductionStrategy =
        popart::MeanReductionStrategy::Post;
    popart_options.enableFloatingPointChecks = false;

    // A directory for log traces to be written into.
    popart_options.logDir = "popart_log";
  }
  ~IpuStrategy() {}

  // Number ipus total needed, replica * ipu_per_replica
  int num_ipus = 1;
  // batches per step
  int batches_per_step = 1;
  // NOTE(review): looks like a leftover of the rename to micro_batch_size
  // below — confirm whether batch_size is still read anywhere.
  int batch_size = 1;
  // micro batch-size
  int micro_batch_size = 1;
  // training flag, true for training
  bool is_training = true;
  // save the onnx model lowered by paddle program description
  bool save_init_onnx = false;
  bool save_last_onnx = true;
  // NOTE(review): presumably superseded by `popart_options` below — confirm
  // this trailing-underscore member is unused before removing it.
  popart::SessionOptions popart_options_;
  // save the trained model
  bool save_onnx_checkpoint = false;
  // save paddle model per n steps
  int save_per_n_step = 1;
  // average sharding, debugging used
  bool need_avg_shard = false;
  // flag for fp16, true for pure fp16
  bool enable_fp16 = false;
  // available memory proportion, 0.0f for disable
  float available_memory_proportion = 0.0f;
  // loss scaling, currently we can't get loss scaling from
  // optimizer_extract_pass, so we have to set it here
  float loss_scaling = 1.0f;
  // defaultMaxWeightNorm for adam optimizer
  float max_weight_norm = 65504.0f;

  // popart session option
  popart::SessionOptions popart_options;
  popart::Patterns popart_patterns;

 public:
  // Enable/disable/query a popart graph-transformation pattern by name.
  void enablePattern(const std::string& t);
  void disablePattern(const std::string& t);
  const bool isPatternEnabled(const std::string& t);
};
} // namespace ipu
......
......@@ -12,22 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/ipu/ipu_utils.h"
#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
#include <cmath>
namespace paddle {
namespace platform {
namespace ipu {
void* PaddleIArray::data() { return tensor_->data(); }
void* PaddleIArray::data() { return tensor_.data(); }
popart::DataType PaddleIArray::dataType() const {
return VarType2PopartType(tensor_->type());
return VarType2PopartType(tensor_.type());
}
std::size_t PaddleIArray::rank() const { return tensor_->dims().size(); }
std::size_t PaddleIArray::rank() const { return tensor_.dims().size(); }
int64_t PaddleIArray::dim(size_t index) const {
return tensor_->dims().at(index);
return tensor_.dims().at(index);
}
std::size_t PaddleIArray::nelms() const {
......@@ -150,6 +151,32 @@ bool GetBoolEnv(std::string str) {
}
}
// Map a paddle optimizer type to the {popart_prefix, paddle_postfix} pairs
// used to translate optimizer-state tensor names between the two frameworks.
// The leading {"", ""} entry covers the weight tensor itself; adam/lamb add
// entries for the two moment accumulators and the step counter. sgd/momentum
// and unknown types carry no extra optimizer state here.
std::vector<std::pair<std::string, std::string>> GetOptPrePostfix(
    const std::string& opt_type) {
  std::vector<std::pair<std::string, std::string>> pre_post_fix;
  // emplace_back constructs the pairs in place (was push_back(make_pair)).
  pre_post_fix.emplace_back("", "");
  if (opt_type == "adam" || opt_type == "lamb") {
    pre_post_fix.emplace_back("Accl1___", "_moment1_0");
    pre_post_fix.emplace_back("Accl2___", "_moment2_0");
    pre_post_fix.emplace_back("Step___", "_beta1_pow_acc_0");
  }
  return pre_post_fix;
}
// Round `num_ipus` up to the next power of two, since IPUs must be
// requested in power-of-two counts. Uses integer arithmetic instead of the
// previous pow/ceil/log2 chain, which both relied on floating point and hit
// a math domain error for num_ipus == 0; non-positive input now yields 1.
int RequestIpus(const int num_ipus) {
  if (num_ipus <= 1) {
    return 1;
  }
  int count = 1;
  while (count < num_ipus) {
    count <<= 1;
  }
  return count;
}
} // namespace ipu
} // namespace platform
} // namespace paddle
......@@ -17,14 +17,27 @@ limitations under the License. */
#include <popart/ndarraywrapper.hpp>
#include <popart/tensordata.hpp>
#include <popart/tensorinfo.hpp>
#include <popart/vendored/any.hpp>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
namespace platform {
namespace ipu {
using float16 = platform::float16;
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using Scope = framework::Scope;
using OpDesc = framework::OpDesc;
using Graph = framework::ir::Graph;
using Node = framework::ir::Node;
using BlockDesc = framework::BlockDesc;
// onnx dtype
// https://github.com/onnx/onnx/blob/master/onnx/onnx-ml.proto3
enum ONNXDataType : int {
......@@ -49,14 +62,15 @@ enum ONNXDataType : int {
class PaddleIArray final : public popart::IArray {
public:
explicit PaddleIArray(framework::Tensor *tensor) : tensor_(tensor) {
explicit PaddleIArray(const Tensor* tensor) {
tensor_.ShareDataWith(*tensor);
for (int i = 0; i < tensor->dims().size(); ++i) {
shape_.push_back(tensor->dims().at(i));
}
}
public:
void *data();
void* data();
popart::DataType dataType() const;
std::size_t rank() const;
int64_t dim(size_t index) const;
......@@ -64,7 +78,7 @@ class PaddleIArray final : public popart::IArray {
const popart::Shape shape() const;
private:
framework::Tensor *tensor_;
Tensor tensor_;
std::vector<int64_t> shape_;
};
......@@ -74,8 +88,7 @@ popart::DataType OnnxDtype2PopartType(const int type);
bool GetBoolEnv(std::string str);
template <typename T>
std::unique_ptr<popart::NDArrayWrapper<T>> Tensor2IArray(
const framework::Tensor &tensor) {
std::unique_ptr<popart::NDArrayWrapper<T>> Tensor2IArray(const Tensor& tensor) {
auto dtype = VarType2PopartType(tensor.type());
auto shape = std::vector<int64_t>();
for (size_t i = 0; i < tensor.dims().size(); ++i) {
......@@ -84,18 +97,140 @@ std::unique_ptr<popart::NDArrayWrapper<T>> Tensor2IArray(
popart::TensorInfo tensor_info(dtype, shape);
return std::make_unique<popart::NDArrayWrapper<T>>(
reinterpret_cast<T *>(tensor.data()), tensor_info);
reinterpret_cast<T*>(tensor.data()), tensor_info);
}
template <typename T>
std::unique_ptr<popart::NDArrayWrapper<T>> LoDTensor2IArray(
framework::LoDTensor const &lod_tensor) {
PADDLE_ENFORCE_EQ(
lod_tensor.lod().size(), 0UL,
platform::errors::InvalidArgument("LoDTensor2IArray is Unimplemented"));
LoDTensor const& lod_tensor) {
if (lod_tensor.lod().size() == 0) {
return Tensor2IArray<T>(lod_tensor);
} else {
PADDLE_THROW(
platform::errors::Unimplemented("LoDTensor2IArray is Unimplemented"));
}
}
// Read the first element of the named LoDTensor in `scope` as type T.
// NOTE(review): assumes the tensor's dtype actually is T — confirm callers.
template <typename T>
T GetSingleVarFromScope(const Scope* scope, const std::string& var_name) {
  auto var = scope->GetVar(var_name);
  // Bind by const reference: Variable::Get returns a const&, and the
  // previous `auto tensor = ...` copied the whole LoDTensor.
  const auto& tensor = var->Get<framework::LoDTensor>();
  return tensor.data<T>()[0];
}
// boost::static_visitor that copies one paddle op attribute into a
// popart::any attribute map under the key `attr_name`. BlockDesc and
// boost::blank attributes cannot be forwarded to popart and throw.
struct CustomOpAttrVisitor : public boost::static_visitor<void> {
  explicit CustomOpAttrVisitor(std::map<std::string, popart::any>* attr,
                               const std::string& attr_name)
      : attrs_(attr), attr_name_(attr_name) {}

  // Destination map (not owned). The previous `mutable` qualifier was
  // redundant: the const operator() overloads mutate the pointee, never the
  // pointer itself.
  std::map<std::string, popart::any>* attrs_;
  std::string attr_name_;

  void operator()(int v) const { attrs_->emplace(attr_name_, v); }
  void operator()(float v) const { attrs_->emplace(attr_name_, v); }
  void operator()(const std::string& v) const {
    attrs_->emplace(attr_name_, v);
  }
  void operator()(const std::vector<int>& v) const {
    attrs_->emplace(attr_name_, v);
  }
  void operator()(const std::vector<float>& v) const {
    attrs_->emplace(attr_name_, v);
  }
  void operator()(const std::vector<std::string>& v) const {
    attrs_->emplace(attr_name_, v);
  }
  void operator()(bool v) const { attrs_->emplace(attr_name_, v); }
  void operator()(const std::vector<bool>& v) const {
    attrs_->emplace(attr_name_, v);
  }
  void operator()(BlockDesc* desc) const {
    PADDLE_THROW(platform::errors::Unavailable(
        "Unsupported calling method for `BlockDesc` type."));
  }
  void operator()(const std::vector<BlockDesc*>& v) const {
    PADDLE_THROW(platform::errors::Unavailable(
        "Unsupported calling method for `BlockDesc` type."));
  }
  void operator()(int64_t v) const { attrs_->emplace(attr_name_, v); }
  void operator()(const std::vector<int64_t>& v) const {
    attrs_->emplace(attr_name_, v);
  }
  void operator()(const std::vector<double>& v) const {
    attrs_->emplace(attr_name_, v);
  }
  void operator()(boost::blank) const {
    PADDLE_THROW(platform::errors::Unavailable(
        "Unsupported calling method for `boost::blank` type."));
  }
};
// Binds a paddle op name to its popart custom-op identifier
// (domain/type/version triple).
struct IpuCustomOpIdentifier {
  IpuCustomOpIdentifier(const std::string& _paddle_op,
                        const std::string& _popart_op,
                        const std::string& _domain, unsigned int _version)
      : paddle_op(_paddle_op), popart_op(_domain, _popart_op, _version) {}

  // Human-readable description for logging; const since it only reads state.
  std::string repr() const {
    std::ostringstream os;
    os << "paddle_op: " << paddle_op << ", domain: " << popart_op.domain
       << ", type: " << popart_op.type << ", version: " << popart_op.version;
    return os.str();
  }

  std::string paddle_op;
  popart::OperatorIdentifier popart_op;
};
// boost::static_visitor that materialises a constant op's vector attribute
// into a LoDTensor, converting float -> float16 element-wise when the target
// dtype is FP16. Any non-vector attribute is rejected with InvalidArgument.
struct ConstantOpAttrVisitor : public boost::static_visitor<void> {
  explicit ConstantOpAttrVisitor(framework::LoDTensor* tensor,
                                 framework::proto::VarType::Type dtype)
      : tensor_(tensor), dtype_(dtype) {}

  // Destination tensor (not owned) and the requested element dtype.
  framework::LoDTensor* tensor_;
  framework::proto::VarType::Type dtype_;

  void operator()(const std::vector<int>& vec) const {
    framework::TensorFromVector<int>(vec, tensor_);
  }
  void operator()(const std::vector<float>& vec) const {
    // FP16 constants arrive as float attributes; convert element-wise.
    if (dtype_ == framework::proto::VarType::FP16) {
      std::vector<float16> vec_fp16;
      std::transform(vec.begin(), vec.end(), std::back_inserter(vec_fp16),
                     [](float f) -> float16 { return float16(f); });
      framework::TensorFromVector<float16>(vec_fp16, tensor_);
    } else {
      framework::TensorFromVector<float>(vec, tensor_);
    }
  }
  void operator()(const std::vector<bool>& vec) const {
    framework::TensorFromVector<bool>(vec, tensor_);
  }
  void operator()(const std::vector<int64_t>& vec) const {
    framework::TensorFromVector<int64_t>(vec, tensor_);
  }
  void operator()(const std::vector<double>& vec) const {
    framework::TensorFromVector<double>(vec, tensor_);
  }
  // Scalar / non-vector attributes are invalid for constant ops.
  void RaiseError() const {
    PADDLE_THROW(
        platform::errors::InvalidArgument("Constant value must be a vector"));
  }
  void operator()(int v) const { RaiseError(); }
  void operator()(float v) const { RaiseError(); }
  void operator()(const std::string& v) const { RaiseError(); }
  void operator()(const std::vector<std::string>& v) const { RaiseError(); }
  void operator()(bool v) const { RaiseError(); }
  void operator()(BlockDesc* desc) const { RaiseError(); }
  void operator()(const std::vector<BlockDesc*>& v) const { RaiseError(); }
  void operator()(int64_t v) const { RaiseError(); }
  void operator()(boost::blank) const { RaiseError(); }
};
std::vector<std::pair<std::string, std::string>> GetOptPrePostfix(
const std::string& opt_type);
int RequestIpus(const int num_ipus);
} // namespace ipu
} // namespace platform
} // namespace paddle
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// clang-format off
#pragma once
OP_DECL(popart_nllloss_v2, aiGraphcoreOpset.nllloss, SIG_ARG(INT32,popart::ReductionType,reduction) OPT_ARG(INT32,ignoreIndex) ARG(BOOL,inputIsLogProbability) ) // NOLINT
// clang-format on
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册