Unverified commit b2aee3e3, authored by Allen Guo, committed by GitHub

[IPU] update ipu_backend p0 (#38854)

* update ipu_backend

* sync with paddle internal
Co-authored-by: Xiaobing Wang <xiaobingw@graphcore.ai>
Co-authored-by: Allen Guo <alleng@graphcore.ai>
Co-authored-by: Zhixin Yao <zhixiny@graphcore.ai>
Co-authored-by: Haicheng Jiang <haichengj@graphcore.ai>
Co-authored-by: Han Zhao <hanzhao@graphcore.ai>

* apply comments 01

* update error message

* restore ipu_executor and ipu_optimizer

* add clang-format on
Co-authored-by: Xiaobing Wang <xiaobingw@graphcore.ai>
Co-authored-by: Zhixin Yao <zhixiny@graphcore.ai>
Co-authored-by: Haicheng Jiang <haichengj@graphcore.ai>
Co-authored-by: Han Zhao <hanzhao@graphcore.ai>
Parent b4cb3589
CMakeLists.txt
 IF(WITH_IPU)
   FILE(GLOB POPART_CANONICALIZATION_SRC ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/device/ipu/popart_canonicalization/*.cc)
-  cc_library(ipu_device SRCS device.cc DEPS enforce popart)
-  cc_library(ipu_utils SRCS ipu_utils.cc DEPS memory framework_proto popart)
-  cc_library(ipu_strategy SRCS ipu_strategy.cc DEPS popart graph framework_proto enforce)
-  cc_library(ipu_optimizer SRCS ipu_optimizer.cc DEPS popart enforce)
-  cc_library(ipu_executor SRCS ipu_executor.cc DEPS ipu_optimizer ipu_utils popart graph framework_proto)
-  cc_library(popart_canonicalization_utils SRCS ${POPART_CANONICALIZATION_SRC} DEPS framework_proto enforce ipu_utils)
-  cc_library(ipu_compiler SRCS ipu_compiler.cc DEPS popart graph ipu_utils graph_helper)
-  cc_library(ipu_backend SRCS ipu_backend.cc DEPS popart ipu_compiler graph framework_proto enforce ipu_utils ipu_strategy ipu_device ipu_executor graph_helper)
+  list(APPEND PADDLE_IPU_SRC ${POPART_CANONICALIZATION_SRC})
+  set(PADDLE_IPU_LIB "${CMAKE_CURRENT_BINARY_DIR}/libpaddle_ipu.so" CACHE STRING "")
+  set(PADDLE_IPU_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}" CACHE STRING "")
+  set(IPU_BACKEND_SRC
+    "ipu_device.cc"
+    "ipu_strategy.cc"
+    "ipu_executor.cc"
+    "ipu_compiler.cc"
+    "ipu_backend.cc"
+    "ipu_utils.cc"
+  )
+  cc_library(ipu_backend SRCS ${IPU_BACKEND_SRC} DEPS popart graph framework_proto enforce graph_helper timer)
   cc_library(ipu_info SRCS ipu_info.cc DEPS ipu_backend)
+  cc_library(paddle_ipu SHARED SRCS ${PADDLE_IPU_SRC} DEPS popart)
+  add_dependencies(paddle_ipu ipu_backend)
 ENDIF()
ipu_backend.cc
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/platform/ipu/ipu_backend.h"
-#include "paddle/fluid/platform/ipu/ipu_utils.h"
+#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
+#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/ir/graph.h"
@@ -24,170 +24,92 @@ namespace paddle {
 namespace platform {
 namespace ipu {
-std::shared_ptr<IpuBackend> IpuBackend::instance_ = nullptr;
+IpuBackend* IpuBackend::GetInstance() {
+  static IpuBackend instance;
+  return &instance;
+}
 IpuBackend::IpuBackend() {
-  compiler_ = std::make_shared<Compiler>();
+  compiler_ = std::make_unique<Compiler>();
   executor_ = std::make_unique<Executor>();
 }
-void IpuBackend::Clear() {
-  executor_.reset();
-  // detach device
-  if (device_ != nullptr && device_->isAttached()) {
-    device_->detach();
-    device_.reset();
-    device_ = nullptr;
-  }
-}
-IpuBackend::~IpuBackend() { Clear(); }
-std::shared_ptr<IpuBackend> IpuBackend::GetInstance() {
-  if (!instance_) {
-    instance_.reset(new IpuBackend());
-  }
-  return instance_;
-}
-// This api should only call from python, always return a new object
-std::shared_ptr<IpuBackend> IpuBackend::GetNewInstance() {
-  instance_.reset(new IpuBackend());
-  return instance_;
-}
+IpuBackend::~IpuBackend() {
+  compiler_.reset();
+  executor_.reset();
+}
-void IpuBackend::Compile(framework::ir::Graph* graph,
+void IpuBackend::Compile(Graph* graph,
                          const std::vector<std::string>& feed_list,
                          const std::vector<std::string>& fetch_list) {
   VLOG(10) << "enter IpuBackend::Compile";
+  compiler_->Prepare();
+  executor_->SetCompilerResources(compiler_->GetResources());
   compiler_->InitInputs(graph, feed_list);
+  compiler_->LowerConstants(graph, scope_);
   compiler_->LowerWeights(graph, scope_);
   compiler_->LowerBody(graph);
   compiler_->InitOutputs(fetch_list);
-  executor_->SetWeights(compiler_->GetWeights());
+  if (ipu_strategy_->is_training) {
+    compiler_->LowerOptimier(graph, scope_);
+  }
+  is_compiled_ = true;
+  // when call compile, means a new graph
+  is_prepared_ = false;
   VLOG(10) << "leave IpuBackend::Compile";
 }
-void IpuBackend::Run(const std::vector<const framework::Tensor*>& inputs,
-                     const std::vector<framework::Tensor*>& outputs,
+void IpuBackend::Run(const std::vector<const Tensor*>& inputs,
+                     const std::vector<Tensor*>& outputs,
                      const framework::ExecutionContext& ctx) {
   Prepare();
-  auto inputs_id = compiler_->GetInputs();
-  auto outputs_id = compiler_->GetOutputs();
-  executor_->Run(inputs_id, inputs, outputs_id, outputs, ctx);
+  timer_->Start();
+  executor_->Run(inputs, outputs, ctx);
+  timer_->Pause();
+  VLOG(10) << "[IPU Run]: " << timer_->ElapsedMS() << " (ms)";
 }
 void IpuBackend::Prepare() {
-  if (is_prepared_) {
-    return;
-  } else {
-    is_prepared_ = true;
-  }
-  // convert Model to fp16
-  if (ipu_strategy_->enable_fp16) {
-    compiler_->ConvertProtoToFp16();
-  }
-  auto proto = compiler_->GetModelProto();
-  auto tensors = compiler_->GetTensors();
-  auto outputs = compiler_->GetOutputs();
-  executor_->Prepare(proto, tensors, outputs, device_);
+  if (!is_prepared_) {
+    executor_->Prepare(compiler_->GetModelProto());
+    timer_.reset(new platform::Timer());
+    is_prepared_ = true;
+  }
 }
-void IpuBackend::SetScope(const framework::Scope& scope) {
+void IpuBackend::Detach() { executor_->Detach(); }
+void IpuBackend::Reset() {
+  executor_->Detach();
+  compiler_.reset();
+  executor_.reset();
+}
+void IpuBackend::SetScope(const Scope& scope) {
   scope_ = &scope;
   executor_->SetScope(&scope);
 }
 void IpuBackend::SetIpuStrategy(const IpuStrategy& strategy) {
   ipu_strategy_ = &strategy;
-  executor_->SetIpuStrategy(strategy);
   compiler_->SetIpuStrategy(strategy);
+  executor_->SetIpuStrategy(strategy);
 }
-size_t IpuBackend::GetNumDevices() {
-  // IpuModel
-  bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
-  if (ipu_model) return 1;
-  // Real dev
-  size_t num_devices =
-      popart::DeviceManager::createDeviceManager().enumerateDevices().size();
-  PADDLE_ENFORCE_GT(
-      num_devices, 0,
-      platform::errors::Unavailable(
-          "Do not found any IPU devices, please make "
-          "sure Poplar sdk is enabled or enable ENV \"POPLAR_IPUMODEL=1\""));
-  return num_devices;
-}
-std::vector<int> IpuBackend::GetDeviceIds() {
-  bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
-  if (ipu_model) {
-    return {0};
-  }
-  std::vector<int> device_ids;
-  auto devices =
-      popart::DeviceManager::createDeviceManager().enumerateDevices();
-  PADDLE_ENFORCE_GT(
-      devices.size(), 0,
-      platform::errors::Unavailable("Do not found any IPU devices, please make "
-                                    "sure Poplar sdk is enabled."));
-  for (auto device : devices) {
-    device_ids.push_back(device->getId());
-  }
-  return device_ids;
-}
-Device IpuBackend::GetDevice(int id) {
-  bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
-  if (ipu_model) {
-    std::map<std::string, std::string> deviceOpts{{"numIPUs", "1 "}};
-    device_ = popart::DeviceManager::createDeviceManager().createIpuModelDevice(
-        deviceOpts);
-    Device device(*device_.get());
-    return device;
-  }
-  size_t num_devices = GetNumDevices();
-  if (id < 0 || id >= num_devices) {
-    PADDLE_THROW(platform::errors::InvalidArgument(
-        "device id %d is invalid, number devices is %d", id, num_devices));
-  }
-  std::shared_ptr<popart::DeviceInfo> popart_device_info =
-      popart::DeviceManager::createDeviceManager().getDevice(
-          popart::SyncPattern::Full, id);
-  Device device(*popart_device_info.get());
-  return device;
-}
-void IpuBackend::AttachDevice(int id) {
-  // trick here
-  // Compiler ipu is not same as the runtime ipu.
-  VLOG(10) << "comile ipu id = " << id;
-  bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
-  if (ipu_model) {
-    return;
-  }
-  device_ = popart::DeviceManager::createDeviceManager().acquireAvailableDevice(
-      UpperIpuNum());
-  PADDLE_ENFORCE_NOT_NULL(
-      device_, platform::errors::Unavailable("Can't attach IPU, ipu_num = %d.",
-                                             UpperIpuNum()));
-}
-bool IpuBackend::DeviceIsAttached() { return device_ != nullptr; }
-// num_ipus must be pow(2,n);
-int IpuBackend::UpperIpuNum() {
-  PADDLE_ENFORCE_GT(ipu_strategy_->num_ipus, 0,
-                    platform::errors::Unavailable(
-                        "The ipu num get is wrong, please make sure the "
-                        "sharding or pipline parameter is right."));
-  int i = 0;
-  while (std::pow(2, i) < ipu_strategy_->num_ipus) {
-    i++;
-  }
-  return std::pow(2, i);
-}
+void IpuBackend::SetCustomOps(
+    const std::vector<IpuCustomOpIdentifier>& custom_ops) {
+  compiler_->SetCustomOps(custom_ops);
+}
+void IpuBackend::SaveMoldeProto(const std::string& path) {
+  if (ipu_strategy_->is_training && is_prepared_) {
+    executor_->SaveModelToHost(path);
+  } else if (is_compiled_) {
+    compiler_->SaveModelProtoNoCheck(path);
+  } else {
+    LOG(WARNING) << "Model is empty";
+  }
+}
 } // namespace ipu
...
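The singleton change above is the core of this file: the hand-managed static std::shared_ptr (plus GetNewInstance) is replaced by a function-local static, so the backend is created on first use and never re-created. A minimal sketch of the idiom outside Paddle, assuming nothing beyond standard C++11, where initialization of the local static is thread-safe:

    class Backend {
     public:
      // Meyers singleton: constructed on first call, destroyed at program exit.
      static Backend* GetInstance() {
        static Backend instance;
        return &instance;
      }

     private:
      Backend() = default;
      Backend(const Backend&) = delete;             // same intent as DISABLE_COPY_AND_ASSIGN
      Backend& operator=(const Backend&) = delete;
    };

Because a local static cannot be re-created, the old GetNewInstance escape hatch disappears; the new Detach/Reset methods take over explicit cleanup instead.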
ipu_backend.h
@@ -14,88 +14,86 @@ limitations under the License. */
 #pragma once
-#include <cmath>
 #include <popart/devicemanager.hpp>
 #include <popart/names.hpp>
-#include <popart/tensorinfo.hpp>
-#include "paddle/fluid/framework/feed_fetch_type.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/device/ipu/ipu_compiler.h"
+#include "paddle/fluid/platform/device/ipu/ipu_device.h"
+#include "paddle/fluid/platform/device/ipu/ipu_executor.h"
+#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
+#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/ipu/device.h"
-#include "paddle/fluid/platform/ipu/ipu_compiler.h"
-#include "paddle/fluid/platform/ipu/ipu_executor.h"
-#include "paddle/fluid/platform/ipu/ipu_strategy.h"
+#include "paddle/fluid/platform/timer.h"
 namespace paddle {
 namespace platform {
 namespace ipu {
+// IpuBackend is the center of paddle-ipu, its function include:
+//   1. Compile paddle model to popart model
+//   2. Run popart model, inference or training
+//   3. Request and release device
+//   4. Other helper function
 class IpuBackend {
-  // IpuBackend is the center of paddle-ipu, its function include:
-  //   1. Compile paddle model to popart model
-  //   2. Run popart model, inference or training
-  //   3. Request and release device
-  //   4. Other helper function
+ public:
+  static IpuBackend *GetInstance();
  public:
   IpuBackend();
   ~IpuBackend();
-  void Clear();
-  // return if exsits, else create and return
-  static std::shared_ptr<IpuBackend> GetInstance();
-  // always return a new instance_
-  static std::shared_ptr<IpuBackend> GetNewInstance();
   // what compile does include(call compiler_):
   //   1. map paddle-op -> poart op
   //   2. construct popart onnx compute graph
-  void Compile(framework::ir::Graph *graph,
-               const std::vector<std::string> &feed_list,
+  void Compile(Graph *graph, const std::vector<std::string> &feed_list,
                const std::vector<std::string> &fetch_list);
   // what run does include:
   //   1. construct forward onnx graph
   //   2. graph-level optimization
   //   3. autodiff
-  void Run(const std::vector<const framework::Tensor *> &inputs,
-           const std::vector<framework::Tensor *> &outputs,
+  void Run(const std::vector<const Tensor *> &inputs,
+           const std::vector<Tensor *> &outputs,
            const framework::ExecutionContext &ctx);
-  Executor &GetExecutor() { return *executor_; }
+  // detach IPU manually
+  void Detach();
+  // reset manually
+  // call it before destruct works
+  void Reset();
-  void SetScope(const framework::Scope &scope);
-  const framework::Scope *GetScope() { return scope_; }
+  void SetScope(const Scope &scope);
+  const Scope *GetScope() { return scope_; }
   void SetIpuStrategy(const IpuStrategy &strategy);
   const IpuStrategy *GetIpuStrategy() { return ipu_strategy_; }
+  void SetCustomOps(const std::vector<IpuCustomOpIdentifier> &custom_ops);
-  // Device
-  size_t GetNumDevices();
-  std::vector<int> GetDeviceIds();
-  Device GetDevice(int id);
-  void AttachDevice(int id);
-  bool DeviceIsAttached();
+  // save compiled model to onnx
+  void SaveMoldeProto(const std::string &path);
  private:
-  int UpperIpuNum();
   void Prepare();
  private:
-  std::shared_ptr<Compiler> compiler_;
+  std::unique_ptr<Compiler> compiler_;
   std::unique_ptr<Executor> executor_;
-  std::shared_ptr<popart::DeviceInfo> device_;
+  bool is_compiled_ = false;
   bool is_prepared_ = false;
   // not own
-  const framework::Scope *scope_ = nullptr;
+  const Scope *scope_ = nullptr;
   const IpuStrategy *ipu_strategy_ = nullptr;
  private:
-  static std::shared_ptr<IpuBackend> instance_;
+  // time record for IpuBackend::Run
+  std::unique_ptr<platform::Timer> timer_;
+  DISABLE_COPY_AND_ASSIGN(IpuBackend);
 };
 } // namespace ipu
...
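Read together, the declarations above imply a fixed call order for users of the backend. A hedged sketch of that sequence (graph, scope, strategy, and the feed/fetch lists are assumed to exist; error handling omitted):

    using paddle::platform::ipu::IpuBackend;

    auto* backend = IpuBackend::GetInstance();
    backend->SetScope(scope);            // weights are resolved from this scope
    backend->SetIpuStrategy(strategy);   // must precede Compile: it reads is_training
    backend->Compile(graph, feed_list, fetch_list);
    backend->Run(inputs, outputs, ctx);  // first Run calls Prepare() lazily
    backend->Detach();                   // hand the IPU back when finished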
ipu_compiler.cc
@@ -12,17 +12,66 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/fluid/platform/ipu/ipu_compiler.h"
+#include "paddle/fluid/platform/device/ipu/ipu_compiler.h"
+#include <popart/adam.hpp>
+#include <popart/adaptive.hpp>
+#include <popart/optimizer.hpp>
+#include <popart/sgd.hpp>
 #include "paddle/fluid/framework/ir/graph_helper.h"
-#include "paddle/fluid/platform/ipu/ipu_utils.h"
+#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
 namespace paddle {
 namespace platform {
 namespace ipu {
+popart::AdamMode AdamModeFromStr(const std::string& str) {
+  if (str == "adam") {
+    return popart::AdamMode::Adam;
+  } else if (str == "adamax") {
+    return popart::AdamMode::AdaMax;
+  } else if (str == "lamb") {
+    return popart::AdamMode::Lamb;
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "Uknown AdamMode: %s, AdamMode must be one of these values: adam, "
+        "adamax or lamb",
+        str));
+  }
+}
+popart::AdaptiveMode AdaptiveModeFromStr(const std::string& str) {
+  if (str == "adadelta") {
+    return popart::AdaptiveMode::AdaDelta;
+  } else if (str == "adagrad") {
+    return popart::AdaptiveMode::AdaGrad;
+  } else if (str == "rmsprop") {
+    return popart::AdaptiveMode::RMSProp;
+  } else if (str == "centered_rmsprop") {
+    return popart::AdaptiveMode::CenteredRMSProp;
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "Uknown AdaptiveMode: %s, AdaptiveMode must be one of these values: "
+        "adadelta, adagrad, rmsprop or centered_rmsprop",
+        str));
+  }
+}
+popart::WeightDecayMode WeightDecayModeFromStr(const std::string& str) {
+  if (str == "decay") {
+    return popart::WeightDecayMode::Decay;
+  } else if (str == "l2_regularization") {
+    return popart::WeightDecayMode::L2Regularization;
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "Uknown WeightDecayMode: %s, WeightDecayMode must be decay or "
+        "l2_regularization",
+        str));
+  }
+}
 template <typename T>
-T GetAttrAllowNull(std::string attr, framework::OpDesc* op_desc) {
+T GetAttrAllowNull(std::string attr, OpDesc* op_desc) {
   if (op_desc->HasAttr(attr)) {
     return BOOST_GET_CONST(T, op_desc->GetAttr(attr));
   } else {
@@ -31,8 +80,7 @@ T GetAttrAllowNull(std::string attr, framework::OpDesc* op_desc) {
 }
 template <typename T>
-nonstd::optional<T> GetOptAttrAllowNull(std::string attr,
-                                        framework::OpDesc* op_desc) {
+nonstd::optional<T> GetOptAttrAllowNull(std::string attr, OpDesc* op_desc) {
   if (op_desc->HasAttr(attr)) {
     return BOOST_GET_CONST(T, op_desc->GetAttr(attr));
   } else {
@@ -40,19 +88,36 @@ nonstd::optional<T> GetOptAttrAllowNull(std::string attr,
   }
 }
+template <typename TI, typename TO>
+TO GetCastSigAttrAllowNull(std::string attr, OpDesc* op_desc) {
+  if (op_desc->HasAttr(attr)) {
+    auto x = BOOST_GET_CONST(TI, op_desc->GetAttr(attr));
+    return static_cast<TO>(x);
+  } else {
+    return {};
+  }
+}
-Compiler::Compiler() {
-  builder_ = popart::Builder::create();
-  RegisterOpFunc();
-}
-Compiler::~Compiler() {}
+Compiler::Compiler() { RegisterOpFunc(); }
+Compiler::~Compiler() {
+  builder_.reset();
+  resources_.reset();
+}
+void Compiler::Prepare() {
+  builder_ = popart::Builder::create();
+  resources_ = std::make_unique<CompilerResources>();
+}
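The three GetAttr helpers above differ only in what a missing attribute maps to: a value-initialized T, an empty nonstd::optional, or a value-initialized cast target. A self-contained sketch of the same pattern, with a plain map standing in for OpDesc and std::optional in place of nonstd::optional:

    #include <map>
    #include <optional>
    #include <string>

    template <typename T>
    T GetAttrAllowNull(const std::map<std::string, T>& attrs,
                       const std::string& key) {
      auto it = attrs.find(key);
      return it != attrs.end() ? it->second : T{};  // e.g. 0, empty string
    }

    template <typename T>
    std::optional<T> GetOptAttrAllowNull(const std::map<std::string, T>& attrs,
                                         const std::string& key) {
      auto it = attrs.find(key);
      if (it != attrs.end()) return it->second;
      return std::nullopt;  // lets the builder fall back to its own default
    }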
 void Compiler::RegisterOpFunc() {
   VLOG(10) << "enter Compiler::RegisterOpFunc";
 #define INT_VEC std::vector<std::int64_t>
+#define INT32_VEC std::vector<std::int32_t>
 #define FLOAT_VEC std::vector<float>
 #define FLOAT float
 #define INT std::int64_t
+#define INT32 std::int32_t
 #define BOOL bool
 #define STRING std::string
 #define STRING_VEC std::vector<std::string*>
@@ -60,6 +125,7 @@ void Compiler::RegisterOpFunc() {
 #define ARG(Type, Name) , GetAttrAllowNull<Type>(#Name, op_desc)
 #define OPT_ARG(Type, Name) , GetOptAttrAllowNull<Type>(#Name, op_desc)
+#define SIG_ARG(TI, TO, Name) , GetCastSigAttrAllowNull<TI, TO>(#Name, op_desc)
 #define POPART_CONST_ARG(Name) , const PopartConstant& Name
 #define HOST_SIDE_CONST_ARG(Name) , const HostSideConstant& Name
 #define POPART_ATTRIB_VEC_ARG(Name)
@@ -67,7 +133,7 @@
   name_function_ = {
 #define OP_DECL(FuncName, OnnxImpl, Args) \
-  {#FuncName, [&](framework::OpDesc* op_desc) { \
+  {#FuncName, [&](OpDesc* op_desc) { \
     auto op_type = op_desc->Type(); \
     VLOG(10) << "build op:" << op_type << " args " << #Args; \
     auto inputs = GetOpInputs(op_desc); \
@@ -77,9 +143,12 @@ void Compiler::RegisterOpFunc() {
     auto aiOnnxOpset = builder_->aiOnnxOpset11(); \
     auto output_ids = OnnxImpl(inputs Args, debug_context); \
     SetIpuIndexStage(output_ids, op_desc); \
+    SetAMPAttributes(output_ids, op_desc); \
+    SetSerializeAttributes(output_ids, op_desc); \
     InsertTensors(output_names, output_ids); \
   }}, // NOLINT
-#include "paddle/fluid/platform/ipu/supported_ops_autogen.h"
+#include "paddle/fluid/platform/device/ipu/supported_ops_autogen.h"
+#include "paddle/fluid/platform/device/ipu/supported_ops_custom.h"
   };
 #undef OP_DECL
@@ -87,146 +156,99 @@ void Compiler::RegisterOpFunc() {
 #undef POPART_ATTRIB_VEC_ARG
 #undef HOST_SIDE_CONST_ARG
 #undef POPART_CONST_ARG
+#undef SIG_ARG
 #undef OPT_ARG
 #undef ARG
 #undef NONE
 #undef STRING_VEC
 #undef STRING
 #undef BOOL
+#undef INT32
 #undef INT
 #undef FLOAT
 #undef FLOAT_VEC
+#undef INT32_VEC
 #undef INT_VEC
 }
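To make the macro machinery concrete, here is a hand-expanded sketch of what a single autogen entry would contribute to name_function_. The entry OP_DECL(popart_relu, aiOnnxOpset.relu, NONE) is illustrative only (the real entries live in supported_ops_autogen.h), and NONE is assumed to expand to no extra arguments:

    // Approximate expansion of one OP_DECL line:
    {"popart_relu", [&](OpDesc* op_desc) {
       auto op_type = op_desc->Type();
       VLOG(10) << "build op:" << op_type << " args " << "NONE";
       auto inputs = GetOpInputs(op_desc);
       auto output_names = GetOpOutputs(op_desc);
       auto debug_context = BuildDebugContext(op_desc);
       auto aiOnnxOpset = builder_->aiOnnxOpset11();
       auto output_ids = aiOnnxOpset.relu(inputs, debug_context);
       SetIpuIndexStage(output_ids, op_desc);
       SetAMPAttributes(output_ids, op_desc);
       SetSerializeAttributes(output_ids, op_desc);
       InsertTensors(output_names, output_ids);
     }},  // NOLINT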
-void Compiler::LowerBody(const framework::ir::Graph* graph) {
+void Compiler::LowerBody(const Graph* graph) {
   VLOG(10) << "enter Compiler::LowerBody";
   auto nodes = framework::ir::TopologySortOperations(*graph);
   for (auto* node : nodes) {
     auto* op_desc = node->Op();
     auto op_type = op_desc->Type();
-    VLOG(10) << "node->type: " << op_type;
+    VLOG(10) << "lowering op: " << op_type;
     if (op_type == "popart_constant") {
-      auto dims =
-          BOOST_GET_CONST(std::vector<int64_t>, op_desc->GetAttr("dims"));
-      auto dtype_ = BOOST_GET_CONST(int, op_desc->GetAttr("dtype"));
-      auto dtype = OnnxDtype2PopartType(dtype_);
-      popart::TensorInfo tensor_info{dtype, dims};
-      auto value_attr = op_desc->GetAttr("value");
-      auto const_data = std::unique_ptr<popart::ConstVoidData>{};
-      switch (dtype) {
-        case popart::DataType::FLOAT:
-          const_data.reset(new popart::ConstVoidData(
-              BOOST_GET_CONST(std::vector<float>, value_attr).data(),
-              tensor_info));
-          break;
-        case popart::DataType::INT32:
-          const_data.reset(new popart::ConstVoidData(
-              BOOST_GET_CONST(std::vector<int>, value_attr).data(),
-              tensor_info));
-          break;
-        case popart::DataType::DOUBLE:
-          const_data.reset(new popart::ConstVoidData(
-              BOOST_GET_CONST(std::vector<double>, value_attr).data(),
-              tensor_info));
-          break;
-        case popart::DataType::INT64:
-          const_data.reset(new popart::ConstVoidData(
-              BOOST_GET_CONST(std::vector<int64_t>, value_attr).data(),
-              tensor_info));
-          break;
-        default:
-          PADDLE_THROW(platform::errors::Unimplemented(
-              "The popart datatype is not supported, popart::DataType is %d",
-              dtype));
-      }
-      popart::TensorId result = builder_->aiOnnxOpset11().constant(*const_data);
-      SetIpuIndexStage(result, op_desc);
-      InsertTensors(GetOpOutputs(op_desc), result);
-    } else if (op_type == "popart_batchnormalization") {
+      // pass
+    } else if (op_type == "popart_optimizer") {
+      // pass
+    } else if (op_type == "popart_checkpointoutput") {
       auto inputs = GetOpInputs(op_desc);
       auto outputs = GetOpOutputs(op_desc);
-      auto num_outputs = outputs.size();
-      auto epsilon = BOOST_GET_CONST(float, op_desc->GetAttr("epsilon"));
-      auto momentum = BOOST_GET_CONST(float, op_desc->GetAttr("momentum"));
-      auto result = builder_->aiOnnxOpset11().batchnormalization(
-          inputs, num_outputs, epsilon, momentum);
-      SetIpuIndexStage(result, op_desc);
-      InsertTensors(GetOpOutputs(op_desc), result);
-    } else if (op_type == "popart_nllloss") {
-      auto inputs = GetOpInputs(op_desc);
-      auto ignoreIndex = BOOST_GET_CONST(int, op_desc->GetAttr("ignoreIndex"));
-      auto result = builder_->aiGraphcoreOpset1().nllloss(
-          inputs, popart::ReductionType::NoReduction, ignoreIndex);
-      SetIpuIndexStage(result, op_desc);
-      InsertTensors(GetOpOutputs(op_desc), result);
-    } else if (op_type == "popart_topk") {
+      auto output_ids = builder_->checkpointOutput(inputs);
+      InsertTensors(outputs, output_ids);
+    } else if (op_type == "popart_custom_op") {
       auto inputs = GetOpInputs(op_desc);
       auto outputs = GetOpOutputs(op_desc);
-      int64_t axis = BOOST_GET_CONST(int64_t, op_desc->GetAttr("axis"));
-      int sorted_INT32 = BOOST_GET_CONST(int, op_desc->GetAttr("sorted"));
-      int64_t sorted = int64_t{sorted_INT32};
-      auto aiOnnxOpset = builder_->aiOnnxOpset11();
-      popart::ConvInputs result;
-      if (inputs.size() == 2) {
-        VLOG(10)
-            << "[Compiler::LowerBody] size of inputs for <popart_topk> is 2";
-        result = aiOnnxOpset.topk(inputs, axis, sorted);
-      } else if (inputs.size() == 1) {
-        VLOG(10)
-            << "[Compiler::LowerBody] size of inputs for <popart_topk> is 1";
-        int64_t k = BOOST_GET_CONST(int64_t, op_desc->GetAttr("k"));
-        popart::TensorInfo kShape{"INT64", std::vector<int64_t>{1}};
-        popart::ConstVoidData kData = {&k, kShape};
-        auto K_t = aiOnnxOpset.constant(kData);
-        result = aiOnnxOpset.topk({inputs[0], K_t}, axis, sorted);
-      }
-      result[1] = aiOnnxOpset.cast({result[1]}, "INT32");
-      SetIpuIndexStage(result, op_desc);
-      VLOG(10) << "[Compiler::LowerBody] output[1]: " << outputs[1];
-      VLOG(10) << "[Compiler::LowerBody] output[1]: "
-               << GetOpOutputs(op_desc)[1] << " -> " << result[1];
-      tensors_.emplace(GetOpOutputs(op_desc)[1], result[1]);  // topk indices
-      VLOG(10) << "[Compiler::LowerBody] output[0]: " << outputs[0];
-      VLOG(10) << "[Compiler::LowerBody] output[0]: "
-               << GetOpOutputs(op_desc)[0] << " -> " << result[0];
-      tensors_.emplace(GetOpOutputs(op_desc)[0], result[0]);  // topk values
+      auto debug_context = BuildDebugContext(op_desc);
+      auto attributes = std::map<std::string, popart::any>{};
+      for (auto& attr : op_desc->GetAttrMap()) {
+        CustomOpAttrVisitor visitor(&attributes, attr.first);
+        boost::apply_visitor(visitor, attr.second);
+      }
+      auto __op_type =
+          BOOST_GET_CONST(std::string, op_desc->GetAttr("__op_type"));
+      VLOG(10) << "Build graph from custom op: " << __op_type;
+      auto it = custom_ops_.find(__op_type);
+      auto output_ids =
+          builder_->customOp(it->second.popart_op, it->second.popart_op.version,
+                             inputs, outputs.size(), attributes, debug_context);
+      SetIpuIndexStage(output_ids, op_desc);
+      InsertTensors(outputs, output_ids);
+    } else if (op_type == "popart_printtensor") {
+      auto inputs = GetOpInputs(op_desc);
+      auto outputs = GetOpOutputs(op_desc);
+      auto debug_context = BuildDebugContext(op_desc);
+      auto print_gradient =
+          BOOST_GET_CONST(int64_t, op_desc->GetAttr("print_gradient"));
+      auto title = BOOST_GET_CONST(std::string, op_desc->GetAttr("title"));
+      auto output_ids = builder_->aiGraphcoreOpset1().printtensor(
+          inputs, print_gradient, debug_context, title);
+      SetIpuIndexStage(output_ids, op_desc);
+      InsertTensors(outputs, output_ids);
     } else {
       auto itr = name_function_.find(op_type);
       if (itr != name_function_.end()) {
         itr->second(node->Op());
       } else {
-        PADDLE_THROW(platform::errors::NotFound(
-            "Op %s is not registered in popart canonicalization", op_type));
+        PADDLE_THROW(platform::errors::NotFound(
+            "%s is not registered, please check for unsupported operators for "
+            "running on IPU",
+            op_type));
       }
     }
   }
   VLOG(10) << "leave Compiler::LowerBody";
 }
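The popart_custom_op branch above walks every Paddle attribute through a boost visitor to fill a std::map<std::string, popart::any>. A minimal, self-contained sketch of that visitation pattern, with generic names in place of the real CustomOpAttrVisitor:

    #include <boost/variant.hpp>
    #include <iostream>
    #include <map>
    #include <string>

    using Attribute = boost::variant<int, float, std::string>;

    // One templated operator() handles every type the variant can hold;
    // boost::apply_visitor dispatches on the runtime content.
    struct PrintAttrVisitor : public boost::static_visitor<void> {
      explicit PrintAttrVisitor(std::string key) : key_(std::move(key)) {}
      template <typename T>
      void operator()(const T& value) const {
        std::cout << key_ << " = " << value << "\n";
      }
      std::string key_;
    };

    int main() {
      std::map<std::string, Attribute> attrs{
          {"epsilon", 1e-5f}, {"mode", std::string("train")}};
      for (auto& kv : attrs) {
        boost::apply_visitor(PrintAttrVisitor(kv.first), kv.second);
      }
    }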
-void Compiler::InitInputs(framework::ir::Graph* graph,
+void Compiler::InitInputs(Graph* graph,
                           const std::vector<std::string>& feed_list) {
   for (const auto& feed_name : feed_list) {
     feed_list_.push_back(feed_name);
-    for (const framework::ir::Node* n : graph->Nodes()) {
+    for (const Node* n : graph->Nodes()) {
       if (n->IsVar()) {
         auto* var_desc = n->Var();
         if (feed_name == var_desc->Name()) {
           VLOG(10) << "feed_name= " << var_desc->Name();
           auto data_type = VarType2PopartType(var_desc->GetDataType());
-          if (ipu_strategy_->enable_fp16) {
-            data_type = popart::DataType::FLOAT16;
-          }
           popart::TensorInfo input_info{data_type, var_desc->GetShape()};
           VLOG(10) << "popart input_info = " << input_info;
           popart::TensorId tensor_id =
               builder_->addInputTensor(input_info, feed_name);
           VLOG(10) << "popart input tensor id = " << tensor_id;
-          inputs_.push_back(tensor_id);
-          tensors_.emplace(var_desc->Name(), tensor_id);
+          resources_->inputs.push_back(tensor_id);
+          resources_->tensors.emplace(var_desc->Name(), tensor_id);
         }
       }
     }
@@ -236,20 +258,58 @@ void Compiler::InitInputs(framework::ir::Graph* graph,
 void Compiler::InitOutputs(const std::vector<std::string>& fetch_list) {
   for (const auto& fetch_name : fetch_list) {
     fetch_list_.push_back(fetch_name);
-    auto tensor = tensors_.find(fetch_name);
-    PADDLE_ENFORCE_NE(tensor, tensors_.end(),
-                      platform::errors::NotFound(
-                          "output tensor %s does not exist.", fetch_name));
+    auto tensor = resources_->tensors.find(fetch_name);
+    PADDLE_ENFORCE_NE(
+        tensor, resources_->tensors.end(),
+        platform::errors::NotFound(
+            "Output tensor %s is not found, please check the model.",
+            fetch_name));
     VLOG(10) << "fetch_name= " << fetch_name;
     VLOG(10) << "popart output tensor id = " << tensor->second;
     builder_->addOutputTensor(tensor->second);
-    outputs_.push_back(tensor->second);
+    resources_->outputs.push_back(tensor->second);
   }
 }
+void Compiler::LowerConstants(const Graph* graph, const Scope* scope) {
+  auto& kid_scope = scope->NewScope();
+  VLOG(10) << "enter Compiler::LowerConstants";
+  for (auto* node : graph->Nodes()) {
+    if (!node->IsOp()) {
+      continue;
+    }
+    auto* op_desc = node->Op();
+    auto op_type = op_desc->Type();
+    if (op_type == "popart_constant") {
+      auto shape =
+          BOOST_GET_CONST(std::vector<int64_t>, op_desc->GetAttr("dims"));
+      auto dtype_ = BOOST_GET_CONST(int, op_desc->GetAttr("dtype"));
+      auto dtype = PopartType2VarType(OnnxDtype2PopartType(dtype_));
+      auto tensor_name = op_desc->Output("__outputs__")[0];
+      auto* var = kid_scope.Var(tensor_name);
+      VLOG(10) << "lowering constant: " << tensor_name;
+      auto* tensor = var->GetMutable<framework::LoDTensor>();
+      ConstantOpAttrVisitor visitor(tensor, dtype);
+      auto value = op_desc->GetAttr("value");
+      boost::apply_visitor(visitor, value);
+      auto ddim = framework::make_ddim(shape);
+      tensor->Resize(ddim);
+      auto const_data = std::unique_ptr<popart::ConstVoidData>();
+      popart::TensorInfo tensor_info(VarType2PopartType(tensor->type()), shape);
+      const_data.reset(new popart::ConstVoidData(tensor->data(), tensor_info));
+      popart::TensorId result = builder_->aiOnnxOpset11().constant(*const_data);
+      SetIpuIndexStage(result, op_desc);
+      resources_->tensors.emplace(tensor_name, result);
+    }
+  }
+  VLOG(10) << "leave Compiler::LowerConstants";
+}
-void Compiler::LowerWeights(const framework::ir::Graph* graph,
-                            const framework::Scope* scope_) {
-  PADDLE_ENFORCE_NOT_NULL(scope_,
+void Compiler::LowerWeights(const Graph* graph, const Scope* scope) {
+  VLOG(10) << "enter Compiler::LowerWeights";
+  PADDLE_ENFORCE_NOT_NULL(scope,
                           platform::errors::PreconditionNotMet(
                               "You should call set_scope before LowerWeights"));
   // at this step, the graph doesn't contains optimizer related states
@@ -257,12 +317,12 @@ void Compiler::LowerWeights(const framework::ir::Graph* graph,
     if (node->IsVar() && !node->IsCtrlVar() && node->Var()) {
       if (node->Var()->Persistable() && node->inputs.empty()) {
         auto var_name = node->Var()->Name();
-        // workround: https://github.com/graphcore/Paddle/issues/151
-        if (tensors_.count(var_name) != 0) {
+        if (resources_->tensors.count(var_name) != 0) {
           continue;
         }
+        VLOG(10) << "lowering weight: " << var_name;
-        auto var = scope_->FindVar(var_name);
+        auto var = scope->FindVar(var_name);
         if (var) {
           auto tensor = var->Get<framework::LoDTensor>();
           auto dtype = VarType2PopartType(tensor.type());
@@ -274,12 +334,113 @@ void Compiler::LowerWeights(const framework::ir::Graph* graph,
           popart::ConstVoidData const_data{tensor.data(), tensor_info};
           popart::TensorId result =
               builder_->addInitializedInputTensor(const_data, var_name);
-          tensors_.emplace(var_name, result);
-          weights_.push_back(result);
+          resources_->tensors.emplace(var_name, result);
+          resources_->weights.push_back(result);
         }
       }
     }
   }
+  VLOG(10) << "leave Compiler::LowerWeights";
 }
+void Compiler::LowerOptimier(const Graph* graph, const Scope* scope) {
+  for (auto* node : graph->Nodes()) {
+    if (!node->IsOp()) {
+      continue;
+    }
+    auto* op_desc = node->Op();
+    auto op_type = op_desc->Type();
+    if (op_type == "popart_optimizer") {
+      auto raw_type =
+          BOOST_GET_CONST(std::string, op_desc->GetAttr("raw_type"));
+      resources_->optimizer_type = raw_type;
+      auto loss_var =
+          BOOST_GET_CONST(std::string, op_desc->GetAttr("loss_var"));
+      resources_->loss_var = resources_->tensors[loss_var];
+      resources_->with_lr_sched =
+          BOOST_GET_CONST(bool, op_desc->GetAttr("with_lr_sched"));
+      if (op_desc->HasAttr("lr_var")) {
+        auto lr_var = BOOST_GET_CONST(std::string, op_desc->GetAttr("lr_var"));
+        resources_->lr_var = lr_var;
+        resources_->lr = GetSingleVarFromScope<float>(scope, lr_var);
+      } else {
+        // adadelta has no lr
+        resources_->lr = 0.01f;
+        resources_->with_lr_sched = false;
+      }
+      VLOG(10) << "Set initial lr: " << resources_->lr;
+      auto loss_scaling = ipu_strategy_->loss_scaling;
+      auto type = BOOST_GET_CONST(std::string, op_desc->GetAttr("type"));
+      if (type == "sgd") {
+        auto weight_decay =
+            BOOST_GET_CONST(float, op_desc->GetAttr("weight_decay"));
+        auto momentum = BOOST_GET_CONST(float, op_desc->GetAttr("momentum"));
+        resources_->optimizer_fn = [=](float lr) {
+          return std::make_unique<popart::SGD>(
+              popart::OptimizerValue(lr, false),
+              popart::OptimizerValue(weight_decay, true),
+              popart::OptimizerValue(momentum, true),
+              popart::SGD::getUnsetDampening(),
+              popart::SGD::getUnsetVelocityScaling(),
+              popart::OptimizerValue(loss_scaling, true));
+        };
+      } else if (type == "adam") {
+        auto weight_decay =
+            BOOST_GET_CONST(float, op_desc->GetAttr("weight_decay"));
+        auto beta1 = BOOST_GET_CONST(float, op_desc->GetAttr("beta1"));
+        auto beta2 = BOOST_GET_CONST(float, op_desc->GetAttr("beta2"));
+        auto eps = BOOST_GET_CONST(float, op_desc->GetAttr("eps"));
+        auto mwn = ipu_strategy_->max_weight_norm;
+        VLOG(10) << "set max_weight_norm: " << mwn;
+        auto adam_mode_ =
+            BOOST_GET_CONST(std::string, op_desc->GetAttr("adam_mode"));
+        auto adam_mode = AdamModeFromStr(adam_mode_);
+        auto weight_decay_mode_ =
+            BOOST_GET_CONST(std::string, op_desc->GetAttr("weight_decay_mode"));
+        auto weight_decay_mode = WeightDecayModeFromStr(weight_decay_mode_);
+        resources_->optimizer_fn = [=](float lr) {
+          return std::make_unique<popart::Adam>(
+              popart::OptimizerValue(lr, false),
+              popart::OptimizerValue(weight_decay, true),
+              popart::OptimizerValue(beta1, true),
+              popart::OptimizerValue(beta2, true),
+              popart::OptimizerValue(eps, true),
+              popart::OptimizerValue(loss_scaling, true),
+              popart::OptimizerValue(mwn, true), adam_mode, weight_decay_mode,
+              popart::DataType::UNDEFINED, popart::DataType::FLOAT,
+              popart::DataType::FLOAT);
+        };
+      } else if (type == "adaptive") {
+        auto alpha = BOOST_GET_CONST(float, op_desc->GetAttr("alpha"));
+        auto momentum = BOOST_GET_CONST(float, op_desc->GetAttr("momentum"));
+        auto eps = BOOST_GET_CONST(float, op_desc->GetAttr("eps"));
+        auto weight_decay =
+            BOOST_GET_CONST(float, op_desc->GetAttr("weight_decay"));
+        auto adaptive_mode_ =
+            BOOST_GET_CONST(std::string, op_desc->GetAttr("adaptive_mode"));
+        auto adaptive_mode = AdaptiveModeFromStr(adaptive_mode_);
+        auto weight_decay_mode_ =
+            BOOST_GET_CONST(std::string, op_desc->GetAttr("weight_decay_mode"));
+        auto weight_decay_mode = WeightDecayModeFromStr(weight_decay_mode_);
+        resources_->optimizer_fn = [=](float lr) {
+          return std::make_unique<popart::Adaptive>(
+              popart::OptimizerValue(lr, false),
+              popart::OptimizerValue(weight_decay, true),
+              popart::OptimizerValue(alpha, true),
+              popart::OptimizerValue(momentum, true),
+              popart::OptimizerValue(eps, true),
+              popart::OptimizerValue(loss_scaling, true), adaptive_mode,
+              weight_decay_mode, popart::DataType::UNDEFINED,
+              popart::DataType::FLOAT, popart::DataType::FLOAT,
+              popart::DataType::FLOAT);
+        };
+      } else {
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "optimizer %s is not implemented", type));
+      }
+    }
+  }
+}
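Note that LowerOptimier never constructs a single popart optimizer up front: for each supported type it captures the hyper-parameters in an optimizer_fn closure, leaving only the learning rate as a parameter, so the executor can cheaply rebuild the optimizer whenever an lr schedule fires. A reduced sketch of the pattern, with a plain struct standing in for popart::Optimizer:

    #include <functional>
    #include <memory>

    struct Optimizer {  // stand-in for popart::Optimizer
      float lr, weight_decay, momentum;
    };
    using OptimizerFn = std::function<std::unique_ptr<Optimizer>(float)>;

    int main() {
      float weight_decay = 1e-4f, momentum = 0.9f;  // read once from op attrs
      // Everything but lr is frozen inside the closure.
      OptimizerFn optimizer_fn = [=](float lr) {
        return std::make_unique<Optimizer>(Optimizer{lr, weight_decay, momentum});
      };
      auto step0 = optimizer_fn(0.01f);   // what NewOptimizer() does
      auto step1 = optimizer_fn(0.005f);  // what UpdateOptimizer(lr_new) does
    }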
 void Compiler::InsertTensors(const std::vector<std::string>& output_names,
@@ -288,7 +449,7 @@ void Compiler::InsertTensors(const std::vector<std::string>& output_names,
                     platform::errors::Fatal("InsertTensors size mismatch"));
   for (int i = 0; i < tensor_ids.size(); i++) {
     std::string tensor_id = tensor_ids[i];
-    tensors_.emplace(output_names[i], tensor_ids[i]);
+    resources_->tensors.emplace(output_names[i], tensor_ids[i]);
   }
 }
@@ -296,11 +457,11 @@ void Compiler::InsertTensors(const std::vector<std::string>& output_names,
                              const std::string& tensor_id) {
   PADDLE_ENFORCE_EQ(output_names.size(), 1,
                     platform::errors::Fatal("InsertTensors size mismatch"));
-  tensors_.emplace(output_names[0], tensor_id);
+  resources_->tensors.emplace(output_names[0], tensor_id);
 }
 void Compiler::SetIpuIndexStage(const std::vector<std::string>& tensor_ids,
-                                const framework::OpDesc* op_desc) {
+                                const OpDesc* op_desc) {
   VLOG(10) << "enter Compiler::SetIpuIndexStage";
   auto tensor_ids_set =
       std::set<std::string>(tensor_ids.begin(), tensor_ids.end());
@@ -321,7 +482,7 @@ void Compiler::SetIpuIndexStage(const std::vector<std::string>& tensor_ids,
 }
 void Compiler::SetIpuIndexStage(const std::string& tensor_id,
-                                const framework::OpDesc* op_desc) {
+                                const OpDesc* op_desc) {
   VLOG(10) << "enter Compiler::SetIpuIndexStage";
   if (op_desc->HasAttr(sIpuIndexAttr)) {
@@ -339,20 +500,73 @@ void Compiler::SetIpuIndexStage(const std::string& tensor_id,
   VLOG(10) << "leave Compiler::SetIpuIndexStage";
 }
-std::vector<popart::TensorId>& Compiler::GetWeights() { return weights_; }
+void Compiler::SetAMPAttributes(const std::vector<std::string>& tensor_ids,
+                                const OpDesc* op_desc) {
+  if (op_desc->Type() == "popart_matmul") {
+    for (const auto& tensor_id : tensor_ids) {
+      SetAMPAttributes(tensor_id, op_desc);
+    }
+  }
+}
+void Compiler::SetAMPAttributes(const std::string& tensor_id,
+                                const OpDesc* op_desc) {
+  VLOG(10) << "enter Compiler::SetAMPAttributes";
+  if (op_desc->Type() == "popart_matmul") {
+    auto amp = ipu_strategy_->available_memory_proportion;
+    if (amp > 0.0f && amp <= 1.0) {
+      builder_->setAvailableMemoryProportion(tensor_id, amp);
+    }
+  }
+  VLOG(10) << "leave Compiler::SetAMPAttributes";
+}
+void Compiler::SetSerializeAttributes(
+    const std::vector<std::string>& tensor_ids, const OpDesc* op_desc) {
+  VLOG(10) << "enter Compiler::SetSerializeAttributes";
+  auto tensor_ids_set =
+      std::set<std::string>(tensor_ids.begin(), tensor_ids.end());
+  if (op_desc->Type() == "popart_matmul") {
+    if (op_desc->HasAttr(sMatmulSerializeFactor)) {
+      auto factor =
+          BOOST_GET_CONST(int, op_desc->GetAttr(sMatmulSerializeFactor));
+      std::string mode = "output_channels";
+      if (op_desc->HasAttr(sMatmulSerializeMode)) {
+        mode = BOOST_GET_CONST(std::string,
+                               op_desc->GetAttr(sMatmulSerializeMode));
+      }
+      builder_->setSerializeMatMul(tensor_ids_set, mode, (int64_t)factor, true);
+    }
+  }
+  VLOG(10) << "leave Compiler::SetSerializeAttributes";
+}
+void Compiler::SetSerializeAttributes(const std::string& tensor_id,
+                                      const OpDesc* op_desc) {
+  std::vector<std::string> tensor_ids = {tensor_id};
+  SetSerializeAttributes(tensor_ids, op_desc);
+}
-// convertFloatsToHalfs
-void Compiler::ConvertProtoToFp16() {
+void Compiler::SetCustomOps(
+    const std::vector<IpuCustomOpIdentifier>& custom_ops) {
+  for (auto x : custom_ops) {
+    custom_ops_.emplace(x.paddle_op, x);
+  }
+}
+std::string Compiler::GetFP16ModelProto() {
   popart::GraphTransformer graph_transformer(builder_->getModelProto());
   graph_transformer.convertFloatsToHalfs();
-  converted_proto_ = graph_transformer.getModelProto();
+  return graph_transformer.getModelProto();
 }
 std::string Compiler::GetModelProto() {
-  if (converted_proto_.length()) {
-    return converted_proto_;
-  }
-  return builder_->getModelProto();
+  if (ipu_strategy_->enable_fp16) {
+    return GetFP16ModelProto();
+  } else {
+    return builder_->getModelProto();
+  }
 }
 void Compiler::SaveModelProto(const std::string& path) {
@@ -366,12 +580,12 @@ void Compiler::SaveModelProtoNoCheck(const std::string& path) {
   onnxfile.close();
 }
-std::vector<std::string> Compiler::GetOpInputs(const framework::OpDesc* op) {
+std::vector<std::string> Compiler::GetOpInputs(const OpDesc* op) {
   auto ins = op->Input("__inputs__");
   std::vector<std::string> inputs;
   for (const auto& in : ins) {
-    if (tensors_.find(in) != tensors_.end()) {
-      inputs.push_back(tensors_[in]);
+    if (resources_->tensors.find(in) != resources_->tensors.end()) {
+      inputs.push_back(resources_->tensors[in]);
     } else {
       inputs.push_back(in);
     }
@@ -379,12 +593,11 @@ std::vector<std::string> Compiler::GetOpInputs(const framework::OpDesc* op) {
   return inputs;
 }
-const std::vector<std::string>& Compiler::GetOpOutputs(
-    const framework::OpDesc* op) {
+const std::vector<std::string>& Compiler::GetOpOutputs(const OpDesc* op) {
   return op->Output("__outputs__");
 }
-popart::DebugContext Compiler::BuildDebugContext(const framework::OpDesc* op) {
+popart::DebugContext Compiler::BuildDebugContext(const OpDesc* op) {
   auto op_identify_id =
       BOOST_GET_CONST(std::string, op->GetAttr(sOpIdentifyIdAttr));
   VLOG(10) << "op_identify_id of op: " << op->Type() << " is "
...
ipu_compiler.h
@@ -16,76 +16,119 @@
 #include <popart/builder.hpp>
 #include <popart/graphtransformer.hpp>
+#include <popart/optimizer.hpp>
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/scope.h"
-#include "paddle/fluid/platform/ipu/common.h"
-#include "paddle/fluid/platform/ipu/ipu_strategy.h"
+#include "paddle/fluid/platform/device/ipu/ipu_names.h"
+#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
+#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
 namespace paddle {
 namespace platform {
 namespace ipu {
+struct CompilerResources {
+  // popart input tensor_ids
+  std::vector<popart::TensorId> inputs;
+  // popart output tensor_ids
+  std::vector<popart::TensorId> outputs;
+  // <paddle_var_name, popart_tensor_ids>
+  std::map<std::string, popart::TensorId> tensors;
+  // popart_weight_ids
+  std::vector<popart::TensorId> weights;
+  // popart loss tensor_id
+  popart::TensorId loss_var;
+  // paddle lr var_name
+  std::string lr_var;
+  // lr value
+  float lr;
+  // flag for lr is constant or scheduling
+  bool with_lr_sched = false;
+  // paddle optimizer type, eg: momentum, lamb
+  std::string optimizer_type;
+
+  using OptimizerFn =
+      std::function<std::unique_ptr<popart::Optimizer>(float lr)>;
+  OptimizerFn optimizer_fn;
+
+ public:
+  popart::Optimizer *Optimizer() { return optimizer.get(); }
+  popart::Optimizer *NewOptimizer() {
+    optimizer = optimizer_fn(lr);
+    return optimizer.get();
+  }
+  popart::Optimizer *UpdateOptimizer(float lr_new) {
+    optimizer = optimizer_fn(lr_new);
+    return optimizer.get();
+  }
+
+ private:
+  std::unique_ptr<popart::Optimizer> optimizer;
+};
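CompilerResources is the single handoff object between the Compiler, which fills it during lowering, and the Executor, which consumes it at run time (see IpuBackend::Compile earlier in this change). A schematic of the flow; the variable contents shown are hypothetical:

    // Schematic only; mirrors the wiring in IpuBackend::Compile.
    compiler_->Prepare();  // allocates a fresh CompilerResources
    executor_->SetCompilerResources(compiler_->GetResources());  // shared, not copied
    // After lowering, a paddle var name resolves to a popart tensor id:
    //   resources->tensors["fc_0.w_0"]  ->  popart::TensorId
    // and the executor can build or refresh the optimizer:
    //   resources->NewOptimizer();  resources->UpdateOptimizer(new_lr);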
 class Compiler {
  public:
   Compiler();
   ~Compiler();
   void RegisterOpFunc();
-  void LowerBody(const framework::ir::Graph *graph);
-  void InitInputs(framework::ir::Graph *graph,
-                  const std::vector<std::string> &feed_list);
+  void Prepare();
+  void LowerBody(const Graph *graph);
+  void InitInputs(Graph *graph, const std::vector<std::string> &feed_list);
   void InitOutputs(const std::vector<std::string> &fetch_list);
-  void LowerWeights(const framework::ir::Graph *graph,
-                    const framework::Scope *scope_);
+  void LowerConstants(const Graph *graph, const Scope *scope);
+  void LowerWeights(const Graph *graph, const Scope *scope);
+  void LowerOptimier(const Graph *graph, const Scope *scope);
   void InsertTensors(const std::vector<std::string> &output_names,
                      const std::vector<std::string> &tensor_ids);
   void InsertTensors(const std::vector<std::string> &output_names,
                      const std::string &tensor_id);
   void SetIpuIndexStage(const std::vector<std::string> &tensor_ids,
-                        const framework::OpDesc *op_desc);
-  void SetIpuIndexStage(const std::string &tensor_id,
-                        const framework::OpDesc *op_desc);
+                        const OpDesc *op_desc);
+  void SetIpuIndexStage(const std::string &tensor_id, const OpDesc *op_desc);
+  void SetAMPAttributes(const std::vector<std::string> &tensor_ids,
+                        const OpDesc *op_desc);
+  void SetAMPAttributes(const std::string &tensor_id, const OpDesc *op_desc);
+  void SetSerializeAttributes(const std::vector<std::string> &tensor_ids,
+                              const OpDesc *op_desc);
+  void SetSerializeAttributes(const std::string &tensor_id,
+                              const OpDesc *op_desc);
-  std::vector<popart::TensorId> GetInputs() { return inputs_; }
-  std::vector<popart::TensorId> GetOutputs() { return outputs_; }
-  std::map<std::string, popart::TensorId> GetTensors() { return tensors_; }
-  std::vector<popart::TensorId> &GetWeights();
-  std::string GetModelProto();
   void SetIpuStrategy(const IpuStrategy &strategy) {
     ipu_strategy_ = &strategy;
-  };
+  }
+  void SetCustomOps(const std::vector<IpuCustomOpIdentifier> &custom_ops);
+  CompilerResources *GetResources() { return resources_.get(); }
+  std::string GetModelProto();
+  std::string GetFP16ModelProto();
   void SaveModelProto(const std::string &path);
   void SaveModelProtoNoCheck(const std::string &path);
-  void ConvertProtoToFp16();
  private:
-  std::vector<std::string> GetOpInputs(const framework::OpDesc *op);
-  const std::vector<std::string> &GetOpOutputs(const framework::OpDesc *op);
-  popart::DebugContext BuildDebugContext(const framework::OpDesc *op);
+  std::vector<std::string> GetOpInputs(const OpDesc *op);
+  const std::vector<std::string> &GetOpOutputs(const OpDesc *op);
+  popart::DebugContext BuildDebugContext(const OpDesc *op);
  private:
   std::unique_ptr<popart::Builder> builder_;
+  std::unique_ptr<CompilerResources> resources_;
-  using OpFunc = std::function<void(framework::OpDesc *op_desc)>;
+  using OpFunc = std::function<void(OpDesc *op_desc)>;
   std::unordered_map<std::string, OpFunc> name_function_;
-  // stateful variable
-  std::map<std::string, popart::TensorId> tensors_;
   // feed_list_ & fetch_list save paddle tensor id
   std::vector<std::string> feed_list_;
   std::vector<std::string> fetch_list_;
-  // inputs_ & outputs_ save popart tensor id
-  std::vector<popart::TensorId> inputs_;
-  std::vector<popart::TensorId> outputs_;
-  // weights info map
-  std::vector<popart::TensorId> weights_;
-  std::string converted_proto_ = "";
   const IpuStrategy *ipu_strategy_ = nullptr;
+  std::map<std::string, IpuCustomOpIdentifier> custom_ops_;
 };
 } // namespace ipu
...
ipu_device.cc
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,26 +12,42 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/platform/device/ipu/device.h"
+#include "paddle/fluid/platform/device/ipu/ipu_device.h"
+#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
 namespace paddle {
 namespace platform {
 namespace ipu {
-Device::Device(const popart::DeviceInfo& device_info)
-    : id_(device_info.getId()), is_attached_(device_info.isAttached()) {
-  popart::DeviceType popart_device_type = device_info.getType();
-  switch (popart_device_type) {
-    case popart::DeviceType::IpuModel:
-      device_type_ = DeviceType::IpuModel;
-      break;
-    case popart::DeviceType::Ipu:
-      device_type_ = DeviceType::Ipu;
-      break;
-    default:
-      PADDLE_THROW(platform::errors::InvalidArgument(
-          "popart::DeviceType:Unsupported type %d", popart_device_type));
-  }
+int GetNumDevices() {
+  bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
+  if (ipu_model) {
+    return 1;
+  }
+  int num_devices =
+      popart::DeviceManager::createDeviceManager().enumerateDevices().size();
+  PADDLE_ENFORCE_GT(num_devices, 0, platform::errors::Unavailable(
+                                        "Do not found any IPU devices, please "
+                                        "make sure Poplar sdk is enabled"));
+  return num_devices;
+}
+std::vector<int> GetDeviceIds() {
+  bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
+  if (ipu_model) {
+    return {0};
+  }
+  std::vector<int> device_ids;
+  auto devices =
+      popart::DeviceManager::createDeviceManager().enumerateDevices();
+  PADDLE_ENFORCE_GT(
+      devices.size(), 0,
+      platform::errors::Unavailable("Do not found any IPU devices, please make "
+                                    "sure Poplar sdk is enabled."));
+  for (auto device : devices) {
+    device_ids.push_back(device->getId());
+  }
+  return device_ids;
 }
 } // namespace ipu
...
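Device discovery is now a pair of free functions, so callers no longer have to go through an IpuBackend instance; setting POPLAR_IPUMODEL short-circuits both to a single simulated device. A hedged usage sketch:

    #include <iostream>
    #include <vector>

    int main() {
      // Throws via PADDLE_ENFORCE if no IPU is visible and no IPUMODEL is set.
      int n = paddle::platform::ipu::GetNumDevices();
      std::vector<int> ids = paddle::platform::ipu::GetDeviceIds();
      std::cout << "found " << n << " IPU device(s):";
      for (int id : ids) std::cout << ' ' << id;
      std::cout << '\n';
    }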
ipu_device.h
@@ -21,23 +21,11 @@ namespace paddle {
 namespace platform {
 namespace ipu {
-enum class DeviceType { IpuModel = 0, Cpu, Ipu, OfflineIpu, Sim };
-class Device {
- public:
-  Device() {}
-  explicit Device(const popart::DeviceInfo& device_info);
-  int getId() const { return id_; }
-  bool isAttached() const { return is_attached_; }
-  DeviceType getType() const { return device_type_; }
-
- private:
-  int id_;
-  bool is_attached_;
-  DeviceType device_type_;
-  /* TODO:: Add more elements in the future */
-};
+// get the number of all avaliable IPUs
+int GetNumDevices();
+// get the device id of all avaliable IPUs
+std::vector<int> GetDeviceIds();
 } // namespace ipu
 } // namespace platform
...
ipu_info.cc
@@ -10,23 +10,18 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/platform/device/ipu/ipu_info.h"
-#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
+#include "paddle/fluid/platform/device/ipu/ipu_device.h"
 namespace paddle {
 namespace platform {
 //! Get a list of device ids from environment variable or use all.
 std::vector<int> GetSelectedIPUDevices() {
-  std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
-      platform::ipu::IpuBackend::GetInstance();
-  return ipu_backend->GetDeviceIds();
+  return platform::ipu::GetDeviceIds();
 }
 //! Get the total number of IPU devices in system.
-int GetIPUDeviceCount() {
-  std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
-      platform::ipu::IpuBackend::GetInstance();
-  return ipu_backend->GetNumDevices();
-}
+int GetIPUDeviceCount() { return platform::ipu::GetNumDevices(); }
 } // namespace platform
 } // namespace paddle

ipu_info.h
@@ -17,8 +17,10 @@ limitations under the License. */
 namespace paddle {
 namespace platform {
 std::vector<int> GetSelectedIPUDevices();
 int GetIPUDeviceCount();
 } // namespace platform
 } // namespace paddle
 #endif
...@@ -22,6 +22,8 @@ namespace ipu { ...@@ -22,6 +22,8 @@ namespace ipu {
static constexpr const char *sIpuIndexAttr = "ipu_index"; static constexpr const char *sIpuIndexAttr = "ipu_index";
static constexpr const char *sIpuStageAttr = "ipu_stage"; static constexpr const char *sIpuStageAttr = "ipu_stage";
static constexpr const char *sMatmulSerializeFactor = "serialize_factor";
static constexpr const char *sMatmulSerializeMode = "serialize_mode";
static constexpr const char *sOpIdentifyIdAttr = "op_identify_id"; static constexpr const char *sOpIdentifyIdAttr = "op_identify_id";
static constexpr const char *sDebugInfoId = "__debug_info_id"; static constexpr const char *sDebugInfoId = "__debug_info_id";
...@@ -29,6 +31,7 @@ static constexpr const char *sBeta1 = "beta1"; ...@@ -29,6 +31,7 @@ static constexpr const char *sBeta1 = "beta1";
static constexpr const char *sBeta2 = "beta2"; static constexpr const char *sBeta2 = "beta2";
static constexpr const char *sBeta1Pow = "Beta1Pow"; static constexpr const char *sBeta1Pow = "Beta1Pow";
static constexpr const char *sBeta2Pow = "Beta2Pow"; static constexpr const char *sBeta2Pow = "Beta2Pow";
static constexpr const char *sLossScaling = "LossScaling";
} // namespace ipu } // namespace ipu
} // namespace platform } // namespace platform
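For illustration, a sketch of how a lowering pass might stamp the new matmul serialization attributes onto an op desc; the mode string "output_channels" and the factor 4 are assumptions, not values taken from this diff:

#include <string>
#include "paddle/fluid/framework/op_desc.h"

// Hypothetical pass snippet: ask popart to serialize a large matmul.
void MarkSerializedMatmul(paddle::framework::OpDesc* op) {
  op->SetAttr(paddle::platform::ipu::sMatmulSerializeMode,
              std::string("output_channels"));  // assumed mode name
  op->SetAttr(paddle::platform::ipu::sMatmulSerializeFactor, 4);
}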
......
...@@ -12,10 +12,27 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,10 +12,27 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/ipu/ipu_strategy.h" #include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
#include <glog/logging.h>
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace ipu {} // namespace ipu namespace ipu {
void IpuStrategy::enablePattern(const std::string& t) {
VLOG(10) << "enable popart pattern: " << t;
popart_patterns.enablePattern(t, true);
}
void IpuStrategy::disablePattern(const std::string& t) {
VLOG(10) << "disable popart pattern: " << t;
popart_patterns.enablePattern(t, false);
}
bool IpuStrategy::isPatternEnabled(const std::string& t) {
return popart_patterns.isPatternEnabled(t);
}
} // namespace ipu
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
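A configuration sketch for the new pattern switches; the pattern name "PostNRepl" is an assumption about popart's pattern registry:

#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"

// Hypothetical setup: toggle one popart pattern on a strategy object.
void ConfigurePatterns(paddle::platform::ipu::IpuStrategy* strategy) {
  strategy->disablePattern("PostNRepl");  // assumed pattern name
  if (!strategy->isPatternEnabled("PostNRepl")) {
    strategy->enablePattern("PostNRepl");
  }
}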
...@@ -14,24 +14,86 @@ limitations under the License. */ ...@@ -14,24 +14,86 @@ limitations under the License. */
#pragma once #pragma once
#include <popart/op.hpp>
#include <popart/sessionoptions.hpp> #include <popart/sessionoptions.hpp>
#include <popart/tensorlocation.hpp>
#include "popart/patterns/patterns.hpp"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace ipu { namespace ipu {
using VirtualGraphMode = popart::VirtualGraphMode; using VirtualGraphMode = popart::VirtualGraphMode;
using RecomputationType = popart::RecomputationType;
struct IpuStrategy { struct IpuStrategy {
IpuStrategy() {
// we always save optimizer state to OffChip and enable replicated tensor
// sharding (rts) to save memory
auto storage = popart::TensorLocation(popart::TensorStorage::OffChip,
popart::ReplicatedTensorSharding::On);
popart_options.optimizerStateTensorLocationSettings =
popart::TensorLocationSettings(storage);
// We divide by the accumulation factor and the replicated graph count
// after the all-reduce
popart_options.accumulationAndReplicationReductionType =
popart::ReductionType::Mean;
popart_options.meanAccumulationAndReplicationReductionStrategy =
popart::MeanReductionStrategy::Post;
popart_options.enableFloatingPointChecks = false;
// A directory for log traces to be written into.
popart_options.logDir = "popart_log";
}
~IpuStrategy() {}
// total number of IPUs needed: replica count * IPUs per replica
int num_ipus = 1; int num_ipus = 1;
// batches per step
int batches_per_step = 1; int batches_per_step = 1;
int batch_size = 1;
// micro batch-size
int micro_batch_size = 1;
// training flag, true for training
bool is_training = true; bool is_training = true;
// save the onnx model lowered from the paddle program description
bool save_init_onnx = false; bool save_init_onnx = false;
bool save_last_onnx = true;
popart::SessionOptions popart_options_; // save the trained model
bool save_onnx_checkpoint = false;
// save the paddle model every n steps
int save_per_n_step = 1;
// average sharding, used for debugging
bool need_avg_shard = false; bool need_avg_shard = false;
// flag for fp16, true for pure fp16
bool enable_fp16 = false; bool enable_fp16 = false;
// available memory proportion, 0.0f to disable
float available_memory_proportion = 0.0f;
// loss scaling, currently we can't get loss scaling from
// optimizer_extract_pass, so we have to set it here
float loss_scaling = 1.0f;
// defaultMaxWeightNorm for adam optimizer
float max_weight_norm = 65504.0f;
// popart session option
popart::SessionOptions popart_options;
popart::Patterns popart_patterns;
public:
void enablePattern(const std::string& t);
void disablePattern(const std::string& t);
bool isPatternEnabled(const std::string& t);
}; };
} // namespace ipu } // namespace ipu
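Putting the new fields together, a minimal sketch of configuring a strategy for fp16 training; all values are illustrative, not defaults from this diff:

#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"

// Hypothetical training configuration.
paddle::platform::ipu::IpuStrategy MakeTrainingStrategy() {
  paddle::platform::ipu::IpuStrategy strategy;
  strategy.num_ipus = 2;                        // replica * ipu_per_replica
  strategy.micro_batch_size = 8;
  strategy.batches_per_step = 4;
  strategy.is_training = true;
  strategy.enable_fp16 = true;
  strategy.available_memory_proportion = 0.3f;  // 0.0f would disable it
  return strategy;
}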
......
...@@ -12,22 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,22 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/ipu/ipu_utils.h" #include "paddle/fluid/platform/device/ipu/ipu_utils.h"
#include <cmath>
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace ipu { namespace ipu {
void* PaddleIArray::data() { return tensor_->data(); } void* PaddleIArray::data() { return tensor_.data(); }
popart::DataType PaddleIArray::dataType() const { popart::DataType PaddleIArray::dataType() const {
return VarType2PopartType(tensor_->type()); return VarType2PopartType(tensor_.type());
} }
std::size_t PaddleIArray::rank() const { return tensor_->dims().size(); } std::size_t PaddleIArray::rank() const { return tensor_.dims().size(); }
int64_t PaddleIArray::dim(size_t index) const { int64_t PaddleIArray::dim(size_t index) const {
return tensor_->dims().at(index); return tensor_.dims().at(index);
} }
std::size_t PaddleIArray::nelms() const { std::size_t PaddleIArray::nelms() const {
...@@ -150,6 +151,32 @@ bool GetBoolEnv(std::string str) { ...@@ -150,6 +151,32 @@ bool GetBoolEnv(std::string str) {
} }
} }
std::vector<std::pair<std::string, std::string>> GetOptPrePostfix(
const std::string& opt_type) {
// format: {popart_tensor_id_prefix, paddle_tensor_id_postfix}, ...
std::vector<std::pair<std::string, std::string>> pre_post_fix;
if (opt_type == "adam" || opt_type == "lamb") {
pre_post_fix.push_back(std::make_pair("", ""));
pre_post_fix.push_back(std::make_pair("Accl1___", "_moment1_0"));
pre_post_fix.push_back(std::make_pair("Accl2___", "_moment2_0"));
pre_post_fix.push_back(std::make_pair("Step___", "_beta1_pow_acc_0"));
} else if (opt_type == "sgd" || opt_type == "momentum") {
// sgd
pre_post_fix.push_back(std::make_pair("", ""));
} else {
pre_post_fix.push_back(std::make_pair("", ""));
// unknown optimizer types map only the parameter itself
}
return pre_post_fix;
}
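Concretely, each pair holds a popart prefix and a paddle postfix wrapped around the parameter name; a sketch for adam, where the parameter name "fc.w_0" is hypothetical:

#include <iostream>
#include <string>

// Sketch: how the adam pairs recover both tensor ids for one parameter.
void ShowAdamStateNames() {
  for (const auto& pre_post :
       paddle::platform::ipu::GetOptPrePostfix("adam")) {
    const std::string popart_id = pre_post.first + "fc.w_0";   // e.g. Accl1___fc.w_0
    const std::string paddle_id = "fc.w_0" + pre_post.second;  // e.g. fc.w_0_moment1_0
    std::cout << popart_id << " <-> " << paddle_id << std::endl;
  }
}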
int RequestIpus(const int num_ipus) {
// the requested IPU count must be a power of 2, so round up
return std::pow(2, ceil(log2(num_ipus)));
}
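For example, RequestIpus(1) returns 1, RequestIpus(3) returns 4, and RequestIpus(5) returns 8: attachable IPU device groups only come in power-of-two sizes.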
} // namespace ipu } // namespace ipu
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -17,14 +17,27 @@ limitations under the License. */ ...@@ -17,14 +17,27 @@ limitations under the License. */
#include <popart/ndarraywrapper.hpp> #include <popart/ndarraywrapper.hpp>
#include <popart/tensordata.hpp> #include <popart/tensordata.hpp>
#include <popart/tensorinfo.hpp> #include <popart/tensorinfo.hpp>
#include <popart/vendored/any.hpp>
#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace ipu { namespace ipu {
using float16 = platform::float16;
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using Scope = framework::Scope;
using OpDesc = framework::OpDesc;
using Graph = framework::ir::Graph;
using Node = framework::ir::Node;
using BlockDesc = framework::BlockDesc;
// onnx dtype // onnx dtype
// https://github.com/onnx/onnx/blob/master/onnx/onnx-ml.proto3 // https://github.com/onnx/onnx/blob/master/onnx/onnx-ml.proto3
enum ONNXDataType : int { enum ONNXDataType : int {
...@@ -49,14 +62,15 @@ enum ONNXDataType : int { ...@@ -49,14 +62,15 @@ enum ONNXDataType : int {
class PaddleIArray final : public popart::IArray { class PaddleIArray final : public popart::IArray {
public: public:
explicit PaddleIArray(framework::Tensor *tensor) : tensor_(tensor) { explicit PaddleIArray(const Tensor* tensor) {
tensor_.ShareDataWith(*tensor);
for (int i = 0; i < tensor->dims().size(); ++i) { for (int i = 0; i < tensor->dims().size(); ++i) {
shape_.push_back(tensor->dims().at(i)); shape_.push_back(tensor->dims().at(i));
} }
} }
public: public:
void *data(); void* data();
popart::DataType dataType() const; popart::DataType dataType() const;
std::size_t rank() const; std::size_t rank() const;
int64_t dim(size_t index) const; int64_t dim(size_t index) const;
...@@ -64,7 +78,7 @@ class PaddleIArray final : public popart::IArray { ...@@ -64,7 +78,7 @@ class PaddleIArray final : public popart::IArray {
const popart::Shape shape() const; const popart::Shape shape() const;
private: private:
framework::Tensor *tensor_; Tensor tensor_;
std::vector<int64_t> shape_; std::vector<int64_t> shape_;
}; };
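A construction sketch; after this change ShareDataWith means the wrapper aliases the tensor's storage rather than holding a raw pointer (the shape and dtype below are illustrative):

#include "paddle/fluid/platform/device/ipu/ipu_utils.h"

// Hypothetical wrapping of a host tensor for popart I/O.
void WrapTensor() {
  paddle::framework::Tensor t;
  t.Resize(paddle::framework::make_ddim({2, 3}));
  t.mutable_data<float>(paddle::platform::CPUPlace());
  paddle::platform::ipu::PaddleIArray iarray(&t);  // shares t's storage
}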
...@@ -74,8 +88,7 @@ popart::DataType OnnxDtype2PopartType(const int type); ...@@ -74,8 +88,7 @@ popart::DataType OnnxDtype2PopartType(const int type);
bool GetBoolEnv(std::string str); bool GetBoolEnv(std::string str);
template <typename T> template <typename T>
std::unique_ptr<popart::NDArrayWrapper<T>> Tensor2IArray( std::unique_ptr<popart::NDArrayWrapper<T>> Tensor2IArray(const Tensor& tensor) {
const framework::Tensor &tensor) {
auto dtype = VarType2PopartType(tensor.type()); auto dtype = VarType2PopartType(tensor.type());
auto shape = std::vector<int64_t>(); auto shape = std::vector<int64_t>();
for (size_t i = 0; i < tensor.dims().size(); ++i) { for (size_t i = 0; i < tensor.dims().size(); ++i) {
...@@ -84,18 +97,140 @@ std::unique_ptr<popart::NDArrayWrapper<T>> Tensor2IArray( ...@@ -84,18 +97,140 @@ std::unique_ptr<popart::NDArrayWrapper<T>> Tensor2IArray(
popart::TensorInfo tensor_info(dtype, shape); popart::TensorInfo tensor_info(dtype, shape);
return std::make_unique<popart::NDArrayWrapper<T>>( return std::make_unique<popart::NDArrayWrapper<T>>(
reinterpret_cast<T *>(tensor.data()), tensor_info); reinterpret_cast<T*>(tensor.data()), tensor_info);
} }
template <typename T> template <typename T>
std::unique_ptr<popart::NDArrayWrapper<T>> LoDTensor2IArray( std::unique_ptr<popart::NDArrayWrapper<T>> LoDTensor2IArray(
framework::LoDTensor const &lod_tensor) { LoDTensor const& lod_tensor) {
PADDLE_ENFORCE_EQ( if (lod_tensor.lod().size() == 0) {
lod_tensor.lod().size(), 0UL, return Tensor2IArray<T>(lod_tensor);
platform::errors::InvalidArgument("LoDTensor2IArray is Unimplemented")); } else {
return Tensor2IArray<T>(lod_tensor); PADDLE_THROW(
platform::errors::Unimplemented("LoDTensor2IArray is unimplemented when LoD is not empty"));
}
}
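A usage sketch for the two adapters; the float dtype is an assumption, and the arguments are supplied by the hypothetical caller:

// Hypothetical adaptation of host tensors for popart.
void AdaptTensors(const paddle::framework::Tensor& t,
                  const paddle::framework::LoDTensor& lt) {
  auto wrapper = paddle::platform::ipu::Tensor2IArray<float>(t);
  // A LoD-free LoDTensor falls through to Tensor2IArray; non-empty LoD throws.
  auto lod_wrapper = paddle::platform::ipu::LoDTensor2IArray<float>(lt);
}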
template <typename T>
T GetSingleVarFromScope(const Scope* scope, const std::string& var_name) {
auto var = scope->GetVar(var_name);
auto tensor = var->Get<framework::LoDTensor>();
// note: the caller is responsible for T matching the variable's dtype
return tensor.data<T>()[0];
} }
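A read sketch; the variable name "loss_scaling_0" and the float dtype are assumptions:

// Hypothetical read of a scalar held in the scope.
float ReadLossScaling(const paddle::framework::Scope* scope) {
  return paddle::platform::ipu::GetSingleVarFromScope<float>(
      scope, "loss_scaling_0");  // assumed variable name
}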
struct CustomOpAttrVisitor : public boost::static_visitor<void> {
explicit CustomOpAttrVisitor(std::map<std::string, popart::any>* attr,
const std::string& attr_name)
: attrs_(attr), attr_name_(attr_name) {}
mutable std::map<std::string, popart::any>* attrs_;
std::string attr_name_;
void operator()(int v) const { attrs_->emplace(attr_name_, v); }
void operator()(float v) const { attrs_->emplace(attr_name_, v); }
void operator()(const std::string& v) const {
attrs_->emplace(attr_name_, v);
}
void operator()(const std::vector<int>& v) const {
attrs_->emplace(attr_name_, v);
}
void operator()(const std::vector<float>& v) const {
attrs_->emplace(attr_name_, v);
}
void operator()(const std::vector<std::string>& v) const {
attrs_->emplace(attr_name_, v);
}
void operator()(bool v) const { attrs_->emplace(attr_name_, v); }
void operator()(const std::vector<bool>& v) const {
attrs_->emplace(attr_name_, v);
}
void operator()(BlockDesc* desc) const {
PADDLE_THROW(platform::errors::Unavailable(
"Unsupported calling method for `BlockDesc` type."));
}
void operator()(const std::vector<BlockDesc*>& v) const {
PADDLE_THROW(platform::errors::Unavailable(
"Unsupported calling method for `BlockDesc` type."));
}
void operator()(int64_t v) const { attrs_->emplace(attr_name_, v); }
void operator()(const std::vector<int64_t>& v) const {
attrs_->emplace(attr_name_, v);
}
void operator()(const std::vector<double>& v) const {
attrs_->emplace(attr_name_, v);
}
void operator()(boost::blank) const {
PADDLE_THROW(platform::errors::Unavailable(
"Unsupported calling method for `boost::blank` type."));
}
};
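A collection sketch driving the visitor with boost::apply_visitor over an op's attribute variant; AttrNames and GetAttr are the standard fluid OpDesc accessors:

#include <map>
#include <string>

// Hypothetical collection of all attributes of one op into a popart::any map.
std::map<std::string, popart::any> CollectAttrs(
    const paddle::framework::OpDesc& op) {
  std::map<std::string, popart::any> attrs;
  for (const auto& name : op.AttrNames()) {
    paddle::platform::ipu::CustomOpAttrVisitor visitor(&attrs, name);
    auto attr = op.GetAttr(name);
    boost::apply_visitor(visitor, attr);
  }
  return attrs;
}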
struct IpuCustomOpIdentifier {
IpuCustomOpIdentifier(const std::string& _paddle_op,
const std::string& _popart_op,
const std::string& _domain, unsigned int _version)
: paddle_op(_paddle_op), popart_op(_domain, _popart_op, _version) {}
std::string repr() {
std::ostringstream os;
os << "paddle_op: " << paddle_op << ", domain: " << popart_op.domain
<< ", type: " << popart_op.type << ", version: " << popart_op.version;
return os.str();
}
std::string paddle_op;
popart::OperatorIdentifier popart_op;
};
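A registration sketch; the op names, domain, and version are placeholders, not values from this diff:

#include <glog/logging.h>

// Hypothetical mapping of a paddle custom op onto a popart custom op.
void RegisterCustomOp() {
  paddle::platform::ipu::IpuCustomOpIdentifier info(
      "custom_relu", "Relu", "custom.ops", 1);  // all four values assumed
  VLOG(10) << info.repr();
}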
struct ConstantOpAttrVisitor : public boost::static_visitor<void> {
explicit ConstantOpAttrVisitor(framework::LoDTensor* tensor,
framework::proto::VarType::Type dtype)
: tensor_(tensor), dtype_(dtype) {}
framework::LoDTensor* tensor_;
framework::proto::VarType::Type dtype_;
void operator()(const std::vector<int>& vec) const {
framework::TensorFromVector<int>(vec, tensor_);
}
void operator()(const std::vector<float>& vec) const {
if (dtype_ == framework::proto::VarType::FP16) {
std::vector<float16> vec_fp16;
std::transform(vec.begin(), vec.end(), std::back_inserter(vec_fp16),
[](float f) -> float16 { return float16(f); });
framework::TensorFromVector<float16>(vec_fp16, tensor_);
} else {
framework::TensorFromVector<float>(vec, tensor_);
}
}
void operator()(const std::vector<bool>& vec) const {
framework::TensorFromVector<bool>(vec, tensor_);
}
void operator()(const std::vector<int64_t>& vec) const {
framework::TensorFromVector<int64_t>(vec, tensor_);
}
void operator()(const std::vector<double>& vec) const {
framework::TensorFromVector<double>(vec, tensor_);
}
void RaiseError() const {
PADDLE_THROW(
platform::errors::InvalidArgument("Constant value must be a vector"));
}
void operator()(int v) const { RaiseError(); }
void operator()(float v) const { RaiseError(); }
void operator()(const std::string& v) const { RaiseError(); }
void operator()(const std::vector<std::string>& v) const { RaiseError(); }
void operator()(bool v) const { RaiseError(); }
void operator()(BlockDesc* desc) const { RaiseError(); }
void operator()(const std::vector<BlockDesc*>& v) const { RaiseError(); }
void operator()(int64_t v) const { RaiseError(); }
void operator()(boost::blank) const { RaiseError(); }
};
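A materialization sketch; the attribute name "value" and the fp16 target dtype are assumptions:

// Hypothetical fill of a constant op's vector attribute into a LoDTensor,
// downcasting float to fp16 because the target dtype asks for it.
void FillConstant(const paddle::framework::OpDesc& op,
                  paddle::framework::LoDTensor* out) {
  paddle::platform::ipu::ConstantOpAttrVisitor visitor(
      out, paddle::framework::proto::VarType::FP16);
  auto attr = op.GetAttr("value");  // assumed attribute name
  boost::apply_visitor(visitor, attr);
}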
std::vector<std::pair<std::string, std::string>> GetOptPrePostfix(
const std::string& opt_type);
int RequestIpus(const int num_ipus);
} // namespace ipu } // namespace ipu
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// clang-format off
#pragma once
OP_DECL(popart_nllloss_v2, aiGraphcoreOpset.nllloss, SIG_ARG(INT32,popart::ReductionType,reduction) OPT_ARG(INT32,ignoreIndex) ARG(BOOL,inputIsLogProbability) ) // NOLINT
// clang-format on