diff --git a/paddle/fluid/platform/device/ipu/ipu_backend.cc b/paddle/fluid/platform/device/ipu/ipu_backend.cc index 012294d0fff8565a2e7ea85fc8cfe6b170fd5e8d..0871624a5d749fe5042d4b4239b42ab4f25f7967 100644 --- a/paddle/fluid/platform/device/ipu/ipu_backend.cc +++ b/paddle/fluid/platform/device/ipu/ipu_backend.cc @@ -13,12 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/device/ipu/ipu_backend.h" -#include "paddle/fluid/platform/device/ipu/ipu_utils.h" -#include "paddle/fluid/framework/framework.pb.h" -#include "paddle/fluid/framework/ir/graph.h" -#include "paddle/fluid/framework/ir/graph_helper.h" -#include "paddle/fluid/framework/ir/node.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/platform/device/ipu/ipu_compiler.h" +#include "paddle/fluid/platform/device/ipu/ipu_executor.h" namespace paddle { namespace platform { @@ -40,7 +38,7 @@ IpuBackend::~IpuBackend() { executor_.reset(); } -void IpuBackend::Compile(Graph* graph, +void IpuBackend::Compile(framework::ir::Graph* graph, const std::vector& feed_list, const std::vector& fetch_list) { VLOG(10) << "enter IpuBackend::Compile"; @@ -63,8 +61,8 @@ void IpuBackend::Compile(Graph* graph, VLOG(10) << "leave IpuBackend::Compile"; } -void IpuBackend::Run(const std::vector& inputs, - const std::vector& outputs, +void IpuBackend::Run(const std::vector& inputs, + const std::vector& outputs, const framework::ExecutionContext& ctx) { timer_->Start(); executor_->Run(inputs, outputs, ctx); @@ -82,7 +80,7 @@ void IpuBackend::Reset() { executor_.reset(); } -void IpuBackend::SetScope(const Scope& scope) { +void IpuBackend::SetScope(const framework::Scope& scope) { scope_ = &scope; executor_->SetScope(&scope); } diff --git a/paddle/fluid/platform/device/ipu/ipu_backend.h b/paddle/fluid/platform/device/ipu/ipu_backend.h index 0578d9face675ab2754eed213b1044ed8b2cd707..1e083e7a3518c920fbf93d815645707ed0e38fe6 100644 --- a/paddle/fluid/platform/device/ipu/ipu_backend.h +++ b/paddle/fluid/platform/device/ipu/ipu_backend.h @@ -18,26 +18,25 @@ limitations under the License. */ #include #include -#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/platform/device/ipu/ipu_compiler.h" -#include "paddle/fluid/platform/device/ipu/ipu_device.h" -#include "paddle/fluid/platform/device/ipu/ipu_executor.h" #include "paddle/fluid/platform/device/ipu/ipu_strategy.h" -#include "paddle/fluid/platform/device/ipu/ipu_utils.h" -#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/timer.h" +namespace paddle { +namespace framework { +class ExecutionContext; +} // namespace framework +} // namespace paddle + namespace paddle { namespace platform { namespace ipu { -// IpuBackend is the center of paddle-ipu, its function include: -// 1. Compile paddle model to popart model -// 2. Run popart model, inference or training -// 3. Request and release device -// 4. Other helper function +class IpuStrategy; +class Compiler; +class Executor; + class IpuBackend { public: static IpuBackend *GetInstance(); @@ -46,47 +45,46 @@ class IpuBackend { IpuBackend(); ~IpuBackend(); - // what compile does include(call compiler_): - // 1. map paddle-op -> poart op - // 2. 
construct popart onnx compute graph - void Compile(Graph *graph, const std::vector &feed_list, + // What compile method does: + // Convert paddle ops to popart ops; + // Construct a popart graph, which is a onnx compute graph; + // Load the graph and weights to ipu. + void Compile(framework::ir::Graph *graph, + const std::vector &feed_list, const std::vector &fetch_list); - // what run does include: - // 1. construct forward onnx graph - // 2. graph-level optimization - // 3. autodiff - void Run(const std::vector &inputs, - const std::vector &outputs, + // Run the compiled graph on ipu + void Run(const std::vector &inputs, + const std::vector &outputs, const framework::ExecutionContext &ctx); // Sync weights from IPU while training void WeightsToHost(); - // detach IPU manually + // Detach IPU manually void Detach(); - // reset manually - // call it before destruct works + // Reset manually + // Call it before destruct works void Reset(); - void SetScope(const Scope &scope); - const Scope *GetScope() { return scope_; } + void SetScope(const framework::Scope &scope); + const framework::Scope *GetScope() { return scope_; } void SetIpuStrategy(const IpuStrategy &strategy); const IpuStrategy *GetIpuStrategy() { return ipu_strategy_; } - // save compiled model to onnx + // Save compiled model to onnx void SaveModelProto(const std::string &path); private: - // not own - const Scope *scope_ = nullptr; + // Not own + const framework::Scope *scope_ = nullptr; const IpuStrategy *ipu_strategy_ = nullptr; - // own + // Own std::unique_ptr compiler_; std::unique_ptr executor_; - std::unique_ptr timer_; + std::unique_ptr timer_; bool is_compiled_ = false; diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.cc b/paddle/fluid/platform/device/ipu/ipu_compiler.cc index 463803dd03f1059f8f6918a7e3972206a0da954e..f2a37aae369ec2f2c964995d1d8d9e9ba75f8ab6 100644 --- a/paddle/fluid/platform/device/ipu/ipu_compiler.cc +++ b/paddle/fluid/platform/device/ipu/ipu_compiler.cc @@ -20,12 +20,110 @@ #include #include "paddle/fluid/framework/ir/graph_helper.h" +#include "paddle/fluid/platform/device/ipu/ipu_names.h" +#include "paddle/fluid/platform/device/ipu/ipu_strategy.h" #include "paddle/fluid/platform/device/ipu/ipu_utils.h" namespace paddle { namespace platform { namespace ipu { +namespace { + +struct CustomOpAttrVisitor : public boost::static_visitor { + CustomOpAttrVisitor(std::map* attr, + const std::string& attr_name) + : attrs_(attr), attr_name_(attr_name) {} + + mutable std::map* attrs_; + std::string attr_name_; + + void operator()(int v) const { attrs_->emplace(attr_name_, v); } + void operator()(float v) const { attrs_->emplace(attr_name_, v); } + void operator()(const std::string& v) const { + attrs_->emplace(attr_name_, v); + } + void operator()(const std::vector& v) const { + attrs_->emplace(attr_name_, v); + } + void operator()(const std::vector& v) const { + attrs_->emplace(attr_name_, v); + } + void operator()(const std::vector& v) const { + attrs_->emplace(attr_name_, v); + } + void operator()(bool v) const { attrs_->emplace(attr_name_, v); } + void operator()(const std::vector& v) const { + attrs_->emplace(attr_name_, v); + } + void operator()(BlockDesc* desc) const { + PADDLE_THROW(platform::errors::Unavailable( + "Unsupported calling method for `BlockDesc` type when extracting " + "custom operator attributes.")); + } + void operator()(const std::vector& v) const { + PADDLE_THROW(platform::errors::Unavailable( + "Unsupported calling method for `BlockDesc` type when extracting " + "custom 
operator attributes.")); + } + void operator()(int64_t v) const { attrs_->emplace(attr_name_, v); } + void operator()(const std::vector& v) const { + attrs_->emplace(attr_name_, v); + } + void operator()(const std::vector& v) const { + attrs_->emplace(attr_name_, v); + } + void operator()(boost::blank) const { + PADDLE_THROW(platform::errors::Unavailable( + "Unsupported calling method for `boost::blank` type when extracting " + "custom operator attributes.")); + } +}; + +struct ConstantOpAttrVisitor : public boost::static_visitor { + ConstantOpAttrVisitor(framework::LoDTensor* tensor, VarType::Type dtype) + : tensor_(tensor), dtype_(dtype) {} + + framework::LoDTensor* tensor_; + VarType::Type dtype_; + + void operator()(const std::vector& vec) const { + framework::TensorFromVector(vec, tensor_); + } + void operator()(const std::vector& vec) const { + if (dtype_ == VarType::FP16) { + std::vector vec_fp16; + std::transform(vec.begin(), vec.end(), std::back_inserter(vec_fp16), + [](float f) -> float16 { return float16(f); }); + framework::TensorFromVector(vec_fp16, tensor_); + } else { + framework::TensorFromVector(vec, tensor_); + } + } + void operator()(const std::vector& vec) const { + framework::TensorFromVector(vec, tensor_); + } + void operator()(const std::vector& vec) const { + framework::TensorFromVector(vec, tensor_); + } + void operator()(const std::vector& vec) const { + framework::TensorFromVector(vec, tensor_); + } +#define RAISE_ERROR \ + PADDLE_THROW( \ + platform::errors::InvalidArgument("Constant value must be a vector")) + void operator()(int v) const { RAISE_ERROR; } + void operator()(float v) const { RAISE_ERROR; } + void operator()(const std::string& v) const { RAISE_ERROR; } + void operator()(const std::vector& v) const { RAISE_ERROR; } + void operator()(bool v) const { RAISE_ERROR; } + void operator()(BlockDesc* desc) const { RAISE_ERROR; } + void operator()(const std::vector& v) const { RAISE_ERROR; } + void operator()(int64_t v) const { RAISE_ERROR; } + void operator()(boost::blank) const { RAISE_ERROR; } +#undef RAISE_ERROR +}; + popart::AdamMode AdamModeFromStr(const std::string& str, const bool& use_no_bias_optimizer) { if (str == "adam") { @@ -117,6 +215,34 @@ TO GetCastSigAttrAllowNull(std::string attr, OpDesc* op_desc) { } } +// Helper for adding namescope info +struct NameScopeHelper { + NameScopeHelper(const OpDesc* op, popart::Builder* builder); + + ~NameScopeHelper() { + if (pushed_) { + builder_->popNameScope(); + } + } + + bool pushed_ = false; + popart::Builder* builder_; +}; + +NameScopeHelper::NameScopeHelper(const OpDesc* op, popart::Builder* builder) + : builder_(builder) { + auto op_namescope = BOOST_GET_CONST(std::string, op->GetAttr(sOpNamescope)); + if (op_namescope.empty() || op_namescope == "/") { + return; + } + op_namescope.pop_back(); + op_namescope.erase(op_namescope.begin()); + builder->pushNameScope(op_namescope); + pushed_ = true; +} + +} // namespace + GraphHelper::GraphHelper(const Graph* g) { graph = g; sorted_ops = framework::ir::TopologySortOperations(*g); @@ -181,14 +307,12 @@ void Compiler::RegisterOpFunc() { auto op_type = op_desc->Type(); \ VLOG(10) << "build op:" << op_type << " args " << #Args; \ auto inputs = GetOpInputs(op_desc); \ - auto output_names = GetOpOutputs(op_desc); \ auto debug_context = BuildDebugContext(op_desc); \ auto aiGraphcoreOpset = builder_->aiGraphcoreOpset1(); \ auto aiOnnxOpset = builder_->aiOnnxOpset11(); \ NameScopeHelper ns_helper(op_desc, builder_.get()); \ auto output_ids = OnnxImpl(inputs Args, 
debug_context); \ PostLower(output_ids, op_desc); \ - InsertTensors(output_names, output_ids); \ }}, // NOLINT #include "paddle/fluid/platform/device/ipu/supported_ops_autogen.h" #include "paddle/fluid/platform/device/ipu/supported_ops_custom.h" @@ -219,7 +343,7 @@ void Compiler::InitInputs(const std::vector& feed_list) { auto* node = graph_helper_->vars_name_map[feed_name]; auto* var_desc = node->Var(); VLOG(10) << "feed_name= " << var_desc->Name(); - auto data_type = VarType2PopartType(var_desc->GetDataType()); + auto data_type = VarType2PopartDType(var_desc->GetDataType()); popart::TensorInfo input_info{data_type, var_desc->GetShape()}; VLOG(10) << "popart input_info = " << input_info; popart::TensorId tensor_id = @@ -255,8 +379,9 @@ void Compiler::LowerConstants(const Scope* scope) { auto shape = BOOST_GET_CONST(std::vector, op_desc->GetAttr("dims")); auto dtype_ = BOOST_GET_CONST(int, op_desc->GetAttr("dtype")); - auto dtype = PopartType2VarType(OnnxDtype2PopartType(dtype_)); - auto tensor_name = op_desc->Output("__outputs__")[0]; + auto dtype = PopartDType2VarType( + OnnxDType2PopartType(static_cast(dtype_))); + auto tensor_name = GetOpOutputs(op_desc).front(); auto* var = kid_scope.Var(tensor_name); VLOG(10) << "lowering constant: " << tensor_name; auto* tensor = var->GetMutable(); @@ -267,7 +392,7 @@ void Compiler::LowerConstants(const Scope* scope) { tensor->Resize(ddim); auto const_data = std::unique_ptr(); - popart::TensorInfo tensor_info(PdDataType2PopartType(tensor->dtype()), + popart::TensorInfo tensor_info(PhiDType2PopartDType(tensor->dtype()), shape); const_data.reset(new popart::ConstVoidData(tensor->data(), tensor_info)); NameScopeHelper ns_helper(op_desc, builder_.get()); @@ -303,7 +428,7 @@ void Compiler::LowerWeights(const Scope* scope) { var, platform::errors::NotFound("Tensor %s is not found in the scope", var_name)); auto tensor = var->Get(); - auto dtype = PdDataType2PopartType(tensor.dtype()); + auto dtype = PhiDType2PopartDType(tensor.dtype()); auto shape = std::vector(); for (size_t i = 0; i < tensor.dims().size(); ++i) { shape.push_back(tensor.dims().at(i)); @@ -336,11 +461,9 @@ void Compiler::LowerBody() { // pass } else if (op_type == "popart_checkpointoutput") { auto inputs = GetOpInputs(op_desc); - auto outputs = GetOpOutputs(op_desc); NameScopeHelper ns_helper(op_desc, builder_.get()); auto output_ids = builder_->checkpointOutput(inputs); PostLower(output_ids, op_desc); - InsertTensors(outputs, output_ids); } else if (op_type == "popart_custom_op") { auto inputs = GetOpInputs(op_desc); auto outputs = GetOpOutputs(op_desc); @@ -359,10 +482,8 @@ void Compiler::LowerBody() { builder_->customOp(it->second.popart_op, it->second.popart_op.version, inputs, outputs.size(), attributes, debug_context); PostLower(output_ids, op_desc); - InsertTensors(outputs, output_ids); } else if (op_type == "popart_printtensor") { auto inputs = GetOpInputs(op_desc); - auto outputs = GetOpOutputs(op_desc); auto debug_context = BuildDebugContext(op_desc); auto print_gradient = BOOST_GET_CONST(int64_t, op_desc->GetAttr("print_gradient")); @@ -371,7 +492,6 @@ void Compiler::LowerBody() { auto output_ids = builder_->aiGraphcoreOpset1().printtensor( inputs, print_gradient, debug_context, title); PostLower(output_ids, op_desc); - InsertTensors(outputs, output_ids); } else { auto itr = name_function_.find(op_type); if (itr != name_function_.end()) { @@ -601,23 +721,6 @@ void Compiler::LowerOptimizer(const Scope* scope) { } } -void Compiler::InsertTensors(const std::vector& output_names, - 
const std::vector& tensor_ids) { - PADDLE_ENFORCE_EQ(output_names.size(), tensor_ids.size(), - platform::errors::Fatal("InsertTensors size mismatch")); - for (int i = 0; i < tensor_ids.size(); i++) { - std::string tensor_id = tensor_ids[i]; - resources_->tensors.emplace(output_names[i], tensor_ids[i]); - } -} - -void Compiler::InsertTensors(const std::vector& output_names, - const std::string& tensor_id) { - PADDLE_ENFORCE_EQ(output_names.size(), 1, - platform::errors::Fatal("InsertTensors size mismatch")); - resources_->tensors.emplace(output_names[0], tensor_id); -} - void Compiler::PostLower(const std::vector& tensor_ids, const OpDesc* op_desc) { // Set pipline @@ -637,13 +740,26 @@ void Compiler::PostLower(const std::vector& tensor_ids, << " for op: " << op_desc->Type(); } } - + // Record output tensors + auto pd_outs = GetOpOutputs(op_desc); + PADDLE_ENFORCE_EQ( + pd_outs.size(), tensor_ids.size(), + platform::errors::Fatal("paddle and popart op have different outputs")); + for (int i = 0; i < tensor_ids.size(); ++i) { + resources_->tensors.emplace(pd_outs[i], tensor_ids[i]); + } for (auto& tensor_id : tensor_ids) { PostLower(tensor_id, op_desc, true); } } void Compiler::PostLower(const std::string& tensor_id, const OpDesc* op_desc) { + // Record output tensor + auto pd_outs = GetOpOutputs(op_desc); + PADDLE_ENFORCE_EQ( + pd_outs.size(), 1, + platform::errors::Fatal("paddle and popart op have different outputs")); + resources_->tensors.emplace(pd_outs[0], tensor_id); PostLower(tensor_id, op_desc, false); } @@ -718,13 +834,7 @@ std::string Compiler::GetFP16ModelProto() { return graph_transformer.getModelProto(); } -std::string Compiler::GetModelProto() { - if (ipu_strategy_->enable_fp16) { - return GetFP16ModelProto(); - } else { - return builder_->getModelProto(); - } -} +std::string Compiler::GetModelProto() { return builder_->getModelProto(); } void Compiler::SaveModelProto(const std::string& path) { builder_->saveModelProto(path); diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.h b/paddle/fluid/platform/device/ipu/ipu_compiler.h index bf00a453881b73dcac01851945f12c47764736f4..6f4e602af82df0a2af217b0f40f8815ee07886a2 100644 --- a/paddle/fluid/platform/device/ipu/ipu_compiler.h +++ b/paddle/fluid/platform/device/ipu/ipu_compiler.h @@ -17,16 +17,15 @@ #include #include #include -#include "paddle/fluid/framework/ir/graph.h" -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/platform/device/ipu/ipu_names.h" -#include "paddle/fluid/platform/device/ipu/ipu_strategy.h" + #include "paddle/fluid/platform/device/ipu/ipu_utils.h" namespace paddle { namespace platform { namespace ipu { +class IpuStrategy; + struct CompilerResources { // popart input tensor_ids std::vector inputs; @@ -81,30 +80,6 @@ struct GraphHelper { std::vector sorted_vars_id; }; -// Helper for adding namescope info -struct NameScopeHelper { - NameScopeHelper(const OpDesc *op, popart::Builder *builder) - : builder_(builder) { - auto op_namescope = BOOST_GET_CONST(std::string, op->GetAttr(sOpNamescope)); - if (op_namescope.empty() || op_namescope == "/") { - return; - } - op_namescope.pop_back(); - op_namescope.erase(op_namescope.begin()); - builder->pushNameScope(op_namescope); - pushed_ = true; - } - - ~NameScopeHelper() { - if (pushed_) { - builder_->popNameScope(); - } - } - - bool pushed_ = false; - popart::Builder *builder_; -}; - class Compiler { public: Compiler(); @@ -138,11 +113,6 @@ class Compiler { const std::vector &GetOpOutputs(const OpDesc *op); const std::string GetNameScope(const 
OpDesc *op); popart::DebugContext BuildDebugContext(const OpDesc *op); - - void InsertTensors(const std::vector &output_names, - const std::vector &tensor_ids); - void InsertTensors(const std::vector &output_names, - const std::string &tensor_id); void PostLower(const std::vector &, const OpDesc *); void PostLower(const std::string &, const OpDesc *); void PostLower(const std::string &, const OpDesc *, bool); diff --git a/paddle/fluid/platform/device/ipu/ipu_device.cc b/paddle/fluid/platform/device/ipu/ipu_device.cc index 2459f5140eb5b25af82381366f25c714beb69aaf..b7a83b2ef1a6121531b5c580fccb8f7b46bb9dc2 100644 --- a/paddle/fluid/platform/device/ipu/ipu_device.cc +++ b/paddle/fluid/platform/device/ipu/ipu_device.cc @@ -13,14 +13,17 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/device/ipu/ipu_device.h" + +#include + #include "paddle/fluid/platform/enforce.h" namespace paddle { namespace platform { namespace ipu { -// TODO(alleng) merge with ipu_utils -static bool GetBoolEnv(std::string str) { +namespace { +const bool GetBoolEnv(const std::string& str) { char* str_val = getenv(str.c_str()); if (str_val == NULL) { return false; @@ -32,6 +35,7 @@ static bool GetBoolEnv(std::string str) { return val; } } +} // namespace int GetNumDevices() { bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL"); diff --git a/paddle/fluid/platform/device/ipu/ipu_device.h b/paddle/fluid/platform/device/ipu/ipu_device.h index d39feffc92655b52dae1792fab0a5ef95bb6075f..c6876c032c8e480c9668dfc6f113238a873eff3d 100644 --- a/paddle/fluid/platform/device/ipu/ipu_device.h +++ b/paddle/fluid/platform/device/ipu/ipu_device.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include +#include namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/device/ipu/ipu_executor.cc b/paddle/fluid/platform/device/ipu/ipu_executor.cc index 4b8c8286e22e9c1f523545245d7c87b52ec2b59e..b020e4f21974389008fee130487a3b30151304f2 100644 --- a/paddle/fluid/platform/device/ipu/ipu_executor.cc +++ b/paddle/fluid/platform/device/ipu/ipu_executor.cc @@ -14,12 +14,17 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/device/ipu/ipu_executor.h" -using float16 = paddle::platform::float16; +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/platform/device/ipu/ipu_compiler.h" +#include "paddle/fluid/platform/device/ipu/ipu_names.h" +#include "paddle/fluid/platform/device/ipu/ipu_strategy.h" namespace paddle { namespace platform { namespace ipu { +namespace { + // Get paddle prefix and popart postfix of weight states // Format: {popart_postfix, paddle_prefix} std::vector> GetOptPrePostfix( @@ -54,6 +59,35 @@ std::vector> GetOptPrePostfix( return pre_post_fix; } +class PdIArray final : public popart::IArray { + public: + explicit PdIArray(const Tensor *tensor) { + tensor_.ShareDataWith(*tensor); + for (int i = 0; i < tensor->dims().size(); ++i) { + shape_.push_back(tensor->dims().at(i)); + } + } + + public: + void *data() { return tensor_.data(); } + popart::DataType dataType() const { + return PhiDType2PopartDType(tensor_.dtype()); + } + std::size_t rank() const { return tensor_.dims().size(); } + int64_t dim(size_t index) const { return tensor_.dims().at(index); } + std::size_t nelms() const { + return std::accumulate(shape_.begin(), shape_.end(), + static_cast(1), std::multiplies()); + } + const popart::Shape shape() const { return shape_; } + + private: + Tensor tensor_; + std::vector shape_; +}; + +} // namespace + Executor::~Executor() { Detach(); session_.reset(); @@ -110,15 +144,15 @@ void Executor::Run(const std::vector &inputs, VLOG(10) << "enter Executor::Run"; // inputs std::map popart_inputs; - std::map input_wrappers; + std::map input_wrappers; for (size_t i = 0; i < inputs.size(); i++) { auto tensor_id = compiler_resources_->inputs[i]; - input_wrappers.emplace(tensor_id, PaddleIArray(inputs[i])); + input_wrappers.emplace(tensor_id, PdIArray(inputs[i])); popart_inputs.emplace(tensor_id, input_wrappers.at(tensor_id)); } // anchors std::map popart_anchors; - std::map anchor_wrappers; + std::map anchor_wrappers; for (size_t i = 0; i < outputs.size(); i++) { auto tensor_id = compiler_resources_->outputs[i]; // get dims & dtype from session @@ -140,10 +174,10 @@ void Executor::Run(const std::vector &inputs, auto *tensor = outputs[i]; tensor->Resize(phi::make_ddim(output_shape)); auto fetch_dtype = fetch_info.dataType(); - auto paddle_type = PopartType2VarType(fetch_dtype); + auto paddle_type = PopartDType2VarType(fetch_dtype); tensor->mutable_data(ctx.GetPlace(), framework::TransToPhiDataType(paddle_type)); - anchor_wrappers.emplace(tensor_id, PaddleIArray(tensor)); + anchor_wrappers.emplace(tensor_id, PdIArray(tensor)); popart_anchors.emplace(tensor_id, anchor_wrappers.at(tensor_id)); } VLOG(10) << "Prepared inputs/anchors"; @@ -203,16 +237,16 @@ void Executor::AcquireDevice() { device_ = popart::DeviceManager::createDeviceManager().acquireDeviceById( device_id); PADDLE_ENFORCE_NOT_NULL( - device_, platform::errors::Unavailable( - "Can't attach IPU in distribution, ipu_num = %d.", - RequestIpus(ipu_strategy_->num_ipus))); + device_, + errors::Unavailable("Can't attach IPU in distribution, ipu_num = %d.", + RequestIpus(ipu_strategy_->num_ipus))); } else { device_ = popart::DeviceManager::createDeviceManager().acquireAvailableDevice( RequestIpus(ipu_strategy_->num_ipus)); - PADDLE_ENFORCE_NOT_NULL(device_, platform::errors::Unavailable( - "Can't attach IPU, ipu_num = %d.", - RequestIpus(ipu_strategy_->num_ipus))); + PADDLE_ENFORCE_NOT_NULL( + device_, errors::Unavailable("Can't attach IPU, ipu_num = %d.", + RequestIpus(ipu_strategy_->num_ipus))); 
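      // Editorial note, not part of the original patch: RequestIpus(), defined in
      // ipu_utils.cc later in this diff, rounds the requested device count up to the
      // next power of two via std::pow(2, ceil(log2(num_ipus))), so, for example,
      // num_ipus = 3 acquires 4 IPUs.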
} VLOG(10) << "leave Executor::AcquireDevice"; } @@ -260,13 +294,13 @@ void Executor::SetWeightsIO() { void Executor::ConvertWeights(bool align_to_popart) { for (auto weight_pair : executor_resources_->weights_and_opt_state) { auto paddle_var = scope_->GetVar(weight_pair.second); - auto paddle_var_dtype = PdDataType2PopartType( + auto paddle_var_dtype = PhiDType2PopartDType( paddle_var->GetMutable()->dtype()); PADDLE_ENFORCE_EQ((paddle_var_dtype == popart::DataType::FLOAT || paddle_var_dtype == popart::DataType::FLOAT16), true, - platform::errors::InvalidArgument( + errors::InvalidArgument( "Currently, we only support FLOAT16 and FLOAT with " "Paddle, but received type is %s.", paddle_var_dtype)); @@ -276,7 +310,7 @@ void Executor::ConvertWeights(bool align_to_popart) { PADDLE_ENFORCE_EQ((popart_var_dtype == popart::DataType::FLOAT || popart_var_dtype == popart::DataType::FLOAT16), true, - platform::errors::InvalidArgument( + errors::InvalidArgument( "Currently, we only support FLOAT16 and FLOAT with " "popart, but received type is %s.", popart_var_dtype)); @@ -310,8 +344,8 @@ void Executor::ConvertWeights(bool align_to_popart) { num_elem * sizeof(float)); } } else { - PADDLE_THROW(platform::errors::Unimplemented( - "Convert Paddle FLOAT16 to popart FLOAT")); + PADDLE_THROW( + errors::Unimplemented("Convert Paddle FLOAT16 to popart FLOAT")); } } } diff --git a/paddle/fluid/platform/device/ipu/ipu_executor.h b/paddle/fluid/platform/device/ipu/ipu_executor.h index c59e623ab20b0215f3ecb8f0f6811e4cb0ee2997..c03a52a77a9d7d886c623726f9f54982da4b3b3b 100644 --- a/paddle/fluid/platform/device/ipu/ipu_executor.h +++ b/paddle/fluid/platform/device/ipu/ipu_executor.h @@ -22,17 +22,21 @@ limitations under the License. */ #include #include -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/platform/device/ipu/ipu_compiler.h" -#include "paddle/fluid/platform/device/ipu/ipu_names.h" -#include "paddle/fluid/platform/device/ipu/ipu_strategy.h" #include "paddle/fluid/platform/device/ipu/ipu_utils.h" +namespace paddle { +namespace framework { +class ExecutionContext; +} // namespace framework +} // namespace paddle + namespace paddle { namespace platform { namespace ipu { +struct CompilerResources; +class IpuStrategy; + struct ExecutorResources { // map popart::WeightsIO weights_io; @@ -45,18 +49,18 @@ class Executor { Executor() = default; ~Executor(); - // build popart session + // Build popart session void Prepare(const std::string &proto); - // run popart session + // Run popart session void Run(const std::vector &inputs, const std::vector &outputs, const framework::ExecutionContext &ctx); - // sync weights from popart to paddle + // Sync weights from popart to paddle void WeightsToHost(); - // detach IPU + // Detach IPU void Detach(); // Scope @@ -83,16 +87,16 @@ class Executor { void WeightsToPaddle(); private: - // not own + // Not own const Scope *scope_ = nullptr; const IpuStrategy *ipu_strategy_ = nullptr; CompilerResources *compiler_resources_ = nullptr; - // deviceinfo for popart session + // Deviceinfo for popart session std::shared_ptr device_; - // popart session, where graph running + // Popart session, where graph running std::unique_ptr session_; - // one OneSession means a graph + // A ExecutorResources corresponds to a graph std::unique_ptr executor_resources_; }; diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.cc b/paddle/fluid/platform/device/ipu/ipu_strategy.cc index 
c208a0eca57076ff28c963c8ee655f75ee0af374..e35464e30c7a8b483f5172449e66e88d1b6d8a41 100644 --- a/paddle/fluid/platform/device/ipu/ipu_strategy.cc +++ b/paddle/fluid/platform/device/ipu/ipu_strategy.cc @@ -316,8 +316,10 @@ IpuStrategy::IpuStrategy() { RegisterSetter(bool_options, "enable_half_partial", [&](bool value) { if (value) { popart_options.partialsTypeMatMuls = "half"; + popart_options.convolutionOptions.insert({{"partialsType", "half"}}); } else { popart_options.partialsTypeMatMuls = "float"; + popart_options.convolutionOptions.insert({{"partialsType", "float"}}); } }); diff --git a/paddle/fluid/platform/device/ipu/ipu_utils.cc b/paddle/fluid/platform/device/ipu/ipu_utils.cc index 843f3ffde9e4556b91b34bee8e320fc2d53958c2..43e4a6820c81398217203dfb64f72cd0004ec1fa 100644 --- a/paddle/fluid/platform/device/ipu/ipu_utils.cc +++ b/paddle/fluid/platform/device/ipu/ipu_utils.cc @@ -13,133 +13,111 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/device/ipu/ipu_utils.h" + #include namespace paddle { namespace platform { namespace ipu { -void* PaddleIArray::data() { return tensor_.data(); } - -popart::DataType PaddleIArray::dataType() const { - return PdDataType2PopartType(tensor_.dtype()); -} - -std::size_t PaddleIArray::rank() const { return tensor_.dims().size(); } - -int64_t PaddleIArray::dim(size_t index) const { - return tensor_.dims().at(index); -} - -std::size_t PaddleIArray::nelms() const { - return std::accumulate(shape_.begin(), shape_.end(), static_cast(1), - std::multiplies()); -} - -const popart::Shape PaddleIArray::shape() const { return shape_; } - -popart::DataType VarType2PopartType( - const framework::proto::VarType::Type type) { +const popart::DataType VarType2PopartDType(const VarType::Type type) { switch (type) { - case framework::proto::VarType::UINT8: + case VarType::UINT8: return popart::DataType::UINT8; - case framework::proto::VarType::INT8: + case VarType::INT8: return popart::DataType::INT8; - case framework::proto::VarType::INT16: + case VarType::INT16: return popart::DataType::INT16; - case framework::proto::VarType::INT32: + case VarType::INT32: return popart::DataType::INT32; - case framework::proto::VarType::INT64: + case VarType::INT64: return popart::DataType::INT64; - case framework::proto::VarType::BOOL: + case VarType::BOOL: return popart::DataType::BOOL; - case framework::proto::VarType::FP64: + case VarType::FP64: return popart::DataType::DOUBLE; - case framework::proto::VarType::FP32: + case VarType::FP32: return popart::DataType::FLOAT; - case framework::proto::VarType::FP16: + case VarType::FP16: return popart::DataType::FLOAT16; - case framework::proto::VarType::BF16: + case VarType::BF16: return popart::DataType::BFLOAT16; - case framework::proto::VarType::COMPLEX64: + case VarType::COMPLEX64: return popart::DataType::COMPLEX64; - case framework::proto::VarType::COMPLEX128: + case VarType::COMPLEX128: return popart::DataType::COMPLEX128; default: - PADDLE_THROW(paddle::platform::errors::Unimplemented( - "Unsupported Paddle var type.")); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported VarType::Type when converting to popart data type.")); } } -popart::DataType PdDataType2PopartType( - const paddle::experimental::DataType type) { +const popart::DataType PhiDType2PopartDType(const phi::DataType type) { switch (type) { - case paddle::experimental::DataType::UINT8: + case phi::DataType::UINT8: return popart::DataType::UINT8; - case 
paddle::experimental::DataType::INT8: + case phi::DataType::INT8: return popart::DataType::INT8; - case paddle::experimental::DataType::INT16: + case phi::DataType::INT16: return popart::DataType::INT16; - case paddle::experimental::DataType::INT32: + case phi::DataType::INT32: return popart::DataType::INT32; - case paddle::experimental::DataType::INT64: + case phi::DataType::INT64: return popart::DataType::INT64; - case paddle::experimental::DataType::BOOL: + case phi::DataType::BOOL: return popart::DataType::BOOL; - case paddle::experimental::DataType::FLOAT64: + case phi::DataType::FLOAT64: return popart::DataType::DOUBLE; - case paddle::experimental::DataType::FLOAT32: + case phi::DataType::FLOAT32: return popart::DataType::FLOAT; - case paddle::experimental::DataType::FLOAT16: + case phi::DataType::FLOAT16: return popart::DataType::FLOAT16; - case paddle::experimental::DataType::BFLOAT16: + case phi::DataType::BFLOAT16: return popart::DataType::BFLOAT16; - case paddle::experimental::DataType::COMPLEX64: + case phi::DataType::COMPLEX64: return popart::DataType::COMPLEX64; - case paddle::experimental::DataType::COMPLEX128: + case phi::DataType::COMPLEX128: return popart::DataType::COMPLEX128; default: - PADDLE_THROW(paddle::platform::errors::Unimplemented( - "Unsupported Paddle data type.")); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported phi::DataType when converting to popart data type.")); } } -framework::proto::VarType::Type PopartType2VarType( - const popart::DataType type) { +const VarType::Type PopartDType2VarType(const popart::DataType type) { switch (type) { case popart::DataType::UINT8: - return framework::proto::VarType::UINT8; + return VarType::UINT8; case popart::DataType::INT8: - return framework::proto::VarType::INT8; + return VarType::INT8; case popart::DataType::INT16: - return framework::proto::VarType::INT16; + return VarType::INT16; case popart::DataType::INT32: - return framework::proto::VarType::INT32; + return VarType::INT32; case popart::DataType::INT64: - return framework::proto::VarType::INT64; + return VarType::INT64; case popart::DataType::BOOL: - return framework::proto::VarType::BOOL; + return VarType::BOOL; case popart::DataType::DOUBLE: - return framework::proto::VarType::FP64; + return VarType::FP64; case popart::DataType::FLOAT: - return framework::proto::VarType::FP32; + return VarType::FP32; case popart::DataType::FLOAT16: - return framework::proto::VarType::FP16; + return VarType::FP16; case popart::DataType::BFLOAT16: - return framework::proto::VarType::BF16; + return VarType::BF16; case popart::DataType::COMPLEX64: - return framework::proto::VarType::COMPLEX64; + return VarType::COMPLEX64; case popart::DataType::COMPLEX128: - return framework::proto::VarType::COMPLEX128; + return VarType::COMPLEX128; default: - PADDLE_THROW(paddle::platform::errors::Unavailable( - "Unsupported Paddle var type.")); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported popart::DataType when converting to var type.")); } } -popart::DataType OnnxDtype2PopartType(const int type) { - auto dtype = static_cast(type); - switch (dtype) { +const popart::DataType OnnxDType2PopartType(const ONNXDataType type) { + switch (type) { case ONNXDataType::BOOL: return popart::DataType::BOOL; case ONNXDataType::INT16: @@ -166,12 +144,69 @@ popart::DataType OnnxDtype2PopartType(const int type) { return popart::DataType::COMPLEX128; default: PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported ONNX data type: %d.", dtype)); + "Unsupported ONNXDataType when 
converting to popart data type.")); + } +} + +const ONNXDataType VarType2OnnxDType(const VarType::Type type) { + switch (type) { + case VarType::BOOL: + return ONNXDataType::BOOL; + case VarType::INT16: + return ONNXDataType::INT16; + case VarType::INT32: + return ONNXDataType::INT32; + case VarType::INT64: + return ONNXDataType::INT64; + case VarType::FP16: + return ONNXDataType::FLOAT16; + case VarType::FP32: + return ONNXDataType::FLOAT; + case VarType::FP64: + return ONNXDataType::DOUBLE; + case VarType::UINT8: + return ONNXDataType::UINT8; + case VarType::INT8: + return ONNXDataType::INT8; + case VarType::BF16: + return ONNXDataType::BFLOAT16; + case VarType::COMPLEX64: + return ONNXDataType::COMPLEX64; + case VarType::COMPLEX128: + return ONNXDataType::COMPLEX128; + default: + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported VarType::Type when converting to onnx data type.")); + } +} + +const std::string VarType2PopartStr(const VarType::Type type) { + switch (type) { + case VarType::UINT8: + return "UINT8"; + case VarType::INT8: + return "INT8"; + case VarType::INT16: + return "INT16"; + case VarType::INT32: + return "INT32"; + case VarType::INT64: + return "INT64"; + case VarType::BOOL: + return "BOOL"; + case VarType::FP64: + return "DOUBLE"; + case VarType::FP32: + return "FLOAT"; + case VarType::FP16: + return "FLOAT16"; + default: + PADDLE_THROW(platform::errors::Unavailable( + "Unsupported VarType::Type when converting to popart type string.")); } } -// count num should > 0 -bool GetBoolEnv(std::string str) { +const bool GetBoolEnv(const std::string& str) { char* str_val = getenv(str.c_str()); if (str_val == NULL) { return false; @@ -184,8 +219,7 @@ bool GetBoolEnv(std::string str) { } } -int RequestIpus(const int num_ipus) { - // num_ipus must be pow(2, n); +const int RequestIpus(const int num_ipus) { return std::pow(2, ceil(log2(num_ipus))); } diff --git a/paddle/fluid/platform/device/ipu/ipu_utils.h b/paddle/fluid/platform/device/ipu/ipu_utils.h index 50859aebdb311e663a4e9c97a159e7377c72c922..2737f402953901fa94eac63edaa2020e060b9571 100644 --- a/paddle/fluid/platform/device/ipu/ipu_utils.h +++ b/paddle/fluid/platform/device/ipu/ipu_utils.h @@ -19,155 +19,32 @@ limitations under the License. 
*/ #include #include -#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/float16.h" +using float16 = paddle::platform::float16; +using Tensor = paddle::framework::Tensor; +using LoDTensor = paddle::framework::LoDTensor; +using Scope = paddle::framework::Scope; +using OpDesc = paddle::framework::OpDesc; +using Graph = paddle::framework::ir::Graph; +using Node = paddle::framework::ir::Node; +using BlockDesc = paddle::framework::BlockDesc; +using VarType = paddle::framework::proto::VarType; + namespace paddle { namespace platform { namespace ipu { -using float16 = platform::float16; -using Tensor = framework::Tensor; -using LoDTensor = framework::LoDTensor; -using Scope = framework::Scope; -using OpDesc = framework::OpDesc; -using Graph = framework::ir::Graph; -using Node = framework::ir::Node; -using BlockDesc = framework::BlockDesc; - -// onnx dtype -// https://github.com/onnx/onnx/blob/master/onnx/onnx-ml.proto3 -enum ONNXDataType : int { - UNDEFINED = 0, - FLOAT = 1, - UINT8 = 2, - INT8 = 3, - UINT16 = 4, - INT16 = 5, - INT32 = 6, - INT64 = 7, - STRING = 8, - BOOL = 9, - FLOAT16 = 10, - DOUBLE = 11, - UINT32 = 12, - UINT64 = 13, - COMPLEX64 = 14, - COMPLEX128 = 15, - BFLOAT16 = 16 -}; - -class PaddleIArray final : public popart::IArray { - public: - explicit PaddleIArray(const Tensor* tensor) { - tensor_.ShareDataWith(*tensor); - for (int i = 0; i < tensor->dims().size(); ++i) { - shape_.push_back(tensor->dims().at(i)); - } - } - - public: - void* data(); - popart::DataType dataType() const; - std::size_t rank() const; - int64_t dim(size_t index) const; - std::size_t nelms() const; - const popart::Shape shape() const; - - private: - Tensor tensor_; - std::vector shape_; -}; - -popart::DataType VarType2PopartType(const framework::proto::VarType::Type type); -popart::DataType PdDataType2PopartType( - const paddle::experimental::DataType type); -framework::proto::VarType::Type PopartType2VarType(const popart::DataType type); -popart::DataType OnnxDtype2PopartType(const int type); -bool GetBoolEnv(std::string str); - -template -std::unique_ptr> Tensor2IArray(const Tensor& tensor) { - auto dtype = PdDataType2PopartType(tensor.dtype()); - auto shape = std::vector(); - for (size_t i = 0; i < tensor.dims().size(); ++i) { - shape.push_back(tensor.dims().at(i)); - } - popart::TensorInfo tensor_info(dtype, shape); - - return std::make_unique>( - reinterpret_cast(tensor.data()), tensor_info); -} - -template -std::unique_ptr> LoDTensor2IArray( - LoDTensor const& lod_tensor) { - if (lod_tensor.lod().size() == 0) { - return Tensor2IArray(lod_tensor); - } else { - PADDLE_THROW( - platform::errors::Unimplemented("LoDTensor2IArray is Unimplemented")); - } -} - template T GetSingleVarFromScope(const Scope* scope, const std::string& var_name) { auto var = scope->GetVar(var_name); auto tensor = var->Get(); - // check dtype is ? 
return tensor.data()[0]; } -struct CustomOpAttrVisitor : public boost::static_visitor { - explicit CustomOpAttrVisitor(std::map* attr, - const std::string& attr_name) - : attrs_(attr), attr_name_(attr_name) {} - mutable std::map* attrs_; - std::string attr_name_; - - void operator()(int v) const { attrs_->emplace(attr_name_, v); } - void operator()(float v) const { attrs_->emplace(attr_name_, v); } - void operator()(const std::string& v) const { - attrs_->emplace(attr_name_, v); - } - void operator()(const std::vector& v) const { - attrs_->emplace(attr_name_, v); - } - void operator()(const std::vector& v) const { - attrs_->emplace(attr_name_, v); - } - void operator()(const std::vector& v) const { - attrs_->emplace(attr_name_, v); - } - void operator()(bool v) const { attrs_->emplace(attr_name_, v); } - void operator()(const std::vector& v) const { - attrs_->emplace(attr_name_, v); - } - void operator()(BlockDesc* desc) const { - PADDLE_THROW(platform::errors::Unavailable( - "Unsupported calling method for `BlockDesc` type.")); - } - void operator()(const std::vector& v) const { - PADDLE_THROW(platform::errors::Unavailable( - "Unsupported calling method for `BlockDesc` type.")); - } - void operator()(int64_t v) const { attrs_->emplace(attr_name_, v); } - void operator()(const std::vector& v) const { - attrs_->emplace(attr_name_, v); - } - void operator()(const std::vector& v) const { - attrs_->emplace(attr_name_, v); - } - void operator()(boost::blank) const { - PADDLE_THROW(platform::errors::Unavailable( - "Unsupported calling method for `boost::blank` type.")); - } -}; - struct IpuCustomOpIdentifier { IpuCustomOpIdentifier(const std::string& _paddle_op, const std::string& _popart_op, @@ -185,51 +62,44 @@ struct IpuCustomOpIdentifier { popart::OperatorIdentifier popart_op; }; -struct ConstantOpAttrVisitor : public boost::static_visitor { - explicit ConstantOpAttrVisitor(framework::LoDTensor* tensor, - framework::proto::VarType::Type dtype) - : tensor_(tensor), dtype_(dtype) {} - framework::LoDTensor* tensor_; - framework::proto::VarType::Type dtype_; - - void operator()(const std::vector& vec) const { - framework::TensorFromVector(vec, tensor_); - } - void operator()(const std::vector& vec) const { - if (dtype_ == framework::proto::VarType::FP16) { - std::vector vec_fp16; - std::transform(vec.begin(), vec.end(), std::back_inserter(vec_fp16), - [](float f) -> float16 { return float16(f); }); - framework::TensorFromVector(vec_fp16, tensor_); - } else { - framework::TensorFromVector(vec, tensor_); - } - } - void operator()(const std::vector& vec) const { - framework::TensorFromVector(vec, tensor_); - } - void operator()(const std::vector& vec) const { - framework::TensorFromVector(vec, tensor_); - } - void operator()(const std::vector& vec) const { - framework::TensorFromVector(vec, tensor_); - } - void RaiseError() const { - PADDLE_THROW( - platform::errors::InvalidArgument("Constant value must be a vector")); - } - void operator()(int v) const { RaiseError(); } - void operator()(float v) const { RaiseError(); } - void operator()(const std::string& v) const { RaiseError(); } - void operator()(const std::vector& v) const { RaiseError(); } - void operator()(bool v) const { RaiseError(); } - void operator()(BlockDesc* desc) const { RaiseError(); } - void operator()(const std::vector& v) const { RaiseError(); } - void operator()(int64_t v) const { RaiseError(); } - void operator()(boost::blank) const { RaiseError(); } +// Onnx dtype +// 
https://github.com/onnx/onnx/blob/master/onnx/onnx-ml.proto3 +enum ONNXDataType : int { + UNDEFINED = 0, + FLOAT = 1, + UINT8 = 2, + INT8 = 3, + UINT16 = 4, + INT16 = 5, + INT32 = 6, + INT64 = 7, + STRING = 8, + BOOL = 9, + FLOAT16 = 10, + DOUBLE = 11, + UINT32 = 12, + UINT64 = 13, + COMPLEX64 = 14, + COMPLEX128 = 15, + BFLOAT16 = 16 }; -int RequestIpus(const int num_ipus); +// VarType::Type to popart::DataType +const popart::DataType VarType2PopartDType(const VarType::Type type); +// phi::DataType to popart::DataType +const popart::DataType PhiDType2PopartDType(const phi::DataType type); +// popart::DataType to VarType::Type +const VarType::Type PopartDType2VarType(const popart::DataType type); +// ONNXDataType to popart::DataType +const popart::DataType OnnxDType2PopartType(const ONNXDataType type); +// VarType::Type to ONNXDataType +const ONNXDataType VarType2OnnxDType(const VarType::Type type); +// VarType::Type to String in Popart +const std::string VarType2PopartStr(const VarType::Type type); +// Get bool from environment variable +const bool GetBoolEnv(const std::string& str); +// Requested number of ipus must be pow(2, n) +const int RequestIpus(const int num_ipus); } // namespace ipu } // namespace platform diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/activation_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/activation_ops.cc index ab9ddfde21873187dd583cbd734e686a252738c9..254e5665674246761482beeee344591a47fc9827 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/activation_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/activation_ops.cc @@ -56,15 +56,15 @@ Node *gelu_handler(Graph *graph, Node *node) { auto sqrt2 = CreateConst(graph, node, {}, {}, {{"value", std::vector{1.4142135623730951}}, {"dims", std::vector{1}}, - {"dtype", GetOutputVarDtype(node)}}); + {"dtype", GetOutputVarDType(node)}}); auto zero_point_five = CreateConst(graph, node, {}, {}, {{"value", std::vector{0.5}}, {"dims", std::vector{1}}, - {"dtype", GetOutputVarDtype(node)}}); + {"dtype", GetOutputVarDType(node)}}); auto one = CreateConst(graph, node, {}, {}, {{"value", std::vector{1}}, {"dims", std::vector{1}}, - {"dtype", GetOutputVarDtype(node)}}); + {"dtype", GetOutputVarDType(node)}}); auto div = CreateBaseOp(graph, node, "popart_div", {GetInputVarNode("X", node), sqrt2->outputs[0]}, {}, {}); diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.cc index 3d22f75d345d6a2376788cddfd4f4c8dbddcbee2..7a14d23698def0da91a7b2e361a234dbb766f3e7 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.cc @@ -18,7 +18,6 @@ namespace paddle { namespace platform { namespace ipu { -// This avoids the static initialisation order fiasco, std::unordered_map &SymbolHandlers() { static std::unordered_map symbol_handlers; return symbol_handlers; @@ -34,8 +33,6 @@ bool RegisterHandler(const std::string &symbol, const SymbolHandler &handler) { return new_handler; } -// Return a pointer to a handler if one is registered for this kind of node or -// an empty std::function otherwise. 
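// Editorial sketch, not part of the patch: SymbolHandlers()/RegisterHandler()/
// GetHandler() above follow a common registry pattern. A function-local static
// map sidesteps the static initialisation order fiasco, and lookup returns an
// empty std::function when no handler is registered. A minimal, self-contained
// illustration with hypothetical names (Handler, Registry, Register, Get):

#include <functional>
#include <string>
#include <unordered_map>

using Handler = std::function<int(int)>;

// Constructed on first use, so registration from static initialisers is safe.
std::unordered_map<std::string, Handler>& Registry() {
  static std::unordered_map<std::string, Handler> registry;
  return registry;
}

// Returns false if a handler with this name was already registered.
bool Register(const std::string& name, const Handler& handler) {
  return Registry().emplace(name, handler).second;
}

// Returns the registered handler, or an empty std::function otherwise.
Handler Get(const std::string& name) {
  auto it = Registry().find(name);
  return it != Registry().end() ? it->second : Handler{};
}

// Usage: Register("relu", [](int x) { return x > 0 ? x : 0; });
//        if (auto h = Get("relu")) { h(3); }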
SymbolHandler GetHandler(const std::string &kind) { auto it = SymbolHandlers().find(kind); if (it != SymbolHandlers().end()) { @@ -84,66 +81,6 @@ void CopyOpAttr(const std::string &attr_name, OpDesc *op, OpDesc *new_op, } } -const int VarType2OnnxDtype(const int type) { - auto dtype = static_cast(type); - switch (dtype) { - case framework::proto::VarType::BOOL: - return static_cast(ONNXDataType::BOOL); - case framework::proto::VarType::INT16: - return static_cast(ONNXDataType::INT16); - case framework::proto::VarType::INT32: - return static_cast(ONNXDataType::INT32); - case framework::proto::VarType::INT64: - return static_cast(ONNXDataType::INT64); - case framework::proto::VarType::FP16: - return static_cast(ONNXDataType::FLOAT16); - case framework::proto::VarType::FP32: - return static_cast(ONNXDataType::FLOAT); - case framework::proto::VarType::FP64: - return static_cast(ONNXDataType::DOUBLE); - case framework::proto::VarType::UINT8: - return static_cast(ONNXDataType::UINT8); - case framework::proto::VarType::INT8: - return static_cast(ONNXDataType::INT8); - case framework::proto::VarType::BF16: - return static_cast(ONNXDataType::BFLOAT16); - case framework::proto::VarType::COMPLEX64: - return static_cast(ONNXDataType::COMPLEX64); - case framework::proto::VarType::COMPLEX128: - return static_cast(ONNXDataType::COMPLEX128); - default: - PADDLE_THROW( - platform::errors::Unimplemented("Unsupported data type: %d.", dtype)); - } -} - -const std::string VarType2PopStr(const int type) { - auto dtype = static_cast(type); - switch (dtype) { - case framework::proto::VarType::UINT8: - return "UINT8"; - case framework::proto::VarType::INT8: - return "INT8"; - case framework::proto::VarType::INT16: - return "INT16"; - case framework::proto::VarType::INT32: - return "INT32"; - case framework::proto::VarType::INT64: - return "INT64"; - case framework::proto::VarType::BOOL: - return "BOOL"; - case framework::proto::VarType::FP64: - return "DOUBLE"; - case framework::proto::VarType::FP32: - return "FLOAT"; - case framework::proto::VarType::FP16: - return "FLOAT16"; - default: - PADDLE_THROW( - paddle::platform::errors::Unavailable("Unsupported data type.")); - } -} - Node *GetInputVarNode(const std::string &input_name, const Node *op_node, const int id) { auto var_name = op_node->Op()->Input(input_name).at(id); @@ -180,7 +117,7 @@ const bool is_float_equal(float a, float b, float eps) { return std::fabs(a - b) <= eps; } -const int GetOutputVarDtype(const Node *node, const std::string &output_name) { +const int GetOutputVarDType(const Node *node, const std::string &output_name) { auto out_node = GetOutputVarNode(output_name, node); PADDLE_ENFORCE_NOT_NULL(out_node, platform::errors::Unavailable( "Node's out node does not exist.")); @@ -188,7 +125,7 @@ const int GetOutputVarDtype(const Node *node, const std::string &output_name) { PADDLE_ENFORCE_NOT_NULL( var, platform::errors::Unavailable("Node is not a variable.")); auto proto_var_type = var->GetDataType(); - return VarType2OnnxDtype(proto_var_type); + return static_cast(VarType2OnnxDType(proto_var_type)); } } // namespace ipu diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h b/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h index 32133e128c588aca68bd0ec7bc574117954b6e07..7ac6097e0cc1462aa5fbed93220b4c4ec6b35b78 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h +++ 
b/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h @@ -68,9 +68,6 @@ void ClearNode(Node *node); void CopyOpAttr(const std::string &attr_name, OpDesc *op, OpDesc *new_op, bool override = false); -const int VarType2OnnxDtype(const int type); -const std::string VarType2PopStr(const int type); - Node *GetInputVarNode(const std::string &input_name, const Node *op_node, const int id = 0); Node *GetOutputVarNode(const std::string &output_name, const Node *op_node, @@ -81,7 +78,7 @@ Node *GetOutputVarNodeByVarName(const std::string &var_name, const Node *op_node); const bool is_float_equal(float a, float b, float eps = 1e-8); -const int GetOutputVarDtype(const Node *node, +const int GetOutputVarDType(const Node *node, const std::string &output_name = "Out"); } // namespace ipu diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/logic_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/logic_ops.cc index 6f82acb5b7db38956f52d50c4e414f8e82da719f..99fb76c9506812e35e05071424948a610f2059d0 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/logic_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/logic_ops.cc @@ -28,6 +28,14 @@ Node *equal_handler(Graph *graph, Node *node) { return new_node; } +Node *not_equal_handler(Graph *graph, Node *node) { + auto equal_node = CreateBaseOp( + graph, node, "popart_equal", + {GetInputVarNode("X", node), GetInputVarNode("Y", node)}, {}); + return CreateBaseOp(graph, node, "popart_logical_not", + {equal_node->outputs[0]}, node->outputs, {}); +} + Node *logical_not_handler(Graph *graph, Node *node) { return CreateBaseOp(graph, node, "popart_logical_not", {GetInputVarNode("X", node)}, @@ -64,6 +72,7 @@ Node *less_than_handler(Graph *graph, Node *node) { } // namespace paddle REGISTER_HANDLER(equal, equal_handler); +REGISTER_HANDLER(not_equal, not_equal_handler); REGISTER_HANDLER(logical_not, logical_not_handler); REGISTER_HANDLER(logical_or, logical_or_handler); REGISTER_HANDLER(logical_and, logical_and_handler); diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc index 444b55959cf221bf8c2006e0111cddfee7d5c24b..af72f84c9d771cecc32d01fb6b4074adde49da57 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc @@ -41,7 +41,7 @@ Node *pow_handler(Graph *graph, Node *node) { // Op(pow) -> Op(Constant)->Var(const_out)->Op(Pow) auto value_ = BOOST_GET_CONST(float, op->GetAttr("factor")); auto attrs = - MakeConstAttrMapFromValue(value_, {1}, GetOutputVarDtype(node)); + MakeConstAttrMapFromValue(value_, {1}, GetOutputVarDType(node)); auto new_node_const = CreateConst(graph, node, {}, {}, attrs); return CreateBaseOp(graph, node, "popart_pow", {GetInputVarNode("X", node), @@ -134,7 +134,7 @@ Node *matmul_handler(Graph *graph, Node *node) { } else { auto o_node = CreateBaseOp(graph, node, "popart_matmul", {x_node, y_node}, {}); - auto attr = MakeConstAttrMapFromValue(alpha, {1}, GetOutputVarDtype(node)); + auto attr = MakeConstAttrMapFromValue(alpha, {1}, GetOutputVarDType(node)); auto const_node = CreateConst(graph, node, {}, {}, attr); return CreateBaseOp(graph, node, "popart_mul", {o_node->outputs[0], const_node->outputs[0]}, @@ -299,6 +299,80 @@ Node *cross_entropy2_handler(Graph *graph, Node *node) { } } +Node *softmax_with_cross_entropy_handler(Graph *graph, Node *node) { + 
auto *op = node->Op(); + auto ignoreIndex = BOOST_GET_CONST(int, op->GetAttr("ignore_index")); + auto axis = BOOST_GET_CONST(int, op->GetAttr("axis")); + auto soft_label = BOOST_GET_CONST(bool, op->GetAttr("soft_label")); + if (soft_label) { + PADDLE_THROW(platform::errors::InvalidArgument( + "soft_label is not supported yet in IPU")); + } + Node *new_cast = nullptr; + if (GetInputVarNode("Label", node)->Var()->GetDataType() == + framework::proto::VarType::INT32) { + new_cast = GetInputVarNode("Label", node); + } else { + auto cast_op = CreateCast(graph, node, {GetInputVarNode("Label", node)}, + {}, framework::proto::VarType::INT32); + new_cast = cast_op->outputs[0]; + } + auto softmax_node = CreateSoftmaxOpset11( + graph, node, {GetInputVarNode("Logits", node)}, {}, axis); + + auto label_shape_ = GetInputVarNode("Label", node)->Var()->GetShape(); + if (label_shape_[label_shape_.size() - 1] != 1) { + auto log = CreateBaseOp(graph, node, "popart_log", + {softmax_node->outputs[0]}, {}, {}); + // softmax_with_cross_entropy is split into several ops in Python. + // reduction is not needed here. + return CreateBaseOp( + graph, node, "popart_nllloss_v2", {log->outputs[0], new_cast}, + {GetOutputVarNode("Loss", node)}, + { + {"reduction", 2}, // popart::ReductionType::NoReduction + {"ignoreIndex", ignoreIndex}, + {"inputIsLogProbability", true}, + }); + } else { + std::vector new_shape_{label_shape_[0]}; + auto const_before_loss = CreateBaseOp( + graph, node, "popart_constant", {}, {}, + {{"value", new_shape_}, + {"dims", + std::vector{static_cast(new_shape_.size())}}, + {"dtype", ONNXDataType::INT64}}); + + auto reshape_before_loss = + CreateBaseOp(graph, node, "popart_reshape", + {new_cast, const_before_loss->outputs[0]}, {}, {}); + + auto log = CreateBaseOp(graph, node, "popart_log", + {softmax_node->outputs[0]}, {}, {}); + auto nllloss = CreateBaseOp( + graph, node, "popart_nllloss_v2", + {log->outputs[0], reshape_before_loss->outputs[0]}, {}, + { + {"reduction", 2}, // popart::ReductionType::NoReduction + {"ignoreIndex", ignoreIndex}, + {"inputIsLogProbability", true}, + }); + + auto const_after_loss = CreateBaseOp( + graph, node, "popart_constant", {}, {}, + {{"value", label_shape_}, + {"dims", + std::vector{static_cast(label_shape_.size())}}, + {"dtype", ONNXDataType::INT64}}); + + auto reshape_after_loss = + CreateBaseOp(graph, node, "popart_reshape", + {nllloss->outputs[0], const_after_loss->outputs[0]}, + {GetOutputVarNode("Loss", node)}, {}); + return reshape_after_loss; + } +} + Node *cumsum_handler(Graph *graph, Node *node) { auto *op = node->Op(); auto exclusive = BOOST_GET_CONST(bool, op->GetAttr("exclusive")); @@ -378,6 +452,8 @@ REGISTER_HANDLER(matmul, matmul_handler); REGISTER_HANDLER(sum, sum_handler); REGISTER_HANDLER(softmax, softmax_handler); REGISTER_HANDLER(scale, scale_handler); +REGISTER_HANDLER(softmax_with_cross_entropy, + softmax_with_cross_entropy_handler); REGISTER_HANDLER(cross_entropy2, cross_entropy2_handler); REGISTER_HANDLER(cumsum, cumsum_handler); REGISTER_HANDLER(matmul_v2, matmul_v2_handler); diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc index a08fbaa26d9eda5fd7f89a5866896dc8b3f30588..2e9913f58efbba0bfbbe625fa4f490d836668b6e 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc @@ -299,7 +299,7 @@ Node *dropout_handler(Graph *graph, Node *node) { 
          CreateConst(graph, node, {}, {},
                      {{"value", std::vector<float>{1 - dropout_prob_}},
                       {"dims", std::vector<int64_t>{1}},
-                      {"dtype", GetOutputVarDtype(node)}});
+                      {"dtype", GetOutputVarDType(node)}});
   return CreateBaseOp(graph, node, "popart_mul",
                       {GetInputVarNode("X", node), scale->outputs[0]},
                       {GetOutputVarNode("Out", node)}, {});
diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.cc
index 0339097d587900a1a18e122b4b91eff21144c502..0525bb66f16181f0c2baff5c1ca1dfe5fddc39ea 100644
--- a/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.cc
+++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.cc
@@ -124,7 +124,7 @@ Node *CreateConst(Graph *graph, Node *node, const std::vector<Node *> &inputs,

 Node *CreateCast(Graph *graph, Node *node, const std::vector<Node *> &inputs,
                  const std::vector<Node *> &outputs, const int otype) {
-  auto to = VarType2PopStr(otype);
+  auto to = VarType2PopartStr(static_cast<VarType::Type>(otype));
   return CreateBaseOp(graph, node, "popart_cast", inputs, outputs,
                       {{"to", to}});
 }
diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc
index 55c25bce159313b00f6bfbf9b965bd0b6b6dac4b..00926ee7a0b25f65f083ac3d5e53d16d73a64428 100644
--- a/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc
+++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc
@@ -23,12 +23,14 @@ namespace {

 Node *fill_constant_handler(Graph *graph, Node *node) {
   auto *op = node->Op();
-  if (!op->Input("ShapeTensor").empty()) {
+  auto op_inputs = op->Inputs();
+  if (op_inputs.find("ShapeTensor") != op_inputs.end() &&
+      !op->Input("ShapeTensor").empty()) {
     PADDLE_THROW(
         platform::errors::Unimplemented("op fill_constant with ShapeTensor"));
   }
   auto dtype_ = BOOST_GET_CONST(int, op->GetAttr("dtype"));
-  auto dtype = VarType2OnnxDtype(dtype_);
+  auto dtype = VarType2OnnxDType(static_cast<VarType::Type>(dtype_));
   auto dims = BOOST_GET_CONST(std::vector<int64_t>, op->GetAttr("shape"));
   auto value_ = BOOST_GET_CONST(float, op->GetAttr("value"));
   size_t size = 1;
@@ -37,19 +39,20 @@ Node *fill_constant_handler(Graph *graph, Node *node) {
   }
   Attribute value;
   switch (dtype_) {
-    case framework::proto::VarType::FP32:
+    case VarType::FP16:
+    case VarType::FP32:
       value = std::vector<float>(size, value_);
       break;
-    case framework::proto::VarType::FP64:
+    case VarType::FP64:
       value = std::vector<double>(size, value_);
       break;
-    case framework::proto::VarType::INT32:
+    case VarType::INT32:
       value = std::vector<int>(size, value_);
       break;
-    case framework::proto::VarType::INT64:
+    case VarType::INT64:
       value = std::vector<int64_t>(size, value_);
       break;
-    case framework::proto::VarType::BOOL:
+    case VarType::BOOL:
       value = std::vector<bool>(size, value_);
       break;
     default:
@@ -66,7 +69,7 @@ Node *gaussian_random_handler(Graph *graph, Node *node) {
   auto *op = node->Op();
   auto shape = BOOST_GET_CONST(std::vector<int64_t>, op->GetAttr("shape"));
   auto dtype_ = BOOST_GET_CONST(int, op->GetAttr("dtype"));
-  auto dtype = VarType2OnnxDtype(dtype_);
+  auto dtype = VarType2OnnxDType(static_cast<VarType::Type>(dtype_));
   auto mean = BOOST_GET_CONST(float, op->GetAttr("mean"));
   auto scale = BOOST_GET_CONST(float, op->GetAttr("std"));
   // seed not work
@@ -86,7 +89,7 @@ Node *uniform_random_handler(Graph *graph, Node *node) {
   auto *op = node->Op();
   auto shape = BOOST_GET_CONST(std::vector<int64_t>, op->GetAttr("shape"));
   auto dtype_ = BOOST_GET_CONST(int, op->GetAttr("dtype"));
-  auto dtype = VarType2OnnxDtype(dtype_);
+  auto dtype = VarType2OnnxDType(static_cast<VarType::Type>(dtype_));
   auto high = BOOST_GET_CONST(float, op->GetAttr("max"));
   auto low = BOOST_GET_CONST(float, op->GetAttr("min"));
   // seed not work
@@ -172,9 +175,21 @@ Node *squeeze_handler(Graph *graph, Node *node) {
 Node *cast_handler(Graph *graph, Node *node) {
   auto *op = node->Op();
   auto otype = BOOST_GET_CONST(int, op->GetAttr("out_dtype"));
-  auto new_node_cast =
-      CreateCast(graph, node, node->inputs, node->outputs, otype);
-  return new_node_cast;
+  auto new_node = CreateCast(graph, node, node->inputs, node->outputs, otype);
+  // Cast op created in mixed-precision has no pipeline attrs
+  auto &prev_nodes = node->inputs.front()->inputs;
+  if (!prev_nodes.empty()) {
+    auto *prev_op = prev_nodes.front()->Op();
+    if (!new_node->Op()->HasAttr(sIpuIndexAttr) &&
+        prev_op->HasAttr(sIpuIndexAttr)) {
+      CopyOpAttr(sIpuIndexAttr, prev_op, new_node->Op());
+    }
+    if (!new_node->Op()->HasAttr(sIpuStageAttr) &&
+        prev_op->HasAttr(sIpuStageAttr)) {
+      CopyOpAttr(sIpuStageAttr, prev_op, new_node->Op());
+    }
+  }
+  return new_node;
 }

 Node *lookup_table_op_handler(Graph *graph, Node *node,
@@ -192,7 +207,7 @@ Node *lookup_table_op_handler(Graph *graph, Node *node,
   auto concat_const =
       CreateConst(graph, node, {}, {}, {{"value", const_value_},
                                         {"dims", const_shape_},
-                                        {"dtype", GetOutputVarDtype(node)}});
+                                        {"dtype", GetOutputVarDType(node)}});
   auto axes =
       CreateConst(graph, node, {}, {}, {{"value", std::vector<int64_t>{0}},
                                         {"dims", std::vector<int64_t>{1}},
@@ -397,7 +412,7 @@ Node *expand_handler(Graph *graph, Node *node) {
     // cast to int64
     expand_times =
         CreateCast(graph, node, {GetInputVarNode("ExpandTimes", node)}, {},
-                   framework::proto::VarType::INT64);
+                   VarType::INT64);
   } else {
     auto expand_times_i32 =
         BOOST_GET_CONST(std::vector<int>, op->GetAttr("expand_times"));
@@ -423,27 +438,28 @@ Node *assign_handler(Graph *graph, Node *node) {
 Node *assign_value_handler(Graph *graph, Node *node) {
   auto *op = node->Op();
   auto dtype_ = BOOST_GET_CONST(int, op->GetAttr("dtype"));
-  auto dtype = VarType2OnnxDtype(dtype_);
+  auto dtype = VarType2OnnxDType(static_cast<VarType::Type>(dtype_));
   auto dims_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("shape"));
   std::vector<int64_t> dims(dims_.begin(), dims_.end());
   Attribute values;
   std::string value_name;
   switch (dtype_) {
-    case framework::proto::VarType::BOOL: {
+    case VarType::BOOL: {
       value_name = "bool_values";
       auto vec_int = BOOST_GET_CONST(std::vector<int>, op->GetAttr(value_name));
       std::vector<bool> vec_bool(vec_int.begin(), vec_int.end());
       values = vec_bool;
     } break;
-    case framework::proto::VarType::INT32:
+    case VarType::INT32:
       value_name = "int32_values";
       values = BOOST_GET_CONST(std::vector<int>, op->GetAttr(value_name));
       break;
-    case framework::proto::VarType::FP32:
+    case VarType::FP16:
+    case VarType::FP32:
       value_name = "fp32_values";
       values = BOOST_GET_CONST(std::vector<float>, op->GetAttr(value_name));
       break;
-    case framework::proto::VarType::INT64:
+    case VarType::INT64:
       value_name = "int64_values";
       values = BOOST_GET_CONST(std::vector<int64_t>, op->GetAttr(value_name));
       break;
@@ -463,39 +479,40 @@ Node *fill_any_like_handler(Graph *graph, Node *node) {
   auto *op = node->Op();
   auto value = BOOST_GET_CONST(float, op->GetAttr("value"));
   auto x_shape = GetInputVarNode("X", node)->Var()->GetShape();
-  auto dtype = BOOST_GET_CONST(int, op->GetAttr("dtype"));
-  auto x_dtype = static_cast<framework::proto::VarType::Type>(dtype);
+  auto dtype_ = BOOST_GET_CONST(int, op->GetAttr("dtype"));
+  auto dtype = static_cast<VarType::Type>(dtype_);
   size_t size = 1;
   for (auto &dim : x_shape) {
     size *= dim;
   }
   Attribute out_value;
-  switch (x_dtype) {
-    case framework::proto::VarType::FP32:
+  switch (dtype) {
+    case VarType::FP16:
+    case VarType::FP32:
       out_value = std::vector<float>(size, value);
       break;
-    case framework::proto::VarType::FP64:
+    case VarType::FP64:
       out_value = std::vector<double>(size, value);
       break;
-    case framework::proto::VarType::INT32:
+    case VarType::INT32:
       out_value = std::vector<int>(size, value);
       break;
-    case framework::proto::VarType::INT64:
+    case VarType::INT64:
       out_value = std::vector<int64_t>(size, value);
       break;
-    case framework::proto::VarType::BOOL:
+    case VarType::BOOL:
       out_value = std::vector<bool>(size, value);
       break;
     default:
       PADDLE_THROW(
-          platform::errors::Unimplemented("fill_any_like dtype: %d", x_dtype));
+          platform::errors::Unimplemented("fill_any_like dtype: %d", dtype));
   }
   return CreateConst(graph, node, node->inputs, node->outputs,
                      AttributeMap{
                          {"value", out_value},
                          {"dims", x_shape},
-                         {"dtype", VarType2OnnxDtype(dtype)},
+                         {"dtype", VarType2OnnxDType(dtype)},
                      });
 }
@@ -538,8 +555,7 @@ Node *one_hot_v2_handler(Graph *graph, Node *node) {
                                      {"dims", std::vector<int64_t>{1}},
                                      {"dtype", ONNXDataType::INT32}});
   Node *value_tensor = nullptr;
-  if (GetOutputVarNode("Out", node)->Var()->GetDataType() ==
-      framework::proto::VarType::FP16) {
+  if (GetOutputVarNode("Out", node)->Var()->GetDataType() == VarType::FP16) {
     value_tensor = CreateConst(graph, node, {}, {},
                                {{"value", std::vector<float>{0, 1}},
                                 {"dims", std::vector<int64_t>{2}},