diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 0e1e572a51f7fcbc84415bab3808dfaed97dfd08..dad5358590cb1497453681ce940898314a1d06eb 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -147,7 +147,7 @@ if(WITH_IPU) pass_library(ipu_runtime_replacer_pass base DIR ipu) pass_library(inference_process_pass base DIR ipu) pass_library(inference_postprocess_pass base DIR ipu) - pass_library(popart_canonicalization_pass base DIR ipu) + pass_library(popart_canonicalization_pass base DIR ipu DEPS paddle_ipu) pass_library(ipu_inplace_pass base DIR ipu) pass_library(infer_shape_pass base DIR ipu) pass_library(delete_scale_op_pass base DIR ipu) diff --git a/paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc b/paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc index 3d8d353cbf530ebe9cc9ea90937b9acf5ddd4a0f..9fe50deaf2d72679bc5c41038936d01cad9de498 100644 --- a/paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc +++ b/paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc @@ -56,7 +56,7 @@ const bool is_regularization_op(const std::string& op_namescope) { } void IpuOptimizerExtractPass::ApplyImpl(ir::Graph* graph) const { - // 这里构建的 op 符合 popart 的定义, 涉及到的一些值需要在 LowerOptimier 时获得 + // optimizer values will be extracted when lowering optimizer in ipu_backend OpDesc new_op("popart_optimizer", {}, {}, {}); new_op.SetAttr("op_role", 0); new_op.SetAttr("with_lr_sched", false); @@ -86,7 +86,7 @@ void IpuOptimizerExtractPass::ApplyImpl(ir::Graph* graph) const { bool is_regularization = is_regularization_op(op_namescope); VLOG(10) << "found optimizer releated op: " << op_type; - // initial larning_rate will be set in LowerOptimier + // initial learning_rate will be set in ipu_backend set_ops.insert(op_type); if (op_type == "sgd") { auto type = std::string{"sgd"}; diff --git a/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc 
b/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc index 975a4b62cc708859803a2137741caaf413e50210..6806e44f0950535b059e8e7186541ab90973e6ab 100644 --- a/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc +++ b/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.h" +#include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h" @@ -28,11 +29,8 @@ void PopartCanonicalizationPass::ApplyImpl(ir::Graph* graph) const { auto custom_ops = Get>("custom_ops"); std::vector missing_ops; - auto nodes = graph->Nodes(); - for (auto* node : nodes) { - if (!node->IsOp()) { - continue; - } + auto sorted_ops = TopologySortOperations(*graph); + for (auto* node : sorted_ops) { auto* op = node->Op(); auto op_type = op->Type(); diff --git a/paddle/fluid/platform/device/ipu/CMakeLists.txt b/paddle/fluid/platform/device/ipu/CMakeLists.txt index d54c6a33ecbf53071956aaf4b9d342efa5746f65..acf914c5087d0ff11cda2d663a490e84a8c33216 100644 --- a/paddle/fluid/platform/device/ipu/CMakeLists.txt +++ b/paddle/fluid/platform/device/ipu/CMakeLists.txt @@ -13,9 +13,9 @@ IF(WITH_IPU) "ipu_device.cc" ) - cc_library(ipu_backend SRCS ${IPU_BACKEND_SRC} DEPS popart graph graph_helper) - cc_library(ipu_info SRCS ${IPU_INFO_SRC} DEPS popart enforce) - cc_library(paddle_ipu SHARED SRCS ${PADDLE_IPU_SRC} DEPS popart graph_helper) + cc_library(ipu_backend SRCS ${IPU_BACKEND_SRC} DEPS popart-only graph graph_helper) + cc_library(ipu_info SRCS ${IPU_INFO_SRC} DEPS popart-only enforce) + add_library(paddle_ipu SHARED ${PADDLE_IPU_SRC}) add_dependencies(paddle_ipu ipu_backend) set(PADDLE_IPU_LIB "${CMAKE_CURRENT_BINARY_DIR}/libpaddle_ipu.so" CACHE STRING "") set(PADDLE_IPU_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}" CACHE STRING "") diff --git 
a/paddle/fluid/platform/device/ipu/ipu_backend.cc b/paddle/fluid/platform/device/ipu/ipu_backend.cc index 8f2a7ef78c9824d7706be48f117a86b19c334b8a..e0b3b08a2313d0ba80e807494eb74612caf81fd5 100644 --- a/paddle/fluid/platform/device/ipu/ipu_backend.cc +++ b/paddle/fluid/platform/device/ipu/ipu_backend.cc @@ -43,17 +43,17 @@ void IpuBackend::Compile(Graph* graph, const std::vector& feed_list, const std::vector& fetch_list) { VLOG(10) << "enter IpuBackend::Compile"; - compiler_->Prepare(); - executor_->SetCompilerResources(compiler_->GetResources()); - - compiler_->InitInputs(graph, feed_list); - compiler_->LowerConstants(graph, scope_); - compiler_->LowerWeights(graph, scope_); - compiler_->LowerBody(graph); + compiler_->Prepare(graph); + compiler_->InitInputs(feed_list); + compiler_->LowerConstants(scope_); + compiler_->LowerWeights(scope_); + compiler_->LowerBody(); compiler_->InitOutputs(fetch_list); if (ipu_strategy_->is_training) { - compiler_->LowerOptimier(graph, scope_); + compiler_->LowerOptimizer(scope_); } + executor_->SetCompilerResources(compiler_->GetResources()); + is_compiled_ = true; // when call compile, means a new graph is_prepared_ = false; @@ -95,11 +95,9 @@ void IpuBackend::SetIpuStrategy(const IpuStrategy& strategy) { ipu_strategy_ = &strategy; compiler_->SetIpuStrategy(strategy); executor_->SetIpuStrategy(strategy); -} - -void IpuBackend::SetCustomOps( - const std::vector& custom_ops) { - compiler_->SetCustomOps(custom_ops); + if (!strategy.custom_ops.empty()) { + compiler_->SetCustomOps(strategy.custom_ops); + } } void IpuBackend::SaveModelProto(const std::string& path) { diff --git a/paddle/fluid/platform/device/ipu/ipu_backend.h b/paddle/fluid/platform/device/ipu/ipu_backend.h index b12e2539258dfefe93e0828fa1a7341e21d62e70..1244192490c16c4cfb01ac1c5f195cc123c4ba16 100644 --- a/paddle/fluid/platform/device/ipu/ipu_backend.h +++ b/paddle/fluid/platform/device/ipu/ipu_backend.h @@ -71,7 +71,6 @@ class IpuBackend { const Scope *GetScope() { 
return scope_; } void SetIpuStrategy(const IpuStrategy &strategy); const IpuStrategy *GetIpuStrategy() { return ipu_strategy_; } - void SetCustomOps(const std::vector &custom_ops); // save compiled model to onnx void SaveModelProto(const std::string &path); diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.cc b/paddle/fluid/platform/device/ipu/ipu_compiler.cc index df2e456383e1754956810f254cd98651e3139bcf..cdb3f6f9b3e285728d5c372b51492e42027aadba 100644 --- a/paddle/fluid/platform/device/ipu/ipu_compiler.cc +++ b/paddle/fluid/platform/device/ipu/ipu_compiler.cc @@ -98,6 +98,19 @@ TO GetCastSigAttrAllowNull(std::string attr, OpDesc* op_desc) { } } +GraphHelper::GraphHelper(const Graph* g) { + graph = g; + sorted_ops = framework::ir::TopologySortOperations(*g); + for (auto* node : g->Nodes()) { + nodes_id_map[node->id()] = node; + if (node->IsVar()) { + vars_name_map[node->Name()] = node; + sorted_vars_id.push_back(node->id()); + } + } + std::sort(sorted_vars_id.begin(), sorted_vars_id.end()); +} + Compiler::Compiler() { RegisterOpFunc(); } Compiler::~Compiler() { @@ -105,9 +118,10 @@ Compiler::~Compiler() { resources_.reset(); } -void Compiler::Prepare() { +void Compiler::Prepare(const Graph* graph) { builder_ = popart::Builder::create(); resources_ = std::make_unique(); + graph_helper_ = std::make_unique(graph); } void Compiler::RegisterOpFunc() { @@ -171,93 +185,24 @@ void Compiler::RegisterOpFunc() { #undef INT_VEC } -void Compiler::LowerBody(const Graph* graph) { - VLOG(10) << "enter Compiler::LowerBody"; - auto nodes = framework::ir::TopologySortOperations(*graph); - for (auto* node : nodes) { - auto* op_desc = node->Op(); - auto op_type = op_desc->Type(); - VLOG(10) << "lowering op: " << op_type; - - if (op_type == "popart_constant") { - // pass - } else if (op_type == "popart_optimizer") { - // pass - } else if (op_type == "popart_checkpointoutput") { - auto inputs = GetOpInputs(op_desc); - auto outputs = GetOpOutputs(op_desc); - auto output_ids = 
builder_->checkpointOutput(inputs); - InsertTensors(outputs, output_ids); - } else if (op_type == "popart_custom_op") { - auto inputs = GetOpInputs(op_desc); - auto outputs = GetOpOutputs(op_desc); - auto debug_context = BuildDebugContext(op_desc); - auto attributes = std::map{}; - for (auto& attr : op_desc->GetAttrMap()) { - CustomOpAttrVisitor visitor(&attributes, attr.first); - boost::apply_visitor(visitor, attr.second); - } - auto __op_type = - BOOST_GET_CONST(std::string, op_desc->GetAttr("__op_type")); - VLOG(10) << "Build graph from custom op: " << __op_type; - auto it = custom_ops_.find(__op_type); - auto output_ids = - builder_->customOp(it->second.popart_op, it->second.popart_op.version, - inputs, outputs.size(), attributes, debug_context); - SetIpuIndexStage(output_ids, op_desc); - InsertTensors(outputs, output_ids); - } else if (op_type == "popart_printtensor") { - auto inputs = GetOpInputs(op_desc); - auto outputs = GetOpOutputs(op_desc); - auto debug_context = BuildDebugContext(op_desc); - auto print_gradient = - BOOST_GET_CONST(int64_t, op_desc->GetAttr("print_gradient")); - auto title = BOOST_GET_CONST(std::string, op_desc->GetAttr("title")); - auto output_ids = builder_->aiGraphcoreOpset1().printtensor( - inputs, print_gradient, debug_context, title); - SetIpuIndexStage(output_ids, op_desc); - InsertTensors(outputs, output_ids); - } else { - auto itr = name_function_.find(op_type); - if (itr != name_function_.end()) { - itr->second(node->Op()); - } else { - PADDLE_THROW(platform::errors::NotFound( - "%s is not registered, please check for unsupported operators for " - "running on IPU", - op_type)); - } - } - } - VLOG(10) << "leave Compiler::LowerBody"; -} - -void Compiler::InitInputs(Graph* graph, - const std::vector& feed_list) { +void Compiler::InitInputs(const std::vector& feed_list) { for (const auto& feed_name : feed_list) { - feed_list_.push_back(feed_name); - for (const Node* n : graph->Nodes()) { - if (n->IsVar()) { - auto* var_desc = 
n->Var(); - if (feed_name == var_desc->Name()) { - VLOG(10) << "feed_name= " << var_desc->Name(); - auto data_type = VarType2PopartType(var_desc->GetDataType()); - popart::TensorInfo input_info{data_type, var_desc->GetShape()}; - VLOG(10) << "popart input_info = " << input_info; - popart::TensorId tensor_id = - builder_->addInputTensor(input_info, feed_name); - VLOG(10) << "popart input tensor id = " << tensor_id; - resources_->inputs.push_back(tensor_id); - resources_->tensors.emplace(var_desc->Name(), tensor_id); - } - } - } + auto* node = graph_helper_->vars_name_map[feed_name]; + auto* var_desc = node->Var(); + VLOG(10) << "feed_name= " << var_desc->Name(); + auto data_type = VarType2PopartType(var_desc->GetDataType()); + popart::TensorInfo input_info{data_type, var_desc->GetShape()}; + VLOG(10) << "popart input_info = " << input_info; + popart::TensorId tensor_id = + builder_->addInputTensor(input_info, feed_name); + VLOG(10) << "popart input tensor id = " << tensor_id; + resources_->inputs.push_back(tensor_id); + resources_->tensors.emplace(var_desc->Name(), tensor_id); } } void Compiler::InitOutputs(const std::vector& fetch_list) { for (const auto& fetch_name : fetch_list) { - fetch_list_.push_back(fetch_name); auto tensor = resources_->tensors.find(fetch_name); PADDLE_ENFORCE_NE( tensor, resources_->tensors.end(), @@ -271,14 +216,10 @@ void Compiler::InitOutputs(const std::vector& fetch_list) { } } -void Compiler::LowerConstants(const Graph* graph, const Scope* scope) { +void Compiler::LowerConstants(const Scope* scope) { auto& kid_scope = scope->NewScope(); VLOG(10) << "enter Compiler::LowerConstants"; - for (auto* node : graph->Nodes()) { - if (!node->IsOp()) { - continue; - } - + for (auto* node : graph_helper_->sorted_ops) { auto* op_desc = node->Op(); auto op_type = op_desc->Type(); if (op_type == "popart_constant") { @@ -308,17 +249,16 @@ void Compiler::LowerConstants(const Graph* graph, const Scope* scope) { VLOG(10) << "leave 
Compiler::LowerConstants"; } -void Compiler::LowerWeights(const Graph* graph, const Scope* scope) { +void Compiler::LowerWeights(const Scope* scope) { VLOG(10) << "enter Compiler::LowerWeights"; - PADDLE_ENFORCE_NOT_NULL(scope, - platform::errors::PreconditionNotMet( - "You should call set_scope before LowerWeights")); // at this step, the graph doesn't contains optimizer related states - for (const auto* node : graph->Nodes()) { + for (auto id : graph_helper_->sorted_vars_id) { + auto* node = graph_helper_->nodes_id_map[id]; if (node->IsVar() && !node->IsCtrlVar() && node->Var()) { if (node->Var()->Persistable() && node->inputs.empty()) { auto var_name = node->Var()->Name(); if (resources_->tensors.count(var_name) != 0) { + VLOG(10) << "found existed one, skip lowering Weight: " << var_name; continue; } VLOG(10) << "lowering weight: " << var_name; @@ -344,12 +284,68 @@ void Compiler::LowerWeights(const Graph* graph, const Scope* scope) { VLOG(10) << "leave Compiler::LowerWeights"; } -void Compiler::LowerOptimier(const Graph* graph, const Scope* scope) { - for (auto* node : graph->Nodes()) { - if (!node->IsOp()) { - continue; +void Compiler::LowerBody() { + VLOG(10) << "enter Compiler::LowerBody"; + for (auto* node : graph_helper_->sorted_ops) { + auto* op_desc = node->Op(); + auto op_type = op_desc->Type(); + VLOG(10) << "lowering op: " << op_type; + + if (op_type == "popart_constant") { + // pass + } else if (op_type == "popart_optimizer") { + // pass + } else if (op_type == "popart_checkpointoutput") { + auto inputs = GetOpInputs(op_desc); + auto outputs = GetOpOutputs(op_desc); + auto output_ids = builder_->checkpointOutput(inputs); + InsertTensors(outputs, output_ids); + } else if (op_type == "popart_custom_op") { + auto inputs = GetOpInputs(op_desc); + auto outputs = GetOpOutputs(op_desc); + auto debug_context = BuildDebugContext(op_desc); + auto attributes = std::map{}; + for (auto& attr : op_desc->GetAttrMap()) { + CustomOpAttrVisitor visitor(&attributes, 
attr.first); + boost::apply_visitor(visitor, attr.second); + } + auto __op_type = + BOOST_GET_CONST(std::string, op_desc->GetAttr("__op_type")); + VLOG(10) << "Build graph from custom op: " << __op_type; + auto it = custom_ops_.find(__op_type); + auto output_ids = + builder_->customOp(it->second.popart_op, it->second.popart_op.version, + inputs, outputs.size(), attributes, debug_context); + SetIpuIndexStage(output_ids, op_desc); + InsertTensors(outputs, output_ids); + } else if (op_type == "popart_printtensor") { + auto inputs = GetOpInputs(op_desc); + auto outputs = GetOpOutputs(op_desc); + auto debug_context = BuildDebugContext(op_desc); + auto print_gradient = + BOOST_GET_CONST(int64_t, op_desc->GetAttr("print_gradient")); + auto title = BOOST_GET_CONST(std::string, op_desc->GetAttr("title")); + auto output_ids = builder_->aiGraphcoreOpset1().printtensor( + inputs, print_gradient, debug_context, title); + SetIpuIndexStage(output_ids, op_desc); + InsertTensors(outputs, output_ids); + } else { + auto itr = name_function_.find(op_type); + if (itr != name_function_.end()) { + itr->second(node->Op()); + } else { + PADDLE_THROW(platform::errors::NotFound( + "%s is not registered, please check for unsupported operators for " + "running on IPU", + op_type)); + } } + } + VLOG(10) << "leave Compiler::LowerBody"; +} +void Compiler::LowerOptimizer(const Scope* scope) { + for (auto* node : graph_helper_->sorted_ops) { auto* op_desc = node->Op(); auto op_type = op_desc->Type(); if (op_type == "popart_optimizer") { diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.h b/paddle/fluid/platform/device/ipu/ipu_compiler.h index 5576266b1a771682ef949c9825309b64c08c0531..5d1e8c2727d8f9ca36c9380584505dbfcabfb064 100644 --- a/paddle/fluid/platform/device/ipu/ipu_compiler.h +++ b/paddle/fluid/platform/device/ipu/ipu_compiler.h @@ -68,34 +68,29 @@ struct CompilerResources { std::unique_ptr optimizer; }; +// helper for lowering graph +struct GraphHelper { + explicit 
GraphHelper(const Graph *); + + const Graph *graph; + std::map vars_name_map; + std::map nodes_id_map; + std::vector sorted_ops; + std::vector sorted_vars_id; +}; + class Compiler { public: Compiler(); ~Compiler(); - void RegisterOpFunc(); - void Prepare(); - void LowerBody(const Graph *graph); - void InitInputs(Graph *graph, const std::vector &feed_list); + void Prepare(const Graph *graph); + void InitInputs(const std::vector &feed_list); void InitOutputs(const std::vector &fetch_list); - void LowerConstants(const Graph *graph, const Scope *scope); - void LowerWeights(const Graph *graph, const Scope *scope); - void LowerOptimier(const Graph *graph, const Scope *scope); - - void InsertTensors(const std::vector &output_names, - const std::vector &tensor_ids); - void InsertTensors(const std::vector &output_names, - const std::string &tensor_id); - void SetIpuIndexStage(const std::vector &tensor_ids, - const OpDesc *op_desc); - void SetIpuIndexStage(const std::string &tensor_id, const OpDesc *op_desc); - void SetAMPAttributes(const std::vector &tensor_ids, - const OpDesc *op_desc); - void SetAMPAttributes(const std::string &tensor_id, const OpDesc *op_desc); - void SetSerializeAttributes(const std::vector &tensor_ids, - const OpDesc *op_desc); - void SetSerializeAttributes(const std::string &tensor_id, - const OpDesc *op_desc); + void LowerConstants(const Scope *scope); + void LowerWeights(const Scope *scope); + void LowerBody(); + void LowerOptimizer(const Scope *scope); void SetIpuStrategy(const IpuStrategy &strategy) { ipu_strategy_ = &strategy; @@ -112,21 +107,34 @@ class Compiler { void SaveModelProtoNoCheck(const std::string &path); private: + void RegisterOpFunc(); std::vector GetOpInputs(const OpDesc *op); const std::vector &GetOpOutputs(const OpDesc *op); popart::DebugContext BuildDebugContext(const OpDesc *op); + void InsertTensors(const std::vector &output_names, + const std::vector &tensor_ids); + void InsertTensors(const std::vector &output_names, + const 
std::string &tensor_id); + void SetIpuIndexStage(const std::vector &tensor_ids, + const OpDesc *op_desc); + void SetIpuIndexStage(const std::string &tensor_id, const OpDesc *op_desc); + void SetAMPAttributes(const std::vector &tensor_ids, + const OpDesc *op_desc); + void SetAMPAttributes(const std::string &tensor_id, const OpDesc *op_desc); + void SetSerializeAttributes(const std::vector &tensor_ids, + const OpDesc *op_desc); + void SetSerializeAttributes(const std::string &tensor_id, + const OpDesc *op_desc); + private: std::unique_ptr builder_; std::unique_ptr resources_; + std::unique_ptr graph_helper_; using OpFunc = std::function; std::unordered_map name_function_; - // feed_list_ & fetch_list save paddle tensor id - std::vector feed_list_; - std::vector fetch_list_; - const IpuStrategy *ipu_strategy_ = nullptr; std::map custom_ops_; }; diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.cc b/paddle/fluid/platform/device/ipu/ipu_strategy.cc index 4a9b9c00cb75cd042bab527532de3314075e6dcd..943dfcc6cffb875fc3cebfc88e35adeaba47fd63 100644 --- a/paddle/fluid/platform/device/ipu/ipu_strategy.cc +++ b/paddle/fluid/platform/device/ipu/ipu_strategy.cc @@ -241,6 +241,15 @@ IpuStrategy::IpuStrategy() { #undef ADD_POPART_BOOL_OPTION_ALIAS #undef ADD_POPART_ENUM_OPTION_ALIAS + RegisterGetter(vector_options_getter, options_type, "custom_ops", "vector", + [&]() { + std::vector res; + for (auto x : custom_ops) { + res.push_back(x.repr()); + } + return res; + }); + RegisterSetter(bool_options, "enable_manual_shard", [&](bool value) { if (value) { popart_options.virtualGraphMode = popart::VirtualGraphMode::Manual; @@ -429,6 +438,14 @@ void IpuStrategy::SetTensorLocation(const std::string& tensor, } } +void IpuStrategy::AddCustomOp(const std::string& paddle_op, + const std::string& popart_op, + const std::string& domain, int version) { + LOG(INFO) << "IpuStrategy add custom op: " << paddle_op; + custom_ops.push_back( + IpuCustomOpIdentifier(paddle_op, popart_op, domain, 
version)); +} + std::string IpuStrategy::GetOption(const std::string& option) { return get(option, options_getter); } diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.h b/paddle/fluid/platform/device/ipu/ipu_strategy.h index 0e2af26454c401960773de20744f285aecec6bed..64436dc14fec3393b0a2a4473ad436d7d08f5217 100644 --- a/paddle/fluid/platform/device/ipu/ipu_strategy.h +++ b/paddle/fluid/platform/device/ipu/ipu_strategy.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include +#include "paddle/fluid/platform/device/ipu/ipu_utils.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -71,6 +72,9 @@ struct IpuStrategy { // popart pattern manager popart::Patterns popart_patterns; + // custom ops + std::vector custom_ops; + private: std::map> bool_options; std::map> uint64_options; @@ -123,6 +127,8 @@ struct IpuStrategy { const std::string &value); void SetTensorLocation(const std::string &tensor, const std::string &option, std::uint64_t value); + void AddCustomOp(const std::string &paddle_op, const std::string &popart_op, + const std::string &domain, int version); std::string GetOption(const std::string &); std::vector GetVectorOption(const std::string &);