Unverified commit d945e24c authored by Allen Guo and committed by GitHub

sync recent changes (#39763)

Parent df1dbff1
......@@ -147,7 +147,7 @@ if(WITH_IPU)
pass_library(ipu_runtime_replacer_pass base DIR ipu)
pass_library(inference_process_pass base DIR ipu)
pass_library(inference_postprocess_pass base DIR ipu)
pass_library(popart_canonicalization_pass base DIR ipu)
pass_library(popart_canonicalization_pass base DIR ipu DEPS paddle_ipu)
pass_library(ipu_inplace_pass base DIR ipu)
pass_library(infer_shape_pass base DIR ipu)
pass_library(delete_scale_op_pass base DIR ipu)
......
......@@ -56,7 +56,7 @@ const bool is_regularization_op(const std::string& op_namescope) {
}
void IpuOptimizerExtractPass::ApplyImpl(ir::Graph* graph) const {
// The op built here conforms to popart's definition; some of the values involved need to be obtained later, at LowerOptimier time
// optimizer values will be extracted when lowering optimizer in ipu_backend
OpDesc new_op("popart_optimizer", {}, {}, {});
new_op.SetAttr("op_role", 0);
new_op.SetAttr("with_lr_sched", false);
......@@ -86,7 +86,7 @@ void IpuOptimizerExtractPass::ApplyImpl(ir::Graph* graph) const {
bool is_regularization = is_regularization_op(op_namescope);
VLOG(10) << "found optimizer releated op: " << op_type;
// initial learning_rate will be set in LowerOptimier
// initial learning_rate will be set in ipu_backend
set_ops.insert(op_type);
if (op_type == "sgd") {
auto type = std::string{"sgd"};
......
......@@ -14,6 +14,7 @@
#include "paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
......@@ -28,11 +29,8 @@ void PopartCanonicalizationPass::ApplyImpl(ir::Graph* graph) const {
auto custom_ops = Get<std::unordered_set<std::string>>("custom_ops");
std::vector<std::string> missing_ops;
auto nodes = graph->Nodes();
for (auto* node : nodes) {
if (!node->IsOp()) {
continue;
}
auto sorted_ops = TopologySortOperations(*graph);
for (auto* node : sorted_ops) {
auto* op = node->Op();
auto op_type = op->Type();
......
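The canonicalization pass now walks operators via TopologySortOperations instead of the unordered node set, so each op is visited after its producers and the traversal order is deterministic across runs. A minimal sketch of the pattern (the loop body is illustrative, not from this commit):

    // TopologySortOperations returns only op nodes, in dependency order,
    // which also makes the old IsOp() filter unnecessary.
    auto sorted_ops = framework::ir::TopologySortOperations(*graph);
    for (auto* node : sorted_ops) {
      auto* op = node->Op();
      auto op_type = op->Type();
      // ... map the paddle op onto its popart_* equivalent ...
    }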
......@@ -13,9 +13,9 @@ IF(WITH_IPU)
"ipu_device.cc"
)
cc_library(ipu_backend SRCS ${IPU_BACKEND_SRC} DEPS popart graph graph_helper)
cc_library(ipu_info SRCS ${IPU_INFO_SRC} DEPS popart enforce)
cc_library(paddle_ipu SHARED SRCS ${PADDLE_IPU_SRC} DEPS popart graph_helper)
cc_library(ipu_backend SRCS ${IPU_BACKEND_SRC} DEPS popart-only graph graph_helper)
cc_library(ipu_info SRCS ${IPU_INFO_SRC} DEPS popart-only enforce)
add_library(paddle_ipu SHARED ${PADDLE_IPU_SRC})
add_dependencies(paddle_ipu ipu_backend)
set(PADDLE_IPU_LIB "${CMAKE_CURRENT_BINARY_DIR}/libpaddle_ipu.so" CACHE STRING "")
set(PADDLE_IPU_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}" CACHE STRING "")
......
......@@ -43,17 +43,17 @@ void IpuBackend::Compile(Graph* graph,
const std::vector<std::string>& feed_list,
const std::vector<std::string>& fetch_list) {
VLOG(10) << "enter IpuBackend::Compile";
compiler_->Prepare();
executor_->SetCompilerResources(compiler_->GetResources());
compiler_->InitInputs(graph, feed_list);
compiler_->LowerConstants(graph, scope_);
compiler_->LowerWeights(graph, scope_);
compiler_->LowerBody(graph);
compiler_->Prepare(graph);
compiler_->InitInputs(feed_list);
compiler_->LowerConstants(scope_);
compiler_->LowerWeights(scope_);
compiler_->LowerBody();
compiler_->InitOutputs(fetch_list);
if (ipu_strategy_->is_training) {
compiler_->LowerOptimier(graph, scope_);
compiler_->LowerOptimizer(scope_);
}
executor_->SetCompilerResources(compiler_->GetResources());
is_compiled_ = true;
// when call compile, means a new graph
is_prepared_ = false;
......@@ -95,11 +95,9 @@ void IpuBackend::SetIpuStrategy(const IpuStrategy& strategy) {
ipu_strategy_ = &strategy;
compiler_->SetIpuStrategy(strategy);
executor_->SetIpuStrategy(strategy);
}
void IpuBackend::SetCustomOps(
const std::vector<IpuCustomOpIdentifier>& custom_ops) {
compiler_->SetCustomOps(custom_ops);
if (!strategy.custom_ops.empty()) {
compiler_->SetCustomOps(strategy.custom_ops);
}
}
void IpuBackend::SaveModelProto(const std::string& path) {
......
......@@ -71,7 +71,6 @@ class IpuBackend {
const Scope *GetScope() { return scope_; }
void SetIpuStrategy(const IpuStrategy &strategy);
const IpuStrategy *GetIpuStrategy() { return ipu_strategy_; }
void SetCustomOps(const std::vector<IpuCustomOpIdentifier> &custom_ops);
// save compiled model to onnx
void SaveModelProto(const std::string &path);
......
......@@ -98,6 +98,19 @@ TO GetCastSigAttrAllowNull(std::string attr, OpDesc* op_desc) {
}
}
GraphHelper::GraphHelper(const Graph* g) {
graph = g;
sorted_ops = framework::ir::TopologySortOperations(*g);
for (auto* node : g->Nodes()) {
nodes_id_map[node->id()] = node;
if (node->IsVar()) {
vars_name_map[node->Name()] = node;
sorted_vars_id.push_back(node->id());
}
}
std::sort(sorted_vars_id.begin(), sorted_vars_id.end());
}
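GraphHelper computes the topological op order and the name/id lookup tables once per compile, so the individual lowering stages no longer rescan graph->Nodes(). A hedged usage sketch (the feed name is hypothetical):

    // Build the helper once, then do O(log n) map lookups instead of
    // linear scans over the whole node set.
    GraphHelper helper(graph);
    auto* feed_node = helper.vars_name_map["image"];         // name -> Node*
    auto* first_op = helper.sorted_ops.front();              // ops in dependency order
    auto* same_node = helper.nodes_id_map[feed_node->id()];  // id -> Node*

This is exactly the lookup InitInputs switches to below, replacing its per-feed linear scan over all graph nodes.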
Compiler::Compiler() { RegisterOpFunc(); }
Compiler::~Compiler() {
......@@ -105,9 +118,10 @@ Compiler::~Compiler() {
resources_.reset();
}
void Compiler::Prepare() {
void Compiler::Prepare(const Graph* graph) {
builder_ = popart::Builder::create();
resources_ = std::make_unique<CompilerResources>();
graph_helper_ = std::make_unique<GraphHelper>(graph);
}
void Compiler::RegisterOpFunc() {
......@@ -171,75 +185,10 @@ void Compiler::RegisterOpFunc() {
#undef INT_VEC
}
void Compiler::LowerBody(const Graph* graph) {
VLOG(10) << "enter Compiler::LowerBody";
auto nodes = framework::ir::TopologySortOperations(*graph);
for (auto* node : nodes) {
auto* op_desc = node->Op();
auto op_type = op_desc->Type();
VLOG(10) << "lowering op: " << op_type;
if (op_type == "popart_constant") {
// pass
} else if (op_type == "popart_optimizer") {
// pass
} else if (op_type == "popart_checkpointoutput") {
auto inputs = GetOpInputs(op_desc);
auto outputs = GetOpOutputs(op_desc);
auto output_ids = builder_->checkpointOutput(inputs);
InsertTensors(outputs, output_ids);
} else if (op_type == "popart_custom_op") {
auto inputs = GetOpInputs(op_desc);
auto outputs = GetOpOutputs(op_desc);
auto debug_context = BuildDebugContext(op_desc);
auto attributes = std::map<std::string, popart::any>{};
for (auto& attr : op_desc->GetAttrMap()) {
CustomOpAttrVisitor visitor(&attributes, attr.first);
boost::apply_visitor(visitor, attr.second);
}
auto __op_type =
BOOST_GET_CONST(std::string, op_desc->GetAttr("__op_type"));
VLOG(10) << "Build graph from custom op: " << __op_type;
auto it = custom_ops_.find(__op_type);
auto output_ids =
builder_->customOp(it->second.popart_op, it->second.popart_op.version,
inputs, outputs.size(), attributes, debug_context);
SetIpuIndexStage(output_ids, op_desc);
InsertTensors(outputs, output_ids);
} else if (op_type == "popart_printtensor") {
auto inputs = GetOpInputs(op_desc);
auto outputs = GetOpOutputs(op_desc);
auto debug_context = BuildDebugContext(op_desc);
auto print_gradient =
BOOST_GET_CONST(int64_t, op_desc->GetAttr("print_gradient"));
auto title = BOOST_GET_CONST(std::string, op_desc->GetAttr("title"));
auto output_ids = builder_->aiGraphcoreOpset1().printtensor(
inputs, print_gradient, debug_context, title);
SetIpuIndexStage(output_ids, op_desc);
InsertTensors(outputs, output_ids);
} else {
auto itr = name_function_.find(op_type);
if (itr != name_function_.end()) {
itr->second(node->Op());
} else {
PADDLE_THROW(platform::errors::NotFound(
"%s is not registered, please check for unsupported operators for "
"running on IPU",
op_type));
}
}
}
VLOG(10) << "leave Compiler::LowerBody";
}
void Compiler::InitInputs(Graph* graph,
const std::vector<std::string>& feed_list) {
void Compiler::InitInputs(const std::vector<std::string>& feed_list) {
for (const auto& feed_name : feed_list) {
feed_list_.push_back(feed_name);
for (const Node* n : graph->Nodes()) {
if (n->IsVar()) {
auto* var_desc = n->Var();
if (feed_name == var_desc->Name()) {
auto* node = graph_helper_->vars_name_map[feed_name];
auto* var_desc = node->Var();
VLOG(10) << "feed_name= " << var_desc->Name();
auto data_type = VarType2PopartType(var_desc->GetDataType());
popart::TensorInfo input_info{data_type, var_desc->GetShape()};
......@@ -250,14 +199,10 @@ void Compiler::InitInputs(Graph* graph,
resources_->inputs.push_back(tensor_id);
resources_->tensors.emplace(var_desc->Name(), tensor_id);
}
}
}
}
}
void Compiler::InitOutputs(const std::vector<std::string>& fetch_list) {
for (const auto& fetch_name : fetch_list) {
fetch_list_.push_back(fetch_name);
auto tensor = resources_->tensors.find(fetch_name);
PADDLE_ENFORCE_NE(
tensor, resources_->tensors.end(),
......@@ -271,14 +216,10 @@ void Compiler::InitOutputs(const std::vector<std::string>& fetch_list) {
}
}
void Compiler::LowerConstants(const Graph* graph, const Scope* scope) {
void Compiler::LowerConstants(const Scope* scope) {
auto& kid_scope = scope->NewScope();
VLOG(10) << "enter Compiler::LowerConstants";
for (auto* node : graph->Nodes()) {
if (!node->IsOp()) {
continue;
}
for (auto* node : graph_helper_->sorted_ops) {
auto* op_desc = node->Op();
auto op_type = op_desc->Type();
if (op_type == "popart_constant") {
......@@ -308,17 +249,16 @@ void Compiler::LowerConstants(const Graph* graph, const Scope* scope) {
VLOG(10) << "leave Compiler::LowerConstants";
}
void Compiler::LowerWeights(const Graph* graph, const Scope* scope) {
void Compiler::LowerWeights(const Scope* scope) {
VLOG(10) << "enter Compiler::LowerWeights";
PADDLE_ENFORCE_NOT_NULL(scope,
platform::errors::PreconditionNotMet(
"You should call set_scope before LowerWeights"));
// at this step, the graph doesn't contains optimizer related states
for (const auto* node : graph->Nodes()) {
for (auto id : graph_helper_->sorted_vars_id) {
auto* node = graph_helper_->nodes_id_map[id];
if (node->IsVar() && !node->IsCtrlVar() && node->Var()) {
if (node->Var()->Persistable() && node->inputs.empty()) {
auto var_name = node->Var()->Name();
if (resources_->tensors.count(var_name) != 0) {
VLOG(10) << "found existed one, skip lowering Weight: " << var_name;
continue;
}
VLOG(10) << "lowering weight: " << var_name;
......@@ -344,12 +284,68 @@ void Compiler::LowerWeights(const Graph* graph, const Scope* scope) {
VLOG(10) << "leave Compiler::LowerWeights";
}
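Iterating sorted_vars_id rather than the unordered graph->Nodes() set gives weight lowering a fixed, reproducible order. A sketch of the pattern, reduced to its essentials:

    // Collect var-node ids once, sort them, then always visit in that order.
    std::vector<int> sorted_vars_id;
    for (auto* node : graph->Nodes()) {
      if (node->IsVar()) sorted_vars_id.push_back(node->id());
    }
    std::sort(sorted_vars_id.begin(), sorted_vars_id.end());
    for (auto id : sorted_vars_id) {
      auto* node = nodes_id_map[id];
      // ... lower persistable vars that have no producer ops as weights ...
    }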
void Compiler::LowerOptimier(const Graph* graph, const Scope* scope) {
for (auto* node : graph->Nodes()) {
if (!node->IsOp()) {
continue;
void Compiler::LowerBody() {
VLOG(10) << "enter Compiler::LowerBody";
for (auto* node : graph_helper_->sorted_ops) {
auto* op_desc = node->Op();
auto op_type = op_desc->Type();
VLOG(10) << "lowering op: " << op_type;
if (op_type == "popart_constant") {
// pass
} else if (op_type == "popart_optimizer") {
// pass
} else if (op_type == "popart_checkpointoutput") {
auto inputs = GetOpInputs(op_desc);
auto outputs = GetOpOutputs(op_desc);
auto output_ids = builder_->checkpointOutput(inputs);
InsertTensors(outputs, output_ids);
} else if (op_type == "popart_custom_op") {
auto inputs = GetOpInputs(op_desc);
auto outputs = GetOpOutputs(op_desc);
auto debug_context = BuildDebugContext(op_desc);
auto attributes = std::map<std::string, popart::any>{};
for (auto& attr : op_desc->GetAttrMap()) {
CustomOpAttrVisitor visitor(&attributes, attr.first);
boost::apply_visitor(visitor, attr.second);
}
auto __op_type =
BOOST_GET_CONST(std::string, op_desc->GetAttr("__op_type"));
VLOG(10) << "Build graph from custom op: " << __op_type;
auto it = custom_ops_.find(__op_type);
auto output_ids =
builder_->customOp(it->second.popart_op, it->second.popart_op.version,
inputs, outputs.size(), attributes, debug_context);
SetIpuIndexStage(output_ids, op_desc);
InsertTensors(outputs, output_ids);
} else if (op_type == "popart_printtensor") {
auto inputs = GetOpInputs(op_desc);
auto outputs = GetOpOutputs(op_desc);
auto debug_context = BuildDebugContext(op_desc);
auto print_gradient =
BOOST_GET_CONST(int64_t, op_desc->GetAttr("print_gradient"));
auto title = BOOST_GET_CONST(std::string, op_desc->GetAttr("title"));
auto output_ids = builder_->aiGraphcoreOpset1().printtensor(
inputs, print_gradient, debug_context, title);
SetIpuIndexStage(output_ids, op_desc);
InsertTensors(outputs, output_ids);
} else {
auto itr = name_function_.find(op_type);
if (itr != name_function_.end()) {
itr->second(node->Op());
} else {
PADDLE_THROW(platform::errors::NotFound(
"%s is not registered, please check for unsupported operators for "
"running on IPU",
op_type));
}
}
}
VLOG(10) << "leave Compiler::LowerBody";
}
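Outside the special-cased popart_* ops, LowerBody dispatches through name_function_, the op-name-to-callback table filled by RegisterOpFunc(). A simplified sketch of how one entry might be registered (the op name and opset call are illustrative assumptions, not code from this commit):

    // Hypothetical registration: bind a popart op name to a lowering lambda.
    using OpFunc = std::function<void(OpDesc*)>;
    std::unordered_map<std::string, OpFunc> name_function;
    name_function["popart_relu"] = [&](OpDesc* op_desc) {
      auto inputs = GetOpInputs(op_desc);
      auto output_id = builder_->aiOnnxOpset11().relu(inputs);
      InsertTensors(GetOpOutputs(op_desc), output_id);
    };
    // LowerBody looks the entry up by op_type and throws NotFound otherwise.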
void Compiler::LowerOptimizer(const Scope* scope) {
for (auto* node : graph_helper_->sorted_ops) {
auto* op_desc = node->Op();
auto op_type = op_desc->Type();
if (op_type == "popart_optimizer") {
......
......@@ -68,34 +68,29 @@ struct CompilerResources {
std::unique_ptr<popart::Optimizer> optimizer;
};
// helper for lowering graph
struct GraphHelper {
explicit GraphHelper(const Graph *);
const Graph *graph;
std::map<std::string, Node *> vars_name_map;
std::map<int, Node *> nodes_id_map;
std::vector<Node *> sorted_ops;
std::vector<int> sorted_vars_id;
};
class Compiler {
public:
Compiler();
~Compiler();
void RegisterOpFunc();
void Prepare();
void LowerBody(const Graph *graph);
void InitInputs(Graph *graph, const std::vector<std::string> &feed_list);
void Prepare(const Graph *graph);
void InitInputs(const std::vector<std::string> &feed_list);
void InitOutputs(const std::vector<std::string> &fetch_list);
void LowerConstants(const Graph *graph, const Scope *scope);
void LowerWeights(const Graph *graph, const Scope *scope);
void LowerOptimier(const Graph *graph, const Scope *scope);
void InsertTensors(const std::vector<std::string> &output_names,
const std::vector<std::string> &tensor_ids);
void InsertTensors(const std::vector<std::string> &output_names,
const std::string &tensor_id);
void SetIpuIndexStage(const std::vector<std::string> &tensor_ids,
const OpDesc *op_desc);
void SetIpuIndexStage(const std::string &tensor_id, const OpDesc *op_desc);
void SetAMPAttributes(const std::vector<std::string> &tensor_ids,
const OpDesc *op_desc);
void SetAMPAttributes(const std::string &tensor_id, const OpDesc *op_desc);
void SetSerializeAttributes(const std::vector<std::string> &tensor_ids,
const OpDesc *op_desc);
void SetSerializeAttributes(const std::string &tensor_id,
const OpDesc *op_desc);
void LowerConstants(const Scope *scope);
void LowerWeights(const Scope *scope);
void LowerBody();
void LowerOptimizer(const Scope *scope);
void SetIpuStrategy(const IpuStrategy &strategy) {
ipu_strategy_ = &strategy;
......@@ -112,21 +107,34 @@ class Compiler {
void SaveModelProtoNoCheck(const std::string &path);
private:
void RegisterOpFunc();
std::vector<std::string> GetOpInputs(const OpDesc *op);
const std::vector<std::string> &GetOpOutputs(const OpDesc *op);
popart::DebugContext BuildDebugContext(const OpDesc *op);
void InsertTensors(const std::vector<std::string> &output_names,
const std::vector<std::string> &tensor_ids);
void InsertTensors(const std::vector<std::string> &output_names,
const std::string &tensor_id);
void SetIpuIndexStage(const std::vector<std::string> &tensor_ids,
const OpDesc *op_desc);
void SetIpuIndexStage(const std::string &tensor_id, const OpDesc *op_desc);
void SetAMPAttributes(const std::vector<std::string> &tensor_ids,
const OpDesc *op_desc);
void SetAMPAttributes(const std::string &tensor_id, const OpDesc *op_desc);
void SetSerializeAttributes(const std::vector<std::string> &tensor_ids,
const OpDesc *op_desc);
void SetSerializeAttributes(const std::string &tensor_id,
const OpDesc *op_desc);
private:
std::unique_ptr<popart::Builder> builder_;
std::unique_ptr<CompilerResources> resources_;
std::unique_ptr<GraphHelper> graph_helper_;
using OpFunc = std::function<void(OpDesc *op_desc)>;
std::unordered_map<std::string, OpFunc> name_function_;
// feed_list_ & fetch_list save paddle tensor id
std::vector<std::string> feed_list_;
std::vector<std::string> fetch_list_;
const IpuStrategy *ipu_strategy_ = nullptr;
std::map<std::string, IpuCustomOpIdentifier> custom_ops_;
};
......
......@@ -241,6 +241,15 @@ IpuStrategy::IpuStrategy() {
#undef ADD_POPART_BOOL_OPTION_ALIAS
#undef ADD_POPART_ENUM_OPTION_ALIAS
RegisterGetter(vector_options_getter, options_type, "custom_ops", "vector",
[&]() {
std::vector<std::string> res;
for (auto x : custom_ops) {
res.push_back(x.repr());
}
return res;
});
RegisterSetter(bool_options, "enable_manual_shard", [&](bool value) {
if (value) {
popart_options.virtualGraphMode = popart::VirtualGraphMode::Manual;
......@@ -429,6 +438,14 @@ void IpuStrategy::SetTensorLocation(const std::string& tensor,
}
}
void IpuStrategy::AddCustomOp(const std::string& paddle_op,
const std::string& popart_op,
const std::string& domain, int version) {
LOG(INFO) << "IpuStrategy add custom op: " << paddle_op;
custom_ops.push_back(
IpuCustomOpIdentifier(paddle_op, popart_op, domain, version));
}
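With the custom-op list stored on IpuStrategy, registration and wiring now happen in one path: AddCustomOp records the identifier, and IpuBackend::SetIpuStrategy forwards the list to the compiler, which is why the separate IpuBackend::SetCustomOps was removed above. A hedged usage sketch (the op names and backend handle are hypothetical):

    // Register a paddle->popart custom-op mapping, then hand the strategy
    // to the backend; SetIpuStrategy forwards strategy.custom_ops if any.
    IpuStrategy strategy;
    strategy.AddCustomOp("custom_relu", "Relu", "custom.ops", 1);
    ipu_backend->SetIpuStrategy(strategy);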
std::string IpuStrategy::GetOption(const std::string& option) {
return get(option, options_getter);
}
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include <popart/patterns/patterns.hpp>
#include <popart/sessionoptions.hpp>
#include <popart/tensorlocation.hpp>
#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
......@@ -71,6 +72,9 @@ struct IpuStrategy {
// popart pattern manager
popart::Patterns popart_patterns;
// custom ops
std::vector<IpuCustomOpIdentifier> custom_ops;
private:
std::map<std::string, std::function<void(bool)>> bool_options;
std::map<std::string, std::function<void(std::uint64_t)>> uint64_options;
......@@ -123,6 +127,8 @@ struct IpuStrategy {
const std::string &value);
void SetTensorLocation(const std::string &tensor, const std::string &option,
std::uint64_t value);
void AddCustomOp(const std::string &paddle_op, const std::string &popart_op,
const std::string &domain, int version);
std::string GetOption(const std::string &);
std::vector<std::string> GetVectorOption(const std::string &);
......