From cd6ed0e3447596b7a539cb9563dc55a0565de9e5 Mon Sep 17 00:00:00 2001 From: jjfeing Date: Tue, 28 Apr 2020 22:15:40 +0800 Subject: [PATCH] support buffer fusion --- .../parallel_compile/tbe_compiler/common.py | 10 +- mindspore/ccsrc/kernel/kernel_fusion.cc | 3 +- mindspore/ccsrc/kernel/tbe/tbe_adapter.cc | 46 +++++ mindspore/ccsrc/kernel/tbe/tbe_adapter.h | 13 +- .../ccsrc/kernel/tbe/tbe_kernel_build.cc | 157 +++++++++++----- mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h | 15 +- .../ascend/ascend_backend_optimization.cc | 2 + .../ascend/buffer_fusion/buffer_fusion.cc | 176 +++++++++--------- .../ascend/buffer_fusion/buffer_fusion.h | 1 - .../ir_fusion/refresh_parameter_format.cc | 71 +++++++ .../ir_fusion/refresh_parameter_format.h | 40 ++++ .../ccsrc/session/anf_runtime_algorithm.cc | 4 +- mindspore/ccsrc/utils/utils.h | 4 + mindspore/ops/_op_impl/tbe/reduce_mean.py | 1 + mindspore/ops/operations/nn_ops.py | 1 + 15 files changed, 386 insertions(+), 158 deletions(-) create mode 100644 mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc create mode 100644 mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h diff --git a/mindspore/_extends/parallel_compile/tbe_compiler/common.py b/mindspore/_extends/parallel_compile/tbe_compiler/common.py index 6258cf8d4..39866d2ba 100644 --- a/mindspore/_extends/parallel_compile/tbe_compiler/common.py +++ b/mindspore/_extends/parallel_compile/tbe_compiler/common.py @@ -122,10 +122,12 @@ def get_args(op_info, arg_type): elif arg_type == 'attrs': for item in op_info[arg_type]: - if 'value' not in item: - raise ValueError("Json string Errors, attr key:value not found.") - if item["name"] != "isRef": - args.append(item['value']) + if item["valid"]: + if 'value' not in item: + raise ValueError("Json string Errors, attr key:value not found.") + if item["name"] != "isRef": + args.append(item['value']) + return args diff --git a/mindspore/ccsrc/kernel/kernel_fusion.cc b/mindspore/ccsrc/kernel/kernel_fusion.cc index cd8936f21..4e1ad97e2 100644 --- a/mindspore/ccsrc/kernel/kernel_fusion.cc +++ b/mindspore/ccsrc/kernel/kernel_fusion.cc @@ -108,7 +108,8 @@ std::map KernelFusion(const std::vector } if ((task_result != nullptr) && (strcmp(task_result, "Success") != 0)) { - MS_LOG(DEBUG) << "fuison op build failed, err log: " << task_result << " change to single op build."; + MS_LOG(INFO) << "Fusion warning: Fuison op build failed, err log: " << task_result + << " change to single op build."; build_failed_num++; } auto kernel_mod_item = build_manger->TaskFinishProcess(task_id, false); diff --git a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc b/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc index 44750fab4..2e2e27cbc 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc +++ b/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc @@ -153,6 +153,52 @@ void TbeAdapter::InputOrderPass(const std::string &op_name, std::vector &inputs_list, + std::vector *inputs_json) { + MS_EXCEPTION_IF_NULL(inputs_json); + if (input_order_adjusted_ops.find(op_name) == input_order_adjusted_ops.end()) { + (void)std::copy(inputs_list.begin(), inputs_list.end(), std::back_inserter((*inputs_json))); + } else { + if (op_name == "MinimumGrad" || op_name == "MaximumGrad") { + inputs_json->emplace_back(inputs_list[2]); + inputs_json->emplace_back(inputs_list[0]); + inputs_json->emplace_back(inputs_list[1]); + for (size_t i = 3; i < inputs_list.size(); ++i) { + inputs_json->emplace_back(inputs_list[i]); + } + } else { + inputs_json->emplace_back(inputs_list[1]); + inputs_json->emplace_back(inputs_list[0]); + for (size_t i = 2; i < inputs_list.size(); ++i) { + inputs_json->emplace_back(inputs_list[i]); + } + } + } +} + +void TbeAdapter::FusionDataOrderPass(const std::string &op_name, const std::vector &data_layer, + std::vector *reorder_data_layer) { + MS_EXCEPTION_IF_NULL(reorder_data_layer); + if (input_order_adjusted_ops.find(op_name) == input_order_adjusted_ops.end()) { + (void)std::copy(data_layer.begin(), data_layer.end(), std::back_inserter((*reorder_data_layer))); + } else { + if (op_name == "MinimumGrad" || op_name == "MaximumGrad") { + reorder_data_layer->emplace_back(data_layer[2]); + reorder_data_layer->emplace_back(data_layer[0]); + reorder_data_layer->emplace_back(data_layer[1]); + for (size_t i = 3; i < data_layer.size(); ++i) { + reorder_data_layer->emplace_back(data_layer[i]); + } + } else { + reorder_data_layer->emplace_back(data_layer[1]); + reorder_data_layer->emplace_back(data_layer[0]); + for (size_t i = 2; i < data_layer.size(); ++i) { + reorder_data_layer->emplace_back(data_layer[i]); + } + } + } +} + std::map TbeAdapter::build_json_attr_pass_map_ = { {"MaximumGrad", TbeAdapter::MaximumGradAttrJsonPass}, {"MinimumGrad", TbeAdapter::MinimumGradAttrJsonPass}, diff --git a/mindspore/ccsrc/kernel/tbe/tbe_adapter.h b/mindspore/ccsrc/kernel/tbe/tbe_adapter.h index 27f6d315f..0208d6c6a 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_adapter.h +++ b/mindspore/ccsrc/kernel/tbe/tbe_adapter.h @@ -44,15 +44,12 @@ class TbeAdapter { static void GenTopKV2IndicesTensorInfo(const std::shared_ptr &anf_node, size_t real_input_index, std::vector *input_list, kCreaterType creater_type); + static void FusionInputOrderPass(const std::string &op_name, const std::vector &inputs_list, + std::vector *inputs_json); + static void FusionDataOrderPass(const std::string &op_name, const std::vector &data_layer, + std::vector *reorder_data_layer); + private: - static void Conv2DAttrJsonPass(const AnfNodePtr &anf_node, const std::vector> &op_info_attrs, - nlohmann::json *attrs_json); - static void Conv2DBackpropFilterAttrJsonPass(const AnfNodePtr &anf_node, - const std::vector> &op_info_attrs, - nlohmann::json *attrs_json); - static void Conv2DBackpropInputAttrJsonPass(const AnfNodePtr &anf_node, - const std::vector> &op_info_attrs, - nlohmann::json *attrs_json); static void MaximumGradAttrJsonPass(const AnfNodePtr &anf_node, const std::vector> &op_info_attrs, nlohmann::json *attrs_json); diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc index 939e7146e..24823b927 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc @@ -375,20 +375,26 @@ bool TbeKernelJsonCreator::GenTbeAttrJson(const std::shared_ptr &anf_no MS_EXCEPTION_IF_NULL(primitive); for (const auto &attr_ptr : attrs_ptr) { std::string attr_name = attr_ptr->name(); + nlohmann::json attr_obj; + attr_obj["name"] = attr_name; if (primitive->GetAttr(attr_name) != nullptr) { - nlohmann::json attr_obj; auto value = primitive->GetAttr(attr_name); std::string type = attr_ptr->type(); ParseAttrValue(type, value, &attr_obj); - attr_obj["name"] = attr_name; attr_obj["valid"] = true; - (*attrs_json).push_back(attr_obj); } else { - if (attr_ptr->param_type() == "required" && creater_type_ == SINGLE_BUILD && op_info->impl_path() != "") { - MS_LOG(EXCEPTION) << "op name: " << op_info->op_name() << " attr: " << attr_name - << " is required, but not set."; + if (op_info->impl_path().empty()) { + attr_obj["valid"] = false; + } else { + if (attr_ptr->param_type() == "required" && creater_type_ == SINGLE_BUILD) { + MS_LOG(EXCEPTION) << "op name: " << op_info->op_name() << " attr: " << attr_name + << " is required, but not set."; + } else { + attr_obj["valid"] = false; + } } } + (*attrs_json).push_back(attr_obj); } return true; } @@ -484,7 +490,8 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector &inp MS_EXCEPTION_IF_NULL(fusion_kernel); // get input layer info std::vector> input_layers; - if (!GetInputLayers(input_nodes, compute_nodes, &input_layers)) { + std::map spec_data_input; + if (!GetInputLayers(input_nodes, compute_nodes, &input_layers, &spec_data_input)) { return false; } // gen fusion scopre_op jsom @@ -505,8 +512,8 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector &inp for (const auto &layer : input_layers) { for (const auto &data_input : layer) { nlohmann::json data_str; - if (!GenFusionDataInputJson(data_input, &data_str, &index)) { - MS_LOG(DEBUG) << "GenFusionDataInputJson faild."; + if (!GenFusionDataInputJson(data_input, spec_data_input, &data_str, &index)) { + MS_LOG(INFO) << "Fusion error: gen fusion datainput json faild."; return false; } data_list.push_back(data_str); @@ -519,7 +526,7 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector &inp } void TbeKernelBuild::GenDescJson(const std::shared_ptr &anf_node, size_t node_out_idx, - size_t desc_output_idx, nlohmann::json *output_desc) { + size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type) { std::string output_desc_name = anf_node->fullname_with_scope(); if (node_out_idx > 0) { output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx); @@ -539,58 +546,109 @@ void TbeKernelBuild::GenDescJson(const std::shared_ptr &anf_ (*output_desc)["shape"] = shape; auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx); if (format == kOpFormat_DEFAULT) { - if (ori_shape.size() == 4) { - format = kOpFormat_NCHW; - } else { - format = kOpFormat_ND; - } + format = ori_shape.size() == 4 ? kOpFormat_NCHW : kOpFormat_ND; } (*output_desc)["format"] = format; (*output_desc)["ori_format"] = kOpFormat_NCHW; (*output_desc)["output_index"] = desc_output_idx; + if (fusion_data_type == kFusionAddN && format == kOpFormat_NC1HWC0) { + std::vector spec_shape = {}; + spec_shape.emplace_back(shape[0]); + spec_shape.emplace_back(shape[1]); + spec_shape.emplace_back(shape[2] * shape[3]); + spec_shape.emplace_back(shape[4]); + (*output_desc)["shape"] = spec_shape; + } else if (fusion_data_type == kFusionReLUGradV2 && (*output_desc)["data_type"] == "uint8") { + std::vector spec_shape = {}; + spec_shape.emplace_back(shape[0]); + spec_shape.emplace_back(shape[1]); + spec_shape.emplace_back(shape[2] * shape[3]); + spec_shape.emplace_back(16); + (*output_desc)["shape"] = spec_shape; + (*output_desc)["data_type"] = "bool"; + } } void TbeKernelBuild::GenReusedOutputDesc(const shared_ptr &anf_node, size_t index, size_t output_index, nlohmann::json *output_desc) { std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index); (*output_desc)["name"] = NormalizeFullScopeName(output_desc_name); - (*output_desc)["data_type"] = tbe::TypeIdToString(kNumberTypeFloat32); (*output_desc)["output_index"] = output_index; std::vector shape; (*output_desc)["shape"] = shape; } -bool TbeKernelBuild::GetInputLayers(const vector &input_nodes, - const vector &compute_nodes, - std::vector> *input_layers) { +bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name, + const std::vector &reorder_layer, + std::map *spec_data_input) { + if ((op_name == kReluGradV2OpName || op_name == kAddNOpName) && reorder_layer.empty()) { + MS_LOG(INFO) << "Fusion error: node(" << op_name << " )'s input is null. "; + return false; + } + MS_LOG(INFO) << "Fusion info: op_name: " << op_name << "input layer size: " << reorder_layer.size(); + if (op_name == kReluGradV2OpName) { + (*spec_data_input)[reorder_layer[0]] = kFusionReLUGradV2; + } else if (op_name == kAddNOpName) { + for (const auto &it : reorder_layer) { + (*spec_data_input)[it] = kFusionAddN; + } + } + return true; +} + +bool TbeKernelBuild::GetInputLayers(const std::vector &input_nodes, + const std::vector &compute_nodes, + std::vector> *input_layers, + std::map *spec_data_input) { + auto result = std::find_if(compute_nodes.begin(), compute_nodes.end(), [](const auto &it) { + auto op_name = AnfAlgo::GetCNodeName(it); + return op_name == kConv2DBackpropInputOpName; + }); + bool need_spec = (result != compute_nodes.end()); size_t input_size = 0; for (const auto &compute_node : compute_nodes) { - std::vector layer; + std::vector layer = {}; + std::vector reorder_layer = {}; MS_EXCEPTION_IF_NULL(compute_node); + auto op_name = AnfAlgo::GetCNodeName(compute_node); auto ccompute_node = compute_node->cast(); if (ccompute_node == nullptr) { - MS_LOG(DEBUG) << "fusion compute node must be cnode"; + MS_LOG(INFO) << "Fusion error: fusion compute node must be cnode"; return false; } + MS_LOG(INFO) << "Fusion info: compute name: " << compute_node->fullname_with_scope(); for (size_t i = 1; i < ccompute_node->inputs().size(); ++i) { auto input = ccompute_node->input(i); auto find_iter = std::find(input_nodes.begin(), input_nodes.end(), input); if (find_iter != input_nodes.end()) { + MS_LOG(INFO) << "Fusion info: add compute node's [" << i << "] input: " << input->fullname_with_scope(); layer.emplace_back((*find_iter)); + } else { + MS_LOG(INFO) << "Fusion warnig: this input [" << i << "] may be pre compute(" << input->fullname_with_scope() + << ") node's output."; + } + } + TbeAdapter::FusionDataOrderPass(op_name, layer, &reorder_layer); + if (need_spec) { + MS_LOG(INFO) << "Fusion info: match conv2d backprop input + ... patten."; + if (!GetSpecInputLayers(op_name, reorder_layer, spec_data_input)) { + return false; } } - input_size += layer.size(); - input_layers->emplace_back(layer); + input_size += reorder_layer.size(); + input_layers->emplace_back(reorder_layer); } if (input_nodes.size() != input_size) { - MS_LOG(DEBUG) << "fusion scope error, layer input:" << input_size << ", input_node:" << input_nodes.size(); + MS_LOG(INFO) << "Fusion error: fusion scope error, layer input:" << input_size + << ", input_node:" << input_nodes.size(); return false; } return true; } -bool TbeKernelBuild::GenFusionDataInputJson(const shared_ptr &data_input, nlohmann::json *data_str, - size_t *index) { +bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr &data_input, + const std::map &spec_data_input, + nlohmann::json *data_str, size_t *index) { MS_EXCEPTION_IF_NULL(data_str); MS_EXCEPTION_IF_NULL(index); std::vector output_desc_list; @@ -604,13 +662,17 @@ bool TbeKernelBuild::GenFusionDataInputJson(const shared_ptr output_desc_list.push_back(output_desc); (*index)++; } else { + FusionDataType fusion_data_type = kFusionNormal; + if (spec_data_input.find(data_input) != spec_data_input.end()) { + fusion_data_type = spec_data_input.at(data_input); + } auto kernel_idx = AnfAlgo::VisitKernel(data_input, 0); auto real_node = kernel_idx.first; size_t real_idx = kernel_idx.second; MS_LOG(INFO) << "real name " << real_node->fullname_with_scope() << " index:" << real_idx; // "output_desc" nlohmann::json output_desc; - GenDescJson(real_node, real_idx, real_idx, &output_desc); + GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type); output_desc_list.push_back(output_desc); (*data_str)["name"] = NormalizeFullScopeName(real_node->fullname_with_scope()); } @@ -632,11 +694,12 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) { auto real_input_size = cnode->inputs().size() - 1; auto dyn_input_size = dyn_input_sizes.size(); if (dyn_input_size != 1) { - MS_LOG(DEBUG) << "fusion build not support dyn_input_sizes > 1"; + MS_LOG(INFO) << "Fusion error: fusion build not support dyn_input_sizes > 1"; return ret; } if (IntToSize(dyn_input_sizes[0]) != real_input_size) { - MS_LOG(DEBUG) << " dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size" << real_input_size; + MS_LOG(INFO) << "Fusion error: dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size" + << real_input_size; return ret; } ret = true; @@ -663,6 +726,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, std::vector *input_desc_list, size_t *index) { MS_EXCEPTION_IF_NULL(cnode); MS_EXCEPTION_IF_NULL(input_desc_list); + std::vector input_desc_list_tmp = {}; bool is_dynamic_input = IsDynamicInput(cnode); for (size_t i = 1; i < cnode->inputs().size(); ++i) { auto input = cnode->input(i); @@ -676,7 +740,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, MS_LOG(INFO) << "node has dynamic input."; input_desc["dyn_index"] = (i - 1); } - (*input_desc_list).emplace_back(input_desc); + input_desc_list_tmp.emplace_back(input_desc); } size_t optional_num = GetOptionalInput(cnode, is_dynamic_input); if (optional_num > 0) { @@ -686,35 +750,24 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, optional_input_desc["name"] = std::string(kOptional) + std::to_string(*index); (*index)++; (*layer_iter)->emplace_back(nullptr); - (*input_desc_list).emplace_back(optional_input_desc); + input_desc_list_tmp.emplace_back(optional_input_desc); } } + auto op_name = AnfAlgo::GetCNodeName(cnode); + TbeAdapter::FusionInputOrderPass(op_name, input_desc_list_tmp, input_desc_list); return true; } std::vector TbeKernelBuild::GetDescOutputIndex(const std::vector &output_used_nums) { std::vector desc_output_index = {}; - bool find_reused = false; - size_t reused_num = 0; for (size_t idx = 0; idx < output_used_nums.size(); ++idx) { auto output_use_num_item = output_used_nums[idx]; MS_LOG(INFO) << "output used num[" << idx << "] = " << output_use_num_item; - if (output_use_num_item == 1 || output_use_num_item == 0) { + desc_output_index.emplace_back(idx); + if (output_use_num_item > 1) { desc_output_index.emplace_back(idx); - } else { - if (!find_reused) { - desc_output_index.emplace_back(idx); - } else { - desc_output_index.emplace_back(desc_output_index[idx - 1]); - } - reused_num += (output_use_num_item - 1); - find_reused = true; } } - auto pad_value = output_used_nums.size() == 1 ? 0 : desc_output_index[desc_output_index.size() - 1] + 1; - for (size_t i = 0; i < reused_num; ++i) { - desc_output_index.emplace_back(pad_value); - } return desc_output_index; } @@ -811,6 +864,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto } auto ret = GetIOSizeImpl(data_output); input_size_list->push_back(ret); + MS_LOG(INFO) << "Fusion info: scope input name: " << op["name"] << ", size: " << ret; } } } @@ -819,26 +873,31 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0); auto real_node = kernel_idx.first; size_t real_idx = kernel_idx.second; + auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope()); + MS_LOG(INFO) << "Fusion info: real node name: " << normal_name << ", real output index: " << real_idx; for (const auto &op : fusion_op_list) { - auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope()); if (op["name"] == normal_name) { auto op_output_desces = op["output_desc"]; if (output_node != real_node) { // tuple_get item - MS_LOG(DEBUG) << "output is a tuple getitem node"; + MS_LOG(INFO) << "output is a tuple getitem node"; auto output_desc = op_output_desces[real_idx]; if (output_desc["shape"].empty()) { - continue; + MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx; + return false; } auto ret = GetIOSizeImpl(output_desc); output_size_list->push_back(ret); + MS_LOG(INFO) << "Fusion info: scope output index: " << real_idx << ", size: " << ret; } else { for (const auto &output_desc : op_output_desces) { if (output_desc["shape"].empty()) { + MS_LOG(INFO) << "Fusion info: output_desc's shape is empty, may be this node output"; continue; } auto ret = GetIOSizeImpl(output_desc); output_size_list->push_back(ret); + MS_LOG(INFO) << "Fusion info: scope output size: " << ret; } } } diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h index 1a3eee7fd..f6e28327d 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h @@ -35,6 +35,8 @@ namespace kernel { // kernel operate type used for generate json class TbeKernelBuild { + enum FusionDataType { kFusionNormal = 0, kFusionAddN, kFusionReLUGradV2 }; + public: static bool GetIOSize(const nlohmann::json &kernel_json, std::vector *input_size_list, std::vector *output_size_list); @@ -48,8 +50,9 @@ class TbeKernelBuild { private: TbeKernelBuild() = default; ~TbeKernelBuild() = default; - static bool GenFusionDataInputJson(const std::shared_ptr &data_input, nlohmann::json *data_str, - size_t *index); + static bool GenFusionDataInputJson(const std::shared_ptr &data_input, + const std::map &spec_data_input, + nlohmann::json *data_str, size_t *index); static bool GenFusionComputeJson(const mindspore::AnfNodePtr &compute_node, std::vector>::iterator *layer_iter, nlohmann::json *compute_op_str, std::string *fusion_kernel_name, size_t *index); @@ -60,13 +63,17 @@ class TbeKernelBuild { static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode, std::vector *output_desc_list); static void GenDescJson(const std::shared_ptr &anf_node, size_t node_out_idx, - size_t desc_output_idx, nlohmann::json *output_desc); + size_t desc_output_idx, nlohmann::json *output_desc, + FusionDataType fusion_data_type = kFusionNormal); static void GenReusedOutputDesc(const std::shared_ptr &anf_node, size_t index, size_t output_index, nlohmann::json *output_desc); static size_t GetIOSizeImpl(const nlohmann::json &desc); + static bool GetSpecInputLayers(const std::string &op_name, const std::vector &reorder_layer, + std::map *spec_data_input); static bool GetInputLayers(const std::vector &input_nodes, const std::vector &compute_nodes, - std::vector> *input_layers); + std::vector> *input_layers, + std::map *spec_data_input); static bool IsDynamicInput(const CNodePtr &cnode); static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input); }; diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc index 03284c9ad..09f5aca98 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc @@ -38,6 +38,7 @@ #include "pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h" #include "pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" #include "pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h" +#include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h" #include "pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h" #include "pre_activate/ascend/ir_fusion/transdata_split.h" #include "pre_activate/ascend/ir_fission/topk_split.h" @@ -265,6 +266,7 @@ void AscendBackendOptimization(const std::shared_ptr &kern other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); + other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc index abacb9137..851831383 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include "kernel/kernel_fusion.h" #include "debug/anf_ir_dump.h" @@ -261,23 +262,24 @@ CNodePtr CreateFusionOp(const std::vector &inputs_list, const std::v return buffer_fusion_kernel; } -kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector &inputs_list_in, - const std::vector &inputs_list, +kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector &inputs_list, const std::vector &outputs_list) { MS_LOG(DEBUG) << "Start Create Kernel Info"; kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; // inputs format and data type std::vector inputs_format; std::vector inputs_data_type; - for (auto node : inputs_list_in) { - auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - auto &inputs = cnode->inputs(); - for (size_t input_index = 1; input_index < inputs.size(); ++input_index) { - if (std::find(inputs_list.begin(), inputs_list.end(), inputs[input_index]) != inputs_list.end()) { - inputs_format.push_back(AnfAlgo::GetInputFormat(node, input_index - 1)); - inputs_data_type.push_back(AnfAlgo::GetInputDeviceDataType(node, input_index - 1)); - } + for (const auto &input : inputs_list) { + if (input->isa() && AnfAlgo::GetCNodeName(input) == prim::kPrimTupleGetItem->name()) { + auto tuple_getitem = input->cast(); + MS_EXCEPTION_IF_NULL(tuple_getitem); + inputs_format.push_back(AnfAlgo::GetOutputFormat( + tuple_getitem->input(1), IntToSize(GetValue(GetValueNode(tuple_getitem->input(2)))))); + inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType( + tuple_getitem->input(1), IntToSize(GetValue(GetValueNode(tuple_getitem->input(2)))))); + } else { + inputs_format.push_back(AnfAlgo::GetOutputFormat(input, 0)); + inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(input, 0)); } } // outputs format and data type @@ -360,62 +362,6 @@ void ReplaceOldNode(std::unordered_map *buffer_fusi } } -void GetInputList(const CNodePtr &node, const int32_t cur_fusion_id, std::vector *inputs_list) { - MS_EXCEPTION_IF_NULL(node); - MS_EXCEPTION_IF_NULL(inputs_list); - auto &inputs = node->inputs(); - for (size_t input_index = 1; input_index < inputs.size(); ++input_index) { - auto input = inputs[input_index]; - if (AnfAlgo::IsRealCNodeKernel(input)) { - if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, input)) { - auto fusion_id = AnfAlgo::GetNodeAttr(input, kOpAttrFusionId); - if (fusion_id != cur_fusion_id) { - inputs_list->push_back(input); - } - } else { - inputs_list->push_back(input); - } - } else if (input->isa()) { - for (auto &input_in : input->cast()->inputs()) { - if (AnfAlgo::IsRealCNodeKernel(input_in)) { - if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, input_in)) { - auto fusion_id = AnfAlgo::GetNodeAttr(input_in, kOpAttrFusionId); - if (fusion_id != cur_fusion_id) { - inputs_list->push_back(input); - } - } else { - inputs_list->push_back(input); - } - } - } - } else { - inputs_list->push_back(input); - } - } -} - -void CheckCurrentNodeIsInput(const CNodePtr &node, const int32_t &cur_fusion_id, - std::unordered_map *buffer_fusion_infos) { - MS_EXCEPTION_IF_NULL(buffer_fusion_infos); - if ((*buffer_fusion_infos).find(cur_fusion_id) == (*buffer_fusion_infos).end()) { - BufferFusionInfo_t buffer_fusion_info; - (*buffer_fusion_infos)[cur_fusion_id] = buffer_fusion_info; - } - std::vector inputs_list; - GetInputList(node, cur_fusion_id, &inputs_list); - if (!inputs_list.empty()) { - if (!(*buffer_fusion_infos)[cur_fusion_id].inputs_list.empty()) { - (void)(*buffer_fusion_infos)[cur_fusion_id].inputs_list.insert( - (*buffer_fusion_infos)[cur_fusion_id].inputs_list.end(), inputs_list.begin(), inputs_list.end()); - (void)(*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.insert( - (*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.end(), node); - } else { - (*buffer_fusion_infos)[cur_fusion_id].inputs_list = inputs_list; - (*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.push_back(node); - } - } -} - void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph, std::unordered_map *buffer_fusion_infos) { MS_EXCEPTION_IF_NULL(buffer_fusion_infos); @@ -429,6 +375,45 @@ void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph, } } +void GetFusionScopeInputNodeList(session::KernelGraph *kernel_graph, + std::unordered_map *buffer_fusion_infos) { + MS_EXCEPTION_IF_NULL(kernel_graph); + MS_EXCEPTION_IF_NULL(buffer_fusion_infos); + auto manager = kernel_graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + + for (auto &buffer_fusion_info : *buffer_fusion_infos) { + auto fusion_id = buffer_fusion_info.first; + auto fusion_info = buffer_fusion_info.second; + for (const auto &node : fusion_info.anf_nodes) { + auto cnode = node->cast(); + for (size_t idx = 1; idx < cnode->inputs().size(); ++idx) { + auto real_input = AnfAlgo::VisitKernel(cnode->input(idx), 0); + if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), real_input.first) == + fusion_info.anf_nodes.end()) { + if (std::find((*buffer_fusion_infos)[fusion_id].inputs_list.begin(), + (*buffer_fusion_infos)[fusion_id].inputs_list.end(), + cnode->input(idx)) == (*buffer_fusion_infos)[fusion_id].inputs_list.end()) { + (*buffer_fusion_infos)[fusion_id].inputs_list.push_back(cnode->input(idx)); + } + } + } + } + } +} + +bool TupleGetitemNodeCompare(const AnfNodePtr &node1, const AnfNodePtr &node2) { + MS_EXCEPTION_IF_NULL(node1); + MS_EXCEPTION_IF_NULL(node2); + auto getitem1 = node1->cast(); + auto getitem2 = node2->cast(); + MS_EXCEPTION_IF_NULL(getitem1); + MS_EXCEPTION_IF_NULL(getitem2); + auto output_idx1 = GetValue(GetValueNode(getitem1->input(2))); + auto output_idx2 = GetValue(GetValueNode(getitem2->input(2))); + return output_idx1 < output_idx2; +} + void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, std::unordered_map *buffer_fusion_infos) { MS_EXCEPTION_IF_NULL(kernel_graph); @@ -454,14 +439,7 @@ void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, std::transform(manager->node_users()[node].begin(), manager->node_users()[node].end(), std::back_inserter(tuple_getitem_nodes), [](const std::pair &use_node) { return use_node.first; }); - std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), - [](const AnfNodePtr &node1, const AnfNodePtr &node2) { - auto getitem1 = node1->cast(); - auto getitem2 = node2->cast(); - auto output_idx1 = GetValue(GetValueNode(getitem1->input(2))); - auto output_idx2 = GetValue(GetValueNode(getitem2->input(2))); - return output_idx1 < output_idx2; - }); + std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), TupleGetitemNodeCompare); for (auto getitem : tuple_getitem_nodes) { auto getitem_ptr = getitem->cast(); auto input2 = getitem_ptr->input(2); @@ -484,6 +462,36 @@ void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, } } +void SetFusionOpRefInfos(session::KernelGraph *kernel_graph, const std::vector &outputs_list, + const AnfNodePtr &fusion_kernel) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto manager = kernel_graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + for (size_t idx = 0; idx < outputs_list.size(); ++idx) { + auto output = outputs_list[idx]; + if (output->isa() && AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { + auto real_output = AnfAlgo::VisitKernel(output, 0); + auto output_cnode = output->cast(); + MS_EXCEPTION_IF_NULL(output_cnode); + auto input2 = output_cnode->input(2); + auto output_idx = GetValue(GetValueNode(input2)); + session::AnfWithOutIndex out_pair(real_output.first, output_idx); + if (kernel_graph->IsInRefOutputMap(out_pair)) { + auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); + session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); + kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); + } + } else { + session::AnfWithOutIndex out_pair(output, 0); + if (kernel_graph->IsInRefOutputMap(out_pair)) { + auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); + session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); + kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); + } + } + } +} + void MatchConvBnreduce(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, std::unordered_set *fused_set, FusedNodeRecord *candidate_fusion) { MS_EXCEPTION_IF_NULL(cnode); @@ -634,24 +642,12 @@ void MatchFusionTypePattern(const session::KernelGraph &kernel_graph, std::unord void BufferFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, std::unordered_map *buffer_fusion_infos) const { MS_EXCEPTION_IF_NULL(buffer_fusion_infos); - std::vector node_list = TopoSort(kernel_graph->get_return()); - for (auto &node : node_list) { - if (!AnfAlgo::IsRealCNodeKernel(node)) { - continue; - } - auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, cnode)) { - auto cur_fusion_id = AnfAlgo::GetNodeAttr(cnode, kOpAttrFusionId); - CheckCurrentNodeIsInput(cnode, cur_fusion_id, buffer_fusion_infos); - } - } GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos); + GetFusionScopeInputNodeList(kernel_graph, buffer_fusion_infos); GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos); for (auto &buffer_fusion_info : *buffer_fusion_infos) { buffer_fusion_info.second.kernel_build_info = - CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list_in, buffer_fusion_info.second.inputs_list, - buffer_fusion_info.second.outputs_list); + CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list); } } @@ -743,7 +739,7 @@ bool BufferFusion::ReplaceFusionOp(std::unordered_map anf_nodes; std::vector inputs_list; - std::vector inputs_list_in; std::vector outputs_list; kernel::KernelBuildInfoPtr kernel_build_info; }; diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc new file mode 100644 index 000000000..857670a38 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc @@ -0,0 +1,71 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h" +#include "session/anf_runtime_algorithm.h" +#include "utils/utils.h" +#include "operator/ops.h" +#include "device/kernel_info.h" +#include "pre_activate/common/helper.h" +#include "pre_activate/common/optimizer.h" +#include "pre_activate/ascend/ascend_helper.h" + +namespace mindspore { +namespace opt { +void DoRefresh(const CNodePtr &cnode) { + if (cnode == nullptr) { + MS_LOG(EXCEPTION) << "node is nullptr"; + } + for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); input_index++) { + auto input_kernel_node = AnfAlgo::GetInputNode(cnode, input_index); + if (input_kernel_node->isa()) { + std::shared_ptr builder = + std::make_shared(); + auto cnode_input_format = AnfAlgo::GetInputFormat(cnode, input_index); + auto kernel_node_format = AnfAlgo::GetOutputFormat(input_kernel_node, 0); + auto dtype = AnfAlgo::GetOutputDeviceDataType(input_kernel_node, 0); + if (kernel_node_format != cnode_input_format) { + builder->SetOutputsFormat({cnode_input_format}); + builder->SetOutputsDeviceType({dtype}); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_kernel_node.get()); + } + } + } +} + +bool RefreshParameterFormat::Run(const FuncGraphPtr &func_graph) { + if (func_graph == nullptr) { + MS_LOG(ERROR) << "func_graph is nullptr."; + return false; + } + std::vector node_list = TopoSort(func_graph->get_return()); + for (auto node : node_list) { + if (node == nullptr || !node->isa()) { + continue; + } + auto cnode = node->cast(); + if (cnode == nullptr) { + continue; + } + auto node_name = AnfAlgo::GetCNodeName(cnode); + if (node_name == kBNTrainingUpdateOpName) { + DoRefresh(cnode); + } + } + return true; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h new file mode 100644 index 000000000..0ba688b13 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h @@ -0,0 +1,40 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_ + +#include +#include +#include +#include "ir/anf.h" +#include "pre_activate/common/pass.h" + +namespace mindspore { +namespace opt { +class RefreshParameterFormat : public Pass { + public: + explicit RefreshParameterFormat(size_t groups = 1) : Pass("refresh_parameter_format"), groups_(groups) {} + ~RefreshParameterFormat() override = default; + bool Run(const FuncGraphPtr &graph) override; + + private: + size_t groups_ = 1; +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_ diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/session/anf_runtime_algorithm.cc index 525ff44dd..6aa79f82d 100644 --- a/mindspore/ccsrc/session/anf_runtime_algorithm.cc +++ b/mindspore/ccsrc/session/anf_runtime_algorithm.cc @@ -825,6 +825,8 @@ size_t AnfRuntimeAlgorithm::GetRealInputIndex(const mindspore::AnfNodePtr &anf_n static std::map> spec_node_list = { {prim::kPrimConv2DBackpropInput->name(), {{0, 1}, {1, 0}}}, {prim::kPrimConv2DBackpropFilter->name(), {{0, 1}, {1, 0}}}, + {kFusionOpConv2DBackpropInputReluGradV2Name, {{0, 1}, {1, 0}}}, + {kFusionOpConv2DBackpropInputAddNReluGradV2Name, {{0, 1}, {1, 0}}}, {prim::kPrimLogSoftmaxGrad->name(), {{0, 1}, {1, 0}}}, {prim::kPrimLayerNormGrad->name(), {{0, 1}, {1, 0}, {2, 2}, {3, 3}, {4, 4}}}, {prim::kPrimLayerNormBetaGammaBackprop->name(), {{0, 1}, {1, 0}, {2, 2}, {3, 3}}}, @@ -835,7 +837,7 @@ size_t AnfRuntimeAlgorithm::GetRealInputIndex(const mindspore::AnfNodePtr &anf_n auto node_name = AnfAlgo::GetCNodeName(anf_node); if (AnfAlgo::GetKernelType(anf_node) == TBE_KERNEL) { auto find = spec_node_list.find(node_name); - if (find != spec_node_list.end()) { + if (find != spec_node_list.end() && cur_index < find->second.size()) { ret = find->second[cur_index]; MS_LOG(INFO) << "Real input index change to" << ret << ", node name:" << node_name; } diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h index 4dfc4baa3..59d7f27c1 100644 --- a/mindspore/ccsrc/utils/utils.h +++ b/mindspore/ccsrc/utils/utils.h @@ -122,6 +122,10 @@ constexpr auto kSendOpName = "Send"; constexpr auto kRecvOpName = "Recv"; constexpr auto kReluV2OpName = "ReLUV2"; constexpr auto kReluGradV2OpName = "ReluGradV2"; +constexpr auto kAddNOpName = "AddN"; +constexpr auto kConv2DBackpropInputOpName = "Conv2DBackpropInput"; +constexpr auto kFusionOpConv2DBackpropInputReluGradV2Name = "FusionOp_Conv2DBackpropInput_ReluGradV2"; +constexpr auto kFusionOpConv2DBackpropInputAddNReluGradV2Name = "FusionOp_Conv2DBackpropInput_AddN_ReluGradV2"; // attr key name constexpr auto kAttrInputNames = "input_names"; diff --git a/mindspore/ops/_op_impl/tbe/reduce_mean.py b/mindspore/ops/_op_impl/tbe/reduce_mean.py index 47548e903..67b96933a 100644 --- a/mindspore/ops/_op_impl/tbe/reduce_mean.py +++ b/mindspore/ops/_op_impl/tbe/reduce_mean.py @@ -31,6 +31,7 @@ reduce_mean_op_info = TBERegOp("ReduceMean") \ .dtype_format(DataType.U8_Default, DataType.U8_Default) \ .dtype_format(DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F32_Default, DataType.F32_Default) \ + .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \ .get_op_info() diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index 1e3254662..62d69c242 100644 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -654,6 +654,7 @@ class Conv2D(PrimitiveWithInfer): self.add_prim_attr('data_format', "NCHW") self.out_channel = validator.check_integer('out_channel', out_channel, 0, Rel.GT, self.name) self.group = validator.check_integer('group', group, 0, Rel.GT, self.name) + self.add_prim_attr('offset_a', 0) def infer_shape(self, x_shape, w_shape): validator.check_integer("weight rank", len(w_shape), 4, Rel.EQ, self.name) -- GitLab