提交 a838c9bd 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!5685 update run for br: master

Merge pull request !5685 from guozhijian/udpate_run_from_c75b100_to_c75b150_master
graphengine @ 2dbfefcd
Subproject commit 622af6c1c50034bea5a08bd409c5a410782bfe53
Subproject commit 2dbfefcdd0d4b958801403dbaf9efe46447dccd2
......@@ -17,8 +17,6 @@ import json
import os
import sys
from te.platform.cce_conf import te_set_version
from te.platform.fusion_manager import op_build_cfg_dis, op_build_cfg_en, set_current_op_name, \
init_op_pattern, set_op_params, set_op_build_type, get_op_pattern, set_current_op_func_name
from te.platform.fusion_util import fusion_op
from common import check_kernel_info, get_args, get_build_in_impl_path, get_ddk_version
......@@ -27,7 +25,6 @@ build_in_impl_path = get_build_in_impl_path()
# op function list
op_build = "compile"
op_pre_build = "pre_build"
fusion_pattern_start_flag = "fusion_pattern_start"
fusion_pattern_end_flag = "fusion_pattern_end"
......@@ -83,19 +80,7 @@ def build_op(build_type, json_str):
else:
op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0)
# get function
if build_type == op_pre_build:
# set op parameter
op_build_cfg_dis()
set_current_op_func_name(op_name)
set_current_op_name(kernel_name)
init_op_pattern()
set_op_params(*outputs_args, *attrs_args, kernel_name=kernel_name)
set_op_build_type('prebuild')
if custom_flag:
py_fn_name = kernel_info['op_info']['name']
else:
py_fn_name = op_name
elif build_type == op_build:
if build_type == op_build:
if custom_flag:
py_fn_name = kernel_info['op_info']['name']
else:
......@@ -106,13 +91,6 @@ def build_op(build_type, json_str):
if op_func is None:
raise ValueError("Op:{} function {} is not supported by Tbe.".format(op_name, build_type))
# pre build
if build_type == op_pre_build:
op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
# disable only pattern configuration
op_build_cfg_en()
return get_op_pattern()
# call function
if kernel_name[0:19] == "bounding_box_encode":
return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name_val=kernel_name)
......@@ -120,8 +98,6 @@ def build_op(build_type, json_str):
return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
except Exception as e:
if build_type == op_pre_build:
op_build_cfg_en()
raise RuntimeError(e)
......@@ -136,14 +112,9 @@ def compile_fusion_op(json_str):
Exception: If specific keyword is not found.
"""
args = json.loads(json_str)
te_set_version(ddk_version)
if 'fusion_op' not in args or not args['fusion_op']:
raise ValueError("Json string Errors, key:fusion_op not found.")
if 'prebuild_ops' not in args or not args['prebuild_ops']:
raise ValueError("Json string Errors, key:prebuild_ops not found.")
pre_build_op_list = args['prebuild_ops']
for op in pre_build_op_list:
build_op(op_pre_build, json.dumps(op))
fusion_op_arg = args['fusion_op']
return fusion_op(json.dumps(fusion_op_arg))
......@@ -159,8 +130,6 @@ def compile_with_json(json_str):
json_info = json.loads(json_str)
if "fusion_op" in json_info:
ret = compile_fusion_op(json_str)
elif "compile_type" in json_info:
ret = build_op(op_pre_build, json_str)
else:
ret = build_op(op_build, json_str)
return ret
......
......@@ -20,6 +20,8 @@
#include <vector>
#include <memory>
#include <algorithm>
#include <map>
#include <climits>
#include "runtime/device/kernel_runtime.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
......@@ -218,7 +220,7 @@ void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
dim->set_size((::google::protobuf::int64)item);
}
node_inputs->set_tensor_type((mindspore::DataType)input_data_type);
node_inputs->set_tensor_type(input_data_type);
node_inputs->set_mem_device("HBM");
}
}
......@@ -245,7 +247,7 @@ void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
}
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
node_outputs->set_tensor_type((mindspore::DataType)output_data_type);
node_outputs->set_tensor_type(output_data_type);
node_outputs->set_mem_device("HBM");
}
}
......@@ -287,6 +289,109 @@ bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
return true;
}
bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
if (!anf_node->isa<CNode>()) {
return true;
}
if (!AnfAlgo::IsDynamicShape(anf_node)) {
return true;
}
MS_LOG(INFO) << "CreateExtInfo start, " << anf_node->fullname_with_scope();
int32_t unknown_shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
uint64_t ext_info_head_len = kExtInfoHeadSize;
std::string ext_info;
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
// 1.addr:unknown shape type
uint64_t ext_info_len = ext_info.size();
ext_info_len += ext_info_head_len + sizeof(int32_t);
// 2.addr:input ShapeAndType
ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
// 3.addr:output ShapeAndType
ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
uint64_t ext_info_offset = ext_info.size();
ext_info.resize(ext_info_len, 0);
char *ext_info_buf = ext_info.data();
// deal1: unknown shape type
ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
info->infoType = FWK_ADPT_EXT_SHAPE_TYPE;
info->infoLen = sizeof(int32_t);
ext_info_offset += ext_info_head_len;
int32_t *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset);
*shape_type = unknown_shape_type;
ext_info_offset += info->infoLen;
// deal2:input ShapeAndType
info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
info->infoType = FWK_ADPT_EXT_INPUT_SHAPE;
info->infoLen = input_num * sizeof(ShapeAndType);
ext_info_offset += ext_info_head_len;
ShapeAndType *inputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
for (size_t input_index = 0; input_index < input_num; input_index++) {
TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
std::vector<size_t> input_shape;
int32_t input_data_type;
if (input_type == kObjectTypeString) {
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
auto input_node = cnode->inputs()[input_index + 1];
auto value_ptr = GetValueNode(input_node);
auto value = GetValue<std::string>(value_ptr);
input_shape.push_back(1);
input_shape.push_back(value.size());
input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
} else {
input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
}
inputs[input_index].type = input_data_type;
size_t input_shape_index = 0;
for (; input_shape_index < input_shape.size(); input_shape_index++) {
inputs[input_index].dims[input_shape_index] = SizeToLong(input_shape[input_shape_index]);
}
if (input_shape.size() < kMaxShapeDims) {
inputs[input_index].dims[input_shape_index] = LLONG_MIN;
}
}
ext_info_offset += info->infoLen;
// deal3:output ShapeAndType
info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
info->infoType = FWK_ADPT_EXT_OUTPUT_SHAPE;
info->infoLen = output_num * sizeof(ShapeAndType);
ext_info_offset += ext_info_head_len;
ShapeAndType *outputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
for (size_t output_index = 0; output_index < output_num; output_index++) {
std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
outputs[output_index].type = output_data_type;
size_t output_shape_index = 0;
for (; output_shape_index < output_shape.size(); output_shape_index++) {
outputs[output_index].dims[output_shape_index] = SizeToLong(output_shape[output_shape_index]);
}
if (output_shape_index < kMaxShapeDims) {
outputs[output_index].dims[output_shape_index] = LLONG_MIN;
}
}
// set ext info
kernel_mod_ptr->SetExtInfo(ext_info);
return true;
}
KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
......@@ -300,6 +405,11 @@ KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) {
MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
}
if (!CreateExtInfo(anf_node, kernel_mod_ptr)) {
MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
}
if (!SetIOSize(anf_node, kernel_mod_ptr)) {
MS_LOG(EXCEPTION) << "Set input output size list failed.";
}
......
......@@ -43,6 +43,7 @@ AicpuOpKernelMod::~AicpuOpKernelMod() {
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
ext_info_.clear();
}
void AicpuOpKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
......@@ -54,6 +55,7 @@ const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { retu
void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) { inputList_ = inputList; }
void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { outputList_ = outputList; }
void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); }
void AicpuOpKernelMod::SetExtInfo(const std::string &ext_info) { ext_info_ = ext_info; }
void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; }
void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
......@@ -84,16 +86,30 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs
auto node_def_len = node_def_str_.length();
param_len += node_def_len;
param_len += sizeof(uint32_t);
AicpuParamHead aicpu_param_head;
aicpu_param_head.length = param_len;
aicpu_param_head.ioAddrNum = io_addrs_num;
if (ext_info_.empty()) {
MS_LOG(INFO) << "Static Shape Kernel";
aicpu_param_head.extInfoLength = 0;
aicpu_param_head.extInfoAddr = 0;
} else {
MS_LOG(INFO) << "Dynamic Kernel Ext Info size:" << ext_info_.size();
}
// Create taskArgs: AicpuParamHead + ioAddrs + notifyId + customizedAttr
AicpuParamHead paramHead = {static_cast<uint32_t>(param_len), static_cast<uint32_t>(io_addrs_num)};
args_.clear();
(void)args_.append(reinterpret_cast<const char *>(&paramHead), sizeof(AicpuParamHead));
(void)args_.append(reinterpret_cast<const char *>(&aicpu_param_head), sizeof(AicpuParamHead));
// TaskArgs append ioAddrs
if (io_addrs_size != 0) {
(void)args_.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs_size);
}
// size for node_def
args_.append(reinterpret_cast<const char *>(&node_def_len), sizeof(uint32_t));
// When it's aicpu customized ops, taskArgs should append customized attr
if (node_def_len != 0) {
(void)args_.append(reinterpret_cast<const char *>(node_def_str_.data()), node_def_len);
......@@ -145,8 +161,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
node_name_ = kTopKV2;
}
AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());
AicpuTaskInfoPtr task_info_ptr =
make_shared<ge::model_runner::AicpuTaskInfo>(kernel_name_, stream_id, node_so_, node_name_, node_def_str_,
ext_info_, input_data_addrs, output_data_addrs, NeedDump());
MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
return {task_info_ptr};
......
......@@ -36,6 +36,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
void SetOutputList(const std::vector<int64_t> &outputList);
void SetAnfNode(const AnfNodePtr &anf_node);
void SetNodeDef(const std::string &nodeDef);
void SetExtInfo(const std::string &ext_info);
void SetNodeName(const std::string &node_name);
/**
......@@ -58,6 +59,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
std::string node_def_str_;
std::string node_name_;
std::string node_so_;
std::string ext_info_;
std::vector<int64_t> inputList_;
std::vector<int64_t> outputList_;
AnfNodePtr anf_node_;
......
......@@ -21,7 +21,6 @@
#include <map>
#include <string>
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace kernel {
constexpr auto kInitDataSetQueue = "InitDataSetQueue";
......@@ -50,6 +49,36 @@ struct AicpuParamHead {
uint64_t extInfoAddr; // extInfo address
} __attribute__((packed));
// Size in bytes of the fixed ExtInfo header (infoType + infoLen) that precedes each payload.
const uint32_t kExtInfoHeadSize = 8;
// One record of the aicpu extend-info buffer: a fixed header followed by infoLen payload bytes.
struct ExtInfo {
  int32_t infoType;  // extend type
  uint32_t infoLen;  // length for infoMsg
  char infoMsg[0];   // extend value
} __attribute__((packed));
// Buffer offsets are advanced by kExtInfoHeadSize, so it must track the real header layout.
static_assert(sizeof(ExtInfo) == kExtInfoHeadSize, "kExtInfoHeadSize must match the ExtInfo header size");
// Extend info ShapeAndType
// Capacity of the fixed-size dims array in ShapeAndType.
const uint32_t kMaxShapeDims = 8;
// Packed record holding the data type and shape of one tensor inside the ext-info buffer.
// CreateExtInfo stores the proto data type in `type` and copies the device shape into `dims`;
// when the shape has fewer than kMaxShapeDims entries, the slot after the last one is LLONG_MIN.
struct ShapeAndType {
int32_t type;
int64_t dims[kMaxShapeDims];
} __attribute__((packed));
// Tags stored in ExtInfo::infoType to tell the payload kinds of a task's ext info apart.
enum FWKTaskExtInfoType {
  // payload is a single int32_t unknown-shape type
  FWK_ADPT_EXT_SHAPE_TYPE = 0,
  // payload is an array of ShapeAndType, one per input
  FWK_ADPT_EXT_INPUT_SHAPE = 1,
  // payload is an array of ShapeAndType, one per output
  FWK_ADPT_EXT_OUTPUT_SHAPE = 2,
  // NOTE(review): presumably a sentinel one past the last valid tag - confirm with the aicpu side
  FWK_ADPT_EXT_INVALID = 3
};
// How the output shape of an unknown-shape (dynamic) op is obtained.
enum UnknowShapeOpType {
  // op out shape get by input shape
  DEPEND_IN_SHAPE = 1,
  // op out shape get by const op value
  DEPEND_CONST_VALUE = 2,
  // op out shape get by range
  DEPEND_SHAPE_RANGE = 3,
  // op out shape get by totally computing
  DEPEND_COMPUTE = 4
};
class AicpuOpUtil {
public:
static int MsTypeToProtoType(TypeId ms_type);
......
......@@ -26,7 +26,7 @@ message AttrValue {
repeated int64 i = 3 [ packed = true ]; //"array(int)"
repeated float f = 4 [ packed = true ]; //"array(float)"
repeated bool b = 5 [ packed = true ]; //"array(bool)"
repeated DataType type = 6 [ packed = true ]; //"array(type)"
repeated int32 type = 6 [ packed = true ]; //"array(type)"
repeated TensorShape shape = 7; //"array(shape)"
repeated Tensor tensor = 8; //"array(tensor)"
}
......
......@@ -18,9 +18,16 @@ package mindspore;
import "attr.proto";
import "tensor.proto";
// Index/count pair; used as the value type of NodeDef.dym_inputs and NodeDef.dym_outputs.
// NOTE(review): presumably `idx` is the starting position and `num` the tensor count of one
// dynamic input/output group - confirm against the aicpu kernel implementation.
message DynamicIdx {
int32 idx = 1;
int32 num = 2;
}
// Description of a single node: its op string, attributes and input/output tensors.
// Field number 1 is not used by this message.
message NodeDef {
string op = 2;
// attribute values keyed by string
map<string, AttrValue> attrs = 3;
repeated Tensor inputs = 4;
repeated Tensor outputs = 5;
// NOTE(review): presumably keyed by dynamic input/output name - confirm with the producer of this message
map<string, DynamicIdx> dym_inputs = 6;
map<string, DynamicIdx> dym_outputs = 7;
}
......@@ -26,9 +26,12 @@ message Tensor {
TensorShape tensor_shape = 1;
// tensor content data type
DataType tensor_type = 2;
int32 tensor_type = 2;
// tensor memory device
// data located memory device , "DDR" "HBM" OR "NONE"
string mem_device = 3;
string name = 4;
uint64 data_ptr = 5;
uint64 data_size = 6;
}
......@@ -31,5 +31,5 @@ message TensorShape {
bool unknown_rank = 3;
// data format "NHWC" "NCHW" "NC1HWC0" OR "NONE"
string data_format = 4;
int32 data_format = 4;
};
......@@ -19,17 +19,30 @@ option cc_enable_arenas = true;
package mindspore;
enum DataType {
MS_UNKNOWN = 0;
MS_BOOL = 1;
MS_FLOAT32 = 0;
MS_FLOAT16 = 1;
MS_INT8 = 2;
MS_UINT8 = 3;
MS_INT16 = 4;
MS_UINT16 = 5;
MS_INT32 = 6;
MS_UINT32 = 7;
MS_INT64 = 8;
MS_UINT64 = 9;
MS_FLOAT16 = 10;
MS_FLOAT32 = 11;
MS_FLOAT64 = 12;
MS_INT32 = 3;
MS_UINT8 = 4;
MS_INT16 = 6;
MS_UINT16 = 7;
MS_UINT32 = 8;
MS_INT64 = 9;
MS_UINT64 = 10;
MS_FLOAT64 = 11;
MS_BOOL = 12;
MS_STRING = 13;
MS_DUAL_SUB_INT8 = 14;
MS_DUAL_SUB_UINT8 = 15;
MS_COMPLEX64 = 16;
MS_COMPLEX128 = 17;
MS_QINT8 = 18;
MS_QINT16 = 19;
MS_QINT32 = 20;
MS_QUINT8 = 21;
MS_QUINT16 = 22;
MS_RESOURCE = 23;
MS_STRING_REF = 24;
MS_DUAL = 25;
MS_UNKNOWN = 26;
}
......@@ -37,7 +37,6 @@ enum FusionType {
COMMREDUCE,
SEGMENT,
OPAQUE,
DYNAMIC,
UNKNOWN_FUSION_TYPE = -1,
};
enum OpPattern {
......@@ -80,8 +79,8 @@ class KernelPack {
bool LoadKernelMeta(const std::string &json_f, const std::string &processor);
bool ReadFromJsonFile(const std::string &json_f, const std::string &processor);
const std::string Serialize() const;
const FlexArray *const GetJson() const { return json_; }
const FlexArray *const GetKernel() const { return kernel_; }
const FlexArray *GetJson() const { return json_; }
const FlexArray *GetKernel() const { return kernel_; }
~KernelPack() {
if (json_) {
delete[] json_;
......
......@@ -19,53 +19,36 @@
#include <map>
#include <string>
#include <memory>
#include <utility>
#include "backend/kernel_compiler/tbe/tbe_kernel_build.h"
#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h"
#include "backend/kernel_compiler/tbe/tbe_utils.h"
#include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
#include "utils/ms_context.h"
namespace mindspore {
namespace kernel {
using mindspore::kernel::tbe::TbeUtils;
static bool GenPreBuildKernelJson(const std::vector<AnfNodePtr> &compute_nodes,
std::vector<nlohmann::json> *prebuild_op_list) {
MS_EXCEPTION_IF_NULL(prebuild_op_list);
TbeKernelJsonCreator creator(PREBUILD);
for (const auto &anf_node : compute_nodes) {
nlohmann::json prebuild;
if (!creator.GenTbeSingleKernelJson(anf_node, &prebuild)) {
MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
return false;
}
(*prebuild_op_list).push_back(prebuild);
}
return true;
}
std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
MS_LOG(INFO) << "kernel fusion build start, scope size:" << fusion_scopes.size();
std::map<int32_t, KernelModPtr> kernel_mod_ret;
auto build_manger = std::make_shared<ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manger);
for (const auto &fusion_scope_iter : fusion_scopes) {
auto scope_id = fusion_scope_iter.scope_id;
string fusion_kernel_name;
nlohmann::json fusion_op;
string fusion_kernel = "te_fusion";
if (!TbeKernelBuild::GenFusionScopeJson(fusion_scope_iter.input_nodes, fusion_scope_iter.compute_nodes, &fusion_op,
&fusion_kernel)) {
&fusion_kernel_name)) {
continue;
}
// gen kernel_name & check cache
std::string json_str = fusion_op.dump();
size_t hash_id = std::hash<std::string>()(json_str);
auto json_name = fusion_kernel.append("_").append(std::to_string(hash_id));
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
auto json_name =
fusion_kernel_name.append("_").append(std::to_string(hash_id)).append("_").append(std::to_string(device_id));
fusion_op["fusion_op_name"] = json_name;
// gen json for prebuild
std::vector<nlohmann::json> prebuild_op_list;
if (!GenPreBuildKernelJson(fusion_scope_iter.compute_nodes, &prebuild_op_list)) {
continue;
}
// get io size
std::vector<size_t> input_size_list;
std::vector<size_t> output_size_list;
......@@ -80,20 +63,20 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
auto kernel_mod =
build_manger->GenKernelMod(json_name, tbe::kProcessorAiCore, input_size_list, output_size_list, kernel_pack);
if (kernel_mod != nullptr) {
kernel_mod_ret[scope_id] = kernel_mod;
kernel_mod_ret[fusion_scope_iter.scope_id] = kernel_mod;
continue;
}
}
// fusion build
nlohmann::json fusion_json;
fusion_json["fusion_op"] = fusion_op;
fusion_json["prebuild_ops"] = prebuild_op_list;
auto task_id = build_manger->StartCompileOp(fusion_json);
TbeUtils::SaveJsonInfo(json_name, fusion_json.dump());
if (task_id < 0) {
MS_EXCEPTION(ArgumentError) << "start compile failed.";
}
build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list, scope_id);
build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list,
fusion_scope_iter.scope_id);
}
int build_failed_num = 0;
......
......@@ -16,6 +16,7 @@
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
#include <utility>
#include <vector>
#include <map>
#include "backend/kernel_compiler/kernel.h"
......@@ -25,11 +26,9 @@ namespace kernel {
* @brief fuse op and return a callable mod
*/
struct FusionScopeInfo {
FusionScopeInfo() {}
FusionScopeInfo(int32_t id, const std::vector<AnfNodePtr> &in, const std::vector<AnfNodePtr> &comp,
const std::vector<AnfNodePtr> &out)
: scope_id(id), input_nodes(in), compute_nodes(comp), output_nodes(out) {}
int32_t scope_id;
FusionScopeInfo(int32_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out)
: scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {}
int32_t scope_id{};
std::vector<AnfNodePtr> input_nodes;
std::vector<AnfNodePtr> compute_nodes;
std::vector<AnfNodePtr> output_nodes;
......
......@@ -40,14 +40,13 @@ class OpLib {
private:
static bool RegOpFromLocalInfo();
static bool DecodeOpInfo(const nlohmann::json &obj, const OpImplyType imply_type, const std::string &impl_path);
static bool DecodeAttr(const nlohmann::json &obj, const OpImplyType imply_type,
const std::shared_ptr<OpInfo> &op_info);
static bool DecodeOpInfo(const nlohmann::json &obj, OpImplyType imply_type, const std::string &impl_path);
static bool DecodeAttr(const nlohmann::json &obj, OpImplyType imply_type, const std::shared_ptr<OpInfo> &op_info);
static bool DecodeDtypeFormat(const nlohmann::json &dtype_format, const std::shared_ptr<OpIOInfo> &op_io,
size_t index);
static void DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
static void DecodeAKGSpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
static bool DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply_type, const OpIOType io_type,
static bool DecodeInputOutput(const nlohmann::json &obj, OpImplyType imply_type, OpIOType io_type,
const std::shared_ptr<OpInfo> &op_info, const nlohmann::json &dtype_format);
static bool GetRefInfo(const std::shared_ptr<OpInfo> &op_info);
static bool CheckRepetition(const std::shared_ptr<OpInfo> &op_info);
......
......@@ -173,7 +173,7 @@ void TbeAdapter::NormalizeFuncName(std::string *func_name) {
*func_name = name_tmp;
auto iter = tbe_func_adapter_map.find(*func_name);
if (iter != tbe_func_adapter_map.end()) {
MS_LOG(INFO) << "map actual op from me " << *func_name << " to tbe op" << iter->second;
MS_LOG(INFO) << "Map actual op from me: " << *func_name << " to tbe op: " << iter->second;
*func_name = iter->second;
}
}
......
......@@ -27,7 +27,7 @@
// the TBE back-end operator implementation difference
namespace mindspore {
namespace kernel {
enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
enum kCreaterType : int { SINGLE_BUILD = 0, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
namespace tbe {
using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
nlohmann::json *attrs_json);
......
......@@ -63,7 +63,7 @@ const std::unordered_map<std::string, size_t> type_nbyte_maps = {
const std::unordered_map<std::string, FusionType> fusion_type_maps = {
{"CONVLUTION", FusionType::CONVLUTION}, {"ELEMWISE", FusionType::ELEMWISE}, {"COMMREDUCE", FusionType::COMMREDUCE},
{"SEGMENT", FusionType::SEGMENT}, {"DYNAMIC", FusionType::DYNAMIC}, {"OPAQUE", FusionType::OPAQUE},
{"SEGMENT", FusionType::SEGMENT}, {"OPAQUE", FusionType::OPAQUE},
};
TypeId DtypeToTypeId(const std::string &dtypes) {
......
......@@ -41,8 +41,8 @@ class TbeKernelBuild {
std::vector<size_t> *output_size_list);
// Ub Fusion
static bool GenFusionScopeJson(const std::vector<AnfNodePtr> &input_nodes,
const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_str,
std::string *fusion_kernel);
const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_json,
std::string *fusion_kernel_name);
static bool GetIOSize(const nlohmann::json &fusion_op_list, const std::vector<AnfNodePtr> &output_nodes,
std::vector<size_t> *input_size_list, std::vector<size_t> *output_size_list);
......@@ -61,9 +61,14 @@ class TbeKernelBuild {
static std::vector<size_t> GetDescOutputIndex(const std::vector<int> &output_used_nums);
static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
std::vector<nlohmann::json> *output_desc_list);
static void GenPreDescJson(nlohmann::json *output_desc);
static void GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str,
std::string *fusion_kernel_name);
static void GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str);
static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
size_t desc_output_idx, nlohmann::json *output_desc,
FusionDataType fusion_data_type = kFusionNormal);
static void GenSuffixDescJson(nlohmann::json *output_desc);
static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
size_t output_index, nlohmann::json *output_desc);
static size_t GetIOSizeImpl(const nlohmann::json &desc);
......@@ -76,6 +81,7 @@ class TbeKernelBuild {
static bool IsDynamicInput(const CNodePtr &cnode);
static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input);
static std::string GetRealOpType(const std::string &origin_type);
static std::string GetNodeFusionType(const CNodePtr &cnode);
};
class TbeKernelJsonCreator {
......@@ -84,14 +90,14 @@ class TbeKernelJsonCreator {
~TbeKernelJsonCreator() = default;
bool GenTbeSingleKernelJson(const std::shared_ptr<AnfNode> &anf_node, nlohmann::json *kernel_json);
std::string json_name() { return json_name_; }
bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *attrs_json);
private:
bool GenTbeInputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *inputs_json);
bool GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *outputs_json);
bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *attrs_json);
static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj);
bool GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value,
const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i,
......
......@@ -33,42 +33,6 @@
namespace mindspore {
namespace kernel {
using mindspore::kernel::tbe::TbeUtils;
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes) {
auto build_manger = std::make_shared<ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manger);
for (const auto &anf_node : anf_nodes) {
// gen kernel json
MS_EXCEPTION_IF_NULL(anf_node);
nlohmann::json kernel_json;
TbeKernelJsonCreator creator(OP_PRE_COMPILE);
if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) {
MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
return false;
}
kernel_json["compile_type"] = "pre_build";
// op build
auto task_id = build_manger->StartCompileOp(kernel_json);
build_manger->SavePreTaskInfo(task_id, anf_node);
}
while (!build_manger->IsAllPreTaskFinish()) {
int task_id = -1;
std::string task_result;
std::string pre_build_result;
auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
if (!ret) {
MS_EXCEPTION(ArgumentError) << "Pre Build Failed. wait one ret:" << ret << ", task id:" << task_id;
}
if (task_result != "Success") {
MS_EXCEPTION(ArgumentError) << "task pre compile Failed, task id:" << task_id << ", cause:" << task_result;
}
build_manger->PreTaskFinishProcess(task_id, pre_build_result);
}
return true;
}
bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
auto build_manger = std::make_shared<ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manger);
......@@ -122,15 +86,8 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
return build_manger->GenSameOpKernelMod();
}
ParallelBuildManager::ParallelBuildManager() {}
ParallelBuildManager::~ParallelBuildManager() { ResetTaskInfo(); }
// Remembers which node the pre-build task `task_id` belongs to, so that
// PreTaskFinishProcess can look the node up again by task id.
void ParallelBuildManager::SavePreTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node) {
MS_LOG(INFO) << "SavePreTaskInfo, task id: " << task_id;
// Assigning through operator[] replaces any node already stored under the same task id.
pre_task_map_[task_id] = anf_node;
}
void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node,
const std::string &json_name, const std::vector<size_t> &input_size_list,
const std::vector<size_t> &output_size_list, int32_t scope_id) {
......@@ -149,42 +106,11 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod
task_map_[task_id] = task_info;
}
// Logs the number of pending pre-build tasks and reports whether none is left.
bool ParallelBuildManager::IsAllPreTaskFinish() const {
  const auto pending_num = pre_task_map_.size();
  MS_LOG(INFO) << "wait pre build process task_num: " << pending_num;
  return pending_num == 0;
}
// Logs the number of pending build tasks and reports whether none is left.
bool ParallelBuildManager::IsAllTaskFinish() const {
  const auto pending_num = task_map_.size();
  MS_LOG(INFO) << "wait process task_num: " << pending_num;
  return pending_num == 0;
}
// Handles the result of a finished pre-build task.
//
// The fusion pattern is the text between "fusion_pattern_start" and "fusion_pattern_end" in
// pre_build_result. When both markers are present and the text between them is not empty, the
// text is upper-cased, converted to a FusionType and written into the node's selected kernel
// build info. In every case the task is removed from the pending pre-build task map.
//
// Raises ArgumentError when task_id is not a pending pre-build task.
void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) {
  auto task_iter = pre_task_map_.find(task_id);
  if (task_iter == pre_task_map_.end()) {
    MS_EXCEPTION(ArgumentError) << "can not find pre task_id:" << task_id;
  }
  auto node = task_iter->second;
  auto builder =
    std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node));
  const std::string start_flag = "fusion_pattern_start";
  const std::string end_flag = "fusion_pattern_end";
  // Keep the positions as size_t and test against npos; narrowing them to int would
  // misreport positions in very long strings.
  const size_t start = pre_build_result.find(start_flag);
  const size_t end = pre_build_result.find(end_flag);
  if (start != std::string::npos && end != std::string::npos && end >= start + start_flag.size()) {
    const size_t pattern_begin = start + start_flag.size();
    std::string result = pre_build_result.substr(pattern_begin, end - pattern_begin);
    if (!result.empty()) {
      (void)std::transform(result.begin(), result.end(), result.begin(), ::toupper);
      FusionType fusion_type = tbe::GetFusionType(result);
      builder->SetFusionType(fusion_type);
      AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get());
    }
  }
  (void)pre_task_map_.erase(task_iter);
}
std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) {
auto task_iter = task_map_.find(task_id);
if (task_iter == task_map_.end()) {
......
......@@ -28,7 +28,6 @@
namespace mindspore {
namespace kernel {
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes);
bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes);
struct KernelBuildTaskInfo {
......@@ -42,9 +41,8 @@ struct KernelBuildTaskInfo {
class ParallelBuildManager {
public:
ParallelBuildManager();
ParallelBuildManager() = default;
~ParallelBuildManager();
void SavePreTaskInfo(int32_t task_id, const AnfNodePtr &anf_node);
void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name,
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
int32_t scope_id = 0);
......@@ -54,10 +52,7 @@ class ParallelBuildManager {
bool SearchInCache(const std::string &json_name, const std::string &processor,
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
AnfNode *node) const;
bool IsAllPreTaskFinish() const;
bool IsAllTaskFinish() const;
void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result);
std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true);
KernelModPtr GenKernelMod(const string &json_name, const string &processor,
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
......
......@@ -1187,6 +1187,19 @@ TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, s
return GetCNodeOutputPrecision(kernel_with_index.first);
}
// Returns the boolean value of the kAttrIsDynamicShape attribute of a CNode.
// Returns false when the node is not a CNode or does not carry that attribute.
// Raises an exception when node is null.
bool AnfRuntimeAlgorithm::IsDynamicShape(const AnfNodePtr &node) {
  // Reject a null node explicitly instead of dereferencing it in isa<>().
  MS_EXCEPTION_IF_NULL(node);
  if (!node->isa<CNode>()) {
    return false;
  }
  auto cnode = node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
  if (!AnfAlgo::HasNodeAttr(kAttrIsDynamicShape, cnode)) {
    return false;
  }
  return AnfAlgo::GetNodeAttr<bool>(node, kAttrIsDynamicShape);
}
bool AnfRuntimeAlgorithm::IsCondControlKernel(const CNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
if (node->inputs().empty()) {
......
......@@ -217,6 +217,7 @@ class AnfRuntimeAlgorithm {
static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node);
// get fix output precision from prev node, input_idx is the input index of current node related to prev node.
static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx);
static bool IsDynamicShape(const AnfNodePtr &node);
static bool IsCondControlKernel(const CNodePtr &node);
static bool IsIndependentNode(const CNodePtr &node);
};
......
......@@ -445,7 +445,6 @@ void AscendSession::InitRuntimeResource() {
}
void AscendSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) const {
device::ascend::KernelPreBuild(kernel_graph.get());
MS_LOG(INFO) << "HardwareOptimize start!";
opt::AscendBackendOptimization(kernel_graph);
opt::AscendGraphKernelCommonProcess(kernel_graph);
......
......@@ -19,7 +19,8 @@
#include <vector>
#include <string>
#include <memory>
#include <set>
#include <map>
#include "runtime/device/ascend/kernel_select_ascend.h"
#include "runtime/device/kernel_info.h"
#include "backend/kernel_compiler/kernel.h"
......@@ -61,32 +62,6 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) {
return kernel_mod_ptr;
}
// Walk the graph's execution order, pick the real TBE kernels that have no kernel mod
// yet and whose fusion type is DYNAMIC, and pass them to kernel::TbeOpParallelPreBuild.
// Returns whatever TbeOpParallelPreBuild returns.
static bool KernelPreBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
  std::vector<AnfNodePtr> pre_build_nodes;
  for (const auto &node : kernel_graph_ptr->execution_order()) {
    MS_EXCEPTION_IF_NULL(node);
    if (!AnfAlgo::IsRealKernel(node)) {
      continue;
    }
    // Only TBE kernels are candidates; every other kernel type is ignored.
    if (AnfAlgo::GetKernelType(node) != KernelType::TBE_KERNEL) {
      continue;
    }
    const bool needs_pre_build =
      AnfAlgo::GetKernelMod(node) == nullptr && AnfAlgo::GetFusionType(node) == kernel::FusionType::DYNAMIC;
    if (needs_pre_build) {
      pre_build_nodes.push_back(node);
    }
  }
  return kernel::TbeOpParallelPreBuild(pre_build_nodes);
}
static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
std::vector<AnfNodePtr> tbe_nodes;
......@@ -237,12 +212,6 @@ static bool IsAtomicNode(const CNodePtr &kernel_node) {
return !(workspace_indexs.empty() && output_indexs.empty());
}
// Kernel pre build for ascend: checks the graph pointer and forwards it to
// KernelPreBuildParallelCompile, returning that function's result.
bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
  return device::ascend::KernelPreBuildParallelCompile(kernel_graph_ptr);
}
bool KernelBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
TbeUtils::LoadCache();
......
......@@ -22,10 +22,6 @@
namespace mindspore {
namespace device {
namespace ascend {
/**
* @brief kernel pre build for ascend.
*/
bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr);
/**
* @brief kernel build for ascend.
*/
......
......@@ -32,6 +32,7 @@ namespace mindspore {
// op name. Ops that do not exist in operator/ops.h have their names defined here
constexpr auto kFour2FiveOpName = "Four2Five";
constexpr auto kFive2FourOpName = "Five2Four";
constexpr auto kConv2DOpName = "Conv2D";
constexpr auto kConvBN1OpName = "ConvBN1";
constexpr auto kBN2AddReluOpName = "BN2AddRelu";
constexpr auto kBN2ReLUOpName = "BN2Relu";
......@@ -273,6 +274,7 @@ constexpr auto kAttrPadDimSize = "pad_dim_size";
constexpr auto kAttrNumSegments = "num_segments";
constexpr auto kAttrBegin = "begin";
constexpr auto kAttrSize = "size";
constexpr auto kAttrIsDynamicShape = "is_dynamic_shape";
// attr value
constexpr auto kValueTargetSwitch = "target_switch";
......
......@@ -13,6 +13,7 @@
# limitations under the License.
"""aicpu ops"""
from .unique import _unique_aicpu
from .init_data_set_queue import _init_data_set_queue_aicpu
from .embedding_lookup import _embedding_lookup_aicpu
from .padding import _padding_aicpu
......
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Unique op"""
from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType
# AiCPU registration info for the "Unique" op: one required input (x), two required
# outputs (y, idx), fusion type OPAQUE, and two dtype_format entries (I32 and I64,
# default format).
unique_op_info = (
    AiCPURegOp("Unique")
    .fusion_type("OPAQUE")
    .input(0, "x", "required")
    .output(0, "y", "required")
    .output(1, "idx", "required")
    .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default)
    .dtype_format(DataType.I64_Default, DataType.I64_Default, DataType.I64_Default)
    .get_op_info()
)
@op_info_register(unique_op_info)
def _unique_aicpu():
    """Unique AiCPU register: no-op body; unique_op_info is handed to the op_info_register decorator."""
    return
......@@ -17,7 +17,7 @@
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
matmul_op_info = TBERegOp("MatMul") \
.fusion_type("ELEMWISE") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("matmul.so") \
.compute_cost(10) \
......
......@@ -91,6 +91,7 @@ from ._thor_ops import (CusBatchMatMul, CusCholeskyTrsm, CusFusedAbsMax1, CusImg
from .sparse_ops import SparseToDense
__all__ = [
'Unique',
'ReverseSequence',
'EditDistance',
'CropAndResize',
......
......@@ -597,9 +597,9 @@ class Unique(Primitive):
containing indices of elements in the input corresponding to the output tensor.
Examples:
>>> x = Tensor(np.array([1, 2, 5, 2]), mindspore.float32)
>>> x = Tensor(np.array([1, 2, 5, 2]), mindspore.int32)
>>> out = P.Unique()(x)
(Tensor([1, 2, 5], mindspore.int32), Tensor([0, 1, 2, 1], mindspore.float32))
(Tensor([1, 2, 5], mindspore.int32), Tensor([0, 1, 2, 1], mindspore.int32))
"""
@prim_attr_register
def __init__(self):
......
......@@ -35,39 +35,5 @@ StatusFactory::StatusFactory() {}
/**
 * @ingroup StatusFactory
 * @brief Give access to the rwMutex_ member of the object returned by GetInstance()
 * @return reference to that mutex
 */
std::mutex& StatusFactory::GetMutex() {
  return (*GetInstance()).rwMutex_;
}
/**
 * @ingroup TsdClient
 * @brief Get the TsdClient instance
 * @return pointer to the function-local static TsdClient object
 */
TsdClient* TsdClient::GetInstance() {
  static TsdClient client;
  return &client;
}
/**
 * @ingroup TsdClient
 * @brief Constructor; sets rankSize_ to 1
 */
TsdClient::TsdClient() {
  this->rankSize_ = 1;
}
/**
* @ingroup TsdClient
* @brief Destructor
*/
TsdClient::~TsdClient() = default;
/**
 * @ingroup TsdClient
 * @brief Command sent by the framework to start hccp and the computer process
 * @param [in] deviceId : physical device ID passed in by FMK
 * @param [in] rankSize : rankSize passed in by FMK
 * @return TDT_OK: success, or another error code
 */
TDT_StatusT TsdClient::Open(const uint32_t deviceId, const uint32_t rankSize) {
  // Mock implementation: neither argument is used and TDT_OK is always returned.
  return TDT_OK;
}
/**
 * @ingroup TsdClient
 * @brief Notify TsdClient to close the related resources
 * @param none
 * @return TDT_OK: success, or another error code
 */
TDT_StatusT TsdClient::Close() {
  // Mock implementation: always returns TDT_OK.
  return TDT_OK;
}
} // namespace tdt
#endif // TDT_MOCK_H
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册