提交 fc4bf192 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!5820 fix ub fusion

Merge pull request !5820 from jjfeing/r0.7_ub_fusion
......@@ -17,8 +17,6 @@ import json
import os
import sys
from te.platform.cce_conf import te_set_version
from te.platform.fusion_manager import op_build_cfg_dis, op_build_cfg_en, set_current_op_name, \
init_op_pattern, set_op_params, set_op_build_type, get_op_pattern, set_current_op_func_name
from te.platform.fusion_util import fusion_op
from common import check_kernel_info, get_args, get_build_in_impl_path, get_ddk_version
......@@ -27,7 +25,6 @@ build_in_impl_path = get_build_in_impl_path()
# op function list
op_build = "compile"
op_pre_build = "pre_build"
fusion_pattern_start_flag = "fusion_pattern_start"
fusion_pattern_end_flag = "fusion_pattern_end"
......@@ -83,19 +80,7 @@ def build_op(build_type, json_str):
else:
op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0)
# get function
if build_type == op_pre_build:
# set op parameter
op_build_cfg_dis()
set_current_op_func_name(op_name)
set_current_op_name(kernel_name)
init_op_pattern()
set_op_params(*outputs_args, *attrs_args, kernel_name=kernel_name)
set_op_build_type('prebuild')
if custom_flag:
py_fn_name = kernel_info['op_info']['name']
else:
py_fn_name = op_name
elif build_type == op_build:
if build_type == op_build:
if custom_flag:
py_fn_name = kernel_info['op_info']['name']
else:
......@@ -106,13 +91,6 @@ def build_op(build_type, json_str):
if op_func is None:
raise ValueError("Op:{} function {} is not supported by Tbe.".format(op_name, build_type))
# pre build
if build_type == op_pre_build:
op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
# disable only pattern configuration
op_build_cfg_en()
return get_op_pattern()
# call function
if kernel_name[0:19] == "bounding_box_encode":
return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name_val=kernel_name)
......@@ -120,8 +98,6 @@ def build_op(build_type, json_str):
return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
except Exception as e:
if build_type == op_pre_build:
op_build_cfg_en()
raise RuntimeError(e)
......@@ -136,14 +112,9 @@ def compile_fusion_op(json_str):
Exception: If specific keyword is not found.
"""
args = json.loads(json_str)
te_set_version(ddk_version)
if 'fusion_op' not in args or not args['fusion_op']:
raise ValueError("Json string Errors, key:fusion_op not found.")
if 'prebuild_ops' not in args or not args['prebuild_ops']:
raise ValueError("Json string Errors, key:prebuild_ops not found.")
pre_build_op_list = args['prebuild_ops']
for op in pre_build_op_list:
build_op(op_pre_build, json.dumps(op))
fusion_op_arg = args['fusion_op']
return fusion_op(json.dumps(fusion_op_arg))
......@@ -159,8 +130,6 @@ def compile_with_json(json_str):
json_info = json.loads(json_str)
if "fusion_op" in json_info:
ret = compile_fusion_op(json_str)
elif "compile_type" in json_info:
ret = build_op(op_pre_build, json_str)
else:
ret = build_op(op_build, json_str)
return ret
......
......@@ -37,7 +37,6 @@ enum FusionType {
COMMREDUCE,
SEGMENT,
OPAQUE,
DYNAMIC,
UNKNOWN_FUSION_TYPE = -1,
};
enum OpPattern {
......@@ -80,8 +79,8 @@ class KernelPack {
bool LoadKernelMeta(const std::string &json_f, const std::string &processor);
bool ReadFromJsonFile(const std::string &json_f, const std::string &processor);
const std::string Serialize() const;
const FlexArray *const GetJson() const { return json_; }
const FlexArray *const GetKernel() const { return kernel_; }
const FlexArray *GetJson() const { return json_; }
const FlexArray *GetKernel() const { return kernel_; }
~KernelPack() {
if (json_) {
delete[] json_;
......
......@@ -19,53 +19,36 @@
#include <map>
#include <string>
#include <memory>
#include <utility>
#include "backend/kernel_compiler/tbe/tbe_kernel_build.h"
#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h"
#include "backend/kernel_compiler/tbe/tbe_utils.h"
#include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
#include "utils/ms_context.h"
namespace mindspore {
namespace kernel {
using mindspore::kernel::tbe::TbeUtils;
static bool GenPreBuildKernelJson(const std::vector<AnfNodePtr> &compute_nodes,
std::vector<nlohmann::json> *prebuild_op_list) {
MS_EXCEPTION_IF_NULL(prebuild_op_list);
TbeKernelJsonCreator creator(PREBUILD);
for (const auto &anf_node : compute_nodes) {
nlohmann::json prebuild;
if (!creator.GenTbeSingleKernelJson(anf_node, &prebuild)) {
MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
return false;
}
(*prebuild_op_list).push_back(prebuild);
}
return true;
}
std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
MS_LOG(INFO) << "kernel fusion build start, scope size:" << fusion_scopes.size();
std::map<int32_t, KernelModPtr> kernel_mod_ret;
auto build_manger = std::make_shared<ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manger);
for (const auto &fusion_scope_iter : fusion_scopes) {
auto scope_id = fusion_scope_iter.scope_id;
string fusion_kernel_name;
nlohmann::json fusion_op;
string fusion_kernel = "te_fusion";
if (!TbeKernelBuild::GenFusionScopeJson(fusion_scope_iter.input_nodes, fusion_scope_iter.compute_nodes, &fusion_op,
&fusion_kernel)) {
&fusion_kernel_name)) {
continue;
}
// gen kernel_name & check cache
std::string json_str = fusion_op.dump();
size_t hash_id = std::hash<std::string>()(json_str);
auto json_name = fusion_kernel.append("_").append(std::to_string(hash_id));
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto device_id = context_ptr->device_id();
auto json_name =
fusion_kernel_name.append("_").append(std::to_string(hash_id)).append("_").append(std::to_string(device_id));
fusion_op["fusion_op_name"] = json_name;
// gen json for prebuild
std::vector<nlohmann::json> prebuild_op_list;
if (!GenPreBuildKernelJson(fusion_scope_iter.compute_nodes, &prebuild_op_list)) {
continue;
}
// get io size
std::vector<size_t> input_size_list;
std::vector<size_t> output_size_list;
......@@ -80,20 +63,20 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
auto kernel_mod =
build_manger->GenKernelMod(json_name, tbe::kProcessorAiCore, input_size_list, output_size_list, kernel_pack);
if (kernel_mod != nullptr) {
kernel_mod_ret[scope_id] = kernel_mod;
kernel_mod_ret[fusion_scope_iter.scope_id] = kernel_mod;
continue;
}
}
// fusion build
nlohmann::json fusion_json;
fusion_json["fusion_op"] = fusion_op;
fusion_json["prebuild_ops"] = prebuild_op_list;
auto task_id = build_manger->StartCompileOp(fusion_json);
TbeUtils::SaveJsonInfo(json_name, fusion_json.dump());
if (task_id < 0) {
MS_EXCEPTION(ArgumentError) << "start compile failed.";
}
build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list, scope_id);
build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list,
fusion_scope_iter.scope_id);
}
int build_failed_num = 0;
......
......@@ -16,6 +16,7 @@
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
#include <utility>
#include <vector>
#include <map>
#include "backend/kernel_compiler/kernel.h"
......@@ -25,11 +26,9 @@ namespace kernel {
* @brief fuse op and return a callable mod
*/
struct FusionScopeInfo {
FusionScopeInfo() {}
FusionScopeInfo(int32_t id, const std::vector<AnfNodePtr> &in, const std::vector<AnfNodePtr> &comp,
const std::vector<AnfNodePtr> &out)
: scope_id(id), input_nodes(in), compute_nodes(comp), output_nodes(out) {}
int32_t scope_id;
FusionScopeInfo(int32_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out)
: scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {}
int32_t scope_id{};
std::vector<AnfNodePtr> input_nodes;
std::vector<AnfNodePtr> compute_nodes;
std::vector<AnfNodePtr> output_nodes;
......
......@@ -40,14 +40,13 @@ class OpLib {
private:
static bool RegOpFromLocalInfo();
static bool DecodeOpInfo(const nlohmann::json &obj, const OpImplyType imply_type, const std::string &impl_path);
static bool DecodeAttr(const nlohmann::json &obj, const OpImplyType imply_type,
const std::shared_ptr<OpInfo> &op_info);
static bool DecodeOpInfo(const nlohmann::json &obj, OpImplyType imply_type, const std::string &impl_path);
static bool DecodeAttr(const nlohmann::json &obj, OpImplyType imply_type, const std::shared_ptr<OpInfo> &op_info);
static bool DecodeDtypeFormat(const nlohmann::json &dtype_format, const std::shared_ptr<OpIOInfo> &op_io,
size_t index);
static void DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
static void DecodeAKGSpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
static bool DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply_type, const OpIOType io_type,
static bool DecodeInputOutput(const nlohmann::json &obj, OpImplyType imply_type, OpIOType io_type,
const std::shared_ptr<OpInfo> &op_info, const nlohmann::json &dtype_format);
static bool GetRefInfo(const std::shared_ptr<OpInfo> &op_info);
static bool CheckRepetition(const std::shared_ptr<OpInfo> &op_info);
......
......@@ -173,7 +173,7 @@ void TbeAdapter::NormalizeFuncName(std::string *func_name) {
*func_name = name_tmp;
auto iter = tbe_func_adapter_map.find(*func_name);
if (iter != tbe_func_adapter_map.end()) {
MS_LOG(INFO) << "map actual op from me " << *func_name << " to tbe op" << iter->second;
MS_LOG(INFO) << "Map actual op from me: " << *func_name << " to tbe op: " << iter->second;
*func_name = iter->second;
}
}
......
......@@ -27,7 +27,7 @@
// the TBE back-end operator implementation difference
namespace mindspore {
namespace kernel {
enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
enum kCreaterType : int { SINGLE_BUILD = 0, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
namespace tbe {
using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
nlohmann::json *attrs_json);
......
......@@ -63,7 +63,7 @@ const std::unordered_map<std::string, size_t> type_nbyte_maps = {
const std::unordered_map<std::string, FusionType> fusion_type_maps = {
{"CONVLUTION", FusionType::CONVLUTION}, {"ELEMWISE", FusionType::ELEMWISE}, {"COMMREDUCE", FusionType::COMMREDUCE},
{"SEGMENT", FusionType::SEGMENT}, {"DYNAMIC", FusionType::DYNAMIC}, {"OPAQUE", FusionType::OPAQUE},
{"SEGMENT", FusionType::SEGMENT}, {"OPAQUE", FusionType::OPAQUE},
};
TypeId DtypeToTypeId(const std::string &dtypes) {
......
......@@ -41,8 +41,8 @@ class TbeKernelBuild {
std::vector<size_t> *output_size_list);
// UB fusion
static bool GenFusionScopeJson(const std::vector<AnfNodePtr> &input_nodes,
const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_str,
std::string *fusion_kernel);
const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_json,
std::string *fusion_kernel_name);
static bool GetIOSize(const nlohmann::json &fusion_op_list, const std::vector<AnfNodePtr> &output_nodes,
std::vector<size_t> *input_size_list, std::vector<size_t> *output_size_list);
......@@ -61,9 +61,14 @@ class TbeKernelBuild {
static std::vector<size_t> GetDescOutputIndex(const std::vector<int> &output_used_nums);
static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
std::vector<nlohmann::json> *output_desc_list);
static void GenPreDescJson(nlohmann::json *output_desc);
static void GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str,
std::string *fusion_kernel_name);
static void GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str);
static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
size_t desc_output_idx, nlohmann::json *output_desc,
FusionDataType fusion_data_type = kFusionNormal);
static void GenSuffixDescJson(nlohmann::json *output_desc);
static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
size_t output_index, nlohmann::json *output_desc);
static size_t GetIOSizeImpl(const nlohmann::json &desc);
......@@ -76,6 +81,7 @@ class TbeKernelBuild {
static bool IsDynamicInput(const CNodePtr &cnode);
static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input);
static std::string GetRealOpType(const std::string &origin_type);
static std::string GetNodeFusionType(const CNodePtr &cnode);
};
class TbeKernelJsonCreator {
......@@ -84,14 +90,14 @@ class TbeKernelJsonCreator {
~TbeKernelJsonCreator() = default;
bool GenTbeSingleKernelJson(const std::shared_ptr<AnfNode> &anf_node, nlohmann::json *kernel_json);
std::string json_name() { return json_name_; }
bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *attrs_json);
private:
bool GenTbeInputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *inputs_json);
bool GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *outputs_json);
bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *attrs_json);
static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj);
bool GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value,
const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i,
......
......@@ -33,42 +33,6 @@
namespace mindspore {
namespace kernel {
using mindspore::kernel::tbe::TbeUtils;
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes) {
auto build_manger = std::make_shared<ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manger);
for (const auto &anf_node : anf_nodes) {
// gen kernel json
MS_EXCEPTION_IF_NULL(anf_node);
nlohmann::json kernel_json;
TbeKernelJsonCreator creator(OP_PRE_COMPILE);
if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) {
MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
return false;
}
kernel_json["compile_type"] = "pre_build";
// op build
auto task_id = build_manger->StartCompileOp(kernel_json);
build_manger->SavePreTaskInfo(task_id, anf_node);
}
while (!build_manger->IsAllPreTaskFinish()) {
int task_id = -1;
std::string task_result;
std::string pre_build_result;
auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
if (!ret) {
MS_EXCEPTION(ArgumentError) << "Pre Build Failed. wait one ret:" << ret << ", task id:" << task_id;
}
if (task_result != "Success") {
MS_EXCEPTION(ArgumentError) << "task pre compile Failed, task id:" << task_id << ", cause:" << task_result;
}
build_manger->PreTaskFinishProcess(task_id, pre_build_result);
}
return true;
}
bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
auto build_manger = std::make_shared<ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manger);
......@@ -123,15 +87,8 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
return build_manger->GenSameOpKernelMod();
}
// Default constructor; the body is empty, so no work is done here.
ParallelBuildManager::ParallelBuildManager() {}
// Calls ResetTaskInfo() when the manager is destroyed.
ParallelBuildManager::~ParallelBuildManager() { ResetTaskInfo(); }
// Records which node a pre-build task belongs to. An existing entry for the same
// task id is overwritten.
void ParallelBuildManager::SavePreTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node) {
  MS_LOG(INFO) << "SavePreTaskInfo, task id: " << task_id;
  auto &node_slot = pre_task_map_[task_id];
  node_slot = anf_node;
}
void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node,
const std::string &json_name, const std::vector<size_t> &input_size_list,
const std::vector<size_t> &output_size_list, int32_t scope_id) {
......@@ -150,42 +107,11 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod
task_map_[task_id] = task_info;
}
// Logs the number of pending pre-build tasks and returns true when none are left.
bool ParallelBuildManager::IsAllPreTaskFinish() const {
  const auto pending_num = pre_task_map_.size();
  MS_LOG(INFO) << "wait pre build process task_num: " << pending_num;
  return pending_num == 0;
}
// Logs the number of pending build tasks and returns true when none are left.
bool ParallelBuildManager::IsAllTaskFinish() const {
  const auto pending_num = task_map_.size();
  MS_LOG(INFO) << "wait process task_num: " << pending_num;
  return pending_num == 0;
}
// Handles the result of a finished pre-build task and removes the task from the
// pending pre-build map.
//
// task_id:          id of the finished task; ArgumentError is raised if it is unknown.
// pre_build_result: text returned by the pre-build. When it contains a non-empty value
//                   between "fusion_pattern_start" and "fusion_pattern_end", that value
//                   is upper-cased, converted with tbe::GetFusionType and written into
//                   the selected kernel build info of the node owning the task.
//                   Otherwise the node is left untouched.
void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) {
  auto task_iter = pre_task_map_.find(task_id);
  if (task_iter == pre_task_map_.end()) {
    MS_EXCEPTION(ArgumentError) << "can not find pre task_id:" << task_id;
  }
  // Copy the node handle so it stays usable independently of the map entry.
  auto node = task_iter->second;
  auto builder =
    std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node));
  const std::string start_flag = "fusion_pattern_start";
  const std::string end_flag = "fusion_pattern_end";
  // Keep the positions as size_t and compare against npos: narrowing the result of
  // find() to int is implementation-defined and breaks on very large strings.
  const size_t start = pre_build_result.find(start_flag);
  const size_t end = pre_build_result.find(end_flag);
  if (start != std::string::npos && end != std::string::npos) {
    const size_t pattern_begin = start + start_flag.size();
    // Requiring the end marker to follow the whole start marker keeps the length
    // passed to substr() from underflowing.
    if (end >= pattern_begin) {
      std::string result = pre_build_result.substr(pattern_begin, end - pattern_begin);
      if (!result.empty()) {
        // toupper() is only defined for values representable as unsigned char (or EOF).
        std::transform(result.begin(), result.end(), result.begin(),
                       [](char c) { return static_cast<char>(::toupper(static_cast<unsigned char>(c))); });
        FusionType fusion_type = tbe::GetFusionType(result);
        builder->SetFusionType(fusion_type);
        AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get());
      }
    }
  }
  // The task is done whether or not a fusion pattern was reported.
  (void)pre_task_map_.erase(task_iter);
}
std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) {
auto task_iter = task_map_.find(task_id);
if (task_iter == task_map_.end()) {
......
......@@ -28,7 +28,6 @@
namespace mindspore {
namespace kernel {
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes);
bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes);
struct KernelBuildTaskInfo {
......@@ -42,9 +41,8 @@ struct KernelBuildTaskInfo {
class ParallelBuildManager {
public:
ParallelBuildManager();
ParallelBuildManager() = default;
~ParallelBuildManager();
void SavePreTaskInfo(int32_t task_id, const AnfNodePtr &anf_node);
void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name,
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
int32_t scope_id = 0);
......@@ -54,10 +52,7 @@ class ParallelBuildManager {
bool SearchInCache(const std::string &json_name, const std::string &processor,
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
AnfNode *node) const;
bool IsAllPreTaskFinish() const;
bool IsAllTaskFinish() const;
void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result);
std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true);
KernelModPtr GenKernelMod(const string &json_name, const string &processor,
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
......
......@@ -474,7 +474,6 @@ void AscendSession::InitRuntimeResource() {
}
void AscendSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) const {
device::ascend::KernelPreBuild(kernel_graph.get());
MS_LOG(INFO) << "HardwareOptimize start!";
opt::AscendBackendOptimization(kernel_graph);
opt::AscendGraphKernelCommonProcess(kernel_graph);
......
......@@ -19,7 +19,8 @@
#include <vector>
#include <string>
#include <memory>
#include <set>
#include <map>
#include "runtime/device/ascend/kernel_select_ascend.h"
#include "runtime/device/kernel_info.h"
#include "backend/kernel_compiler/kernel.h"
......@@ -61,32 +62,6 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) {
return kernel_mod_ptr;
}
// Collects the nodes of the graph's execution order that need a pre-build and passes
// them to kernel::TbeOpParallelPreBuild, whose result is returned.
// A node is collected when it is a real kernel of type TBE_KERNEL, has no kernel mod
// yet, and its fusion type is FusionType::DYNAMIC.
static bool KernelPreBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
  std::vector<AnfNodePtr> pre_build_nodes;
  for (const auto &anf_node : kernel_graph_ptr->execution_order()) {
    MS_EXCEPTION_IF_NULL(anf_node);
    // Short-circuit evaluation keeps the queries in their original order.
    const bool need_pre_build = AnfAlgo::IsRealKernel(anf_node) &&
                                AnfAlgo::GetKernelType(anf_node) == KernelType::TBE_KERNEL &&
                                AnfAlgo::GetKernelMod(anf_node) == nullptr &&
                                AnfAlgo::GetFusionType(anf_node) == kernel::FusionType::DYNAMIC;
    if (need_pre_build) {
      pre_build_nodes.push_back(anf_node);
    }
  }
  return kernel::TbeOpParallelPreBuild(pre_build_nodes);
}
static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
std::vector<AnfNodePtr> tbe_nodes;
......@@ -230,12 +205,6 @@ static bool IsAtomicNode(const CNodePtr &kernel_node) {
return !(workspace_indexs.empty() && output_indexs.empty());
}
// Kernel pre-build entry point for Ascend: rejects a null graph, then returns the
// result of KernelPreBuildParallelCompile for that graph.
bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
  return device::ascend::KernelPreBuildParallelCompile(kernel_graph_ptr);
}
bool KernelBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
TbeUtils::LoadCache();
......
......@@ -22,10 +22,6 @@
namespace mindspore {
namespace device {
namespace ascend {
/**
* @brief kernel pre build for ascend.
*/
bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr);
/**
* @brief kernel build for ascend.
*/
......
......@@ -32,6 +32,7 @@ namespace mindspore {
// Op names. These ops do not exist in operator/ops.h, so their names are defined here.
constexpr auto kFour2FiveOpName = "Four2Five";
constexpr auto kFive2FourOpName = "Five2Four";
constexpr auto kConv2DOpName = "Conv2D";
constexpr auto kConvBN1OpName = "ConvBN1";
constexpr auto kBN2AddReluOpName = "BN2AddRelu";
constexpr auto kBN2ReLUOpName = "BN2Relu";
......
......@@ -17,7 +17,7 @@
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
matmul_op_info = TBERegOp("MatMul") \
.fusion_type("ELEMWISE") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("matmul.so") \
.compute_cost(10) \
......
......@@ -367,8 +367,7 @@ def test_resnet_and_resnet_thor_imagenet_4p():
os.system("rm -rf " + str(i))
print("End training...")
assert acc > 0.15
# the original perf is: 20 in C75B100
assert cost < 22
assert cost < 20
# THOR
thor_acc = 0.0
......@@ -384,5 +383,4 @@ def test_resnet_and_resnet_thor_imagenet_4p():
os.system("rm -rf " + str(i))
print("End training...")
assert thor_acc > 0.22
# the original perf is: 21 in C75B100
assert thor_cost < 23
assert thor_cost < 21
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册