提交 43c79eb8 编写于 作者: L liubuyu

mindspore path adjust

上级 0746bffb

要显示的变更太多。

To preserve performance only 1000 of 1000+ files are displayed.
## common setting
include_directories(${CMAKE_SOURCE_DIR}/mindspore/core)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_BINARY_DIR})
link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine)
......@@ -35,20 +36,20 @@ if(ENABLE_GPU)
include_directories(${CUDNN_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS})
file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"device/gpu/*.cc"
"device/gpu/*.cu"
"kernel/gpu/*.cu"
"kernel/akg/gpu/*.cc"
"kernel/akg/akg_kernel_build.cc"
"kernel/akg/akg_kernel_attrs_process.cc"
"runtime/device/gpu/*.cc"
"runtime/device/gpu/*.cu"
"backend/kernel_compiler/gpu/*.cu"
"backend/kernel_compiler/akg/gpu/*.cc"
"backend/kernel_compiler/akg/akg_kernel_build.cc"
"backend/kernel_compiler/akg/akg_kernel_attrs_process.cc"
)
list(APPEND CUDA_NVCC_FLAGS -arch=sm_53)
list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/blocking_queue.cc" "device/gpu/gpu_buffer_mgr.cc")
list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/mpi/mpi_initializer.cc"
"device/gpu/distribution/collective_wrapper.cc"
"device/gpu/distribution/mpi_wrapper.cc"
"device/gpu/distribution/nccl_wrapper.cc"
list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/blocking_queue.cc" "runtime/device/gpu/gpu_buffer_mgr.cc")
list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/mpi/mpi_initializer.cc"
"runtime/device/gpu/distribution/collective_wrapper.cc"
"runtime/device/gpu/distribution/mpi_wrapper.cc"
"runtime/device/gpu/distribution/nccl_wrapper.cc"
)
set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
......@@ -101,15 +102,15 @@ if (ENABLE_DUMP_PROTO)
endif ()
if (ENABLE_D)
include_directories("${CMAKE_BINARY_DIR}/kernel/aicpu")
include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu")
include_directories("${CMAKE_BINARY_DIR}/predict/generator/ir")
file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel/aicpu/proto/*.proto")
file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "backend/kernel_compiler/aicpu/proto/*.proto")
ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN})
file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto")
ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER})
file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/dump/proto/*.proto")
file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "runtime/device/ascend/dump/proto/*.proto")
ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP})
list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS})
......@@ -125,18 +126,32 @@ if (MINDSPORE_PROTO_LIST)
endif()
## make sub objects
set(SUB_COMP
transform pre_activate parallel pipeline device kernel common debug gvar ir onnx operator optimizer predict
pybind_api pynative session utils vm base abstract
set(SUB_COMP
transform/graph_ir
transform/onnx
backend/optimizer
backend/kernel_compiler
backend/session
runtime/device
frontend/optimizer
frontend/parallel
frontend/operator
pipeline/jit
pipeline/pynative
common debug gvar predict pybind_api utils vm base abstract
)
foreach (_comp ${SUB_COMP})
add_subdirectory(${_comp})
if (TARGET _mindspore_${_comp}_obj)
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${_comp}_obj>)
add_dependencies(_mindspore_${_comp}_obj proto_input flat_input)
string(REPLACE "/" "_" sub ${_comp})
if (TARGET _mindspore_${sub}_obj)
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${sub}_obj>)
add_dependencies(_mindspore_${sub}_obj proto_input flat_input)
endif ()
endforeach ()
add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/ir ir)
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_ir_obj>)
add_dependencies(_mindspore_ir_obj proto_input flat_input)
set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
add_library(mindspore STATIC ${SUB_OBJECTS_SRC})
......@@ -207,8 +222,8 @@ endif()
# set c_expression building
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
set_property(SOURCE "pipeline/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE)
pybind11_add_module(_c_expression "pipeline/init.cc")
set_property(SOURCE "pipeline/jit/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE)
pybind11_add_module(_c_expression "pipeline/jit/init.cc")
MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}")
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
......@@ -265,8 +280,8 @@ if (ENABLE_CPU)
endif ()
if (ENABLE_MINDDATA)
add_subdirectory(mindrecord)
add_subdirectory(dataset)
add_subdirectory(minddata/mindrecord)
add_subdirectory(minddata/dataset)
endif ()
# build inference
......@@ -275,7 +290,7 @@ set(LOAD_ONNX_SRC
${CMAKE_CURRENT_SOURCE_DIR}/utils/load_onnx/anf_model_parser.cc
)
add_library(inference SHARED
${CMAKE_CURRENT_SOURCE_DIR}/session/session.cc
${CMAKE_CURRENT_SOURCE_DIR}/backend/session/session.cc
${LOAD_ONNX_SRC}
)
target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY}
......
file(GLOB_RECURSE KERNEL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"kernel_build_info.cc"
"kash/*.cc"
"common_utils.cc"
"oplib/*.cc"
)
if (ENABLE_D)
file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"kernel_query.cc"
"kernel_fusion.cc"
"akg/ascend/*.cc"
"akg/akg_kernel_build.cc"
"akg/akg_kernel_attrs_process.cc"
"akg/akg_kernel_metadata.cc"
"tbe/*.cc"
"aicpu/*.cc"
"rts/*.cc"
"hccl/*.cc"
)
add_compile_definitions(ENABLE_D)
endif ()
if (ENABLE_CPU)
file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"cpu/*.cc"
)
list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/push_kernel.cc"
"cpu/ps/pull_kernel.cc"
"cpu/ps/embedding_look_up_ps_kernel.cc"
"cpu/ps/embedding_look_up_proxy_kernel.cc"
"cpu/ps/apply_momentum_ps_kernel.cc"
"cpu/ps/sparse_apply_adam_ps_kernel.cc"
"cpu/ps/sparse_apply_ftrl_ps_kernel.cc")
if (NOT ENABLE_MPI)
list(REMOVE_ITEM CPU_SRC_LIST "cpu/allgather_cpu_kernel.cc")
list(REMOVE_ITEM CPU_SRC_LIST "cpu/reduce_scatter_cpu_kernel.cc")
list(REMOVE_ITEM CPU_SRC_LIST "cpu/embedding_look_up_comm_grad_cpu_kernel.cc")
endif ()
endif ()
if (ENABLE_GPU)
file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"gpu/*.cu"
"akg/gpu/*.cc"
"akg/akg_kernel_build.cc"
"akg/akg_kernel_attrs_process.cc"
)
file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/*.cc")
list(REMOVE_ITEM GPU_SRC_LIST "gpu/nccl/nccl_gpu_kernel.cc")
if (ENABLE_MPI)
include(ExternalProject)
file(GLOB_RECURSE GPU_NCCL_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/nccl/*.cc")
list(APPEND GPU_SRC_LIST ${GPU_NCCL_LIST})
endif ()
# add_library(_mindspore_kernel_cuda_obj OBJECT ${CUDA_SRC_LIST})
endif()
set_property(SOURCE ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST}
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_KERNEL)
add_library(_mindspore_backend_kernel_compiler_obj OBJECT ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST})
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h"
#include <google/protobuf/text_format.h>
#include <fstream>
#include <utility>
#include <string>
#include <vector>
#include <memory>
#include <algorithm>
#include <map>
#include "runtime/device/kernel_runtime.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
#include "proto/tensor.pb.h"
#include "proto/tensor_shape.pb.h"
#include "proto/attr.pb.h"
#include "proto/node_def.pb.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "common/utils.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "backend/session/kernel_graph.h"
#include "backend/kernel_compiler/common_utils.h"
namespace mindspore {
namespace kernel {
using FNodeAttrHandle = std::function<void(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto)>;
bool SetIOIputSize(const std::shared_ptr<AnfNode> &anf_node, const size_t &input_num,
std::vector<size_t> *input_size_list) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(input_size_list);
for (size_t i = 0; i < input_num; i++) {
std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
if (AnfAlgo::GetInputDeviceDataType(anf_node, i) == kObjectTypeString) {
if (!anf_node->isa<CNode>()) {
MS_LOG(EXCEPTION) << "anf_node is not CNode.";
}
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (cnode->inputs().size() < (i + 1)) {
MS_LOG(ERROR) << "cnode inputs size " << cnode->inputs().size() << " is smaller than " << i + 1;
return false;
}
auto input_node = cnode->inputs()[i + 1];
MS_EXCEPTION_IF_NULL(input_node);
if (input_node->isa<ValueNode>()) {
auto value_ptr = GetValueNode(input_node);
auto value = GetValue<std::string>(value_ptr);
input_size_list->push_back(value.size());
}
} else {
auto type_ptr = TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, i));
MS_EXCEPTION_IF_NULL(type_ptr);
int64_t size_i = 1;
for (size_t j = 0; j < shape_i.size(); j++) {
size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
}
size_t type_byte = GetTypeByte(type_ptr);
if (type_byte == 0) {
return false;
}
size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
input_size_list->push_back(LongToSize(size_i));
}
}
return true;
}
bool SetIOSize(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
std::vector<size_t> input_size_list;
std::vector<size_t> output_size_list;
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
if (!SetIOIputSize(anf_node, input_num, &input_size_list)) {
return false;
}
kernel_mod_ptr->SetInputSizeList(input_size_list);
for (size_t i = 0; i < output_num; i++) {
std::vector<size_t> shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i));
MS_EXCEPTION_IF_NULL(type_ptr);
int64_t size_i = 1;
for (size_t j = 0; j < shape_i.size(); j++) {
size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
}
size_t type_byte = GetTypeByte(type_ptr);
if (type_byte == 0) {
return false;
}
size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
output_size_list.push_back(LongToSize(size_i));
}
kernel_mod_ptr->SetOutputSizeList(output_size_list);
return true;
}
void ParseAttrValue(const std::string &type, const std::string &attr_name, const mindspore::ValuePtr &value,
::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr) {
MS_EXCEPTION_IF_NULL(node_attr);
MS_EXCEPTION_IF_NULL(value);
if (type == "int") {
auto attr_value = GetValue<int>(value);
(*node_attr)[attr_name].set_i(attr_value);
} else if (type == "str") {
auto attr_value = GetValue<std::string>(value);
(*node_attr)[attr_name].set_s(attr_value);
} else if (type == "bool") {
auto attr_value = GetValue<bool>(value);
(*node_attr)[attr_name].set_b(attr_value);
} else if (type == "float") {
auto attr_value = GetValue<float>(value);
(*node_attr)[attr_name].set_f(attr_value);
} else if (type == "listInt") {
std::vector<int> attr_value;
auto value_type = value->type();
MS_EXCEPTION_IF_NULL(value_type);
auto value_type_str = value_type->ToString();
if (value_type_str == "Int32") {
int data = GetValue<int>(value);
attr_value.push_back(data);
} else {
attr_value = GetValue<std::vector<int>>(value);
}
mindspore::AttrValue input_shape_attr;
mindspore::AttrValue_ArrayValue *input_shape_attr_list = input_shape_attr.mutable_array();
MS_EXCEPTION_IF_NULL(input_shape_attr_list);
for (const auto shape : attr_value) {
input_shape_attr_list->add_i(shape);
}
(*node_attr)[attr_name] = input_shape_attr;
} else {
MS_LOG(EXCEPTION) << "type: " << type << "not support";
}
}
void SetNodeAttr(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(proto);
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
if (op_name == kInitDataSetQueue) {
op_name = kInitData;
}
if (op_name == kPrint) {
return;
}
auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU);
MS_EXCEPTION_IF_NULL(op_info_ptr);
auto attrs_ptr = op_info_ptr->attrs_ptr();
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(primitive);
::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs();
for (const auto &attr_ptr : attrs_ptr) {
MS_EXCEPTION_IF_NULL(attr_ptr);
std::string attr_name = attr_ptr->name();
auto value = primitive->GetAttr(attr_name);
if (value != nullptr) {
if (attr_name == kQueueName || attr_name == kSharedName) {
attr_name = kChannelName;
} else if (attr_name == kSeed0) {
attr_name = kSeed;
} else if (attr_name == kSeed1) {
attr_name = kSeed2;
}
std::string type = attr_ptr->type();
ParseAttrValue(type, attr_name, value, node_attr);
}
}
MS_LOG(INFO) << "Set node attr end!";
}
void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
MS_EXCEPTION_IF_NULL(proto);
MS_EXCEPTION_IF_NULL(anf_node);
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
if (input_num == 0) {
MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have input.";
return;
}
for (size_t input_index = 0; input_index < input_num; input_index++) {
::mindspore::Tensor *node_inputs = proto->add_inputs();
MS_EXCEPTION_IF_NULL(node_inputs);
TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
std::vector<size_t> input_shape;
int32_t input_data_type;
if (input_type == kObjectTypeString) {
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
auto input_node = cnode->inputs()[input_index + 1];
auto value_ptr = GetValueNode(input_node);
auto value = GetValue<std::string>(value_ptr);
input_shape.push_back(1);
input_shape.push_back(value.size());
input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
} else {
input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
}
mindspore::TensorShape *tensorShape = node_inputs->mutable_tensor_shape();
for (auto item : input_shape) {
mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
dim->set_size((::google::protobuf::int64)item);
}
node_inputs->set_tensor_type((mindspore::DataType)input_data_type);
node_inputs->set_mem_device("HBM");
}
}
void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
MS_EXCEPTION_IF_NULL(proto);
MS_EXCEPTION_IF_NULL(anf_node);
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
if (output_num == 0) {
MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have output. ";
return;
}
for (size_t output_index = 0; output_index < output_num; output_index++) {
::mindspore::Tensor *node_outputs = proto->add_outputs();
MS_EXCEPTION_IF_NULL(node_outputs);
std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
mindspore::TensorShape *tensorShape = node_outputs->mutable_tensor_shape();
MS_EXCEPTION_IF_NULL(tensorShape);
for (auto item : output_shape) {
mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
MS_EXCEPTION_IF_NULL(dim);
dim->set_size((::google::protobuf::int64)item);
}
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
node_outputs->set_tensor_type((mindspore::DataType)output_data_type);
node_outputs->set_mem_device("HBM");
}
}
void SetNodedefProto(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(proto);
MS_LOG(INFO) << "SetNodedefProto entry";
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
if (op_name == kInitDataSetQueue) {
op_name = kInitData;
}
// set op name
proto->set_op(op_name);
// set inputs tensor
SetNodeInputs(anf_node, proto);
// set outputs tensor
SetNodeOutputs(anf_node, proto);
// set node attr
SetNodeAttr(anf_node, proto);
MS_LOG(INFO) << "SetNodedefProto end!";
}
bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
MS_EXCEPTION_IF_NULL(anf_node);
MS_LOG(INFO) << "CreateNodeDefBytes entry";
mindspore::NodeDef proto;
SetNodedefProto(anf_node, &proto);
std::string nodeDefStr;
if (!proto.SerializeToString(&nodeDefStr)) {
MS_LOG(ERROR) << "Serialize nodeDef to string failed.";
return false;
}
kernel_mod_ptr->SetNodeDef(nodeDefStr);
MS_LOG(INFO) << "CreateNodeDefBytes end!";
return true;
}
KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
if (op_name == kInitDataSetQueue) {
op_name = kInitData;
}
auto kernel_mod_ptr = std::make_shared<AicpuOpKernelMod>();
MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
kernel_mod_ptr->SetAnfNode(anf_node);
kernel_mod_ptr->SetNodeName(op_name);
if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) {
MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
}
if (!SetIOSize(anf_node, kernel_mod_ptr)) {
MS_LOG(EXCEPTION) << "Set input output size list failed.";
}
return kernel_mod_ptr;
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_
#include <memory>
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace kernel {
KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h"
#include <memory>
#include <string>
#include "backend/kernel_compiler/oplib/oplib.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace kernel {
void AicpuMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list) {
MS_LOG(INFO) << "AicpuMetadataInfo.";
MS_EXCEPTION_IF_NULL(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_info_list);
std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
if (op_name == kInitDataSetQueue) {
op_name = kInitData;
}
auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU);
if (op_info_ptr == nullptr) {
MS_LOG(DEBUG) << "Aicpu does not have op [" << op_name << "]";
return;
}
// For compatibility with the current framework
if (op_name == kPrint || op_name == kGetNext || op_name == kPack) {
std::vector<std::string> inputs_format{};
std::vector<TypeId> inputs_type{};
if (op_name == kPrint || op_name == kPack) {
for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) {
inputs_format.emplace_back(kOpFormat_DEFAULT);
inputs_type.push_back(AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index));
}
}
std::vector<std::string> outputs_format;
std::vector<TypeId> outputs_type;
for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) {
outputs_format.emplace_back(kOpFormat_DEFAULT);
outputs_type.push_back(AnfAlgo::GetOutputInferDataType(kernel_node, output_index));
}
auto builder = KernelBuildInfo::KernelBuildInfoBuilder();
builder.SetInputsFormat(inputs_format);
builder.SetInputsDeviceType(inputs_type);
builder.SetOutputsFormat(outputs_format);
builder.SetOutputsDeviceType(outputs_type);
builder.SetProcessor(AICPU);
builder.SetKernelType(AICPU_KERNEL);
builder.SetFusionType(OPAQUE);
kernel_info_list->push_back(builder.Build());
return;
}
if (!ParseMetadata(kernel_node, op_info_ptr, AICPU, kernel_info_list)) {
MS_LOG(WARNING) << "Aicpu parsed metadata op [" << op_name << "] failed";
return;
}
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_META_DATA_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_META_DATA_H_
#include <string>
#include <vector>
#include <memory>
#include "backend/kernel_compiler/kernel_build_info.h"
namespace mindspore {
namespace kernel {
void AicpuMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_META_DATA_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
#include <memory>
#include <vector>
#include <string>
#include <algorithm>
#include "runtime/mem.h"
#include "runtime/rt.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h"
#include "utils/convert_utils.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "utils/context/ms_context.h"
using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>;
namespace mindspore {
namespace kernel {
constexpr auto AICPU_OPS_SO_NAME = "libaicpu_kernels.so";
AicpuOpKernelMod::AicpuOpKernelMod() : anf_node_(nullptr) {}
AicpuOpKernelMod::~AicpuOpKernelMod() {
args_.clear();
inputList_.clear();
outputList_.clear();
anf_node_ = nullptr;
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
}
void AicpuOpKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
const std::vector<size_t> &AicpuOpKernelMod::GetInputSizeList() const { return input_size_list_; }
void AicpuOpKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; }
const std::vector<size_t> &AicpuOpKernelMod::GetOutputSizeList() const { return output_size_list_; }
void AicpuOpKernelMod::SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; }
const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) { inputList_ = inputList; }
void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { outputList_ = outputList; }
void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); }
void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; }
void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
anf_node_ = anf_node;
}
void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) {
MS_LOG(INFO) << "CreateCpuKernelInfoOffline start";
node_so_ = AICPU_OPS_SO_NAME;
// InputOutputAddr
vector<void *> io_addrs;
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(io_addrs),
[](const AddressPtr &input) -> void * { return input->addr; });
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(io_addrs),
[](const AddressPtr &output) -> void * { return output->addr; });
auto io_addrs_num = io_addrs.size();
// calculate paramLen: AicpuParamHead.len + ioAddrsSize + notifyId.len + customizedAttr.len
auto param_len = sizeof(AicpuParamHead);
// get input and output addrs size, no need to check overflow
auto io_addrs_size = io_addrs_num * sizeof(uint64_t);
// refresh paramLen, no need to check overflow
param_len += io_addrs_size;
auto node_def_len = node_def_str_.length();
param_len += node_def_len;
// Create taskArgs: AicpuParamHead + ioAddrs + notifyId + customizedAttr
AicpuParamHead paramHead = {static_cast<uint32_t>(param_len), static_cast<uint32_t>(io_addrs_num)};
args_.clear();
(void)args_.append(reinterpret_cast<const char *>(&paramHead), sizeof(AicpuParamHead));
// TaskArgs append ioAddrs
if (io_addrs_size != 0) {
(void)args_.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs_size);
}
// When it's aicpu customized ops, taskArgs should append customized attr
if (node_def_len != 0) {
(void)args_.append(reinterpret_cast<const char *>(node_def_str_.data()), node_def_len);
}
MS_LOG(INFO) << "CreateCpuKernelInfoOffline end";
}
bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (stream_ptr == nullptr) {
MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
return false;
}
CreateCpuKernelInfo(inputs, outputs);
if (node_name_ == kTopK) {
node_name_ = kTopKV2;
}
MS_LOG(INFO) << "Aicpu launch, node_so_:" << node_so_ << ", node name:" << node_name_
<< ", args_size:" << args_.length();
if (rtCpuKernelLaunch(reinterpret_cast<const void *>(node_so_.c_str()),
reinterpret_cast<const void *>(node_name_.c_str()), 1,
reinterpret_cast<const void *>(args_.data()), static_cast<uint32_t>(args_.length()), nullptr,
stream_ptr) != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Aicpu op launch failed!";
return false;
}
return true;
}
std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
MS_LOG(INFO) << "AicpuOpKernelMod GenTask start";
stream_id_ = stream_id;
node_so_ = AICPU_OPS_SO_NAME;
std::vector<void *> input_data_addrs;
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs),
[](const AddressPtr &input) -> void * { return input->addr; });
std::vector<void *> output_data_addrs;
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs),
[](const AddressPtr &output) -> void * { return output->addr; });
if (node_name_ == kTopK) {
node_name_ = kTopKV2;
}
AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());
MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
return {task_info_ptr};
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_MOD_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_MOD_H_
#include <vector>
#include <memory>
#include <string>
#include "backend/kernel_compiler/ascend_kernel_mod.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
namespace mindspore {
namespace kernel {
class AicpuOpKernelMod : public AscendKernelMod {
public:
AicpuOpKernelMod();
~AicpuOpKernelMod() override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
void SetInputList(const std::vector<int64_t> &inputList);
void SetOutputList(const std::vector<int64_t> &outputList);
void SetAnfNode(const AnfNodePtr &anf_node);
void SetNodeDef(const std::string &nodeDef);
void SetNodeName(const std::string &node_name);
/**
* @brief Build AICPU Engine kernel structure, and allocate device memory for offline task generate
* @return SUCCESS
* @return FAIL
*
*/
void CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
void SetInputSizeList(const std::vector<size_t> &size_list);
void SetOutputSizeList(const std::vector<size_t> &size_list);
void SetWorkspaceSizeList(const std::vector<size_t> &size_list);
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
private:
std::string args_;
std::string node_def_str_;
std::string node_name_;
std::string node_so_;
std::vector<int64_t> inputList_;
std::vector<int64_t> outputList_;
AnfNodePtr anf_node_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
using AicpuOpKernelModPtr = std::shared_ptr<AicpuOpKernelMod>;
using AicputOpKernelModPtrList = std::vector<AicpuOpKernelModPtr>;
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_MOD_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include <vector>
#include <string>
#include "proto/types.pb.h"
#include "runtime/mem.h"
#include "runtime/rt.h"
#include "utils/convert_utils.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace kernel {
static std::map<int32_t, int32_t> MS_PROTO_DATA_TYPE_MAP = {
{mindspore::TypeId::kTypeUnknown, mindspore::DataType::MS_UNKNOWN},
{mindspore::TypeId::kNumberTypeBool, mindspore::DataType::MS_BOOL},
{mindspore::TypeId::kNumberTypeInt, mindspore::DataType::MS_INT32},
{mindspore::TypeId::kNumberTypeInt8, mindspore::DataType::MS_INT8},
{mindspore::TypeId::kNumberTypeInt16, mindspore::DataType::MS_INT16},
{mindspore::TypeId::kNumberTypeInt32, mindspore::DataType::MS_INT32},
{mindspore::TypeId::kNumberTypeInt64, mindspore::DataType::MS_INT64},
{mindspore::TypeId::kNumberTypeUInt, mindspore::DataType::MS_UINT32},
{mindspore::TypeId::kNumberTypeUInt8, mindspore::DataType::MS_UINT8},
{mindspore::TypeId::kNumberTypeUInt16, mindspore::DataType::MS_UINT16},
{mindspore::TypeId::kNumberTypeUInt32, mindspore::DataType::MS_UINT32},
{mindspore::TypeId::kNumberTypeUInt64, mindspore::DataType::MS_UINT64},
{mindspore::TypeId::kNumberTypeFloat16, mindspore::DataType::MS_FLOAT16},
{mindspore::TypeId::kNumberTypeFloat, mindspore::DataType::MS_FLOAT32},
{mindspore::TypeId::kNumberTypeFloat32, mindspore::DataType::MS_FLOAT32},
{mindspore::TypeId::kNumberTypeFloat64, mindspore::DataType::MS_FLOAT64},
};
int AicpuOpUtil::MsTypeToProtoType(TypeId ms_type) {
auto iter = MS_PROTO_DATA_TYPE_MAP.find(ms_type);
if (iter != MS_PROTO_DATA_TYPE_MAP.end()) {
return MS_PROTO_DATA_TYPE_MAP[ms_type];
} else {
MS_LOG(ERROR) << "UnSupported ms_type value" << static_cast<int>(ms_type);
return -1;
}
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_
#include <cstdint>
#include <vector>
#include <map>
#include <string>
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace kernel {
constexpr auto kInitDataSetQueue = "InitDataSetQueue";
constexpr auto kInitData = "InitData";
constexpr auto kGetNext = "GetNext";
constexpr auto kPrint = "Print";
constexpr auto kPack = "Pack";
constexpr auto kOutputTypes = "output_types";
constexpr auto kOutputShapes = "output_shapes";
constexpr auto kChannelName = "channel_name";
constexpr auto kSharedName = "shared_name";
constexpr auto kShapes = "shapes";
constexpr auto kTypes = "types";
constexpr auto kQueueName = "queue_name";
constexpr auto kSeed = "seed";
constexpr auto kSeed0 = "Seed0";
constexpr auto kSeed1 = "Seed1";
constexpr auto kSeed2 = "seed2";
constexpr auto kTopK = "TopK";
constexpr auto kTopKV2 = "TopKV2";
struct AicpuParamHead {
uint32_t length; // Total length: include cunstom message
uint32_t ioAddrNum; // Input and output address number
uint32_t extInfoLength; // extInfo struct Length
uint64_t extInfoAddr; // extInfo address
} __attribute__((packed));
class AicpuOpUtil {
public:
static int MsTypeToProtoType(TypeId ms_type);
private:
// kernel id
static uint64_t KernelId_;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h"
#include <algorithm>
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/optimizer/common/helper.h"
namespace mindspore {
namespace kernel {
void SetAkgAttrsForFour2Five(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
// The x and output are akg op input and output param.
std::vector<std::string> input_names = {"x"};
std::vector<std::string> output_names = {"output"};
AnfAlgo::SetNodeAttr("input_names", MakeValue(input_names), anf_node);
AnfAlgo::SetNodeAttr("output_names", MakeValue(output_names), anf_node);
TypeId dst_type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, 0);
std::string dst_type;
if (dst_type_id == kFloat32->type_id()) {
dst_type = "float32";
} else if (dst_type_id == kFloat16->type_id()) {
dst_type = "float16";
}
AnfAlgo::SetNodeAttr("dst_type", MakeValue(dst_type), anf_node);
}
void SetAkgAttrsForFive2Four(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::vector<std::string> input_names = {"x"};
std::vector<std::string> output_names = {"output"};
AnfAlgo::SetNodeAttr("input_names", MakeValue(input_names), anf_node);
AnfAlgo::SetNodeAttr("output_names", MakeValue(output_names), anf_node);
std::vector<size_t> origin_shape = AnfAlgo::GetOutputInferShape(anf_node, 0);
if (origin_shape.size() != kShape4dDims) {
MS_LOG(EXCEPTION) << "The dim of origin_shape is not equal to 4, but it's dim is " << origin_shape.size() << ".";
}
std::vector<int> shape_transform;
(void)std::transform(origin_shape.begin(), origin_shape.end(), std::back_inserter(shape_transform),
[](const int &origin_shape) { return static_cast<int>(origin_shape); });
AnfAlgo::SetNodeAttr("shape4d", MakeValue(shape_transform), anf_node);
AnfAlgo::SetNodeAttr("output_format", MakeValue(kOpFormat_NCHW), anf_node);
TypeId dst_type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, 0);
std::string dst_type;
if (dst_type_id == kFloat32->type_id()) {
dst_type = "float32";
} else if (dst_type_id == kFloat16->type_id()) {
dst_type = "float16";
}
AnfAlgo::SetNodeAttr("dstType", MakeValue(dst_type), anf_node);
}
void SetAkgAttrsForCast(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
// The x and output are akg op input and output param.
std::vector<std::string> input_names = {"x", "dst_type"};
std::vector<std::string> output_names = {"output"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node);
std::string dst_type;
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, 0);
if (output_type == kFloat32->type_id()) {
dst_type = "float32";
} else if (output_type == kFloat16->type_id()) {
dst_type = "float16";
} else if (output_type == kInt32->type_id()) {
dst_type = "int32";
} else {
MS_LOG(WARNING) << "Unknown cast_to type: " << TypeIdToType(output_type)->ToString();
}
AnfAlgo::SetNodeAttr("dst_type", MakeValue(dst_type), anf_node);
}
void SetAkgAttrsForBNGrad1(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::vector<std::string> input_names{"dy", "data", "mean"};
std::vector<std::string> output_names{"dgamma_red_hw", "dbeta_red_hw", "data_minus_mean"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node);
}
void SetAkgAttrsForBNGrad2(const AnfNodePtr &anf_node) {
const size_t kBNGrad2InputSize = 5;
MS_EXCEPTION_IF_NULL(anf_node);
std::vector<std::string> input_names{"dgamma_red_hw", "dbeta_red_hw", "variance", "gamma"};
std::vector<std::string> output_names{"bn_scale", "bn_bias", "rs", "dgamma_dx", "dbeta_dx"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node);
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (cnode->inputs().size() < kBNGrad2InputSize) {
MS_LOG(EXCEPTION) << "The inputs size of BNGrad2 is less then " << kBNGrad2InputSize;
}
auto input1 = cnode->input(1);
MS_EXCEPTION_IF_NULL(input1);
auto tuple_getitem = input1->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(tuple_getitem);
if (tuple_getitem->inputs().size() < kTupleGetItemInputSize) {
MS_LOG(EXCEPTION) << "The inputs size of tuple_getitem is less then " << kTupleGetItemInputSize;
}
auto bn_grad1 = tuple_getitem->input(kRealInputNodeIndexInTupleGetItem);
std::vector<size_t> data_shape = AnfAlgo::GetInputDeviceShape(bn_grad1, 0);
AnfAlgo::SetNodeAttr(kAttrDataShape, MakeValue(opt::Convert2Int(data_shape)), anf_node);
}
void SetAkgAttrsForBNGrad3(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::vector<std::string> input_names{"dy", "rs", "dgamma_dx", "dbeta_dx", "data_minus_mean"};
std::vector<std::string> output_names{"dx"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node);
}
void SetAkgAttrsForFusedBN1(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
// Set attr for fused_bn1
std::vector<std::string> fused_bn1_input_names{"data"};
std::vector<std::string> fused_bn1_output_names{"mean", "var_part"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(fused_bn1_input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(fused_bn1_output_names), anf_node);
}
void SetAkgAttrsForFusedBN2(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
// Set attr for fused_bn2
std::vector<std::string> fused_bn2_input_names{"mean", "var_part", "running_mean", "running_var"};
std::vector<std::string> fused_bn2_output_names{"variance", "running_mean", "running_variance"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(fused_bn2_input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(fused_bn2_output_names), anf_node);
}
void SetAkgAttrsForFusedBN3(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
// Set attr for fused_bn3
std::vector<std::string> fused_bn3_input_names{"data", "mean", "variance", "gamma", "beta"};
std::vector<std::string> fused_bn3_output_names{"y"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(fused_bn3_input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(fused_bn3_output_names), anf_node);
}
void SetAkgAttrsForConvBN1(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::vector<std::string> conv_bn1_output_names{"data", "var_part", "mean"};
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(conv_bn1_output_names), anf_node);
}
void SetAkgAttrsForBN2AddRelu(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::vector<std::string> bn2_add_relu_input_names{"data", "var_part", "mean", "other_branch_data",
"gamma", "beta", "running_mean", "running_var"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(bn2_add_relu_input_names), anf_node);
std::vector<std::string> bn2_add_relu_output_names{"output", "running_mean", "running_variance", "save_inv_variance"};
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(bn2_add_relu_output_names), anf_node);
}
void SetAkgAttrsForBN2Relu(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::vector<std::string> bn2_input_names{"data", "var_part", "mean", "gamma", "beta", "running_mean", "running_var"};
std::vector<std::string> bn2_output_names{"y", "running_mean", "running_variance", "save_inv_variance"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(bn2_input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(bn2_output_names), anf_node);
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_ATTRS_PROCESS_H
#define MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_ATTRS_PROCESS_H
#include <vector>
#include <memory>
#include <string>
#include <unordered_map>
#include "ir/anf.h"
#include "utils/utils.h"
#include "frontend/operator/ops.h"
namespace mindspore {
namespace kernel {
void SetAkgAttrsForFour2Five(const AnfNodePtr &anf_node);
void SetAkgAttrsForFive2Four(const AnfNodePtr &anf_node);
void SetAkgAttrsForCast(const AnfNodePtr &anf_node);
void SetAkgAttrsForBNGrad1(const AnfNodePtr &anf_node);
void SetAkgAttrsForBNGrad2(const AnfNodePtr &anf_node);
void SetAkgAttrsForBNGrad3(const AnfNodePtr &anf_node);
void SetAkgAttrsForFusedBN1(const AnfNodePtr &anf_node);
void SetAkgAttrsForFusedBN2(const AnfNodePtr &anf_node);
void SetAkgAttrsForFusedBN3(const AnfNodePtr &anf_node);
void SetAkgAttrsForConvBN1(const AnfNodePtr &anf_node);
void SetAkgAttrsForBN2AddRelu(const AnfNodePtr &anf_node);
void SetAkgAttrsForBN2Relu(const AnfNodePtr &anf_node);
const std::unordered_map<std::string, std::function<void(const AnfNodePtr &anf_node)>> kAkgKernelAttrsProcessMap = {
{kFour2FiveOpName, SetAkgAttrsForFour2Five},
{kFive2FourOpName, SetAkgAttrsForFive2Four},
{"Cast", SetAkgAttrsForCast},
{kBNGrad1OpName, SetAkgAttrsForBNGrad1},
{kBNGrad2OpName, SetAkgAttrsForBNGrad2},
{kBNGrad3OpName, SetAkgAttrsForBNGrad3},
{kFusedBN1OpName, SetAkgAttrsForFusedBN1},
{kFusedBN2OpName, SetAkgAttrsForFusedBN2},
{kFusedBN3OpName, SetAkgAttrsForFusedBN3},
{kConvBN1OpName, SetAkgAttrsForConvBN1},
{kBN2AddReluOpName, SetAkgAttrsForBN2AddRelu},
{kBN2ReLUOpName, SetAkgAttrsForBN2Relu},
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_ATTRS_PROCESS_H
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_AKG_AKGKERNELBUILD_H_
#define MINDSPORE_CCSRC_KERNEL_AKG_AKGKERNELBUILD_H_
#include <unordered_map>
#include <string>
#include <vector>
#include <memory>
#include <map>
#include <utility>
#include "backend/kernel_compiler/kernel.h"
#include "ir/dtype.h"
#include <nlohmann/json.hpp>
#include "backend/kernel_compiler/common_utils.h"
#include "backend/kernel_compiler/oplib/oplib.h"
namespace mindspore {
namespace kernel {
class AkgKernelBuild {
public:
AkgKernelBuild() {
input_tensor_idx_ = {};
output_tensor_idx_ = 0;
}
~AkgKernelBuild() = default;
KernelPackPtr BuildByJson(const AnfNodePtr &anf_node, std::vector<size_t> *const input_size,
std::vector<size_t> *const output_size);
static std::string GetProcessor(const AnfNodePtr &anf_node);
static std::string PyObjectToStr(PyObject *const PyObj);
protected:
bool CreateInputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const inputs_json);
bool CreateOutputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const outputs_json);
bool CreateAttrDescJson(const AnfNodePtr &anf_node, const std::string &op_name,
const std::shared_ptr<OpInfo> &op_info, nlohmann::json *const attrs_json);
KernelPackPtr OpBuild(const std::string &node_json, const AnfNodePtr &anf_node);
int GetOpCntInc();
size_t GetInputTensorIdxInc(const AnfNodePtr &anf_node, size_t input_idx);
size_t GetOutputTensorIdxInc();
bool GenerateSingleKernelJson(const AnfNodePtr &anf_node, const std::string &op_name,
nlohmann::json *const node_json);
static int op_cnt_;
// lock for variable fusionOpCnt in singleton mode
static std::mutex op_cnt_mtx_;
std::string json_name_;
std::string json_info_;
std::unordered_map<AnfNodePtr, size_t> input_tensor_idx_;
size_t output_tensor_idx_;
};
bool GetIOSize(const nlohmann::json &node_json, std::vector<size_t> *const input_size,
std::vector<size_t> *const output_size);
void SetTensorName(const std::string &tag, const std::string &new_name, const std::pair<size_t, size_t> &position,
nlohmann::json *const node_json);
std::string GetTensorName(const nlohmann::json &node_json, const std::string &tag,
const std::pair<size_t, size_t> &position);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_AKG_AKGKERNELBUILD_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/akg/akg_kernel_metadata.h"
#include <memory>
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/oplib/oplib.h"
#include "backend/kernel_compiler/common_utils.h"
namespace mindspore {
namespace kernel {
void AkgMetadataInfo(const CNodePtr &kernel_node,
std::vector<std::shared_ptr<KernelBuildInfo>> *const kernel_info_list) {
MS_EXCEPTION_IF_NULL(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_info_list);
std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
for (size_t i = 0; i < support_devices.size(); i++) {
auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAKG);
if (op_info_ptr == nullptr) {
continue;
}
if (!ParseMetadata(kernel_node, op_info_ptr, Processor(i), kernel_info_list)) {
MS_LOG(WARNING) << "Akg parsed metadata of op[" << op_name << "], device[" << support_devices[i] << "] failed.";
} else {
MS_LOG(DEBUG) << "Akg parsed metadata of op[" << op_name << "], device[" << support_devices[i] << "].";
break;
}
}
if (kernel_info_list->empty()) {
MS_LOG(WARNING) << "Akg dose not has metadata of op[" << op_name << "].";
}
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_
#define MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_
#include <string>
#include <vector>
#include <unordered_map>
#include <memory>
#include "backend/kernel_compiler/kernel_build_info.h"
namespace mindspore {
namespace kernel {
void AkgMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h"
#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
#include <Python.h>
#include "ir/dtype.h"
#include "ir/func_graph.h"
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/kernel_compiler/tbe/tbe_utils.h"
#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h"
#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace kernel {
constexpr int32_t PARALLEL_ARGS_SIZE = 3;
constexpr int32_t PROCESS_NUM = 16;
constexpr int32_t TIME_OUT = 300;
constexpr auto kOpDesc = "op_desc";
constexpr auto kShape = "shape";
constexpr auto kDataType = "data_type";
constexpr auto kInputDesc = "input_desc";
constexpr auto kOutputDesc = "output_desc";
constexpr auto kTensorName = "tensor_name";
constexpr auto kCompileAkgKernelParallelFunc = "compile_akg_kernel_parallel";
constexpr auto kMultiProcModule = "mindspore._extends.parallel_compile.akg_compiler.multi_process_compiler";
namespace {
void UpdateTensorNameInJson(const std::vector<AnfNodePtr> &anf_nodes,
std::map<AnfNodePtr, nlohmann::json> *node_json_map) {
for (auto const &anf_node : anf_nodes) {
std::vector<int> dyn_input_sizes;
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(primitive);
if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) {
dyn_input_sizes = GetValue<const std::vector<int>>(primitive->GetAttr(kAttrDynInputSizes));
}
bool is_dynamic_input = !dyn_input_sizes.empty();
size_t input_num = is_dynamic_input ? dyn_input_sizes.size() : AnfAlgo::GetInputTensorNum(anf_node);
size_t real_input_index = 0;
for (size_t i = 0; i < input_num; ++i) {
size_t input_tensor_num = is_dynamic_input ? IntToSize(dyn_input_sizes[i]) : 1;
for (size_t j = 0; j < input_tensor_num; ++j) {
auto tmp_input = GetKernelInput(anf_node, real_input_index);
std::string tensor_name = GetTensorName((*node_json_map)[anf_node], kInputDesc, std::make_pair(i, j));
if (node_json_map->find(tmp_input.first) != node_json_map->end()) {
std::string new_tensor_name =
GetTensorName((*node_json_map)[tmp_input.first], kOutputDesc, std::make_pair(0, tmp_input.second));
SetTensorName(kInputDesc, new_tensor_name, std::make_pair(i, j), &((*node_json_map)[anf_node]));
MS_LOG(DEBUG) << "Update [" << real_input_index << "] input [" << tensor_name << "] of ["
<< anf_node->fullname_with_scope() << "] to [" << tmp_input.second << "] output ["
<< new_tensor_name << "] of [" << tmp_input.first->fullname_with_scope() << "].";
} else {
MS_LOG(DEBUG) << "[" << real_input_index << "] input " << tensor_name << "] of ["
<< anf_node->fullname_with_scope() << "] is out input.";
}
real_input_index++;
}
}
}
}
nlohmann::json GetInputsJson(const std::vector<AnfNodePtr> &anf_nodes, const std::vector<AnfNodePtr> &input_list,
std::map<AnfNodePtr, nlohmann::json> *node_json_map) {
nlohmann::json inputs_json;
auto input_index = GetInputIndex(anf_nodes, input_list);
for (size_t i = 0; i < input_index.size(); ++i) {
auto tmp_input = input_index[i];
auto type_id = AnfAlgo::GetInputDeviceDataType(tmp_input.first, tmp_input.second.first);
std::string dtype = TypeId2String(type_id);
nlohmann::json input_desc_json;
input_desc_json[kTensorName] = GetTensorName((*node_json_map)[tmp_input.first], kInputDesc, tmp_input.second);
input_desc_json[kDataType] = dtype;
input_desc_json[kShape] = AnfAlgo::GetInputDeviceShape(tmp_input.first, tmp_input.second.first);
inputs_json.emplace_back(std::vector<nlohmann::json>{input_desc_json});
}
return inputs_json;
}
nlohmann::json GetOutputsJson(const std::vector<AnfNodePtr> &anf_nodes, const std::vector<AnfNodePtr> &input_list,
const std::vector<AnfNodePtr> &output_list, const nlohmann::json &inputs_json,
std::map<AnfNodePtr, nlohmann::json> *node_json_map) {
nlohmann::json outputs_json;
auto output_index = GetOutputIndex(anf_nodes, input_list, output_list);
for (size_t i = 0; i < output_index.size(); ++i) {
auto tmp_output = output_index[i];
bool found = false;
nlohmann::json output_desc_json;
for (size_t input_i = 0; input_i < input_list.size(); ++input_i) {
if (tmp_output.first == input_list[input_i]) {
output_desc_json = inputs_json[input_i][0];
found = true;
break;
}
}
if (!found) {
auto type_id = AnfAlgo::GetOutputDeviceDataType(tmp_output.first, tmp_output.second);
std::string dtype = TypeId2String(type_id);
output_desc_json[kTensorName] =
GetTensorName((*node_json_map)[tmp_output.first], kOutputDesc, std::make_pair(0, tmp_output.second));
output_desc_json[kDataType] = dtype;
auto output_shape = AnfAlgo::GetOutputDeviceShape(tmp_output.first, tmp_output.second);
if (output_shape.empty()) {
output_shape.push_back(1);
}
output_desc_json[kShape] = output_shape;
}
outputs_json.emplace_back(output_desc_json);
}
return outputs_json;
}
std::pair<std::vector<std::string>, std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>>> PreProcessJsonForBuild(
const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &build_args) {
// Remove cached nodes, gether unique nodes, and collect repeated nodes which need postprecess.
std::vector<std::string> jsons;
std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> repeat_nodes;
std::unordered_set<std::string> json_name_set;
for (const auto &[builder, anf_node] : build_args) {
MS_EXCEPTION_IF_NULL(anf_node);
auto json_name = builder.json_name();
MS_LOG(DEBUG) << "Akg start compile op: " << json_name;
auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node));
if (cached_kernel_pack != nullptr) {
MS_LOG(DEBUG) << "Use cached kernel, json_name_[" << json_name << "], fullname_with_scope["
<< anf_node->fullname_with_scope() << "].";
auto kernel_mod_ptr = std::make_shared<AkgKernelMod>(cached_kernel_pack);
kernel_mod_ptr->SetInputSizeList(builder.input_size_list());
kernel_mod_ptr->SetOutputSizeList(builder.output_size_list());
AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get());
continue;
}
if (json_name_set.count(json_name) != 0) {
repeat_nodes.push_back({builder, anf_node});
continue;
}
json_name_set.insert(json_name);
auto node_json = builder.kernel_json();
kernel::SaveJsonInfo(json_name, node_json);
jsons.push_back(node_json);
}
return std::make_pair(jsons, repeat_nodes);
}
bool PostProcessAfterCompile(const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &build_args,
const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &repeat_nodes) {
for (const auto &[builder, anf_node] : build_args) {
auto json_name = builder.json_name();
auto new_kernel_pack = tbe::TbeUtils::InsertCache(json_name, AkgKernelBuild::GetProcessor(anf_node));
if (new_kernel_pack == nullptr) {
MS_LOG(ERROR) << "Insert to cache failed, json_name_[" << json_name << "], fullname_with_scope["
<< anf_node->fullname_with_scope() << "].";
return false;
}
auto kernel_mod_ptr = std::make_shared<AkgKernelMod>(new_kernel_pack);
kernel_mod_ptr->SetInputSizeList(builder.input_size_list());
kernel_mod_ptr->SetOutputSizeList(builder.output_size_list());
AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get());
MS_LOG(DEBUG) << "Akg compile " << json_name << " kernel and insert cache successfully!";
}
for (const auto &[builder, anf_node] : repeat_nodes) {
auto node_json = builder.kernel_json();
auto json_name = builder.json_name();
auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node));
if (cached_kernel_pack == nullptr) {
return false;
}
MS_LOG(INFO) << "Use just compiled kernel, json_name_[" << json_name << "], fullname_with_scope["
<< anf_node->fullname_with_scope() << "].";
auto kernel_mod_ptr = std::make_shared<AkgKernelMod>(cached_kernel_pack);
kernel_mod_ptr->SetInputSizeList(builder.input_size_list());
kernel_mod_ptr->SetOutputSizeList(builder.output_size_list());
AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get());
}
return true;
}
} // namespace
bool AkgAscendKernelBuilder::CollectJson(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
MS_LOG(INFO) << "AKG start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]";
auto it = kAkgKernelAttrsProcessMap.find(op_name);
if (it != kAkgKernelAttrsProcessMap.end()) {
it->second(anf_node);
}
MS_LOG(INFO) << "Akg start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]";
nlohmann::json node_json;
if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) {
MS_LOG(ERROR) << "Op[" << op_name << "] create single kernel json failed.";
}
kernel_json_ = node_json.dump();
if (!GetIOSize(node_json, &input_size_list_, &output_size_list_)) {
MS_LOG(ERROR) << "Cal mem size failed.";
return false;
}
return true;
}
bool AkgAscendKernelBuilder::GenJsonAndPreprocess4Fused(const std::vector<AnfNodePtr> &anf_nodes,
std::map<AnfNodePtr, nlohmann::json> *node_json_map) {
for (auto const &anf_node : anf_nodes) {
MS_EXCEPTION_IF_NULL(anf_node);
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
if (!AnfAlgo::IsRealKernel(anf_node)) {
MS_LOG(ERROR) << "Invalid anf node to build [" << anf_node->fullname_with_scope() << "].";
return false;
}
auto it = kAkgKernelAttrsProcessMap.find(op_name);
if (it != kAkgKernelAttrsProcessMap.end()) {
it->second(anf_node);
}
nlohmann::json node_json;
if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) {
MS_LOG(ERROR) << "Op [" << op_name << "] create single kernel json failed.";
return false;
}
// No need for composite op.
node_json.erase("id");
node_json.erase("op");
node_json.erase("composite");
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(primitive);
if (primitive->GetAttr("fusion") != nullptr) {
node_json["fusion"] = primitive->GetAttr("fusion")->ToString();
}
(*node_json_map)[anf_node] = node_json;
}
return true;
}
bool AkgAscendKernelBuilder::CollectFusedJson(const std::vector<AnfNodePtr> &anf_nodes,
const std::vector<AnfNodePtr> &input_list,
const std::vector<AnfNodePtr> &output_list) {
if (anf_nodes.empty() || input_list.empty()) {
MS_LOG(ERROR) << "Invalid input size, anf_nodes [" << anf_nodes.size() << "], input_list [" << input_list.size()
<< "].";
return false;
}
MS_LOG(INFO) << "anf_nodes [" << output_list.size() << "], input_list [" << anf_nodes.size() << "], output_list ["
<< input_list.size() << "].";
std::map<AnfNodePtr, nlohmann::json> node_json_map;
if (!GenJsonAndPreprocess4Fused(anf_nodes, &node_json_map)) {
return false;
}
UpdateTensorNameInJson(anf_nodes, &node_json_map);
nlohmann::json fused_node_json;
std::vector<nlohmann::json> node_json_desc;
std::transform(anf_nodes.begin(), anf_nodes.end(), std::back_inserter(node_json_desc),
[&node_json_map](const AnfNodePtr &anf_node) { return node_json_map[anf_node]; });
fused_node_json[kOpDesc] = node_json_desc;
fused_node_json[kInputDesc] = GetInputsJson(anf_nodes, input_list, &node_json_map);
fused_node_json[kOutputDesc] =
GetOutputsJson(anf_nodes, input_list, output_list, fused_node_json[kInputDesc], &node_json_map);
size_t hash_id = std::hash<std::string>()(fused_node_json.dump());
json_name_ = "Fused_";
auto fg = anf_nodes[0]->func_graph();
MS_EXCEPTION_IF_NULL(fg);
auto attr_val = fg->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL);
if (attr_val != nullptr) {
auto fg_attr = GetValue<std::string>(attr_val);
(void)json_name_.append(fg_attr).append("_");
}
(void)json_name_.append(std::to_string(hash_id));
fused_node_json["composite_graph"] = fg->ToString();
fused_node_json["op"] = json_name_;
fused_node_json["platform"] = "AKG";
fused_node_json["process"] = "aicore";
fused_node_json["composite"] = true;
kernel_json_ = fused_node_json.dump();
if (!GetIOSize(fused_node_json, &input_size_list_, &output_size_list_)) {
MS_LOG(ERROR) << "Cal mem size failed.";
return false;
}
return true;
}
void GenParallelCompileFuncArgs(const std::vector<std::string> &kernel_jsons, PyObject **p_args) {
MS_EXCEPTION_IF_NULL(p_args);
*p_args = PyTuple_New(PARALLEL_ARGS_SIZE);
PyObject *arg1 = PyList_New(kernel_jsons.size());
for (int i = 0; i < PyList_Size(arg1); ++i) {
PyList_SetItem(arg1, i, Py_BuildValue("s", kernel_jsons[i].c_str()));
}
PyObject *arg2 = Py_BuildValue("i", PROCESS_NUM);
PyObject *arg3 = Py_BuildValue("i", TIME_OUT);
(void)PyTuple_SetItem(*p_args, 0, arg1);
(void)PyTuple_SetItem(*p_args, 1, arg2);
(void)PyTuple_SetItem(*p_args, 2, arg3);
}
bool AkgOpParallelBuild(const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &build_args) {
auto [jsons, repeat_nodes] = PreProcessJsonForBuild(build_args);
if (jsons.empty()) {
return true;
}
// Try to call python method to compile nodes parallely.
PyObject *p_module = nullptr;
PyObject *p_func = nullptr;
PyObject *p_arg = nullptr;
PyObject *p_res = nullptr;
p_module = PyImport_ImportModule(kMultiProcModule);
if (p_module == nullptr) {
MS_LOG(ERROR) << "Failed to import [" << kMultiProcModule << "].";
return false;
}
p_func = PyObject_GetAttrString(p_module, kCompileAkgKernelParallelFunc);
GenParallelCompileFuncArgs(jsons, &p_arg);
MS_LOG(DEBUG) << "Call function [" << kCompileAkgKernelParallelFunc << "], try to compile " << jsons.size()
<< " Akg kernels parallelly.";
p_res = PyEval_CallObject(p_func, p_arg);
if (p_res == nullptr) {
PyErr_Print();
MS_LOG(ERROR) << "No ret got, failed to call function [" << kCompileAkgKernelParallelFunc << "], args:\n("
<< AkgKernelBuild::PyObjectToStr(p_arg) << ").";
return false;
}
if (PyObject_IsTrue(p_res) != 1) {
PyErr_Print();
MS_LOG(ERROR) << "Illegal ret, failed to call function [" << kCompileAkgKernelParallelFunc << "], args:\n("
<< AkgKernelBuild::PyObjectToStr(p_arg) << ").";
return false;
}
if (!PostProcessAfterCompile(build_args, repeat_nodes)) {
return false;
}
return true;
}
bool AkgAscendKernelParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> json_and_node;
for (const auto &anf_node : anf_nodes) {
MS_EXCEPTION_IF_NULL(anf_node);
AkgAscendKernelBuilder akg_cce_kernel_builder;
KernelPackPtr kernel_pack = nullptr;
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (AnfAlgo::IsGraphKernel(cnode)) {
auto func_graph = AnfAlgo::GetCNodeFuncGraphPtr(cnode);
auto mng = func_graph->manager();
if (mng == nullptr) {
mng = Manage(func_graph, true);
func_graph->set_manager(mng);
}
MS_EXCEPTION_IF_NULL(func_graph);
std::vector<AnfNodePtr> node_list;
std::vector<AnfNodePtr> input_list;
std::vector<AnfNodePtr> output_list;
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
MS_LOG(INFO) << "Akg start compile composite op[" << op_name << "]";
GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list);
if (!akg_cce_kernel_builder.CollectFusedJson(node_list, input_list, output_list)) {
MS_EXCEPTION(UnknownError) << "Akg build failed composite op[" << op_name << "].";
}
} else {
if (!akg_cce_kernel_builder.CollectJson(anf_node)) {
MS_EXCEPTION(UnknownError) << "Akg build failed op[" << AnfAlgo::GetCNodeName(anf_node) << "].";
}
}
json_and_node.push_back({akg_cce_kernel_builder, anf_node});
}
if (json_and_node.empty()) {
MS_LOG(DEBUG) << "There is no kernel needed to be compiled.";
return true;
}
return AkgOpParallelBuild(json_and_node);
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_
#define MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_
#include <string>
#include <memory>
#include <vector>
#include <map>
#include "ir/anf.h"
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
namespace mindspore {
namespace kernel {
class AkgAscendKernelBuilder : public AkgKernelBuild {
public:
AkgAscendKernelBuilder() = default;
~AkgAscendKernelBuilder() = default;
bool CollectJson(const AnfNodePtr &anf_node);
bool CollectFusedJson(const std::vector<AnfNodePtr> &anf_nodes, const std::vector<AnfNodePtr> &input_list,
const std::vector<AnfNodePtr> &output_list);
std::string json_name() const { return json_name_; }
std::string kernel_json() const { return kernel_json_; }
const std::vector<size_t> &input_size_list() const { return input_size_list_; }
const std::vector<size_t> &output_size_list() const { return output_size_list_; }
private:
bool GenJsonAndPreprocess4Fused(const std::vector<AnfNodePtr> &anf_nodes,
std::map<AnfNodePtr, nlohmann::json> *node_json_map);
std::string kernel_json_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
};
bool AkgAscendKernelParallelBuild(const std::vector<AnfNodePtr> &anf_nodes);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h"
#include <algorithm>
#include <fstream>
#include <map>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <vector>
#include "nlohmann/json.hpp"
#include "runtime/rt.h"
#include "utils/log_adapter.h"
#include "utils/convert_utils.h"
#include "utils/context/ms_context.h"
namespace mindspore {
namespace kernel {
using std::fstream;
using std::map;
using std::mutex;
using std::string;
using TbeTaskInfoPtr = std::shared_ptr<ge::model_runner::TbeTaskInfo>;
using tbe::KernelManager;
constexpr uint32_t DEFAULT_BLOCK_DIM = 1;
/**
* @brief infotable contain func_stub\blockdim\kernel file buffer
*/
AkgKernelMod::AkgKernelMod(const KernelPackPtr &kernel_pack) : kernel_pack_(kernel_pack) {}
void AkgKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
void AkgKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; }
void AkgKernelMod::SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; }
const std::vector<size_t> &AkgKernelMod::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &AkgKernelMod::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &AkgKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
bool AkgKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (stream_ptr == nullptr) {
MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
return false;
}
if (kernel_pack_ == nullptr) {
MS_LOG(ERROR) << "kernel pack should not be nullptr.";
return false;
}
uint32_t block_dim = DEFAULT_BLOCK_DIM; // default blockdim equal to 1.
auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim);
if (func_stub == 0) {
MS_LOG(ERROR) << "GenFuncStub failed.";
return false;
}
// pack all addresses into a vector.
std::vector<void *> runtime_args;
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtime_args),
[](const AddressPtr &input) -> void * { return input->addr; });
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtime_args),
[](const AddressPtr &output) -> void * { return output->addr; });
rtL2Ctrl_t *l2ctrl = nullptr;
auto stream = reinterpret_cast<rtStream_t *>(stream_ptr);
if (RT_ERROR_NONE != rtKernelLaunch(reinterpret_cast<void *>(func_stub), block_dim, runtime_args.data(),
SizeToUint(sizeof(void *) * runtime_args.size()), l2ctrl, stream)) {
MS_LOG(ERROR) << "Call runtime rtKernelLaunch error.";
return false;
}
return true;
}
std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
if (kernel_pack_ == nullptr) {
MS_LOG(EXCEPTION) << "kernel pack should not be nullptr.";
}
std::vector<uint8_t> args;
const uint32_t args_size = 0;
std::vector<uint8_t> sm_desc;
void *binary = nullptr;
const uint32_t binary_size = 0;
std::vector<uint8_t> meta_data;
std::vector<void *> input_data_addrs;
std::vector<void *> output_data_addrs;
std::vector<void *> workspace_addrs;
// pack all addresses into a vector.
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs),
[](const AddressPtr &input) -> void * { return input->addr; });
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs),
[](const AddressPtr &output) -> void * { return output->addr; });
uint32_t block_dim = DEFAULT_BLOCK_DIM; // default blockdim equal to 1.
auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim);
if (func_stub == 0) {
MS_LOG(EXCEPTION) << "GenFuncStub failed.";
}
std::string stub_func = KernelManager::GetStubFuncName(kernel_pack_);
MS_LOG(DEBUG) << "The block_dim is:" << block_dim;
TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>(
kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data,
input_data_addrs, output_data_addrs, workspace_addrs, NeedDump());
return {task_info_ptr};
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_
#define MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_
#include <string>
#include <vector>
#include <memory>
#include "backend/kernel_compiler/ascend_kernel_mod.h"
#include "backend/kernel_compiler/tbe/tbe_utils.h"
namespace mindspore {
namespace kernel {
class AkgKernelMod : public AscendKernelMod {
public:
explicit AkgKernelMod(const KernelPackPtr &kernel_pack);
~AkgKernelMod() final {}
void SetInputSizeList(const std::vector<size_t> &size_list);
void SetOutputSizeList(const std::vector<size_t> &size_list);
void SetWorkspaceSizeList(const std::vector<size_t> &size_list);
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
private:
KernelPackPtr kernel_pack_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
using AkgKernelModPtr = std::shared_ptr<AkgKernelMod>;
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h"
#include <vector>
#include <memory>
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h"
#include "common/utils.h"
namespace mindspore {
namespace kernel {
KernelModPtr AkgGpuKernelBuild(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
AkgKernelBuild akg_kernel_build;
std::vector<size_t> input_size_list;
std::vector<size_t> output_size_list;
KernelPackPtr kernel_pack = akg_kernel_build.BuildByJson(anf_node, &input_size_list, &output_size_list);
MS_EXCEPTION_IF_NULL(kernel_pack);
auto kernel_mod_ptr = std::make_shared<GpuKernelMod>(kernel_pack);
MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
kernel_mod_ptr->SetInputSizeList(input_size_list);
kernel_mod_ptr->SetOutputSizeList(output_size_list);
return kernel_mod_ptr;
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_
#define MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_
#include "backend/kernel_compiler/kernel.h"
#include "base/base.h"
namespace mindspore {
namespace kernel {
KernelModPtr AkgGpuKernelBuild(const AnfNodePtr &anf_node);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h"
#include <fstream>
#include <algorithm>
#include "nlohmann/json.hpp"
#include "common/utils.h"
namespace mindspore {
namespace kernel {
using std::fstream;
using std::string;
using std::vector;
GpuKernelManagerPtr GpuKernelMod::kernelmanager_ = std::make_shared<GpuKernelManager>();
GpuKernelManager::GpuKernelManager() {}
CUresult GpuKernelManager::GetFunction(const KernelPackPtr &kernel_pack, bool force_reload,
vector<uint32_t> *thread_info, CUfunction *func) {
if (kernel_pack->GetJson() == nullptr || kernel_pack->GetJson()->contents == nullptr ||
kernel_pack->GetKernel() == nullptr || kernel_pack->GetKernel()->contents == nullptr) {
MS_LOG(ERROR) << "GPU:Invalid kernel pack, json or kernel is nullptr.";
return CUDA_ERROR_INVALID_IMAGE;
}
auto js = nlohmann::json::parse(kernel_pack->GetJson()->contents,
kernel_pack->GetJson()->contents + kernel_pack->GetJson()->len);
string fn = js["kernelName"];
if (!force_reload) {
auto iter = infotable_.find(fn);
if (iter != infotable_.end()) {
auto kernelmeta = iter->second;
*thread_info = kernelmeta->thread_info_;
*func = kernelmeta->func_addr_;
return CUDA_SUCCESS;
}
}
thread_info->emplace_back(js["blockIdx.x"]);
thread_info->emplace_back(js["blockIdx.y"]);
thread_info->emplace_back(js["blockIdx.z"]);
thread_info->emplace_back(js["threadIdx.x"]);
thread_info->emplace_back(js["threadIdx.y"]);
thread_info->emplace_back(js["threadIdx.z"]);
CUmodule module;
CUresult result = cuModuleLoadData(&module, kernel_pack->GetKernel()->contents);
if (result != CUDA_SUCCESS) {
MS_LOG(ERROR) << "cuModuleLoadData failed.";
return result;
}
result = cuModuleGetFunction(func, module, fn.c_str());
if (result != CUDA_SUCCESS) {
MS_LOG(ERROR) << "cuModuleGetFunction failed.";
return result;
}
infotable_[fn] = std::make_shared<GpuKernelMeta>(*func, module, *thread_info);
return result;
}
GpuKernelMod::GpuKernelMod(const KernelPackPtr &kernel_pack) : kernel_pack_(kernel_pack) {}
void GpuKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
void GpuKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; }
const std::vector<size_t> &GpuKernelMod::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &GpuKernelMod::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &GpuKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
bool GpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (stream_ptr == 0) {
MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
return false;
}
if (kernel_pack_ == nullptr) {
MS_LOG(ERROR) << "kernel pack should not be nullptr.";
return false;
}
vector<uint32_t> thread_info;
CUfunction kernel_addr;
CUresult result = kernelmanager_->GetFunction(kernel_pack_, false, &thread_info, &kernel_addr);
if (result != CUDA_SUCCESS) {
MS_LOG(ERROR) << "GetFunction failed.";
return false;
}
std::vector<void *> runtimeargs;
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtimeargs),
[](const AddressPtr &input) -> void * { return reinterpret_cast<void *>(&(input->addr)); });
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtimeargs),
[](const AddressPtr &output) -> void * { return reinterpret_cast<void *>(&(output->addr)); });
result = cuLaunchKernel(kernel_addr, thread_info[0], thread_info[1], thread_info[2], thread_info[3], thread_info[4],
thread_info[5], 0, reinterpret_cast<CUstream>(stream_ptr),
reinterpret_cast<void **>(&runtimeargs[0]), 0);
if (result != CUDA_SUCCESS) {
MS_LOG(ERROR) << "Launch Kernel failed.";
return false;
}
return true;
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_MOD_H_
#define MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_MOD_H_
#include <cuda.h>
#include <string>
#include <vector>
#include <unordered_map>
#include <memory>
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace kernel {
struct GpuKernelMeta {
CUfunction func_addr_;
CUmodule module_;
std::vector<uint32_t> thread_info_;
GpuKernelMeta(CUfunction funcAddr, CUmodule module, const std::vector<uint32_t> &thread_info)
: func_addr_(funcAddr), module_(module), thread_info_(thread_info) {}
};
using GpuKernelMetaPtr = std::shared_ptr<GpuKernelMeta>;
class GpuKernelManager {
public:
GpuKernelManager();
virtual ~GpuKernelManager() {
for (auto iter = infotable_.begin(); iter != infotable_.end(); ++iter) {
CUresult ret = cuModuleUnload(iter->second->module_);
if (ret != CUDA_SUCCESS && ret != CUDA_ERROR_DEINITIALIZED) {
MS_LOG(ERROR) << "Unload GPU Module failed.";
}
}
}
CUresult GetFunction(const KernelPackPtr &kernel_pack, bool force_reload, std::vector<uint32_t> *thread_info,
CUfunction *func);
private:
std::unordered_map<std::string, GpuKernelMetaPtr> infotable_;
};
using GpuKernelManagerPtr = std::shared_ptr<GpuKernelManager>;
class GpuKernelMod : public KernelMod {
public:
explicit GpuKernelMod(const KernelPackPtr &kernel_pack);
virtual ~GpuKernelMod() {}
void SetInputSizeList(const std::vector<size_t> &size_list);
void SetOutputSizeList(const std::vector<size_t> &size_list);
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
static GpuKernelManagerPtr kernelmanager_;
private:
KernelPackPtr kernel_pack_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
using GpuKernelModPtr = std::shared_ptr<GpuKernelMod>;
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_MOD_H_
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_
#define MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_
#include <vector>
#include <memory>
#include "framework/ge_runtime/task_info.h"
#include "backend/kernel_compiler/kernel.h"
#ifdef ENABLE_DATA_DUMP
#include "debug/data_dump_parser.h"
#endif
using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>;
namespace mindspore {
namespace kernel {
class AscendKernelMod : public KernelMod {
public:
virtual std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, uint32_t) = 0;
uint32_t block_dim() { return block_dim_; }
uint32_t stream_id() { return stream_id_; }
virtual bool NeedDump() {
#ifdef ENABLE_DATA_DUMP
return DataDumpParser::GetInstance().NeedDump(kernel_name_);
#else
return false;
#endif
}
protected:
uint32_t block_dim_{1};
uint32_t stream_id_{0};
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_
此差异已折叠。
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_COMMON_UTILS_H_
#define MINDSPORE_CCSRC_KERNEL_COMMON_UTILS_H_
#include <dirent.h>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <map>
#include <string>
#include <vector>
#include <utility>
#include <nlohmann/json.hpp>
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/oplib/opinfo.h"
#include "backend/kernel_compiler/kernel_build_info.h"
namespace mindspore {
namespace kernel {
constexpr auto kCceKernelMeta = "./kernel_meta/";
constexpr auto kGpuKernelMeta = "./cuda_meta";
constexpr auto kProcessorAiCore = "aicore";
constexpr auto kProcessorAiCpu = "aicpu";
constexpr auto kProcessorCuda = "cuda";
constexpr auto kJsonSuffix = ".json";
constexpr auto kInfoSuffix = ".info";
constexpr unsigned int AUTODIFF_COMPILE_OVERTIME = 600;
constexpr auto kAkgModule = "_akg";
constexpr auto kArgDataformat = "data_format";
const std::vector<std::string> support_devices = {"aicore", "aicpu", "cuda"};
struct KernelMetaInfo {
uintptr_t func_stub_;
uint32_t block_dim_;
};
using KernelMetaPtr = std::shared_ptr<KernelMetaInfo>;
class KernelMeta {
public:
KernelMeta() = default;
void Initialize();
void RemoveKernelCache();
std::string Search(const std::string &kernel_name) const;
bool Insert(const std::string &kernel_name, const std::string &kernel_json);
std::string GetKernelMetaPath() { return kernel_meta_path_; }
static KernelMeta *GetInstance() {
static KernelMeta kernel_meta;
return &kernel_meta;
}
~KernelMeta() = default;
private:
bool initialized_ = false;
std::string kernel_meta_path_;
std::unordered_map<std::string, std::string> kernel_meta_map_;
};
struct SparseGradient {
float *value_;
int *indices_;
size_t indices_size_;
};
struct MultiThreadComputeParams {
float *var_;
float *accum_;
float *linear_;
float *m_;
float *m_t_;
float *v_;
float lr_;
float l1_;
float l2_;
float lr_power_;
float beta1_;
float beta2_;
float epsilon_;
SparseGradient sparse_grad_;
size_t var_first_dim_size_;
size_t var_outer_dim_size_;
bool use_nesterov_;
};
using MultiThreadComputeFunc = std::function<void(MultiThreadComputeParams *param, size_t start, size_t end)>;
bool CheckCache(const std::string &kernel_name);
KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor);
KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor);
TypeId DtypeToTypeId(const std::string &dtypes);
std::string Dtype2ShortType(const std::string &dtypes);
std::string TypeId2String(TypeId type_id);
size_t GetDtypeNbyte(const std::string &dtypes);
bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr<const OpInfo> &op_info_ptr, Processor processor,
std::vector<std::shared_ptr<KernelBuildInfo>> *const kernel_info_list);
void SaveJsonInfo(const std::string &json_name, const std::string &info);
std::string GetProcessor(const AnfNodePtr &anf_node);
bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b);
int Sign(float x);
void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
size_t outer_dim);
void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
size_t outer_dim, bool use_multi_threads = true);
std::pair<AnfNodePtr, size_t> GetKernelInput(const AnfNodePtr &anf_node, size_t index);
std::vector<std::pair<AnfNodePtr, std::pair<size_t, size_t>>> GetInputIndex(const std::vector<AnfNodePtr> &node_list,
const std::vector<AnfNodePtr> &input_list);
std::vector<std::pair<AnfNodePtr, size_t>> GetOutputIndex(const std::vector<AnfNodePtr> &node_list,
const std::vector<AnfNodePtr> &input_list,
const std::vector<AnfNodePtr> &output_list);
void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *node_list,
std::vector<AnfNodePtr> *input_list, std::vector<AnfNodePtr> *output_list);
void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *node_list);
bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann::json *const node_json);
void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector<std::pair<AnfNodePtr, size_t>> *node_list);
bool IsWeightBoundary(const AnfNodePtr &node);
void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params,
size_t total_compute_size);
void RunMultiThreadReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad,
size_t outer_dim, std::vector<std::pair<int, size_t>> *sorted_indices,
std::vector<size_t> *slice_positions);
void ReduceMultiSparseGradient(const std::vector<std::shared_ptr<SparseGradient>> &unique_slice_grads,
SparseGradient *tmp_grad, SparseGradient *unique_grad, size_t first_dim,
size_t outer_dim);
void TwoLevelReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *tmp_grad,
SparseGradient *unique_grad, size_t first_dim, size_t outer_dim);
std::vector<int> GetReduceAttrAxis(const CNodePtr &cnode);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_COMMON_UTILS_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/addn_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
void AddNCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
input_num_ = AnfAlgo::GetInputTensorNum(kernel_node);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
CPUKernelUtils::ExpandDimsTo4(&output_shape_);
}
bool AddNCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
size_t offset = 0;
for (size_t i = 0; i < output_shape_[0]; ++i) {
for (size_t j = 0; j < output_shape_[1]; ++j) {
for (size_t k = 0; k < output_shape_[2]; ++k) {
for (size_t m = 0; m < output_shape_[3]; ++m) {
float sum = 0;
for (size_t index = 0; index < input_num_; ++index) {
auto input_addr = reinterpret_cast<float *>(inputs[index]->addr);
sum += input_addr[offset];
}
output_addr[offset++] = sum;
}
}
}
}
return true;
}
void AddNCPUKernel::CheckParam(const CNodePtr &kernel_node) {
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape.size() > 4) {
MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but AddNCPUKernel olny support 4d or lower.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but AddNCPUKernel needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class AddNCPUKernel : public CPUKernel {
public:
AddNCPUKernel() : input_num_(0) {}
~AddNCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
void CheckParam(const CNodePtr &kernel_node);
size_t input_num_;
std::vector<size_t> output_shape_;
};
MS_REG_CPU_KERNEL(AddN,
KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
AddNCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/allgather_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "runtime/device/cpu/mpi/mpi_adapter.h"
#include "utils/log_adapter.h"
namespace mindspore {
namespace kernel {
namespace {
constexpr auto kRanksGroup = "group";
constexpr auto kAllGatherInputNum = 1;
} // namespace
void AllGatherCPUKernel::InitKernel(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != kAllGatherInputNum) {
MS_LOG(EXCEPTION) << "allgather input num:" << input_num;
}
auto ranks_group = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kRanksGroup);
if (ranks_group != nullptr) {
ranks_group_ = GetValue<std::vector<int>>(ranks_group);
} else {
MS_LOG(EXCEPTION) << "Miss attribute " << kRanksGroup;
}
}
bool AllGatherCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
auto input_data_num = inputs[0]->size / sizeof(float);
auto mpi_instance = device::cpu::MPIAdapter::Instance();
MS_EXCEPTION_IF_NULL(mpi_instance);
return mpi_instance->AllGather(input_addr, output_addr, ranks_group_, input_data_num);
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class AllGatherCPUKernel : public CPUKernel {
public:
AllGatherCPUKernel() = default;
~AllGatherCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
std::vector<int> ranks_group_;
};
MS_REG_CPU_KERNEL(_HostAllGather, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
AllGatherCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/utils.h"
namespace mindspore {
namespace kernel {
void ApplyMomentumCPUKernel::InitKernel(const CNodePtr & /*kernel_node*/) {}
bool ApplyMomentumCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> & /*outputs*/) {
if (inputs.size() < 5) {
MS_LOG(EXCEPTION) << "error input output size!";
}
if (inputs[0]->size != inputs[1]->size || inputs[0]->size != inputs[3]->size) {
MS_LOG(EXCEPTION) << "error input data size!";
}
auto weight = reinterpret_cast<float *>(inputs[0]->addr);
auto accumulate = reinterpret_cast<float *>(inputs[1]->addr);
float learning_rate = reinterpret_cast<float *>(inputs[2]->addr)[0];
auto gradient = reinterpret_cast<float *>(inputs[3]->addr);
float moment = reinterpret_cast<float *>(inputs[4]->addr)[0];
size_t elem_num = inputs[0]->size / sizeof(float);
for (size_t i = 0; i < elem_num; ++i) {
accumulate[i] = accumulate[i] * moment + gradient[i];
weight[i] -= accumulate[i] * learning_rate;
}
return true;
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {
class ApplyMomentumCPUKernel : public MKLCPUKernel {
public:
ApplyMomentumCPUKernel() = default;
~ApplyMomentumCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
};
MS_REG_CPU_KERNEL(ApplyMomentum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ApplyMomentumCPUKernel);
MS_REG_CPU_KERNEL(ApplyMomentum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ApplyMomentumCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/argmax_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
void ArgmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (shape.size() != 2) {
MS_LOG(EXCEPTION) << "argmax kernel dims invalid " << shape.size();
}
batch_size_ = shape[0];
class_num_ = shape[1];
int axis = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS);
if (axis != -1 && axis != 1) {
MS_LOG(EXCEPTION) << "argmax kernel not support axis " << axis;
}
}
bool ArgmaxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspaces*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "input or output empty!";
}
size_t batch_float_size = batch_size_ * sizeof(float);
size_t batch_class_float_size = class_num_ * batch_float_size;
if (inputs[0]->size != batch_class_float_size || outputs[0]->size != batch_float_size) {
MS_LOG(EXCEPTION) << "invalid input or output data size!";
}
auto input = reinterpret_cast<float *>(inputs[0]->addr);
auto output = reinterpret_cast<int *>(outputs[0]->addr);
size_t row_start = 0;
for (size_t i = 0; i < batch_size_; ++i) {
size_t max_index = 0;
float max_value = input[row_start];
for (size_t j = 1; j < class_num_; ++j) {
size_t index = row_start + j;
if (input[index] > max_value) {
max_value = input[index];
max_index = j;
}
}
output[i] = SizeToInt(max_index);
row_start += class_num_;
}
return true;
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class ArgmaxCPUKernel : public CPUKernel {
public:
ArgmaxCPUKernel() = default;
~ArgmaxCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
size_t class_num_{0};
size_t batch_size_{0};
};
MS_REG_CPU_KERNEL(Argmax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32),
ArgmaxCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/bias_add_cpu_kernel.h"
namespace mindspore {
namespace kernel {
void BiasAddCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
bias_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
if (input_shape_.size() == 4) {
data_shape_ = 4;
} else if (input_shape_.size() == 2) {
data_shape_ = 2;
} else {
MS_LOG(EXCEPTION) << "bias add input data format should be NCHW or NC";
}
if (input_shape_.size() != 2 && input_shape_.size() != 4) {
MS_LOG(EXCEPTION) << "bias add input shape nchw or nc";
}
if (bias_shape_.size() != 1) {
MS_LOG(EXCEPTION) << "bias shape invalid";
}
if (input_shape_[1] != bias_shape_[0]) {
MS_LOG(EXCEPTION) << "bias shape not match";
}
}
bool BiasAddCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> &outputs) {
if (inputs.size() != 2 || outputs.size() != 1) {
MS_LOG(EXCEPTION) << "inputs outputs size not supoort";
}
auto src_addr = reinterpret_cast<float *>(inputs[0]->addr);
auto bias_addr = reinterpret_cast<float *>(inputs[1]->addr);
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
if (data_shape_ == 4) {
size_t h_size = input_shape_[3];
size_t c_size = input_shape_[2] * h_size;
size_t n_size = input_shape_[1] * c_size;
size_t hw_size = input_shape_[2] * input_shape_[3];
size_t n_offset = 0;
for (size_t n = 0; n < input_shape_[0]; ++n) {
size_t c_offset = 0;
for (size_t c = 0; c < input_shape_[1]; ++c) {
for (size_t hw = 0; hw < hw_size; ++hw) {
size_t offset = n_offset + c_offset + hw;
output_addr[offset] = src_addr[offset] + bias_addr[c];
}
c_offset += c_size;
}
n_offset += n_size;
}
} else {
size_t n_offset = 0;
for (size_t n = 0; n < input_shape_[0]; ++n) {
for (size_t c = 0; c < input_shape_[1]; ++c) {
output_addr[n_offset + c] = src_addr[n_offset + c] + bias_addr[c];
}
n_offset += input_shape_[1];
}
}
return true;
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class BiasAddCPUKernel : public CPUKernel {
public:
BiasAddCPUKernel() = default;
~BiasAddCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
uint8_t data_shape_{0};
std::vector<size_t> input_shape_;
std::vector<size_t> bias_shape_;
};
MS_REG_CPU_KERNEL(
BiasAdd,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BiasAddCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h"
namespace mindspore {
namespace kernel {
void BiasAddGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (input_shape_.size() != 4 && input_shape_.size() != 2) {
MS_LOG(EXCEPTION) << "input data format not support";
}
}
bool BiasAddGradCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> &outputs) {
if (inputs.size() != 1 || outputs.size() != 1) {
MS_LOG(EXCEPTION) << "input output size not support";
}
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
if (input_shape_.size() == 4) {
size_t h_size = input_shape_[3];
size_t c_size = h_size * input_shape_[2];
size_t n_size = c_size * input_shape_[1];
size_t hw_size = input_shape_[2] * input_shape_[3];
size_t c_offset = 0;
for (size_t c = 0; c < input_shape_[1]; ++c) {
output_addr[c] = 0;
size_t n_offset = 0;
for (size_t n = 0; n < input_shape_[0]; ++n) {
for (size_t hw = 0; hw < hw_size; ++hw) {
size_t offset = c_offset + n_offset + hw;
output_addr[c] += input_addr[offset];
}
n_offset += n_size;
}
c_offset += c_size;
}
} else if (input_shape_.size() == 2) {
for (size_t c = 0; c < input_shape_[1]; ++c) {
output_addr[c] = 0;
size_t n_offset = 0;
for (size_t n = 0; n < input_shape_[0]; ++n) {
output_addr[c] += input_addr[c + n_offset];
n_offset += input_shape_[1];
}
}
}
return true;
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIASADDGRADCPUKERNEL_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIASADDGRADCPUKERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class BiasAddGradCPUKernel : public CPUKernel {
public:
BiasAddGradCPUKernel() = default;
~BiasAddGradCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
std::vector<size_t> input_shape_;
};
MS_REG_CPU_KERNEL(BiasAddGrad, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BiasAddGradCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIASADDGRADCPUKERNEL_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/concat_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
void ConcatCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
axis_ = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS);
auto input_1_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (axis_ < 0) {
axis_ = axis_ + SizeToInt(input_1_shape.size());
}
axis_ += 4 - input_1_shape.size();
auto input_num = AnfAlgo::GetInputTensorNum(kernel_node);
for (size_t i = 0; i < input_num; i++) {
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i);
CPUKernelUtils::ExpandDimsTo4(&input_shape);
input_shape_list_.push_back(input_shape);
}
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
CPUKernelUtils::ExpandDimsTo4(&output_shape_);
}
bool ConcatCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
auto buff_size = outputs[0]->size;
size_t dim0 = output_shape_[0];
size_t dim1 = output_shape_[1];
size_t dim2 = output_shape_[2];
if (axis_ == 3) {
for (size_t i = 0; i < dim0; ++i) {
for (size_t j = 0; j < dim1; ++j) {
for (size_t k = 0; k < dim2; ++k) {
CopyDataToOutput(inputs, i, j, k, &output_addr, &buff_size);
}
}
}
} else if (axis_ == 2) {
for (size_t i = 0; i < dim0; ++i) {
for (size_t j = 0; j < dim1; ++j) {
CopyDataToOutput(inputs, i, j, 0, &output_addr, &buff_size);
}
}
} else if (axis_ == 1) {
for (size_t i = 0; i < dim0; ++i) {
CopyDataToOutput(inputs, i, 0, 0, &output_addr, &buff_size);
}
} else if (axis_ == 0) {
CopyDataToOutput(inputs, 0, 0, 0, &output_addr, &buff_size);
}
return true;
}
void ConcatCPUKernel::CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1,
size_t dim2, float **output_addr, size_t *buff_size) {
for (size_t i = 0; i < input_shape_list_.size(); ++i) {
auto input_i_shape = input_shape_list_[i];
auto input_i_addr = reinterpret_cast<float *>(inputs[i]->addr);
size_t num = CPUKernelUtils::GetElementNumOnAxis(input_i_shape, axis_);
num *= input_i_shape[axis_];
auto pos = CPUKernelUtils::CalcOffset(input_i_shape, dim0, dim1, dim2, 0);
auto ret = memcpy_s(*output_addr, *buff_size, input_i_addr + pos, num * sizeof(float));
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memcpy failed.";
}
*output_addr += num;
*buff_size -= num * sizeof(float);
}
}
void ConcatCPUKernel::CheckParam(const CNodePtr &kernel_node) {
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape.size() > 4) {
MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but ConcatCPUKernel olny support 4d or lower.";
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ConcatCPUKernel needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class ConcatCPUKernel : public CPUKernel {
public:
ConcatCPUKernel() : axis_(0) {}
~ConcatCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
void CheckParam(const CNodePtr &kernel_node);
void CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, size_t dim2,
float **output_addr, size_t *buff_size);
int axis_;
std::vector<std::vector<size_t>> input_shape_list_;
std::vector<size_t> output_shape_;
};
MS_REG_CPU_KERNEL(Concat,
KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ConcatCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册