diff --git a/lite/backends/huawei_ascend_npu/device.cc b/lite/backends/huawei_ascend_npu/device.cc index c8dc3d1de46fe12c3cb41257f864bcb1ff82bd9a..61fa05b0201d5b3cd9b33308996fba6b3cd95111 100644 --- a/lite/backends/huawei_ascend_npu/device.cc +++ b/lite/backends/huawei_ascend_npu/device.cc @@ -67,6 +67,15 @@ bool Device::Build(std::vector& input_nodes, // NOLINT std::lock_guard lock(device_mutex_); // Convert the HiAI IR graph to the HiAI om model ge::Graph ir_graph("graph"); + // set input node attr index is node size > 1 + if (input_nodes.size() > 1) { + int idx = 0; + for (auto node : input_nodes) { + node.SetAttr("index", idx); + idx++; + } + } + VLOG(3) << "Getting input node size " << input_nodes.size(); ir_graph.SetInputs(input_nodes).SetOutputs(output_nodes); // Build IR model diff --git a/lite/backends/huawei_ascend_npu/model_client.cc b/lite/backends/huawei_ascend_npu/model_client.cc index 02a8014210b24f8ae143ee68341aec0281d5a570..96b5e208f290357db1484e8d8d6046590b9a88ff 100644 --- a/lite/backends/huawei_ascend_npu/model_client.cc +++ b/lite/backends/huawei_ascend_npu/model_client.cc @@ -24,50 +24,28 @@ bool AclModelClient::LoadFromMem(const void* data, uint32_t size) { return true; } - auto ret = aclmdlQuerySizeFromMem( - data, size, &model_memory_size_, &model_weight_size_); - if (ret != ACL_ERROR_NONE) { - LOG(WARNING) << "[HUAWEI_ASCEND_NPU] query model size from memory failed!"; - return false; - } - ret = aclrtMalloc( - &model_memory_ptr_, model_memory_size_, ACL_MEM_MALLOC_HUGE_FIRST); - if (ret != ACL_ERROR_NONE) { - LOG(WARNING) << "[HUAWEI_ASCEND_NPU] malloc buffer for model memory " - "failed, require size is " - << model_memory_size_; - return false; - } - ret = aclrtMalloc( - &model_weight_ptr_, model_weight_size_, ACL_MEM_MALLOC_HUGE_FIRST); - if (ret != ACL_ERROR_NONE) { - LOG(WARNING) << "[HUAWEI_ASCEND_NPU] malloc buffer for model weigth " - "failed, require size is " - << model_weight_size_; - return false; - } - ret = 
aclmdlLoadFromMemWithMem(data, - size, - &model_id_, - model_memory_ptr_, - model_memory_size_, - model_weight_ptr_, - model_weight_size_); - if (ret != ACL_ERROR_NONE) { - LOG(WARNING) << "[HUAWEI_ASCEND_NPU] Load model from memory failed!"; - return false; - } + ACL_CALL(aclmdlQuerySizeFromMem( + data, size, &model_memory_size_, &model_weight_size_)); + ACL_CALL(aclrtMalloc( + &model_memory_ptr_, model_memory_size_, ACL_MEM_MALLOC_HUGE_FIRST)); + ACL_CALL(aclrtMalloc( + &model_weight_ptr_, model_weight_size_, ACL_MEM_MALLOC_HUGE_FIRST)); + ACL_CALL(aclmdlLoadFromMemWithMem(data, + size, + &model_id_, + model_memory_ptr_, + model_memory_size_, + model_weight_ptr_, + model_weight_size_)); + model_desc_ = aclmdlCreateDesc(); if (model_desc_ == nullptr) { LOG(WARNING) << "[HUAWEI_ASCEND_NPU] create model description failed!"; return false; } - ret = aclmdlGetDesc(model_desc_, model_id_); - if (ret != ACL_ERROR_NONE) { - LOG(WARNING) << "[HUAWEI_ASCEND_NPU] get model description failed!"; - return false; - } - VLOG(3) << "[HUAWEI_ASCEND_NPU] AclModelClient LoadFromMem success."; + ACL_CALL(aclmdlGetDesc(model_desc_, model_id_)); + + VLOG(3) << "[HUAWEI_ASCEND_NPU] Load model form memeory success."; load_flag_ = true; return true; } @@ -77,49 +55,28 @@ bool AclModelClient::LoadFromFile(const char* model_path) { LOG(WARNING) << "[HUAWEI_ASCEND_NPU] model is already loaded!"; return true; } - auto ret = - aclmdlQuerySize(model_path, &model_memory_size_, &model_weight_size_); - if (ret != ACL_ERROR_NONE) { - LOG(WARNING) << "[HUAWEI_ASCEND_NPU] query model size from file failed!"; - return false; - } - ret = aclrtMalloc( - &model_memory_ptr_, model_memory_size_, ACL_MEM_MALLOC_HUGE_FIRST); - if (ret != ACL_ERROR_NONE) { - LOG(WARNING) << "[HUAWEI_ASCEND_NPU] malloc buffer for model memory " - "failed, require size is " - << model_memory_size_; - return false; - } - ret = aclrtMalloc( - &model_weight_ptr_, model_weight_size_, ACL_MEM_MALLOC_HUGE_FIRST); - if (ret != 
ACL_ERROR_NONE) { - LOG(WARNING) << "[HUAWEI_ASCEND_NPU] malloc buffer for model weigth " - "failed, require size is " - << model_weight_size_; - return false; - } - ret = aclmdlLoadFromFileWithMem(model_path, - &model_id_, - model_memory_ptr_, - model_memory_size_, - model_weight_ptr_, - model_weight_size_); - if (ret != ACL_ERROR_NONE) { - LOG(WARNING) << "[HUAWEI_ASCEND_NPU] Load model from file failed!"; - return false; - } + + ACL_CALL( + aclmdlQuerySize(model_path, &model_memory_size_, &model_weight_size_)); + ACL_CALL(aclrtMalloc( + &model_memory_ptr_, model_memory_size_, ACL_MEM_MALLOC_HUGE_FIRST)); + ACL_CALL(aclrtMalloc( + &model_weight_ptr_, model_weight_size_, ACL_MEM_MALLOC_HUGE_FIRST)); + ACL_CALL(aclmdlLoadFromFileWithMem(model_path, + &model_id_, + model_memory_ptr_, + model_memory_size_, + model_weight_ptr_, + model_weight_size_)); + model_desc_ = aclmdlCreateDesc(); if (model_desc_ == nullptr) { LOG(WARNING) << "[HUAWEI_ASCEND_NPU] create model description failed!"; return false; } - ret = aclmdlGetDesc(model_desc_, model_id_); - if (ret != ACL_ERROR_NONE) { - LOG(WARNING) << "[HUAWEI_ASCEND_NPU] get model description failed!"; - return false; - } - VLOG(3) << "[HUAWEI_ASCEND_NPU] Loading model file success:" << model_path; + ACL_CALL(aclmdlGetDesc(model_desc_, model_id_)); + + VLOG(3) << "[HUAWEI_ASCEND_NPU] Load model form file success: " << model_path; load_flag_ = true; return true; } @@ -132,33 +89,25 @@ bool AclModelClient::GetModelIOTensorDim( return false; } size_t input_num = aclmdlGetNumInputs(model_desc_); - VLOG(3) << "[HUAWEI_ASCEND_NPU] input numher is " << input_num; + VLOG(3) << "[HUAWEI_ASCEND_NPU] input number is " << input_num; for (size_t i = 0; i < input_num; i++) { VLOG(3) << "[HUAWEI_ASCEND_NPU] printing input [" << i << "] ...."; aclmdlIODims input_dim; - aclmdlGetInputDims(model_desc_, i, &input_dim); + ACL_CALL(aclmdlGetInputDims(model_desc_, i, &input_dim)); aclDataType data_type = aclmdlGetInputDataType(model_desc_, i); 
- VLOG(3) << "[HUAWEI_ASCEND_NPU] data_type of inputs[" << i << "] is " - << data_type; aclFormat data_format = aclmdlGetInputFormat(model_desc_, i); - VLOG(3) << "[HUAWEI_ASCEND_NPU] data_format of inputs[" << i << "] is " - << data_format; TensorDesc tensor_desc = TensorDesc(data_type, input_dim, data_format); input_tensor->push_back(tensor_desc); } size_t output_num = aclmdlGetNumOutputs(model_desc_); - VLOG(3) << "[HUAWEI_ASCEND_NPU] output numher is " << output_num; + VLOG(3) << "[HUAWEI_ASCEND_NPU] output number is " << output_num; for (size_t i = 0; i < output_num; i++) { VLOG(3) << "[HUAWEI_ASCEND_NPU] printing output [" << i << "] ...."; aclmdlIODims output_dim; - aclmdlGetOutputDims(model_desc_, i, &output_dim); + ACL_CALL(aclmdlGetOutputDims(model_desc_, i, &output_dim)); aclDataType data_type = aclmdlGetOutputDataType(model_desc_, i); - VLOG(3) << "[HUAWEI_ASCEND_NPU] data_type of outputs[" << i << "] is " - << data_type; aclFormat data_format = aclmdlGetOutputFormat(model_desc_, i); - VLOG(3) << "[HUAWEI_ASCEND_NPU] data_format of outputs[" << i << "] is " - << data_format; TensorDesc tensor_desc = TensorDesc(data_type, output_dim, data_format); output_tensor->push_back(tensor_desc); } @@ -181,28 +130,16 @@ bool AclModelClient::GetTensorFromDataset( uint32_t device_size = aclGetDataBufferSize(buffer_device); void* tensor_data = nullptr; - aclError ret = aclrtMallocHost(&tensor_data, device_size); - if (ret != ACL_ERROR_NONE) { - LOG(ERROR) << "[HUAWEI_ASCEND_NPU] aclrtMallocHost failed, ret " << ret; - return false; - } - ret = aclrtMemcpy(tensor_data, - device_size, - device_data, - device_size, - ACL_MEMCPY_DEVICE_TO_HOST); - if (ret != ACL_ERROR_NONE) { - LOG(ERROR) << "[HUAWEI_ASCEND_NPU] aclrtMemcpy failed, ret " << ret; - return false; - } - if (output_tensor->at(i)->SetData(reinterpret_cast(tensor_data), - device_size) != ge::GRAPH_SUCCESS) { - LOG(ERROR) << "[HUAWEI_ASCEND_NPU] SetData to output tensor failed"; - return false; - } - } - VLOG(3) 
- << "[HUAWEI_ASCEND_NPU] Get output tensor from output dataset succeed."; + ACL_CALL(aclrtMallocHost(&tensor_data, device_size)); + ACL_CALL(aclrtMemcpy(tensor_data, + device_size, + device_data, + device_size, + ACL_MEMCPY_DEVICE_TO_HOST)); + ATC_CALL(output_tensor->at(i)->SetData( + reinterpret_cast(tensor_data), device_size)); + } + VLOG(3) << "[HUAWEI_ASCEND_NPU] Get output tensor from dataset succeed."; return true; } @@ -218,37 +155,33 @@ void AclModelClient::CreateInputDataset( auto item = input_tensor->at(i); size_t buffer_size = item->GetSize(); void* buffer_device = nullptr; - aclError ret = - aclrtMalloc(&buffer_device, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY); - if (ret != ACL_ERROR_NONE) { - LOG(ERROR) - << "[HUAWEI_ASCEND_NPU] input malloc device buffer failed. size is " - << buffer_size; - return; - } + + ACL_CALL( + aclrtMalloc(&buffer_device, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY)); + void* buffer_data = reinterpret_cast(item->GetData()); - ret = aclrtMemcpy(buffer_device, - buffer_size, - buffer_data, - buffer_size, - ACL_MEMCPY_HOST_TO_DEVICE); + auto ret = aclrtMemcpy(buffer_device, + buffer_size, + buffer_data, + buffer_size, + ACL_MEMCPY_HOST_TO_DEVICE); if (ret != ACL_ERROR_NONE) { LOG(ERROR) << "[HUAWEI_ASCEND_NPU] input memcpy failed, buffer size is " << buffer_size; - aclrtFree(buffer_device); + ACL_CALL(aclrtFree(buffer_device)); return; } aclDataBuffer* data_buffer = aclCreateDataBuffer(buffer_device, buffer_size); if (data_buffer == nullptr) { LOG(ERROR) << "[HUAWEI_ASCEND_NPU] output aclCreateDataBuffer failed!"; - aclrtFree(buffer_device); + ACL_CALL(aclrtFree(buffer_device)); return; } if (aclmdlAddDatasetBuffer(input_dataset_, data_buffer) != ACL_ERROR_NONE) { LOG(ERROR) << "[HUAWEI_ASCEND_NPU] input aclmdlAddDatasetBuffer failed!"; - aclrtFree(buffer_device); - aclDestroyDataBuffer(data_buffer); + ACL_CALL(aclrtFree(buffer_device)); + ACL_CALL(aclDestroyDataBuffer(data_buffer)); return; } } @@ -266,26 +199,20 @@ void 
AclModelClient::CreateOutputDataset( for (size_t i = 0; i < output_size; i++) { size_t buffer_size = aclmdlGetOutputSizeByIndex(model_desc_, i); void* buffer_device = nullptr; - aclError ret = - aclrtMalloc(&buffer_device, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY); - if (ret != ACL_ERROR_NONE) { - LOG(ERROR) - << "[HUAWEI_ASCEND_NPU] output malloc device buffer failed. size is " - << buffer_size; - return; - } + ACL_CALL( + aclrtMalloc(&buffer_device, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY)); aclDataBuffer* data_buffer = aclCreateDataBuffer(buffer_device, buffer_size); if (data_buffer == nullptr) { LOG(ERROR) << "[HUAWEI_ASCEND_NPU] output aclCreateDataBuffer failed!"; - aclrtFree(buffer_device); + ACL_CALL(aclrtFree(buffer_device)); return; } if (aclmdlAddDatasetBuffer(output_dataset_, data_buffer) != ACL_ERROR_NONE) { LOG(ERROR) << "[HUAWEI_ASCEND_NPU] output aclmdlAddDatasetBuffer failed!"; - aclrtFree(buffer_device); - aclDestroyDataBuffer(data_buffer); + ACL_CALL(aclrtFree(buffer_device)); + ACL_CALL(aclDestroyDataBuffer(data_buffer)); return; } } @@ -332,21 +259,13 @@ void AclModelClient::DestroyDataset(aclmdlDataset** dataset) { aclDataBuffer* buffer_device = aclmdlGetDatasetBuffer(*dataset, i); void* device_data = aclGetDataBufferAddr(buffer_device); if (device_data == nullptr) { - LOG(WARNING) - << "[HUAWEI_ASCEND_NPU] failed to get data buffer of deivce data!"; + LOG(WARNING) << "[HUAWEI_ASCEND_NPU] failed to get data buffer!"; } else { - if (aclrtFree(device_data) != ACL_ERROR_NONE) { - LOG(WARNING) << "[HUAWEI_ASCEND_NPU] failed to free deivce data!"; - } - } - if (aclDestroyDataBuffer(buffer_device) != ACL_ERROR_NONE) { - LOG(WARNING) - << "[HUAWEI_ASCEND_NPU] failed to destroy deivce data buffer!"; + ACL_CALL(aclrtFree(device_data)); } + ACL_CALL(aclDestroyDataBuffer(buffer_device)); } - if (aclmdlDestroyDataset(*dataset) != ACL_ERROR_NONE) { - LOG(WARNING) << "[HUAWEI_ASCEND_NPU] failed to destroy dataset!"; - } + 
ACL_CALL(aclmdlDestroyDataset(*dataset)); *dataset = nullptr; VLOG(3) << "[HUAWEI_ASCEND_NPU] Destroy dataset success."; } @@ -361,24 +280,20 @@ bool AclModelClient::UnloadModel() { DestroyDataset(&input_dataset_); DestroyDataset(&output_dataset_); - aclError ret = aclmdlUnload(model_id_); - if (ret != ACL_ERROR_NONE) { - LOG(ERROR) << "unload model failed, model id is " << model_id_; - return false; - } + ACL_CALL(aclmdlUnload(model_id_)); if (model_desc_ != nullptr) { - (void)aclmdlDestroyDesc(model_desc_); + ACL_CALL(aclmdlDestroyDesc(model_desc_)); model_desc_ = nullptr; } if (model_memory_ptr_ != nullptr) { - aclrtFree(model_memory_ptr_); + ACL_CALL(aclrtFree(model_memory_ptr_)); model_memory_ptr_ = nullptr; model_memory_size_ = 0; } if (model_weight_ptr_ != nullptr) { - aclrtFree(model_weight_ptr_); + ACL_CALL(aclrtFree(model_weight_ptr_)); model_weight_ptr_ = nullptr; model_weight_size_ = 0; } diff --git a/lite/backends/huawei_ascend_npu/model_client.h b/lite/backends/huawei_ascend_npu/model_client.h index 5cf19b26261a4ff0301b493c7edf2de6ce3f7ec1..85643c917de08ca1745b7649011a9ecdab6afeb0 100644 --- a/lite/backends/huawei_ascend_npu/model_client.h +++ b/lite/backends/huawei_ascend_npu/model_client.h @@ -35,32 +35,39 @@ class TensorDesc { ge_tensor_desc_ = new ge::TensorDesc( GetGeShape(dims), GetGeFormat(format), GetGeDataType(data_type)); CHECK(ge_tensor_desc_ != nullptr); + VLOG(3) << "[HUAWEI_ASCEND_NPU] Getting data shape : " << repr(); } ~TensorDesc() { ge_tensor_desc_ = nullptr; } - int64_t GetNumber() const { - return ge_tensor_desc_->GetShape().GetDim(dim_order[0]); - } - int64_t GetChannel() const { - return ge_tensor_desc_->GetShape().GetDim(dim_order[1]); - } - int64_t GetHeight() const { - return ge_tensor_desc_->GetShape().GetDim(dim_order[2]); + + const ge::TensorDesc& GetGeTensorDesc() const { return *ge_tensor_desc_; } + + std::string repr() const { + STL::stringstream ss; + size_t dim_size = ge_tensor_desc_->GetShape().GetDimNum(); + if 
(dim_size == 0) { + ss << "{}"; + return ss.str(); + } + ss << "{"; + for (size_t i = 0; i < dim_size - 1; i++) { + ss << ge_tensor_desc_->GetShape().GetDim(i) << ","; + } + ss << ge_tensor_desc_->GetShape().GetDim(dim_size - 1); + ss << "}"; + return ss.str(); } - int64_t GetWidth() const { - return ge_tensor_desc_->GetShape().GetDim(dim_order[3]); + + int64_t production() const { + return ge_tensor_desc_->GetShape().GetShapeSize(); } - const ge::TensorDesc& GetGeTensorDesc() const { return *ge_tensor_desc_; } private: ge::Shape GetGeShape(aclmdlIODims dims) { - ge::Shape ge_shape({0, 0, 0, 0}); + auto shape_data = std::vector({1L, 1L, 1L, 1L}); + shape_data.resize(dims.dimCount); + ge::Shape ge_shape(shape_data); for (size_t i = 0; i < dims.dimCount; i++) { - if (ge_shape.SetDim(i, dims.dims[i]) != ge::GRAPH_SUCCESS) { - LOG(WARNING) << "[HUAWEI_ASCEND_NPU] ge::Shape SetDim failed!"; - } else { - VLOG(3) << "[HUAWEI_ASCEND_NPU] Setting Ge Shape[" << i << "] = <" - << dims.dims[i] << ">"; - } + ATC_CALL(ge_shape.SetDim(i, dims.dims[i])); } return ge_shape; } @@ -80,6 +87,8 @@ class TensorDesc { LOG(FATAL) << "[HUAWEI_ASCEND_NPU] format not supported:" << format; break; } + VLOG(3) << "[HUAWEI_ASCEND_NPU] Getting data format : " + << CvtFormat(ge_format); return ge_format; } ge::DataType GetGeDataType(aclDataType data_type) { @@ -110,6 +119,8 @@ class TensorDesc { LOG(FATAL) << "[HUAWEI_ASCEND_NPU] data type not supported!"; break; } + VLOG(3) << "[HUAWEI_ASCEND_NPU] Getting data type : " + << CvtDataType(ge_datatype); return ge_datatype; } diff --git a/lite/backends/huawei_ascend_npu/utils.h b/lite/backends/huawei_ascend_npu/utils.h index e2bff3f87e0831f7b98be60ef3980f10da610f10..61df063fc418c3aa648dd029e18ab68627bbc937 100644 --- a/lite/backends/huawei_ascend_npu/utils.h +++ b/lite/backends/huawei_ascend_npu/utils.h @@ -13,6 +13,8 @@ // limitations under the License. 
#pragma once + +#include #include "acl/acl.h" #include "ge/ge_api_types.h" #include "ge/ge_ir_build.h" @@ -21,11 +23,16 @@ #include "graph/tensor.h" #include "graph/types.h" #include "lite/utils/cp_logging.h" +#include "lite/utils/replace_stl/stream.h" /* * This file contains some Huawei Ascend NPU specific uitls. */ +namespace paddle { +namespace lite { +namespace huawei_ascend_npu { + #define ACL_CALL(msg) \ CHECK_EQ(reinterpret_cast(msg), ACL_ERROR_NONE) \ << (msg) << " Huawei Ascend NPU ACL Error: " \ @@ -38,10 +45,6 @@ << ::paddle::lite::huawei_ascend_npu::AtcErrorInfo( \ reinterpret_cast(msg)) -namespace paddle { -namespace lite { -namespace huawei_ascend_npu { - static const char* AtcErrorInfo(uint32_t error) { switch (error) { #define LITE_ATC_ERROR_INFO(xx) \ @@ -123,6 +126,61 @@ static const char* AclErrorInfo(int error) { } } +static const std::string& CvtFormat(ge::Format format) { + static const int MAX_FORMAT_LENGTH = 25; + static const std::string format2string[] = { + "FORMAT_NCHW = 0", + "FORMAT_NHWC = 1", + "FORMAT_ND = 2", + "FORMAT_NC1HWC0 = 3", + "FORMAT_FRACTAL_Z = 4", + "FORMAT_NC1C0HWPAD = 5", + "FORMAT_NHWC1C0 = 6", + "FORMAT_FSR_NCHW = 7", + "FORMAT_FRACTAL_DECONV = 8", + "FORMAT_C1HWNC0 = 9", + "FORMAT_FRACTAL_DECONV_TRANSPOSE = 10", + "FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS = 11", + "FORMAT_NC1HWC0_C04 = 12", + "FORMAT_FRACTAL_Z_C04 = 13", + "FORMAT_CHWN = 14", + "FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15", + "FORMAT_HWCN = 16", + "FORMAT_NC1KHKWHWC0 = 17", + "FORMAT_BN_WEIGHT = 18", + "FORMAT_FILTER_HWCK = 19", + "FORMAT_HASHTABLE_LOOKUP_LOOKUPS = 20", + "FORMAT_HASHTABLE_LOOKUP_KEYS = 21", + "FORMAT_HASHTABLE_LOOKUP_VALUE = 22", + "FORMAT_HASHTABLE_LOOKUP_OUTPUT = 23", + "FORMAT_HASHTABLE_LOOKUP_HITS = 24"}; + auto x = static_cast(format); + CHECK_LT(x, MAX_FORMAT_LENGTH); + return format2string[x]; +} + +static const std::string& CvtDataType(ge::DataType data_type) { + static const int MAX_DATATYPE_LENGTH = 14; + static const 
std::string datatype2string[] = {"DT_FLOAT=0", + "DT_FLOAT16=1", + "DT_INT8=2", + "DT_INT32=3", + "DT_UINT8=4", + "Unknown=5", + "DT_INT16=6", + "DT_UINT16=7", + "DT_UINT32=8", + "DT_INT64=9", + "DT_UINT64=10", + "DT_DOUBLE=11", + "DT_BOOL=12", + "DT_STRING=13"}; + + auto x = static_cast(data_type); + CHECK_LT(x, MAX_DATATYPE_LENGTH); + return datatype2string[x]; +} + } // namespace huawei_ascend_npu } // namespace lite } // namespace paddle diff --git a/lite/kernels/huawei_ascend_npu/bridges/CMakeLists.txt b/lite/kernels/huawei_ascend_npu/bridges/CMakeLists.txt index 14f67ca1c70e21dd52b0bd1e7f34c890b5ce6f33..b0a50a2a549ab910c27d2c4148a0d323780d4cf5 100644 --- a/lite/kernels/huawei_ascend_npu/bridges/CMakeLists.txt +++ b/lite/kernels/huawei_ascend_npu/bridges/CMakeLists.txt @@ -10,6 +10,7 @@ set(huawei_ascend_npu_subgraph_bridge_deps subgraph_bridge_registry subgraph_bri lite_cc_library(subgraph_bridge_act_op_huawei_ascend_npu SRCS act_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps}) lite_cc_library(subgraph_bridge_conv_op_huawei_ascend_npu SRCS conv_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps}) lite_cc_library(subgraph_bridge_interpolate_op_huawei_ascend_npu SRCS interpolate_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps}) +lite_cc_library(subgraph_bridge_concat_op_huawei_ascend_npu SRCS concat_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps}) set(huawei_ascend_npu_subgraph_bridges subgraph_bridge_registry @@ -18,4 +19,5 @@ set(huawei_ascend_npu_subgraph_bridges subgraph_bridge_act_op_huawei_ascend_npu subgraph_bridge_conv_op_huawei_ascend_npu subgraph_bridge_interpolate_op_huawei_ascend_npu + subgraph_bridge_concat_op_huawei_ascend_npu CACHE INTERNAL "huawei_ascend_npu_subgraph_bridges") diff --git a/lite/kernels/huawei_ascend_npu/bridges/act_op.cc b/lite/kernels/huawei_ascend_npu/bridges/act_op.cc index 0293515356a13035fcdc4725c5de132ea06ceb67..6b149cb6eeaac6c032bc64a70130628b772ddad1 100644 --- 
a/lite/kernels/huawei_ascend_npu/bridges/act_op.cc +++ b/lite/kernels/huawei_ascend_npu/bridges/act_op.cc @@ -49,6 +49,10 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto act_node = graph->template Add(out_name); auto act_op = act_node->template data(); act_op->set_input_x(*x_node->data()); + TENSOR_UPDATE_INPUT( + act_op, x, ge::FORMAT_NCHW, CvtPrecisionType(x_node->precision())); + TENSOR_UPDATE_OUTPUT( + act_op, y, ge::FORMAT_NCHW, CvtPrecisionType(act_node->precision())); return SUCCESS; } @@ -84,6 +88,10 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) { // only for leaky_relu auto alpha = op_info->GetAttr("alpha"); act_op->set_attr_negative_slope(alpha); + TENSOR_UPDATE_INPUT( + act_op, x, ge::FORMAT_NCHW, CvtPrecisionType(x_node->precision())); + TENSOR_UPDATE_OUTPUT( + act_op, y, ge::FORMAT_NCHW, CvtPrecisionType(act_node->precision())); return SUCCESS; } diff --git a/lite/kernels/huawei_ascend_npu/bridges/concat_op.cc b/lite/kernels/huawei_ascend_npu/bridges/concat_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..8a0e5bc4a90aa94262d5d552d99b77452b187ff7 --- /dev/null +++ b/lite/kernels/huawei_ascend_npu/bridges/concat_op.cc @@ -0,0 +1,124 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "lite/kernels/huawei_ascend_npu/bridges/graph.h" +#include "lite/kernels/huawei_ascend_npu/bridges/utility.h" +#include "lite/kernels/npu/bridges/registry.h" + +namespace paddle { +namespace lite { +namespace subgraph { +namespace huawei_ascend_npu { + +int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) { + CHECK(ctx != nullptr); + CHECK(op != nullptr); + auto graph = static_cast(ctx); + auto op_info = op->op_info(); + auto op_type = op_info->Type(); + auto scope = op->scope(); + VLOG(3) << "[NPU] Converting " << op_type << " ... "; + + // Get input and output vars and op attributes + auto x_names = op_info->Input("X"); + auto axis = op_info->GetAttr("axis"); + auto out_name = op_info->Output("Out").front(); + auto num = x_names.size(); + + if (op_info->HasInput("AxisTensor")) { + // axis node + auto axis_name = op_info->Input("AxisTensor").front(); + auto axis_tensor = scope->FindMutableTensor(axis_name); + std::shared_ptr axis_node = nullptr; + if (graph->Has(axis_name)) { + axis_node = graph->Get(axis_name); + } else { + axis_node = graph->Add(axis_name, *axis_tensor); + } + // concat node + auto concat_node = graph->Add(out_name); + auto concat_op = concat_node->data(); + // set axis input + concat_op->set_input_concat_dim(*axis_node->data()); + TENSOR_UPDATE_INPUT(concat_op, + concat_dim, + ge::FORMAT_NCHW, + CvtPrecisionType(axis_node->precision())); + // set dynamic input + concat_op->set_attr_N(num); + concat_op->create_dynamic_input_x(num); + int idx = 0; + for (auto& x_name : x_names) { + auto x = scope->FindMutableTensor(x_name); + auto x_dims = x->dims(); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); + } else { + x_node = graph->Add(x_name, *x); + } + concat_op->set_dynamic_input_x(idx, *x_node->data()); + TENSOR_UPDATE_DYNAMIC_INPUT(concat_op, + x, + idx, + ge::FORMAT_NCHW, + CvtPrecisionType(x_node->precision())); + idx++; + } + TENSOR_UPDATE_OUTPUT(concat_op, + y, + 
ge::FORMAT_NCHW, + CvtPrecisionType(concat_node->precision())); + } else { + auto concat_node = graph->Add(out_name); + auto concat_op = concat_node->data(); + concat_op->set_attr_concat_dim(axis); + concat_op->set_attr_N(num); + concat_op->create_dynamic_input_x(num); + int idx = 0; + for (auto& x_name : x_names) { + auto x = scope->FindMutableTensor(x_name); + auto x_dims = x->dims(); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); + } else { + x_node = graph->Add(x_name, *x); + } + concat_op->set_dynamic_input_x(idx, *x_node->data()); + TENSOR_UPDATE_DYNAMIC_INPUT(concat_op, + x, + idx, + ge::FORMAT_NCHW, + CvtPrecisionType(x_node->precision())); + idx++; + } + TENSOR_UPDATE_OUTPUT(concat_op, + y, + ge::FORMAT_NCHW, + CvtPrecisionType(concat_node->precision())); + } + + return SUCCESS; +} + +} // namespace huawei_ascend_npu +} // namespace subgraph +} // namespace lite +} // namespace paddle + +REGISTER_SUBGRAPH_BRIDGE( + concat, + kHuaweiAscendNPU, + paddle::lite::subgraph::huawei_ascend_npu::ConcatConverter); diff --git a/lite/kernels/huawei_ascend_npu/bridges/conv_op.cc b/lite/kernels/huawei_ascend_npu/bridges/conv_op.cc index 075bbca8bd63a3c12d74b3624c6a1d51d7edfb76..e63a80de948c9d2cbae66dde67c57e505fc7f1c3 100644 --- a/lite/kernels/huawei_ascend_npu/bridges/conv_op.cc +++ b/lite/kernels/huawei_ascend_npu/bridges/conv_op.cc @@ -35,7 +35,6 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto input_name = op_info->Input("Input").front(); auto input = scope->FindMutableTensor(input_name); auto input_dims = input->dims(); - ge::DataType ge_data_type = CvtPrecisionType(input->precision()); auto filter_name = op_info->Input("Filter").front(); auto filter = scope->FindMutableTensor(filter_name); @@ -99,6 +98,22 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) { input_dims, filter_dims); + // Check Restrictions: HxW(input) == HxW(filter) if output feature h*w = 1*1 + if (output_dims[2] 
== 1 && output_dims[3] == 1) { + int input_h = input_dims[2] + paddings[0] + paddings[1]; + int input_w = input_dims[3] + paddings[2] + paddings[3]; + int filter_h = (filter_dims[2] - 1) * dilations[0] + 1; + int filter_w = (filter_dims[3] - 1) * dilations[1] + 1; + CHECK_EQ(input_h, filter_h) << "[HUAWEI_ASCEND_NPU] Huawei Ascend NPU DDK " + "restriction: if output HxW = 1x1, then " + "input height after padding should equal to " + "filter height after dilation"; + CHECK_EQ(input_w, filter_w) << "[HUAWEI_ASCEND_NPU] Huawei Ascend NPU DDK " + "restriction: if output HxW = 1x1, then " + "input width after padding should equal to " + "filter width after dilation"; + } + // Check depthwise mode, and decide whether use DepthwiseConv2D Op bool use_depthwise_conv = false; bool is_depthwise_mode = (ic == groups && oc == groups && groups != 1); @@ -148,20 +163,6 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) { } } - // Ascend must update convop desc, or IR model build will fail - ge::TensorDesc conv2d_input_desc_x( - ge::Shape(CvtShape(input_dims)), ge::FORMAT_NCHW, ge_data_type); - ge::TensorDesc conv2d_input_desc_filter( - ge::Shape(CvtShape(filter_dims)), ge::FORMAT_NCHW, ge_data_type); - ge::TensorDesc conv2d_input_desc_bias( - ge::Shape(bias_shape), ge::FORMAT_ND, ge_data_type); - ge::TensorDesc conv2d_output_desc_y( - ge::Shape(CvtShape(output_dims)), ge::FORMAT_NCHW, ge_data_type); - // Setting desc name - conv2d_input_desc_x.SetName("conv2d_input_desc_x"); - conv2d_input_desc_filter.SetName("conv2d_input_desc_filter"); - conv2d_input_desc_bias.SetName("conv2d_input_desc_bias"); - conv2d_output_desc_y.SetName("conv2d_output_desc_y"); // Conv node std::shared_ptr conv_node = nullptr; if (use_depthwise_conv && is_depthwise_mode) { @@ -177,12 +178,19 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) { conv_op->set_attr_data_format("NCHW"); if (bias_node != nullptr && is_channel_bias) { conv_op->set_input_bias(*bias_node->data()); - 
conv_op->update_input_desc_bias(conv2d_input_desc_bias); + TENSOR_UPDATE_INPUT(conv_op, + bias, + ge::FORMAT_NCHW, + CvtPrecisionType(bias_node->precision())); } - // update tensor desc to conv2d - conv_op->update_input_desc_x(conv2d_input_desc_x); - conv_op->update_input_desc_filter(conv2d_input_desc_filter); - conv_op->update_output_desc_y(conv2d_output_desc_y); + TENSOR_UPDATE_INPUT( + conv_op, x, ge::FORMAT_NCHW, CvtPrecisionType(input_node->precision())); + TENSOR_UPDATE_INPUT(conv_op, + filter, + ge::FORMAT_NCHW, + CvtPrecisionType(filter_node->precision())); + TENSOR_UPDATE_OUTPUT( + conv_op, y, ge::FORMAT_NCHW, CvtPrecisionType(conv_node->precision())); } else { conv_node = graph->Add(output_name); auto conv_op = conv_node->data(); @@ -198,12 +206,19 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) { conv_op->set_attr_data_format("NCHW"); if (bias_node != nullptr && is_channel_bias) { conv_op->set_input_bias(*bias_node->data()); - conv_op->update_input_desc_bias(conv2d_input_desc_bias); + TENSOR_UPDATE_INPUT(conv_op, + bias, + ge::FORMAT_NCHW, + CvtPrecisionType(bias_node->precision())); } - // update tensor desc to conv2d - conv_op->update_input_desc_x(conv2d_input_desc_x); - conv_op->update_input_desc_filter(conv2d_input_desc_filter); - conv_op->update_output_desc_y(conv2d_output_desc_y); + TENSOR_UPDATE_INPUT( + conv_op, x, ge::FORMAT_NCHW, CvtPrecisionType(input_node->precision())); + TENSOR_UPDATE_INPUT(conv_op, + filter, + ge::FORMAT_NCHW, + CvtPrecisionType(filter_node->precision())); + TENSOR_UPDATE_OUTPUT( + conv_op, y, ge::FORMAT_NCHW, CvtPrecisionType(conv_node->precision())); } // append Add node to support bias if (bias_node != nullptr && !is_channel_bias) { diff --git a/lite/kernels/huawei_ascend_npu/bridges/interpolate_op.cc b/lite/kernels/huawei_ascend_npu/bridges/interpolate_op.cc index 96303ffad66fc3d1a3aa39334c61ccece098e00f..c298ef50e89e82a01db38765eed68b9aa07aaec0 100644 --- 
a/lite/kernels/huawei_ascend_npu/bridges/interpolate_op.cc +++ b/lite/kernels/huawei_ascend_npu/bridges/interpolate_op.cc @@ -53,9 +53,6 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) { return FAILED; } - // get ge date type - ge::DataType ge_data_type = CvtPrecisionType(x->precision()); - // X node std::shared_ptr x_node = nullptr; if (graph->Has(x_name)) { @@ -100,10 +97,18 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) { bilinear_interp_op->set_input_x(*x_node->data()); bilinear_interp_op->set_input_size(*out_size_node->data()); bilinear_interp_op->set_attr_align_corners(align_corners); - TENSOR_UPDATE_INPUT(bilinear_interp_op, x, ge::FORMAT_NCHW, ge_data_type); - TENSOR_UPDATE_INPUT( - bilinear_interp_op, size, ge::FORMAT_NCHW, ge_data_type); - TENSOR_UPDATE_OUTPUT(bilinear_interp_op, y, ge::FORMAT_NCHW, ge_data_type); + TENSOR_UPDATE_INPUT(bilinear_interp_op, + x, + ge::FORMAT_NCHW, + CvtPrecisionType(x_node->precision())); + TENSOR_UPDATE_INPUT(bilinear_interp_op, + size, + ge::FORMAT_NCHW, + CvtPrecisionType(out_size_node->precision())); + TENSOR_UPDATE_OUTPUT(bilinear_interp_op, + y, + ge::FORMAT_NCHW, + CvtPrecisionType(bilinear_interp_node->precision())); } else if (interp_method == "nearest") { auto nearest_interp_node = graph->Add(out_name); @@ -112,9 +117,18 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) { nearest_interp_op->set_input_x(*x_node->data()); nearest_interp_op->set_input_size(*out_size_node->data()); nearest_interp_op->set_attr_align_corners(align_corners); - TENSOR_UPDATE_INPUT(nearest_interp_op, x, ge::FORMAT_NCHW, ge_data_type); - TENSOR_UPDATE_INPUT(nearest_interp_op, size, ge::FORMAT_NCHW, ge_data_type); - TENSOR_UPDATE_OUTPUT(nearest_interp_op, y, ge::FORMAT_NCHW, ge_data_type); + TENSOR_UPDATE_INPUT(nearest_interp_op, + x, + ge::FORMAT_NCHW, + CvtPrecisionType(x_node->precision())); + TENSOR_UPDATE_INPUT(nearest_interp_op, + size, + ge::FORMAT_NCHW, + 
CvtPrecisionType(out_size_node->precision())); + TENSOR_UPDATE_OUTPUT(nearest_interp_op, + y, + ge::FORMAT_NCHW, + CvtPrecisionType(nearest_interp_node->precision())); } else { LOG(WARNING) << "[HUAWEI_ASCEND_NPU] Unsupported interpolate method: " << interp_method; diff --git a/lite/kernels/huawei_ascend_npu/bridges/paddle_use_bridges.h b/lite/kernels/huawei_ascend_npu/bridges/paddle_use_bridges.h index e94b7561b80545eb1e9c8de6c1c5d3c9a8d07783..f38f2d4bb4e760d5f96b4e6e16d0894d81259db4 100644 --- a/lite/kernels/huawei_ascend_npu/bridges/paddle_use_bridges.h +++ b/lite/kernels/huawei_ascend_npu/bridges/paddle_use_bridges.h @@ -27,3 +27,4 @@ USE_SUBGRAPH_BRIDGE(conv2d, kHuaweiAscendNPU); USE_SUBGRAPH_BRIDGE(depthwise_conv2d, kHuaweiAscendNPU); USE_SUBGRAPH_BRIDGE(bilinear_interp, kHuaweiAscendNPU); USE_SUBGRAPH_BRIDGE(nearest_interp, kHuaweiAscendNPU); +USE_SUBGRAPH_BRIDGE(concat, kHuaweiAscendNPU); diff --git a/lite/kernels/huawei_ascend_npu/bridges/utility.cc b/lite/kernels/huawei_ascend_npu/bridges/utility.cc index 2fdaa49b94f48ad12b58036cd89d2f545566cad6..bc5848e7ad1f1429808138bb0794859fd6ccfa5b 100644 --- a/lite/kernels/huawei_ascend_npu/bridges/utility.cc +++ b/lite/kernels/huawei_ascend_npu/bridges/utility.cc @@ -156,61 +156,6 @@ int CvtActMode(std::string act_type) { return act_mode; } -const std::string& CvtFormat(ge::Format format) { - static const int MAX_FORMAT_LENGTH = 25; - static const std::string format2string[] = { - "FORMAT_NCHW = 0", - "FORMAT_NHWC = 1", - "FORMAT_ND = 2", - "FORMAT_NC1HWC0 = 3", - "FORMAT_FRACTAL_Z = 4", - "FORMAT_NC1C0HWPAD = 5", - "FORMAT_NHWC1C0 = 6", - "FORMAT_FSR_NCHW = 7", - "FORMAT_FRACTAL_DECONV = 8", - "FORMAT_C1HWNC0 = 9", - "FORMAT_FRACTAL_DECONV_TRANSPOSE = 10", - "FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS = 11", - "FORMAT_NC1HWC0_C04 = 12", - "FORMAT_FRACTAL_Z_C04 = 13", - "FORMAT_CHWN = 14", - "FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15", - "FORMAT_HWCN = 16", - "FORMAT_NC1KHKWHWC0 = 17", - "FORMAT_BN_WEIGHT = 18", - 
"FORMAT_FILTER_HWCK = 19", - "FORMAT_HASHTABLE_LOOKUP_LOOKUPS = 20", - "FORMAT_HASHTABLE_LOOKUP_KEYS = 21", - "FORMAT_HASHTABLE_LOOKUP_VALUE = 22", - "FORMAT_HASHTABLE_LOOKUP_OUTPUT = 23", - "FORMAT_HASHTABLE_LOOKUP_HITS = 24"}; - auto x = static_cast(format); - CHECK_LT(x, MAX_FORMAT_LENGTH); - return format2string[x]; -} - -const std::string& CvtDataType(ge::DataType data_type) { - static const int MAX_DATATYPE_LENGTH = 14; - static const std::string datatype2string[] = {"DT_FLOAT=0", - "DT_FLOAT16=1", - "DT_INT8=2", - "DT_INT32=3", - "DT_UINT8=4", - "Unknown=5", - "DT_INT16=6", - "DT_UINT16=7", - "DT_UINT32=8", - "DT_INT64=9", - "DT_UINT64=10", - "DT_DOUBLE=11", - "DT_BOOL=12", - "DT_STRING=13"}; - - auto x = static_cast(data_type); - CHECK_LT(x, MAX_DATATYPE_LENGTH); - return datatype2string[x]; -} - } // namespace huawei_ascend_npu } // namespace subgraph } // namespace lite diff --git a/lite/kernels/huawei_ascend_npu/bridges/utility.h b/lite/kernels/huawei_ascend_npu/bridges/utility.h index 4688e05920ee82034c336a45354160ad6a4af107..43405aa601131ccfe8980da36c4bc6b2ac5fa3dc 100644 --- a/lite/kernels/huawei_ascend_npu/bridges/utility.h +++ b/lite/kernels/huawei_ascend_npu/bridges/utility.h @@ -36,6 +36,10 @@ namespace huawei_ascend_npu { #define TENSOR_UPDATE_OUTPUT(op, attr, format, dtype) \ ge::TensorDesc _##op##_output_desc_##attr(ge::Shape(), format, dtype); \ op->update_output_desc_##attr(_##op##_output_desc_##attr); +#define TENSOR_UPDATE_DYNAMIC_INPUT(op, attr, idx, format, dtype) \ + ge::TensorDesc _##op##_input_desc_##attr##_##idx( \ + ge::Shape(), format, dtype); \ + op->update_dynamic_input_desc_##attr(idx, _##op##_input_desc_##attr##_##idx); // Type/tensor converters for converting Paddle type/tensor to HiAI type/tensor bool HasInputArg(const OpInfo* op_info, @@ -57,9 +61,6 @@ ge::Tensor CvtTensor(const Tensor& in_tensor, int CvtActMode(std::string act_type); -const std::string& CvtFormat(ge::Format format); -const std::string& 
CvtDataType(ge::DataType data_type); - } // namespace huawei_ascend_npu } // namespace subgraph } // namespace lite diff --git a/lite/kernels/huawei_ascend_npu/subgraph_compute.cc b/lite/kernels/huawei_ascend_npu/subgraph_compute.cc index 6e71c71ca28b163f27a9783572d585466335ef87..f40cd8c8ef3b3f2a9b7159b6579f61a56cb82984 100644 --- a/lite/kernels/huawei_ascend_npu/subgraph_compute.cc +++ b/lite/kernels/huawei_ascend_npu/subgraph_compute.cc @@ -241,32 +241,18 @@ bool DeviceProgram::ShareBufferWithOriginTensors( VLOG(3) << "[HUAWEI_ASCEND_NPU] Inputs[" << i << "] name: " << input_names[i] << " origin dims:" << (*origin_itensors)[i]->dims().repr() - << " device dims: {" << device_idims_[i].GetNumber() << "," - << device_idims_[i].GetChannel() << "," - << device_idims_[i].GetHeight() << "," - << device_idims_[i].GetWidth() << "}"; + << " device dims:" << device_idims_[i].repr(); CHECK_EQ((*origin_itensors)[i]->dims().production(), - device_idims_[i].GetNumber() * device_idims_[i].GetChannel() * - device_idims_[i].GetHeight() * device_idims_[i].GetWidth()); + device_idims_[i].production()); // reset tensor desc - if ((*device_itensors)[i]->SetTensorDesc( - device_idims_[i].GetGeTensorDesc()) != ge::GRAPH_SUCCESS) { - LOG(WARNING) << "[HUAWEI_ASCEND_NPU] ge::Tensor input tensor " - "SetTensorDesc failed!"; - } else { - VLOG(3) << "[HUAWEI_ASCEND_NPU] ge::Tensor input tensor SetTensorDesc " - "success."; - } + ATC_CALL((*device_itensors)[i]->SetTensorDesc( + device_idims_[i].GetGeTensorDesc())); // copy data from origin to device - if ((*device_itensors)[i]->SetData( - reinterpret_cast((*origin_itensors)[i]->raw_data()), - (*origin_itensors)[i]->memory_size()) != ge::GRAPH_SUCCESS) { - LOG(WARNING) - << "[HUAWEI_ASCEND_NPU] ge::Tensor input tensor SetData failed!"; - } else { - VLOG(3) << "[HUAWEI_ASCEND_NPU] ge::Tensor input tensor SetData success."; - } + ATC_CALL((*device_itensors)[i]->SetData( + reinterpret_cast((*origin_itensors)[i]->raw_data()), + 
(*origin_itensors)[i]->memory_size())); + VLOG(3) << "[HUAWEI_ASCEND_NPU] Init the input tensors for the device program " "and share their buffers with the origin input tensors"; @@ -285,26 +271,13 @@ bool DeviceProgram::ShareBufferWithOriginTensors( VLOG(3) << "[HUAWEI_ASCEND_NPU] Outputs[" << i << "] name: " << output_names[i] << " origin dims:" << (*origin_otensors)[i]->dims().repr() - << " device dims: {" << device_odims_[i].GetNumber() << "," - << device_odims_[i].GetChannel() << "," - << device_odims_[i].GetHeight() << "," - << device_odims_[i].GetWidth() << "}"; + << " device dims:" << device_odims_[i].repr(); CHECK_EQ((*origin_otensors)[i]->dims().production(), - device_odims_[i].GetNumber() * device_odims_[i].GetChannel() * - device_odims_[i].GetHeight() * device_odims_[i].GetWidth()); + device_odims_[i].production()); // reset tensor desc - if ((*device_otensors)[i]->SetTensorDesc( - device_odims_[i].GetGeTensorDesc()) != ge::GRAPH_SUCCESS) { - LOG(WARNING) << "[HUAWEI_ASCEND_NPU] ge::Tensor output tensor " - "SetTensorDesc failed!"; - } else { - VLOG(3) << "[HUAWEI_ASCEND_NPU] ge::Tensor output tensor SetTensorDesc " - "success."; - } - VLOG(3) - << "[HUAWEI_ASCEND_NPU] Init the output tensors for the device program " - "and share their buffers with the origin output tensors"; + ATC_CALL((*device_otensors)[i]->SetTensorDesc( + device_odims_[i].GetGeTensorDesc())); } return true; } @@ -321,8 +294,7 @@ bool DeviceProgram::SharedBufferWithOutputTensors( for (size_t i = 0; i < output_names.size(); i++) { CHECK_EQ((*origin_otensors)[i]->dims().production(), - device_odims_[i].GetNumber() * device_odims_[i].GetChannel() * - device_odims_[i].GetHeight() * device_odims_[i].GetWidth()); + device_odims_[i].production()); // Share data buf between device_itensor and origin_itensor std::shared_ptr buffer = std::make_shared( diff --git a/lite/tests/kernels/concat_compute_test.cc b/lite/tests/kernels/concat_compute_test.cc index 
18e4701bdf3e99fbb6f76ed9ac78bbbbfda60a1c..7c1d5c0e87eae2dde0110ba2e992be2ed373387f 100644 --- a/lite/tests/kernels/concat_compute_test.cc +++ b/lite/tests/kernels/concat_compute_test.cc @@ -147,6 +147,8 @@ TEST(Concat, precision) { #if defined(LITE_WITH_NPU) place = TARGET(kNPU); abs_error = 1e-2; // use fp16 in npu +#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU) + place = TARGET(kHuaweiAscendNPU); #elif defined(LITE_WITH_ARM) place = TARGET(kARM); #elif defined(LITE_WITH_X86) @@ -157,6 +159,10 @@ TEST(Concat, precision) { for (int axis : {1, 2}) { for (bool is_use_axis_tensor : {false, true}) { + // is_use_axis_tensor = true has bugs in Huawei Ascend NPU DDK + if (place == TARGET(kHuaweiAscendNPU) && is_use_axis_tensor) { + continue; + } LOG(INFO) << "axis:" << axis << ", is_use_axis_tensor:" << is_use_axis_tensor; std::unique_ptr tester( diff --git a/lite/tests/kernels/conv_compute_test.cc b/lite/tests/kernels/conv_compute_test.cc index a4bcf6ea70e3fe719793aa4ebd8fb8cd09e35905..3606853f6ca83322d240ae3cf13590795b369e8d 100644 --- a/lite/tests/kernels/conv_compute_test.cc +++ b/lite/tests/kernels/conv_compute_test.cc @@ -296,6 +296,11 @@ void TestConvStrides(Place place, float abs_error = 2e-5) { for (auto out_channels : {1, 3}) { for (auto strides : std::vector>{{2, 2}, {3, 3}, {1, 2}, {3, 1}}) { + // Check Huawei Ascend NPU restriction if output HxW = 1x1 + // input_w after padding = 4 should equal filter_w after dilation = 3 + if (place == TARGET(kHuaweiAscendNPU) && dims[3] == 4) { + continue; + } std::unique_ptr tester(new ConvComputeTester( place, "def", DDim(dims), out_channels, 3, strides)); arena::Arena arena(std::move(tester), place, abs_error); @@ -415,13 +420,16 @@ TEST(Conv2d, precision) { abs_error = 5e-2; // Using fp16 in NPU #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU) place = TARGET(kHuaweiAscendNPU); - abs_error = 5e-2; // Using fp16 in NPU + abs_error = 1e-2; // Using fp16 in NPU #else return; #endif TestConvKsize(place, abs_error); +// Huawei
Ascend NPU DDK not support groups > 1 +#if !defined(LITE_WITH_HUAWEI_ASCEND_NPU) TestConvGroups(place, abs_error); +#endif TestConvDilations(place, abs_error); TestConvStrides(place, abs_error); TestConvPaddings(place, abs_error);