diff --git a/lite/backends/huawei_ascend_npu/device.cc b/lite/backends/huawei_ascend_npu/device.cc
index c8dc3d1de46fe12c3cb41257f864bcb1ff82bd9a..61fa05b0201d5b3cd9b33308996fba6b3cd95111 100644
--- a/lite/backends/huawei_ascend_npu/device.cc
+++ b/lite/backends/huawei_ascend_npu/device.cc
@@ -67,6 +67,15 @@ bool Device::Build(std::vector<ge::Operator>& input_nodes,   // NOLINT
   std::lock_guard<std::mutex> lock(device_mutex_);
   // Convert the HiAI IR graph to the HiAI om model
   ge::Graph ir_graph("graph");
+  // Set the "index" attr on every input node if there is more than one input
+  if (input_nodes.size() > 1) {
+    int idx = 0;
+    for (auto& node : input_nodes) {  // by reference: tag the stored node
+      node.SetAttr("index", idx);
+      idx++;
+    }
+  }
+  VLOG(3) << "Getting input node size " << input_nodes.size();
   ir_graph.SetInputs(input_nodes).SetOutputs(output_nodes);
 
   // Build IR model
diff --git a/lite/backends/huawei_ascend_npu/model_client.cc b/lite/backends/huawei_ascend_npu/model_client.cc
index 02a8014210b24f8ae143ee68341aec0281d5a570..96b5e208f290357db1484e8d8d6046590b9a88ff 100644
--- a/lite/backends/huawei_ascend_npu/model_client.cc
+++ b/lite/backends/huawei_ascend_npu/model_client.cc
@@ -24,50 +24,28 @@ bool AclModelClient::LoadFromMem(const void* data, uint32_t size) {
     return true;
   }
 
-  auto ret = aclmdlQuerySizeFromMem(
-      data, size, &model_memory_size_, &model_weight_size_);
-  if (ret != ACL_ERROR_NONE) {
-    LOG(WARNING) << "[HUAWEI_ASCEND_NPU] query model size from memory failed!";
-    return false;
-  }
-  ret = aclrtMalloc(
-      &model_memory_ptr_, model_memory_size_, ACL_MEM_MALLOC_HUGE_FIRST);
-  if (ret != ACL_ERROR_NONE) {
-    LOG(WARNING) << "[HUAWEI_ASCEND_NPU] malloc buffer for model memory "
-                    "failed, require size is "
-                 << model_memory_size_;
-    return false;
-  }
-  ret = aclrtMalloc(
-      &model_weight_ptr_, model_weight_size_, ACL_MEM_MALLOC_HUGE_FIRST);
-  if (ret != ACL_ERROR_NONE) {
-    LOG(WARNING) << "[HUAWEI_ASCEND_NPU] malloc buffer for model weigth "
-                    "failed, require size is "
-                 << model_weight_size_;
-    return false;
-  }
-  ret = aclmdlLoadFromMemWithMem(data,
-                                 size,
-                                 &model_id_,
-                                 model_memory_ptr_,
-                                 model_memory_size_,
-                                 model_weight_ptr_,
-                                 model_weight_size_);
-  if (ret != ACL_ERROR_NONE) {
-    LOG(WARNING) << "[HUAWEI_ASCEND_NPU] Load model from memory failed!";
-    return false;
-  }
+  ACL_CALL(aclmdlQuerySizeFromMem(
+      data, size, &model_memory_size_, &model_weight_size_));
+  ACL_CALL(aclrtMalloc(
+      &model_memory_ptr_, model_memory_size_, ACL_MEM_MALLOC_HUGE_FIRST));
+  ACL_CALL(aclrtMalloc(
+      &model_weight_ptr_, model_weight_size_, ACL_MEM_MALLOC_HUGE_FIRST));
+  ACL_CALL(aclmdlLoadFromMemWithMem(data,
+                                    size,
+                                    &model_id_,
+                                    model_memory_ptr_,
+                                    model_memory_size_,
+                                    model_weight_ptr_,
+                                    model_weight_size_));
+
   model_desc_ = aclmdlCreateDesc();
   if (model_desc_ == nullptr) {
     LOG(WARNING) << "[HUAWEI_ASCEND_NPU] create model description failed!";
     return false;
   }
-  ret = aclmdlGetDesc(model_desc_, model_id_);
-  if (ret != ACL_ERROR_NONE) {
-    LOG(WARNING) << "[HUAWEI_ASCEND_NPU] get model description failed!";
-    return false;
-  }
-  VLOG(3) << "[HUAWEI_ASCEND_NPU] AclModelClient LoadFromMem success.";
+  ACL_CALL(aclmdlGetDesc(model_desc_, model_id_));
+
+  VLOG(3) << "[HUAWEI_ASCEND_NPU] Load model from memory success.";
   load_flag_ = true;
   return true;
 }
@@ -77,49 +55,28 @@ bool AclModelClient::LoadFromFile(const char* model_path) {
     LOG(WARNING) << "[HUAWEI_ASCEND_NPU] model is already loaded!";
     return true;
   }
-  auto ret =
-      aclmdlQuerySize(model_path, &model_memory_size_, &model_weight_size_);
-  if (ret != ACL_ERROR_NONE) {
-    LOG(WARNING) << "[HUAWEI_ASCEND_NPU] query model size from file failed!";
-    return false;
-  }
-  ret = aclrtMalloc(
-      &model_memory_ptr_, model_memory_size_, ACL_MEM_MALLOC_HUGE_FIRST);
-  if (ret != ACL_ERROR_NONE) {
-    LOG(WARNING) << "[HUAWEI_ASCEND_NPU] malloc buffer for model memory "
-                    "failed, require size is "
-                 << model_memory_size_;
-    return false;
-  }
-  ret = aclrtMalloc(
-      &model_weight_ptr_, model_weight_size_, ACL_MEM_MALLOC_HUGE_FIRST);
-  if (ret != ACL_ERROR_NONE) {
-    LOG(WARNING) << "[HUAWEI_ASCEND_NPU] malloc buffer for model weigth "
-                    "failed, require size is "
-                 << model_weight_size_;
-    return false;
-  }
-  ret = aclmdlLoadFromFileWithMem(model_path,
-                                  &model_id_,
-                                  model_memory_ptr_,
-                                  model_memory_size_,
-                                  model_weight_ptr_,
-                                  model_weight_size_);
-  if (ret != ACL_ERROR_NONE) {
-    LOG(WARNING) << "[HUAWEI_ASCEND_NPU] Load model from file failed!";
-    return false;
-  }
+
+  ACL_CALL(
+      aclmdlQuerySize(model_path, &model_memory_size_, &model_weight_size_));
+  ACL_CALL(aclrtMalloc(
+      &model_memory_ptr_, model_memory_size_, ACL_MEM_MALLOC_HUGE_FIRST));
+  ACL_CALL(aclrtMalloc(
+      &model_weight_ptr_, model_weight_size_, ACL_MEM_MALLOC_HUGE_FIRST));
+  ACL_CALL(aclmdlLoadFromFileWithMem(model_path,
+                                     &model_id_,
+                                     model_memory_ptr_,
+                                     model_memory_size_,
+                                     model_weight_ptr_,
+                                     model_weight_size_));
+
   model_desc_ = aclmdlCreateDesc();
   if (model_desc_ == nullptr) {
     LOG(WARNING) << "[HUAWEI_ASCEND_NPU] create model description failed!";
     return false;
   }
-  ret = aclmdlGetDesc(model_desc_, model_id_);
-  if (ret != ACL_ERROR_NONE) {
-    LOG(WARNING) << "[HUAWEI_ASCEND_NPU] get model description failed!";
-    return false;
-  }
-  VLOG(3) << "[HUAWEI_ASCEND_NPU] Loading model file success:" << model_path;
+  ACL_CALL(aclmdlGetDesc(model_desc_, model_id_));
+
+  VLOG(3) << "[HUAWEI_ASCEND_NPU] Load model from file success: " << model_path;
   load_flag_ = true;
   return true;
 }
@@ -132,33 +89,25 @@ bool AclModelClient::GetModelIOTensorDim(
     return false;
   }
   size_t input_num = aclmdlGetNumInputs(model_desc_);
-  VLOG(3) << "[HUAWEI_ASCEND_NPU] input numher is " << input_num;
+  VLOG(3) << "[HUAWEI_ASCEND_NPU] input number is " << input_num;
   for (size_t i = 0; i < input_num; i++) {
     VLOG(3) << "[HUAWEI_ASCEND_NPU] printing input [" << i << "] ....";
     aclmdlIODims input_dim;
-    aclmdlGetInputDims(model_desc_, i, &input_dim);
+    ACL_CALL(aclmdlGetInputDims(model_desc_, i, &input_dim));
     aclDataType data_type = aclmdlGetInputDataType(model_desc_, i);
-    VLOG(3) << "[HUAWEI_ASCEND_NPU] data_type of inputs[" << i << "] is "
-            << data_type;
     aclFormat data_format = aclmdlGetInputFormat(model_desc_, i);
-    VLOG(3) << "[HUAWEI_ASCEND_NPU] data_format of inputs[" << i << "] is "
-            << data_format;
     TensorDesc tensor_desc = TensorDesc(data_type, input_dim, data_format);
     input_tensor->push_back(tensor_desc);
   }
 
   size_t output_num = aclmdlGetNumOutputs(model_desc_);
-  VLOG(3) << "[HUAWEI_ASCEND_NPU] output numher is " << output_num;
+  VLOG(3) << "[HUAWEI_ASCEND_NPU] output number is " << output_num;
   for (size_t i = 0; i < output_num; i++) {
     VLOG(3) << "[HUAWEI_ASCEND_NPU] printing output [" << i << "] ....";
     aclmdlIODims output_dim;
-    aclmdlGetOutputDims(model_desc_, i, &output_dim);
+    ACL_CALL(aclmdlGetOutputDims(model_desc_, i, &output_dim));
     aclDataType data_type = aclmdlGetOutputDataType(model_desc_, i);
-    VLOG(3) << "[HUAWEI_ASCEND_NPU] data_type of outputs[" << i << "] is "
-            << data_type;
     aclFormat data_format = aclmdlGetOutputFormat(model_desc_, i);
-    VLOG(3) << "[HUAWEI_ASCEND_NPU] data_format of outputs[" << i << "] is "
-            << data_format;
     TensorDesc tensor_desc = TensorDesc(data_type, output_dim, data_format);
     output_tensor->push_back(tensor_desc);
   }
@@ -181,28 +130,18 @@ bool AclModelClient::GetTensorFromDataset(
     uint32_t device_size = aclGetDataBufferSize(buffer_device);
 
     void* tensor_data = nullptr;
-    aclError ret = aclrtMallocHost(&tensor_data, device_size);
-    if (ret != ACL_ERROR_NONE) {
-      LOG(ERROR) << "[HUAWEI_ASCEND_NPU] aclrtMallocHost failed, ret " << ret;
-      return false;
-    }
-    ret = aclrtMemcpy(tensor_data,
-                      device_size,
-                      device_data,
-                      device_size,
-                      ACL_MEMCPY_DEVICE_TO_HOST);
-    if (ret != ACL_ERROR_NONE) {
-      LOG(ERROR) << "[HUAWEI_ASCEND_NPU] aclrtMemcpy failed, ret " << ret;
-      return false;
-    }
-    if (output_tensor->at(i)->SetData(reinterpret_cast<uint8_t*>(tensor_data),
-                                      device_size) != ge::GRAPH_SUCCESS) {
-      LOG(ERROR) << "[HUAWEI_ASCEND_NPU] SetData to output tensor failed";
-      return false;
-    }
-  }
-  VLOG(3)
-      << "[HUAWEI_ASCEND_NPU] Get output tensor from output dataset succeed.";
+    ACL_CALL(aclrtMallocHost(&tensor_data, device_size));
+    ACL_CALL(aclrtMemcpy(tensor_data,
+                         device_size,
+                         device_data,
+                         device_size,
+                         ACL_MEMCPY_DEVICE_TO_HOST));
+    ATC_CALL(output_tensor->at(i)->SetData(
+        reinterpret_cast<uint8_t*>(tensor_data), device_size));
+    // ge::Tensor::SetData keeps its own copy, so free the host staging buffer.
+    ACL_CALL(aclrtFreeHost(tensor_data));
+  }
+  VLOG(3) << "[HUAWEI_ASCEND_NPU] Get output tensor from dataset succeed.";
   return true;
 }
 
@@ -218,37 +155,33 @@ void AclModelClient::CreateInputDataset(
     auto item = input_tensor->at(i);
     size_t buffer_size = item->GetSize();
     void* buffer_device = nullptr;
-    aclError ret =
-        aclrtMalloc(&buffer_device, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY);
-    if (ret != ACL_ERROR_NONE) {
-      LOG(ERROR)
-          << "[HUAWEI_ASCEND_NPU] input malloc device buffer failed. size is "
-          << buffer_size;
-      return;
-    }
+
+    ACL_CALL(
+        aclrtMalloc(&buffer_device, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY));
+
     void* buffer_data = reinterpret_cast<void*>(item->GetData());
-    ret = aclrtMemcpy(buffer_device,
-                      buffer_size,
-                      buffer_data,
-                      buffer_size,
-                      ACL_MEMCPY_HOST_TO_DEVICE);
+    auto ret = aclrtMemcpy(buffer_device,
+                           buffer_size,
+                           buffer_data,
+                           buffer_size,
+                           ACL_MEMCPY_HOST_TO_DEVICE);
     if (ret != ACL_ERROR_NONE) {
       LOG(ERROR) << "[HUAWEI_ASCEND_NPU] input memcpy failed, buffer size is "
                  << buffer_size;
-      aclrtFree(buffer_device);
+      ACL_CALL(aclrtFree(buffer_device));
       return;
     }
     aclDataBuffer* data_buffer =
         aclCreateDataBuffer(buffer_device, buffer_size);
     if (data_buffer == nullptr) {
       LOG(ERROR) << "[HUAWEI_ASCEND_NPU] output aclCreateDataBuffer failed!";
-      aclrtFree(buffer_device);
+      ACL_CALL(aclrtFree(buffer_device));
       return;
     }
     if (aclmdlAddDatasetBuffer(input_dataset_, data_buffer) != ACL_ERROR_NONE) {
       LOG(ERROR) << "[HUAWEI_ASCEND_NPU] input aclmdlAddDatasetBuffer failed!";
-      aclrtFree(buffer_device);
-      aclDestroyDataBuffer(data_buffer);
+      ACL_CALL(aclrtFree(buffer_device));
+      ACL_CALL(aclDestroyDataBuffer(data_buffer));
       return;
     }
   }
@@ -266,26 +199,20 @@ void AclModelClient::CreateOutputDataset(
   for (size_t i = 0; i < output_size; i++) {
     size_t buffer_size = aclmdlGetOutputSizeByIndex(model_desc_, i);
     void* buffer_device = nullptr;
-    aclError ret =
-        aclrtMalloc(&buffer_device, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY);
-    if (ret != ACL_ERROR_NONE) {
-      LOG(ERROR)
-          << "[HUAWEI_ASCEND_NPU] output malloc device buffer failed. size is "
-          << buffer_size;
-      return;
-    }
+    ACL_CALL(
+        aclrtMalloc(&buffer_device, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY));
     aclDataBuffer* data_buffer =
         aclCreateDataBuffer(buffer_device, buffer_size);
     if (data_buffer == nullptr) {
       LOG(ERROR) << "[HUAWEI_ASCEND_NPU] output aclCreateDataBuffer failed!";
-      aclrtFree(buffer_device);
+      ACL_CALL(aclrtFree(buffer_device));
       return;
     }
     if (aclmdlAddDatasetBuffer(output_dataset_, data_buffer) !=
         ACL_ERROR_NONE) {
       LOG(ERROR) << "[HUAWEI_ASCEND_NPU] output aclmdlAddDatasetBuffer failed!";
-      aclrtFree(buffer_device);
-      aclDestroyDataBuffer(data_buffer);
+      ACL_CALL(aclrtFree(buffer_device));
+      ACL_CALL(aclDestroyDataBuffer(data_buffer));
       return;
     }
   }
@@ -332,21 +259,13 @@ void AclModelClient::DestroyDataset(aclmdlDataset** dataset) {
     aclDataBuffer* buffer_device = aclmdlGetDatasetBuffer(*dataset, i);
     void* device_data = aclGetDataBufferAddr(buffer_device);
     if (device_data == nullptr) {
-      LOG(WARNING)
-          << "[HUAWEI_ASCEND_NPU] failed to get data buffer of deivce data!";
+      LOG(WARNING) << "[HUAWEI_ASCEND_NPU] failed to get data buffer!";
     } else {
-      if (aclrtFree(device_data) != ACL_ERROR_NONE) {
-        LOG(WARNING) << "[HUAWEI_ASCEND_NPU] failed to free deivce data!";
-      }
-    }
-    if (aclDestroyDataBuffer(buffer_device) != ACL_ERROR_NONE) {
-      LOG(WARNING)
-          << "[HUAWEI_ASCEND_NPU] failed to destroy deivce data buffer!";
+      ACL_CALL(aclrtFree(device_data));
     }
+    ACL_CALL(aclDestroyDataBuffer(buffer_device));
   }
-  if (aclmdlDestroyDataset(*dataset) != ACL_ERROR_NONE) {
-    LOG(WARNING) << "[HUAWEI_ASCEND_NPU] failed to destroy dataset!";
-  }
+  ACL_CALL(aclmdlDestroyDataset(*dataset));
   *dataset = nullptr;
   VLOG(3) << "[HUAWEI_ASCEND_NPU] Destroy dataset success.";
 }
@@ -361,24 +280,20 @@ bool AclModelClient::UnloadModel() {
   DestroyDataset(&input_dataset_);
   DestroyDataset(&output_dataset_);
 
-  aclError ret = aclmdlUnload(model_id_);
-  if (ret != ACL_ERROR_NONE) {
-    LOG(ERROR) << "unload model failed, model id is " << model_id_;
-    return false;
-  }
+  ACL_CALL(aclmdlUnload(model_id_));
   if (model_desc_ != nullptr) {
-    (void)aclmdlDestroyDesc(model_desc_);
+    ACL_CALL(aclmdlDestroyDesc(model_desc_));
     model_desc_ = nullptr;
   }
 
   if (model_memory_ptr_ != nullptr) {
-    aclrtFree(model_memory_ptr_);
+    ACL_CALL(aclrtFree(model_memory_ptr_));
     model_memory_ptr_ = nullptr;
     model_memory_size_ = 0;
   }
 
   if (model_weight_ptr_ != nullptr) {
-    aclrtFree(model_weight_ptr_);
+    ACL_CALL(aclrtFree(model_weight_ptr_));
     model_weight_ptr_ = nullptr;
     model_weight_size_ = 0;
   }
diff --git a/lite/backends/huawei_ascend_npu/model_client.h b/lite/backends/huawei_ascend_npu/model_client.h
index 5cf19b26261a4ff0301b493c7edf2de6ce3f7ec1..85643c917de08ca1745b7649011a9ecdab6afeb0 100644
--- a/lite/backends/huawei_ascend_npu/model_client.h
+++ b/lite/backends/huawei_ascend_npu/model_client.h
@@ -35,32 +35,39 @@ class TensorDesc {
     ge_tensor_desc_ = new ge::TensorDesc(
         GetGeShape(dims), GetGeFormat(format), GetGeDataType(data_type));
     CHECK(ge_tensor_desc_ != nullptr);
+    VLOG(3) << "[HUAWEI_ASCEND_NPU] Getting data shape : " << repr();
   }
   ~TensorDesc() { ge_tensor_desc_ = nullptr; }
-  int64_t GetNumber() const {
-    return ge_tensor_desc_->GetShape().GetDim(dim_order[0]);
-  }
-  int64_t GetChannel() const {
-    return ge_tensor_desc_->GetShape().GetDim(dim_order[1]);
-  }
-  int64_t GetHeight() const {
-    return ge_tensor_desc_->GetShape().GetDim(dim_order[2]);
+
+  const ge::TensorDesc& GetGeTensorDesc() const { return *ge_tensor_desc_; }
+
+  // Renders the shape as a brace-enclosed, comma-separated dim list, e.g.
+  // "{1,3,224,224}"; a shape with no dims renders as "{}".
+  std::string repr() const {
+    const ge::Shape shape = ge_tensor_desc_->GetShape();
+    const size_t rank = shape.GetDimNum();
+    STL::stringstream out;
+    out << "{";
+    for (size_t d = 0; d < rank; d++) {
+      // The separator goes before every dim except the first.
+      if (d != 0) out << ",";
+      out << shape.GetDim(d);
+    }
+    out << "}";
+    return out.str();
   }
-  int64_t GetWidth() const {
-    return ge_tensor_desc_->GetShape().GetDim(dim_order[3]);
+
+  int64_t production() const {
+    return ge_tensor_desc_->GetShape().GetShapeSize();
   }
-  const ge::TensorDesc& GetGeTensorDesc() const { return *ge_tensor_desc_; }
 
  private:
   ge::Shape GetGeShape(aclmdlIODims dims) {
-    ge::Shape ge_shape({0, 0, 0, 0});
+    // Start from a placeholder shape of rank dimCount; every entry is then
+    // overwritten with the extent reported in dims by the loop below.
+    ge::Shape ge_shape(std::vector<int64_t>(dims.dimCount, 1L));
     for (size_t i = 0; i < dims.dimCount; i++) {
-      if (ge_shape.SetDim(i, dims.dims[i]) != ge::GRAPH_SUCCESS) {
-        LOG(WARNING) << "[HUAWEI_ASCEND_NPU] ge::Shape SetDim failed!";
-      } else {
-        VLOG(3) << "[HUAWEI_ASCEND_NPU] Setting Ge Shape[" << i << "] = <"
-                << dims.dims[i] << ">";
-      }
+      ATC_CALL(ge_shape.SetDim(i, dims.dims[i]));
     }
     return ge_shape;
   }
@@ -80,6 +87,8 @@ class TensorDesc {
         LOG(FATAL) << "[HUAWEI_ASCEND_NPU] format not supported:" << format;
         break;
     }
+    VLOG(3) << "[HUAWEI_ASCEND_NPU] Getting data format : "
+            << CvtFormat(ge_format);
     return ge_format;
   }
   ge::DataType GetGeDataType(aclDataType data_type) {
@@ -110,6 +119,8 @@ class TensorDesc {
         LOG(FATAL) << "[HUAWEI_ASCEND_NPU] data type not supported!";
         break;
     }
+    VLOG(3) << "[HUAWEI_ASCEND_NPU] Getting data type : "
+            << CvtDataType(ge_datatype);
     return ge_datatype;
   }
 
diff --git a/lite/backends/huawei_ascend_npu/utils.h b/lite/backends/huawei_ascend_npu/utils.h
index e2bff3f87e0831f7b98be60ef3980f10da610f10..61df063fc418c3aa648dd029e18ab68627bbc937 100644
--- a/lite/backends/huawei_ascend_npu/utils.h
+++ b/lite/backends/huawei_ascend_npu/utils.h
@@ -13,6 +13,8 @@
 // limitations under the License.
 
 #pragma once
+
+#include <string>
 #include "acl/acl.h"
 #include "ge/ge_api_types.h"
 #include "ge/ge_ir_build.h"
@@ -21,11 +23,16 @@
 #include "graph/tensor.h"
 #include "graph/types.h"
 #include "lite/utils/cp_logging.h"
+#include "lite/utils/replace_stl/stream.h"
 
 /*
  * This file contains some Huawei Ascend NPU specific uitls.
  */
 
+namespace paddle {
+namespace lite {
+namespace huawei_ascend_npu {
+
 #define ACL_CALL(msg)                                       \
   CHECK_EQ(reinterpret_cast<aclError>(msg), ACL_ERROR_NONE) \
       << (msg) << " Huawei Ascend NPU ACL Error: "          \
@@ -38,10 +45,6 @@
       << ::paddle::lite::huawei_ascend_npu::AtcErrorInfo(             \
              reinterpret_cast<uint32_t>(msg))
 
-namespace paddle {
-namespace lite {
-namespace huawei_ascend_npu {
-
 static const char* AtcErrorInfo(uint32_t error) {
   switch (error) {
 #define LITE_ATC_ERROR_INFO(xx) \
@@ -123,6 +126,70 @@ static const char* AclErrorInfo(int error) {
   }
 }
 
+// Returns a printable "NAME = value" label for a ge::Format enumerator.
+// Values outside the table below fail a CHECK rather than index past it.
+static const std::string& CvtFormat(ge::Format format) {
+  static const std::string format2string[] = {
+      "FORMAT_NCHW = 0",
+      "FORMAT_NHWC = 1",
+      "FORMAT_ND = 2",
+      "FORMAT_NC1HWC0 = 3",
+      "FORMAT_FRACTAL_Z = 4",
+      "FORMAT_NC1C0HWPAD = 5",
+      "FORMAT_NHWC1C0 = 6",
+      "FORMAT_FSR_NCHW = 7",
+      "FORMAT_FRACTAL_DECONV = 8",
+      "FORMAT_C1HWNC0 = 9",
+      "FORMAT_FRACTAL_DECONV_TRANSPOSE = 10",
+      "FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS = 11",
+      "FORMAT_NC1HWC0_C04 = 12",
+      "FORMAT_FRACTAL_Z_C04 = 13",
+      "FORMAT_CHWN = 14",
+      "FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15",
+      "FORMAT_HWCN = 16",
+      "FORMAT_NC1KHKWHWC0 = 17",
+      "FORMAT_BN_WEIGHT = 18",
+      "FORMAT_FILTER_HWCK = 19",
+      "FORMAT_HASHTABLE_LOOKUP_LOOKUPS = 20",
+      "FORMAT_HASHTABLE_LOOKUP_KEYS = 21",
+      "FORMAT_HASHTABLE_LOOKUP_VALUE = 22",
+      "FORMAT_HASHTABLE_LOOKUP_OUTPUT = 23",
+      "FORMAT_HASHTABLE_LOOKUP_HITS = 24"};
+  // The bound is derived from the table so the two cannot drift apart.
+  static const int kNumFormats =
+      static_cast<int>(sizeof(format2string) / sizeof(format2string[0]));
+  const int x = static_cast<int>(format);
+  CHECK_GE(x, 0);
+  CHECK_LT(x, kNumFormats);
+  return format2string[x];
+}
+
+// Returns a printable "NAME=value" label for a ge::DataType enumerator.
+// Values outside the table below fail a CHECK rather than index past it.
+static const std::string& CvtDataType(ge::DataType data_type) {
+  static const std::string datatype2string[] = {"DT_FLOAT=0",
+                                                "DT_FLOAT16=1",
+                                                "DT_INT8=2",
+                                                "DT_INT32=3",
+                                                "DT_UINT8=4",
+                                                "Unknown=5",
+                                                "DT_INT16=6",
+                                                "DT_UINT16=7",
+                                                "DT_UINT32=8",
+                                                "DT_INT64=9",
+                                                "DT_UINT64=10",
+                                                "DT_DOUBLE=11",
+                                                "DT_BOOL=12",
+                                                "DT_STRING=13"};
+  // The bound is derived from the table so the two cannot drift apart.
+  static const int kNumDataTypes =
+      static_cast<int>(sizeof(datatype2string) / sizeof(datatype2string[0]));
+  const int x = static_cast<int>(data_type);
+  CHECK_GE(x, 0);
+  CHECK_LT(x, kNumDataTypes);
+  return datatype2string[x];
+}
+
 }  // namespace huawei_ascend_npu
 }  // namespace lite
 }  // namespace paddle
diff --git a/lite/kernels/huawei_ascend_npu/bridges/CMakeLists.txt b/lite/kernels/huawei_ascend_npu/bridges/CMakeLists.txt
index 14f67ca1c70e21dd52b0bd1e7f34c890b5ce6f33..b0a50a2a549ab910c27d2c4148a0d323780d4cf5 100644
--- a/lite/kernels/huawei_ascend_npu/bridges/CMakeLists.txt
+++ b/lite/kernels/huawei_ascend_npu/bridges/CMakeLists.txt
@@ -10,6 +10,7 @@ set(huawei_ascend_npu_subgraph_bridge_deps subgraph_bridge_registry subgraph_bri
 lite_cc_library(subgraph_bridge_act_op_huawei_ascend_npu SRCS act_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_conv_op_huawei_ascend_npu SRCS conv_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_interpolate_op_huawei_ascend_npu SRCS interpolate_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps})
+lite_cc_library(subgraph_bridge_concat_op_huawei_ascend_npu SRCS concat_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps})
 
 set(huawei_ascend_npu_subgraph_bridges
         subgraph_bridge_registry
@@ -18,4 +19,5 @@ set(huawei_ascend_npu_subgraph_bridges
         subgraph_bridge_act_op_huawei_ascend_npu
         subgraph_bridge_conv_op_huawei_ascend_npu
         subgraph_bridge_interpolate_op_huawei_ascend_npu
+        subgraph_bridge_concat_op_huawei_ascend_npu
         CACHE INTERNAL "huawei_ascend_npu_subgraph_bridges")
diff --git a/lite/kernels/huawei_ascend_npu/bridges/act_op.cc b/lite/kernels/huawei_ascend_npu/bridges/act_op.cc
index 0293515356a13035fcdc4725c5de132ea06ceb67..6b149cb6eeaac6c032bc64a70130628b772ddad1 100644
--- a/lite/kernels/huawei_ascend_npu/bridges/act_op.cc
+++ b/lite/kernels/huawei_ascend_npu/bridges/act_op.cc
@@ -49,6 +49,10 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto act_node = graph->template Add<ActType>(out_name);
   auto act_op = act_node->template data<ActType>();
   act_op->set_input_x(*x_node->data());
+  TENSOR_UPDATE_INPUT(
+      act_op, x, ge::FORMAT_NCHW, CvtPrecisionType(x_node->precision()));
+  TENSOR_UPDATE_OUTPUT(
+      act_op, y, ge::FORMAT_NCHW, CvtPrecisionType(act_node->precision()));
 
   return SUCCESS;
 }
@@ -84,6 +88,10 @@ int ActConverter<ge::op::LeakyRelu>(void* ctx, OpLite* op, KernelBase* kernel) {
   // only for leaky_relu
   auto alpha = op_info->GetAttr<float>("alpha");
   act_op->set_attr_negative_slope(alpha);
+  TENSOR_UPDATE_INPUT(
+      act_op, x, ge::FORMAT_NCHW, CvtPrecisionType(x_node->precision()));
+  TENSOR_UPDATE_OUTPUT(
+      act_op, y, ge::FORMAT_NCHW, CvtPrecisionType(act_node->precision()));
 
   return SUCCESS;
 }
diff --git a/lite/kernels/huawei_ascend_npu/bridges/concat_op.cc b/lite/kernels/huawei_ascend_npu/bridges/concat_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..8a0e5bc4a90aa94262d5d552d99b77452b187ff7
--- /dev/null
+++ b/lite/kernels/huawei_ascend_npu/bridges/concat_op.cc
@@ -0,0 +1,124 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/huawei_ascend_npu/bridges/graph.h"
+#include "lite/kernels/huawei_ascend_npu/bridges/utility.h"
+#include "lite/kernels/npu/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace huawei_ascend_npu {
+
+// Converts a Paddle concat op into ge::op::Concat (when an AxisTensor input
+// is present) or ge::op::ConcatD (static axis attribute) on the given graph.
+int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
+  CHECK(ctx != nullptr);
+  CHECK(op != nullptr);
+  auto graph = static_cast<Graph*>(ctx);
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+  auto scope = op->scope();
+  VLOG(3) << "[HUAWEI_ASCEND_NPU] Converting " << op_type << " ... ";
+
+  // Get input and output vars and op attributes
+  auto x_names = op_info->Input("X");
+  auto axis = op_info->GetAttr<int>("axis");
+  auto out_name = op_info->Output("Out").front();
+  auto num = x_names.size();
+
+  if (op_info->HasInput("AxisTensor")) {
+    // axis node
+    auto axis_name = op_info->Input("AxisTensor").front();
+    auto axis_tensor = scope->FindMutableTensor(axis_name);
+    std::shared_ptr<Node> axis_node = nullptr;
+    if (graph->Has(axis_name)) {
+      axis_node = graph->Get(axis_name);
+    } else {
+      axis_node = graph->Add(axis_name, *axis_tensor);
+    }
+    // concat node
+    auto concat_node = graph->Add<ge::op::Concat>(out_name);
+    auto concat_op = concat_node->data<ge::op::Concat>();
+    // set axis input
+    concat_op->set_input_concat_dim(*axis_node->data());
+    TENSOR_UPDATE_INPUT(concat_op,
+                        concat_dim,
+                        ge::FORMAT_NCHW,
+                        CvtPrecisionType(axis_node->precision()));
+    // set dynamic input
+    concat_op->set_attr_N(num);
+    concat_op->create_dynamic_input_x(num);
+    int idx = 0;
+    for (auto& x_name : x_names) {
+      auto x = scope->FindMutableTensor(x_name);
+      std::shared_ptr<Node> x_node = nullptr;
+      if (graph->Has(x_name)) {
+        x_node = graph->Get(x_name);
+      } else {
+        x_node = graph->Add(x_name, *x);
+      }
+      concat_op->set_dynamic_input_x(idx, *x_node->data());
+      TENSOR_UPDATE_DYNAMIC_INPUT(concat_op,
+                                  x,
+                                  idx,
+                                  ge::FORMAT_NCHW,
+                                  CvtPrecisionType(x_node->precision()));
+      idx++;
+    }
+    TENSOR_UPDATE_OUTPUT(concat_op,
+                         y,
+                         ge::FORMAT_NCHW,
+                         CvtPrecisionType(concat_node->precision()));
+  } else {
+    auto concat_node = graph->Add<ge::op::ConcatD>(out_name);
+    auto concat_op = concat_node->data<ge::op::ConcatD>();
+    concat_op->set_attr_concat_dim(axis);
+    concat_op->set_attr_N(num);
+    concat_op->create_dynamic_input_x(num);
+    int idx = 0;
+    for (auto& x_name : x_names) {
+      auto x = scope->FindMutableTensor(x_name);
+      std::shared_ptr<Node> x_node = nullptr;
+      if (graph->Has(x_name)) {
+        x_node = graph->Get(x_name);
+      } else {
+        x_node = graph->Add(x_name, *x);
+      }
+      concat_op->set_dynamic_input_x(idx, *x_node->data());
+      TENSOR_UPDATE_DYNAMIC_INPUT(concat_op,
+                                  x,
+                                  idx,
+                                  ge::FORMAT_NCHW,
+                                  CvtPrecisionType(x_node->precision()));
+      idx++;
+    }
+    TENSOR_UPDATE_OUTPUT(concat_op,
+                         y,
+                         ge::FORMAT_NCHW,
+                         CvtPrecisionType(concat_node->precision()));
+  }
+
+  return SUCCESS;
+}
+
+}  // namespace huawei_ascend_npu
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_SUBGRAPH_BRIDGE(
+    concat,
+    kHuaweiAscendNPU,
+    paddle::lite::subgraph::huawei_ascend_npu::ConcatConverter);
diff --git a/lite/kernels/huawei_ascend_npu/bridges/conv_op.cc b/lite/kernels/huawei_ascend_npu/bridges/conv_op.cc
index 075bbca8bd63a3c12d74b3624c6a1d51d7edfb76..e63a80de948c9d2cbae66dde67c57e505fc7f1c3 100644
--- a/lite/kernels/huawei_ascend_npu/bridges/conv_op.cc
+++ b/lite/kernels/huawei_ascend_npu/bridges/conv_op.cc
@@ -35,7 +35,6 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto input_name = op_info->Input("Input").front();
   auto input = scope->FindMutableTensor(input_name);
   auto input_dims = input->dims();
-  ge::DataType ge_data_type = CvtPrecisionType(input->precision());
 
   auto filter_name = op_info->Input("Filter").front();
   auto filter = scope->FindMutableTensor(filter_name);
@@ -99,6 +98,22 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                       input_dims,
                                       filter_dims);
 
+  // Check Restrictions: HxW(input) == HxW(filter) if output feature h*w = 1*1
+  if (output_dims[2] == 1 && output_dims[3] == 1) {
+    int input_h = input_dims[2] + paddings[0] + paddings[1];
+    int input_w = input_dims[3] + paddings[2] + paddings[3];
+    int filter_h = (filter_dims[2] - 1) * dilations[0] + 1;
+    int filter_w = (filter_dims[3] - 1) * dilations[1] + 1;
+    CHECK_EQ(input_h, filter_h) << "[HUAWEI_ASCEND_NPU] Huawei Ascend NPU DDK "
+                                   "restriction: if output HxW = 1x1, then "
+                                   "input height after padding should equal to "
+                                   "filter height after dilation";
+    CHECK_EQ(input_w, filter_w) << "[HUAWEI_ASCEND_NPU] Huawei Ascend NPU DDK "
+                                   "restriction: if output HxW = 1x1, then "
+                                   "input width after padding should equal to "
+                                   "filter width after dilation";
+  }
+
   // Check depthwise mode, and decide whether use DepthwiseConv2D Op
   bool use_depthwise_conv = false;
   bool is_depthwise_mode = (ic == groups && oc == groups && groups != 1);
@@ -148,20 +163,6 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     }
   }
 
-  // Ascend must update convop desc, or IR model build will fail
-  ge::TensorDesc conv2d_input_desc_x(
-      ge::Shape(CvtShape(input_dims)), ge::FORMAT_NCHW, ge_data_type);
-  ge::TensorDesc conv2d_input_desc_filter(
-      ge::Shape(CvtShape(filter_dims)), ge::FORMAT_NCHW, ge_data_type);
-  ge::TensorDesc conv2d_input_desc_bias(
-      ge::Shape(bias_shape), ge::FORMAT_ND, ge_data_type);
-  ge::TensorDesc conv2d_output_desc_y(
-      ge::Shape(CvtShape(output_dims)), ge::FORMAT_NCHW, ge_data_type);
-  // Setting desc name
-  conv2d_input_desc_x.SetName("conv2d_input_desc_x");
-  conv2d_input_desc_filter.SetName("conv2d_input_desc_filter");
-  conv2d_input_desc_bias.SetName("conv2d_input_desc_bias");
-  conv2d_output_desc_y.SetName("conv2d_output_desc_y");
   // Conv node
   std::shared_ptr<Node> conv_node = nullptr;
   if (use_depthwise_conv && is_depthwise_mode) {
@@ -177,12 +178,19 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     conv_op->set_attr_data_format("NCHW");
     if (bias_node != nullptr && is_channel_bias) {
       conv_op->set_input_bias(*bias_node->data());
-      conv_op->update_input_desc_bias(conv2d_input_desc_bias);
+      TENSOR_UPDATE_INPUT(conv_op,
+                          bias,
+                          ge::FORMAT_NCHW,
+                          CvtPrecisionType(bias_node->precision()));
     }
-    // update tensor desc to conv2d
-    conv_op->update_input_desc_x(conv2d_input_desc_x);
-    conv_op->update_input_desc_filter(conv2d_input_desc_filter);
-    conv_op->update_output_desc_y(conv2d_output_desc_y);
+    TENSOR_UPDATE_INPUT(
+        conv_op, x, ge::FORMAT_NCHW, CvtPrecisionType(input_node->precision()));
+    TENSOR_UPDATE_INPUT(conv_op,
+                        filter,
+                        ge::FORMAT_NCHW,
+                        CvtPrecisionType(filter_node->precision()));
+    TENSOR_UPDATE_OUTPUT(
+        conv_op, y, ge::FORMAT_NCHW, CvtPrecisionType(conv_node->precision()));
   } else {
     conv_node = graph->Add<ge::op::Conv2D>(output_name);
     auto conv_op = conv_node->data<ge::op::Conv2D>();
@@ -198,12 +206,19 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     conv_op->set_attr_data_format("NCHW");
     if (bias_node != nullptr && is_channel_bias) {
       conv_op->set_input_bias(*bias_node->data());
-      conv_op->update_input_desc_bias(conv2d_input_desc_bias);
+      TENSOR_UPDATE_INPUT(conv_op,
+                          bias,
+                          ge::FORMAT_NCHW,
+                          CvtPrecisionType(bias_node->precision()));
     }
-    // update tensor desc to conv2d
-    conv_op->update_input_desc_x(conv2d_input_desc_x);
-    conv_op->update_input_desc_filter(conv2d_input_desc_filter);
-    conv_op->update_output_desc_y(conv2d_output_desc_y);
+    TENSOR_UPDATE_INPUT(
+        conv_op, x, ge::FORMAT_NCHW, CvtPrecisionType(input_node->precision()));
+    TENSOR_UPDATE_INPUT(conv_op,
+                        filter,
+                        ge::FORMAT_NCHW,
+                        CvtPrecisionType(filter_node->precision()));
+    TENSOR_UPDATE_OUTPUT(
+        conv_op, y, ge::FORMAT_NCHW, CvtPrecisionType(conv_node->precision()));
   }
   // append Add node to support bias
   if (bias_node != nullptr && !is_channel_bias) {
diff --git a/lite/kernels/huawei_ascend_npu/bridges/interpolate_op.cc b/lite/kernels/huawei_ascend_npu/bridges/interpolate_op.cc
index 96303ffad66fc3d1a3aa39334c61ccece098e00f..c298ef50e89e82a01db38765eed68b9aa07aaec0 100644
--- a/lite/kernels/huawei_ascend_npu/bridges/interpolate_op.cc
+++ b/lite/kernels/huawei_ascend_npu/bridges/interpolate_op.cc
@@ -53,9 +53,6 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     return FAILED;
   }
 
-  // get ge date type
-  ge::DataType ge_data_type = CvtPrecisionType(x->precision());
-
   // X node
   std::shared_ptr<Node> x_node = nullptr;
   if (graph->Has(x_name)) {
@@ -100,10 +97,18 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     bilinear_interp_op->set_input_x(*x_node->data());
     bilinear_interp_op->set_input_size(*out_size_node->data());
     bilinear_interp_op->set_attr_align_corners(align_corners);
-    TENSOR_UPDATE_INPUT(bilinear_interp_op, x, ge::FORMAT_NCHW, ge_data_type);
-    TENSOR_UPDATE_INPUT(
-        bilinear_interp_op, size, ge::FORMAT_NCHW, ge_data_type);
-    TENSOR_UPDATE_OUTPUT(bilinear_interp_op, y, ge::FORMAT_NCHW, ge_data_type);
+    TENSOR_UPDATE_INPUT(bilinear_interp_op,
+                        x,
+                        ge::FORMAT_NCHW,
+                        CvtPrecisionType(x_node->precision()));
+    TENSOR_UPDATE_INPUT(bilinear_interp_op,
+                        size,
+                        ge::FORMAT_NCHW,
+                        CvtPrecisionType(out_size_node->precision()));
+    TENSOR_UPDATE_OUTPUT(bilinear_interp_op,
+                         y,
+                         ge::FORMAT_NCHW,
+                         CvtPrecisionType(bilinear_interp_node->precision()));
   } else if (interp_method == "nearest") {
     auto nearest_interp_node =
         graph->Add<ge::op::ResizeNearestNeighborV2>(out_name);
@@ -112,9 +117,18 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     nearest_interp_op->set_input_x(*x_node->data());
     nearest_interp_op->set_input_size(*out_size_node->data());
     nearest_interp_op->set_attr_align_corners(align_corners);
-    TENSOR_UPDATE_INPUT(nearest_interp_op, x, ge::FORMAT_NCHW, ge_data_type);
-    TENSOR_UPDATE_INPUT(nearest_interp_op, size, ge::FORMAT_NCHW, ge_data_type);
-    TENSOR_UPDATE_OUTPUT(nearest_interp_op, y, ge::FORMAT_NCHW, ge_data_type);
+    TENSOR_UPDATE_INPUT(nearest_interp_op,
+                        x,
+                        ge::FORMAT_NCHW,
+                        CvtPrecisionType(x_node->precision()));
+    TENSOR_UPDATE_INPUT(nearest_interp_op,
+                        size,
+                        ge::FORMAT_NCHW,
+                        CvtPrecisionType(out_size_node->precision()));
+    TENSOR_UPDATE_OUTPUT(nearest_interp_op,
+                         y,
+                         ge::FORMAT_NCHW,
+                         CvtPrecisionType(nearest_interp_node->precision()));
   } else {
     LOG(WARNING) << "[HUAWEI_ASCEND_NPU] Unsupported interpolate method: "
                  << interp_method;
diff --git a/lite/kernels/huawei_ascend_npu/bridges/paddle_use_bridges.h b/lite/kernels/huawei_ascend_npu/bridges/paddle_use_bridges.h
index e94b7561b80545eb1e9c8de6c1c5d3c9a8d07783..f38f2d4bb4e760d5f96b4e6e16d0894d81259db4 100644
--- a/lite/kernels/huawei_ascend_npu/bridges/paddle_use_bridges.h
+++ b/lite/kernels/huawei_ascend_npu/bridges/paddle_use_bridges.h
@@ -27,3 +27,4 @@ USE_SUBGRAPH_BRIDGE(conv2d, kHuaweiAscendNPU);
 USE_SUBGRAPH_BRIDGE(depthwise_conv2d, kHuaweiAscendNPU);
 USE_SUBGRAPH_BRIDGE(bilinear_interp, kHuaweiAscendNPU);
 USE_SUBGRAPH_BRIDGE(nearest_interp, kHuaweiAscendNPU);
+USE_SUBGRAPH_BRIDGE(concat, kHuaweiAscendNPU);
diff --git a/lite/kernels/huawei_ascend_npu/bridges/utility.cc b/lite/kernels/huawei_ascend_npu/bridges/utility.cc
index 2fdaa49b94f48ad12b58036cd89d2f545566cad6..bc5848e7ad1f1429808138bb0794859fd6ccfa5b 100644
--- a/lite/kernels/huawei_ascend_npu/bridges/utility.cc
+++ b/lite/kernels/huawei_ascend_npu/bridges/utility.cc
@@ -156,61 +156,6 @@ int CvtActMode(std::string act_type) {
   return act_mode;
 }
 
-const std::string& CvtFormat(ge::Format format) {
-  static const int MAX_FORMAT_LENGTH = 25;
-  static const std::string format2string[] = {
-      "FORMAT_NCHW = 0",
-      "FORMAT_NHWC = 1",
-      "FORMAT_ND = 2",
-      "FORMAT_NC1HWC0 = 3",
-      "FORMAT_FRACTAL_Z = 4",
-      "FORMAT_NC1C0HWPAD = 5",
-      "FORMAT_NHWC1C0 = 6",
-      "FORMAT_FSR_NCHW = 7",
-      "FORMAT_FRACTAL_DECONV = 8",
-      "FORMAT_C1HWNC0 = 9",
-      "FORMAT_FRACTAL_DECONV_TRANSPOSE = 10",
-      "FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS = 11",
-      "FORMAT_NC1HWC0_C04 = 12",
-      "FORMAT_FRACTAL_Z_C04 = 13",
-      "FORMAT_CHWN = 14",
-      "FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15",
-      "FORMAT_HWCN = 16",
-      "FORMAT_NC1KHKWHWC0 = 17",
-      "FORMAT_BN_WEIGHT = 18",
-      "FORMAT_FILTER_HWCK = 19",
-      "FORMAT_HASHTABLE_LOOKUP_LOOKUPS = 20",
-      "FORMAT_HASHTABLE_LOOKUP_KEYS = 21",
-      "FORMAT_HASHTABLE_LOOKUP_VALUE = 22",
-      "FORMAT_HASHTABLE_LOOKUP_OUTPUT = 23",
-      "FORMAT_HASHTABLE_LOOKUP_HITS = 24"};
-  auto x = static_cast<int>(format);
-  CHECK_LT(x, MAX_FORMAT_LENGTH);
-  return format2string[x];
-}
-
-const std::string& CvtDataType(ge::DataType data_type) {
-  static const int MAX_DATATYPE_LENGTH = 14;
-  static const std::string datatype2string[] = {"DT_FLOAT=0",
-                                                "DT_FLOAT16=1",
-                                                "DT_INT8=2",
-                                                "DT_INT32=3",
-                                                "DT_UINT8=4",
-                                                "Unknown=5",
-                                                "DT_INT16=6",
-                                                "DT_UINT16=7",
-                                                "DT_UINT32=8",
-                                                "DT_INT64=9",
-                                                "DT_UINT64=10",
-                                                "DT_DOUBLE=11",
-                                                "DT_BOOL=12",
-                                                "DT_STRING=13"};
-
-  auto x = static_cast<int>(data_type);
-  CHECK_LT(x, MAX_DATATYPE_LENGTH);
-  return datatype2string[x];
-}
-
 }  // namespace huawei_ascend_npu
 }  // namespace subgraph
 }  // namespace lite
diff --git a/lite/kernels/huawei_ascend_npu/bridges/utility.h b/lite/kernels/huawei_ascend_npu/bridges/utility.h
index 4688e05920ee82034c336a45354160ad6a4af107..43405aa601131ccfe8980da36c4bc6b2ac5fa3dc 100644
--- a/lite/kernels/huawei_ascend_npu/bridges/utility.h
+++ b/lite/kernels/huawei_ascend_npu/bridges/utility.h
@@ -36,6 +36,10 @@ namespace huawei_ascend_npu {
 #define TENSOR_UPDATE_OUTPUT(op, attr, format, dtype)                    \
   ge::TensorDesc _##op##_output_desc_##attr(ge::Shape(), format, dtype); \
   op->update_output_desc_##attr(_##op##_output_desc_##attr);
+#define TENSOR_UPDATE_DYNAMIC_INPUT(op, attr, idx, format, dtype) \
+  ge::TensorDesc _##op##_input_desc_##attr##_##idx(               \
+      ge::Shape(), format, dtype);                                \
+  op->update_dynamic_input_desc_##attr(idx, _##op##_input_desc_##attr##_##idx);
 
 // Type/tensor converters for converting Paddle type/tensor to HiAI type/tensor
 bool HasInputArg(const OpInfo* op_info,
@@ -57,9 +61,6 @@ ge::Tensor CvtTensor(const Tensor& in_tensor,
 
 int CvtActMode(std::string act_type);
 
-const std::string& CvtFormat(ge::Format format);
-const std::string& CvtDataType(ge::DataType data_type);
-
 }  // namespace huawei_ascend_npu
 }  // namespace subgraph
 }  // namespace lite
diff --git a/lite/kernels/huawei_ascend_npu/subgraph_compute.cc b/lite/kernels/huawei_ascend_npu/subgraph_compute.cc
index 6e71c71ca28b163f27a9783572d585466335ef87..f40cd8c8ef3b3f2a9b7159b6579f61a56cb82984 100644
--- a/lite/kernels/huawei_ascend_npu/subgraph_compute.cc
+++ b/lite/kernels/huawei_ascend_npu/subgraph_compute.cc
@@ -241,32 +241,18 @@ bool DeviceProgram::ShareBufferWithOriginTensors(
     VLOG(3) << "[HUAWEI_ASCEND_NPU] Inputs[" << i
             << "] name: " << input_names[i]
             << " origin dims:" << (*origin_itensors)[i]->dims().repr()
-            << " device dims: {" << device_idims_[i].GetNumber() << ","
-            << device_idims_[i].GetChannel() << ","
-            << device_idims_[i].GetHeight() << ","
-            << device_idims_[i].GetWidth() << "}";
+            << " device dims:" << device_idims_[i].repr();
     CHECK_EQ((*origin_itensors)[i]->dims().production(),
-             device_idims_[i].GetNumber() * device_idims_[i].GetChannel() *
-                 device_idims_[i].GetHeight() * device_idims_[i].GetWidth());
+             device_idims_[i].production());
 
     // reset tensor desc
-    if ((*device_itensors)[i]->SetTensorDesc(
-            device_idims_[i].GetGeTensorDesc()) != ge::GRAPH_SUCCESS) {
-      LOG(WARNING) << "[HUAWEI_ASCEND_NPU] ge::Tensor input tensor "
-                      "SetTensorDesc failed!";
-    } else {
-      VLOG(3) << "[HUAWEI_ASCEND_NPU] ge::Tensor input tensor SetTensorDesc "
-                 "success.";
-    }
+    ATC_CALL((*device_itensors)[i]->SetTensorDesc(
+        device_idims_[i].GetGeTensorDesc()));
     // copy data from origin to device
-    if ((*device_itensors)[i]->SetData(
-            reinterpret_cast<uint8_t*>((*origin_itensors)[i]->raw_data()),
-            (*origin_itensors)[i]->memory_size()) != ge::GRAPH_SUCCESS) {
-      LOG(WARNING)
-          << "[HUAWEI_ASCEND_NPU] ge::Tensor input tensor SetData failed!";
-    } else {
-      VLOG(3) << "[HUAWEI_ASCEND_NPU] ge::Tensor input tensor SetData success.";
-    }
+    ATC_CALL((*device_itensors)[i]->SetData(
+        reinterpret_cast<uint8_t*>((*origin_itensors)[i]->raw_data()),
+        (*origin_itensors)[i]->memory_size()));
+
     VLOG(3)
         << "[HUAWEI_ASCEND_NPU] Init the input tensors for the device program "
            "and share their buffers with the origin input tensors";
@@ -285,26 +271,13 @@ bool DeviceProgram::ShareBufferWithOriginTensors(
     VLOG(3) << "[HUAWEI_ASCEND_NPU] Outputs[" << i
             << "] name: " << output_names[i]
             << " origin dims:" << (*origin_otensors)[i]->dims().repr()
-            << " device dims: {" << device_odims_[i].GetNumber() << ","
-            << device_odims_[i].GetChannel() << ","
-            << device_odims_[i].GetHeight() << ","
-            << device_odims_[i].GetWidth() << "}";
+            << " device dims:" << device_odims_[i].repr();
     CHECK_EQ((*origin_otensors)[i]->dims().production(),
-             device_odims_[i].GetNumber() * device_odims_[i].GetChannel() *
-                 device_odims_[i].GetHeight() * device_odims_[i].GetWidth());
+             device_odims_[i].production());
 
     // reset tensor desc
-    if ((*device_otensors)[i]->SetTensorDesc(
-            device_odims_[i].GetGeTensorDesc()) != ge::GRAPH_SUCCESS) {
-      LOG(WARNING) << "[HUAWEI_ASCEND_NPU] ge::Tensor output tensor "
-                      "SetTensorDesc failed!";
-    } else {
-      VLOG(3) << "[HUAWEI_ASCEND_NPU] ge::Tensor output tensor SetTensorDesc "
-                 "success.";
-    }
-    VLOG(3)
-        << "[HUAWEI_ASCEND_NPU] Init the output tensors for the device program "
-           "and share their buffers with the origin output tensors";
+    ATC_CALL((*device_otensors)[i]->SetTensorDesc(
+        device_odims_[i].GetGeTensorDesc()));
   }
   return true;
 }
@@ -321,8 +294,7 @@ bool DeviceProgram::SharedBufferWithOutputTensors(
 
   for (size_t i = 0; i < output_names.size(); i++) {
     CHECK_EQ((*origin_otensors)[i]->dims().production(),
-             device_odims_[i].GetNumber() * device_odims_[i].GetChannel() *
-                 device_odims_[i].GetHeight() * device_odims_[i].GetWidth());
+             device_odims_[i].production());
 
     // Share data buf between device_itensor and origin_itensor
     std::shared_ptr<Buffer> buffer = std::make_shared<Buffer>(
diff --git a/lite/tests/kernels/concat_compute_test.cc b/lite/tests/kernels/concat_compute_test.cc
index 18e4701bdf3e99fbb6f76ed9ac78bbbbfda60a1c..7c1d5c0e87eae2dde0110ba2e992be2ed373387f 100644
--- a/lite/tests/kernels/concat_compute_test.cc
+++ b/lite/tests/kernels/concat_compute_test.cc
@@ -147,6 +147,8 @@ TEST(Concat, precision) {
 #if defined(LITE_WITH_NPU)
   place = TARGET(kNPU);
   abs_error = 1e-2;  // use fp16 in npu
+#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
+  place = TARGET(kHuaweiAscendNPU);
 #elif defined(LITE_WITH_ARM)
   place = TARGET(kARM);
 #elif defined(LITE_WITH_X86)
@@ -157,6 +159,10 @@ TEST(Concat, precision) {
 
   for (int axis : {1, 2}) {
     for (bool is_use_axis_tensor : {false, true}) {
+      // is_use_axis_tensor = true has bugs in Huawei Ascend NPU DDK
+      if (place == TARGET(kHuaweiAscendNPU) && is_use_axis_tensor) {
+        continue;
+      }
       LOG(INFO) << "axis:" << axis
                 << ", is_use_axis_tensor:" << is_use_axis_tensor;
       std::unique_ptr<arena::TestCase> tester(
diff --git a/lite/tests/kernels/conv_compute_test.cc b/lite/tests/kernels/conv_compute_test.cc
index a4bcf6ea70e3fe719793aa4ebd8fb8cd09e35905..3606853f6ca83322d240ae3cf13590795b369e8d 100644
--- a/lite/tests/kernels/conv_compute_test.cc
+++ b/lite/tests/kernels/conv_compute_test.cc
@@ -296,6 +296,11 @@ void TestConvStrides(Place place, float abs_error = 2e-5) {
     for (auto out_channels : {1, 3}) {
       for (auto strides :
            std::vector<std::vector<int>>{{2, 2}, {3, 3}, {1, 2}, {3, 1}}) {
+        // Check Huawei Ascend NPU restriction if output HxW = 1x1
+        // input_w after padding = 4 should equal to filter_w after dilation = 3
+        if (place == TARGET(kHuaweiAscendNPU) && dims[3] == 4) {
+          continue;
+        }
         std::unique_ptr<arena::TestCase> tester(new ConvComputeTester(
             place, "def", DDim(dims), out_channels, 3, strides));
         arena::Arena arena(std::move(tester), place, abs_error);
@@ -415,13 +420,16 @@ TEST(Conv2d, precision) {
   abs_error = 5e-2;  // Using fp16 in NPU
 #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
   place = TARGET(kHuaweiAscendNPU);
-  abs_error = 5e-2;  // Using fp16 in NPU
+  abs_error = 1e-2;  // Using fp16 in NPU
 #else
   return;
 #endif
 
   TestConvKsize(place, abs_error);
+// Huawei Ascend NPU DDK does not support groups > 1
+#if !defined(LITE_WITH_HUAWEI_ASCEND_NPU)
   TestConvGroups(place, abs_error);
+#endif
   TestConvDilations(place, abs_error);
   TestConvStrides(place, abs_error);
   TestConvPaddings(place, abs_error);