Commit 98c415a1 authored by mindspore-ci-bot, committed by Gitee

!3807 serving: support ACL DVPP preprocessing

Merge pull request !3807 from 徐永飞/master
......@@ -15,7 +15,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/json.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/dependency_securec.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake)
if (ENABLE_DEBUGGER OR ENABLE_SERVING)
if (ENABLE_DEBUGGER OR ENABLE_SERVING OR ENABLE_TESTCASES)
# build dependencies of gRPC
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/absl.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/c-ares.cmake)
......@@ -69,7 +69,6 @@ endif()
if (ENABLE_MINDDATA)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/icu4c.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/jpeg_turbo.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/libtiff.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/opencv.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/sqlite.cmake)
......@@ -78,6 +77,10 @@ if (ENABLE_MINDDATA)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/sentencepiece.cmake)
endif()
if (ENABLE_MINDDATA OR ENABLE_SERVING)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/jpeg_turbo.cmake)
endif()
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/gtest.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/onnx.cmake)
set(CMAKE_CXX_FLAGS ${_ms_tmp_CMAKE_CXX_FLAGS_F})
......@@ -26,19 +26,10 @@
#ifndef ENABLE_ACL
#include "mindspore/core/utils/log_adapter.h"
namespace mindspore::inference {
#define MSI_LOG(level) MS_LOG(level)
#define MSI_LOG_DEBUG MSI_LOG(DEBUG)
#define MSI_LOG_INFO MSI_LOG(INFO)
#define MSI_LOG_WARNING MSI_LOG(WARNING)
#define MSI_LOG_ERROR MSI_LOG(ERROR)
#define MSI_ASSERT(item) MS_ASSERT(item)
} // namespace mindspore::inference
#else // ENABLE_ACL
#include "acl/acl.h"
#endif
namespace mindspore::inference {
class LogStream {
......@@ -58,15 +49,23 @@ class LogStream {
}
friend class LogWriter;
friend class Status;
private:
std::shared_ptr<std::stringstream> sstream_;
};
template <class T, typename std::enable_if<std::is_enum<T>::value, int>::type = 0>
constexpr std::ostream &operator<<(std::ostream &stream, const T &value) {
return stream << static_cast<typename std::underlying_type<T>::type>(value);
}
#ifndef ENABLE_ACL
#define MSI_LOG(level) MS_LOG(level)
#define MSI_LOG_DEBUG MSI_LOG(DEBUG)
#define MSI_LOG_INFO MSI_LOG(INFO)
#define MSI_LOG_WARNING MSI_LOG(WARNING)
#define MSI_LOG_ERROR MSI_LOG(ERROR)
#define MSI_ASSERT(item) MS_ASSERT(item)
#else // ENABLE_ACL
class LogWriter {
public:
......@@ -100,8 +99,10 @@ class LogWriter {
#define MSI_ASSERT(item)
} // namespace mindspore::inference
#endif // ENABLE_ACL
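// Note on the macro below: operator<< binds tighter than operator<, so a call like
// INFER_STATUS(INVALID_INPUTS) << "msg" first accumulates the message into the
// temporary LogStream, and then Status::operator< copies the accumulated string
// into the returned Status.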
#define INFER_STATUS(code) inference::Status(code) < inference::LogStream()
} // namespace mindspore::inference
#endif // MINDSPORE_INFERENCE_LOG_H_
......@@ -129,12 +129,25 @@ class InferTensor : public InferTensorBase {
void *mutable_data() override { return data_.data(); }
};
class InferImagesBase {
public:
virtual size_t batch_size() const = 0;
virtual bool get(size_t index, const void *&pic_buffer, uint32_t &pic_size) const = 0;
virtual size_t input_index() const = 0; // index of the model input that the images feed
};
class RequestBase {
public:
virtual size_t size() const = 0;
virtual const InferTensorBase *operator[](size_t index) const = 0;
};
class ImagesRequestBase {
public:
virtual size_t size() const = 0;
virtual const InferImagesBase *operator[](size_t index) const = 0;
};
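// InferImagesBase supplies a batch of encoded pictures bound to one model input
// (input_index()); ImagesRequestBase groups one InferImagesBase per image-fed input.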
class ReplyBase {
public:
virtual size_t size() const = 0;
......
......@@ -21,10 +21,36 @@
#include <vector>
#include <string>
#include "include/infer_tensor.h"
#include "include/infer_log.h"
namespace mindspore {
namespace inference {
enum Status { SUCCESS = 0, FAILED, INVALID_INPUTS };
enum StatusCode { SUCCESS = 0, FAILED, INVALID_INPUTS };
class Status {
public:
Status() : status_code_(FAILED) {}
Status(enum StatusCode status_code, const std::string &status_msg = "")
: status_code_(status_code), status_msg_(status_msg) {}
bool IsSuccess() const { return status_code_ == SUCCESS; }
enum StatusCode StatusCode() const { return status_code_; }
std::string StatusMessage() const { return status_msg_; }
bool operator==(const Status &other) const { return status_code_ == other.status_code_; }
bool operator==(enum StatusCode other_code) const { return status_code_ == other_code; }
bool operator!=(const Status &other) const { return status_code_ != other.status_code_; }
bool operator!=(enum StatusCode other_code) const { return status_code_ != other_code; }
operator bool() const = delete;
Status &operator<(const LogStream &stream) noexcept __attribute__((visibility("default"))) {
status_msg_ = stream.sstream_->str();
return *this;
}
private:
enum StatusCode status_code_;
std::string status_msg_;
};
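// A minimal usage sketch of the new Status type (hypothetical call site):
//   Status Check(size_t given, size_t required) {
//     if (given != required) {
//       return INFER_STATUS(INVALID_INPUTS) << "given " << given << ", required " << required;
//     }
//     return SUCCESS;
//   }
//   auto ret = Check(1, 2);
//   if (ret != SUCCESS) MSI_LOG_ERROR << ret.StatusMessage();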
class MS_API InferSession {
public:
InferSession() = default;
......@@ -42,7 +68,12 @@ class MS_API InferSession {
VectorInferTensorWrapReply reply(outputs);
return ExecuteModel(model_id, request, reply);
}
// input data preprocessing (decode, resize, crop, crop&paste, etc.) is not supported by default
virtual Status ExecuteModel(uint32_t /*model_id*/,
const ImagesRequestBase & /*images_inputs*/, // images for preprocess
const RequestBase & /*request*/, ReplyBase & /*reply*/) {
return FAILED;
}
static std::shared_ptr<InferSession> CreateSession(const std::string &device, uint32_t device_id);
};
......
......@@ -87,7 +87,8 @@ GraphId AscendInferenceSession::CompileGraph(NotNull<FuncGraphPtr> func_graph) {
return graph_id;
}
bool AscendInferenceSession::CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs) const {
bool AscendInferenceSession::CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs,
std::string *error_msg) const {
MS_LOG(INFO) << "Start check client inputs, graph id : " << graph_id;
auto kernel_graph = GetGraph(graph_id);
MS_EXCEPTION_IF_NULL(kernel_graph);
......@@ -113,12 +114,25 @@ bool AscendInferenceSession::CheckModelInputs(uint32_t graph_id, const std::vect
MS_LOG(ERROR) << "Input number is inconsistent. The actual input number [" << inputs.size()
<< "] but the graph input number is [" << paras.size() << "]";
MS_LOG(ERROR) << "InputsInfo --" << InputsInfo(paras, inputs);
if (error_msg != nullptr) {
std::stringstream str_stream;
str_stream << "Input number is inconsistent. The given input number [" << inputs.size()
<< "] but the graph input number is [" << paras.size() << "]\n";
str_stream << "InputsInfo --" << InputsInfo(paras, inputs);
*error_msg = str_stream.str();
}
return false;
}
auto input = inputs[no_weight_input++];
if (!CompareInput(input, paras[i])) {
MS_LOG(ERROR) << "Please check the input information.";
MS_LOG(ERROR) << "InputsInfo --" << InputsInfo(paras, inputs);
if (error_msg != nullptr) {
std::stringstream str_stream;
str_stream << "Please check the input information.\n";
str_stream << "InputsInfo --" << InputsInfo(paras, inputs);
*error_msg = str_stream.str();
}
return false;
}
}
......@@ -165,17 +179,35 @@ std::string AscendInferenceSession::PrintInputShape(std::vector<T> shape) const
std::string AscendInferenceSession::InputsInfo(const std::vector<ParameterPtr> &paras,
const std::vector<tensor::TensorPtr> &inputs) const {
const std::map<TypeId, std::string> dtype_name_map{
{TypeId::kNumberTypeBegin, "Unknown"}, {TypeId::kNumberTypeBool, "Bool"},
{TypeId::kNumberTypeFloat64, "Float64"}, {TypeId::kNumberTypeInt8, "Int8"},
{TypeId::kNumberTypeUInt8, "Uint8"}, {TypeId::kNumberTypeInt16, "Int16"},
{TypeId::kNumberTypeUInt16, "Uint16"}, {TypeId::kNumberTypeInt32, "Int32"},
{TypeId::kNumberTypeUInt32, "Uint32"}, {TypeId::kNumberTypeInt64, "Int64"},
{TypeId::kNumberTypeUInt64, "Uint64"}, {TypeId::kNumberTypeFloat16, "Float16"},
{TypeId::kNumberTypeFloat32, "Float32"},
};
auto data_type_to_string = [&dtype_name_map](TypeId type_id) {
auto it = dtype_name_map.find(type_id);
if (it == dtype_name_map.end()) {
return std::string("Unknown");
}
return it->second;
};
std::string graph = "graph inputs:{ ";
for (size_t i = 0; i < paras.size(); ++i) {
graph += std::to_string(i) + ": dims " + std::to_string(AnfAlgo::GetOutputDeviceShape(paras[i], 0).size()) +
", shape " + PrintInputShape(AnfAlgo::GetOutputDeviceShape(paras[i], 0)) + ", data type " +
std::to_string(AnfAlgo::GetSelectKernelBuildInfo(paras[i])->GetOutputDeviceType(0)) + " }";
auto &para = paras[i];
graph += std::to_string(i) + ": dims " + std::to_string(AnfAlgo::GetOutputDeviceShape(para, 0).size()) +
", shape " + PrintInputShape(AnfAlgo::GetOutputDeviceShape(para, 0)) + ", data type " +
data_type_to_string(AnfAlgo::GetSelectKernelBuildInfo(para)->GetOutputDeviceType(0)) + " }";
}
std::string actual = "actual inputs:{ ";
std::string actual = "given inputs:{ ";
for (size_t i = 0; i < inputs.size(); ++i) {
actual += std::to_string(i) + ": dims " + std::to_string(inputs[i]->shape().size()) + ", shape " +
PrintInputShape(inputs[i]->shape()) + ", data type " + std::to_string(inputs[i]->data_type()) + " }";
PrintInputShape(inputs[i]->shape()) + ", data type " + data_type_to_string(inputs[i]->data_type()) + " }";
}
return graph + " " + actual;
}
......
......@@ -39,7 +39,8 @@ class AscendInferenceSession : public AscendSession {
void LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs_const) const;
GraphId CompileGraph(NotNull<FuncGraphPtr> func_graph) override;
bool CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs) const override;
bool CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs,
std::string *error_msg) const override;
bool CompareInput(const tensor::TensorPtr &input, const ParameterPtr &parameter) const;
template <typename T>
std::string PrintInputShape(std::vector<T> shape) const;
......
......@@ -116,7 +116,7 @@ Status MSInferSession::LoadModelFromFile(const std::string &file_name, uint32_t
Status MSInferSession::UnloadModel(uint32_t model_id) { return SUCCESS; }
tensor::TensorPtr ServingTensor2MSTensor(const InferTensorBase &out_tensor) {
Status ServingTensor2MSTensor(size_t index, const InferTensorBase &out_tensor, tensor::TensorPtr &ms_tensor) {
std::vector<int> shape;
for (auto dim : out_tensor.shape()) {
shape.push_back(static_cast<int>(dim));
......@@ -134,14 +134,22 @@ tensor::TensorPtr ServingTensor2MSTensor(const InferTensorBase &out_tensor) {
auto it = type2id_map.find(out_tensor.data_type());
if (it == type2id_map.end()) {
MSI_LOG_WARNING << "undefined MSI data type " << out_tensor.data_type();
return nullptr;
return FAILED;
} else {
data_type = it->second;
}
auto ms_tensor = std::make_shared<tensor::Tensor>(data_type, shape);
ms_tensor = std::make_shared<tensor::Tensor>(data_type, shape);
if (ms_tensor->Size() != out_tensor.data_size()) {
MSI_LOG_ERROR << "input " << std::to_string(index)
<< " data size not match shape and dtype, calculated required size " << ms_tensor->Size()
<< ", given " << out_tensor.data_size();
return INFER_STATUS(INVALID_INPUTS) << "input " << std::to_string(index)
<< " data size not match shape and dtype, calculated required size "
<< ms_tensor->Size() << ", given " << out_tensor.data_size();
}
memcpy_s(ms_tensor->data_c(), ms_tensor->Size(), out_tensor.data(), out_tensor.data_size());
return ms_tensor;
return SUCCESS;
}
void MSTensor2ServingTensor(tensor::TensorPtr ms_tensor, InferTensorBase &out_tensor) {
......@@ -189,16 +197,18 @@ Status MSInferSession::ExecuteModel(uint32_t model_id, const RequestBase &reques
MS_LOG(ERROR) << "Execute Model " << model_id << " Failed, input tensor is null, index " << i;
return FAILED;
}
auto input = ServingTensor2MSTensor(*request[i]);
if (input == nullptr) {
tensor::TensorPtr input = nullptr;
auto ret = ServingTensor2MSTensor(i, *request[i], input);
if (ret != SUCCESS) {
MS_LOG(ERROR) << "Tensor convert failed";
return FAILED;
return ret;
}
inputs.push_back(input);
}
if (!CheckModelInputs(model_id, inputs)) {
auto ret = CheckModelInputs(model_id, inputs);
if (ret != SUCCESS) {
MS_LOG(ERROR) << "Check Model " << model_id << " Inputs Failed";
return INVALID_INPUTS;
return ret;
}
vector<tensor::TensorPtr> outputs = RunGraph(model_id, inputs);
if (outputs.empty()) {
......@@ -354,9 +364,13 @@ Status MSInferSession::InitEnv(const std::string &device, uint32_t device_id) {
return SUCCESS;
}
bool MSInferSession::CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs) const {
Status MSInferSession::CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs) const {
MS_ASSERT(session_impl_ != nullptr);
return session_impl_->CheckModelInputs(graph_id, inputs);
std::string error_msg;
if (!session_impl_->CheckModelInputs(graph_id, inputs, &error_msg)) {
return INFER_STATUS(INVALID_INPUTS) << error_msg;
}
return SUCCESS;
}
} // namespace mindspore::inference
......@@ -58,7 +58,7 @@ class MSInferSession : public InferSession {
static void RegAllOp();
string AjustTargetName(const std::string &device);
Status CompileGraph(std::shared_ptr<FuncGraph> funcGraphPtr, uint32_t &model_id);
bool CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs) const;
Status CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs) const;
std::vector<tensor::TensorPtr> RunGraph(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs);
};
} // namespace inference
......
......@@ -97,7 +97,10 @@ class SessionBasic {
virtual GraphId GetFinalRunGraph() const { return kInvalidGraphId; }
void AssignParamKey(const KernelGraphPtr &kernel_graph);
void InitPSParamAndOptim(const KernelGraphPtr &kernel_graph, const std::vector<tensor::TensorPtr> &inputs_const);
virtual bool CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs) const { return true; }
virtual bool CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs,
std::string *error_msg) const {
return true;
}
#ifdef ENABLE_DEBUGGER
// set debugger
......
......@@ -103,9 +103,12 @@ endif ()
if (ENABLE_ACL)
add_compile_definitions(ENABLE_ACL)
add_compile_definitions(ENABLE_DVPP_INTERFACE)
set(ACL_LIB_SO ${ACL_LIB_DIR}/lib64/libruntime.so ${ACL_LIB_DIR}/lib64/libascendcl.so
${ACL_LIB_DIR}/lib64/libacl_retr.so ${ACL_LIB_DIR}/lib64/libacl_cblas.so)
${ACL_LIB_DIR}/lib64/libacl_retr.so ${ACL_LIB_DIR}/lib64/libacl_cblas.so
${ACL_LIB_DIR}/lib64/libacl_dvpp.so)
target_link_libraries(ms_serving ${ACL_LIB_SO})
target_link_libraries(ms_serving jpeg_turbo::jpeg)
else ()
target_link_libraries(ms_serving inference mindspore_gvar)
endif ()
......@@ -16,6 +16,7 @@
#include <memory>
#include <algorithm>
#include <fstream>
#include "serving/acl/acl_session.h"
#include "include/infer_log.h"
......@@ -25,7 +26,7 @@ std::shared_ptr<InferSession> InferSession::CreateSession(const std::string &dev
try {
auto session = std::make_shared<AclSession>();
auto ret = session->InitEnv(device, device_id);
if (!ret) {
if (ret != SUCCESS) {
return nullptr;
}
return session;
......@@ -36,22 +37,123 @@ std::shared_ptr<InferSession> InferSession::CreateSession(const std::string &dev
}
Status AclSession::LoadModelFromFile(const std::string &file_name, uint32_t &model_id) {
return model_process_.LoadModelFromFile(file_name, model_id) ? SUCCESS : FAILED;
Status ret = model_process_.LoadModelFromFile(file_name, model_id);
if (ret != SUCCESS) {
MSI_LOG_ERROR << "Load model from file failed, model file " << file_name;
return FAILED;
}
std::string dvpp_config_file;
auto index = file_name.rfind(".");
if (index == std::string::npos) {
dvpp_config_file = file_name;
} else {
dvpp_config_file = file_name.substr(0, index);
}
dvpp_config_file += "_dvpp_config.json";
std::ifstream fp(dvpp_config_file);
if (!fp.is_open()) {
MSI_LOG_INFO << "Dvpp config file not exist, model will execute with tensors as inputs, dvpp config file "
<< dvpp_config_file;
return SUCCESS;
}
fp.close();
if (dvpp_process_.InitWithJsonConfig(dvpp_config_file) != SUCCESS) {
MSI_LOG_ERROR << "Dvpp config file parse error, dvpp config file " << dvpp_config_file;
return FAILED;
}
execute_with_dvpp_ = true;
MSI_LOG_INFO << "Dvpp config success";
return SUCCESS;
}
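// Naming convention used above, illustrated with an assumed example: for model
// file "resnet50.om" the session looks for "resnet50_dvpp_config.json" next to
// it; when that file is absent the model simply runs on tensor inputs.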
Status AclSession::UnloadModel(uint32_t model_id) {
Status AclSession::UnloadModel(uint32_t /*model_id*/) {
model_process_.UnLoad();
return SUCCESS;
}
Status AclSession::ExecuteModel(uint32_t model_id, const RequestBase &request,
Status AclSession::ExecuteModel(uint32_t /*model_id*/, const RequestBase &request,
ReplyBase &reply) { // set d context
aclError rt_ret = aclrtSetCurrentContext(context_);
if (rt_ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "set the ascend device context failed";
return FAILED;
}
return model_process_.Execute(request, reply) ? SUCCESS : FAILED;
return model_process_.Execute(request, reply);
}
Status AclSession::PreProcess(uint32_t /*model_id*/, const InferImagesBase *images_input,
ImagesDvppOutput &dvpp_output) {
if (images_input == nullptr) {
MSI_LOG_ERROR << "images input is nullptr";
return FAILED;
}
auto batch_size = images_input->batch_size();
if (batch_size == 0) {
MSI_LOG_ERROR << "invalid batch size " << images_input->batch_size();
return FAILED;
}
std::vector<const void *> pic_buffer_list;
std::vector<size_t> pic_size_list;
for (size_t i = 0; i < batch_size; i++) {
const void *pic_buffer = nullptr;
uint32_t pic_size = 0;
if (!images_input->get(i, pic_buffer, pic_size) || pic_buffer == nullptr || pic_size == 0) {
MSI_LOG_ERROR << "Get request " << 0 << "th buffer failed";
return FAILED;
}
pic_buffer_list.push_back(pic_buffer);
pic_size_list.push_back(pic_size);
}
auto ret = dvpp_process_.Process(pic_buffer_list, pic_size_list, dvpp_output.buffer_device, dvpp_output.buffer_size);
if (ret != SUCCESS) {
MSI_LOG_ERROR << "dvpp process failed";
return ret;
}
return SUCCESS;
}
Status AclSession::ExecuteModel(uint32_t model_id, const ImagesRequestBase &images_inputs, // images for preprocess
const RequestBase &request, ReplyBase &reply) {
if (!execute_with_dvpp_) {
MSI_LOG_ERROR << "Unexpected images as inputs, DVPP not config";
return INFER_STATUS(INVALID_INPUTS) << "Unexpected images as inputs, DVPP not config";
}
aclError rt_ret = aclrtSetCurrentContext(context_);
if (rt_ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "set the ascend device context failed";
return FAILED;
}
if (images_inputs.size() != 1) {
MSI_LOG_ERROR << "Only support one input to do DVPP preprocess";
return INFER_STATUS(INVALID_INPUTS) << "Only support one input to do DVPP preprocess";
}
if (images_inputs[0] == nullptr) {
MSI_LOG_ERROR << "Get first images input failed";
return FAILED;
}
if (images_inputs[0]->batch_size() != model_process_.GetBatchSize()) {
MSI_LOG_ERROR << "Input batch size " << images_inputs[0]->batch_size() << " not match Model batch size "
<< model_process_.GetBatchSize();
return INFER_STATUS(INVALID_INPUTS) << "Input batch size " << images_inputs[0]->batch_size()
<< " not match Model batch size " << model_process_.GetBatchSize();
}
if (request.size() != 0) {
MSI_LOG_ERROR << "only support one input, images input size is 1, tensor inputs is not 0 " << request.size();
return INFER_STATUS(INVALID_INPUTS) << "only support one input, images input size is 1, tensor inputs is not 0 "
<< request.size();
}
ImagesDvppOutput dvpp_output;
Status ret = PreProcess(model_id, images_inputs[0], dvpp_output);
if (ret != SUCCESS) {
MSI_LOG_ERROR << "DVPP preprocess failed";
return ret;
}
ret = model_process_.Execute(dvpp_output.buffer_device, dvpp_output.buffer_size, reply);
if (ret != SUCCESS) {
MSI_LOG_ERROR << "Execute model failed";
return ret;
}
return SUCCESS;
}
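// Summary of the DVPP path above: verify the DVPP config was loaded, bind the
// Ascend context, validate batch size and input counts, run DVPP decode +
// resize/crop into a device buffer, then feed that buffer directly to the model.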
Status AclSession::InitEnv(const std::string &device_type, uint32_t device_id) {
......@@ -95,11 +197,16 @@ Status AclSession::InitEnv(const std::string &device_type, uint32_t device_id) {
model_process_.SetIsDevice(is_device);
MSI_LOG_INFO << "get run mode success is device input/output " << is_device;
if (dvpp_process_.InitResource(stream_) != SUCCESS) {
MSI_LOG_ERROR << "dvpp init resource failed";
return FAILED;
}
MSI_LOG_INFO << "Init acl success, device id " << device_id_;
return SUCCESS;
}
Status AclSession::FinalizeEnv() {
dvpp_process_.Finalize();
aclError ret;
if (stream_ != nullptr) {
ret = aclrtDestroyStream(stream_);
......
......@@ -25,9 +25,11 @@
#include "include/inference.h"
#include "serving/acl/model_process.h"
#include "serving/acl/dvpp_process.h"
namespace mindspore {
namespace inference {
class AclSession : public InferSession {
public:
AclSession();
......@@ -37,6 +39,8 @@ class AclSession : public InferSession {
Status LoadModelFromFile(const std::string &file_name, uint32_t &model_id) override;
Status UnloadModel(uint32_t model_id) override;
Status ExecuteModel(uint32_t model_id, const RequestBase &request, ReplyBase &reply) override;
Status ExecuteModel(uint32_t model_id, const ImagesRequestBase &images_inputs, // images for preprocess
const RequestBase &request, ReplyBase &reply) override;
private:
std::string device_type_;
......@@ -44,6 +48,10 @@ class AclSession : public InferSession {
aclrtStream stream_ = nullptr;
aclrtContext context_ = nullptr;
ModelProcess model_process_;
bool execute_with_dvpp_ = false;
DvppProcess dvpp_process_;
Status PreProcess(uint32_t model_id, const InferImagesBase *images_input, ImagesDvppOutput &dvpp_output);
};
} // namespace inference
} // namespace mindspore
......
(This diff is collapsed.)
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef INC_DVPP_PROCESS_ACL
#define INC_DVPP_PROCESS_ACL
#include <vector>
#include <string>
#include "acl/acl.h"
#include "acl/acl_mdl.h"
#include "acl/acl_rt.h"
#include "acl/ops/acl_dvpp.h"
#include "include/inference.h"
namespace mindspore::inference {
struct DvppDecodePara {
acldvppPixelFormat pixel_format = PIXEL_FORMAT_YUV_SEMIPLANAR_420;
};
struct DvppResizePara {
uint32_t output_width = 0;
uint32_t output_height = 0;
};
enum DvppCropType {
// crop left,top,right,bottom is given in config
kDvppCropTypeOffset = 0,
// crop left,top,right,bottom is calculated by image width/height and output crop width/height
kDvppCropTypeCentre = 1,
};
struct DvppRoiArea {
uint32_t left = 0;
uint32_t top = 0;
uint32_t right = 0;
uint32_t bottom = 0;
};
struct DvppCropInfo {
DvppCropType crop_type = kDvppCropTypeOffset;
DvppRoiArea crop_area; // when kDvppCropTypeOffset
uint32_t crop_width = 0; // when kDvppCropTypeCentre
uint32_t crop_height = 0; // when kDvppCropTypeCentre
};
struct DvppCropPara {
DvppCropInfo crop_info;
uint32_t output_width = 0;
uint32_t output_height = 0;
};
struct DvppCropAndPastePara {
DvppCropInfo crop_info;
DvppRoiArea paste_area;
uint32_t output_width = 0;
uint32_t output_height = 0;
};
class DvppProcess {
public:
DvppProcess();
~DvppProcess();
Status InitResource(aclrtStream stream);
void Finalize();
Status InitJpegDecodePara(const DvppDecodePara &decode_para); // jpeg decode + (resize | crop)
Status InitResizePara(const DvppResizePara &resize_para); // jpeg decode + resize
Status InitCropPara(const DvppCropPara &crop_para); // jpeg decode + crop
Status InitCropAndPastePara(const DvppCropAndPastePara &crop_and_paste_para); // jpeg decode + crop&paste
Status InitWithJsonConfig(const std::string &json_config);
// the output device buffer will be destroyed by DvppProcess itself.
Status Process(const void *pic_buffer, size_t pic_buffer_size, void *&output_device_buffer, size_t &output_size);
Status Process(const std::vector<const void *> &pic_buffer_list, const std::vector<size_t> &pic_buffer_size_list,
void *&output_device_buffer, size_t &output_size);
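// Typical call order (as used by AclSession): InitResource(stream) during session
// init, InitWithJsonConfig(file) at model load, Process(...) per request, and
// Finalize() on shutdown.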
private:
uint32_t pic_width_ = 0;
uint32_t pic_height_ = 0;
DvppDecodePara decode_para_;
DvppResizePara resize_para_;
DvppCropPara crop_para_;
DvppCropAndPastePara crop_and_paste_para_;
// at most one of the resize, crop, and crop&paste flags can be true
bool to_resize_flag_ = false;
bool to_crop_flag_ = false;
bool to_crop_and_paste_flag_ = false;
void *input_pic_dev_buffer_ = nullptr;
uint32_t input_pic_buffer_size_ = 0;
uint32_t decode_output_buffer_size_ = 0;
void *decode_output_buffer_dev_ = nullptr;
acldvppPicDesc *decode_output_desc_ = nullptr;
acldvppResizeConfig *resize_config_ = nullptr;
acldvppRoiConfig *crop_area_ = nullptr;
acldvppRoiConfig *paste_area_ = nullptr;
acldvppPicDesc *vpc_output_desc_ = nullptr;
void *vpc_output_buffer_dev_ = nullptr; // vpc_output_buffer_size_ length
uint32_t vpc_output_buffer_size_ = 0;
void *batch_vpc_output_buffer_dev_ = nullptr; // batch_size_ * vpc_output_buffer_size_ length
uint32_t batch_size_ = 0;
aclrtStream stream_ = nullptr;
acldvppChannelDesc *dvpp_channel_desc_ = nullptr;
uint32_t AlignmentHelper(uint32_t org_size, uint32_t alignment) const;
uint32_t GetImageBufferSize(uint32_t stride_width, uint32_t stride_height, acldvppPixelFormat pixel_format) const;
Status GetPicDescStride(uint32_t width, uint32_t height, uint32_t &stride_width, uint32_t &stride_height);
Status GetPicDescStrideDecode(uint32_t width, uint32_t height, uint32_t &stride_width, uint32_t &stride_height);
Status InputInputBuffer(const void *pic_buffer, size_t pic_buffer_size);
Status InitDecodeOutputDesc(uint32_t image_width,
uint32_t image_height); // decode_output_desc_, decode_output_buffer_dev_
Status CheckRoiAreaWidthHeight(uint32_t width, uint32_t height);
Status CheckAndAdjustRoiArea(DvppRoiArea &area);
Status UpdateCropArea(uint32_t image_width, uint32_t image_height);
Status CheckResizeImageInfo(uint32_t image_width, uint32_t image_height) const;
void DestroyDecodeDesc();
Status InitVpcOutputDesc(uint32_t output_width, uint32_t output_height,
acldvppPixelFormat pixel_format); // vpc_output_desc_, vpc_output_buffer_dev_batch_
Status InitRoiAreaConfig(acldvppRoiConfig *&roi_area, const DvppRoiArea &init_para);
Status InitCommonCropPara(DvppCropInfo &crop_info, uint32_t out_width, uint32_t out_height);
Status InitResizeOutputDesc(); // vpc_output_desc_, vpc_output_buffer_dev_, resize_config
Status InitCropOutputDesc(); // vpc_output_desc_, vpc_output_buffer_dev_, crop_area_
Status InitCropAndPasteOutputDesc(); // vpc_output_desc_, vpc_output_buffer_dev_, crop_area_, paste_area_
void DestroyVpcOutputDesc();
Status ProcessDecode();
Status ProcessResize();
Status ProcessCrop();
Status ProcessCropAndPaste();
void DestroyResource();
Status GetJpegWidthHeight(const void *pic_buffer, size_t pic_buffer_size, uint32_t &image_width,
uint32_t &image_height);
};
} // namespace mindspore::inference
#endif // INC_DVPP_PROCESS_ACL
{
"preprocess": [
{
"input": {
"index": 0
},
"decode_para": {
"out_pixel_format": "YUV420SP"
},
"dvpp_process": {
"op_name": "resize",
"out_width": 224,
"out_height": 224
},
"sample of dvpp_process content": [
{
"op_name": "resize",
"out_width": 224,
"out_height": 224
},
{
"op_name": "crop",
"crop_type": "offset",
"crop_left": 10,
"crop_top": 10,
"crop_right": 100,
"crop_bottom": 200,
"out_width": 224,
"out_height": 224
},
{
"op_name": "crop",
"crop_type": "centre",
"crop_width": 100,
"crop_height": 100,
"out_width": 224,
"out_height": 224
},
{
"op_name": "crop_and_paste",
"crop_type": "offset",
"crop_left": 10,
"crop_top": 10,
"crop_right": 100,
"crop_bottom": 200,
"paste_left": 10,
"paste_top": 10,
"paste_right": 100,
"paste_bottom": 200,
"out_width": 224,
"out_height": 224
},
{
"op_name": "crop_and_paste",
"crop_type": "centre",
"crop_width": 100,
"crop_height": 100,
"paste_left": 10,
"paste_top": 10,
"paste_right": 100,
"paste_bottom": 200,
"out_width": 224,
"out_height": 224
}
]
}
]
}
\ No newline at end of file
......@@ -23,41 +23,44 @@
namespace mindspore {
namespace inference {
bool ModelProcess::LoadModelFromFile(const std::string &file_name, uint32_t &model_id) {
aclError acl_ret = aclmdlLoadFromFile(file_name.c_str(), &model_id);
if (acl_ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "Read model file failed, file name is " << file_name;
return false;
}
MSI_LOG_INFO << "Load model success " << file_name;
Status ModelProcess::PreInitModelResource() {
model_desc_ = aclmdlCreateDesc();
acl_ret = aclmdlGetDesc(model_desc_, model_id);
aclError acl_ret = aclmdlGetDesc(model_desc_, model_id_);
if (acl_ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "Read model desc failed";
return false;
return FAILED;
}
bool ret = InitInputsBuffer();
if (!ret) {
Status ret = InitInputsBuffer();
if (ret != SUCCESS) {
MSI_LOG_ERROR << "Create input buffer failed";
return false;
return FAILED;
}
ret = InitOutputsBuffer();
if (!ret) {
if (ret != SUCCESS) {
MSI_LOG_ERROR << "Create output buffer failed";
return false;
return FAILED;
}
return SUCCESS;
}
Status ModelProcess::LoadModelFromFile(const std::string &file_name, uint32_t &model_id) {
aclError acl_ret = aclmdlLoadFromFile(file_name.c_str(), &model_id);
if (acl_ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "Read model file failed, file name is " << file_name;
return FAILED;
}
MSI_LOG_INFO << "Load model success " << file_name;
model_id_ = model_id;
return true;
if (PreInitModelResource() != SUCCESS) {
aclmdlUnload(model_id_);
MSI_LOG_ERROR << "Pre init model resource failed, file name is " << file_name;
return FAILED;
}
return SUCCESS;
}
bool ModelProcess::InitInputsBuffer() {
Status ModelProcess::InitInputsBuffer() {
aclError ret;
inputs_ = aclmdlCreateDataset();
if (inputs_ == nullptr) {
MSI_LOG_ERROR << "Create input dataset failed";
return false;
}
size_t input_size = aclmdlGetNumInputs(model_desc_);
for (size_t i = 0; i < input_size; ++i) {
......@@ -67,7 +70,7 @@ bool ModelProcess::InitInputsBuffer() {
ret = aclrtMalloc(&data_mem_buffer, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "Malloc device input buffer faild , input size " << buffer_size;
return false;
return FAILED;
}
}
......@@ -75,17 +78,20 @@ bool ModelProcess::InitInputsBuffer() {
ret = aclmdlGetInputDims(model_desc_, i, &dims);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "Get input shape failed";
return false;
if (!is_run_on_device_) {
aclrtFree(data_mem_buffer);
}
return FAILED;
}
aclDataType dataType = aclmdlGetInputDataType(model_desc_, i);
aclDataType data_type = aclmdlGetInputDataType(model_desc_, i);
std::vector<int64_t> shape(dims.dims, dims.dims + dims.dimCount);
input_infos_.emplace_back(AclTensorInfo{data_mem_buffer, buffer_size, dataType, shape});
input_infos_.emplace_back(AclTensorInfo{data_mem_buffer, buffer_size, data_type, shape});
}
MSI_LOG_INFO << "Create model inputs success";
return true;
return SUCCESS;
}
bool ModelProcess::CreateDataBuffer(void *&data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) {
Status ModelProcess::CreateDataBuffer(void *&data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) {
aclError ret;
auto free_data_buffer = [this](void *dataMemBuffer) {
if (!is_run_on_device_) {
......@@ -98,13 +104,13 @@ bool ModelProcess::CreateDataBuffer(void *&data_mem_buffer, size_t buffer_size,
ret = aclrtMalloc(&data_mem_buffer, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "Malloc device buffer faild , buffer size " << buffer_size;
return false;
return FAILED;
}
} else {
ret = aclrtMallocHost(&data_mem_buffer, buffer_size);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "Malloc device buffer faild , buffer size " << buffer_size;
return false;
return FAILED;
}
}
......@@ -112,46 +118,51 @@ bool ModelProcess::CreateDataBuffer(void *&data_mem_buffer, size_t buffer_size,
if (data_buffer == nullptr) {
MSI_LOG_ERROR << "Create Data Buffer failed";
free_data_buffer(data_mem_buffer);
return false;
return FAILED;
}
ret = aclmdlAddDatasetBuffer(dataset, data_buffer);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "add data buffer failed";
free_data_buffer(data_mem_buffer);
aclDestroyDataBuffer(data_buffer);
return false;
return FAILED;
}
return true;
return SUCCESS;
}
bool ModelProcess::InitOutputsBuffer() {
Status ModelProcess::InitOutputsBuffer() {
aclError ret;
outputs_ = aclmdlCreateDataset();
if (outputs_ == nullptr) {
MSI_LOG_ERROR << "Create input dataset failed";
return false;
return FAILED;
}
size_t output_size = aclmdlGetNumOutputs(model_desc_);
for (size_t i = 0; i < output_size; ++i) {
auto buffer_size = aclmdlGetOutputSizeByIndex(model_desc_, i);
void *data_mem_buffer = nullptr;
if (CreateDataBuffer(data_mem_buffer, buffer_size, outputs_) != true) {
if (CreateDataBuffer(data_mem_buffer, buffer_size, outputs_) != SUCCESS) {
MSI_LOG_ERROR << "add output data buffer failed, buffer size " << buffer_size;
return false;
return FAILED;
}
aclmdlIODims dims;
ret = aclmdlGetOutputDims(model_desc_, i, &dims);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "Get input shape failed";
return false;
if (!is_run_on_device_) {
aclrtFree(data_mem_buffer);
} else {
aclrtFreeHost(data_mem_buffer);
}
return FAILED;
}
aclDataType dataType = aclmdlGetOutputDataType(model_desc_, i);
aclDataType data_type = aclmdlGetOutputDataType(model_desc_, i);
std::vector<int64_t> shape(dims.dims, dims.dims + dims.dimCount);
output_infos_.emplace_back(AclTensorInfo{data_mem_buffer, buffer_size, dataType, shape});
output_infos_.emplace_back(AclTensorInfo{data_mem_buffer, buffer_size, data_type, shape});
}
MSI_LOG_INFO << "Create model output success";
return true;
return SUCCESS;
}
void ModelProcess::DestroyInputsDataset() {
......@@ -176,27 +187,29 @@ void ModelProcess::DestroyInputsDataMem() {
}
void ModelProcess::DestroyInputsBuffer() {
DestroyInputsDataset();
DestroyInputsDataMem();
DestroyInputsDataset();
}
void ModelProcess::DestroyOutputsBuffer() {
for (const auto &item : output_infos_) {
if (!is_run_on_device_) {
aclrtFree(item.device_data);
} else {
aclrtFreeHost(item.device_data);
}
}
output_infos_.clear();
if (outputs_ == nullptr) {
return;
}
for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(outputs_); i++) {
auto dataBuffer = aclmdlGetDatasetBuffer(outputs_, i);
auto data = aclGetDataBufferAddr(dataBuffer);
if (!is_run_on_device_) {
aclrtFree(data);
} else {
aclrtFreeHost(data);
}
aclDestroyDataBuffer(dataBuffer);
}
aclmdlDestroyDataset(outputs_);
outputs_ = nullptr;
output_infos_.clear();
}
void ModelProcess::UnLoad() {
......@@ -213,24 +226,26 @@ void ModelProcess::UnLoad() {
MSI_LOG_INFO << "End unload model " << model_id_;
}
bool ModelProcess::CheckAndInitInput(const RequestBase &request) {
Status ModelProcess::CheckAndInitInput(const RequestBase &request) {
aclError ret;
inputs_ = aclmdlCreateDataset();
// check inputs
if (request.size() != input_infos_.size()) {
MSI_LOG_ERROR << "inputs count not match, required count " << input_infos_.size() << ", given count "
<< request.size();
return false;
return INFER_STATUS(INVALID_INPUTS) << "inputs count not match, required count " << input_infos_.size()
<< ", given count " << request.size();
}
for (size_t i = 0; i < input_infos_.size(); i++) {
if (request[i] == nullptr) {
MSI_LOG_ERROR << "input " << i << " cannot be null";
return false;
return FAILED;
}
if (request[i]->data_size() != input_infos_[i].buffer_size) {
MSI_LOG_ERROR << "input " << i << " data size not match, required size " << input_infos_[i].buffer_size
<< ", given count " << request[i]->data_size();
return false;
return INFER_STATUS(INVALID_INPUTS) << "input " << i << " data size not match, required size "
<< input_infos_[i].buffer_size << ", given count " << request[i]->data_size();
}
}
// copy inputs
......@@ -242,7 +257,7 @@ bool ModelProcess::CheckAndInitInput(const RequestBase &request) {
ret = aclrtMemcpy(info.device_data, info.buffer_size, data, request[i]->data_size(), ACL_MEMCPY_HOST_TO_DEVICE);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "memcpy input " << i << " data to device failed, buffer size " << request[i]->data_size();
return false;
return FAILED;
}
input_buffer = info.device_data;
} else {
......@@ -251,32 +266,70 @@ bool ModelProcess::CheckAndInitInput(const RequestBase &request) {
auto data_buffer = aclCreateDataBuffer(input_buffer, info.buffer_size);
if (data_buffer == nullptr) {
MSI_LOG_ERROR << "Create Data Buffer failed";
return false;
return FAILED;
}
ret = aclmdlAddDatasetBuffer(inputs_, data_buffer);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "add data buffer failed";
aclDestroyDataBuffer(data_buffer);
return false;
return FAILED;
}
}
return true;
return SUCCESS;
}
Status ModelProcess::CheckAndInitDvppInput(const void *dvpp_outputs_buffer_dev, size_t dvpp_outputs_buffer_size,
size_t input_index) {
aclError ret;
inputs_ = aclmdlCreateDataset();
// check inputs
if (input_index >= input_infos_.size()) {
MSI_LOG_ERROR << "inputs count not match, required count " << input_infos_.size() << ", given index "
<< input_index;
return INFER_STATUS(INVALID_INPUTS) << "inputs count not match, required count " << input_infos_.size()
<< ", given index " << input_index;
}
if (dvpp_outputs_buffer_dev == nullptr) {
MSI_LOG_ERROR << "input " << 0 << " cannot be null";
return FAILED;
}
if (dvpp_outputs_buffer_size != input_infos_[input_index].buffer_size) {
MSI_LOG_ERROR << "input " << 0 << " data size not match, required size " << input_infos_[input_index].buffer_size
<< ", given count " << dvpp_outputs_buffer_size;
return INFER_STATUS(INVALID_INPUTS) << "input " << 0 << " data size not match, required size "
<< input_infos_[input_index].buffer_size << ", given count "
<< dvpp_outputs_buffer_size;
}
// copy inputs
auto &info = input_infos_[input_index];
auto data_buffer = aclCreateDataBuffer(const_cast<void *>(dvpp_outputs_buffer_dev), info.buffer_size);
if (data_buffer == nullptr) {
MSI_LOG_ERROR << "Create Data Buffer failed";
return FAILED;
}
ret = aclmdlAddDatasetBuffer(inputs_, data_buffer);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "add data buffer failed";
aclDestroyDataBuffer(data_buffer);
return FAILED;
}
return SUCCESS;
}
bool ModelProcess::BuildOutputs(ReplyBase &reply) {
Status ModelProcess::BuildOutputs(ReplyBase &reply) {
aclError ret;
// copy outputs
reply.clear();
std::unordered_map<aclDataType, inference::DataType> dataTypeMap = {
std::unordered_map<aclDataType, inference::DataType> data_type_map = {
{ACL_FLOAT16, inference::kMSI_Float16}, {ACL_FLOAT, inference::kMSI_Float32}, {ACL_DOUBLE, inference::kMSI_Float64},
{ACL_INT8, inference::kMSI_Int8}, {ACL_INT16, inference::kMSI_Int16}, {ACL_INT32, inference::kMSI_Int32},
{ACL_INT64, inference::kMSI_Int64}, {ACL_UINT8, inference::kMSI_Uint8}, {ACL_UINT16, inference::kMSI_Uint16},
{ACL_UINT32, inference::kMSI_Uint32}, {ACL_UINT64, inference::kMSI_Uint64}, {ACL_BOOL, inference::kMSI_Bool},
};
auto trans_to_serving_type = [&dataTypeMap](aclDataType data_type) {
auto it = dataTypeMap.find(data_type);
if (it == dataTypeMap.end()) {
auto trans_to_serving_type = [&data_type_map](aclDataType data_type) {
auto it = data_type_map.find(data_type);
if (it == data_type_map.end()) {
return inference::kMSI_Unknown;
} else {
return it->second;
......@@ -287,53 +340,93 @@ bool ModelProcess::BuildOutputs(ReplyBase &reply) {
auto output = reply.add();
if (output == nullptr) {
MSI_LOG_ERROR << "add new output failed";
return false;
return FAILED;
}
output->set_data_type(trans_to_serving_type(info.data_type));
output->set_shape(info.dims);
if (!output->resize_data(info.buffer_size)) {
MSI_LOG_ERROR << "new output data buffer failed, data size " << info.buffer_size;
return false;
return FAILED;
}
if (!is_run_on_device_) {
ret = aclrtMemcpy(output->mutable_data(), output->data_size(), info.device_data, info.buffer_size,
ACL_MEMCPY_DEVICE_TO_HOST);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "Memcpy output " << i << " to host failed, memory size " << info.buffer_size;
return false;
return FAILED;
}
} else {
ret = aclrtMemcpy(output->mutable_data(), output->data_size(), info.device_data, info.buffer_size,
ACL_MEMCPY_HOST_TO_HOST);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "Memcpy output " << i << " to host failed, memory size " << info.buffer_size;
return false;
return FAILED;
}
}
}
return true;
return SUCCESS;
}
Status ModelProcess::Execute(const RequestBase &request, ReplyBase &reply) {
aclError acl_ret;
Status ret = CheckAndInitInput(request);
if (ret != SUCCESS) {
MSI_LOG_ERROR << "check or init input failed";
DestroyInputsDataset();
return ret; // forward status error
}
acl_ret = aclmdlExecute(model_id_, inputs_, outputs_);
DestroyInputsDataset();
if (acl_ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "Execute Model Failed";
return FAILED;
}
ret = BuildOutputs(reply);
if (ret != SUCCESS) {
MSI_LOG_ERROR << "Build outputs faield";
return FAILED;
}
MSI_LOG_INFO << "excute model success";
return SUCCESS;
}
bool ModelProcess::Execute(const RequestBase &request, ReplyBase &reply) {
Status ModelProcess::Execute(const void *dvpp_outputs_buffer_dev, size_t dvpp_outputs_buffer_size, ReplyBase &reply) {
aclError acl_ret;
if (CheckAndInitInput(request) != true) {
if (input_infos_.size() != 1) {
MSI_LOG_ERROR << "can only support input size 1, now model inputs size is " << input_infos_.size();
return INFER_STATUS(INVALID_INPUTS) << "can only support input size 1, now model inputs size is "
<< input_infos_.size();
}
Status ret = CheckAndInitDvppInput(dvpp_outputs_buffer_dev, dvpp_outputs_buffer_size, 0);
if (ret != SUCCESS) {
MSI_LOG_ERROR << "check or init input failed";
DestroyInputsDataset();
return false;
return ret; // forward status msg
}
acl_ret = aclmdlExecute(model_id_, inputs_, outputs_);
DestroyInputsDataset();
if (acl_ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "Execute Model Failed";
return false;
return INFER_STATUS(FAILED) << "Execute Model Failed";
}
bool ret = BuildOutputs(reply);
if (!ret) {
ret = BuildOutputs(reply);
if (ret != SUCCESS) {
MSI_LOG_ERROR << "Build outputs faield";
return false;
return FAILED;
}
MSI_LOG_INFO << "excute model success";
return true;
return SUCCESS;
}
size_t ModelProcess::GetBatchSize() const {
if (input_infos_.empty()) {
MSI_LOG_ERROR << "Model is not loaded";
return 0;
}
if (input_infos_[0].dims.empty()) {
return 1;
}
return static_cast<size_t>(input_infos_[0].dims[0]);
}
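// For example, with a first input of shape [8, 3, 224, 224] GetBatchSize()
// returns 8; a scalar (empty dims) first input is treated as batch size 1.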
} // namespace inference
......
......@@ -21,7 +21,6 @@
#include "acl/acl.h"
#include "acl/acl_mdl.h"
#include "acl/acl_rt.h"
#include "serving/core/util/status.h"
#include "include/inference.h"
namespace mindspore {
......@@ -34,21 +33,30 @@ struct AclTensorInfo {
std::vector<int64_t> dims;
};
struct ImagesDvppOutput {
void *buffer_device = nullptr;
size_t buffer_size = 0;
size_t input_index = 0;
};
class ModelProcess {
public:
ModelProcess() {}
~ModelProcess() {}
bool LoadModelFromFile(const std::string &file_name, uint32_t &model_id);
Status LoadModelFromFile(const std::string &file_name, uint32_t &model_id);
void UnLoad();
// override this method to avoid request/reply data copy
bool Execute(const RequestBase &request, ReplyBase &reply);
Status Execute(const RequestBase &request, ReplyBase &reply);
Status Execute(const void *dvpp_outputs_buffer_dev, size_t dvpp_outputs_buffer_size, ReplyBase &reply);
void SetIsDevice(bool is_device) { is_run_on_device_ = is_device; }
size_t GetBatchSize() const;
private:
uint32_t model_id_ = 0xffffffff;
// if running on device (AICPU), there is no need to alloc device memory and copy inputs to (/outputs from) the device
bool is_run_on_device_ = false;
aclmdlDesc *model_desc_ = nullptr;
aclmdlDataset *inputs_ = nullptr;
......@@ -56,12 +64,15 @@ class ModelProcess {
std::vector<AclTensorInfo> input_infos_;
std::vector<AclTensorInfo> output_infos_;
bool CreateDataBuffer(void *&data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset);
bool CheckAndInitInput(const RequestBase &request);
bool BuildOutputs(ReplyBase &reply);
Status PreInitModelResource();
Status CreateDataBuffer(void *&data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset);
Status CheckAndInitInput(const RequestBase &request);
Status CheckAndInitDvppInput(const void *dvpp_outputs_buffer_dev, size_t dvpp_outputs_buffer_size,
size_t input_index);
Status BuildOutputs(ReplyBase &reply);
bool InitInputsBuffer();
bool InitOutputsBuffer();
Status InitInputsBuffer();
Status InitOutputsBuffer();
void DestroyInputsDataset();
void DestroyInputsDataMem();
void DestroyInputsBuffer();
......
......@@ -31,7 +31,6 @@
#include "core/version_control/version_controller.h"
#include "core/util/file_system_operation.h"
#include "core/serving_tensor.h"
#include "util/status.h"
using ms_serving::MSService;
using ms_serving::PredictReply;
......@@ -45,7 +44,7 @@ namespace serving {
{ \
auto time_end_##name = std::chrono::steady_clock::now(); \
auto time_cost = std::chrono::duration<double, std::milli>(time_end_##name - time_start_##name).count(); \
MSI_LOG_INFO << #name " Time Cost " << time_cost << "ms ---------------------"; \
MSI_LOG_INFO << #name " Time Cost # " << time_cost << " ms ---------------------"; \
}
Status Session::CreatDeviceSession(const std::string &device, uint32_t device_id) {
......@@ -75,15 +74,26 @@ Status Session::Predict(const PredictRequest &request, PredictReply &reply) {
std::lock_guard<std::mutex> lock(mutex_);
MSI_LOG(INFO) << "run Predict";
ServingRequest serving_request(request);
ServingReply serving_reply(reply);
if (request.images_size() > 0) {
ServingImagesRequest serving_images(request);
ServingRequest serving_request(request);
ServingReply serving_reply(reply);
Status ret = session_->ExecuteModel(graph_id_, serving_images, serving_request, serving_reply);
if (ret != SUCCESS) {
MSI_LOG(ERROR) << "execute model with images return failed";
return ret;
}
} else if (request.data_size() > 0) {
ServingRequest serving_request(request);
ServingReply serving_reply(reply);
Status ret = session_->ExecuteModel(graph_id_, serving_request, serving_reply);
if (ret != SUCCESS) {
MSI_LOG(ERROR) << "execute model with datas return failed";
return ret;
}
}
auto ret = session_->ExecuteModel(graph_id_, serving_request, serving_reply);
MSI_LOG(INFO) << "run Predict finished";
if (Status(ret) != SUCCESS) {
MSI_LOG(ERROR) << "execute model return failed";
return Status(ret);
}
return SUCCESS;
}
......@@ -98,9 +108,9 @@ Status Session::Warmup(const MindSporeModelPtr model) {
MSI_TIME_STAMP_START(LoadModelFromFile)
auto ret = session_->LoadModelFromFile(file_name, graph_id_);
MSI_TIME_STAMP_END(LoadModelFromFile)
if (Status(ret) != SUCCESS) {
if (ret != SUCCESS) {
MSI_LOG(ERROR) << "Load graph model failed, file name is " << file_name.c_str();
return Status(ret);
return ret;
}
model_loaded_ = true;
MSI_LOG(INFO) << "Session Warmup finished";
......@@ -123,14 +133,19 @@ std::promise<void> exit_requested;
void ClearEnv() { Session::Instance().Clear(); }
void HandleSignal(int sig) { exit_requested.set_value(); }
grpc::Status CreatGRPCStatus(Status status) {
switch (status) {
grpc::Status CreatGRPCStatus(const Status &status) {
switch (status.StatusCode()) {
case SUCCESS:
return grpc::Status::OK;
case FAILED:
return grpc::Status::CANCELLED;
case INVALID_INPUTS:
return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "The Predict Inputs do not match the Model Request!");
case INVALID_INPUTS: {
auto status_msg = status.StatusMessage();
if (status_msg.empty()) {
status_msg = "The Predict Inputs do not match the Model Request!";
}
return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, status_msg);
}
default:
return grpc::Status::CANCELLED;
}
......
......@@ -31,6 +31,10 @@ namespace serving {
using ms_serving::PredictReply;
using ms_serving::PredictRequest;
using inference::Status;
using inference::SUCCESS;
using inference::FAILED;
using inference::INVALID_INPUTS;
class Session {
public:
......
......@@ -120,7 +120,7 @@ ServingRequest::ServingRequest(const ms_serving::PredictRequest &request) : requ
[](const ms_serving::Tensor &item) { return ServingTensor(const_cast<ms_serving::Tensor &>(item)); });
}
size_t ServingRequest::size() const { return request_.data_size(); }
size_t ServingRequest::size() const { return cache_.size(); }
const InferTensorBase *ServingRequest::operator[](size_t index) const {
if (index >= cache_.size()) {
......@@ -130,6 +130,22 @@ const InferTensorBase *ServingRequest::operator[](size_t index) const {
return &(cache_[index]);
}
ServingImages::ServingImages(const ms_serving::Images &images) : images_(images) {}
size_t ServingImages::batch_size() const { return images_.images_size(); }
bool ServingImages::get(size_t index, const void *&pic_buffer, uint32_t &pic_size) const {
if (index >= static_cast<size_t>(images_.images_size())) {
MSI_LOG_ERROR << "visit invalid index " << index << " total size " << images_.images_size();
return false;
}
pic_buffer = images_.images(index).data();
pic_size = images_.images(index).size();
return true;
}
size_t ServingImages::input_index() const { return static_cast<size_t>(images_.input_index()); }
size_t ServingReply::size() const { return cache_.size(); }
InferTensorBase *ServingReply::operator[](size_t index) {
......@@ -160,5 +176,21 @@ InferTensorBase *ServingReply::add() {
void ServingReply::clear() { reply_.mutable_result()->Clear(); }
ServingImagesRequest::ServingImagesRequest(const ms_serving::PredictRequest &request) : request_(request) {
auto &images_inputs = request_.images();
std::transform(images_inputs.begin(), images_inputs.end(), std::back_inserter(cache_),
[](const ms_serving::Images &item) { return ServingImages(const_cast<ms_serving::Images &>(item)); });
}
size_t ServingImagesRequest::size() const { return cache_.size(); }
const inference::InferImagesBase *ServingImagesRequest::operator[](size_t index) const {
if (index >= cache_.size()) {
MSI_LOG_ERROR << "visit invalid index " << index << " total size " << cache_.size();
return nullptr;
}
return &(cache_[index]);
}
} // namespace serving
} // namespace mindspore
......@@ -47,6 +47,18 @@ class MS_API ServingTensor : public inference::InferTensorBase {
ms_serving::Tensor &tensor_;
};
class ServingImages : public inference::InferImagesBase {
public:
explicit ServingImages(const ms_serving::Images &images);
size_t batch_size() const override;
bool get(size_t index, const void *&pic_buffer, uint32_t &pic_size) const override;
size_t input_index() const override;
private:
const ms_serving::Images &images_;
};
class ServingRequest : public inference::RequestBase {
public:
explicit ServingRequest(const ms_serving::PredictRequest &request);
......@@ -74,6 +86,18 @@ class ServingReply : public inference::ReplyBase {
std::vector<ServingTensor> cache_;
};
class ServingImagesRequest : public inference::ImagesRequestBase {
public:
explicit ServingImagesRequest(const ms_serving::PredictRequest &request);
size_t size() const override;
const inference::InferImagesBase *operator[](size_t index) const override;
private:
const ms_serving::PredictRequest &request_;
std::vector<ServingImages> cache_;
};
} // namespace serving
} // namespace mindspore
#endif // MINDSPORE_SERVING_TENSOR_H_
......@@ -15,10 +15,14 @@
*/
#ifndef MINDSPORE_STATUS_H
#define MINDSPORE_STATUS_H
#include "include/inference.h"
namespace mindspore {
namespace serving {
using Status = uint32_t;
enum ServingStatus { SUCCESS = 0, FAILED, INVALID_INPUTS };
using inference::Status;
using inference::SUCCESS;
using inference::FAILED;
using inference::INVALID_INPUTS;
} // namespace serving
} // namespace mindspore
......
......@@ -20,17 +20,19 @@ syntax = "proto3";
package ms_serving;
service MSService {
rpc Predict(PredictRequest) returns (PredictReply) {}
rpc Test(PredictRequest) returns (PredictReply) {}
rpc Predict(PredictRequest) returns (PredictReply) {}
rpc Test(PredictRequest) returns (PredictReply) {}
}
message PredictRequest {
repeated Tensor data = 1;
repeated Tensor data = 1;
repeated Images images = 2;
}
message PredictReply {
repeated Tensor result = 1;
repeated Tensor result = 1;
}
enum DataType {
MS_UNKNOWN = 0;
MS_BOOL = 1;
......@@ -62,3 +64,7 @@ message Tensor {
bytes data = 3;
}
message Images{
repeated bytes images = 1;
uint32 input_index = 2;
}
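// Sketch of a client filling the new images field (hypothetical values; the C++
// API below is the one protobuf generates from the definitions above):
//   ms_serving::PredictRequest request;
//   ms_serving::Images *images = request.add_images();
//   images->set_input_index(0);      // model input that receives the pictures
//   images->add_images(jpeg_bytes);  // one encoded picture per batch element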
......@@ -51,6 +51,10 @@ else()
endif()
endforeach ()
endif()
# remove serving ut sources from the default UT glob
file(GLOB_RECURSE SERVING_ACL_UT_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} serving/*.cc)
list(REMOVE_ITEM UT_SRCS ${SERVING_ACL_UT_SRCS})
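# the serving sources are instead compiled into the ut_serving_obj object library
# in the serving subdirectory and linked into ut_tests below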
add_subdirectory(serving)
file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"../../../mindspore/core/base/*.cc"
......@@ -163,7 +167,7 @@ file(GLOB_RECURSE UT_SUTB_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"stub/ge/*.cc"
)
add_executable(ut_tests ${UT_SRCS} ${MINDSPORE_SRC_LIST} ${UT_SUTB_SRC_LIST})
add_executable(ut_tests ${UT_SRCS} ${MINDSPORE_SRC_LIST} ${UT_SUTB_SRC_LIST} $<TARGET_OBJECTS:ut_serving_obj>)
if (ENABLE_GE)
if(ENABLE_TRAIN)
......@@ -188,3 +192,14 @@ if (USE_GLOG)
endif()
target_link_libraries(ut_tests PRIVATE securec graph)
# link grpc
if (EXISTS ${grpc_ROOT}/lib64)
set(gRPC_DIR "${grpc_ROOT}/lib64/cmake/grpc")
else ()
set(gRPC_DIR "${grpc_ROOT}/lib/cmake/grpc")
endif ()
find_package(gRPC CONFIG REQUIRED)
target_link_libraries(ut_tests PRIVATE gRPC::grpc++)
target_link_libraries(ut_tests PRIVATE gRPC::grpc++_reflection)
target_link_libraries(ut_tests PRIVATE protobuf::libprotobuf)
\ No newline at end of file
find_package(Threads REQUIRED)
# This branch assumes that gRPC and all its dependencies are already installed
# on this system, so they can be located by find_package().
# Find Protobuf installation
# Looks for protobuf-config.cmake file installed by Protobuf's cmake installation.
#set(protobuf_MODULE_COMPATIBLE TRUE)
#find_package(Protobuf CONFIG REQUIRED)
#message(STATUS "Using protobuf ${protobuf_VERSION}")
add_library(protobuf::libprotobuf ALIAS protobuf::protobuf)
add_executable(protobuf::libprotoc ALIAS protobuf::protoc)
set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf)
if (CMAKE_CROSSCOMPILING)
find_program(_PROTOBUF_PROTOC protoc)
else ()
set(_PROTOBUF_PROTOC $<TARGET_FILE:protobuf::protoc>)
endif ()
# Find gRPC installation
# Looks for gRPCConfig.cmake file installed by gRPC's cmake installation.
if (EXISTS ${grpc_ROOT}/lib64)
set(gRPC_DIR "${grpc_ROOT}/lib64/cmake/grpc")
else ()
set(gRPC_DIR "${grpc_ROOT}/lib/cmake/grpc")
endif ()
message("serving ut using grpc_DIR : " ${gPRC_DIR})
find_package(gRPC CONFIG REQUIRED)
message(STATUS "Using gRPC ${gRPC_VERSION}")
set(_GRPC_GRPCPP gRPC::grpc++)
set(_REFLECTION gRPC::grpc++_reflection)
if (CMAKE_CROSSCOMPILING)
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
find_program(_GRPC_PYTHON_PLUGIN_EXECUTABLE grpc_python_plugin)
else ()
set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:gRPC::grpc_cpp_plugin>)
set(_GRPC_PYTHON_PLUGIN_EXECUTABLE $<TARGET_FILE:gRPC::grpc_python_plugin>)
endif ()
# Proto file
get_filename_component(hw_proto "ms_service.proto" ABSOLUTE)
get_filename_component(hw_proto_path ${hw_proto} PATH)
# Generated sources
set(hw_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/ms_service.pb.cc")
set(hw_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/ms_service.pb.h")
set(hw_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/ms_service.grpc.pb.cc")
set(hw_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/ms_service.grpc.pb.h")
set(hw_py_pb2 "${CMAKE_CURRENT_BINARY_DIR}/ms_service_pb2.py")
set(hw_py_pb2_grpc "${CMAKE_CURRENT_BINARY_DIR}/ms_service_pb2_grpc.py")
add_custom_command(
OUTPUT "${hw_proto_srcs}" "${hw_proto_hdrs}" "${hw_grpc_srcs}" "${hw_grpc_hdrs}" "${hw_py_pb2}" "${hw_py_pb2_grpc}"
COMMAND ${_PROTOBUF_PROTOC}
ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
--cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
-I "${hw_proto_path}"
--plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}"
"${hw_proto}"
COMMAND ${_PROTOBUF_PROTOC}
ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
--python_out "${CMAKE_CURRENT_BINARY_DIR}"
-I "${hw_proto_path}"
--plugin=protoc-gen-grpc="${_GRPC_PYTHON_PLUGIN_EXECUTABLE}"
"${hw_proto}"
DEPENDS "${hw_proto}")
list(APPEND SERVING_SRC_TEST ${hw_proto_srcs} ${hw_grpc_srcs})
file(GLOB_RECURSE ACL_SESSION_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"../../../../serving/acl/*.cc"
"../../../../serving/core/*.cc")
list(APPEND SERVING_SRC_TEST ${ACL_SESSION_SRC_LIST})
# utest files
file(GLOB_RECURSE ACL_UTEST_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
list(APPEND SERVING_SRC_TEST ${ACL_UTEST_SRC_LIST})
include_directories(${CMAKE_SOURCE_DIR}/serving/core)
include_directories(${CMAKE_SOURCE_DIR}/serving/acl)
include_directories(${CMAKE_SOURCE_DIR}/serving)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR}/../)
add_library(ut_serving_obj OBJECT ${SERVING_SRC_TEST})
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ACL_STUB_INC_ACL_H
#define ACL_STUB_INC_ACL_H
#include "acl_base.h"
#include "acl_mdl.h"
#include "acl_rt.h"
aclError aclInit(const char *configPath);
aclError aclFinalize();
#endif // ACL_STUB_INC_ACL_H
\ No newline at end of file
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ACL_STUB_INC_ACL_BASE
#define ACL_STUB_INC_ACL_BASE
#include <stdint.h>
#include <stddef.h>
typedef void *aclrtStream;
typedef void *aclrtEvent;
typedef void *aclrtContext;
typedef int aclError;
typedef uint16_t aclFloat16;
typedef struct aclDataBuffer aclDataBuffer;
typedef struct aclTensorDesc aclTensorDesc;
const int ACL_ERROR_NONE = 0;
typedef enum {
ACL_DT_UNDEFINED = -1,
ACL_FLOAT = 0,
ACL_FLOAT16 = 1,
ACL_INT8 = 2,
ACL_INT32 = 3,
ACL_UINT8 = 4,
ACL_INT16 = 6,
ACL_UINT16 = 7,
ACL_UINT32 = 8,
ACL_INT64 = 9,
ACL_UINT64 = 10,
ACL_DOUBLE = 11,
ACL_BOOL = 12,
} aclDataType;
typedef enum {
ACL_FORMAT_UNDEFINED = -1,
ACL_FORMAT_NCHW = 0,
ACL_FORMAT_NHWC = 1,
ACL_FORMAT_ND = 2,
ACL_FORMAT_NC1HWC0 = 3,
ACL_FORMAT_FRACTAL_Z = 4,
ACL_FORMAT_FRACTAL_NZ = 29,
} aclFormat;
typedef enum {
ACL_DEBUG,
ACL_INFO,
ACL_WARNING,
ACL_ERROR,
} aclLogLevel;
aclDataBuffer *aclCreateDataBuffer(void *data, size_t size);
aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer);
void *aclGetDataBufferAddr(const aclDataBuffer *dataBuffer);
uint32_t aclGetDataBufferSize(const aclDataBuffer *dataBuffer);
size_t aclDataTypeSize(aclDataType dataType);
aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format);
void aclDestroyTensorDesc(const aclTensorDesc *desc);
aclDataType aclGetTensorDescType(const aclTensorDesc *desc);
aclFormat aclGetTensorDescFormat(const aclTensorDesc *desc);
size_t aclGetTensorDescSize(const aclTensorDesc *desc);
size_t aclGetTensorDescElementCount(const aclTensorDesc *desc);
size_t aclGetTensorDescNumDims(const aclTensorDesc *desc);
int64_t aclGetTensorDescDim(const aclTensorDesc *desc, size_t index);
void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line, const char *fmt, ...);
#define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
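// Illustrative use of the macro above (format string and arguments assumed):
//   ACL_APP_LOG(ACL_INFO, "model %u loaded", model_id);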
#endif  // ACL_STUB_INC_ACL_BASE
\ No newline at end of file
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ACL_STUB_INC_ACL_MDL
#define ACL_STUB_INC_ACL_MDL
#include "acl_base.h"
#define ACL_MAX_DIM_CNT 128
#define ACL_MAX_TENSOR_NAME_LEN 128
#define ACL_MAX_BATCH_NUM 128
#define ACL_MAX_HW_NUM 128
#define ACL_MAX_SHAPE_COUNT 128
typedef struct aclmdlDataset aclmdlDataset;
typedef struct aclmdlDesc aclmdlDesc;
typedef struct aclmdlIODims {
char name[ACL_MAX_TENSOR_NAME_LEN];
size_t dimCount;
int64_t dims[ACL_MAX_DIM_CNT];
} aclmdlIODims;
aclmdlDesc *aclmdlCreateDesc();
aclError aclmdlDestroyDesc(aclmdlDesc *modelDesc);
aclError aclmdlGetDesc(aclmdlDesc *modelDesc, uint32_t modelId);
size_t aclmdlGetNumInputs(aclmdlDesc *modelDesc);
size_t aclmdlGetNumOutputs(aclmdlDesc *modelDesc);
size_t aclmdlGetInputSizeByIndex(aclmdlDesc *modelDesc, size_t index);
size_t aclmdlGetOutputSizeByIndex(aclmdlDesc *modelDesc, size_t index);
aclmdlDataset *aclmdlCreateDataset();
aclError aclmdlDestroyDataset(const aclmdlDataset *dataSet);
aclError aclmdlAddDatasetBuffer(aclmdlDataset *dataSet, aclDataBuffer *dataBuffer);
size_t aclmdlGetDatasetNumBuffers(const aclmdlDataset *dataSet);
aclDataBuffer *aclmdlGetDatasetBuffer(const aclmdlDataset *dataSet, size_t index);
aclError aclmdlLoadFromFile(const char *modelPath, uint32_t *modelId);
aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId);
aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr, size_t workSize,
void *weightPtr, size_t weightSize);
aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId, void *workPtr,
size_t workSize, void *weightPtr, size_t weightSize);
aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output);
aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output, aclrtStream stream);
aclError aclmdlUnload(uint32_t modelId);
aclError aclmdlQuerySize(const char *fileName, size_t *workSize, size_t *weightSize);
aclError aclmdlQuerySizeFromMem(const void *model, size_t modelSize, size_t *workSize, size_t *weightSize);
aclError aclmdlGetInputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);
aclError aclmdlGetOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);
aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);
aclFormat aclmdlGetInputFormat(const aclmdlDesc *modelDesc, size_t index);
aclFormat aclmdlGetOutputFormat(const aclmdlDesc *modelDesc, size_t index);
aclDataType aclmdlGetInputDataType(const aclmdlDesc *modelDesc, size_t index);
aclDataType aclmdlGetOutputDataType(const aclmdlDesc *modelDesc, size_t index);
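// Sketch of the call order the serving session is expected to exercise
// against these stubs (error handling omitted):
//   uint32_t model_id = 0;
//   aclmdlLoadFromFile("model.om", &model_id);
//   aclmdlDesc *desc = aclmdlCreateDesc();
//   aclmdlGetDesc(desc, model_id);
//   /* build input/output aclmdlDataset objects, then */
//   aclmdlExecute(model_id, inputs, outputs);
//   aclmdlUnload(model_id);
//   aclmdlDestroyDesc(desc);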
#endif  // ACL_STUB_INC_ACL_MDL
\ No newline at end of file
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ACL_STUB_INC_ACL_RT_H
#define ACL_STUB_INC_ACL_RT_H
#include "acl_base.h"
typedef enum aclrtRunMode {
ACL_DEVICE,
ACL_HOST,
} aclrtRunMode;
typedef enum aclrtTsId {
ACL_TS_ID_AICORE,
ACL_TS_ID_AIVECTOR,
ACL_TS_ID_RESERVED,
} aclrtTsId;
typedef enum aclrtEventStatus {
ACL_EVENT_STATUS_COMPLETE,
ACL_EVENT_STATUS_NOT_READY,
ACL_EVENT_STATUS_RESERVED,
} aclrtEventStatus;
typedef enum aclrtCallbackBlockType {
ACL_CALLBACK_NO_BLOCK,
ACL_CALLBACK_BLOCK,
} aclrtCallbackBlockType;
typedef enum aclrtMemcpyKind {
ACL_MEMCPY_HOST_TO_HOST,
ACL_MEMCPY_HOST_TO_DEVICE,
ACL_MEMCPY_DEVICE_TO_HOST,
ACL_MEMCPY_DEVICE_TO_DEVICE,
} aclrtMemcpyKind;
typedef enum aclrtMemMallocPolicy {
ACL_MEM_MALLOC_HUGE_FIRST,
ACL_MEM_MALLOC_HUGE_ONLY,
ACL_MEM_MALLOC_NORMAL_ONLY,
} aclrtMemMallocPolicy;
typedef struct rtExceptionInfo aclrtExceptionInfo;
typedef void (*aclrtCallback)(void *userData);
typedef void (*aclrtExceptionInfoCallback)(aclrtExceptionInfo *exceptionInfo);
aclError aclrtCreateContext(aclrtContext *context, int32_t deviceId);
aclError aclrtDestroyContext(aclrtContext context);
aclError aclrtSetCurrentContext(aclrtContext context);
aclError aclrtGetCurrentContext(aclrtContext *context);
aclError aclrtSetDevice(int32_t deviceId);
aclError aclrtResetDevice(int32_t deviceId);
aclError aclrtGetDevice(int32_t *deviceId);
aclError aclrtGetRunMode(aclrtRunMode *runMode);
aclError aclrtSynchronizeDevice(void);
aclError aclrtSetTsDevice(aclrtTsId tsId);
aclError aclrtGetDeviceCount(uint32_t *count);
aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy);
aclError aclrtFree(void *devPtr);
aclError aclrtMallocHost(void **hostPtr, size_t size);
aclError aclrtFreeHost(void *hostPtr);
aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count, aclrtMemcpyKind kind);
aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t value, size_t count);
aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count, aclrtMemcpyKind kind,
aclrtStream stream);
aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count, aclrtStream stream);
aclError aclrtCreateStream(aclrtStream *stream);
aclError aclrtDestroyStream(aclrtStream stream);
aclError aclrtSynchronizeStream(aclrtStream stream);
aclError aclrtStreamWaitEvent(aclrtStream stream, aclrtEvent event);
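// The runtime stubs above are expected to be driven in roughly this order by
// AclSession::InitEnv/FinalizeEnv (a sketch, mirroring the fail-injection
// tests below): aclrtSetDevice -> aclrtCreateContext -> aclrtCreateStream,
// then the reverse aclrtDestroyStream -> aclrtDestroyContext ->
// aclrtResetDevice on teardown.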
#endif  // ACL_STUB_INC_ACL_RT_H
\ No newline at end of file
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ACL_STUB_INC_ACL_DVPP_H
#define ACL_STUB_INC_ACL_DVPP_H
#include <stddef.h>
#include "acl/acl.h"
#include "acl/acl_base.h"
typedef struct acldvppPicDesc acldvppPicDesc;
typedef struct acldvppRoiConfig acldvppRoiConfig;
typedef struct acldvppResizeConfig acldvppResizeConfig;
typedef struct acldvppChannelDesc acldvppChannelDesc;
typedef struct acldvppStreamDesc acldvppStreamDesc;
typedef struct acldvppBatchPicDesc acldvppBatchPicDesc;
enum acldvppPixelFormat {
PIXEL_FORMAT_YUV_400 = 0,
PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // YUV
PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // YVU
PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // YUV
PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // YVU
PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // YUV
PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // YVU
};
enum acldvppStreamFormat {
H265_MAIN_LEVEL = 0,
  H264_BASELINE_LEVEL = 1,
  H264_MAIN_LEVEL,
  H264_HIGH_LEVEL,
};
enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 };
aclError acldvppMalloc(void **devPtr, size_t size);
aclError acldvppFree(void *devPtr);
acldvppChannelDesc *acldvppCreateChannelDesc();
aclError acldvppDestroyChannelDesc(acldvppChannelDesc *channelDesc);
acldvppPicDesc *acldvppCreatePicDesc();
aclError acldvppDestroyPicDesc(acldvppPicDesc *picDesc);
aclError acldvppSetPicDescSize(acldvppPicDesc *picDesc, uint32_t size);
aclError acldvppSetPicDescFormat(acldvppPicDesc *picDesc, acldvppPixelFormat format);
aclError acldvppSetPicDescWidth(acldvppPicDesc *picDesc, uint32_t width);
aclError acldvppSetPicDescHeight(acldvppPicDesc *picDesc, uint32_t height);
aclError acldvppSetPicDescData(acldvppPicDesc *picDesc, void *dataDev);
aclError acldvppSetPicDescWidthStride(acldvppPicDesc *picDesc, uint32_t widthStride);
aclError acldvppSetPicDescHeightStride(acldvppPicDesc *picDesc, uint32_t heightStride);
aclError acldvppSetPicDescRetCode(acldvppPicDesc *picDesc, uint32_t retCode);
uint32_t acldvppGetPicDescSize(acldvppPicDesc *picDesc);
acldvppPixelFormat acldvppGetPicDescFormat(acldvppPicDesc *picDesc);
uint32_t acldvppGetPicDescWidth(acldvppPicDesc *picDesc);
uint32_t acldvppGetPicDescHeight(acldvppPicDesc *picDesc);
void *acldvppGetPicDescData(acldvppPicDesc *picDesc);
uint32_t acldvppGetPicDescWidthStride(acldvppPicDesc *picDesc);
uint32_t acldvppGetPicDescHeightStride(acldvppPicDesc *picDesc);
uint32_t acldvppGetPicDescRetCode(acldvppPicDesc *picDesc);
acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom);
aclError acldvppDestroyRoiConfig(acldvppRoiConfig *roiConfig);
aclError acldvppSetRoiConfigLeft(acldvppRoiConfig *roiConfig, uint32_t left);
aclError acldvppSetRoiConfigRight(acldvppRoiConfig *roiConfig, uint32_t right);
aclError acldvppSetRoiConfigTop(acldvppRoiConfig *roiConfig, uint32_t top);
aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *roiConfig, uint32_t bottom);
aclError acldvppSetRoiConfig(acldvppRoiConfig *roiConfig, uint32_t left, uint32_t right, uint32_t top, uint32_t bottom);
acldvppResizeConfig *acldvppCreateResizeConfig();
aclError acldvppDestroyResizeConfig(acldvppResizeConfig *resizeConfig);
aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize, acldvppPixelFormat outputPixelFormat,
uint32_t *decSize);
aclError acldvppCreateChannel(acldvppChannelDesc *channelDesc);
aclError acldvppDestroyChannel(acldvppChannelDesc *channelDesc);
aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
acldvppResizeConfig *resizeConfig, aclrtStream stream);
aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea, aclrtStream stream);
aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
acldvppRoiConfig *pasteArea, aclrtStream stream);
aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchDesc, uint32_t *roiNums,
uint32_t size, acldvppBatchPicDesc *dstBatchDesc, acldvppRoiConfig *cropAreas[],
aclrtStream stream);
aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size,
acldvppPicDesc *outputDesc, aclrtStream stream);
acldvppBatchPicDesc *acldvppCreateBatchPicDesc(uint32_t batchSize);
acldvppPicDesc *acldvppGetPicDesc(acldvppBatchPicDesc *batchPicDesc, uint32_t index);
aclError acldvppDestroyBatchPicDesc(acldvppBatchPicDesc *batchPicDesc);
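// A typical DVPP JPEG preprocessing pass over these stubs would look roughly
// like this (a sketch; error handling and descriptor setup omitted):
//   acldvppJpegPredictDecSize(jpeg_data, jpeg_size, PIXEL_FORMAT_YUV_SEMIPLANAR_420, &dec_size);
//   acldvppMalloc(&dev_buf, dec_size);
//   acldvppJpegDecodeAsync(channel, jpeg_data, jpeg_size, decode_desc, stream);
//   acldvppVpcResizeAsync(channel, decode_desc, resize_desc, resize_config, stream);
//   aclrtSynchronizeStream(stream);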
#endif // ACL_STUB_INC_ACL_DVPP_H
\ No newline at end of file
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "acl_session_test_common.h"
using namespace std;
namespace mindspore {
namespace serving {
class AclSessionAddTest : public AclSessionTest {
public:
AclSessionAddTest() = default;
void SetUp() override {
AclSessionTest::SetUp();
aclmdlDesc model_desc;
model_desc.inputs.push_back(
AclTensorDesc{.dims = {2, 24, 24, 3}, .data_type = ACL_FLOAT, .size = 2 * 24 * 24 * 3 * sizeof(float)});
model_desc.inputs.push_back(
AclTensorDesc{.dims = {2, 24, 24, 3}, .data_type = ACL_FLOAT, .size = 2 * 24 * 24 * 3 * sizeof(float)});
model_desc.outputs.push_back(
AclTensorDesc{.dims = {2, 24, 24, 3}, .data_type = ACL_FLOAT, .size = 2 * 24 * 24 * 3 * sizeof(float)});
mock_model_desc_ = MockModelDesc(model_desc);
g_acl_model_desc = &mock_model_desc_;
g_acl_model = &add_mock_model_;
}
void CreateDefaultRequest(PredictRequest &request) {
auto input0 = request.add_data();
CreateTensor(*input0, {2, 24, 24, 3}, ::ms_serving::DataType::MS_FLOAT32);
auto input1 = request.add_data();
CreateTensor(*input1, {2, 24, 24, 3}, ::ms_serving::DataType::MS_FLOAT32);
auto input0_data = reinterpret_cast<float *>(input0->mutable_data()->data());
auto input1_data = reinterpret_cast<float *>(input1->mutable_data()->data());
for (int i = 0; i < 2 * 24 * 24 * 3; i++) {
input0_data[i] = i % 1024;
input1_data[i] = i % 1024 + 1;
}
}
void CheckDefaultReply(const PredictReply &reply) {
EXPECT_TRUE(reply.result().size() == 1);
if (reply.result().size() == 1) {
CheckTensorItem(reply.result(0), {2, 24, 24, 3}, ::ms_serving::DataType::MS_FLOAT32);
auto &output = reply.result(0).data();
EXPECT_EQ(output.size(), 2 * 24 * 24 * 3 * sizeof(float));
if (output.size() == 2 * 24 * 24 * 3 * sizeof(float)) {
auto output_data = reinterpret_cast<const float *>(output.data());
for (int i = 0; i < 2 * 24 * 24 * 3; i++) {
EXPECT_EQ(output_data[i], (i % 1024) + (i % 1024 + 1));
if (output_data[i] != (i % 1024) + (i % 1024 + 1)) {
break;
}
}
}
}
}
MockModelDesc mock_model_desc_;
AddMockAclModel add_mock_model_;
};
TEST_F(AclSessionAddTest, TestAclSession_OneTime_Success) {
inference::AclSession acl_session;
uint32_t device_id = 1;
EXPECT_TRUE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
uint32_t model_id = 0;
EXPECT_TRUE(acl_session.LoadModelFromFile("fake_model_path", model_id) == SUCCESS);
// create inputs
PredictRequest request;
CreateDefaultRequest(request);
PredictReply reply;
ServingRequest serving_request(request);
ServingReply serving_reply(reply);
EXPECT_TRUE(acl_session.ExecuteModel(model_id, serving_request, serving_reply) == SUCCESS);
CheckDefaultReply(reply);
EXPECT_TRUE(acl_session.UnloadModel(model_id) == SUCCESS);
EXPECT_TRUE(acl_session.FinalizeEnv() == SUCCESS);
};
TEST_F(AclSessionAddTest, TestAclSession_MultiTimes_Success) {
inference::AclSession acl_session;
uint32_t device_id = 1;
EXPECT_TRUE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
uint32_t model_id = 0;
EXPECT_TRUE(acl_session.LoadModelFromFile("fake_model_path", model_id) == SUCCESS);
for (int i = 0; i < 10; i++) {
// create inputs
PredictRequest request;
CreateDefaultRequest(request);
PredictReply reply;
ServingRequest serving_request(request);
ServingReply serving_reply(reply);
EXPECT_TRUE(acl_session.ExecuteModel(model_id, serving_request, serving_reply) == SUCCESS);
CheckDefaultReply(reply);
}
EXPECT_TRUE(acl_session.UnloadModel(model_id) == SUCCESS);
EXPECT_TRUE(acl_session.FinalizeEnv() == SUCCESS);
};
TEST_F(AclSessionAddTest, TestAclSession_DeviceRunMode_OneTime_Success) {
SetDeviceRunMode();
inference::AclSession acl_session;
uint32_t device_id = 1;
EXPECT_TRUE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
uint32_t model_id = 0;
EXPECT_TRUE(acl_session.LoadModelFromFile("fake_model_path", model_id) == SUCCESS);
// create inputs
PredictRequest request;
CreateDefaultRequest(request);
PredictReply reply;
ServingRequest serving_request(request);
ServingReply serving_reply(reply);
EXPECT_TRUE(acl_session.ExecuteModel(model_id, serving_request, serving_reply) == SUCCESS);
CheckDefaultReply(reply);
EXPECT_TRUE(acl_session.UnloadModel(model_id) == SUCCESS);
EXPECT_TRUE(acl_session.FinalizeEnv() == SUCCESS);
};
TEST_F(AclSessionAddTest, TestAclSession_DeviceRunMode_MultiTimes_Success) {
SetDeviceRunMode();
inference::AclSession acl_session;
uint32_t device_id = 1;
EXPECT_TRUE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
uint32_t model_id = 0;
EXPECT_TRUE(acl_session.LoadModelFromFile("fake_model_path", model_id) == SUCCESS);
for (int i = 0; i < 10; i++) {
// create inputs
PredictRequest request;
CreateDefaultRequest(request);
PredictReply reply;
ServingRequest serving_request(request);
ServingReply serving_reply(reply);
EXPECT_TRUE(acl_session.ExecuteModel(model_id, serving_request, serving_reply) == SUCCESS);
CheckDefaultReply(reply);
}
EXPECT_TRUE(acl_session.UnloadModel(model_id) == SUCCESS);
EXPECT_TRUE(acl_session.FinalizeEnv() == SUCCESS);
};
} // namespace serving
} // namespace mindspore
\ No newline at end of file
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_ACL_SESSION_TEST_COMMON_H
#define MINDSPORE_ACL_SESSION_TEST_COMMON_H
#include "common/common_test.h"
#include "serving/core/server.h"
#include "include/inference.h"
#include "include/infer_tensor.h"
#include "serving/core/serving_tensor.h"
#include "serving/acl/acl_session.h"
#include "serving/acl/model_process.h"
#include "serving/acl/dvpp_process.h"
#include "acl_stub.h"
class MockDeviceRunMode : public AclRunMode {
public:
aclError aclrtGetRunMode(aclrtRunMode *runMode) override {
*runMode = aclrtRunMode::ACL_DEVICE;
return ACL_ERROR_NONE;
}
};
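// Base fixture for the ACL session tests: SetUp points every stub family at
// a fresh default instance, and TearDown's Check() calls are expected to
// verify that whatever each test created (buffers, contexts, streams, DVPP
// descriptors) was also destroyed.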
class AclSessionTest : public testing::Test {
public:
AclSessionTest() = default;
void SetUp() override {
g_acl_data_buffer = &g_acl_data_buffer_default;
g_acl_env = &g_acl_env_default;
g_acl_dataset = &g_acl_dataset_default;
g_acl_model = &g_acl_model_default;
g_acl_model_desc = &g_acl_model_desc_default;
g_acl_device_context_stream = &g_acl_device_context_stream_default;
g_acl_memory = &g_acl_memory_default;
g_acl_dvpp_pic_desc = &g_acl_dvpp_pic_desc_default;
g_acl_dvpp_roi_config = &g_acl_dvpp_roi_config_default;
g_acl_dvpp_resize_config = &g_acl_dvpp_resize_config_default;
g_acl_dvpp_channel_desc = &g_acl_dvpp_channel_desc_default;
g_acl_dvpp_process = &g_acl_dvpp_process_default;
g_acl_run_mode = &acl_run_mode_default;
g_acl_jpeg_lib = &acl_jpeg_lib_default;
}
void TearDown() override {
EXPECT_TRUE(g_acl_data_buffer->Check());
EXPECT_TRUE(g_acl_env->Check());
EXPECT_TRUE(g_acl_dataset->Check());
EXPECT_TRUE(g_acl_model->Check());
EXPECT_TRUE(g_acl_model_desc->Check());
EXPECT_TRUE(g_acl_device_context_stream->Check());
EXPECT_TRUE(g_acl_memory->Check());
EXPECT_TRUE(g_acl_dvpp_pic_desc->Check());
EXPECT_TRUE(g_acl_dvpp_roi_config->Check());
EXPECT_TRUE(g_acl_dvpp_resize_config->Check());
EXPECT_TRUE(g_acl_dvpp_channel_desc->Check());
EXPECT_TRUE(g_acl_dvpp_process->Check());
EXPECT_TRUE(g_acl_jpeg_lib->Check());
}
AclDataBuffer g_acl_data_buffer_default;
AclEnv g_acl_env_default;
AclDataSet g_acl_dataset_default;
AclModel g_acl_model_default;
AclModelDesc g_acl_model_desc_default;
AclDeviceContextStream g_acl_device_context_stream_default;
AclMemory g_acl_memory_default;
AclDvppPicDesc g_acl_dvpp_pic_desc_default;
AclDvppRoiConfig g_acl_dvpp_roi_config_default;
AclDvppResizeConfig g_acl_dvpp_resize_config_default;
AclDvppChannelDesc g_acl_dvpp_channel_desc_default;
AclDvppProcess g_acl_dvpp_process_default;
AclRunMode acl_run_mode_default;
MockDeviceRunMode acl_device_run_mode;
AclJpegLib acl_jpeg_lib_default = AclJpegLib(0, 0);
void SetDeviceRunMode() { g_acl_run_mode = &acl_device_run_mode; }
void CreateTensor(ms_serving::Tensor &tensor, const std::vector<int64_t> &shape, ms_serving::DataType data_type,
std::size_t data_size = INT64_MAX) {
if (data_size == INT64_MAX) {
data_size = GetDataTypeSize(data_type);
for (auto item : shape) {
data_size *= item;
}
}
tensor.set_data(std::string(data_size, 0));
tensor.set_tensor_type(data_type);
auto tensor_shape = tensor.mutable_tensor_shape();
for (auto item : shape) {
tensor_shape->add_dims(item);
}
}
size_t GetDataTypeSize(ms_serving::DataType data_type) {
const std::map<ms_serving::DataType, size_t> type_size_map{
{ms_serving::DataType::MS_BOOL, sizeof(bool)}, {ms_serving::DataType::MS_INT8, sizeof(int8_t)},
{ms_serving::DataType::MS_UINT8, sizeof(uint8_t)}, {ms_serving::DataType::MS_INT16, sizeof(int16_t)},
{ms_serving::DataType::MS_UINT16, sizeof(uint16_t)}, {ms_serving::DataType::MS_INT32, sizeof(int32_t)},
{ms_serving::DataType::MS_UINT32, sizeof(uint32_t)}, {ms_serving::DataType::MS_INT64, sizeof(int64_t)},
{ms_serving::DataType::MS_UINT64, sizeof(uint64_t)}, {ms_serving::DataType::MS_FLOAT16, 2},
{ms_serving::DataType::MS_FLOAT32, sizeof(float)}, {ms_serving::DataType::MS_FLOAT64, sizeof(double)},
};
auto it = type_size_map.find(data_type);
if (it == type_size_map.end()) {
EXPECT_TRUE(false);
return 0;
}
return it->second;
}
void CheckTensorItem(const ms_serving::Tensor &tensor, const std::vector<int64_t> &expect_shape,
ms_serving::DataType expect_data_type) {
std::vector<int64_t> tensor_shape;
for (auto item : tensor.tensor_shape().dims()) {
tensor_shape.push_back(item);
}
EXPECT_EQ(expect_shape, tensor_shape);
EXPECT_EQ(expect_data_type, tensor.tensor_type());
int64_t elem_cnt = 1;
for (auto item : expect_shape) {
elem_cnt *= item;
}
auto data_size = GetDataTypeSize(expect_data_type);
EXPECT_EQ(data_size * elem_cnt, tensor.data().size());
}
};
class MockModelDesc : public AclModelDesc {
public:
MockModelDesc() {}
MockModelDesc(const aclmdlDesc &mock_model_desc) : mock_model_desc_(mock_model_desc) {}
aclmdlDesc *aclmdlCreateDesc() override {
aclmdlDesc *model_desc = AclModelDesc::aclmdlCreateDesc();
*model_desc = mock_model_desc_;
return model_desc;
}
aclmdlDesc mock_model_desc_;
};
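// Mock model implementing elementwise float addition (output0[i] =
// input0[i] + input1[i]) so the tests can verify end-to-end data flow
// through the session without a real .om model file.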
class AddMockAclModel : public AclModel {
public:
aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output) override {
if (AclModel::aclmdlExecute(modelId, input, output) != ACL_ERROR_NONE) {
return 1;
}
if (input->data_buffers.size() != 2) {
return 1;
}
auto &input0 = input->data_buffers[0];
auto &input1 = input->data_buffers[1];
std::size_t expect_count = input0->size / sizeof(float);
if (input0->size != expect_count * sizeof(float) || input1->size != expect_count * sizeof(float)) {
return 1;
}
if (output->data_buffers.size() != 1) {
return 1;
}
auto &output0 = output->data_buffers[0];
if (output0->size != expect_count * sizeof(float)) {
return 1;
}
auto input0_data = reinterpret_cast<const float *>(input0->data);
auto input1_data = reinterpret_cast<const float *>(input1->data);
auto output0_data = reinterpret_cast<float *>(output0->data);
for (size_t i = 0; i < expect_count; i++) {
output0_data[i] = input0_data[i] + input1_data[i];
}
return ACL_ERROR_NONE;
}
aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output,
aclrtStream stream) override {
return aclmdlExecute(modelId, input, output);
}
};
#endif // MINDSPORE_ACL_SESSION_TEST_COMMON_H
This diff is collapsed.
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "acl_session_test_common.h"
using namespace std;
namespace mindspore {
namespace serving {
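// Fault-injection mocks: each *_fail_list_ entry scripts one upcoming call,
// where true lets the call go through and false forces an error return; an
// empty list means every call succeeds.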
class MockFailAclDeviceContextStream : public AclDeviceContextStream {
public:
aclError aclrtSetDevice(int32_t deviceId) override {
if (set_device_fail_list_.empty()) {
return AclDeviceContextStream::aclrtSetDevice(deviceId);
}
auto val = set_device_fail_list_.front();
set_device_fail_list_.erase(set_device_fail_list_.begin());
if (val) {
return AclDeviceContextStream::aclrtSetDevice(deviceId);
}
return 1;
}
aclError aclrtResetDevice(int32_t deviceId) override {
auto ret = AclDeviceContextStream::aclrtResetDevice(deviceId);
if (ret != ACL_ERROR_NONE) {
return ret;
}
if (reset_device_fail_list_.empty()) {
return ret;
}
auto val = reset_device_fail_list_.front();
reset_device_fail_list_.erase(reset_device_fail_list_.begin());
return val ? ACL_ERROR_NONE : 1;
}
aclError aclrtCreateContext(aclrtContext *context, int32_t deviceId) override {
if (create_context_fail_list_.empty()) {
return AclDeviceContextStream::aclrtCreateContext(context, deviceId);
}
auto val = create_context_fail_list_.front();
create_context_fail_list_.erase(create_context_fail_list_.begin());
if (val) {
return AclDeviceContextStream::aclrtCreateContext(context, deviceId);
}
return 1;
}
aclError aclrtDestroyContext(aclrtContext context) override {
auto ret = AclDeviceContextStream::aclrtDestroyContext(context);
if (ret != ACL_ERROR_NONE) {
return ret;
}
if (destroy_context_fail_list_.empty()) {
return ret;
}
auto val = destroy_context_fail_list_.front();
destroy_context_fail_list_.erase(destroy_context_fail_list_.begin());
return val ? ACL_ERROR_NONE : 1;
}
aclError aclrtCreateStream(aclrtStream *stream) override {
if (create_stream_fail_list_.empty()) {
return AclDeviceContextStream::aclrtCreateStream(stream);
}
auto val = create_stream_fail_list_.front();
create_stream_fail_list_.erase(create_stream_fail_list_.begin());
if (val) {
return AclDeviceContextStream::aclrtCreateStream(stream);
}
return 1;
}
aclError aclrtDestroyStream(aclrtStream stream) override {
auto ret = AclDeviceContextStream::aclrtDestroyStream(stream);
if (ret != ACL_ERROR_NONE) {
return ret;
}
if (destroy_stream_fail_list_.empty()) {
return ret;
}
auto val = destroy_stream_fail_list_.front();
destroy_stream_fail_list_.erase(destroy_stream_fail_list_.begin());
return val ? ACL_ERROR_NONE : 1;
}
std::vector<bool> set_device_fail_list_;
std::vector<bool> reset_device_fail_list_;
std::vector<bool> create_context_fail_list_;
std::vector<bool> destroy_context_fail_list_;
std::vector<bool> create_stream_fail_list_;
std::vector<bool> destroy_stream_fail_list_;
};
class MockFailAclMemory : public AclMemory {
public:
aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy) override {
if (device_mem_fail_list_.empty()) {
return AclMemory::aclrtMalloc(devPtr, size, policy);
}
auto val = device_mem_fail_list_.front();
device_mem_fail_list_.erase(device_mem_fail_list_.begin());
if (val) {
return AclMemory::aclrtMalloc(devPtr, size, policy);
}
return 1;
}
aclError aclrtMallocHost(void **hostPtr, size_t size) override {
if (host_mem_fail_list_.empty()) {
return AclMemory::aclrtMallocHost(hostPtr, size);
}
auto val = host_mem_fail_list_.front();
host_mem_fail_list_.erase(host_mem_fail_list_.begin());
if (val) {
return AclMemory::aclrtMallocHost(hostPtr, size);
}
return 1;
}
aclError acldvppMalloc(void **devPtr, size_t size) override {
if (dvpp_mem_fail_list_.empty()) {
return AclMemory::acldvppMalloc(devPtr, size);
}
auto val = dvpp_mem_fail_list_.front();
dvpp_mem_fail_list_.erase(dvpp_mem_fail_list_.begin());
if (val) {
return AclMemory::acldvppMalloc(devPtr, size);
}
return 1;
}
std::vector<bool> device_mem_fail_list_;
std::vector<bool> host_mem_fail_list_;
std::vector<bool> dvpp_mem_fail_list_;
};
class AclSessionModelLoadTest : public AclSessionTest {
public:
AclSessionModelLoadTest() = default;
void SetUp() override {
AclSessionTest::SetUp();
aclmdlDesc model_desc;
model_desc.inputs.push_back(
AclTensorDesc{.dims = {2, 24, 24, 3}, .data_type = ACL_FLOAT, .size = 2 * 24 * 24 * 3 * sizeof(float)});
model_desc.inputs.push_back(
AclTensorDesc{.dims = {2, 24, 24, 3}, .data_type = ACL_FLOAT, .size = 2 * 24 * 24 * 3 * sizeof(float)});
model_desc.outputs.push_back(
AclTensorDesc{.dims = {2, 24, 24, 3}, .data_type = ACL_FLOAT, .size = 2 * 24 * 24 * 3 * sizeof(float)});
model_desc.outputs.push_back(
AclTensorDesc{.dims = {2, 24, 24, 3}, .data_type = ACL_FLOAT, .size = 2 * 24 * 24 * 3 * sizeof(float)});
mock_model_desc_ = MockModelDesc(model_desc);
g_acl_model_desc = &mock_model_desc_;
g_acl_device_context_stream = &fail_acl_device_context_stream_;
g_acl_memory = &fail_acl_memory_;
}
void CreateDefaultRequest(PredictRequest &request) {
auto input0 = request.add_data();
CreateTensor(*input0, {2, 24, 24, 3}, ::ms_serving::DataType::MS_FLOAT32);
auto input1 = request.add_data();
CreateTensor(*input1, {2, 24, 24, 3}, ::ms_serving::DataType::MS_FLOAT32);
}
void CheckDefaultReply(const PredictReply &reply) {
EXPECT_TRUE(reply.result().size() == 2);
if (reply.result().size() == 2) {
CheckTensorItem(reply.result(0), {2, 24, 24, 3}, ::ms_serving::DataType::MS_FLOAT32);
CheckTensorItem(reply.result(1), {2, 24, 24, 3}, ::ms_serving::DataType::MS_FLOAT32);
}
}
MockModelDesc mock_model_desc_;
  /* Test resources should be released when something goes wrong */
MockFailAclDeviceContextStream fail_acl_device_context_stream_;
MockFailAclMemory fail_acl_memory_;
};
TEST_F(AclSessionModelLoadTest, TestAclSession_OneTime_Success) {
inference::AclSession acl_session;
uint32_t device_id = 1;
EXPECT_TRUE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
uint32_t model_id = 0;
EXPECT_TRUE(acl_session.LoadModelFromFile("fake_model_path", model_id) == SUCCESS);
// create inputs
PredictRequest request;
CreateDefaultRequest(request);
PredictReply reply;
ServingRequest serving_request(request);
ServingReply serving_reply(reply);
EXPECT_TRUE(acl_session.ExecuteModel(model_id, serving_request, serving_reply) == SUCCESS);
CheckDefaultReply(reply);
EXPECT_TRUE(acl_session.UnloadModel(model_id) == SUCCESS);
EXPECT_TRUE(acl_session.FinalizeEnv() == SUCCESS);
};
TEST_F(AclSessionModelLoadTest, TestAclSession_SetDeviceFail) {
inference::AclSession acl_session;
uint32_t device_id = 1;
fail_acl_device_context_stream_.set_device_fail_list_.push_back(false);
EXPECT_FALSE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
EXPECT_TRUE(acl_session.FinalizeEnv() == SUCCESS);
};
TEST_F(AclSessionModelLoadTest, TestAclSession_CreateContextFail) {
inference::AclSession acl_session;
uint32_t device_id = 1;
fail_acl_device_context_stream_.create_context_fail_list_.push_back(false);
EXPECT_FALSE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
EXPECT_TRUE(acl_session.FinalizeEnv() == SUCCESS);
};
TEST_F(AclSessionModelLoadTest, TestAclSession_CreateStreamFail) {
inference::AclSession acl_session;
uint32_t device_id = 1;
fail_acl_device_context_stream_.create_stream_fail_list_.push_back(false);
EXPECT_FALSE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
EXPECT_TRUE(acl_session.FinalizeEnv() == SUCCESS);
};
TEST_F(AclSessionModelLoadTest, TestAclSession_ResetDeviceFail) {
inference::AclSession acl_session;
uint32_t device_id = 1;
fail_acl_device_context_stream_.reset_device_fail_list_.push_back(false);
EXPECT_TRUE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
acl_session.FinalizeEnv();
};
TEST_F(AclSessionModelLoadTest, TestAclSession_DestroyContextFail) {
inference::AclSession acl_session;
uint32_t device_id = 1;
fail_acl_device_context_stream_.destroy_context_fail_list_.push_back(false);
EXPECT_TRUE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
acl_session.FinalizeEnv();
};
TEST_F(AclSessionModelLoadTest, TestAclSession_DestroyStreamFail) {
inference::AclSession acl_session;
uint32_t device_id = 1;
fail_acl_device_context_stream_.destroy_stream_fail_list_.push_back(false);
EXPECT_TRUE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
acl_session.FinalizeEnv();
};
TEST_F(AclSessionModelLoadTest, TestAclSession_MallocFail0_Success) {
inference::AclSession acl_session;
uint32_t device_id = 1;
EXPECT_TRUE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
uint32_t model_id = 0;
fail_acl_memory_.device_mem_fail_list_.push_back(false); // input0 buffer
EXPECT_FALSE(acl_session.LoadModelFromFile("fake_model_path", model_id) == SUCCESS);
EXPECT_TRUE(acl_session.UnloadModel(model_id) == SUCCESS);
EXPECT_TRUE(acl_session.FinalizeEnv() == SUCCESS);
};
TEST_F(AclSessionModelLoadTest, TestAclSession_MallocFail1_Success) {
inference::AclSession acl_session;
uint32_t device_id = 1;
EXPECT_TRUE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
uint32_t model_id = 0;
fail_acl_memory_.device_mem_fail_list_.push_back(true); // input0 buffer
fail_acl_memory_.device_mem_fail_list_.push_back(false); // input1 buffer
EXPECT_FALSE(acl_session.LoadModelFromFile("fake_model_path", model_id) == SUCCESS);
EXPECT_TRUE(acl_session.UnloadModel(model_id) == SUCCESS);
EXPECT_TRUE(acl_session.FinalizeEnv() == SUCCESS);
};
TEST_F(AclSessionModelLoadTest, TestAclSession_MallocFail2_Success) {
inference::AclSession acl_session;
uint32_t device_id = 1;
EXPECT_TRUE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
uint32_t model_id = 0;
fail_acl_memory_.device_mem_fail_list_.push_back(true); // input0 buffer
fail_acl_memory_.device_mem_fail_list_.push_back(true); // input1 buffer
fail_acl_memory_.device_mem_fail_list_.push_back(false); // output0 buffer
EXPECT_FALSE(acl_session.LoadModelFromFile("fake_model_path", model_id) == SUCCESS);
EXPECT_TRUE(acl_session.UnloadModel(model_id) == SUCCESS);
EXPECT_TRUE(acl_session.FinalizeEnv() == SUCCESS);
};
TEST_F(AclSessionModelLoadTest, TestAclSession_MallocFail3_Success) {
inference::AclSession acl_session;
uint32_t device_id = 1;
EXPECT_TRUE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
uint32_t model_id = 0;
fail_acl_memory_.device_mem_fail_list_.push_back(true); // input0 buffer
fail_acl_memory_.device_mem_fail_list_.push_back(true); // input1 buffer
fail_acl_memory_.device_mem_fail_list_.push_back(true); // output0 buffer
fail_acl_memory_.device_mem_fail_list_.push_back(false); // output1 buffer
EXPECT_FALSE(acl_session.LoadModelFromFile("fake_model_path", model_id) == SUCCESS);
EXPECT_TRUE(acl_session.UnloadModel(model_id) == SUCCESS);
EXPECT_TRUE(acl_session.FinalizeEnv() == SUCCESS);
};
TEST_F(AclSessionModelLoadTest, TestAclSession_RunOnDevice_MallocFail0_Success) {
SetDeviceRunMode();
inference::AclSession acl_session;
uint32_t device_id = 1;
EXPECT_TRUE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
uint32_t model_id = 0;
fail_acl_memory_.host_mem_fail_list_.push_back(false); // output0 buffer
EXPECT_FALSE(acl_session.LoadModelFromFile("fake_model_path", model_id) == SUCCESS);
EXPECT_TRUE(acl_session.UnloadModel(model_id) == SUCCESS);
EXPECT_TRUE(acl_session.FinalizeEnv() == SUCCESS);
};
TEST_F(AclSessionModelLoadTest, TestAclSession_RunOnDevice_MallocFail1_Success) {
SetDeviceRunMode();
inference::AclSession acl_session;
uint32_t device_id = 1;
EXPECT_TRUE(acl_session.InitEnv("Ascend", device_id) == SUCCESS);
uint32_t model_id = 0;
fail_acl_memory_.host_mem_fail_list_.push_back(true); // output0 buffer
fail_acl_memory_.host_mem_fail_list_.push_back(false); // output1 buffer
EXPECT_FALSE(acl_session.LoadModelFromFile("fake_model_path", model_id) == SUCCESS);
EXPECT_TRUE(acl_session.UnloadModel(model_id) == SUCCESS);
EXPECT_TRUE(acl_session.FinalizeEnv() == SUCCESS);
};
} // namespace serving
} // namespace mindspore
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
../../../../serving/ms_service.proto
\ No newline at end of file