diff --git a/mace/BUILD.bazel b/mace/BUILD.bazel
index 77e2c532ec7bb563a17af07e9fab1cfce27be58a..ef1c338d0838c12ef2c44035e6b8104baf1d6361 100644
--- a/mace/BUILD.bazel
+++ b/mace/BUILD.bazel
@@ -78,6 +78,17 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
+config_setting(  # Build flag that switches on the HTA code paths.
+    name = "hta_enabled",
+    define_values = {
+        "hta": "true",  # i.e. built with --define hta=true
+    },
+    values = {
+        "crosstool_top": "//external:android/crosstool",  # Android crosstool
+    },
+    visibility = ["//visibility:public"],
+)
+
 config_setting(
     name = "openmp_enabled",
     define_values = {
diff --git a/mace/core/BUILD.bazel b/mace/core/BUILD.bazel
index 2c905389aec20c51a2e54672411c1a696ff1b3b9..91df4f0f1d0d0a66b2903575a4373b26897628cb 100644
--- a/mace/core/BUILD.bazel
+++ b/mace/core/BUILD.bazel
@@ -12,6 +12,8 @@ load(
     "if_android",
     "if_android_armv7",
     "if_hexagon_enabled",
+    "if_hta_enabled",
+    "if_hexagon_or_hta_enabled",
     "if_neon_enabled",
     "if_not_hexagon_enabled",
     "if_opencl_enabled",
@@ -33,17 +35,24 @@ cc_library(
         [
             "runtime/opencl/*.cc",
         ],
-    )) + if_hexagon_enabled(glob([
-        "runtime/hexagon/*.cc",
-    ])),
+    )) + if_hexagon_enabled([
+        "runtime/hexagon/hexagon_dsp_wrapper.cc",
+    ]) + if_hta_enabled([
+        "runtime/hexagon/hexagon_hta_wrapper.cc",
+    ]),
     hdrs = glob([
         "*.h",
         "runtime/cpu/*.h",
-    ]) + if_opencl_enabled(glob(
-        [
-            "runtime/opencl/*.h",
-        ],
-    )) + if_hexagon_enabled(glob(["runtime/hexagon/*.h"])),
+    ]) + if_opencl_enabled(glob([
+        "runtime/opencl/*.h",
+    ])) + if_hexagon_or_hta_enabled(glob([
+        "runtime/hexagon/hexagon_control_wrapper.h",
+        "runtime/hexagon/hexagon_device.h",
+    ])) + if_hexagon_enabled(glob([
+        "runtime/hexagon/*dsp*.h",
+    ])) + if_hta_enabled(glob([
+        "runtime/hexagon/*hta*.h",
+    ])),
     copts = [
         "-Werror",
         "-Wextra",
@@ -57,6 +66,8 @@ cc_library(
         "-DMACE_ENABLE_QUANTIZE",
     ]) + if_hexagon_enabled([
         "-DMACE_ENABLE_HEXAGON",
+    ]) + if_hta_enabled([
+        "-DMACE_ENABLE_HTA",
     ]) + if_neon_enabled([
         "-DMACE_ENABLE_NEON",
     ]) + if_android_armv7([
@@ -77,6 +88,8 @@ cc_library(
         "@gemmlowp",
     ]) + if_hexagon_enabled([
         "//third_party/nnlib:libhexagon",
+    ]) + if_hta_enabled([
+        "//third_party/hta",
     ]),
 )
 
diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.h b/mace/core/runtime/hexagon/hexagon_control_wrapper.h
index c74af2578f345b4d4c5b976811a0f6d64dbdf889..eda740f400e47bab5fac2ab04057522ad9f9b7ce 100644
--- a/mace/core/runtime/hexagon/hexagon_control_wrapper.h
+++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.h
@@ -16,50 +16,67 @@
 #define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_
 
 #include <memory>
+#include <utility>
 #include <vector>
 
 #include "mace/core/tensor.h"
 #include "mace/public/mace.h"
-#include "third_party/nnlib/hexagon_nn.h"
 
 namespace mace {
 
+struct InOutInfo {  // Description of one graph input or output tensor.
+  InOutInfo(const std::vector<index_t> &shape,
+            const DataType data_type,
+            const float scale,
+            const int32_t zero_point,
+            std::unique_ptr<Tensor> tensor_u8)
+      :  shape(shape),
+         data_type(data_type),
+         scale(scale),
+         zero_point(zero_point),
+         tensor_u8(std::move(tensor_u8)) {}  // takes ownership of the tensor
+  // Not copyable because of the unique_ptr member below; it can be moved.
+  std::vector<index_t> shape;
+  DataType data_type;
+  float scale;         // presumably the quantization scale - TODO confirm
+  int32_t zero_point;  // presumably the quantization zero point - TODO confirm
+  std::unique_ptr<Tensor> tensor_u8;  // NOTE(review): uint8 staging buffer?
+};
+
 class HexagonControlWrapper {
  public:
-  HexagonControlWrapper() {}
-  int GetVersion();
-  bool Config();
-  bool Init();
-  bool Finalize();
-  bool SetupGraph(const NetDef &net_def, const unsigned char *model_data);
-  bool ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor);
-  bool ExecuteGraphNew(const std::vector<Tensor *> &input_tensors,
-                       std::vector<Tensor *> *output_tensors,
-                       bool hexagon_quantize);
+  HexagonControlWrapper() = default;
+  virtual ~HexagonControlWrapper() = default;
 
-  bool TeardownGraph();
-  void PrintLog();
-  void PrintGraph();
-  void GetPerfInfo();
-  void ResetPerfInfo();
-  void SetDebugLevel(int level);
+  virtual int GetVersion() = 0;
+  virtual bool Config() = 0;
+  virtual bool Init() = 0;
+  virtual bool Finalize() = 0;
+  virtual bool SetupGraph(const NetDef &net_def,
+                          const unsigned char *model_data) = 0;
+  virtual bool ExecuteGraph(const Tensor &input_tensor,
+                            Tensor *output_tensor) = 0;
+  virtual bool ExecuteGraphNew(const std::vector<Tensor *> &input_tensors,
+                               std::vector<Tensor *> *output_tensors) = 0;
+  virtual bool TeardownGraph() = 0;
+  virtual void PrintLog() = 0;
+  virtual void PrintGraph() = 0;
+  virtual void GetPerfInfo() = 0;
+  virtual void ResetPerfInfo() = 0;
+  virtual void SetDebugLevel(int level) = 0;
 
- private:
-  static constexpr int NODE_ID_OFFSET = 10000;
-  static constexpr int NUM_METADATA = 4;
+ protected:
+  static constexpr int kNodeIdOffset = 10000;
+  static constexpr int kNumMetaData = 4;
 
-  inline uint32_t node_id(uint32_t nodeid) { return NODE_ID_OFFSET + nodeid; }
+  inline uint32_t node_id(uint32_t nodeid) { return kNodeIdOffset + nodeid; }
 
   int nn_id_;
 
-  std::vector<std::vector<index_t>> input_shapes_;
-  std::vector<std::vector<index_t>> output_shapes_;
-  std::vector<DataType> input_data_types_;
-  std::vector<DataType> output_data_types_;
-  uint32_t num_inputs_;
-  uint32_t num_outputs_;
-  std::vector<std::unique_ptr<Tensor>> input_tensors_u8_;
-  std::vector<std::unique_ptr<Tensor>> output_tensors_u8_;
+  std::vector<InOutInfo> input_info_;
+  std::vector<InOutInfo> output_info_;
+  int num_inputs_;
+  int num_outputs_;
 
   MACE_DISABLE_COPY_AND_ASSIGN(HexagonControlWrapper);
 };
diff --git a/mace/core/runtime/hexagon/hexagon_device.h b/mace/core/runtime/hexagon/hexagon_device.h
index 0c933ae0b6ff2171008058cc074c293e1909b819..f80607d3196582f850d0911fec0429784cabaca0 100644
--- a/mace/core/runtime/hexagon/hexagon_device.h
+++ b/mace/core/runtime/hexagon/hexagon_device.h
@@ -15,18 +15,55 @@
 #ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DEVICE_H_
 #define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DEVICE_H_
 
+#include <memory>
+#include <utility>
+
 #include "mace/core/device.h"
+#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
+#ifdef MACE_ENABLE_HEXAGON
+#include "mace/core/runtime/hexagon/hexagon_dsp_wrapper.h"
+#endif
+#ifdef MACE_ENABLE_HTA
+#include "mace/core/runtime/hexagon/hexagon_hta_wrapper.h"
+#endif
 
 namespace mace {
 
 class HexagonDevice : public CPUDevice {
  public:
-  HexagonDevice() : CPUDevice(0, AFFINITY_NONE, false) {}
+  explicit HexagonDevice(DeviceType device_type)
+      : CPUDevice(0, AFFINITY_NONE, false),
+        device_type_(device_type) {}
 
   DeviceType device_type() const override {
-    return DeviceType::HEXAGON;
+    return device_type_;
   };
+
+ private:
+  DeviceType device_type_;
 };
 
+inline std::unique_ptr<HexagonControlWrapper> CreateHexagonControlWrapper(
+    DeviceType device_type) {
+  std::unique_ptr<HexagonControlWrapper> hexagon_controller;
+  // Factory for the backend wrapper; only compiled-in backends are handled.
+  switch (device_type) {
+#ifdef MACE_ENABLE_HEXAGON
+    case HEXAGON:
+      hexagon_controller = make_unique<HexagonDSPWrapper>();
+      break;
+#endif
+#ifdef MACE_ENABLE_HTA
+    case HTA:
+      hexagon_controller = make_unique<HexagonHTAWrapper>();
+      break;
+#endif
+    default:
+      LOG(FATAL) << "Not supported Hexagon device type: " << device_type;
+  }
+  // NOTE: `inline` is required since this definition lives in a header.
+  return hexagon_controller;
+}
+
 }  // namespace mace
 #endif  // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DEVICE_H_
diff --git a/mace/core/runtime/hexagon/hexagon_nn_ops.h b/mace/core/runtime/hexagon/hexagon_dsp_ops.h
similarity index 89%
rename from mace/core/runtime/hexagon/hexagon_nn_ops.h
rename to mace/core/runtime/hexagon/hexagon_dsp_ops.h
index 3ebedb8eb8d81850cd29383fd7667c42b2369262..1f50e13cb48bb8133fc31d71752a623fed16217f 100644
--- a/mace/core/runtime/hexagon/hexagon_nn_ops.h
+++ b/mace/core/runtime/hexagon/hexagon_dsp_ops.h
@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_OPS_H_
-#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_OPS_H_
+#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DSP_OPS_H_
+#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DSP_OPS_H_
 
 #include <string>
 #include <unordered_map>
@@ -57,4 +57,4 @@ class OpMap {
 };
 }  // namespace mace
 
-#endif  // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_OPS_H_
+#endif  // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DSP_OPS_H_
diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc
similarity index 75%
rename from mace/core/runtime/hexagon/hexagon_control_wrapper.cc
rename to mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc
index b39bfeed2510ab10f401fe653ac3ad919e8b2619..a98d9ad1499251a15d7b969cecee2eaf28f84347 100644
--- a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc
+++ b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc
@@ -14,17 +14,19 @@
 
 #include <algorithm>
 #include <iomanip>
+#include <memory>
 #include <thread>  // NOLINT(build/c++11)
 #include <vector>
 #include <unordered_map>
 #include <string>
 #include <utility>
 
-#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
-#include "mace/core/runtime/hexagon/hexagon_nn_ops.h"
+#include "mace/core/runtime/hexagon/hexagon_dsp_wrapper.h"
+#include "mace/core/runtime/hexagon/hexagon_dsp_ops.h"
 #include "mace/core/types.h"
 #include "mace/port/env.h"
-#include "mace/utils/quantize.h"
+#include "mace/utils/memory.h"
+#include "third_party/nnlib/hexagon_nn.h"
 
 namespace mace {
 
@@ -85,33 +87,33 @@ std::string FloatToString(const FloatType v, const int32_t precision) {
 }
 }  // namespace
 
-int HexagonControlWrapper::GetVersion() {
+int HexagonDSPWrapper::GetVersion() {
   int version;
   MACE_CHECK(hexagon_nn_version(&version) == 0, "get version error");
   return version;
 }
 
-bool HexagonControlWrapper::Config() {
+bool HexagonDSPWrapper::Config() {
   LOG(INFO) << "Hexagon config";
   MACE_CHECK(hexagon_nn_set_powersave_level(0) == 0, "hexagon power error");
   MACE_CHECK(hexagon_nn_config() == 0, "hexagon config error");
   return true;
 }
 
-bool HexagonControlWrapper::Init() {
+bool HexagonDSPWrapper::Init() {
   LOG(INFO) << "Hexagon init";
   MACE_CHECK(hexagon_nn_init(&nn_id_) == 0, "hexagon_nn_init failed");
   ResetPerfInfo();
   return true;
 }
 
-bool HexagonControlWrapper::Finalize() {
+bool HexagonDSPWrapper::Finalize() {
   LOG(INFO) << "Hexagon finalize";
   return hexagon_nn_set_powersave_level(1) == 0;
 }
 
-bool HexagonControlWrapper::SetupGraph(const NetDef &net_def,
-                                       unsigned const char *model_data) {
+bool HexagonDSPWrapper::SetupGraph(const NetDef &net_def,
+                                   unsigned const char *model_data) {
   LOG(INFO) << "Hexagon setup graph";
 
   int64_t t0 = NowMicros();
@@ -229,36 +231,40 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def,
   cached_outputs.clear();
 
   // input info
-  num_inputs_ = 0;
-  for (const InputInfo &input_info : net_def.input_info()) {
+  num_inputs_ = net_def.input_info_size();
+  input_info_.reserve(num_inputs_);
+  for (const InputOutputInfo &input_info : net_def.input_info()) {
     std::vector<index_t> input_shape(input_info.dims().begin(),
                                      input_info.dims().end());
     while (input_shape.size() < 4) {
       input_shape.insert(input_shape.begin(), 1);
     }
-    input_shapes_.push_back(input_shape);
-    input_data_types_.push_back(input_info.data_type());
-    num_inputs_ += 1;
+    input_info_.emplace_back(input_shape,
+                             input_info.data_type(),
+                             input_info.scale(),
+                             input_info.zero_point(),
+                             make_unique<Tensor>());
   }
-  input_tensors_u8_.reserve(num_inputs_);
 
   // output info
-  num_outputs_ = 0;
-  for (const OutputInfo &output_info : net_def.output_info()) {
+  num_outputs_ = net_def.output_info_size();
+  output_info_.reserve(num_outputs_);
+  for (const InputOutputInfo &output_info : net_def.output_info()) {
     std::vector<index_t> output_shape(output_info.dims().begin(),
                                       output_info.dims().end());
     while (output_shape.size() < 4) {
       output_shape.insert(output_shape.begin(), 1);
     }
-    output_shapes_.push_back(output_shape);
-    output_data_types_.push_back(output_info.data_type());
-    num_outputs_ += 1;
+    output_info_.emplace_back(output_shape,
+                              output_info.data_type(),
+                              output_info.scale(),
+                              output_info.zero_point(),
+                              make_unique<Tensor>());
     VLOG(1) << "OutputInfo: "
             << "\n\t shape: " << output_shape[0] << " " << output_shape[1]
             << " " << output_shape[2] << " " << output_shape[3]
             << "\n\t type: " << output_info.data_type();
   }
-  output_tensors_u8_.reserve(num_outputs_);
 
   int64_t t1 = NowMicros();
 
@@ -271,14 +277,14 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def,
   return true;
 }
 
-bool HexagonControlWrapper::TeardownGraph() {
+bool HexagonDSPWrapper::TeardownGraph() {
   LOG(INFO) << "Hexagon teardown graph";
   return hexagon_nn_teardown(nn_id_) == 0;
 }
 
 #define MACE_PRINT_BUFSIZE (2 * 1024 * 1024)
 
-void HexagonControlWrapper::PrintLog() {
+void HexagonDSPWrapper::PrintLog() {
   char *buf;
   if ((buf = new char[MACE_PRINT_BUFSIZE]) == NULL) return;
   MACE_CHECK(hexagon_nn_getlog(nn_id_, reinterpret_cast<unsigned char *>(buf),
@@ -288,7 +294,7 @@ void HexagonControlWrapper::PrintLog() {
   delete[] buf;
 }
 
-void HexagonControlWrapper::PrintGraph() {
+void HexagonDSPWrapper::PrintGraph() {
   LOG(INFO) << "Print Graph";
   char *buf;
   if ((buf = new char[MACE_PRINT_BUFSIZE]) == NULL) return;
@@ -299,13 +305,13 @@ void HexagonControlWrapper::PrintGraph() {
   delete[] buf;
 }
 
-void HexagonControlWrapper::SetDebugLevel(int level) {
+void HexagonDSPWrapper::SetDebugLevel(int level) {
   LOG(INFO) << "Set debug level: " << level;
   MACE_CHECK(hexagon_nn_set_debug_level(nn_id_, level) == 0,
              "set debug level error");
 }
 
-void HexagonControlWrapper::GetPerfInfo() {
+void HexagonDSPWrapper::GetPerfInfo() {
   LOG(INFO) << "Get perf info";
   std::vector<hexagon_nn_perfinfo> perf_info(MACE_MAX_NODE);
   unsigned int n_items = 0;
@@ -380,20 +386,20 @@ void HexagonControlWrapper::GetPerfInfo() {
   LOG(INFO) << "total duration: " << std::fixed << total_duration;
 }
 
-void HexagonControlWrapper::ResetPerfInfo() {
+void HexagonDSPWrapper::ResetPerfInfo() {
   LOG(INFO) << "Reset perf info";
   MACE_CHECK(hexagon_nn_reset_perfinfo(nn_id_, NN_GRAPH_PERFEVENT_UTIME) == 0,
              "reset perf error");
 }
 
-bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor,
-                                         Tensor *output_tensor) {
+bool HexagonDSPWrapper::ExecuteGraph(const Tensor &input_tensor,
+                                     Tensor *output_tensor) {
   VLOG(2) << "Execute graph: " << nn_id_;
   // single input and single output
   MACE_ASSERT(num_inputs_ == 1, "Wrong inputs num");
   MACE_ASSERT(num_outputs_ == 1, "Wrong outputs num");
-  output_tensor->SetDtype(output_data_types_[0]);
-  output_tensor->Resize(output_shapes_[0]);
+  output_tensor->SetDtype(output_info_[0].data_type);
+  output_tensor->Resize(output_info_[0].shape);
   std::vector<uint32_t> output_shape(4);
   uint32_t output_bytes;
   int res = hexagon_nn_execute(
@@ -413,10 +419,11 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor,
       &output_bytes);
   MACE_CHECK(res == 0, "execute error");
 
-  MACE_ASSERT(output_shape.size() == output_shapes_[0].size(),
+  MACE_ASSERT(output_shape.size() == output_info_[0].shape.size(),
               "wrong output shape inferred");
   for (size_t i = 0; i < output_shape.size(); ++i) {
-    MACE_ASSERT(static_cast<index_t>(output_shape[i]) == output_shapes_[0][i],
+    MACE_ASSERT(static_cast<index_t>(output_shape[i])
+                    == output_info_[0].shape[i],
                 "wrong output shape inferred");
   }
   MACE_ASSERT(output_bytes == output_tensor->raw_size(),
@@ -424,59 +431,35 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor,
   return res == 0;
 }
 
-bool HexagonControlWrapper::ExecuteGraphNew(
+bool HexagonDSPWrapper::ExecuteGraphNew(
     const std::vector<Tensor *> &input_tensors,
-    std::vector<Tensor *> *output_tensors,
-    bool hexagon_quantize) {
+    std::vector<Tensor *> *output_tensors) {
   VLOG(2) << "Execute graph new: " << nn_id_;
   uint32_t num_inputs = static_cast<uint32_t>(input_tensors.size());
   uint32_t num_outputs = static_cast<uint32_t>(output_tensors->size());
   MACE_ASSERT(num_inputs_ == num_inputs, "Wrong inputs num");
   MACE_ASSERT(num_outputs_ == num_outputs, "Wrong outputs num");
 
-  std::vector<hexagon_nn_tensordef> inputs(num_inputs * NUM_METADATA);
-  std::vector<hexagon_nn_tensordef> outputs(num_outputs * NUM_METADATA);
+  std::vector<hexagon_nn_tensordef> inputs(num_inputs * kNumMetaData);
+  std::vector<hexagon_nn_tensordef> outputs(num_outputs * kNumMetaData);
   std::vector<InputOutputMetadata> input_metadata(num_inputs);
   std::vector<InputOutputMetadata> output_metadata(num_outputs);
 
   // transform mace input to hexagon input
   for (size_t i = 0; i < num_inputs; ++i) {
     std::vector<index_t> input_shape = input_tensors[i]->shape();
-    size_t index = i * NUM_METADATA;
+    size_t index = i * kNumMetaData;
     inputs[index].batches = static_cast<uint32_t>(input_shape[0]);
     inputs[index].height = static_cast<uint32_t>(input_shape[1]);
     inputs[index].width = static_cast<uint32_t>(input_shape[2]);
     inputs[index].depth = static_cast<uint32_t>(input_shape[3]);
-    if (hexagon_quantize) {
-      inputs[index].data =
-          const_cast<unsigned char *>(reinterpret_cast<const unsigned char *>(
-              input_tensors[i]->raw_data()));
-      inputs[index].dataLen = static_cast<int>(input_tensors[i]->raw_size());
-      inputs[index].data_valid_len =
-          static_cast<uint32_t>(input_tensors[i]->raw_size());
-      input_metadata[i].Init(.0f, .0f, 1);
-    } else {
-      if (input_tensors_u8_.size() < i + 1) {
-        input_tensors_u8_.emplace_back(new Tensor());
-        input_tensors_u8_[i]->SetDtype(DT_UINT8);
-        input_tensors_u8_[i]->Resize(input_shape);
-      }
-
-      Quantize<uint8_t>(*input_tensors[i],
-                        input_tensors_u8_[i].get(),
-                        &input_metadata[i].min_val,
-                        &input_metadata[i].max_val);
-
-      inputs[index].data =
-          const_cast<unsigned char *>(reinterpret_cast<const unsigned char *>(
-              input_tensors_u8_[i]->raw_data()));
-      inputs[index].dataLen =
-          static_cast<int>(input_tensors_u8_[i]->raw_size());
-      inputs[index].data_valid_len =
-          static_cast<uint32_t>(input_tensors_u8_[i]->raw_size());
-      input_metadata[i].needs_quantization = 0;
-    }
+    inputs[index].data = const_cast<unsigned char *>(
+        reinterpret_cast<const unsigned char *>(input_tensors[i]->raw_data()));
+    inputs[index].dataLen = static_cast<int>(input_tensors[i]->raw_size());
+    inputs[index].data_valid_len =
+        static_cast<uint32_t>(input_tensors[i]->raw_size());
     inputs[index].unused = 0;
+    input_metadata[i].Init(.0f, .0f, 1);
     AddInputMetadata(input_metadata[i].min_val, &inputs[index + 1]);
     AddInputMetadata(input_metadata[i].max_val, &inputs[index + 2]);
     AddInputMetadata(input_metadata[i].needs_quantization, &inputs[index + 3]);
@@ -484,29 +467,14 @@ bool HexagonControlWrapper::ExecuteGraphNew(
 
   // transform mace output to hexagon output
   for (size_t i = 0; i < num_outputs; ++i) {
-    size_t index = i * NUM_METADATA;
-    (*output_tensors)[i]->SetDtype(output_data_types_[i]);
-    (*output_tensors)[i]->Resize(output_shapes_[i]);
-
-    if (hexagon_quantize) {
-      outputs[index].data = reinterpret_cast<unsigned char *>(
-          (*output_tensors)[i]->raw_mutable_data());
-      outputs[index].dataLen =
-          static_cast<int>((*output_tensors)[i]->raw_size());
-      output_metadata[i].Init(.0f, .0f, 1);
-    } else {
-      if (output_tensors_u8_.size() < i + 1) {
-        output_tensors_u8_.emplace_back(new Tensor());
-        output_tensors_u8_[i]->SetDtype(DT_UINT8);
-        output_tensors_u8_[i]->Resize(output_shapes_[i]);
-      }
+    size_t index = i * kNumMetaData;
+    (*output_tensors)[i]->SetDtype(output_info_[i].data_type);
+    (*output_tensors)[i]->Resize(output_info_[i].shape);
 
-      outputs[index].data = reinterpret_cast<unsigned char *>(
-          output_tensors_u8_[i]->raw_mutable_data());
-      outputs[index].dataLen =
-          static_cast<int>(output_tensors_u8_[i]->raw_size());
-      output_metadata[i].Init(.0f, .0f, 0);
-    }
+    outputs[index].data = reinterpret_cast<unsigned char *>(
+        (*output_tensors)[i]->raw_mutable_data());
+    outputs[index].dataLen = static_cast<int>((*output_tensors)[i]->raw_size());
+    output_metadata[i].Init(.0f, .0f, 1);
 
     AddOutputMetadata(output_metadata[i].min_val, &outputs[index + 1]);
     AddOutputMetadata(output_metadata[i].max_val, &outputs[index + 2]);
@@ -517,38 +485,27 @@ bool HexagonControlWrapper::ExecuteGraphNew(
   // Execute graph
   int res = hexagon_nn_execute_new(nn_id_,
                                    inputs.data(),
-                                   num_inputs * NUM_METADATA,
+                                   num_inputs * kNumMetaData,
                                    outputs.data(),
-                                   num_outputs * NUM_METADATA);
+                                   num_outputs * kNumMetaData);
 
   // handle hexagon output
   for (size_t i = 0; i < num_outputs; ++i) {
-    size_t index = i * NUM_METADATA;
+    size_t index = i * kNumMetaData;
     std::vector<uint32_t> output_shape{
         outputs[index].batches, outputs[index].height, outputs[index].width,
         outputs[index].depth};
-    MACE_ASSERT(output_shape.size() == output_shapes_[i].size(),
+    MACE_ASSERT(output_shape.size() == output_info_[i].shape.size(),
                 "wrong output shape inferred");
     for (size_t j = 0; j < output_shape.size(); ++j) {
       MACE_ASSERT(static_cast<index_t>(output_shape[j])
-                      == output_shapes_[i][j],
+                      == output_info_[i].shape[j],
                   "wrong output shape inferred");
     }
 
-    if (hexagon_quantize) {
-      MACE_ASSERT(static_cast<index_t>(outputs[index].data_valid_len)
-                      == (*output_tensors)[i]->raw_size(),
-                  "wrong output bytes inferred.");
-    } else {
-      MACE_ASSERT(static_cast<index_t>(outputs[index].data_valid_len)
-                      == output_tensors_u8_[i]->raw_size(),
-                  "wrong output bytes inferred.");
-
-      DeQuantize<uint8_t>(*output_tensors_u8_[i],
-                          output_metadata[i].min_val,
-                          output_metadata[i].max_val,
-                          (*output_tensors)[i]);
-    }
+    MACE_ASSERT(static_cast<index_t>(outputs[index].data_valid_len)
+                    == (*output_tensors)[i]->raw_size(),
+                "wrong output bytes inferred.");
   }
 
   return res == 0;
diff --git a/mace/core/runtime/hexagon/hexagon_dsp_wrapper.h b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.h
new file mode 100644
index 0000000000000000000000000000000000000000..2c46414bf390b87af35f2000e2732b0e50663e95
--- /dev/null
+++ b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.h
@@ -0,0 +1,51 @@
+// Copyright 2018 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DSP_WRAPPER_H_
+#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DSP_WRAPPER_H_
+
+#include <vector>
+
+#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
+#include "mace/core/tensor.h"
+#include "mace/public/mace.h"
+
+namespace mace {
+
+class HexagonDSPWrapper : public HexagonControlWrapper {
+ public:
+  HexagonDSPWrapper() = default;
+  // Overrides implemented with hexagon_nn_* calls (hexagon_dsp_wrapper.cc).
+  int GetVersion() override;
+  bool Config() override;
+  bool Init() override;
+  bool Finalize() override;
+  bool SetupGraph(const NetDef &net_def,
+                  const unsigned char *model_data) override;
+  bool ExecuteGraph(const Tensor &input_tensor,
+                    Tensor *output_tensor) override;
+  bool ExecuteGraphNew(const std::vector<Tensor *> &input_tensors,
+                       std::vector<Tensor *> *output_tensors) override;
+  bool TeardownGraph() override;
+  void PrintLog() override;
+  void PrintGraph() override;
+  void GetPerfInfo() override;
+  void ResetPerfInfo() override;
+  void SetDebugLevel(int level) override;
+  // Presumably deletes copy ctor/assignment (macro is defined elsewhere).
+  MACE_DISABLE_COPY_AND_ASSIGN(HexagonDSPWrapper);
+};
+}  // namespace mace
+
+#endif  // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DSP_WRAPPER_H_
diff --git a/mace/core/runtime/hexagon/hexagon_hta_ops.h b/mace/core/runtime/hexagon/hexagon_hta_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..39a108609d815b2eeaf805d611b5fb4fbd69c564
--- /dev/null
+++ b/mace/core/runtime/hexagon/hexagon_hta_ops.h
@@ -0,0 +1,50 @@
+// Copyright 2018 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_OPS_H_
+#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_OPS_H_
+
+#include <string>
+#include <unordered_map>
+
+#include "mace/utils/logging.h"
+#include "third_party/hta/hta_hexagon_nn_ops.h"
+
+namespace mace {
+
+class OpMap {  // Maps op-type names to hta_op_type ids; Init() fills it.
+ public:
+  void Init() {
+#define HTA_DEF_OP(NAME) op_map_[#NAME] = HTA_OP_##NAME;
+    // Presumably a list of HTA_DEF_OP(NAME) entries - TODO confirm.
+#include "third_party/hta/hta_ops.h"
+    // NOTE(review): hexagon_dsp_ops.h also names a class OpMap - ODR risk?
+#undef HTA_DEF_OP
+  }
+  // Returns the id of op_type; logs and returns HTA_NN_OPS_MAX if unknown.
+  hta_op_type GetOpId(const std::string &op_type) {
+    const auto iter = op_map_.find(op_type);  // single hash lookup
+    if (iter != op_map_.end()) {
+      return iter->second;
+    }
+    LOG(ERROR) << "HTA unsupported op type: " << op_type;
+    return HTA_NN_OPS_MAX;
+  }
+
+ private:
+  std::unordered_map<std::string, hta_op_type> op_map_;
+};
+}  // namespace mace
+
+#endif  // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_OPS_H_
diff --git a/mace/core/runtime/hexagon/hexagon_hta_wrapper.cc b/mace/core/runtime/hexagon/hexagon_hta_wrapper.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e3754f19ca8f0528e0679816cd18c0ccfbb1197a
--- /dev/null
+++ b/mace/core/runtime/hexagon/hexagon_hta_wrapper.cc
@@ -0,0 +1,318 @@
+// Copyright 2018 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mace/core/runtime/hexagon/hexagon_hta_wrapper.h"
+
+#include <algorithm>
+#include <iomanip>
+#include <memory>
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <utility>
+
+#include "mace/core/runtime/hexagon/hexagon_hta_ops.h"
+#include "mace/core/types.h"
+#include "mace/utils/memory.h"
+#include "mace/utils/quantize.h"
+#include "third_party/hta/hta_hexagon_api.h"
+
+namespace mace {
+
+int HexagonHTAWrapper::GetVersion() {
+  int version;  // output argument of hexagon_hta_nn_version()
+  MACE_CHECK(hexagon_hta_nn_version(&version) == 0, "get version error");
+  return version;
+}
+
+bool HexagonHTAWrapper::Config() {
+  LOG(INFO) << "HTA config";
+  MACE_CHECK(hexagon_hta_nn_config() == 0, "hexagon config error");
+  return true;  // no false path; a non-zero status is left to MACE_CHECK
+}
+
+bool HexagonHTAWrapper::Init() {
+  LOG(INFO) << "Hexagon init";  // nn_id_ is filled in by the call below
+  MACE_CHECK(hexagon_hta_nn_init(&nn_id_) == 0, "hexagon_nn_init failed");
+  ResetPerfInfo();
+  return true;  // no false path; a non-zero status is left to MACE_CHECK
+}
+
+bool HexagonHTAWrapper::Finalize() {
+  LOG(INFO) << "Hexagon finalize";
+  return true;  // no HTA call is made here; always reports success
+}
+
+// Appends net_def's const and op nodes to the HTA graph, records the model's
+// input/output info and prepares the graph; failures go through MACE_CHECK.
+bool HexagonHTAWrapper::SetupGraph(const NetDef &net_def,
+                                   const unsigned char *model_data) {
+  LOG(INFO) << "Hexagon setup graph";
+
+  int64_t t0 = NowMicros();
+
+  // Const nodes: shapes are left-padded with 1s up to four dimensions.
+  for (const ConstTensor &const_tensor : net_def.tensors()) {
+    std::vector<int> tensor_shape(const_tensor.dims().begin(),
+                                  const_tensor.dims().end());
+    while (tensor_shape.size() < 4) {
+      tensor_shape.insert(tensor_shape.begin(), 1);
+    }
+
+    hexagon_nn_const_node const_node;
+    const_node.node_id = node_id(const_tensor.node_id());
+    const_node.tensor.batches = tensor_shape[0];
+    const_node.tensor.height = tensor_shape[1];
+    const_node.tensor.width = tensor_shape[2];
+    const_node.tensor.depth = tensor_shape[3];
+
+    if (const_tensor.data_type() == DataType::DT_INT32 &&
+        const_tensor.data_size() == 0) {
+      const_node.tensor.data = nullptr;
+      const_node.tensor.dataLen = 0;
+    } else {
+      const_node.tensor.data =
+          const_cast<unsigned char *>(model_data + const_tensor.offset());
+      const_node.tensor.dataLen = const_tensor.data_size() *
+          GetEnumTypeSize(const_tensor.data_type());
+    }
+
+    // Treat a non-zero status as failure, like the other HTA calls here.
+    const auto const_status = hexagon_hta_nn_append_const_node(
+        nn_id_, const_node.node_id, const_node.tensor.batches,
+        const_node.tensor.height, const_node.tensor.width,
+        const_node.tensor.depth, const_node.tensor.data,
+        const_node.tensor.dataLen);
+    MACE_CHECK(const_status == 0, "append const node failed");
+  }
+
+  // Op nodes.
+  OpMap op_map;
+  op_map.Init();
+  std::vector<std::vector<hexagon_hta_nn_input>> cached_inputs;
+  std::vector<std::vector<hexagon_hta_nn_output>> cached_outputs;
+  std::vector<hexagon_hta_nn_input> inputs;
+  std::vector<hexagon_hta_nn_output> outputs;
+
+  for (const OperatorDef &op : net_def.op()) {
+    hta_op_type op_id = op_map.GetOpId(op.type());
+    // GetOpId() reports unknown types as HTA_NN_OPS_MAX; never append that.
+    MACE_CHECK(op_id != HTA_NN_OPS_MAX, "HTA unsupported op type");
+    inputs.resize(op.node_input().size());
+    for (int i = 0; i < op.node_input().size(); ++i) {
+      inputs[i].src_id = node_id(op.node_input()[i].node_id());
+      inputs[i].output_idx = op.node_input()[i].output_port();
+    }
+    outputs.resize(op.output_shape().size());
+    for (int i = 0; i < op.output_shape().size(); ++i) {
+      outputs[i].rank = op.output_shape()[i].dims().size();
+      for (size_t j = 0; j < outputs[i].rank; ++j) {
+        outputs[i].max_sizes[j] = op.output_shape()[i].dims()[j];
+      }
+      if (outputs[i].rank == 0) {
+        outputs[i].rank = 1;
+        outputs[i].max_sizes[0] = 1;
+      }
+      outputs[i].max_sizes[outputs[i].rank] = 0;
+      outputs[i].elementsize = GetEnumTypeSize(
+          static_cast<DataType>(op.output_type()[i]));
+      outputs[i].zero_offset = 0;
+      outputs[i].stepsize = 0;
+    }
+    cached_inputs.push_back(inputs);
+    cached_outputs.push_back(outputs);
+
+    auto padding_type = static_cast<hta_padding_type>(op.padding());
+
+    hexagon_nn_op_node op_node;
+    op_node.node_id = node_id(op.node_id());
+    op_node.operation = op_id;
+    op_node.padding = padding_type;
+    op_node.inputs = cached_inputs.back().data();
+    op_node.inputsLen = inputs.size();
+    op_node.outputs = cached_outputs.back().data();
+    op_node.outputsLen = outputs.size();
+
+    const auto op_status = hexagon_hta_nn_append_node(
+        nn_id_, op_node.node_id, op_node.operation, op_node.padding,
+        op_node.inputs, op_node.inputsLen, op_node.outputs,
+        op_node.outputsLen);
+    MACE_CHECK(op_status == 0, "append op node failed");
+  }
+
+  // Input info: shape padded to 4-D, data type, scale and zero point.
+  num_inputs_ = net_def.input_info_size();
+  input_info_.reserve(num_inputs_);
+  for (const InputOutputInfo &input_info : net_def.input_info()) {
+    std::vector<index_t> input_shape(input_info.dims().begin(),
+                                     input_info.dims().end());
+    while (input_shape.size() < 4) {
+      input_shape.insert(input_shape.begin(), 1);
+    }
+    input_info_.emplace_back(input_shape,
+                             input_info.data_type(),
+                             input_info.scale(),
+                             input_info.zero_point(),
+                             make_unique<Tensor>());
+  }
+
+  // Output info: same fields as the input info above.
+  num_outputs_ = net_def.output_info_size();
+  output_info_.reserve(num_outputs_);
+  for (const InputOutputInfo &output_info : net_def.output_info()) {
+    std::vector<index_t> output_shape(output_info.dims().begin(),
+                                      output_info.dims().end());
+    while (output_shape.size() < 4) {
+      output_shape.insert(output_shape.begin(), 1);
+    }
+    output_info_.emplace_back(output_shape,
+                              output_info.data_type(),
+                              output_info.scale(),
+                              output_info.zero_point(),
+                              make_unique<Tensor>());
+    VLOG(1) << "OutputInfo: "
+            << "\n\t shape: " << output_shape[0] << " " << output_shape[1]
+            << " " << output_shape[2] << " " << output_shape[3]
+            << "\n\t type: " << output_info.data_type();
+  }
+
+  int64_t t1 = NowMicros();
+
+  MACE_CHECK(hexagon_hta_nn_prepare(nn_id_) == 0, "hexagon_nn_prepare failed");
+
+  int64_t t2 = NowMicros();
+
+  VLOG(1) << "Setup time: " << t1 - t0 << " " << t2 - t1;
+
+  return true;
+}
+
+bool HexagonHTAWrapper::TeardownGraph() {
+  LOG(INFO) << "Hexagon teardown graph";
+  return hexagon_hta_nn_teardown(nn_id_) == 0;  // true iff status is 0
+}
+
+void HexagonHTAWrapper::PrintLog() {
+  LOG(INFO) << "Print Log";  // only emits this message; no HTA call is made
+}
+
+void HexagonHTAWrapper::PrintGraph() {
+  LOG(INFO) << "Print Graph";  // only emits this message; no HTA call is made
+}
+
+void HexagonHTAWrapper::SetDebugLevel(int level) {
+  LOG(INFO) << "Set debug level: " << level;  // level is forwarded below
+  MACE_CHECK(hexagon_hta_nn_set_debug_level(nn_id_, level) == 0,
+             "set debug level error");
+}
+
+void HexagonHTAWrapper::GetPerfInfo() {
+  LOG(INFO) << "Get perf info";  // only emits this message; no HTA call
+}
+
+void HexagonHTAWrapper::ResetPerfInfo() {
+  LOG(INFO) << "Reset perf info";  // only emits this message; no HTA call
+}
+
+bool HexagonHTAWrapper::ExecuteGraph(const Tensor &input_tensor,
+                                     Tensor *output_tensor) {
+  MACE_UNUSED(input_tensor);   // neither argument is read by this override
+  MACE_UNUSED(output_tensor);
+  MACE_NOT_IMPLEMENTED;  // the HTA wrapper only provides ExecuteGraphNew()
+  return false;
+}
+
+// Quantizes each float input to uint8, runs the HTA graph and dequantizes
+// the uint8 results into output_tensors. Returns false if execution fails.
+bool HexagonHTAWrapper::ExecuteGraphNew(
+    const std::vector<Tensor *> &input_tensors,
+    std::vector<Tensor *> *output_tensors) {
+  VLOG(2) << "Execute graph new: " << nn_id_;
+  uint32_t num_inputs = static_cast<uint32_t>(input_tensors.size());
+  uint32_t num_outputs = static_cast<uint32_t>(output_tensors->size());
+  MACE_ASSERT(num_inputs_ == num_inputs, "Wrong inputs num");
+  MACE_ASSERT(num_outputs_ == num_outputs, "Wrong outputs num");
+
+  std::vector<hexagon_hta_nn_tensordef> inputs(num_inputs);
+  std::vector<hexagon_hta_nn_tensordef> outputs(num_outputs);
+
+  for (size_t i = 0; i < num_inputs; ++i) {
+    std::vector<index_t> input_shape = input_tensors[i]->shape();
+    inputs[i].batches = static_cast<uint32_t>(input_shape[0]);
+    inputs[i].height = static_cast<uint32_t>(input_shape[1]);
+    inputs[i].width = static_cast<uint32_t>(input_shape[2]);
+    inputs[i].depth = static_cast<uint32_t>(input_shape[3]);
+    input_info_[i].tensor_u8->SetDtype(DT_UINT8);
+    input_info_[i].tensor_u8->Resize(input_shape);
+
+    const float *input_data = input_tensors[i]->data<float>();
+    uint8_t *input_data_u8 = input_info_[i].tensor_u8->mutable_data<uint8_t>();
+    QuantizeWithScaleAndZeropoint(input_data,
+                                  input_tensors[i]->size(),
+                                  input_info_[i].scale,
+                                  input_info_[i].zero_point,
+                                  input_data_u8);
+
+    inputs[i].data = const_cast<unsigned char *>(
+        reinterpret_cast<const unsigned char *>(
+            input_info_[i].tensor_u8->raw_data()));
+    inputs[i].dataLen = static_cast<int>(input_info_[i].tensor_u8->raw_size());
+    inputs[i].data_valid_len = static_cast<uint32_t>(
+        input_info_[i].tensor_u8->raw_size());
+    inputs[i].unused = 0;
+  }
+
+  for (size_t i = 0; i < num_outputs; ++i) {
+    (*output_tensors)[i]->SetDtype(output_info_[i].data_type);
+    (*output_tensors)[i]->Resize(output_info_[i].shape);
+    output_info_[i].tensor_u8->SetDtype(DT_UINT8);
+    output_info_[i].tensor_u8->Resize(output_info_[i].shape);
+    outputs[i].data = reinterpret_cast<unsigned char *>(
+        output_info_[i].tensor_u8->raw_mutable_data());
+    outputs[i].dataLen =
+        static_cast<int>(output_info_[i].tensor_u8->raw_size());
+  }
+
+  const int res = hexagon_hta_nn_execute_new(
+      nn_id_, inputs.data(), num_inputs, outputs.data(), num_outputs);
+  if (res != 0) {
+    LOG(ERROR) << "HTA execute failed: " << res;
+    return false;
+  }
+
+  for (size_t i = 0; i < num_outputs; ++i) {
+    std::vector<uint32_t> output_shape{outputs[i].batches, outputs[i].height,
+                                       outputs[i].width, outputs[i].depth};
+    MACE_ASSERT(output_shape.size() == output_info_[i].shape.size(),
+                "wrong output shape inferred");
+    for (size_t j = 0; j < output_shape.size(); ++j) {
+      MACE_ASSERT(static_cast<index_t>(output_shape[j])
+                      == output_info_[i].shape[j],
+                  "wrong output shape inferred");
+    }
+    // The HTA buffer is the uint8 staging tensor, so compare against its size.
+    MACE_ASSERT(static_cast<index_t>(outputs[i].data_valid_len)
+                    == output_info_[i].tensor_u8->raw_size(),
+                "wrong output bytes inferred.");
+    const uint8_t *output_data_u8 = output_info_[i].tensor_u8->data<uint8_t>();
+    float *output_data = (*output_tensors)[i]->mutable_data<float>();
+    Dequantize(output_data_u8, output_info_[i].tensor_u8->size(),
+               output_info_[i].scale, output_info_[i].zero_point,
+               output_data);
+  }
+
+  return true;
+}
+
+}  // namespace mace
diff --git a/mace/core/runtime/hexagon/hexagon_hta_wrapper.h b/mace/core/runtime/hexagon/hexagon_hta_wrapper.h
new file mode 100644
index 0000000000000000000000000000000000000000..26ea17bde45da1853efe222e9f7d30baa25d3471
--- /dev/null
+++ b/mace/core/runtime/hexagon/hexagon_hta_wrapper.h
@@ -0,0 +1,51 @@
+// Copyright 2018 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_WRAPPER_H_
+#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_WRAPPER_H_
+
+#include <vector>
+
+#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
+#include "mace/core/tensor.h"
+#include "mace/public/mace.h"
+
+namespace mace {
+
+class HexagonHTAWrapper : public HexagonControlWrapper {
+ public:
+  HexagonHTAWrapper() = default;
+  // HexagonControlWrapper overrides backed by the hexagon_hta_nn_* API.
+  int GetVersion() override;
+  bool Config() override;
+  bool Init() override;
+  bool Finalize() override;  // logs only; always returns true
+  bool SetupGraph(const NetDef &net_def,
+                  const unsigned char *model_data) override;
+  bool ExecuteGraph(const Tensor &input_tensor,
+                    Tensor *output_tensor) override;  // not implemented
+  bool ExecuteGraphNew(const std::vector<Tensor *> &input_tensors,
+                       std::vector<Tensor *> *output_tensors) override;
+  bool TeardownGraph() override;
+  void PrintLog() override;       // logs a fixed message only
+  void PrintGraph() override;     // logs a fixed message only
+  void GetPerfInfo() override;    // logs a fixed message only
+  void ResetPerfInfo() override;  // logs a fixed message only
+  void SetDebugLevel(int level) override;
+
+  MACE_DISABLE_COPY_AND_ASSIGN(HexagonHTAWrapper);
+};
+}  // namespace mace
+
+#endif  // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_WRAPPER_H_
diff --git a/mace/examples/cli/BUILD.bazel b/mace/examples/cli/BUILD.bazel
index e661c10b80989f3aa4238b0fb40bbad53976f2d4..693009e37f0a5a49fc1ca4ffab771c67de25b7c5 100644
--- a/mace/examples/cli/BUILD.bazel
+++ b/mace/examples/cli/BUILD.bazel
@@ -3,6 +3,7 @@ load(
     "//mace:mace.bzl",
     "if_android",
     "if_hexagon_enabled",
+    "if_hta_enabled",
     "if_opencl_enabled",
     "if_openmp_enabled",
 )
@@ -36,6 +37,8 @@ cc_binary(
         "//mace/utils:utils_hdrs",
     ] + if_hexagon_enabled([
         "//third_party/nnlib:libhexagon",
+    ]) + if_hta_enabled([
+        "//third_party/hta",
     ]),
 )
 
diff --git a/mace/examples/cli/example.cc b/mace/examples/cli/example.cc
index 7e485bc65790797fde97516d8418c4ab58035030..0e26b9f5c0bd0ab1db390f26a6b5d8a3b2ece767 100644
--- a/mace/examples/cli/example.cc
+++ b/mace/examples/cli/example.cc
@@ -79,6 +79,8 @@ DeviceType ParseDeviceType(const std::string &device_str) {
     return DeviceType::GPU;
   } else if (device_str.compare("HEXAGON") == 0) {
     return DeviceType::HEXAGON;
+  } else if (device_str.compare("HTA") == 0) {
+    return DeviceType::HTA;
   } else {
     return DeviceType::CPU;
   }
diff --git a/mace/libmace/BUILD.bazel b/mace/libmace/BUILD.bazel
index 29127a1499de0dc7c0e7a5d464cc5e1371976731..36eff0c80a76c3adb0b9e8738281974bf1aa2280 100644
--- a/mace/libmace/BUILD.bazel
+++ b/mace/libmace/BUILD.bazel
@@ -16,6 +16,7 @@ load(
     "if_openmp_enabled",
     "if_android_armv7",
     "if_hexagon_enabled",
+    "if_hta_enabled",
     "if_opencl_enabled",
     "if_quantize_enabled",
 )
@@ -40,6 +41,8 @@ cc_library(
         "-DMACE_ENABLE_QUANTIZE",
     ]) + if_hexagon_enabled([
         "-DMACE_ENABLE_HEXAGON",
+    ]) + if_hta_enabled([
+        "-DMACE_ENABLE_HTA",
     ]),
     deps = [
         "//mace/ops",
diff --git a/mace/libmace/capability.cc b/mace/libmace/capability.cc
index c9dff5dc73782d6831a9b4a59d0e9aa22ada2e99..d37a62b6616b03bc476e7549b4e1b5d73357148d 100644
--- a/mace/libmace/capability.cc
+++ b/mace/libmace/capability.cc
@@ -142,7 +142,7 @@ void BMNet::SetUp() {
 
   // Add input and output information
   for (size_t i = 0; i < input_names_.size(); ++i) {
-    InputInfo *info = net_.add_input_info();
+    InputOutputInfo *info = net_.add_input_info();
     info->set_data_format(DataFormat::NHWC);
     info->set_name(input_names_[i]);
     for (auto d : input_shapes_[i]) {
@@ -150,7 +150,7 @@ void BMNet::SetUp() {
     }
   }
   for (auto output_name : output_names_) {
-    OutputInfo *info = net_.add_output_info();
+    InputOutputInfo *info = net_.add_output_info();
     info->set_name(output_name);
   }
   // allocate weight data
diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc
index 78991a71c74f206abdfe4cf8d547b2fd6d6b2826..2b626cf51320d11e8e50e2494b70913e268ffe57 100644
--- a/mace/libmace/mace.cc
+++ b/mace/libmace/mace.cc
@@ -33,10 +33,9 @@
 #include "mace/core/runtime/opencl/opencl_runtime.h"
 #endif  // MACE_ENABLE_OPENCL
 
-#ifdef MACE_ENABLE_HEXAGON
-#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
+#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
 #include "mace/core/runtime/hexagon/hexagon_device.h"
-#endif  // MACE_ENABLE_HEXAGON
+#endif
 
 namespace mace {
 namespace {
@@ -387,11 +386,11 @@ class MaceEngine::Impl {
   std::unique_ptr<Workspace> ws_;
   std::unique_ptr<NetBase> net_;
   bool is_quantized_model_;
-#ifdef MACE_ENABLE_HEXAGON
+#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
   std::unique_ptr<HexagonControlWrapper> hexagon_controller_;
 #endif
-  std::map<std::string, mace::InputInfo> input_info_map_;
-  std::map<std::string, mace::OutputInfo> output_info_map_;
+  std::map<std::string, mace::InputOutputInfo> input_info_map_;
+  std::map<std::string, mace::InputOutputInfo> output_info_map_;
 
   MACE_DISABLE_COPY_AND_ASSIGN(Impl);
 };
@@ -404,7 +403,7 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config)
       ws_(new Workspace()),
       net_(nullptr),
       is_quantized_model_(false)
-#ifdef MACE_ENABLE_HEXAGON
+#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
       , hexagon_controller_(nullptr)
 #endif
 {
@@ -427,9 +426,9 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config)
         config.impl_->use_gemmlowp()));
   }
 #endif
-#ifdef MACE_ENABLE_HEXAGON
-  if (device_type_ == DeviceType::HEXAGON) {
-    device_.reset(new HexagonDevice());
+#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
+  if (device_type_ == DeviceType::HEXAGON || device_type_ == DeviceType::HTA) {
+    device_.reset(new HexagonDevice(device_type_));
   }
 #endif
   MACE_CHECK_NOTNULL(device_);
@@ -481,13 +480,13 @@ MaceStatus MaceEngine::Impl::Init(
                  << "' does not belong to model's outputs "
                  << MakeString(MapKeys(output_info_map_));
     }
-#ifdef MACE_ENABLE_HEXAGON
+#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
     ws_->CreateTensor(output_name, device_->allocator(), DT_FLOAT);
 #endif
   }
-#ifdef MACE_ENABLE_HEXAGON
-  if (device_type_ == HEXAGON) {
-    hexagon_controller_.reset(new HexagonControlWrapper());
+#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
+  if (device_type_ == HEXAGON || device_type_ == HTA) {
+    hexagon_controller_ = CreateHexagonControlWrapper(device_type_);
     MACE_CHECK(hexagon_controller_->Config(), "hexagon config error");
     MACE_CHECK(hexagon_controller_->Init(), "hexagon init error");
     hexagon_controller_->SetDebugLevel(
@@ -519,7 +518,7 @@ MaceStatus MaceEngine::Impl::Init(
       ws_->RemoveAndReloadBuffer(*net_def, model_data, device_->allocator());
     }
     MACE_RETURN_IF_ERROR(net_->Init());
-#ifdef MACE_ENABLE_HEXAGON
+#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
   }
 #endif
 
@@ -541,6 +540,7 @@ MaceStatus MaceEngine::Impl::Init(
         reinterpret_cast<const unsigned char *>(model_data_->data())));
 
   if (device_type_ == DeviceType::GPU || device_type_ == DeviceType::HEXAGON ||
+      device_type_ == DeviceType::HTA ||
       (device_type_ == DeviceType::CPU && ws_->diffused_buffer())) {
     model_data_.reset();
   }
@@ -549,8 +549,8 @@ MaceStatus MaceEngine::Impl::Init(
 
 MaceEngine::Impl::~Impl() {
   LOG(INFO) << "Destroying MaceEngine";
-#ifdef MACE_ENABLE_HEXAGON
-  if (device_type_ == HEXAGON) {
+#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
+  if (device_type_ == HEXAGON || device_type_ == HTA) {
     if (VLOG_IS_ON(2)) {
       hexagon_controller_->GetPerfInfo();
       hexagon_controller_->PrintLog();
@@ -699,15 +699,15 @@ MaceStatus MaceEngine::Impl::Run(
     Tensor *output_tensor = ws_->GetTensor(output.first);
     output_tensors.push_back(output_tensor);
   }
-#ifdef MACE_ENABLE_HEXAGON
-  if (device_type_ == HEXAGON) {
+#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
+  if (device_type_ == HEXAGON || device_type_ == HTA) {
     MACE_CHECK(input_tensors.size() == 1 && output_tensors.size() == 1,
                "HEXAGON not support multiple inputs and outputs yet.");
-    hexagon_controller_->ExecuteGraphNew(input_tensors, &output_tensors, true);
+    hexagon_controller_->ExecuteGraphNew(input_tensors, &output_tensors);
   } else {
 #endif
     MACE_RETURN_IF_ERROR(net_->Run(run_metadata));
-#ifdef MACE_ENABLE_HEXAGON
+#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
   }
 #endif
 
diff --git a/mace/libmace/mace_version_script.lds b/mace/libmace/mace_version_script.lds
index 88f748edcd629658d107cb59e93e35d231309d3e..a088736de4d1e6c0ab07a397ae5d4164689726b7 100644
--- a/mace/libmace/mace_version_script.lds
+++ b/mace/libmace/mace_version_script.lds
@@ -15,8 +15,7 @@ mace {
     *mace*NetDef*;
     *mace*MemoryType*;
     *mace*DataType*;
-    *mace*InputInfo*;
-    *mace*OutputInfo*;
+    *mace*InputOutputInfo*;
     *mace*OutputShape*;
     *mace*OperatorDef*;
     *mace*ConstTensor*;
diff --git a/mace/mace.bzl b/mace/mace.bzl
index ee9f8c59dc94ab15dd0749205c4630ce9f4b1ce4..1f577e7e47d02f6ce23391205110687b49d1efdf 100644
--- a/mace/mace.bzl
+++ b/mace/mace.bzl
@@ -60,6 +60,19 @@ def if_not_hexagon_enabled(a):
       "//conditions:default": a,
   })
 
+def if_hta_enabled(a):
+  """Returns `a` when //mace:hta_enabled matches, else an empty list."""
+  return select({
+      "//mace:hta_enabled": a, "//conditions:default": [],
+  })
+
+def if_hexagon_or_hta_enabled(a):
+  """Returns `a` if //mace:hexagon_enabled or //mace:hta_enabled matches."""
+  return select({
+      "//mace:hexagon_enabled": a, "//mace:hta_enabled": a,
+      "//conditions:default": [],
+  })
+
 def if_openmp_enabled(a):
   return select({
       "//mace:openmp_enabled": a,
diff --git a/mace/proto/mace.proto b/mace/proto/mace.proto
index 530de3aedfcd6a94d9ee840f8e368a4447d6cd8c..d3b564fc6a9de2b7b79f9c73df53b3fa9e310788 100644
--- a/mace/proto/mace.proto
+++ b/mace/proto/mace.proto
@@ -86,21 +86,15 @@ message OperatorDef {
 }
 
 // for hexagon mace-nnlib
-message InputInfo {
-  optional string name = 1;
-  optional int32 node_id = 2;
-  repeated int32 dims = 3;
-  optional int32 max_byte_size = 4;  // only support 32-bit len
-  optional DataType data_type = 5 [default = DT_FLOAT];
-  optional int32 data_format = 6 [default = 1];  // NHWC
-}
-message OutputInfo {
+message InputOutputInfo {
   optional string name = 1;
   optional int32 node_id = 2;
   repeated int32 dims = 3;
   optional int32 max_byte_size = 4;  // only support 32-bit len
   optional DataType data_type = 5 [default = DT_FLOAT];
   optional int32 data_format = 6 [default = 1];  // NHWC
+  optional float scale = 7;  // quantization scale of this tensor
+  optional int32 zero_point = 8;  // quantization zero point of this tensor
 }
 
 message NetDef {
@@ -109,6 +103,6 @@ message NetDef {
   repeated ConstTensor tensors = 3;
 
   // for hexagon mace-nnlib
-  repeated InputInfo input_info = 100;
-  repeated OutputInfo output_info = 101;
+  repeated InputOutputInfo input_info = 100;
+  repeated InputOutputInfo output_info = 101;
 }
diff --git a/mace/public/mace.h b/mace/public/mace.h
index 3d210d3801bd899a5ad27951f61a898648845096..1e0bdc187b11b898db7b5c4430c26b3452a35998 100644
--- a/mace/public/mace.h
+++ b/mace/public/mace.h
@@ -32,7 +32,7 @@ namespace mace {
 
 class NetDef;
 
-enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3 };
+enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3, HTA = 4 };
 
 enum DataFormat { DF_NONE = 0, NHWC = 1, NCHW = 2};
 
diff --git a/mace/python/tools/converter.py b/mace/python/tools/converter.py
index 6d2e31d3de67502b77ac76f051d90a2bc0678659..bd588d0cabde72bb5dd567c52fbeb7d997f826fa 100644
--- a/mace/python/tools/converter.py
+++ b/mace/python/tools/converter.py
@@ -37,6 +37,7 @@ FLAGS = None
 device_type_map = {'cpu': cvt.DeviceType.CPU.value,
                    'gpu': cvt.DeviceType.GPU.value,
                    'dsp': cvt.DeviceType.HEXAGON.value,
+                   'hta': cvt.DeviceType.HTA.value,
                    'cpu+gpu': cvt.DeviceType.CPU.value}
 
 data_format_map = {
@@ -53,10 +54,11 @@ def parse_data_type(data_type, device_type):
             return mace_pb2.DT_FLOAT
         else:
             return mace_pb2.DT_HALF
-    elif device_type == cvt.DeviceType.HEXAGON.value:
+    elif device_type == cvt.DeviceType.HEXAGON.value or \
+            device_type == cvt.DeviceType.HTA.value:
         return mace_pb2.DT_FLOAT
     else:
-        print("Invalid device type: " + device_type)
+        print("Invalid device type: " + str(device_type))
 
 
 def file_checksum(fname):
@@ -121,7 +123,7 @@ def main(unused_args):
         six.print_("platform %s is not supported." % FLAGS.platform,
                    file=sys.stderr)
         sys.exit(-1)
-    if FLAGS.runtime not in ['cpu', 'gpu', 'dsp', 'cpu+gpu']:
+    if FLAGS.runtime not in ['cpu', 'gpu', 'dsp', 'hta', 'cpu+gpu']:
         six.print_("runtime %s is not supported." % FLAGS.runtime,
                    file=sys.stderr)
         sys.exit(-1)
@@ -220,7 +222,8 @@ def main(unused_args):
         option, output_graph_def)
     output_graph_def, quantize_activation_info = mace_transformer.run()
 
-    if FLAGS.runtime == 'dsp':
+    if option.device in [cvt.DeviceType.HEXAGON.value,
+                         cvt.DeviceType.HTA.value]:
         from mace.python.tools.converter_tool import hexagon_converter
         converter = hexagon_converter.HexagonConverter(
             option, output_graph_def, quantize_activation_info)
diff --git a/mace/python/tools/converter_tool/base_converter.py b/mace/python/tools/converter_tool/base_converter.py
index 204b1d21fe79d0cd0c266933f2b4d80591eaf24e..fbfb1b0239693c05d14236ec841f073654090062 100644
--- a/mace/python/tools/converter_tool/base_converter.py
+++ b/mace/python/tools/converter_tool/base_converter.py
@@ -22,6 +22,7 @@ class DeviceType(Enum):
     CPU = 0
     GPU = 2
     HEXAGON = 3
+    HTA = 4
 
 
 class DataFormat(Enum):
diff --git a/mace/python/tools/converter_tool/hexagon_converter.py b/mace/python/tools/converter_tool/hexagon_converter.py
index 081adde9c0881caa57bc81a9aaa8118c8e77bcf4..53598243b247094ce43b5a832b65d1498c796547 100644
--- a/mace/python/tools/converter_tool/hexagon_converter.py
+++ b/mace/python/tools/converter_tool/hexagon_converter.py
@@ -20,6 +20,7 @@ from operator import mul
 from mace.proto import mace_pb2
 from mace.python.tools.converter_tool import base_converter
 from mace.python.tools.converter_tool.base_converter import ConverterUtil
+from mace.python.tools.converter_tool.base_converter import DeviceType
 from mace.python.tools.converter_tool.base_converter import EltwiseType
 from mace.python.tools.converter_tool.base_converter import MaceKeyword
 from mace.python.tools.converter_tool.base_converter import MaceOp
@@ -36,6 +37,8 @@ HexagonSupportedOps = [
     'BatchToSpaceND_8',
     'DepthwiseSupernode_8x8p32to8',
     'DequantizeOUTPUT_8tof',
+    'INPUT',
+    'OUTPUT',
     'QuantizedAdd_8p8to8',
     'QuantizedAvgPool_8',
     'QuantizedConcat_8',
@@ -332,7 +335,7 @@ class HexagonConverter(base_converter.ConverterInterface):
             else:
                 op.type = self._hexagon_ops.map_nn_op(op.type)
 
-    def add_min_max(self, name, val):
+    def add_const_node(self, name, val):
         if name not in self._consts:
             tensor = self._model.tensors.add()
             self._consts[name] = tensor
@@ -364,14 +367,14 @@ class HexagonConverter(base_converter.ConverterInterface):
                 min_tensor_name = op + ':1'
             else:
                 min_tensor_name = op + '_min:0'
-                self.add_min_max(min_tensor_name, minval)
+                self.add_const_node(min_tensor_name, minval)
             this_op.input.extend([min_tensor_name])
         if add_max:
             if is_activation and diff_port:
                 max_tensor_name = op + ':2'
             else:
                 max_tensor_name = op + '_max:0'
-                self.add_min_max(max_tensor_name, maxval)
+                self.add_const_node(max_tensor_name, maxval)
             this_op.input.extend([max_tensor_name])
 
     def add_shape_const_node(self, op, values, name):
@@ -382,27 +385,48 @@ class HexagonConverter(base_converter.ConverterInterface):
         tensor.dims.extend(values)
         return tensor.name
 
-    def add_input_output_node(self):
-        for op in self._model.op:
-            if op.name.startswith(MaceKeyword.mace_input_node_name):
-                del op.input[0]
-                break
+    def add_constant_min_max_for_first_op(self, op):
+        # Feed op's min/max from const nodes instead of its producer's
+        # output ports 1 and 2.
+        first_input = op.input[0]
+        quantize_info = self._quantize_activation_info[first_input]
+        producer, _ = get_op_and_port_from_tensor(first_input)
+        min_name, max_name = producer + '_min:0', producer + '_max:0'
+        self.add_const_node(min_name, quantize_info.minval)
+        self.add_const_node(max_name, quantize_info.maxval)
+        renamed = {producer + ':1': min_name, producer + ':2': max_name}
+        for i in range(len(op.input)):
+            if op.input[i] in renamed:
+                op.input[i] = renamed[op.input[i]]
 
-        output_node = None
-        if not self._option.check_nodes:
-            output_name = list(self._option.output_nodes.values())[0].name
-        else:
-            output_name = list(self._option.check_nodes.values())[0].name
-        output_name = normalize_name(output_name)
-        for op in self._model.op:
-            if op.name == output_name:
-                output_node = op
-                break
-        mace_check(output_node is not None,
-                   "mace_output_node_* not found.")
-        del output_node.output_shape[:]
-        del output_node.output_type[:]
-        del output_node.out_max_byte_size[:]
+    def add_input_output_node(self):
+        # The graph must start with a quantize op, which loses its inputs.
+        head_op = self._model.op[0]
+        mace_check(head_op.type == HexagonOp.QuantizeINPUT_f_to_8.name,
+                   "Not started with Quantize op.")
+        del head_op.input[:]
+
+        # ... and end with a dequantize op, which loses its output metadata.
+        tail_op = self._model.op[-1]
+        mace_check(tail_op.type == HexagonOp.DequantizeOUTPUT_8tof.name,
+                   "Not ended with Dequantize op.")
+        del tail_op.output_shape[:]
+        del tail_op.output_type[:]
+        del tail_op.out_max_byte_size[:]
+
+        if self._option.device != DeviceType.HTA.value:
+            return
+
+        # HTA: the quantize op becomes INPUT and keeps only its first output.
+        head_op.type = HexagonOp.INPUT.name
+        del head_op.output_shape[1:]
+        del head_op.output_type[1:]
+        del head_op.out_max_byte_size[1:]
+        # HTA: the second op reads its input min/max from const nodes.
+        self.add_constant_min_max_for_first_op(self._model.op[1])
+        # HTA: the dequantize op becomes OUTPUT and keeps only its first input.
+        tail_op.type = HexagonOp.OUTPUT.name
+        del tail_op.input[1:]
 
     def add_node_id(self):
         node_id_counter = 0
diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py
index 02fc3a12ace0808e083482ea93ae9fe2d6a7c65a..a3976913c76e70b5fcc48e498c0a3683c78c7005 100644
--- a/mace/python/tools/converter_tool/transformer.py
+++ b/mace/python/tools/converter_tool/transformer.py
@@ -1174,7 +1174,8 @@ class Transformer(base_converter.ConverterInterface):
 
             self.set_filter_format(FilterFormat.OHWI)
         elif self._option.quantize and \
-                self._option.device == DeviceType.HEXAGON.value:
+                (self._option.device == DeviceType.HEXAGON.value or
+                 self._option.device == DeviceType.HTA.value):
             print("Transpose filters to HWIO/HWIM")
             mace_check(filter_format == FilterFormat.HWIO,
                        "HEXAGON only support HWIO/HWIM filter format.")
@@ -1456,7 +1457,7 @@ class Transformer(base_converter.ConverterInterface):
                            % (op.name, op.type,
                               mace_pb2.DataType.Name(data_type_arg.i)))
 
-        for input_node in self._option.input_nodes.values():
+        for i, input_node in enumerate(self._option.input_nodes.values()):
             new_input_name = self.input_name_map[input_node.name]
             op_def = self._model.op.add()
             op_def.name = self.normalize_op_name(new_input_name)
@@ -1465,8 +1466,10 @@ class Transformer(base_converter.ConverterInterface):
             op_def.output.extend([new_input_name])
             output_shape = op_def.output_shape.add()
             output_shape.dims.extend(input_node.shape)
-            self.copy_quantize_info(
-                op_def, self._quantize_activation_info[new_input_name])
+            quantize_info = self._quantize_activation_info[new_input_name]
+            self.copy_quantize_info(op_def, quantize_info)
+            self._model.input_info[i].scale = quantize_info.scale
+            self._model.input_info[i].zero_point = quantize_info.zero_point
 
             ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_UINT8)
             ConverterUtil.add_data_format_arg(op_def, DataFormat.NHWC)
@@ -1477,16 +1480,19 @@ class Transformer(base_converter.ConverterInterface):
             find_range_every_time_arg.i = 1
 
         output_nodes = self._option.check_nodes.values()
-        for output_node in output_nodes:
+        for i, output_node in enumerate(output_nodes):
             op_def = self._model.op.add()
             op_def.name = self.normalize_op_name(output_node.name)
             op_def.type = MaceOp.Dequantize.name
             op_def.input.extend([self.output_name_map[output_node.name]])
             op_def.output.extend([output_node.name])
             output_shape = op_def.output_shape.add()
-            output_shape.dims.extend(
-                self._producer[output_node.name].output_shape[0].dims)
+            producer_op = self._producer[output_node.name]
+            output_shape.dims.extend(producer_op.output_shape[0].dims)
             op_def.output_type.extend([mace_pb2.DT_FLOAT])
+            quantize_info = producer_op.quantize_info[0]
+            self._model.output_info[i].scale = quantize_info.scale
+            self._model.output_info[i].zero_point = quantize_info.zero_point
 
             ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_UINT8)
 
@@ -1533,7 +1539,8 @@ class Transformer(base_converter.ConverterInterface):
                     quantized_tensor = \
                         quantize_util.quantize_with_scale_and_zero(
                             tensor.float_data, scale, 0)
-                elif self._option.device == DeviceType.HEXAGON.value:
+                elif self._option.device == DeviceType.HEXAGON.value or \
+                        self._option.device == DeviceType.HTA.value:
                     quantized_tensor = \
                         quantize_util.quantize_bias_for_hexagon(
                             tensor.float_data)
@@ -1691,7 +1698,7 @@ class Transformer(base_converter.ConverterInterface):
             return False
 
         print("Add default quantize info for input")
-        for input_node in self._option.input_nodes.values():
+        for i, input_node in enumerate(self._option.input_nodes.values()):
             if input_node.name not in self._quantize_activation_info:
                 print("Input range %s: %s" % (input_node.name,
                                               str(input_node.range)))
diff --git a/mace/python/tools/model.jinja2 b/mace/python/tools/model.jinja2
index c7d936c0318527423e8b251b06647048c446a17a..89bee8d8f9dba8ce27ff97ff016381eb7b9da5e7 100644
--- a/mace/python/tools/model.jinja2
+++ b/mace/python/tools/model.jinja2
@@ -75,7 +75,7 @@ void CreateNetArg(NetDef *net_def) {
 {% if net.input_info | length > 0 %}
 void CreateInputInfo(NetDef *net_def) {
   net_def->mutable_input_info()->Reserve({{ net.input_info | length }});
-  InputInfo *input_info = nullptr;
+  InputOutputInfo *input_info = nullptr;
   {% for idx in range(net.input_info|length) %}
   input_info = net_def->add_input_info();
   input_info->set_name({{ net.input_info[idx].name|tojson }});
@@ -92,7 +92,7 @@ void CreateInputInfo(NetDef *net_def) {
 {% if net.output_info | length > 0 %}
 void CreateOutputInfo(NetDef *net_def) {
   net_def->mutable_output_info()->Reserve({{ net.output_info | length }});
-  OutputInfo *output_info = nullptr;
+  InputOutputInfo *output_info = nullptr;
   {% for idx in range(net.output_info|length) %}
   output_info = net_def->add_output_info();
   output_info->set_name({{ net.output_info[idx].name|tojson }});
diff --git a/mace/test/BUILD.bazel b/mace/test/BUILD.bazel
index 36a2b6472d46db4360b1840b6031f32f94212e40..a5c5f974552dd13b35faff26f7e14266e042b3fc 100644
--- a/mace/test/BUILD.bazel
+++ b/mace/test/BUILD.bazel
@@ -11,6 +11,7 @@ load(
     "if_openmp_enabled",
     "if_android_armv7",
     "if_hexagon_enabled",
+    "if_hta_enabled",
     "if_opencl_enabled",
     "if_quantize_enabled",
 )
@@ -45,6 +46,8 @@ cc_test(
         "-DMACE_ENABLE_QUANTIZE",
     ]) + if_hexagon_enabled([
         "-DMACE_ENABLE_HEXAGON",
+    ]) + if_hta_enabled([
+        "-DMACE_ENABLE_HTA",
     ]),
     linkopts = ["-fopenmp"],
     linkstatic = 1,
@@ -78,6 +81,8 @@ cc_test(
         "-DMACE_ENABLE_QUANTIZE",
     ]) + if_hexagon_enabled([
         "-DMACE_ENABLE_HEXAGON",
+    ]) + if_hta_enabled([
+        "-DMACE_ENABLE_HTA",
     ]),
     linkopts = ["-fopenmp"],
     linkstatic = 1,
@@ -111,6 +116,8 @@ cc_test(
         "-DMACE_ENABLE_QUANTIZE",
     ]) + if_hexagon_enabled([
         "-DMACE_ENABLE_HEXAGON",
+    ]) + if_hta_enabled([
+        "-DMACE_ENABLE_HTA",
     ]),
     linkopts = ["-fopenmp"],
     linkstatic = 1,
@@ -143,6 +150,8 @@ cc_test(
         "-DMACE_ENABLE_QUANTIZE",
     ]) + if_hexagon_enabled([
         "-DMACE_ENABLE_HEXAGON",
+    ]) + if_hta_enabled([
+        "-DMACE_ENABLE_HTA",
     ]),
     linkopts = ["-fopenmp"],
     linkstatic = 1,
diff --git a/mace/test/mace_api_exception_test.cc b/mace/test/mace_api_exception_test.cc
index 075b04b40c7467d2d6a6dff10b6cb245521b68f5..232023dace17584f49c15a499b196c538f6598eb 100644
--- a/mace/test/mace_api_exception_test.cc
+++ b/mace/test/mace_api_exception_test.cc
@@ -29,7 +29,7 @@ TEST(MaceAPIExceptionTest, WrongInputTest) {
 
   std::shared_ptr<NetDef> net_def(new NetDef());
   for (size_t i = 0; i < input_names.size(); ++i) {
-    InputInfo *info = net_def->add_input_info();
+    InputOutputInfo *info = net_def->add_input_info();
     info->set_name(input_names[i]);
   }
 
diff --git a/mace/test/mace_api_mt_test.cc b/mace/test/mace_api_mt_test.cc
index f13d05b621c9d32e659b3b908b7fe85836112b7a..ee14129a05dd23d7d2fa6b3bcc491da375c12096 100644
--- a/mace/test/mace_api_mt_test.cc
+++ b/mace/test/mace_api_mt_test.cc
@@ -45,7 +45,7 @@ void MaceRunFunc(const int in_out_size) {
       filter_tensor_name, filter_shape, 0, data.size(), net_def.get());
 
   for (size_t i = 0; i < input_names.size(); ++i) {
-    InputInfo *info = net_def->add_input_info();
+    InputOutputInfo *info = net_def->add_input_info();
     info->set_data_format(DataFormat::NHWC);
     info->set_name(input_names[i]);
     for (auto d : input_shapes[0]) {
@@ -53,7 +53,7 @@ void MaceRunFunc(const int in_out_size) {
     }
   }
   for (size_t i = 0; i < output_names.size(); ++i) {
-    OutputInfo *info = net_def->add_output_info();
+    InputOutputInfo *info = net_def->add_output_info();
     info->set_name(output_names[i]);
   }
   for (size_t i = 0; i < output_names.size(); ++i) {
diff --git a/mace/test/mace_api_test.cc b/mace/test/mace_api_test.cc
index baff89112786ea9ac569f06007c96d81cffa6bd7..0a852a17a9a9cfd6a7d331556b1ad1b1a85e397a 100644
--- a/mace/test/mace_api_test.cc
+++ b/mace/test/mace_api_test.cc
@@ -44,7 +44,7 @@ void MaceRun(const int in_out_size,
   AddTensor<T>(filter_tensor_name, filter_shape, 0, data.size(), net_def.get());
 
   for (size_t i = 0; i < input_names.size(); ++i) {
-    InputInfo *info = net_def->add_input_info();
+    InputOutputInfo *info = net_def->add_input_info();
     info->set_data_format(DataFormat::NHWC);
     info->set_name(input_names[i]);
     for (auto d : max_shape) {
@@ -52,7 +52,7 @@ void MaceRun(const int in_out_size,
     }
   }
   for (size_t i = 0; i < output_names.size(); ++i) {
-    OutputInfo *info = net_def->add_output_info();
+    InputOutputInfo *info = net_def->add_output_info();
     info->set_name(output_names[i]);
   }
   for (size_t i = 0; i < output_names.size(); ++i) {
diff --git a/mace/tools/validation/mace_run.cc b/mace/tools/validation/mace_run.cc
index 4dd885a55992300f83c268fc704975272a4ae71d..d1139e519cd78d5e815f3da66808567a310da039 100644
--- a/mace/tools/validation/mace_run.cc
+++ b/mace/tools/validation/mace_run.cc
@@ -76,6 +76,8 @@ DeviceType ParseDeviceType(const std::string &device_str) {
     return DeviceType::GPU;
   } else if (device_str.compare("HEXAGON") == 0) {
     return DeviceType::HEXAGON;
+  } else if (device_str.compare("HTA") == 0) {
+    return DeviceType::HTA;
   } else {
     return DeviceType::CPU;
   }
diff --git a/third_party/hta/BUILD b/third_party/hta/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..7385472755eab0a1fb75df4bb089a63aa01e110e
--- /dev/null
+++ b/third_party/hta/BUILD
@@ -0,0 +1,31 @@
+# These files are generated from the nnlib project
+
+licenses(["notice"])
+
+exports_files(["license.txt"])
+
+load(
+    "//mace:mace.bzl",
+    "if_android_armv7",
+    "if_android_arm64",
+)
+
+cc_library(
+    name = "hta",
+    srcs = if_android_armv7([
+        "armeabi-v7a/libhta_controller.so",
+        "armeabi-v7a/libhta_hexagon_runtime.so",
+        "armeabi-v7a/libnpu.so",
+    ]) + if_android_arm64([
+        "arm64-v8a/libcdsprpc.so",
+        "arm64-v8a/libhta_controller.so",
+        "arm64-v8a/libhta_hexagon_runtime.so",
+        "arm64-v8a/libnpu.so",
+    ]),
+    hdrs = [
+        "hta_hexagon_api.h",
+        "hta_hexagon_nn_ops.h",
+        "hta_ops.h",
+    ],
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/hta/arm64-v8a/libcdsprpc.so b/third_party/hta/arm64-v8a/libcdsprpc.so
new file mode 100755
index 0000000000000000000000000000000000000000..57de01f4887197b0b510f395f828289d74597069
Binary files /dev/null and b/third_party/hta/arm64-v8a/libcdsprpc.so differ
diff --git a/third_party/hta/arm64-v8a/libhta_controller.so b/third_party/hta/arm64-v8a/libhta_controller.so
new file mode 100644
index 0000000000000000000000000000000000000000..3cb5ea31a24d319779521454720c3b587120d2e0
Binary files /dev/null and b/third_party/hta/arm64-v8a/libhta_controller.so differ
diff --git a/third_party/hta/arm64-v8a/libhta_hexagon_runtime.so b/third_party/hta/arm64-v8a/libhta_hexagon_runtime.so
new file mode 100644
index 0000000000000000000000000000000000000000..32b5d784a19a6390ffe25f4c4e4853172b4d5074
Binary files /dev/null and b/third_party/hta/arm64-v8a/libhta_hexagon_runtime.so differ
diff --git a/third_party/hta/arm64-v8a/libnpu.so b/third_party/hta/arm64-v8a/libnpu.so
new file mode 100644
index 0000000000000000000000000000000000000000..9b6633769db106f516ac7cfebea0b40b491996e1
Binary files /dev/null and b/third_party/hta/arm64-v8a/libnpu.so differ
diff --git a/third_party/hta/armeabi-v7a/libhta_controller.so b/third_party/hta/armeabi-v7a/libhta_controller.so
new file mode 100644
index 0000000000000000000000000000000000000000..03b267889d96e74b965fd485313d35ce59b8bc97
Binary files /dev/null and b/third_party/hta/armeabi-v7a/libhta_controller.so differ
diff --git a/third_party/hta/armeabi-v7a/libhta_hexagon_runtime.so b/third_party/hta/armeabi-v7a/libhta_hexagon_runtime.so
new file mode 100644
index 0000000000000000000000000000000000000000..9136f520d74901ca068c5377eccb578978ca9fa6
Binary files /dev/null and b/third_party/hta/armeabi-v7a/libhta_hexagon_runtime.so differ
diff --git a/third_party/hta/armeabi-v7a/libnpu.so b/third_party/hta/armeabi-v7a/libnpu.so
new file mode 100644
index 0000000000000000000000000000000000000000..a88605929cfdca12ecd720749064d880a6d48ab4
Binary files /dev/null and b/third_party/hta/armeabi-v7a/libnpu.so differ
diff --git a/third_party/hta/hta_hexagon_api.h b/third_party/hta/hta_hexagon_api.h
new file mode 100644
index 0000000000000000000000000000000000000000..cb13fe62bcd8bbdcb8f50f4dfb725df292aa87fd
--- /dev/null
+++ b/third_party/hta/hta_hexagon_api.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted (subject to the limitations in the
+ * disclaimer below) provided that the following conditions are met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *    * Neither the name of The Linux Foundation nor the names of its
+ *      contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *
+ * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+ * GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
+ * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef THIRD_PARTY_HTA_HEXAGON_API_H_
+#define THIRD_PARTY_HTA_HEXAGON_API_H_
+
+#include "hta_hexagon_nn_ops.h"
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int hexagon_hta_nn_nn_id;
+
+struct input {
+	uint32_t src_id;
+	uint32_t output_idx;
+};
+
+#define NODE_ID_RESERVED_CONSTANT 0
+
+#define MAX_DIMENSIONS 8
+struct output {
+	uint32_t rank; // dimensions in the tensor
+	uint32_t max_sizes[MAX_DIMENSIONS]; // max num elements in each dimension
+	uint32_t elementsize; // size of each element
+	int32_t zero_offset; // 0 for float / integer values
+	float stepsize; // 0 for float/integer values
+};
+
+struct perfinfo {
+	uint32_t node_id;
+	uint32_t executions;
+	union {
+		uint64_t counter;
+		struct {
+			uint32_t counter_lo;
+			uint32_t counter_hi;
+		};
+	};
+};
+
+typedef struct input hexagon_hta_nn_input;
+typedef struct output hexagon_hta_nn_output;
+typedef struct perfinfo hexagon_hta_nn_perfinfo;
+typedef int32_t hexagon_hta_nn_padding_type;
+
+typedef enum padding_type_enum {
+	HTA_NN_PAD_NA = 0,
+	HTA_NN_PAD_SAME,
+	HTA_NN_PAD_VALID,
+	HTA_NN_PAD_MIRROR_REFLECT,
+	HTA_NN_PAD_MIRROR_SYMMETRIC,
+	HTA_NN_PAD_SAME_CAFFE,
+} hta_padding_type;
+
+typedef struct {
+	unsigned int batches;
+	unsigned int height;
+	unsigned int width;
+	unsigned int depth;
+	unsigned char *data;
+	int dataLen;		/* For input and output */
+	unsigned int data_valid_len; /* for output only */
+	unsigned int unused;
+} hexagon_hta_nn_tensordef;
+
+typedef struct hexagon_nn_op_node hexagon_nn_op_node;
+struct hexagon_nn_op_node {
+  unsigned int node_id;
+  hta_op_type operation;
+  hta_padding_type padding;
+  hexagon_hta_nn_input* inputs;
+  int inputsLen;
+  hexagon_hta_nn_output* outputs;
+  int outputsLen;
+};
+typedef struct hexagon_nn_const_node hexagon_nn_const_node;
+struct hexagon_nn_const_node {
+  unsigned int node_id;
+  hexagon_hta_nn_tensordef tensor;
+};
+
+/* Actual functions in the interface */
+/* Returns 0 on success, nonzero on error unless otherwise noted */
+/* Configure the hardware and software environment.  Should be called once before doing anything */
+int hexagon_hta_nn_config( void );
+
+/* Initialize a new graph, returns a new nn_id or -1 on error */
+int hexagon_hta_nn_init(hexagon_hta_nn_nn_id *g);
+
+/* Set debug verbosity.  Default is 0, higher values are more verbose */
+int hexagon_hta_nn_set_debug_level(hexagon_hta_nn_nn_id id, int level);
+
+/* Append a node to the graph.  Nodes are executed in the appended order. */
+int hexagon_hta_nn_append_node(
+	hexagon_hta_nn_nn_id id,
+	uint32_t node_id,
+	hta_op_type operation,
+	hta_padding_type padding,
+	const struct input *inputs,
+	uint32_t num_inputs,
+	const struct output *outputs,
+	uint32_t num_outputs);
+
+/*
+ * Append a const node into the graph.  The data is copied locally during this
+ * call, the caller does not need it to persist.
+ */
+int hexagon_hta_nn_append_const_node(
+	hexagon_hta_nn_nn_id id,
+	uint32_t node_id,
+	uint32_t batches,
+	uint32_t height,
+	uint32_t width,
+	uint32_t depth,
+	const uint8_t *data,
+	uint32_t data_len);
+
+/*
+ * Prepare a graph for execution.  Must be done before attempting to execute the graph.
+ */
+int hexagon_hta_nn_prepare(hexagon_hta_nn_nn_id id);
+
+/* Execute the graph with a single input and a single output. */
+int hexagon_hta_nn_execute(
+	hexagon_hta_nn_nn_id id,
+	uint32_t batches_in,
+	uint32_t height_in,
+	uint32_t width_in,
+	uint32_t depth_in,
+	const uint8_t *data_in,
+	uint32_t data_len_in,
+	uint32_t *batches_out,
+	uint32_t *height_out,
+	uint32_t *width_out,
+	uint32_t *depth_out,
+	uint8_t *data_out,
+	uint32_t data_out_max,
+	uint32_t *data_out_size);
+
+/* Tear down a graph, destroying it and freeing resources.  */
+int hexagon_hta_nn_teardown(hexagon_hta_nn_nn_id id);
+
+/* Get the version of the library */
+int hexagon_hta_nn_version(int *ver);
+
+/* Execute the graph with multiple inputs and multiple outputs. */
+int hexagon_hta_nn_execute_new(
+	hexagon_hta_nn_nn_id id,
+	const hexagon_hta_nn_tensordef *inputs,
+	uint32_t n_inputs,
+	hexagon_hta_nn_tensordef *outputs,
+	uint32_t n_outputs);
+
+int hexagon_hta_nn_serialize_size(hexagon_hta_nn_nn_id id, unsigned int *serialized_obj_size_out);
+int hexagon_hta_nn_serialize(hexagon_hta_nn_nn_id id, void *buf, unsigned int buf_len);
+int hexagon_hta_nn_deserialize(void *buf, unsigned len, hexagon_hta_nn_nn_id *g);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //THIRD_PARTY_HTA_HEXAGON_API_H_
diff --git a/third_party/hta/hta_hexagon_nn_ops.h b/third_party/hta/hta_hexagon_nn_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..e2aaa5881c842d12892d21dead102efad08df270
--- /dev/null
+++ b/third_party/hta/hta_hexagon_nn_ops.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted (subject to the limitations in the
+ * disclaimer below) provided that the following conditions are met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *    * Neither the name of The Linux Foundation nor the names of its
+ *      contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *
+ * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+ * GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
+ * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef THIRD_PARTY_HTA_HEXAGON_NN_OPS_H_
+#define THIRD_PARTY_HTA_HEXAGON_NN_OPS_H_
+
+typedef enum hta_op_type_enum {
+#define HTA_DEF_OP(NAME, ...) HTA_OP_##NAME,
+
+#include "hta_ops.h"
+  HTA_NN_OPS_MAX
+
+#undef HTA_DEF_OP
+} hta_op_type;
+
+#endif  // THIRD_PARTY_HTA_HEXAGON_NN_OPS_H_
diff --git a/third_party/hta/hta_ops.h b/third_party/hta/hta_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..3becf1d3a79534131a8cfb3c9508bada52752623
--- /dev/null
+++ b/third_party/hta/hta_ops.h
@@ -0,0 +1,347 @@
+/*
+ * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted (subject to the limitations in the
+ * disclaimer below) provided that the following conditions are met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *    * Neither the name of The Linux Foundation nor the names of its
+ *      contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *
+ * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+ * GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
+ * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * You probably want to
+ *
+ *    ##    #####   #####
+ *   #  #   #    #  #    #
+ *  #    #  #    #  #    #
+ *  ######  #    #  #    #
+ *  #    #  #    #  #    #
+ *  #    #  #####   #####
+ *
+ *
+ *  #    #   ####   #####   ######   ####
+ *  ##   #  #    #  #    #  #       #
+ *  # #  #  #    #  #    #  #####    ####
+ *  #  # #  #    #  #    #  #            #
+ *  #   ##  #    #  #    #  #       #    #
+ *  #    #   ####   #####   ######   ####
+ *
+ *
+ *    ##     #####
+ *   #  #      #
+ *  #    #     #
+ *  ######     #
+ *  #    #     #
+ *  #    #     #
+ *
+ *
+ *   #####  #    #  ######
+ *     #    #    #  #
+ *     #    ######  #####
+ *     #    #    #  #
+ *     #    #    #  #
+ *     #    #    #  ######
+ *
+ *
+ *  ######  #    #  #####
+ *  #       ##   #  #    #
+ *  #####   # #  #  #    #
+ *  #       #  # #  #    #
+ *  #       #   ##  #    #
+ *  ######  #    #  #####
+ *
+ * otherwise the interface becomes incompatible.
+ */
+HTA_DEF_OP(INPUT)
+HTA_DEF_OP(OUTPUT)
+HTA_DEF_OP(Nop)
+HTA_DEF_OP(Const)
+HTA_DEF_OP(Check)
+HTA_DEF_OP(Close_f)
+HTA_DEF_OP(Close_quint8)
+HTA_DEF_OP(Close_q_quint8)
+HTA_DEF_OP(Close_int32)
+HTA_DEF_OP(Close_qint32)
+HTA_DEF_OP(PPrint_8)
+HTA_DEF_OP(PPrint_32)
+HTA_DEF_OP(PPrint_f)
+HTA_DEF_OP(PreFree)
+HTA_DEF_OP(Flatten)
+
+#ifndef HTA_DEF_OP_WREF
+#define HTA_DEF_OP_WREF(NAME) HTA_DEF_OP(NAME) HTA_DEF_OP(NAME##_ref)
+#define __SELF_HTA_DEF_OP_WREF
+#endif
+
+HTA_DEF_OP_WREF(QuantizedConv2d_8x8to32)
+HTA_DEF_OP_WREF(QuantizedMatMul_8x8to32)
+HTA_DEF_OP_WREF(QuantizeDownAndShrinkRange_32to8)
+HTA_DEF_OP_WREF(QuantizedRelu_8)
+HTA_DEF_OP_WREF(QuantizedReluX_8)
+HTA_DEF_OP_WREF(QuantizedMaxPool_8)
+HTA_DEF_OP_WREF(QuantizedAvgPool_8)
+HTA_DEF_OP_WREF(QuantizedL2Pool_8)
+HTA_DEF_OP_WREF(QuantizedConcat_8)
+HTA_DEF_OP_WREF(QuantizedBiasAdd_8p8to32)
+HTA_DEF_OP_WREF(Min_f)
+HTA_DEF_OP_WREF(Max_f)
+HTA_DEF_OP_WREF(Quantize)
+HTA_DEF_OP_WREF(Dequantize)
+HTA_DEF_OP_WREF(Supernode_8x8p8to8)
+
+HTA_DEF_OP(QuantizedFlatten)
+HTA_DEF_OP(Softmax_f)
+HTA_DEF_OP(Conv2d_f)
+HTA_DEF_OP(MatMul_f)
+HTA_DEF_OP(Relu_f)
+HTA_DEF_OP(ReluX_f)
+HTA_DEF_OP(AvgPool_f)
+HTA_DEF_OP(L2Pool_f)
+HTA_DEF_OP(MaxPool_f)
+HTA_DEF_OP(Concat_f)
+HTA_DEF_OP(BiasAdd_f)
+HTA_DEF_OP(LRN_f)
+
+HTA_DEF_OP(Variable)
+HTA_DEF_OP(Assign)
+HTA_DEF_OP(Reshape)
+HTA_DEF_OP(QuantizedReshape)
+HTA_DEF_OP(Tanh_f)
+HTA_DEF_OP(Sigmoid_f)
+HTA_DEF_OP(Slice_8)
+HTA_DEF_OP(Slice_f)
+HTA_DEF_OP(QuantizedSlice_8)
+HTA_DEF_OP(Add_f)
+HTA_DEF_OP(Mul_f)
+HTA_DEF_OP(Minimum_f)
+HTA_DEF_OP(Maximum_f)
+
+HTA_DEF_OP_WREF(Requantize_32to8)
+HTA_DEF_OP_WREF(RequantizationRange_32)
+
+HTA_DEF_OP(Neg_f)
+HTA_DEF_OP(Sub_f)
+HTA_DEF_OP(AddN_f)
+HTA_DEF_OP(Range_int32)
+HTA_DEF_OP(Rank_int32)
+HTA_DEF_OP(Transpose_int32)
+HTA_DEF_OP(Transpose_f)
+HTA_DEF_OP(InstanceNorm_f)
+HTA_DEF_OP_WREF(QuantizedInstanceNorm_8)
+HTA_DEF_OP(Sub_int32)
+HTA_DEF_OP(Add_int32)
+HTA_DEF_OP(Split_f)
+HTA_DEF_OP(Dequantize_qint32_f)
+HTA_DEF_OP(PRelu_f)
+HTA_DEF_OP_WREF(QuantizedPRelu_8)
+HTA_DEF_OP(Sum_f)
+HTA_DEF_OP(Prod_f)
+HTA_DEF_OP(Mul_int32)
+HTA_DEF_OP(LogicalAnd_int32)
+HTA_DEF_OP(LogicalOr_int32)
+HTA_DEF_OP(LogicalXor_int32)
+HTA_DEF_OP(Shape_int32)
+HTA_DEF_OP(Pack_int32)
+HTA_DEF_OP(MirrorPad_f)
+HTA_DEF_OP(ResizeNearestNeighbor_f)
+HTA_DEF_OP(StridedSlice_int32)
+HTA_DEF_OP(StridedSlice_f)
+HTA_DEF_OP(ExpandDims_int32)
+HTA_DEF_OP(ExpandDims_f)
+
+HTA_DEF_OP(LogSoftmax_f)
+HTA_DEF_OP(Split_int32)
+HTA_DEF_OP(QuantizedSplit_8)
+
+HTA_DEF_OP(Deconv_f)
+HTA_DEF_OP_WREF(QuantizedDeconv_8x8to32)
+
+HTA_DEF_OP_WREF(QuantizedMul_8x8to32)
+HTA_DEF_OP_WREF(QuantizedAdd_8p8to32)
+HTA_DEF_OP_WREF(QuantizedSigmoid_8)
+HTA_DEF_OP_WREF(QuantizedTanh_8)
+HTA_DEF_OP_WREF(QuantizedSoftmax_8)
+HTA_DEF_OP_WREF(QuantizedLRN_8)
+HTA_DEF_OP_WREF(Quantizedpad2d_frame_8p)
+HTA_DEF_OP_WREF(QuantizedSub_8p8to32)
+HTA_DEF_OP_WREF(QuantizedMaximum_8)
+HTA_DEF_OP_WREF(QuantizedMinimum_8)
+
+HTA_DEF_OP(Pad_f)
+HTA_DEF_OP(SpaceToBatchND_f)
+HTA_DEF_OP(BatchToSpaceND_f)
+HTA_DEF_OP(QuantizedPad_8)
+HTA_DEF_OP(ResizeBilinear_f)
+HTA_DEF_OP(ConcatV2_f)
+HTA_DEF_OP(ConcatV2_int32)
+HTA_DEF_OP(Prod_int32)
+HTA_DEF_OP(Slice_int32)
+
+HTA_DEF_OP(QuantizedAdd_8p8to8)
+HTA_DEF_OP(QuantizedResizeBilinear_8)
+HTA_DEF_OP(Supernode_8x8p8to8_d32)
+HTA_DEF_OP(Convert_to_d32)
+HTA_DEF_OP(Convert_from_d32)
+HTA_DEF_OP_WREF(QuantizedMaxPool_8_d32)
+HTA_DEF_OP_WREF(QuantizedConcat_8_d32)
+HTA_DEF_OP_WREF(QuantizedAvgPool_8_d32)
+
+HTA_DEF_OP(Sink)
+
+HTA_DEF_OP_WREF(QuantizedPRelu_8_d32)
+HTA_DEF_OP_WREF(AutoQuantize)
+HTA_DEF_OP_WREF(QuantizedDepthwiseConv2d_8x8to32)
+HTA_DEF_OP_WREF(DepthwiseConv2d_f)
+HTA_DEF_OP(DepthwiseSupernode_8x8p8to8)
+HTA_DEF_OP(DepthwiseSupernode_8x8p8to8_d32)
+
+HTA_DEF_OP_WREF(QuantizedMul_8x8to8_d32)
+
+HTA_DEF_OP(FullyConnected_u8)
+#if 0
+HTA_DEF_OP_WREF(QuantizedFC_8x8p8to8)
+#endif
+
+HTA_DEF_OP_WREF(QuantizedAdd_8p8to8_d32)
+
+HTA_DEF_OP_WREF(QuantizedClamp_8)
+HTA_DEF_OP(Clamp_f)
+HTA_DEF_OP(QuantizeForTest_d32)
+HTA_DEF_OP(Close_d32)
+HTA_DEF_OP_WREF(QuantizedSub_8p8to8_d32)
+
+HTA_DEF_OP(InputSupernode_8x8p8to8_outd32)
+HTA_DEF_OP(QuantizedLRN_8_d32)
+HTA_DEF_OP_WREF(QuantizedBiasAdd_32p32to32)
+HTA_DEF_OP_WREF(Quantize_int32)
+
+HTA_DEF_OP(Supernode_8x8p32to8)
+HTA_DEF_OP(DepthwiseSupernode_8x8p32to8)
+HTA_DEF_OP(Supernode_8x8p32to8_d32)
+HTA_DEF_OP(DepthwiseSupernode_8x8p32to8_d32)
+HTA_DEF_OP(InputSupernode_8x8p32to8_outd32)
+
+HTA_DEF_OP(PPrint_8_d32)
+HTA_DEF_OP(PPrintWithPadding_8_d32)
+HTA_DEF_OP_WREF(AutoQuantize_d32)
+
+HTA_DEF_OP_WREF(QuantizedTanh_8_d32)
+HTA_DEF_OP_WREF(QuantizedSigmoid_8_d32)
+HTA_DEF_OP_WREF(QuantizedSoftmax_8_d32)
+
+
+HTA_DEF_OP_WREF(QuantizedL2Pool_8_d32)
+
+HTA_DEF_OP(Gather_f)
+HTA_DEF_OP(Gather_int32)
+HTA_DEF_OP(Gather_8)
+HTA_DEF_OP(Table_f)
+HTA_DEF_OP(Table_int32)
+HTA_DEF_OP(Table_8)
+
+HTA_DEF_OP(FillPadding_8_d32)
+HTA_DEF_OP(QuantizedResizeBilinear_8_d32)
+
+HTA_DEF_OP(QuantizeINPUT_f_to_8)
+HTA_DEF_OP_WREF(DeconvBias_8x8to32)
+
+HTA_DEF_OP(SpaceToBatchND_8)
+HTA_DEF_OP(BatchToSpaceND_8)
+
+
+HTA_DEF_OP(SpaceToDepth_f)
+HTA_DEF_OP(DepthToSpace_f)
+HTA_DEF_OP(SpaceToDepth_8)
+HTA_DEF_OP(DepthToSpace_8)
+
+HTA_DEF_OP(DequantizeOUTPUT_8tof)
+HTA_DEF_OP(QuantizedBatchNorm_8x8p8to8)
+HTA_DEF_OP(QuantizedBatchNorm_8x8p32to8)
+HTA_DEF_OP(QuantizedBatchNorm_8x8p8to8_d32)
+HTA_DEF_OP(QuantizedBatchNorm_8x8p32to8_d32)
+
+HTA_DEF_OP_WREF(QuantizedInstanceNorm_8_d32)
+HTA_DEF_OP_WREF(QuantizedInstanceNormBG_8)
+HTA_DEF_OP_WREF(QuantizedInstanceNormBG_8_d32)
+
+HTA_DEF_OP(SuperFC_8x8p32to8)
+HTA_DEF_OP(SuperFC_8x8p32to8_ref)
+HTA_DEF_OP(SuperFC_8x8p32to8_d32)
+
+HTA_DEF_OP(ChannelShuffle_f)
+HTA_DEF_OP(ChannelShuffle_int32)
+HTA_DEF_OP_WREF(QuantizedChannelShuffle_8)
+HTA_DEF_OP(QuantizedChannelShuffle_8_d32)
+/* this is in op_chanshuffle_d32.c*/
+HTA_DEF_OP(QuantizedSplit_8_d32)
+
+HTA_DEF_OP(QuantizedCrop_8)
+HTA_DEF_OP(ResizeUnitSquare_f)
+HTA_DEF_OP_WREF(ResizeUnitSquare_8)
+HTA_DEF_OP_WREF(Nv21ToRgb_8)
+HTA_DEF_OP_WREF(RgbaToRgb_8)
+HTA_DEF_OP_WREF(Argb32ToRgb_8)
+HTA_DEF_OP(Permute_f)
+HTA_DEF_OP(QuantizedPermute_8)
+HTA_DEF_OP_WREF(QuantizedRoiPool_8)
+HTA_DEF_OP(Proposal_f)
+HTA_DEF_OP(RoiAlign_f)
+HTA_DEF_OP_WREF(QuantizedRoiAlign_8)
+HTA_DEF_OP_WREF(Implode_8)
+HTA_DEF_OP(QuantizedConcat_8_nond32)
+
+HTA_DEF_OP(Close_16tof)
+HTA_DEF_OP(QuantizedLstmInput_16x16to16)
+HTA_DEF_OP(QuantizedLstmOutput_16x16to8)
+
+HTA_DEF_OP(Quantize_16)
+HTA_DEF_OP(Dequantize_16)
+HTA_DEF_OP(Convert_8_16)
+HTA_DEF_OP(QuantizedTanh_16)
+HTA_DEF_OP(QuantizedSigmoid_16)
+
+HTA_DEF_OP_WREF(QuantizeDownAndShrinkRange_32to16)
+HTA_DEF_OP_WREF(Requantize_32to16)
+HTA_DEF_OP_WREF(QuantizedMatMul_8x8p32to16)
+
+HTA_DEF_OP(QuantizedStridedSlice_8)
+HTA_DEF_OP(Bbox_Transform_f)
+HTA_DEF_OP(Softmax_uint8)
+
+HTA_DEF_OP(QuantizedFakeConcat_8_d32)
+
+HTA_DEF_OP(DepthToSpace_8_d32)
+HTA_DEF_OP(OemNode)
+
+HTA_DEF_OP(QuantizedPad_8_d32)
+// Add new operations above this line
+#ifdef __SELF_HTA_DEF_OP_WREF
+#undef __SELF_HTA_DEF_OP_WREF
+#undef HTA_DEF_OP_WREF
+#endif
+
diff --git a/third_party/hta/libhta_dsp_skel.so b/third_party/hta/libhta_dsp_skel.so
new file mode 100644
index 0000000000000000000000000000000000000000..6a371cfef8f47e6541be0f6bc307d9ed72aa5c7a
Binary files /dev/null and b/third_party/hta/libhta_dsp_skel.so differ
diff --git a/third_party/hta/license.txt b/third_party/hta/license.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1fc186df55d1d4b6d43eaea9f7e77be6bc470459
--- /dev/null
+++ b/third_party/hta/license.txt
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted (subject to the limitations in the
+ * disclaimer below) provided that the following conditions are met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *    * Neither the name of The Linux Foundation nor the names of its
+ *      contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *
+ * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+ * GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
+ * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
diff --git a/tools/common.py b/tools/common.py
index 450ca58b5fbca3953f4ce52f2a8698be9cbff18d..4ed60a3632f23bb07e5517174d25336498ef4a64 100644
--- a/tools/common.py
+++ b/tools/common.py
@@ -129,6 +129,7 @@ class DeviceType(object):
     CPU = 'CPU'
     GPU = 'GPU'
     HEXAGON = 'HEXAGON'
+    HTA = 'HTA'
 
 
 class DataFormat(object):
@@ -199,6 +200,8 @@ def parse_device_type(runtime):
 
     if runtime == RuntimeType.dsp:
         device_type = DeviceType.HEXAGON
+    elif runtime == RuntimeType.hta:
+        device_type = DeviceType.HTA
     elif runtime == RuntimeType.gpu:
         device_type = DeviceType.GPU
     elif runtime == RuntimeType.cpu:
@@ -513,6 +516,7 @@ class RuntimeType(object):
     cpu = 'cpu'
     gpu = 'gpu'
     dsp = 'dsp'
+    hta = 'hta'
     cpu_gpu = 'cpu+gpu'
 
 
diff --git a/tools/converter.py b/tools/converter.py
index 8bb9adad2d6f1a1db75c2a7c6fb4ee1470495d7b..0349dd397dbf1977b64e4310b04ea7c29db90c01 100644
--- a/tools/converter.py
+++ b/tools/converter.py
@@ -61,6 +61,7 @@ RuntimeTypeStrs = [
     "cpu",
     "gpu",
     "dsp",
+    "hta",
     "cpu+gpu"
 ]
 
@@ -142,6 +143,8 @@ def parse_device_type(runtime):
 
     if runtime == RuntimeType.dsp:
         device_type = DeviceType.HEXAGON
+    elif runtime == RuntimeType.hta:
+        device_type = DeviceType.HTA
     elif runtime == RuntimeType.gpu:
         device_type = DeviceType.GPU
     elif runtime == RuntimeType.cpu:
@@ -163,6 +166,19 @@ def get_hexagon_mode(configs):
     return False
 
 
+def get_hta_mode(configs):
+    """Tell whether any model in `configs` selects the HTA runtime.
+
+    A model without a runtime entry is treated as "" and never matches.
+    """
+    models = configs[YAMLKeyword.models]
+    selected_runtimes = [
+        models[name].get(YAMLKeyword.runtime, "").lower()
+        for name in models
+    ]
+    return RuntimeType.hta in selected_runtimes
+
+
 def get_opencl_mode(configs):
     runtime_list = []
     for model_name in configs[YAMLKeyword.models]:
@@ -452,6 +468,8 @@ def format_model_config(flags):
                 DeviceType.GPU: ValidationThreshold.gpu_threshold,
                 DeviceType.HEXAGON + "_QUANTIZE":
                     ValidationThreshold.hexagon_threshold,
+                DeviceType.HTA + "_QUANTIZE":
+                    ValidationThreshold.hexagon_threshold,
                 DeviceType.CPU + "_QUANTIZE":
                     ValidationThreshold.cpu_quantize_threshold,
             }
@@ -461,6 +479,7 @@ def format_model_config(flags):
                 if k.upper() not in (DeviceType.CPU,
                                      DeviceType.GPU,
                                      DeviceType.HEXAGON,
+                                     DeviceType.HTA,
                                      DeviceType.CPU + "_QUANTIZE"):
                     raise argparse.ArgumentTypeError(
                         'Unsupported validation threshold runtime: %s' % k)
@@ -740,7 +759,6 @@ def build_model_lib(configs, address_sanitizer):
     # create model library dir
     library_name = configs[YAMLKeyword.library_name]
     for target_abi in configs[YAMLKeyword.target_abis]:
-        hexagon_mode = get_hexagon_mode(configs)
         model_lib_output_path = get_model_lib_output_path(library_name,
                                                           target_abi)
         library_out_dir = os.path.dirname(model_lib_output_path)
@@ -751,7 +769,8 @@ def build_model_lib(configs, address_sanitizer):
             MODEL_LIB_TARGET,
             abi=target_abi,
             toolchain=toolchain,
-            hexagon_mode=hexagon_mode,
+            enable_hexagon=get_hexagon_mode(configs),
+            enable_hta=get_hta_mode(configs),
             enable_opencl=get_opencl_mode(configs),
             enable_quantize=get_quantize_mode(configs),
             address_sanitizer=address_sanitizer,
@@ -842,7 +861,6 @@ def report_run_statistics(stdout,
 def build_mace_run(configs, target_abi, toolchain, enable_openmp,
                    address_sanitizer, mace_lib_type):
     library_name = configs[YAMLKeyword.library_name]
-    hexagon_mode = get_hexagon_mode(configs)
 
     build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
     if os.path.exists(build_tmp_binary_dir):
@@ -865,7 +883,8 @@ def build_mace_run(configs, target_abi, toolchain, enable_openmp,
         mace_run_target,
         abi=target_abi,
         toolchain=toolchain,
-        hexagon_mode=hexagon_mode,
+        enable_hexagon=get_hexagon_mode(configs),
+        enable_hta=get_hta_mode(configs),
         enable_openmp=enable_openmp,
         enable_opencl=get_opencl_mode(configs),
         enable_quantize=get_quantize_mode(configs),
@@ -880,7 +899,6 @@ def build_mace_run(configs, target_abi, toolchain, enable_openmp,
 def build_example(configs, target_abi, toolchain,
                   enable_openmp, mace_lib_type, cl_binary_to_code, device):
     library_name = configs[YAMLKeyword.library_name]
-    hexagon_mode = get_hexagon_mode(configs)
 
     build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
     if os.path.exists(build_tmp_binary_dir):
@@ -914,7 +932,8 @@ def build_example(configs, target_abi, toolchain,
                             enable_openmp=enable_openmp,
                             enable_opencl=get_opencl_mode(configs),
                             enable_quantize=get_quantize_mode(configs),
-                            hexagon_mode=hexagon_mode,
+                            enable_hexagon=get_hexagon_mode(configs),
+                            enable_hta=get_hta_mode(configs),
                             address_sanitizer=flags.address_sanitizer,
                             symbol_hidden=symbol_hidden)
 
@@ -945,7 +964,8 @@ def build_example(configs, target_abi, toolchain,
                             enable_openmp=enable_openmp,
                             enable_opencl=get_opencl_mode(configs),
                             enable_quantize=get_quantize_mode(configs),
-                            hexagon_mode=hexagon_mode,
+                            enable_hexagon=get_hexagon_mode(configs),
+                            enable_hta=get_hta_mode(configs),
                             address_sanitizer=flags.address_sanitizer,
                             extra_args=build_arg)
 
@@ -1028,7 +1048,6 @@ def build_benchmark_model(configs,
                           enable_openmp,
                           mace_lib_type):
     library_name = configs[YAMLKeyword.library_name]
-    hexagon_mode = get_hexagon_mode(configs)
 
     link_dynamic = mace_lib_type == MACELibType.dynamic
     if link_dynamic:
@@ -1051,7 +1070,8 @@ def build_benchmark_model(configs,
                             enable_openmp=enable_openmp,
                             enable_opencl=get_opencl_mode(configs),
                             enable_quantize=get_quantize_mode(configs),
-                            hexagon_mode=hexagon_mode,
+                            enable_hexagon=get_hexagon_mode(configs),
+                            enable_hta=get_hta_mode(configs),
                             symbol_hidden=symbol_hidden,
                             extra_args=build_arg)
     # clear tmp binary dir
diff --git a/tools/sh_commands.py b/tools/sh_commands.py
index ac00dc66f77a8a3996a9276160e6411b01d60aef..e24d3055a09a4c37e5626bb33d3bb1c56a59549d 100644
--- a/tools/sh_commands.py
+++ b/tools/sh_commands.py
@@ -263,7 +263,8 @@ def find_simpleperf_library(abi, simpleperf_path=''):
 def bazel_build(target,
                 abi="armeabi-v7a",
                 toolchain='android',
-                hexagon_mode=False,
+                enable_hexagon=False,
+                enable_hta=False,
                 enable_openmp=True,
                 enable_neon=True,
                 enable_opencl=True,
@@ -299,7 +300,9 @@ def bazel_build(target,
             "--define",
             "quantize=%s" % str(enable_quantize).lower(),
             "--define",
-            "hexagon=%s" % str(hexagon_mode).lower())
+            "hexagon=%s" % str(enable_hexagon).lower(),
+            "--define",
+            "hta=%s" % str(enable_hta).lower())
     if address_sanitizer:
         bazel_args += ("--config", "asan")
     else: