diff --git a/mace/BUILD.bazel b/mace/BUILD.bazel index 77e2c532ec7bb563a17af07e9fab1cfce27be58a..ef1c338d0838c12ef2c44035e6b8104baf1d6361 100644 --- a/mace/BUILD.bazel +++ b/mace/BUILD.bazel @@ -78,6 +78,17 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "hta_enabled", + define_values = { + "hta": "true", + }, + values = { + "crosstool_top": "//external:android/crosstool", + }, + visibility = ["//visibility:public"], +) + config_setting( name = "openmp_enabled", define_values = { diff --git a/mace/core/BUILD.bazel b/mace/core/BUILD.bazel index 2c905389aec20c51a2e54672411c1a696ff1b3b9..91df4f0f1d0d0a66b2903575a4373b26897628cb 100644 --- a/mace/core/BUILD.bazel +++ b/mace/core/BUILD.bazel @@ -12,6 +12,8 @@ load( "if_android", "if_android_armv7", "if_hexagon_enabled", + "if_hta_enabled", + "if_hexagon_or_hta_enabled", "if_neon_enabled", "if_not_hexagon_enabled", "if_opencl_enabled", @@ -33,17 +35,24 @@ cc_library( [ "runtime/opencl/*.cc", ], - )) + if_hexagon_enabled(glob([ - "runtime/hexagon/*.cc", - ])), + )) + if_hexagon_enabled([ + "runtime/hexagon/hexagon_dsp_wrapper.cc", + ]) + if_hta_enabled([ + "runtime/hexagon/hexagon_hta_wrapper.cc", + ]), hdrs = glob([ "*.h", "runtime/cpu/*.h", - ]) + if_opencl_enabled(glob( - [ - "runtime/opencl/*.h", - ], - )) + if_hexagon_enabled(glob(["runtime/hexagon/*.h"])), + ]) + if_opencl_enabled(glob([ + "runtime/opencl/*.h", + ])) + if_hexagon_or_hta_enabled(glob([ + "runtime/hexagon/hexagon_control_wrapper.h", + "runtime/hexagon/hexagon_device.h", + ])) + if_hexagon_enabled(glob([ + "runtime/hexagon/*dsp*.h", + ])) + if_hta_enabled(glob([ + "runtime/hexagon/*hta*.h", + ])), copts = [ "-Werror", "-Wextra", @@ -57,6 +66,8 @@ cc_library( "-DMACE_ENABLE_QUANTIZE", ]) + if_hexagon_enabled([ "-DMACE_ENABLE_HEXAGON", + ]) + if_hta_enabled([ + "-DMACE_ENABLE_HTA", ]) + if_neon_enabled([ "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ @@ -77,6 +88,8 @@ cc_library( "@gemmlowp", ]) + if_hexagon_enabled([ 
"//third_party/nnlib:libhexagon", + ]) + if_hta_enabled([ + "//third_party/hta", ]), ) diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.h b/mace/core/runtime/hexagon/hexagon_control_wrapper.h index c74af2578f345b4d4c5b976811a0f6d64dbdf889..eda740f400e47bab5fac2ab04057522ad9f9b7ce 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.h +++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.h @@ -16,50 +16,67 @@ #define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_ #include +#include #include #include "mace/core/tensor.h" #include "mace/public/mace.h" -#include "third_party/nnlib/hexagon_nn.h" namespace mace { +struct InOutInfo { + InOutInfo(const std::vector &shape, + const DataType data_type, + const float scale, + const int32_t zero_point, + std::unique_ptr tensor_u8) + : shape(shape), + data_type(data_type), + scale(scale), + zero_point(zero_point), + tensor_u8(std::move(tensor_u8)) {} + + std::vector shape; + DataType data_type; + float scale; + int32_t zero_point; + std::unique_ptr tensor_u8; +}; + class HexagonControlWrapper { public: - HexagonControlWrapper() {} - int GetVersion(); - bool Config(); - bool Init(); - bool Finalize(); - bool SetupGraph(const NetDef &net_def, const unsigned char *model_data); - bool ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor); - bool ExecuteGraphNew(const std::vector &input_tensors, - std::vector *output_tensors, - bool hexagon_quantize); + HexagonControlWrapper() = default; + virtual ~HexagonControlWrapper() = default; - bool TeardownGraph(); - void PrintLog(); - void PrintGraph(); - void GetPerfInfo(); - void ResetPerfInfo(); - void SetDebugLevel(int level); + virtual int GetVersion() = 0; + virtual bool Config() = 0; + virtual bool Init() = 0; + virtual bool Finalize() = 0; + virtual bool SetupGraph(const NetDef &net_def, + const unsigned char *model_data) = 0; + virtual bool ExecuteGraph(const Tensor &input_tensor, + Tensor *output_tensor) = 0; + virtual bool 
ExecuteGraphNew(const std::vector &input_tensors, + std::vector *output_tensors) = 0; + virtual bool TeardownGraph() = 0; + virtual void PrintLog() = 0; + virtual void PrintGraph() = 0; + virtual void GetPerfInfo() = 0; + virtual void ResetPerfInfo() = 0; + virtual void SetDebugLevel(int level) = 0; - private: - static constexpr int NODE_ID_OFFSET = 10000; - static constexpr int NUM_METADATA = 4; + protected: + static constexpr int kNodeIdOffset = 10000; + static constexpr int kNumMetaData = 4; - inline uint32_t node_id(uint32_t nodeid) { return NODE_ID_OFFSET + nodeid; } + inline uint32_t node_id(uint32_t nodeid) { return kNodeIdOffset + nodeid; } int nn_id_; - std::vector> input_shapes_; - std::vector> output_shapes_; - std::vector input_data_types_; - std::vector output_data_types_; - uint32_t num_inputs_; - uint32_t num_outputs_; - std::vector> input_tensors_u8_; - std::vector> output_tensors_u8_; + std::vector input_info_; + std::vector output_info_; + int num_inputs_; + int num_outputs_; MACE_DISABLE_COPY_AND_ASSIGN(HexagonControlWrapper); }; diff --git a/mace/core/runtime/hexagon/hexagon_device.h b/mace/core/runtime/hexagon/hexagon_device.h index 0c933ae0b6ff2171008058cc074c293e1909b819..f80607d3196582f850d0911fec0429784cabaca0 100644 --- a/mace/core/runtime/hexagon/hexagon_device.h +++ b/mace/core/runtime/hexagon/hexagon_device.h @@ -15,18 +15,55 @@ #ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DEVICE_H_ #define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DEVICE_H_ +#include +#include + #include "mace/core/device.h" +#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" +#ifdef MACE_ENABLE_HEXAGON +#include "mace/core/runtime/hexagon/hexagon_dsp_wrapper.h" +#endif +#ifdef MACE_ENABLE_HTA +#include "mace/core/runtime/hexagon/hexagon_hta_wrapper.h" +#endif namespace mace { class HexagonDevice : public CPUDevice { public: - HexagonDevice() : CPUDevice(0, AFFINITY_NONE, false) {} + explicit HexagonDevice(DeviceType device_type) + : CPUDevice(0, AFFINITY_NONE, 
false), + device_type_(device_type) {} DeviceType device_type() const override { - return DeviceType::HEXAGON; + return device_type_; }; + + private: + DeviceType device_type_; }; +std::unique_ptr CreateHexagonControlWrapper( + DeviceType device_type) { + std::unique_ptr hexagon_controller; + + switch (device_type) { +#ifdef MACE_ENABLE_HEXAGON + case HEXAGON: + hexagon_controller = make_unique(); + break; +#endif +#ifdef MACE_ENABLE_HTA + case HTA: + hexagon_controller = make_unique(); + break; +#endif + default: + LOG(FATAL) << "Not supported Hexagon device type: " << device_type; + } + + return hexagon_controller; +} + } // namespace mace #endif // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DEVICE_H_ diff --git a/mace/core/runtime/hexagon/hexagon_nn_ops.h b/mace/core/runtime/hexagon/hexagon_dsp_ops.h similarity index 89% rename from mace/core/runtime/hexagon/hexagon_nn_ops.h rename to mace/core/runtime/hexagon/hexagon_dsp_ops.h index 3ebedb8eb8d81850cd29383fd7667c42b2369262..1f50e13cb48bb8133fc31d71752a623fed16217f 100644 --- a/mace/core/runtime/hexagon/hexagon_nn_ops.h +++ b/mace/core/runtime/hexagon/hexagon_dsp_ops.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_OPS_H_ -#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_OPS_H_ +#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DSP_OPS_H_ +#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DSP_OPS_H_ #include #include @@ -57,4 +57,4 @@ class OpMap { }; } // namespace mace -#endif // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_OPS_H_ +#endif // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DSP_OPS_H_ diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc similarity index 75% rename from mace/core/runtime/hexagon/hexagon_control_wrapper.cc rename to mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc index b39bfeed2510ab10f401fe653ac3ad919e8b2619..a98d9ad1499251a15d7b969cecee2eaf28f84347 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc +++ b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc @@ -14,17 +14,19 @@ #include #include +#include #include // NOLINT(build/c++11) #include #include #include #include -#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" -#include "mace/core/runtime/hexagon/hexagon_nn_ops.h" +#include "mace/core/runtime/hexagon/hexagon_dsp_wrapper.h" +#include "mace/core/runtime/hexagon/hexagon_dsp_ops.h" #include "mace/core/types.h" #include "mace/port/env.h" -#include "mace/utils/quantize.h" +#include "mace/utils/memory.h" +#include "third_party/nnlib/hexagon_nn.h" namespace mace { @@ -85,33 +87,33 @@ std::string FloatToString(const FloatType v, const int32_t precision) { } } // namespace -int HexagonControlWrapper::GetVersion() { +int HexagonDSPWrapper::GetVersion() { int version; MACE_CHECK(hexagon_nn_version(&version) == 0, "get version error"); return version; } -bool HexagonControlWrapper::Config() { +bool HexagonDSPWrapper::Config() { LOG(INFO) << "Hexagon config"; MACE_CHECK(hexagon_nn_set_powersave_level(0) == 0, "hexagon power error"); MACE_CHECK(hexagon_nn_config() == 0, "hexagon config error"); return true; } -bool HexagonControlWrapper::Init() { +bool 
HexagonDSPWrapper::Init() { LOG(INFO) << "Hexagon init"; MACE_CHECK(hexagon_nn_init(&nn_id_) == 0, "hexagon_nn_init failed"); ResetPerfInfo(); return true; } -bool HexagonControlWrapper::Finalize() { +bool HexagonDSPWrapper::Finalize() { LOG(INFO) << "Hexagon finalize"; return hexagon_nn_set_powersave_level(1) == 0; } -bool HexagonControlWrapper::SetupGraph(const NetDef &net_def, - unsigned const char *model_data) { +bool HexagonDSPWrapper::SetupGraph(const NetDef &net_def, + unsigned const char *model_data) { LOG(INFO) << "Hexagon setup graph"; int64_t t0 = NowMicros(); @@ -229,36 +231,40 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def, cached_outputs.clear(); // input info - num_inputs_ = 0; - for (const InputInfo &input_info : net_def.input_info()) { + num_inputs_ = net_def.input_info_size(); + input_info_.reserve(num_inputs_); + for (const InputOutputInfo &input_info : net_def.input_info()) { std::vector input_shape(input_info.dims().begin(), input_info.dims().end()); while (input_shape.size() < 4) { input_shape.insert(input_shape.begin(), 1); } - input_shapes_.push_back(input_shape); - input_data_types_.push_back(input_info.data_type()); - num_inputs_ += 1; + input_info_.emplace_back(input_shape, + input_info.data_type(), + input_info.scale(), + input_info.zero_point(), + make_unique()); } - input_tensors_u8_.reserve(num_inputs_); // output info - num_outputs_ = 0; - for (const OutputInfo &output_info : net_def.output_info()) { + num_outputs_ = net_def.output_info_size(); + output_info_.reserve(num_outputs_); + for (const InputOutputInfo &output_info : net_def.output_info()) { std::vector output_shape(output_info.dims().begin(), output_info.dims().end()); while (output_shape.size() < 4) { output_shape.insert(output_shape.begin(), 1); } - output_shapes_.push_back(output_shape); - output_data_types_.push_back(output_info.data_type()); - num_outputs_ += 1; + output_info_.emplace_back(output_shape, + output_info.data_type(), + output_info.scale(), 
+ output_info.zero_point(), + make_unique()); VLOG(1) << "OutputInfo: " << "\n\t shape: " << output_shape[0] << " " << output_shape[1] << " " << output_shape[2] << " " << output_shape[3] << "\n\t type: " << output_info.data_type(); } - output_tensors_u8_.reserve(num_outputs_); int64_t t1 = NowMicros(); @@ -271,14 +277,14 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def, return true; } -bool HexagonControlWrapper::TeardownGraph() { +bool HexagonDSPWrapper::TeardownGraph() { LOG(INFO) << "Hexagon teardown graph"; return hexagon_nn_teardown(nn_id_) == 0; } #define MACE_PRINT_BUFSIZE (2 * 1024 * 1024) -void HexagonControlWrapper::PrintLog() { +void HexagonDSPWrapper::PrintLog() { char *buf; if ((buf = new char[MACE_PRINT_BUFSIZE]) == NULL) return; MACE_CHECK(hexagon_nn_getlog(nn_id_, reinterpret_cast(buf), @@ -288,7 +294,7 @@ void HexagonControlWrapper::PrintLog() { delete[] buf; } -void HexagonControlWrapper::PrintGraph() { +void HexagonDSPWrapper::PrintGraph() { LOG(INFO) << "Print Graph"; char *buf; if ((buf = new char[MACE_PRINT_BUFSIZE]) == NULL) return; @@ -299,13 +305,13 @@ void HexagonControlWrapper::PrintGraph() { delete[] buf; } -void HexagonControlWrapper::SetDebugLevel(int level) { +void HexagonDSPWrapper::SetDebugLevel(int level) { LOG(INFO) << "Set debug level: " << level; MACE_CHECK(hexagon_nn_set_debug_level(nn_id_, level) == 0, "set debug level error"); } -void HexagonControlWrapper::GetPerfInfo() { +void HexagonDSPWrapper::GetPerfInfo() { LOG(INFO) << "Get perf info"; std::vector perf_info(MACE_MAX_NODE); unsigned int n_items = 0; @@ -380,20 +386,20 @@ void HexagonControlWrapper::GetPerfInfo() { LOG(INFO) << "total duration: " << std::fixed << total_duration; } -void HexagonControlWrapper::ResetPerfInfo() { +void HexagonDSPWrapper::ResetPerfInfo() { LOG(INFO) << "Reset perf info"; MACE_CHECK(hexagon_nn_reset_perfinfo(nn_id_, NN_GRAPH_PERFEVENT_UTIME) == 0, "reset perf error"); } -bool HexagonControlWrapper::ExecuteGraph(const Tensor 
&input_tensor, - Tensor *output_tensor) { +bool HexagonDSPWrapper::ExecuteGraph(const Tensor &input_tensor, + Tensor *output_tensor) { VLOG(2) << "Execute graph: " << nn_id_; // single input and single output MACE_ASSERT(num_inputs_ == 1, "Wrong inputs num"); MACE_ASSERT(num_outputs_ == 1, "Wrong outputs num"); - output_tensor->SetDtype(output_data_types_[0]); - output_tensor->Resize(output_shapes_[0]); + output_tensor->SetDtype(output_info_[0].data_type); + output_tensor->Resize(output_info_[0].shape); std::vector output_shape(4); uint32_t output_bytes; int res = hexagon_nn_execute( @@ -413,10 +419,11 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor, &output_bytes); MACE_CHECK(res == 0, "execute error"); - MACE_ASSERT(output_shape.size() == output_shapes_[0].size(), + MACE_ASSERT(output_shape.size() == output_info_[0].shape.size(), "wrong output shape inferred"); for (size_t i = 0; i < output_shape.size(); ++i) { - MACE_ASSERT(static_cast(output_shape[i]) == output_shapes_[0][i], + MACE_ASSERT(static_cast(output_shape[i]) + == output_info_[0].shape[i], "wrong output shape inferred"); } MACE_ASSERT(output_bytes == output_tensor->raw_size(), @@ -424,59 +431,35 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor, return res == 0; } -bool HexagonControlWrapper::ExecuteGraphNew( +bool HexagonDSPWrapper::ExecuteGraphNew( const std::vector &input_tensors, - std::vector *output_tensors, - bool hexagon_quantize) { + std::vector *output_tensors) { VLOG(2) << "Execute graph new: " << nn_id_; uint32_t num_inputs = static_cast(input_tensors.size()); uint32_t num_outputs = static_cast(output_tensors->size()); MACE_ASSERT(num_inputs_ == num_inputs, "Wrong inputs num"); MACE_ASSERT(num_outputs_ == num_outputs, "Wrong outputs num"); - std::vector inputs(num_inputs * NUM_METADATA); - std::vector outputs(num_outputs * NUM_METADATA); + std::vector inputs(num_inputs * kNumMetaData); + std::vector outputs(num_outputs * kNumMetaData); std::vector 
input_metadata(num_inputs); std::vector output_metadata(num_outputs); // transform mace input to hexagon input for (size_t i = 0; i < num_inputs; ++i) { std::vector input_shape = input_tensors[i]->shape(); - size_t index = i * NUM_METADATA; + size_t index = i * kNumMetaData; inputs[index].batches = static_cast(input_shape[0]); inputs[index].height = static_cast(input_shape[1]); inputs[index].width = static_cast(input_shape[2]); inputs[index].depth = static_cast(input_shape[3]); - if (hexagon_quantize) { - inputs[index].data = - const_cast(reinterpret_cast( - input_tensors[i]->raw_data())); - inputs[index].dataLen = static_cast(input_tensors[i]->raw_size()); - inputs[index].data_valid_len = - static_cast(input_tensors[i]->raw_size()); - input_metadata[i].Init(.0f, .0f, 1); - } else { - if (input_tensors_u8_.size() < i + 1) { - input_tensors_u8_.emplace_back(new Tensor()); - input_tensors_u8_[i]->SetDtype(DT_UINT8); - input_tensors_u8_[i]->Resize(input_shape); - } - - Quantize(*input_tensors[i], - input_tensors_u8_[i].get(), - &input_metadata[i].min_val, - &input_metadata[i].max_val); - - inputs[index].data = - const_cast(reinterpret_cast( - input_tensors_u8_[i]->raw_data())); - inputs[index].dataLen = - static_cast(input_tensors_u8_[i]->raw_size()); - inputs[index].data_valid_len = - static_cast(input_tensors_u8_[i]->raw_size()); - input_metadata[i].needs_quantization = 0; - } + inputs[index].data = const_cast( + reinterpret_cast(input_tensors[i]->raw_data())); + inputs[index].dataLen = static_cast(input_tensors[i]->raw_size()); + inputs[index].data_valid_len = + static_cast(input_tensors[i]->raw_size()); inputs[index].unused = 0; + input_metadata[i].Init(.0f, .0f, 1); AddInputMetadata(input_metadata[i].min_val, &inputs[index + 1]); AddInputMetadata(input_metadata[i].max_val, &inputs[index + 2]); AddInputMetadata(input_metadata[i].needs_quantization, &inputs[index + 3]); @@ -484,29 +467,14 @@ bool HexagonControlWrapper::ExecuteGraphNew( // transform mace output to 
hexagon output for (size_t i = 0; i < num_outputs; ++i) { - size_t index = i * NUM_METADATA; - (*output_tensors)[i]->SetDtype(output_data_types_[i]); - (*output_tensors)[i]->Resize(output_shapes_[i]); - - if (hexagon_quantize) { - outputs[index].data = reinterpret_cast( - (*output_tensors)[i]->raw_mutable_data()); - outputs[index].dataLen = - static_cast((*output_tensors)[i]->raw_size()); - output_metadata[i].Init(.0f, .0f, 1); - } else { - if (output_tensors_u8_.size() < i + 1) { - output_tensors_u8_.emplace_back(new Tensor()); - output_tensors_u8_[i]->SetDtype(DT_UINT8); - output_tensors_u8_[i]->Resize(output_shapes_[i]); - } + size_t index = i * kNumMetaData; + (*output_tensors)[i]->SetDtype(output_info_[i].data_type); + (*output_tensors)[i]->Resize(output_info_[i].shape); - outputs[index].data = reinterpret_cast( - output_tensors_u8_[i]->raw_mutable_data()); - outputs[index].dataLen = - static_cast(output_tensors_u8_[i]->raw_size()); - output_metadata[i].Init(.0f, .0f, 0); - } + outputs[index].data = reinterpret_cast( + (*output_tensors)[i]->raw_mutable_data()); + outputs[index].dataLen = static_cast((*output_tensors)[i]->raw_size()); + output_metadata[i].Init(.0f, .0f, 1); AddOutputMetadata(output_metadata[i].min_val, &outputs[index + 1]); AddOutputMetadata(output_metadata[i].max_val, &outputs[index + 2]); @@ -517,38 +485,27 @@ bool HexagonControlWrapper::ExecuteGraphNew( // Execute graph int res = hexagon_nn_execute_new(nn_id_, inputs.data(), - num_inputs * NUM_METADATA, + num_inputs * kNumMetaData, outputs.data(), - num_outputs * NUM_METADATA); + num_outputs * kNumMetaData); // handle hexagon output for (size_t i = 0; i < num_outputs; ++i) { - size_t index = i * NUM_METADATA; + size_t index = i * kNumMetaData; std::vector output_shape{ outputs[index].batches, outputs[index].height, outputs[index].width, outputs[index].depth}; - MACE_ASSERT(output_shape.size() == output_shapes_[i].size(), + MACE_ASSERT(output_shape.size() == output_info_[i].shape.size(), 
"wrong output shape inferred"); for (size_t j = 0; j < output_shape.size(); ++j) { MACE_ASSERT(static_cast(output_shape[j]) - == output_shapes_[i][j], + == output_info_[i].shape[j], "wrong output shape inferred"); } - if (hexagon_quantize) { - MACE_ASSERT(static_cast(outputs[index].data_valid_len) - == (*output_tensors)[i]->raw_size(), - "wrong output bytes inferred."); - } else { - MACE_ASSERT(static_cast(outputs[index].data_valid_len) - == output_tensors_u8_[i]->raw_size(), - "wrong output bytes inferred."); - - DeQuantize(*output_tensors_u8_[i], - output_metadata[i].min_val, - output_metadata[i].max_val, - (*output_tensors)[i]); - } + MACE_ASSERT(static_cast(outputs[index].data_valid_len) + == (*output_tensors)[i]->raw_size(), + "wrong output bytes inferred."); } return res == 0; diff --git a/mace/core/runtime/hexagon/hexagon_dsp_wrapper.h b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..2c46414bf390b87af35f2000e2732b0e50663e95 --- /dev/null +++ b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.h @@ -0,0 +1,51 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DSP_WRAPPER_H_ +#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DSP_WRAPPER_H_ + +#include + +#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" +#include "mace/core/tensor.h" +#include "mace/public/mace.h" + +namespace mace { + +class HexagonDSPWrapper : public HexagonControlWrapper { + public: + HexagonDSPWrapper() = default; + + int GetVersion() override; + bool Config() override; + bool Init() override; + bool Finalize() override; + bool SetupGraph(const NetDef &net_def, + const unsigned char *model_data) override; + bool ExecuteGraph(const Tensor &input_tensor, + Tensor *output_tensor) override; + bool ExecuteGraphNew(const std::vector &input_tensors, + std::vector *output_tensors) override; + bool TeardownGraph() override; + void PrintLog() override; + void PrintGraph() override; + void GetPerfInfo() override; + void ResetPerfInfo() override; + void SetDebugLevel(int level) override; + + MACE_DISABLE_COPY_AND_ASSIGN(HexagonDSPWrapper); +}; +} // namespace mace + +#endif // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DSP_WRAPPER_H_ diff --git a/mace/core/runtime/hexagon/hexagon_hta_ops.h b/mace/core/runtime/hexagon/hexagon_hta_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..39a108609d815b2eeaf805d611b5fb4fbd69c564 --- /dev/null +++ b/mace/core/runtime/hexagon/hexagon_hta_ops.h @@ -0,0 +1,50 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_OPS_H_ +#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_OPS_H_ + +#include +#include + +#include "mace/utils/logging.h" +#include "third_party/hta/hta_hexagon_nn_ops.h" + +namespace mace { + +class OpMap { + public: + void Init() { +#define HTA_DEF_OP(NAME) op_map_[#NAME] = HTA_OP_##NAME; + +#include "third_party/hta/hta_ops.h" + +#undef HTA_DEF_OP + } + + hta_op_type GetOpId(const std::string &op_type) { + if (op_map_.find(op_type) != end(op_map_)) { + return op_map_[op_type]; + } else { + LOG(ERROR) << "HTA unsupported op type: " << op_type; + return HTA_NN_OPS_MAX; + } + } + + private: + std::unordered_map op_map_; +}; +} // namespace mace + +#endif // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_OPS_H_ diff --git a/mace/core/runtime/hexagon/hexagon_hta_wrapper.cc b/mace/core/runtime/hexagon/hexagon_hta_wrapper.cc new file mode 100644 index 0000000000000000000000000000000000000000..e3754f19ca8f0528e0679816cd18c0ccfbb1197a --- /dev/null +++ b/mace/core/runtime/hexagon/hexagon_hta_wrapper.cc @@ -0,0 +1,318 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mace/core/runtime/hexagon/hexagon_hta_wrapper.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "mace/core/runtime/hexagon/hexagon_hta_ops.h" +#include "mace/core/types.h" +#include "mace/utils/memory.h" +#include "mace/utils/quantize.h" +#include "third_party/hta/hta_hexagon_api.h" + +namespace mace { + +int HexagonHTAWrapper::GetVersion() { + int version; + MACE_CHECK(hexagon_hta_nn_version(&version) == 0, "get version error"); + return version; +} + +bool HexagonHTAWrapper::Config() { + LOG(INFO) << "HTA config"; + MACE_CHECK(hexagon_hta_nn_config() == 0, "hexagon config error"); + return true; +} + +bool HexagonHTAWrapper::Init() { + LOG(INFO) << "Hexagon init"; + MACE_CHECK(hexagon_hta_nn_init(&nn_id_) == 0, "hexagon_nn_init failed"); + ResetPerfInfo(); + return true; +} + +bool HexagonHTAWrapper::Finalize() { + LOG(INFO) << "Hexagon finalize"; + return true; +} + +bool HexagonHTAWrapper::SetupGraph(const NetDef &net_def, + unsigned const char *model_data) { + LOG(INFO) << "Hexagon setup graph"; + + int64_t t0 = NowMicros(); + + // const node + for (const ConstTensor &const_tensor : net_def.tensors()) { + std::vector tensor_shape(const_tensor.dims().begin(), + const_tensor.dims().end()); + while (tensor_shape.size() < 4) { + tensor_shape.insert(tensor_shape.begin(), 1); + } + + hexagon_nn_const_node const_node; + const_node.node_id = node_id(const_tensor.node_id()); + const_node.tensor.batches = tensor_shape[0]; + const_node.tensor.height = tensor_shape[1]; + const_node.tensor.width = tensor_shape[2]; + const_node.tensor.depth = tensor_shape[3]; + + if (const_tensor.data_type() == DataType::DT_INT32 && + const_tensor.data_size() == 0) { + const_node.tensor.data = NULL; + const_node.tensor.dataLen = 0; + } else { + const_node.tensor.data = + const_cast(model_data + const_tensor.offset()); + const_node.tensor.dataLen = const_tensor.data_size() * + GetEnumTypeSize(const_tensor.data_type()); + } + + 
hexagon_hta_nn_append_const_node(nn_id_, + const_node.node_id, + const_node.tensor.batches, + const_node.tensor.height, + const_node.tensor.width, + const_node.tensor.depth, + const_node.tensor.data, + const_node.tensor.dataLen); + } + + // op node + OpMap op_map; + op_map.Init(); + std::vector> cached_inputs; + std::vector> cached_outputs; + std::vector inputs; + std::vector outputs; + + for (const OperatorDef &op : net_def.op()) { + hta_op_type op_id = op_map.GetOpId(op.type()); + inputs.resize(op.node_input().size()); + for (int i = 0; i < op.node_input().size(); ++i) { + inputs[i].src_id = node_id(op.node_input()[i].node_id()); + inputs[i].output_idx = op.node_input()[i].output_port(); + } + outputs.resize(op.output_shape().size()); + for (int i = 0; i < op.output_shape().size(); ++i) { + outputs[i].rank = op.output_shape()[i].dims().size(); + for (size_t j = 0; j < outputs[i].rank; ++j) { + outputs[i].max_sizes[j] = op.output_shape()[i].dims()[j]; + } + if (outputs[i].rank == 0) { + outputs[i].rank = 1; + outputs[i].max_sizes[0] = 1; + } + outputs[i].max_sizes[outputs[i].rank] = 0; + outputs[i].elementsize = GetEnumTypeSize( + static_cast(op.output_type()[i])); + outputs[i].zero_offset = 0; + outputs[i].stepsize = 0; + } + cached_inputs.push_back(inputs); + cached_outputs.push_back(outputs); + + auto padding_type = static_cast(op.padding()); + + hexagon_nn_op_node op_node; + op_node.node_id = node_id(op.node_id()); + op_node.operation = op_id; + op_node.padding = padding_type; + op_node.inputs = cached_inputs.back().data(); + op_node.inputsLen = inputs.size(); + op_node.outputs = cached_outputs.back().data(); + op_node.outputsLen = outputs.size(); + + hexagon_hta_nn_append_node(nn_id_, + op_node.node_id, + op_node.operation, + op_node.padding, + op_node.inputs, + op_node.inputsLen, + op_node.outputs, + op_node.outputsLen); + } + + // input info + num_inputs_ = net_def.input_info_size(); + input_info_.reserve(num_inputs_); + for (const InputOutputInfo 
&input_info : net_def.input_info()) { + std::vector input_shape(input_info.dims().begin(), + input_info.dims().end()); + while (input_shape.size() < 4) { + input_shape.insert(input_shape.begin(), 1); + } + input_info_.emplace_back(input_shape, + input_info.data_type(), + input_info.scale(), + input_info.zero_point(), + make_unique()); + } + + // output info + num_outputs_ = net_def.output_info_size(); + output_info_.reserve(num_outputs_); + for (const InputOutputInfo &output_info : net_def.output_info()) { + std::vector output_shape(output_info.dims().begin(), + output_info.dims().end()); + while (output_shape.size() < 4) { + output_shape.insert(output_shape.begin(), 1); + } + output_info_.emplace_back(output_shape, + output_info.data_type(), + output_info.scale(), + output_info.zero_point(), + make_unique()); + VLOG(1) << "OutputInfo: " + << "\n\t shape: " << output_shape[0] << " " << output_shape[1] + << " " << output_shape[2] << " " << output_shape[3] + << "\n\t type: " << output_info.data_type(); + } + + int64_t t1 = NowMicros(); + + MACE_CHECK(hexagon_hta_nn_prepare(nn_id_) == 0, "hexagon_nn_prepare failed"); + + int64_t t2 = NowMicros(); + + VLOG(1) << "Setup time: " << t1 - t0 << " " << t2 - t1; + + return true; +} + +bool HexagonHTAWrapper::TeardownGraph() { + LOG(INFO) << "Hexagon teardown graph"; + return hexagon_hta_nn_teardown(nn_id_) == 0; +} + +void HexagonHTAWrapper::PrintLog() { + LOG(INFO) << "Print Log"; +} + +void HexagonHTAWrapper::PrintGraph() { + LOG(INFO) << "Print Graph"; +} + +void HexagonHTAWrapper::SetDebugLevel(int level) { + LOG(INFO) << "Set debug level: " << level; + MACE_CHECK(hexagon_hta_nn_set_debug_level(nn_id_, level) == 0, + "set debug level error"); +} + +void HexagonHTAWrapper::GetPerfInfo() { + LOG(INFO) << "Get perf info"; +} + +void HexagonHTAWrapper::ResetPerfInfo() { + LOG(INFO) << "Reset perf info"; +} + +bool HexagonHTAWrapper::ExecuteGraph(const Tensor &input_tensor, + Tensor *output_tensor) { + 
MACE_UNUSED(input_tensor); + MACE_UNUSED(output_tensor); + MACE_NOT_IMPLEMENTED; + return false; +} + +bool HexagonHTAWrapper::ExecuteGraphNew( + const std::vector &input_tensors, + std::vector *output_tensors) { + VLOG(2) << "Execute graph new: " << nn_id_; + uint32_t num_inputs = static_cast(input_tensors.size()); + uint32_t num_outputs = static_cast(output_tensors->size()); + MACE_ASSERT(num_inputs_ == num_inputs, "Wrong inputs num"); + MACE_ASSERT(num_outputs_ == num_outputs, "Wrong outputs num"); + + std::vector inputs(num_inputs); + std::vector outputs(num_outputs); + + for (size_t i = 0; i < num_inputs; ++i) { + std::vector input_shape = input_tensors[i]->shape(); + inputs[i].batches = static_cast(input_shape[0]); + inputs[i].height = static_cast(input_shape[1]); + inputs[i].width = static_cast(input_shape[2]); + inputs[i].depth = static_cast(input_shape[3]); + input_info_[i].tensor_u8->SetDtype(DT_UINT8); + input_info_[i].tensor_u8->Resize(input_shape); + + const float *input_data = input_tensors[i]->data(); + uint8_t *input_data_u8 = input_info_[i].tensor_u8->mutable_data(); + QuantizeWithScaleAndZeropoint(input_data, + input_tensors[i]->size(), + input_info_[i].scale, + input_info_[i].zero_point, + input_data_u8); + + inputs[i].data = const_cast( + reinterpret_cast( + input_info_[i].tensor_u8->raw_data())); + inputs[i].dataLen = static_cast(input_info_[i].tensor_u8->raw_size()); + inputs[i].data_valid_len = static_cast( + input_info_[i].tensor_u8->raw_size()); + inputs[i].unused = 0; + } + + for (size_t i = 0; i < num_outputs; ++i) { + (*output_tensors)[i]->SetDtype(output_info_[i].data_type); + (*output_tensors)[i]->Resize(output_info_[i].shape); + output_info_[i].tensor_u8->SetDtype(DT_UINT8); + output_info_[i].tensor_u8->Resize(output_info_[i].shape); + outputs[i].data = reinterpret_cast( + output_info_[i].tensor_u8->raw_mutable_data()); + outputs[i].dataLen = + static_cast(output_info_[i].tensor_u8->raw_size()); + } + + int res = 
hexagon_hta_nn_execute_new(nn_id_, + inputs.data(), + num_inputs, + outputs.data(), + num_outputs); + + for (size_t i = 0; i < num_outputs; ++i) { + std::vector output_shape{ + outputs[i].batches, outputs[i].height, outputs[i].width, + outputs[i].depth}; + MACE_ASSERT(output_shape.size() == output_info_[i].shape.size(), + "wrong output shape inferred"); + for (size_t j = 0; j < output_shape.size(); ++j) { + MACE_ASSERT(static_cast(output_shape[j]) + == output_info_[i].shape[j], + "wrong output shape inferred"); + } + MACE_ASSERT(static_cast(outputs[i].data_valid_len) + == (*output_tensors)[i]->raw_size(), + "wrong output bytes inferred."); + + const uint8_t *output_data_u8 = output_info_[i].tensor_u8->data(); + float *output_data = (*output_tensors)[i]->mutable_data(); + Dequantize(output_data_u8, + output_info_[i].tensor_u8->size(), + output_info_[i].scale, + output_info_[i].zero_point, + output_data); + } + + return res == 0; +} + +} // namespace mace diff --git a/mace/core/runtime/hexagon/hexagon_hta_wrapper.h b/mace/core/runtime/hexagon/hexagon_hta_wrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..26ea17bde45da1853efe222e9f7d30baa25d3471 --- /dev/null +++ b/mace/core/runtime/hexagon/hexagon_hta_wrapper.h @@ -0,0 +1,51 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_WRAPPER_H_ +#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_WRAPPER_H_ + +#include + +#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" +#include "mace/core/tensor.h" +#include "mace/public/mace.h" + +namespace mace { + +class HexagonHTAWrapper : public HexagonControlWrapper { + public: + HexagonHTAWrapper() = default; + + int GetVersion() override; + bool Config() override; + bool Init() override; + bool Finalize() override; + bool SetupGraph(const NetDef &net_def, + const unsigned char *model_data) override; + bool ExecuteGraph(const Tensor &input_tensor, + Tensor *output_tensor) override; + bool ExecuteGraphNew(const std::vector &input_tensors, + std::vector *output_tensors) override; + bool TeardownGraph() override; + void PrintLog() override; + void PrintGraph() override; + void GetPerfInfo() override; + void ResetPerfInfo() override; + void SetDebugLevel(int level) override; + + MACE_DISABLE_COPY_AND_ASSIGN(HexagonHTAWrapper); +}; +} // namespace mace + +#endif // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_WRAPPER_H_ diff --git a/mace/examples/cli/BUILD.bazel b/mace/examples/cli/BUILD.bazel index e661c10b80989f3aa4238b0fb40bbad53976f2d4..693009e37f0a5a49fc1ca4ffab771c67de25b7c5 100644 --- a/mace/examples/cli/BUILD.bazel +++ b/mace/examples/cli/BUILD.bazel @@ -3,6 +3,7 @@ load( "//mace:mace.bzl", "if_android", "if_hexagon_enabled", + "if_hta_enabled", "if_opencl_enabled", "if_openmp_enabled", ) @@ -36,6 +37,8 @@ cc_binary( "//mace/utils:utils_hdrs", ] + if_hexagon_enabled([ "//third_party/nnlib:libhexagon", + ]) + if_hta_enabled([ + "//third_party/hta", ]), ) diff --git a/mace/examples/cli/example.cc b/mace/examples/cli/example.cc index 7e485bc65790797fde97516d8418c4ab58035030..0e26b9f5c0bd0ab1db390f26a6b5d8a3b2ece767 100644 --- a/mace/examples/cli/example.cc +++ b/mace/examples/cli/example.cc @@ -79,6 +79,8 @@ DeviceType ParseDeviceType(const std::string &device_str) { return DeviceType::GPU; } else if 
(device_str.compare("HEXAGON") == 0) { return DeviceType::HEXAGON; + } else if (device_str.compare("HTA") == 0) { + return DeviceType::HTA; } else { return DeviceType::CPU; } diff --git a/mace/libmace/BUILD.bazel b/mace/libmace/BUILD.bazel index 29127a1499de0dc7c0e7a5d464cc5e1371976731..36eff0c80a76c3adb0b9e8738281974bf1aa2280 100644 --- a/mace/libmace/BUILD.bazel +++ b/mace/libmace/BUILD.bazel @@ -16,6 +16,7 @@ load( "if_openmp_enabled", "if_android_armv7", "if_hexagon_enabled", + "if_hta_enabled", "if_opencl_enabled", "if_quantize_enabled", ) @@ -40,6 +41,8 @@ cc_library( "-DMACE_ENABLE_QUANTIZE", ]) + if_hexagon_enabled([ "-DMACE_ENABLE_HEXAGON", + ]) + if_hta_enabled([ + "-DMACE_ENABLE_HTA", ]), deps = [ "//mace/ops", diff --git a/mace/libmace/capability.cc b/mace/libmace/capability.cc index c9dff5dc73782d6831a9b4a59d0e9aa22ada2e99..d37a62b6616b03bc476e7549b4e1b5d73357148d 100644 --- a/mace/libmace/capability.cc +++ b/mace/libmace/capability.cc @@ -142,7 +142,7 @@ void BMNet::SetUp() { // Add input and output information for (size_t i = 0; i < input_names_.size(); ++i) { - InputInfo *info = net_.add_input_info(); + InputOutputInfo *info = net_.add_input_info(); info->set_data_format(DataFormat::NHWC); info->set_name(input_names_[i]); for (auto d : input_shapes_[i]) { @@ -150,7 +150,7 @@ void BMNet::SetUp() { } } for (auto output_name : output_names_) { - OutputInfo *info = net_.add_output_info(); + InputOutputInfo *info = net_.add_output_info(); info->set_name(output_name); } // allocate weight data diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc index 78991a71c74f206abdfe4cf8d547b2fd6d6b2826..2b626cf51320d11e8e50e2494b70913e268ffe57 100644 --- a/mace/libmace/mace.cc +++ b/mace/libmace/mace.cc @@ -33,10 +33,9 @@ #include "mace/core/runtime/opencl/opencl_runtime.h" #endif // MACE_ENABLE_OPENCL -#ifdef MACE_ENABLE_HEXAGON -#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" +#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA) #include 
"mace/core/runtime/hexagon/hexagon_device.h" -#endif // MACE_ENABLE_HEXAGON +#endif namespace mace { namespace { @@ -387,11 +386,11 @@ class MaceEngine::Impl { std::unique_ptr ws_; std::unique_ptr net_; bool is_quantized_model_; -#ifdef MACE_ENABLE_HEXAGON +#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA) std::unique_ptr hexagon_controller_; #endif - std::map input_info_map_; - std::map output_info_map_; + std::map input_info_map_; + std::map output_info_map_; MACE_DISABLE_COPY_AND_ASSIGN(Impl); }; @@ -404,7 +403,7 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config) ws_(new Workspace()), net_(nullptr), is_quantized_model_(false) -#ifdef MACE_ENABLE_HEXAGON +#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA) , hexagon_controller_(nullptr) #endif { @@ -427,9 +426,9 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config) config.impl_->use_gemmlowp())); } #endif -#ifdef MACE_ENABLE_HEXAGON - if (device_type_ == DeviceType::HEXAGON) { - device_.reset(new HexagonDevice()); +#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA) + if (device_type_ == DeviceType::HEXAGON || device_type_ == DeviceType::HTA) { + device_.reset(new HexagonDevice(device_type_)); } #endif MACE_CHECK_NOTNULL(device_); @@ -481,13 +480,13 @@ MaceStatus MaceEngine::Impl::Init( << "' does not belong to model's outputs " << MakeString(MapKeys(output_info_map_)); } -#ifdef MACE_ENABLE_HEXAGON +#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA) ws_->CreateTensor(output_name, device_->allocator(), DT_FLOAT); #endif } -#ifdef MACE_ENABLE_HEXAGON - if (device_type_ == HEXAGON) { - hexagon_controller_.reset(new HexagonControlWrapper()); +#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA) + if (device_type_ == HEXAGON || device_type_ == HTA) { + hexagon_controller_ = CreateHexagonControlWrapper(device_type_); MACE_CHECK(hexagon_controller_->Config(), "hexagon config error"); MACE_CHECK(hexagon_controller_->Init(), "hexagon init error"); 
hexagon_controller_->SetDebugLevel( @@ -519,7 +518,7 @@ MaceStatus MaceEngine::Impl::Init( ws_->RemoveAndReloadBuffer(*net_def, model_data, device_->allocator()); } MACE_RETURN_IF_ERROR(net_->Init()); -#ifdef MACE_ENABLE_HEXAGON +#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA) } #endif @@ -541,6 +540,7 @@ MaceStatus MaceEngine::Impl::Init( reinterpret_cast(model_data_->data()))); if (device_type_ == DeviceType::GPU || device_type_ == DeviceType::HEXAGON || + device_type_ == DeviceType::HTA || (device_type_ == DeviceType::CPU && ws_->diffused_buffer())) { model_data_.reset(); } @@ -549,8 +549,8 @@ MaceStatus MaceEngine::Impl::Init( MaceEngine::Impl::~Impl() { LOG(INFO) << "Destroying MaceEngine"; -#ifdef MACE_ENABLE_HEXAGON - if (device_type_ == HEXAGON) { +#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA) + if (device_type_ == HEXAGON || device_type_ == HTA) { if (VLOG_IS_ON(2)) { hexagon_controller_->GetPerfInfo(); hexagon_controller_->PrintLog(); @@ -699,15 +699,15 @@ MaceStatus MaceEngine::Impl::Run( Tensor *output_tensor = ws_->GetTensor(output.first); output_tensors.push_back(output_tensor); } -#ifdef MACE_ENABLE_HEXAGON - if (device_type_ == HEXAGON) { +#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA) + if (device_type_ == HEXAGON || device_type_ == HTA) { MACE_CHECK(input_tensors.size() == 1 && output_tensors.size() == 1, "HEXAGON not support multiple inputs and outputs yet."); - hexagon_controller_->ExecuteGraphNew(input_tensors, &output_tensors, true); + hexagon_controller_->ExecuteGraphNew(input_tensors, &output_tensors); } else { #endif MACE_RETURN_IF_ERROR(net_->Run(run_metadata)); -#ifdef MACE_ENABLE_HEXAGON +#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA) } #endif diff --git a/mace/libmace/mace_version_script.lds b/mace/libmace/mace_version_script.lds index 88f748edcd629658d107cb59e93e35d231309d3e..a088736de4d1e6c0ab07a397ae5d4164689726b7 100644 --- a/mace/libmace/mace_version_script.lds +++ 
b/mace/libmace/mace_version_script.lds @@ -15,8 +15,7 @@ mace { *mace*NetDef*; *mace*MemoryType*; *mace*DataType*; - *mace*InputInfo*; - *mace*OutputInfo*; + *mace*InputOutputInfo*; *mace*OutputShape*; *mace*OperatorDef*; *mace*ConstTensor*; diff --git a/mace/mace.bzl b/mace/mace.bzl index ee9f8c59dc94ab15dd0749205c4630ce9f4b1ce4..1f577e7e47d02f6ce23391205110687b49d1efdf 100644 --- a/mace/mace.bzl +++ b/mace/mace.bzl @@ -60,6 +60,19 @@ def if_not_hexagon_enabled(a): "//conditions:default": a, }) +def if_hta_enabled(a): + return select({ + "//mace:hta_enabled": a, + "//conditions:default": [], + }) + +def if_hexagon_or_hta_enabled(a): + return select({ + "//mace:hexagon_enabled": a, + "//mace:hta_enabled": a, + "//conditions:default": [], + }) + def if_openmp_enabled(a): return select({ "//mace:openmp_enabled": a, diff --git a/mace/proto/mace.proto b/mace/proto/mace.proto index 530de3aedfcd6a94d9ee840f8e368a4447d6cd8c..d3b564fc6a9de2b7b79f9c73df53b3fa9e310788 100644 --- a/mace/proto/mace.proto +++ b/mace/proto/mace.proto @@ -86,21 +86,15 @@ message OperatorDef { } // for hexagon mace-nnlib -message InputInfo { - optional string name = 1; - optional int32 node_id = 2; - repeated int32 dims = 3; - optional int32 max_byte_size = 4; // only support 32-bit len - optional DataType data_type = 5 [default = DT_FLOAT]; - optional int32 data_format = 6 [default = 1]; // NHWC -} -message OutputInfo { +message InputOutputInfo { optional string name = 1; optional int32 node_id = 2; repeated int32 dims = 3; optional int32 max_byte_size = 4; // only support 32-bit len optional DataType data_type = 5 [default = DT_FLOAT]; optional int32 data_format = 6 [default = 1]; // NHWC + optional float scale = 7; + optional int32 zero_point = 8; } message NetDef { @@ -109,6 +103,6 @@ message NetDef { repeated ConstTensor tensors = 3; // for hexagon mace-nnlib - repeated InputInfo input_info = 100; - repeated OutputInfo output_info = 101; + repeated InputOutputInfo input_info = 100; + repeated 
InputOutputInfo output_info = 101; } diff --git a/mace/public/mace.h b/mace/public/mace.h index 3d210d3801bd899a5ad27951f61a898648845096..1e0bdc187b11b898db7b5c4430c26b3452a35998 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -32,7 +32,7 @@ namespace mace { class NetDef; -enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3 }; +enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3, HTA = 4 }; enum DataFormat { DF_NONE = 0, NHWC = 1, NCHW = 2}; diff --git a/mace/python/tools/converter.py b/mace/python/tools/converter.py index 6d2e31d3de67502b77ac76f051d90a2bc0678659..bd588d0cabde72bb5dd567c52fbeb7d997f826fa 100644 --- a/mace/python/tools/converter.py +++ b/mace/python/tools/converter.py @@ -37,6 +37,7 @@ FLAGS = None device_type_map = {'cpu': cvt.DeviceType.CPU.value, 'gpu': cvt.DeviceType.GPU.value, 'dsp': cvt.DeviceType.HEXAGON.value, + 'hta': cvt.DeviceType.HTA.value, 'cpu+gpu': cvt.DeviceType.CPU.value} data_format_map = { @@ -53,10 +54,11 @@ def parse_data_type(data_type, device_type): return mace_pb2.DT_FLOAT else: return mace_pb2.DT_HALF - elif device_type == cvt.DeviceType.HEXAGON.value: + elif device_type == cvt.DeviceType.HEXAGON.value or \ + device_type == cvt.DeviceType.HTA.value: return mace_pb2.DT_FLOAT else: - print("Invalid device type: " + device_type) + print("Invalid device type: " + str(device_type)) def file_checksum(fname): @@ -121,7 +123,7 @@ def main(unused_args): six.print_("platform %s is not supported." % FLAGS.platform, file=sys.stderr) sys.exit(-1) - if FLAGS.runtime not in ['cpu', 'gpu', 'dsp', 'cpu+gpu']: + if FLAGS.runtime not in ['cpu', 'gpu', 'dsp', 'hta', 'cpu+gpu']: six.print_("runtime %s is not supported." 
% FLAGS.runtime, file=sys.stderr) sys.exit(-1) @@ -220,7 +222,8 @@ def main(unused_args): option, output_graph_def) output_graph_def, quantize_activation_info = mace_transformer.run() - if FLAGS.runtime == 'dsp': + if option.device in [cvt.DeviceType.HEXAGON.value, + cvt.DeviceType.HTA.value]: from mace.python.tools.converter_tool import hexagon_converter converter = hexagon_converter.HexagonConverter( option, output_graph_def, quantize_activation_info) diff --git a/mace/python/tools/converter_tool/base_converter.py b/mace/python/tools/converter_tool/base_converter.py index 204b1d21fe79d0cd0c266933f2b4d80591eaf24e..fbfb1b0239693c05d14236ec841f073654090062 100644 --- a/mace/python/tools/converter_tool/base_converter.py +++ b/mace/python/tools/converter_tool/base_converter.py @@ -22,6 +22,7 @@ class DeviceType(Enum): CPU = 0 GPU = 2 HEXAGON = 3 + HTA = 4 class DataFormat(Enum): diff --git a/mace/python/tools/converter_tool/hexagon_converter.py b/mace/python/tools/converter_tool/hexagon_converter.py index 081adde9c0881caa57bc81a9aaa8118c8e77bcf4..53598243b247094ce43b5a832b65d1498c796547 100644 --- a/mace/python/tools/converter_tool/hexagon_converter.py +++ b/mace/python/tools/converter_tool/hexagon_converter.py @@ -20,6 +20,7 @@ from operator import mul from mace.proto import mace_pb2 from mace.python.tools.converter_tool import base_converter from mace.python.tools.converter_tool.base_converter import ConverterUtil +from mace.python.tools.converter_tool.base_converter import DeviceType from mace.python.tools.converter_tool.base_converter import EltwiseType from mace.python.tools.converter_tool.base_converter import MaceKeyword from mace.python.tools.converter_tool.base_converter import MaceOp @@ -36,6 +37,8 @@ HexagonSupportedOps = [ 'BatchToSpaceND_8', 'DepthwiseSupernode_8x8p32to8', 'DequantizeOUTPUT_8tof', + 'INPUT', + 'OUTPUT', 'QuantizedAdd_8p8to8', 'QuantizedAvgPool_8', 'QuantizedConcat_8', @@ -332,7 +335,7 @@ class 
HexagonConverter(base_converter.ConverterInterface): else: op.type = self._hexagon_ops.map_nn_op(op.type) - def add_min_max(self, name, val): + def add_const_node(self, name, val): if name not in self._consts: tensor = self._model.tensors.add() self._consts[name] = tensor @@ -364,14 +367,14 @@ class HexagonConverter(base_converter.ConverterInterface): min_tensor_name = op + ':1' else: min_tensor_name = op + '_min:0' - self.add_min_max(min_tensor_name, minval) + self.add_const_node(min_tensor_name, minval) this_op.input.extend([min_tensor_name]) if add_max: if is_activation and diff_port: max_tensor_name = op + ':2' else: max_tensor_name = op + '_max:0' - self.add_min_max(max_tensor_name, maxval) + self.add_const_node(max_tensor_name, maxval) this_op.input.extend([max_tensor_name]) def add_shape_const_node(self, op, values, name): @@ -382,27 +385,48 @@ class HexagonConverter(base_converter.ConverterInterface): tensor.dims.extend(values) return tensor.name - def add_input_output_node(self): - for op in self._model.op: - if op.name.startswith(MaceKeyword.mace_input_node_name): - del op.input[0] - break + def add_constant_min_max_for_first_op(self, op): + minval = self._quantize_activation_info[op.input[0]].minval + maxval = self._quantize_activation_info[op.input[0]].maxval + input_op, _ = get_op_and_port_from_tensor(op.input[0]) + input_min = input_op + '_min:0' + input_max = input_op + '_max:0' + self.add_const_node(input_min, minval) + self.add_const_node(input_max, maxval) + for i in range(len(op.input)): + if op.input[i] == input_op + ':1': + op.input[i] = input_min + elif op.input[i] == input_op + ':2': + op.input[i] = input_max - output_node = None - if not self._option.check_nodes: - output_name = list(self._option.output_nodes.values())[0].name - else: - output_name = list(self._option.check_nodes.values())[0].name - output_name = normalize_name(output_name) - for op in self._model.op: - if op.name == output_name: - output_node = op - break - 
mace_check(output_node is not None, - "mace_output_node_* not found.") - del output_node.output_shape[:] - del output_node.output_type[:] - del output_node.out_max_byte_size[:] + def add_input_output_node(self): + mace_check( + self._model.op[0].type == HexagonOp.QuantizeINPUT_f_to_8.name, + "Not started with Quantize op.") + quantize_input_op = self._model.op[0] + del quantize_input_op.input[:] + + mace_check( + self._model.op[-1].type == HexagonOp.DequantizeOUTPUT_8tof.name, + "Not ended with Dequantize op.") + dequantize_output_op = self._model.op[-1] + del dequantize_output_op.output_shape[:] + del dequantize_output_op.output_type[:] + del dequantize_output_op.out_max_byte_size[:] + + if self._option.device == DeviceType.HTA.value: + # replace QuantizeINPUT_f_to_8 with INPUT + quantize_input_op.type = HexagonOp.INPUT.name + del quantize_input_op.output_shape[1:] + del quantize_input_op.output_type[1:] + del quantize_input_op.out_max_byte_size[1:] + + # replace first op's input min max with constant + self.add_constant_min_max_for_first_op(self._model.op[1]) + + # replace DequantizeOUTPUT_8tof with OUTPUT + dequantize_output_op.type = HexagonOp.OUTPUT.name + del dequantize_output_op.input[1:] def add_node_id(self): node_id_counter = 0 diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py index 02fc3a12ace0808e083482ea93ae9fe2d6a7c65a..a3976913c76e70b5fcc48e498c0a3683c78c7005 100644 --- a/mace/python/tools/converter_tool/transformer.py +++ b/mace/python/tools/converter_tool/transformer.py @@ -1174,7 +1174,8 @@ class Transformer(base_converter.ConverterInterface): self.set_filter_format(FilterFormat.OHWI) elif self._option.quantize and \ - self._option.device == DeviceType.HEXAGON.value: + (self._option.device == DeviceType.HEXAGON.value or + self._option.device == DeviceType.HTA.value): print("Transpose filters to HWIO/HWIM") mace_check(filter_format == FilterFormat.HWIO, "HEXAGON only support HWIO/HWIM 
filter format.") @@ -1456,7 +1457,7 @@ class Transformer(base_converter.ConverterInterface): % (op.name, op.type, mace_pb2.DataType.Name(data_type_arg.i))) - for input_node in self._option.input_nodes.values(): + for i, input_node in enumerate(self._option.input_nodes.values()): new_input_name = self.input_name_map[input_node.name] op_def = self._model.op.add() op_def.name = self.normalize_op_name(new_input_name) @@ -1465,8 +1466,10 @@ class Transformer(base_converter.ConverterInterface): op_def.output.extend([new_input_name]) output_shape = op_def.output_shape.add() output_shape.dims.extend(input_node.shape) - self.copy_quantize_info( - op_def, self._quantize_activation_info[new_input_name]) + quantize_info = self._quantize_activation_info[new_input_name] + self.copy_quantize_info(op_def, quantize_info) + self._model.input_info[i].scale = quantize_info.scale + self._model.input_info[i].zero_point = quantize_info.zero_point ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_UINT8) ConverterUtil.add_data_format_arg(op_def, DataFormat.NHWC) @@ -1477,16 +1480,19 @@ class Transformer(base_converter.ConverterInterface): find_range_every_time_arg.i = 1 output_nodes = self._option.check_nodes.values() - for output_node in output_nodes: + for i, output_node in enumerate(output_nodes): op_def = self._model.op.add() op_def.name = self.normalize_op_name(output_node.name) op_def.type = MaceOp.Dequantize.name op_def.input.extend([self.output_name_map[output_node.name]]) op_def.output.extend([output_node.name]) output_shape = op_def.output_shape.add() - output_shape.dims.extend( - self._producer[output_node.name].output_shape[0].dims) + producer_op = self._producer[output_node.name] + output_shape.dims.extend(producer_op.output_shape[0].dims) op_def.output_type.extend([mace_pb2.DT_FLOAT]) + quantize_info = producer_op.quantize_info[0] + self._model.output_info[i].scale = quantize_info.scale + self._model.output_info[i].zero_point = quantize_info.zero_point 
ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_UINT8) @@ -1533,7 +1539,8 @@ class Transformer(base_converter.ConverterInterface): quantized_tensor = \ quantize_util.quantize_with_scale_and_zero( tensor.float_data, scale, 0) - elif self._option.device == DeviceType.HEXAGON.value: + elif self._option.device == DeviceType.HEXAGON.value or \ + self._option.device == DeviceType.HTA.value: quantized_tensor = \ quantize_util.quantize_bias_for_hexagon( tensor.float_data) @@ -1691,7 +1698,7 @@ class Transformer(base_converter.ConverterInterface): return False print("Add default quantize info for input") - for input_node in self._option.input_nodes.values(): + for i, input_node in enumerate(self._option.input_nodes.values()): if input_node.name not in self._quantize_activation_info: print("Input range %s: %s" % (input_node.name, str(input_node.range))) diff --git a/mace/python/tools/model.jinja2 b/mace/python/tools/model.jinja2 index c7d936c0318527423e8b251b06647048c446a17a..89bee8d8f9dba8ce27ff97ff016381eb7b9da5e7 100644 --- a/mace/python/tools/model.jinja2 +++ b/mace/python/tools/model.jinja2 @@ -75,7 +75,7 @@ void CreateNetArg(NetDef *net_def) { {% if net.input_info | length > 0 %} void CreateInputInfo(NetDef *net_def) { net_def->mutable_input_info()->Reserve({{ net.input_info | length }}); - InputInfo *input_info = nullptr; + InputOutputInfo *input_info = nullptr; {% for idx in range(net.input_info|length) %} input_info = net_def->add_input_info(); input_info->set_name({{ net.input_info[idx].name|tojson }}); @@ -92,7 +92,7 @@ void CreateInputInfo(NetDef *net_def) { {% if net.output_info | length > 0 %} void CreateOutputInfo(NetDef *net_def) { net_def->mutable_output_info()->Reserve({{ net.output_info | length }}); - OutputInfo *output_info = nullptr; + InputOutputInfo *output_info = nullptr; {% for idx in range(net.output_info|length) %} output_info = net_def->add_output_info(); output_info->set_name({{ net.output_info[idx].name|tojson }}); diff --git 
a/mace/test/BUILD.bazel b/mace/test/BUILD.bazel index 36a2b6472d46db4360b1840b6031f32f94212e40..a5c5f974552dd13b35faff26f7e14266e042b3fc 100644 --- a/mace/test/BUILD.bazel +++ b/mace/test/BUILD.bazel @@ -11,6 +11,7 @@ load( "if_openmp_enabled", "if_android_armv7", "if_hexagon_enabled", + "if_hta_enabled", "if_opencl_enabled", "if_quantize_enabled", ) @@ -45,6 +46,8 @@ cc_test( "-DMACE_ENABLE_QUANTIZE", ]) + if_hexagon_enabled([ "-DMACE_ENABLE_HEXAGON", + ]) + if_hta_enabled([ + "-DMACE_ENABLE_HTA", ]), linkopts = ["-fopenmp"], linkstatic = 1, @@ -78,6 +81,8 @@ cc_test( "-DMACE_ENABLE_QUANTIZE", ]) + if_hexagon_enabled([ "-DMACE_ENABLE_HEXAGON", + ]) + if_hta_enabled([ + "-DMACE_ENABLE_HTA", ]), linkopts = ["-fopenmp"], linkstatic = 1, @@ -111,6 +116,8 @@ cc_test( "-DMACE_ENABLE_QUANTIZE", ]) + if_hexagon_enabled([ "-DMACE_ENABLE_HEXAGON", + ]) + if_hta_enabled([ + "-DMACE_ENABLE_HTA", ]), linkopts = ["-fopenmp"], linkstatic = 1, @@ -143,6 +150,8 @@ cc_test( "-DMACE_ENABLE_QUANTIZE", ]) + if_hexagon_enabled([ "-DMACE_ENABLE_HEXAGON", + ]) + if_hta_enabled([ + "-DMACE_ENABLE_HTA", ]), linkopts = ["-fopenmp"], linkstatic = 1, diff --git a/mace/test/mace_api_exception_test.cc b/mace/test/mace_api_exception_test.cc index 075b04b40c7467d2d6a6dff10b6cb245521b68f5..232023dace17584f49c15a499b196c538f6598eb 100644 --- a/mace/test/mace_api_exception_test.cc +++ b/mace/test/mace_api_exception_test.cc @@ -29,7 +29,7 @@ TEST(MaceAPIExceptionTest, WrongInputTest) { std::shared_ptr net_def(new NetDef()); for (size_t i = 0; i < input_names.size(); ++i) { - InputInfo *info = net_def->add_input_info(); + InputOutputInfo *info = net_def->add_input_info(); info->set_name(input_names[i]); } diff --git a/mace/test/mace_api_mt_test.cc b/mace/test/mace_api_mt_test.cc index f13d05b621c9d32e659b3b908b7fe85836112b7a..ee14129a05dd23d7d2fa6b3bcc491da375c12096 100644 --- a/mace/test/mace_api_mt_test.cc +++ b/mace/test/mace_api_mt_test.cc @@ -45,7 +45,7 @@ void MaceRunFunc(const int in_out_size) 
{ filter_tensor_name, filter_shape, 0, data.size(), net_def.get()); for (size_t i = 0; i < input_names.size(); ++i) { - InputInfo *info = net_def->add_input_info(); + InputOutputInfo *info = net_def->add_input_info(); info->set_data_format(DataFormat::NHWC); info->set_name(input_names[i]); for (auto d : input_shapes[0]) { @@ -53,7 +53,7 @@ void MaceRunFunc(const int in_out_size) { } } for (size_t i = 0; i < output_names.size(); ++i) { - OutputInfo *info = net_def->add_output_info(); + InputOutputInfo *info = net_def->add_output_info(); info->set_name(output_names[i]); } for (size_t i = 0; i < output_names.size(); ++i) { diff --git a/mace/test/mace_api_test.cc b/mace/test/mace_api_test.cc index baff89112786ea9ac569f06007c96d81cffa6bd7..0a852a17a9a9cfd6a7d331556b1ad1b1a85e397a 100644 --- a/mace/test/mace_api_test.cc +++ b/mace/test/mace_api_test.cc @@ -44,7 +44,7 @@ void MaceRun(const int in_out_size, AddTensor(filter_tensor_name, filter_shape, 0, data.size(), net_def.get()); for (size_t i = 0; i < input_names.size(); ++i) { - InputInfo *info = net_def->add_input_info(); + InputOutputInfo *info = net_def->add_input_info(); info->set_data_format(DataFormat::NHWC); info->set_name(input_names[i]); for (auto d : max_shape) { @@ -52,7 +52,7 @@ void MaceRun(const int in_out_size, } } for (size_t i = 0; i < output_names.size(); ++i) { - OutputInfo *info = net_def->add_output_info(); + InputOutputInfo *info = net_def->add_output_info(); info->set_name(output_names[i]); } for (size_t i = 0; i < output_names.size(); ++i) { diff --git a/mace/tools/validation/mace_run.cc b/mace/tools/validation/mace_run.cc index 4dd885a55992300f83c268fc704975272a4ae71d..d1139e519cd78d5e815f3da66808567a310da039 100644 --- a/mace/tools/validation/mace_run.cc +++ b/mace/tools/validation/mace_run.cc @@ -76,6 +76,8 @@ DeviceType ParseDeviceType(const std::string &device_str) { return DeviceType::GPU; } else if (device_str.compare("HEXAGON") == 0) { return DeviceType::HEXAGON; + } else if 
(device_str.compare("HTA") == 0) { + return DeviceType::HTA; } else { return DeviceType::CPU; } diff --git a/third_party/hta/BUILD b/third_party/hta/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..7385472755eab0a1fb75df4bb089a63aa01e110e --- /dev/null +++ b/third_party/hta/BUILD @@ -0,0 +1,31 @@ +# These files are generated from nnlib project + +licenses(["notice"]) + +exports_files(["license.txt"]) + +load( + "//mace:mace.bzl", + "if_android_armv7", + "if_android_arm64", +) + +cc_library( + name = "hta", + srcs = if_android_armv7([ + "armeabi-v7a/libhta_controller.so", + "armeabi-v7a/libhta_hexagon_runtime.so", + "armeabi-v7a/libnpu.so", + ]) + if_android_arm64([ + "arm64-v8a/libcdsprpc.so", + "arm64-v8a/libhta_controller.so", + "arm64-v8a/libhta_hexagon_runtime.so", + "arm64-v8a/libnpu.so", + ]), + hdrs = [ + "hta_hexagon_api.h", + "hta_hexagon_nn_ops.h", + "hta_ops.h", + ], + visibility = ["//visibility:public"], +) diff --git a/third_party/hta/arm64-v8a/libcdsprpc.so b/third_party/hta/arm64-v8a/libcdsprpc.so new file mode 100755 index 0000000000000000000000000000000000000000..57de01f4887197b0b510f395f828289d74597069 Binary files /dev/null and b/third_party/hta/arm64-v8a/libcdsprpc.so differ diff --git a/third_party/hta/arm64-v8a/libhta_controller.so b/third_party/hta/arm64-v8a/libhta_controller.so new file mode 100644 index 0000000000000000000000000000000000000000..3cb5ea31a24d319779521454720c3b587120d2e0 Binary files /dev/null and b/third_party/hta/arm64-v8a/libhta_controller.so differ diff --git a/third_party/hta/arm64-v8a/libhta_hexagon_runtime.so b/third_party/hta/arm64-v8a/libhta_hexagon_runtime.so new file mode 100644 index 0000000000000000000000000000000000000000..32b5d784a19a6390ffe25f4c4e4853172b4d5074 Binary files /dev/null and b/third_party/hta/arm64-v8a/libhta_hexagon_runtime.so differ diff --git a/third_party/hta/arm64-v8a/libnpu.so b/third_party/hta/arm64-v8a/libnpu.so new file mode 100644 index
0000000000000000000000000000000000000000..9b6633769db106f516ac7cfebea0b40b491996e1 Binary files /dev/null and b/third_party/hta/arm64-v8a/libnpu.so differ diff --git a/third_party/hta/armeabi-v7a/libhta_controller.so b/third_party/hta/armeabi-v7a/libhta_controller.so new file mode 100644 index 0000000000000000000000000000000000000000..03b267889d96e74b965fd485313d35ce59b8bc97 Binary files /dev/null and b/third_party/hta/armeabi-v7a/libhta_controller.so differ diff --git a/third_party/hta/armeabi-v7a/libhta_hexagon_runtime.so b/third_party/hta/armeabi-v7a/libhta_hexagon_runtime.so new file mode 100644 index 0000000000000000000000000000000000000000..9136f520d74901ca068c5377eccb578978ca9fa6 Binary files /dev/null and b/third_party/hta/armeabi-v7a/libhta_hexagon_runtime.so differ diff --git a/third_party/hta/armeabi-v7a/libnpu.so b/third_party/hta/armeabi-v7a/libnpu.so new file mode 100644 index 0000000000000000000000000000000000000000..a88605929cfdca12ecd720749064d880a6d48ab4 Binary files /dev/null and b/third_party/hta/armeabi-v7a/libnpu.so differ diff --git a/third_party/hta/hta_hexagon_api.h b/third_party/hta/hta_hexagon_api.h new file mode 100644 index 0000000000000000000000000000000000000000..cb13fe62bcd8bbdcb8f50f4dfb725df292aa87fd --- /dev/null +++ b/third_party/hta/hta_hexagon_api.h @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted (subject to the limitations in the + * disclaimer below) provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. 
+ * + * * Neither the name of The Linux Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE + * GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT + * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef THIRD_PARTY_HTA_HEXAGON_API_H_ +#define THIRD_PARTY_HTA_HEXAGON_API_H_ + +#include "hta_hexagon_nn_ops.h" +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int hexagon_hta_nn_nn_id; + +struct input { + uint32_t src_id; + uint32_t output_idx; +}; + +#define NODE_ID_RESERVED_CONSTANT 0 + +#define MAX_DIMENSIONS 8 +struct output { + uint32_t rank; // dimensions in the tensor + uint32_t max_sizes[MAX_DIMENSIONS]; // max num elements in each dimension + uint32_t elementsize; // size of each element + int32_t zero_offset; // 0 for float / integer values + float stepsize; // 0 for float/integer values +}; + +struct perfinfo { + uint32_t node_id; + uint32_t executions; + union { + uint64_t counter; + struct { + uint32_t counter_lo; + uint32_t counter_hi; + }; + }; +}; + +typedef struct input hexagon_hta_nn_input; +typedef struct output hexagon_hta_nn_output; +typedef struct perfinfo hexagon_hta_nn_perfinfo; +typedef int32_t hexagon_hta_nn_padding_type; + +typedef enum padding_type_enum { + HTA_NN_PAD_NA = 0, + HTA_NN_PAD_SAME, + HTA_NN_PAD_VALID, + HTA_NN_PAD_MIRROR_REFLECT, + HTA_NN_PAD_MIRROR_SYMMETRIC, + HTA_NN_PAD_SAME_CAFFE, +} hta_padding_type; + +typedef struct { + unsigned int batches; + unsigned int height; + unsigned int width; + unsigned int depth; + unsigned char *data; + int dataLen; /* For input and output */ + unsigned int data_valid_len; /* for output only */ + unsigned int unused; +} hexagon_hta_nn_tensordef; + +typedef struct hexagon_nn_op_node hexagon_nn_op_node; +struct hexagon_nn_op_node { + unsigned int node_id; + hta_op_type operation; + hta_padding_type padding; + hexagon_hta_nn_input* inputs; + int inputsLen; + hexagon_hta_nn_output* outputs; + int outputsLen; +}; +typedef struct hexagon_nn_const_node hexagon_nn_const_node; +struct hexagon_nn_const_node { + unsigned int node_id; + hexagon_hta_nn_tensordef tensor; +}; + +/* Actual functions in the interface */ +/* Returns 0 on success, nonzero on error unless 
otherwise noted */ +/* Configure the hardware and software environment. Should be called once before doing anything */ +int hexagon_hta_nn_config( void ); + +/* Initialize a new graph, returns a new nn_id or -1 on error */ +int hexagon_hta_nn_init(hexagon_hta_nn_nn_id *g); + +/* Set debug verbosity. Default is 0, higher values are more verbose */ +int hexagon_hta_nn_set_debug_level(hexagon_hta_nn_nn_id id, int level); + +/* Append a node to the graph. Nodes are executed in the appended order. */ +int hexagon_hta_nn_append_node( + hexagon_hta_nn_nn_id id, + uint32_t node_id, + hta_op_type operation, + hta_padding_type padding, + const struct input *inputs, + uint32_t num_inputs, + const struct output *outputs, + uint32_t num_outputs); + +/* + * Append a const node into the graph. The data is copied locally during this + * call, the caller does not need it to persist. + */ +int hexagon_hta_nn_append_const_node( + hexagon_hta_nn_nn_id id, + uint32_t node_id, + uint32_t batches, + uint32_t height, + uint32_t width, + uint32_t depth, + const uint8_t *data, + uint32_t data_len); + +/* + * Prepare a graph for execution. Must be done before attempting to execute the graph. + */ +int hexagon_hta_nn_prepare(hexagon_hta_nn_nn_id id); + +/* Execute the graph with a single input and a single output. */ +int hexagon_hta_nn_execute( + hexagon_hta_nn_nn_id id, + uint32_t batches_in, + uint32_t height_in, + uint32_t width_in, + uint32_t depth_in, + const uint8_t *data_in, + uint32_t data_len_in, + uint32_t *batches_out, + uint32_t *height_out, + uint32_t *width_out, + uint32_t *depth_out, + uint8_t *data_out, + uint32_t data_out_max, + uint32_t *data_out_size); + +/* Tear down a graph, destroying it and freeing resources. */ +int hexagon_hta_nn_teardown(hexagon_hta_nn_nn_id id); + +/* Get the version of the library */ +int hexagon_hta_nn_version(int *ver); + +/* Execute the graph with multiple inputs and multiple outputs. 
*/ +int hexagon_hta_nn_execute_new( + hexagon_hta_nn_nn_id id, + const hexagon_hta_nn_tensordef *inputs, + uint32_t n_inputs, + hexagon_hta_nn_tensordef *outputs, + uint32_t n_outputs); + +int hexagon_hta_nn_serialize_size(hexagon_hta_nn_nn_id id, unsigned int *serialized_obj_size_out); +int hexagon_hta_nn_serialize(hexagon_hta_nn_nn_id id, void *buf, unsigned int buf_len); +int hexagon_hta_nn_deserialize(void *buf, unsigned len, hexagon_hta_nn_nn_id *g); + +#ifdef __cplusplus +} +#endif + +#endif //THIRD_PARTY_HTA_HEXAGON_API_H_ diff --git a/third_party/hta/hta_hexagon_nn_ops.h b/third_party/hta/hta_hexagon_nn_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..e2aaa5881c842d12892d21dead102efad08df270 --- /dev/null +++ b/third_party/hta/hta_hexagon_nn_ops.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted (subject to the limitations in the + * disclaimer below) provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * * Neither the name of The Linux Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE + * GRANTED BY THIS LICENSE. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT + * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef THIRD_PARTY_HTA_HEXAGON_NN_OPS_H_ +#define THIRD_PARTY_HTA_HEXAGON_NN_OPS_H_ + +typedef enum hta_op_type_enum { +#define HTA_DEF_OP(NAME, ...) HTA_OP_##NAME, + +#include "hta_ops.h" + HTA_NN_OPS_MAX + +#undef HTA_DEF_OP +} hta_op_type; + +#endif // THIRD_PARTY_HTA_HEXAGON_NN_OPS_H_ diff --git a/third_party/hta/hta_ops.h b/third_party/hta/hta_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3becf1d3a79534131a8cfb3c9508bada52752623 --- /dev/null +++ b/third_party/hta/hta_ops.h @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted (subject to the limitations in the + * disclaimer below) provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. 
+ * + * * Neither the name of The Linux Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE + * GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT + * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * You probably want to + * + * ## ##### ##### + * # # # # # # + * # # # # # # + * ###### # # # # + * # # # # # # + * # # ##### ##### + * + * + * # # #### ##### ###### #### + * ## # # # # # # # + * # # # # # # # ##### #### + * # # # # # # # # # + * # ## # # # # # # # + * # # #### ##### ###### #### + * + * + * ## ##### + * # # # + * # # # + * ###### # + * # # # + * # # # + * + * + * ##### # # ###### + * # # # # + * # ###### ##### + * # # # # + * # # # # + * # # # ###### + * + * + * ###### # # ##### + * # ## # # # + * ##### # # # # # + * # # # # # # + * # # ## # # + * ###### # # ##### + * + * otherwise the interface becomes incompatible. 
+ */ +HTA_DEF_OP(INPUT) +HTA_DEF_OP(OUTPUT) +HTA_DEF_OP(Nop) +HTA_DEF_OP(Const) +HTA_DEF_OP(Check) +HTA_DEF_OP(Close_f) +HTA_DEF_OP(Close_quint8) +HTA_DEF_OP(Close_q_quint8) +HTA_DEF_OP(Close_int32) +HTA_DEF_OP(Close_qint32) +HTA_DEF_OP(PPrint_8) +HTA_DEF_OP(PPrint_32) +HTA_DEF_OP(PPrint_f) +HTA_DEF_OP(PreFree) +HTA_DEF_OP(Flatten) + +#ifndef HTA_DEF_OP_WREF +#define HTA_DEF_OP_WREF(NAME) HTA_DEF_OP(NAME) HTA_DEF_OP(NAME##_ref) +#define __SELF_HTA_DEF_OP_WREF +#endif + +HTA_DEF_OP_WREF(QuantizedConv2d_8x8to32) +HTA_DEF_OP_WREF(QuantizedMatMul_8x8to32) +HTA_DEF_OP_WREF(QuantizeDownAndShrinkRange_32to8) +HTA_DEF_OP_WREF(QuantizedRelu_8) +HTA_DEF_OP_WREF(QuantizedReluX_8) +HTA_DEF_OP_WREF(QuantizedMaxPool_8) +HTA_DEF_OP_WREF(QuantizedAvgPool_8) +HTA_DEF_OP_WREF(QuantizedL2Pool_8) +HTA_DEF_OP_WREF(QuantizedConcat_8) +HTA_DEF_OP_WREF(QuantizedBiasAdd_8p8to32) +HTA_DEF_OP_WREF(Min_f) +HTA_DEF_OP_WREF(Max_f) +HTA_DEF_OP_WREF(Quantize) +HTA_DEF_OP_WREF(Dequantize) +HTA_DEF_OP_WREF(Supernode_8x8p8to8) + +HTA_DEF_OP(QuantizedFlatten) +HTA_DEF_OP(Softmax_f) +HTA_DEF_OP(Conv2d_f) +HTA_DEF_OP(MatMul_f) +HTA_DEF_OP(Relu_f) +HTA_DEF_OP(ReluX_f) +HTA_DEF_OP(AvgPool_f) +HTA_DEF_OP(L2Pool_f) +HTA_DEF_OP(MaxPool_f) +HTA_DEF_OP(Concat_f) +HTA_DEF_OP(BiasAdd_f) +HTA_DEF_OP(LRN_f) + +HTA_DEF_OP(Variable) +HTA_DEF_OP(Assign) +HTA_DEF_OP(Reshape) +HTA_DEF_OP(QuantizedReshape) +HTA_DEF_OP(Tanh_f) +HTA_DEF_OP(Sigmoid_f) +HTA_DEF_OP(Slice_8) +HTA_DEF_OP(Slice_f) +HTA_DEF_OP(QuantizedSlice_8) +HTA_DEF_OP(Add_f) +HTA_DEF_OP(Mul_f) +HTA_DEF_OP(Minimum_f) +HTA_DEF_OP(Maximum_f) + +HTA_DEF_OP_WREF(Requantize_32to8) +HTA_DEF_OP_WREF(RequantizationRange_32) + +HTA_DEF_OP(Neg_f) +HTA_DEF_OP(Sub_f) +HTA_DEF_OP(AddN_f) +HTA_DEF_OP(Range_int32) +HTA_DEF_OP(Rank_int32) +HTA_DEF_OP(Transpose_int32) +HTA_DEF_OP(Transpose_f) +HTA_DEF_OP(InstanceNorm_f) +HTA_DEF_OP_WREF(QuantizedInstanceNorm_8) +HTA_DEF_OP(Sub_int32) +HTA_DEF_OP(Add_int32) +HTA_DEF_OP(Split_f) +HTA_DEF_OP(Dequantize_qint32_f) 
+HTA_DEF_OP(PRelu_f) +HTA_DEF_OP_WREF(QuantizedPRelu_8) +HTA_DEF_OP(Sum_f) +HTA_DEF_OP(Prod_f) +HTA_DEF_OP(Mul_int32) +HTA_DEF_OP(LogicalAnd_int32) +HTA_DEF_OP(LogicalOr_int32) +HTA_DEF_OP(LogicalXor_int32) +HTA_DEF_OP(Shape_int32) +HTA_DEF_OP(Pack_int32) +HTA_DEF_OP(MirrorPad_f) +HTA_DEF_OP(ResizeNearestNeighbor_f) +HTA_DEF_OP(StridedSlice_int32) +HTA_DEF_OP(StridedSlice_f) +HTA_DEF_OP(ExpandDims_int32) +HTA_DEF_OP(ExpandDims_f) + +HTA_DEF_OP(LogSoftmax_f) +HTA_DEF_OP(Split_int32) +HTA_DEF_OP(QuantizedSplit_8) + +HTA_DEF_OP(Deconv_f) +HTA_DEF_OP_WREF(QuantizedDeconv_8x8to32) + +HTA_DEF_OP_WREF(QuantizedMul_8x8to32) +HTA_DEF_OP_WREF(QuantizedAdd_8p8to32) +HTA_DEF_OP_WREF(QuantizedSigmoid_8) +HTA_DEF_OP_WREF(QuantizedTanh_8) +HTA_DEF_OP_WREF(QuantizedSoftmax_8) +HTA_DEF_OP_WREF(QuantizedLRN_8) +HTA_DEF_OP_WREF(Quantizedpad2d_frame_8p) +HTA_DEF_OP_WREF(QuantizedSub_8p8to32) +HTA_DEF_OP_WREF(QuantizedMaximum_8) +HTA_DEF_OP_WREF(QuantizedMinimum_8) + +HTA_DEF_OP(Pad_f) +HTA_DEF_OP(SpaceToBatchND_f) +HTA_DEF_OP(BatchToSpaceND_f) +HTA_DEF_OP(QuantizedPad_8) +HTA_DEF_OP(ResizeBilinear_f) +HTA_DEF_OP(ConcatV2_f) +HTA_DEF_OP(ConcatV2_int32) +HTA_DEF_OP(Prod_int32) +HTA_DEF_OP(Slice_int32) + +HTA_DEF_OP(QuantizedAdd_8p8to8) +HTA_DEF_OP(QuantizedResizeBilinear_8) +HTA_DEF_OP(Supernode_8x8p8to8_d32) +HTA_DEF_OP(Convert_to_d32) +HTA_DEF_OP(Convert_from_d32) +HTA_DEF_OP_WREF(QuantizedMaxPool_8_d32) +HTA_DEF_OP_WREF(QuantizedConcat_8_d32) +HTA_DEF_OP_WREF(QuantizedAvgPool_8_d32) + +HTA_DEF_OP(Sink) + +HTA_DEF_OP_WREF(QuantizedPRelu_8_d32) +HTA_DEF_OP_WREF(AutoQuantize) +HTA_DEF_OP_WREF(QuantizedDepthwiseConv2d_8x8to32) +HTA_DEF_OP_WREF(DepthwiseConv2d_f) +HTA_DEF_OP(DepthwiseSupernode_8x8p8to8) +HTA_DEF_OP(DepthwiseSupernode_8x8p8to8_d32) + +HTA_DEF_OP_WREF(QuantizedMul_8x8to8_d32) + +HTA_DEF_OP(FullyConnected_u8) +#if 0 +HTA_DEF_OP_WREF(QuantizedFC_8x8p8to8) +#endif + +HTA_DEF_OP_WREF(QuantizedAdd_8p8to8_d32) + +HTA_DEF_OP_WREF(QuantizedClamp_8) +HTA_DEF_OP(Clamp_f) 
+HTA_DEF_OP(QuantizeForTest_d32) +HTA_DEF_OP(Close_d32) +HTA_DEF_OP_WREF(QuantizedSub_8p8to8_d32) + +HTA_DEF_OP(InputSupernode_8x8p8to8_outd32) +HTA_DEF_OP(QuantizedLRN_8_d32) +HTA_DEF_OP_WREF(QuantizedBiasAdd_32p32to32) +HTA_DEF_OP_WREF(Quantize_int32) + +HTA_DEF_OP(Supernode_8x8p32to8) +HTA_DEF_OP(DepthwiseSupernode_8x8p32to8) +HTA_DEF_OP(Supernode_8x8p32to8_d32) +HTA_DEF_OP(DepthwiseSupernode_8x8p32to8_d32) +HTA_DEF_OP(InputSupernode_8x8p32to8_outd32) + +HTA_DEF_OP(PPrint_8_d32) +HTA_DEF_OP(PPrintWithPadding_8_d32) +HTA_DEF_OP_WREF(AutoQuantize_d32) + +HTA_DEF_OP_WREF(QuantizedTanh_8_d32) +HTA_DEF_OP_WREF(QuantizedSigmoid_8_d32) +HTA_DEF_OP_WREF(QuantizedSoftmax_8_d32) + + +HTA_DEF_OP_WREF(QuantizedL2Pool_8_d32) + +HTA_DEF_OP(Gather_f) +HTA_DEF_OP(Gather_int32) +HTA_DEF_OP(Gather_8) +HTA_DEF_OP(Table_f) +HTA_DEF_OP(Table_int32) +HTA_DEF_OP(Table_8) + +HTA_DEF_OP(FillPadding_8_d32) +HTA_DEF_OP(QuantizedResizeBilinear_8_d32) + +HTA_DEF_OP(QuantizeINPUT_f_to_8) +HTA_DEF_OP_WREF(DeconvBias_8x8to32) + +HTA_DEF_OP(SpaceToBatchND_8) +HTA_DEF_OP(BatchToSpaceND_8) + + +HTA_DEF_OP(SpaceToDepth_f) +HTA_DEF_OP(DepthToSpace_f) +HTA_DEF_OP(SpaceToDepth_8) +HTA_DEF_OP(DepthToSpace_8) + +HTA_DEF_OP(DequantizeOUTPUT_8tof) +HTA_DEF_OP(QuantizedBatchNorm_8x8p8to8) +HTA_DEF_OP(QuantizedBatchNorm_8x8p32to8) +HTA_DEF_OP(QuantizedBatchNorm_8x8p8to8_d32) +HTA_DEF_OP(QuantizedBatchNorm_8x8p32to8_d32) + +HTA_DEF_OP_WREF(QuantizedInstanceNorm_8_d32) +HTA_DEF_OP_WREF(QuantizedInstanceNormBG_8) +HTA_DEF_OP_WREF(QuantizedInstanceNormBG_8_d32) + +HTA_DEF_OP(SuperFC_8x8p32to8) +HTA_DEF_OP(SuperFC_8x8p32to8_ref) +HTA_DEF_OP(SuperFC_8x8p32to8_d32) + +HTA_DEF_OP(ChannelShuffle_f) +HTA_DEF_OP(ChannelShuffle_int32) +HTA_DEF_OP_WREF(QuantizedChannelShuffle_8) +HTA_DEF_OP(QuantizedChannelShuffle_8_d32) +/* this is in op_chanshuffle_d32.c*/ +HTA_DEF_OP(QuantizedSplit_8_d32) + +HTA_DEF_OP(QuantizedCrop_8) +HTA_DEF_OP(ResizeUnitSquare_f) +HTA_DEF_OP_WREF(ResizeUnitSquare_8) +HTA_DEF_OP_WREF(Nv21ToRgb_8) 
+HTA_DEF_OP_WREF(RgbaToRgb_8) +HTA_DEF_OP_WREF(Argb32ToRgb_8) +HTA_DEF_OP(Permute_f) +HTA_DEF_OP(QuantizedPermute_8) +HTA_DEF_OP_WREF(QuantizedRoiPool_8) +HTA_DEF_OP(Proposal_f) +HTA_DEF_OP(RoiAlign_f) +HTA_DEF_OP_WREF(QuantizedRoiAlign_8) +HTA_DEF_OP_WREF(Implode_8) +HTA_DEF_OP(QuantizedConcat_8_nond32) + +HTA_DEF_OP(Close_16tof) +HTA_DEF_OP(QuantizedLstmInput_16x16to16) +HTA_DEF_OP(QuantizedLstmOutput_16x16to8) + +HTA_DEF_OP(Quantize_16) +HTA_DEF_OP(Dequantize_16) +HTA_DEF_OP(Convert_8_16) +HTA_DEF_OP(QuantizedTanh_16) +HTA_DEF_OP(QuantizedSigmoid_16) + +HTA_DEF_OP_WREF(QuantizeDownAndShrinkRange_32to16) +HTA_DEF_OP_WREF(Requantize_32to16) +HTA_DEF_OP_WREF(QuantizedMatMul_8x8p32to16) + +HTA_DEF_OP(QuantizedStridedSlice_8) +HTA_DEF_OP(Bbox_Transform_f) +HTA_DEF_OP(Softmax_uint8) + +HTA_DEF_OP(QuantizedFakeConcat_8_d32) + +HTA_DEF_OP(DepthToSpace_8_d32) +HTA_DEF_OP(OemNode) + +HTA_DEF_OP(QuantizedPad_8_d32) +// Add new operations above this line +#ifdef __SELF_HTA_DEF_OP_WREF +#undef __SELF_HTA_DEF_OP_WREF +#undef HTA_DEF_OP_WREF +#endif + diff --git a/third_party/hta/libhta_dsp_skel.so b/third_party/hta/libhta_dsp_skel.so new file mode 100644 index 0000000000000000000000000000000000000000..6a371cfef8f47e6541be0f6bc307d9ed72aa5c7a Binary files /dev/null and b/third_party/hta/libhta_dsp_skel.so differ diff --git a/third_party/hta/license.txt b/third_party/hta/license.txt new file mode 100644 index 0000000000000000000000000000000000000000..1fc186df55d1d4b6d43eaea9f7e77be6bc470459 --- /dev/null +++ b/third_party/hta/license.txt @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted (subject to the limitations in the + * disclaimer below) provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * * Neither the name of The Linux Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE + * GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT + * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ diff --git a/tools/common.py b/tools/common.py index 450ca58b5fbca3953f4ce52f2a8698be9cbff18d..4ed60a3632f23bb07e5517174d25336498ef4a64 100644 --- a/tools/common.py +++ b/tools/common.py @@ -129,6 +129,7 @@ class DeviceType(object): CPU = 'CPU' GPU = 'GPU' HEXAGON = 'HEXAGON' + HTA = 'HTA' class DataFormat(object): @@ -199,6 +200,8 @@ def parse_device_type(runtime): if runtime == RuntimeType.dsp: device_type = DeviceType.HEXAGON + elif runtime == RuntimeType.hta: + device_type = DeviceType.HTA elif runtime == RuntimeType.gpu: device_type = DeviceType.GPU elif runtime == RuntimeType.cpu: @@ -513,6 +516,7 @@ class RuntimeType(object): cpu = 'cpu' gpu = 'gpu' dsp = 'dsp' + hta = 'hta' cpu_gpu = 'cpu+gpu' diff --git a/tools/converter.py b/tools/converter.py index 8bb9adad2d6f1a1db75c2a7c6fb4ee1470495d7b..0349dd397dbf1977b64e4310b04ea7c29db90c01 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -61,6 +61,7 @@ RuntimeTypeStrs = [ "cpu", "gpu", "dsp", + "hta", "cpu+gpu" ] @@ -142,6 +143,8 @@ def parse_device_type(runtime): if runtime == RuntimeType.dsp: device_type = DeviceType.HEXAGON + elif runtime == RuntimeType.hta: + device_type = DeviceType.HTA elif runtime == RuntimeType.gpu: device_type = DeviceType.GPU elif runtime == RuntimeType.cpu: @@ -163,6 +166,19 @@ def get_hexagon_mode(configs): return False +def get_hta_mode(configs): + runtime_list = [] + for model_name in configs[YAMLKeyword.models]: + model_runtime = \ + configs[YAMLKeyword.models][model_name].get( + YAMLKeyword.runtime, "") + runtime_list.append(model_runtime.lower()) + + if RuntimeType.hta in runtime_list: + return True + return False + + def get_opencl_mode(configs): runtime_list = [] for model_name in configs[YAMLKeyword.models]: @@ -452,6 +468,8 @@ def format_model_config(flags): DeviceType.GPU: ValidationThreshold.gpu_threshold, DeviceType.HEXAGON + "_QUANTIZE": ValidationThreshold.hexagon_threshold, + DeviceType.HTA + "_QUANTIZE": + ValidationThreshold.hexagon_threshold, 
DeviceType.CPU + "_QUANTIZE": ValidationThreshold.cpu_quantize_threshold, } @@ -461,6 +479,7 @@ def format_model_config(flags): if k.upper() not in (DeviceType.CPU, DeviceType.GPU, DeviceType.HEXAGON, + DeviceType.HTA, DeviceType.CPU + "_QUANTIZE"): raise argparse.ArgumentTypeError( 'Unsupported validation threshold runtime: %s' % k) @@ -740,7 +759,6 @@ def build_model_lib(configs, address_sanitizer): # create model library dir library_name = configs[YAMLKeyword.library_name] for target_abi in configs[YAMLKeyword.target_abis]: - hexagon_mode = get_hexagon_mode(configs) model_lib_output_path = get_model_lib_output_path(library_name, target_abi) library_out_dir = os.path.dirname(model_lib_output_path) @@ -751,7 +769,8 @@ def build_model_lib(configs, address_sanitizer): MODEL_LIB_TARGET, abi=target_abi, toolchain=toolchain, - hexagon_mode=hexagon_mode, + enable_hexagon=get_hexagon_mode(configs), + enable_hta=get_hta_mode(configs), enable_opencl=get_opencl_mode(configs), enable_quantize=get_quantize_mode(configs), address_sanitizer=address_sanitizer, @@ -842,7 +861,6 @@ def report_run_statistics(stdout, def build_mace_run(configs, target_abi, toolchain, enable_openmp, address_sanitizer, mace_lib_type): library_name = configs[YAMLKeyword.library_name] - hexagon_mode = get_hexagon_mode(configs) build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi) if os.path.exists(build_tmp_binary_dir): @@ -865,7 +883,8 @@ def build_mace_run(configs, target_abi, toolchain, enable_openmp, mace_run_target, abi=target_abi, toolchain=toolchain, - hexagon_mode=hexagon_mode, + enable_hexagon=get_hexagon_mode(configs), + enable_hta=get_hta_mode(configs), enable_openmp=enable_openmp, enable_opencl=get_opencl_mode(configs), enable_quantize=get_quantize_mode(configs), @@ -880,7 +899,6 @@ def build_mace_run(configs, target_abi, toolchain, enable_openmp, def build_example(configs, target_abi, toolchain, enable_openmp, mace_lib_type, cl_binary_to_code, device): library_name = 
configs[YAMLKeyword.library_name] - hexagon_mode = get_hexagon_mode(configs) build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi) if os.path.exists(build_tmp_binary_dir): @@ -914,7 +932,8 @@ def build_example(configs, target_abi, toolchain, enable_openmp=enable_openmp, enable_opencl=get_opencl_mode(configs), enable_quantize=get_quantize_mode(configs), - hexagon_mode=hexagon_mode, + enable_hexagon=get_hexagon_mode(configs), + enable_hta=get_hta_mode(configs), address_sanitizer=flags.address_sanitizer, symbol_hidden=symbol_hidden) @@ -945,7 +964,8 @@ def build_example(configs, target_abi, toolchain, enable_openmp=enable_openmp, enable_opencl=get_opencl_mode(configs), enable_quantize=get_quantize_mode(configs), - hexagon_mode=hexagon_mode, + enable_hexagon=get_hexagon_mode(configs), + enable_hta=get_hta_mode(configs), address_sanitizer=flags.address_sanitizer, extra_args=build_arg) @@ -1028,7 +1048,6 @@ def build_benchmark_model(configs, enable_openmp, mace_lib_type): library_name = configs[YAMLKeyword.library_name] - hexagon_mode = get_hexagon_mode(configs) link_dynamic = mace_lib_type == MACELibType.dynamic if link_dynamic: @@ -1051,7 +1070,8 @@ def build_benchmark_model(configs, enable_openmp=enable_openmp, enable_opencl=get_opencl_mode(configs), enable_quantize=get_quantize_mode(configs), - hexagon_mode=hexagon_mode, + enable_hexagon=get_hexagon_mode(configs), + enable_hta=get_hta_mode(configs), symbol_hidden=symbol_hidden, extra_args=build_arg) # clear tmp binary dir diff --git a/tools/sh_commands.py b/tools/sh_commands.py index ac00dc66f77a8a3996a9276160e6411b01d60aef..e24d3055a09a4c37e5626bb33d3bb1c56a59549d 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -263,7 +263,8 @@ def find_simpleperf_library(abi, simpleperf_path=''): def bazel_build(target, abi="armeabi-v7a", toolchain='android', - hexagon_mode=False, + enable_hexagon=False, + enable_hta=False, enable_openmp=True, enable_neon=True, enable_opencl=True, @@ -299,7 +300,9 @@ 
def bazel_build(target, "--define", "quantize=%s" % str(enable_quantize).lower(), "--define", - "hexagon=%s" % str(hexagon_mode).lower()) + "hexagon=%s" % str(enable_hexagon).lower(), + "--define", + "hta=%s" % str(enable_hta).lower()) if address_sanitizer: bazel_args += ("--config", "asan") else: