提交 5a0a017c 编写于 作者: 卢旭辉

Merge branch 'power' into 'master'

Support Hexagon power control

See merge request !1218
......@@ -90,6 +90,18 @@ enum CPUAffinityPolicy {
AFFINITY_POWER_SAVE = 4,
};
// Voltage corners that clock frequencies are tied to. For the details of each
// corner, please refer to docs/Hap_power_set_dcvs_2.html in the Hexagon SDK.
// The numeric values are spelled out explicitly; they are the same consecutive
// values (starting at zero) that the implicit numbering produced.
enum HexagonNNCornerType {
  HEXAGON_NN_CORNER_RELEASE = 0,
  HEXAGON_NN_CORNER_TURBO = 1,
  HEXAGON_NN_CORNER_NOMPLUS = 2,
  HEXAGON_NN_CORNER_NOMINAL = 3,
  HEXAGON_NN_CORNER_SVSPLUS = 4,
  HEXAGON_NN_CORNER_SVS = 5,
  HEXAGON_NN_CORNER_SVS2 = 6,
};
struct CallStats {
int64_t start_micros;
int64_t end_micros;
......@@ -281,7 +293,7 @@ class MACE_API MaceEngineConfig {
///
/// Just use one GPUContext for multiple models run on GPU.
/// \param context created use GPUContextBuilder
/// \return MaceStatus::MACE_SUCCESS for success, other for failed.
/// \return MaceStatus::MACE_SUCCESS for success, other for failure.
MaceStatus SetGPUContext(std::shared_ptr<GPUContext> context);
/// \brief Set GPU hints, currently only supports Adreno GPU.
......@@ -291,7 +303,7 @@ class MACE_API MaceEngineConfig {
///
/// \param perf_hint performance hint
/// \param priority_hint priority hint
/// \return MaceStatus::MACE_SUCCESS for success, other for failed.
/// \return MaceStatus::MACE_SUCCESS for success, other for failure.
MaceStatus SetGPUHints(GPUPerfHint perf_hint,
GPUPriorityHint priority_hint);
......@@ -312,10 +324,27 @@ class MACE_API MaceEngineConfig {
/// \param status MACE_SUCCESS for successful, or it can't reliably
/// detect big-LITTLE cores (see GetBigLittleCoreIDs). In such cases, it's
/// suggested to use AFFINITY_NONE to use all cores.
/// \return MaceStatus::MACE_SUCCESS for success, other for failed.
/// \return MaceStatus::MACE_SUCCESS for success, other for failure.
MaceStatus SetCPUThreadPolicy(int num_threads_hint,
CPUAffinityPolicy policy);
/// \brief Set Hexagon DSP power parameters
///
/// Caution: this function may hurt performance if improper
/// parameters are provided. For most performance-critical applications,
/// setting HexagonNNCornerType to HEXAGON_NN_CORNER_TURBO, enabling dynamic
/// clock voltage scaling (DCVS) and setting sleep latency to 100us works
/// just fine.
/// If a more balanced scheme between performance and power consumption
/// is needed, these three parameters may be tweaked to achieve that.
/// \param corner DCVS voltage target corner, can be set even when DCVS
/// is disabled.
/// \param dcvs_enable enable or disable DCVS.
/// \param latency sleep latency, in microseconds.
/// \return MaceStatus::MACE_SUCCESS for success, other for failure.
MaceStatus SetHexagonPower(HexagonNNCornerType corner,
bool dcvs_enable,
int latency);
private:
class Impl;
std::unique_ptr<Impl> impl_;
......
......@@ -37,7 +37,7 @@ endif(MACE_ENABLE_HEXAGON_DSP)
# Extra source file and link libraries that are only added when
# MACE_ENABLE_HEXAGON_HTA is set.
if(MACE_ENABLE_HEXAGON_HTA)
set(CORE_SRCS ${CORE_SRCS} runtime/hexagon/hexagon_hta_wrapper.cc)
# NOTE(review): the two EXTRA_LINK_LIBS lines below look like the before/after
# pair of a single diff hunk; as written both are applied, so
# hta_hexagon_runtime is appended twice. Confirm which line is intended.
set(EXTRA_LINK_LIBS ${EXTRA_LINK_LIBS} hta_controller hta_hexagon_runtime npu)
set(EXTRA_LINK_LIBS ${EXTRA_LINK_LIBS} hta_hexagon_runtime)
endif(MACE_ENABLE_HEXAGON_HTA)
if(MACE_ENABLE_MTK_APU)
......
......@@ -87,17 +87,53 @@ std::string FloatToString(const FloatType v, const int32_t precision) {
stream << std::fixed << std::setprecision(precision) << v;
return stream.str();
}
// Translates the public HexagonNNCornerType value into the matching
// hexagon_nn_corner_type value of the Hexagon NN interface.
//
// \param corner corner requested through the MACE API.
// \return the NN_CORNER_* counterpart. An unrecognized value is reported via
//         LOG(FATAL); NN_CORNER_TURBO is the value handed back on that path.
hexagon_nn_corner_type TransformCornerType(HexagonNNCornerType corner) {
  // Pre-loaded with the value used on the unrecognized-input path.
  hexagon_nn_corner_type nn_corner = NN_CORNER_TURBO;
  switch (corner) {
    case HEXAGON_NN_CORNER_RELEASE:
      nn_corner = NN_CORNER_RELEASE;
      break;
    case HEXAGON_NN_CORNER_TURBO:
      nn_corner = NN_CORNER_TURBO;
      break;
    case HEXAGON_NN_CORNER_NOMPLUS:
      nn_corner = NN_CORNER_NOMPLUS;
      break;
    case HEXAGON_NN_CORNER_NOMINAL:
      nn_corner = NN_CORNER_NOMINAL;
      break;
    case HEXAGON_NN_CORNER_SVSPLUS:
      nn_corner = NN_CORNER_SVSPLUS;
      break;
    case HEXAGON_NN_CORNER_SVS:
      nn_corner = NN_CORNER_SVS;
      break;
    case HEXAGON_NN_CORNER_SVS2:
      nn_corner = NN_CORNER_SVS2;
      break;
    default:
      LOG(FATAL) << "Wrong Hexagon NN corner type: " << corner;
      break;
  }
  return nn_corner;
}
} // namespace
// Parses `text` as a base-10 integer flag without throwing.
//
// Accepts what std::stoi accepts: optional leading whitespace, an optional
// sign, then at least one decimal digit; anything after the digit run is
// ignored. On success `*enabled` is set to whether the number is non-zero.
// Returns false (leaving `*enabled` untouched) when no digits are present.
static bool ParseLogExecuteTimeFlag(const std::string &text, bool *enabled) {
  const std::string::size_type npos = std::string::npos;
  std::string::size_type pos = text.find_first_not_of(" \t\n\v\f\r");
  if (pos == npos) {
    return false;  // nothing but whitespace
  }
  if (text[pos] == '+' || text[pos] == '-') {
    ++pos;
  }
  const std::string::size_type digits_end =
      text.find_first_not_of("0123456789", pos);
  const bool has_digits = pos < text.size() && digits_end != pos;
  if (!has_digits) {
    return false;
  }
  // The value is non-zero iff some digit inside the digit run is not '0'.
  // Looking at the characters instead of converting them also means an
  // arbitrarily long number cannot overflow.
  const std::string::size_type first_nonzero = text.find_first_not_of('0', pos);
  *enabled = first_nonzero != npos &&
             (digits_end == npos || first_nonzero < digits_end);
  return true;
}

// Reads the MACE_DSP_LOG_EXECUTE_TIME environment variable to decide whether
// the DSP cycle count is logged after each graph execution.
//
// Unset or empty: logging stays disabled. An integer value enables logging
// when it is non-zero. The previous implementation passed the raw text to
// std::stoi, which throws for values such as "true" or an out-of-range number
// and so could bring the process down from inside a constructor; such values
// are now handled without exceptions (non-numeric text is reported with a
// warning and leaves logging disabled).
HexagonDSPWrapper::HexagonDSPWrapper() : log_execute_time_(false) {
  std::string env_log_execute_time_str;
  GetEnv("MACE_DSP_LOG_EXECUTE_TIME", &env_log_execute_time_str);
  if (env_log_execute_time_str.empty()) {
    return;
  }
  bool enabled = false;
  if (!ParseLogExecuteTimeFlag(env_log_execute_time_str, &enabled)) {
    LOG(WARNING) << "Ignore invalid MACE_DSP_LOG_EXECUTE_TIME value: "
                 << env_log_execute_time_str;
    return;
  }
  log_execute_time_ = enabled;
}
// Queries the version through hexagon_nn_version.
//
// \return the value hexagon_nn_version wrote into `version`.
// A non-zero result from hexagon_nn_version fails the MACE_CHECK with the
// message "get version error".
int HexagonDSPWrapper::GetVersion() {
// Left uninitialized on purpose: it is only read after the checked call below
// has filled it in.
int version;
MACE_CHECK(hexagon_nn_version(&version) == 0, "get version error");
return version;
}
// Applies the requested clock settings through hexagon_nn_set_clocks.
//
// \param corner voltage corner, converted with TransformCornerType.
// \param dcvs_enable selects NN_DCVS_ENABLE when true, NN_DCVS_DISABLE
//        otherwise.
// \param latency sleep latency; negative values are clamped to 0 before the
//        conversion to an unsigned value.
// \return true when hexagon_nn_set_clocks reports 0, false otherwise.
bool HexagonDSPWrapper::SetPower(HexagonNNCornerType corner,
                                 bool dcvs_enable,
                                 int latency) {
  const hexagon_nn_corner_type nn_corner = TransformCornerType(corner);
  const auto nn_dcvs = dcvs_enable ? NN_DCVS_ENABLE : NN_DCVS_DISABLE;
  const auto sleep_latency = static_cast<uint32_t>(std::max(0, latency));
  return hexagon_nn_set_clocks(nn_corner, nn_dcvs, sleep_latency) == 0;
}
// Configures the Hexagon NN runtime: sets the powersave level to 0 and then
// calls hexagon_nn_config. Each call is wrapped in MACE_CHECK, so a non-zero
// result fails the check with the corresponding message.
//
// \return always true when both checks pass.
bool HexagonDSPWrapper::Config() {
LOG(INFO) << "Hexagon config";
// Powersave level is set before hexagon_nn_config is invoked.
MACE_CHECK(hexagon_nn_set_powersave_level(0) == 0, "hexagon power error");
MACE_CHECK(hexagon_nn_config() == 0, "hexagon config error");
return true;
}
......@@ -111,7 +147,7 @@ bool HexagonDSPWrapper::Init() {
// Logs "Hexagon finalize" and reports whether the power-related teardown call
// returned 0.
//
// NOTE(review): two return statements appear back to back below. As written,
// only the first one (hexagon_nn_set_powersave_level(1)) can execute and the
// hexagon_nn_remove_clocks() call is unreachable. This looks like the
// before/after pair of one diff hunk -- confirm which line is intended.
bool HexagonDSPWrapper::Finalize() {
LOG(INFO) << "Hexagon finalize";
return hexagon_nn_set_powersave_level(1) == 0;
return hexagon_nn_remove_clocks() == 0;
}
bool HexagonDSPWrapper::SetupGraph(const NetDef &net_def,
......@@ -432,6 +468,11 @@ bool HexagonDSPWrapper::ExecuteGraph(const Tensor &input_tensor,
}
MACE_CHECK(output_bytes == output_tensor->raw_size(),
"wrong output bytes inferred.");
if (log_execute_time_) {
LOG(INFO) << "dsp cycles: " << GetLastExecuteCycles();
}
return true;
}
......@@ -439,8 +480,8 @@ bool HexagonDSPWrapper::ExecuteGraphNew(
const std::map<std::string, Tensor*> &input_tensors,
std::map<std::string, Tensor*> *output_tensors) {
VLOG(2) << "Execute graph new: " << nn_id_;
uint32_t num_inputs = static_cast<uint32_t>(input_tensors.size());
uint32_t num_outputs = static_cast<uint32_t>(output_tensors->size());
auto num_inputs = static_cast<uint32_t>(input_tensors.size());
auto num_outputs = static_cast<uint32_t>(output_tensors->size());
MACE_CHECK(num_inputs_ == static_cast<int>(num_inputs), "Wrong inputs num");
MACE_CHECK(num_outputs_ == static_cast<int>(num_outputs),
"Wrong outputs num");
......@@ -519,7 +560,18 @@ bool HexagonDSPWrapper::ExecuteGraphNew(
" wrong output bytes inferred.");
}
if (log_execute_time_) {
LOG(INFO) << "dsp cycles: " << GetLastExecuteCycles();
}
return true;
}
uint64_t HexagonDSPWrapper::GetLastExecuteCycles() {
uint32_t cycle_lo;
uint32_t cycle_hi;
hexagon_nn_last_execution_cycles(nn_id_, &cycle_lo, &cycle_hi);
return (static_cast<uint64_t>(cycle_hi) << 32) | cycle_lo;
}
} // namespace mace
......@@ -27,7 +27,7 @@ namespace mace {
class HexagonDSPWrapper : public HexagonControlWrapper {
public:
HexagonDSPWrapper() = default;
HexagonDSPWrapper();
int GetVersion() override;
bool Config() override;
......@@ -46,6 +46,15 @@ class HexagonDSPWrapper : public HexagonControlWrapper {
void ResetPerfInfo() override;
void SetDebugLevel(int level) override;
static bool SetPower(HexagonNNCornerType corner,
bool dcvs_enable,
int latency);
private:
uint64_t GetLastExecuteCycles();
bool log_execute_time_;
MACE_DISABLE_COPY_AND_ASSIGN(HexagonDSPWrapper);
};
} // namespace mace
......
......@@ -34,7 +34,10 @@
#include "mace/core/runtime/opencl/opencl_runtime.h"
#endif // MACE_ENABLE_OPENCL
#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
#if defined(MACE_ENABLE_HEXAGON)
#include "mace/core/runtime/hexagon/hexagon_device.h"
#include "mace/core/runtime/hexagon/hexagon_dsp_wrapper.h"
#elif defined(MACE_ENABLE_HTA)
#include "mace/core/runtime/hexagon/hexagon_device.h"
#endif
......@@ -189,6 +192,10 @@ class MaceEngineConfig::Impl {
MaceStatus SetCPUThreadPolicy(int num_threads_hint,
CPUAffinityPolicy policy);
MaceStatus SetHexagonPower(HexagonNNCornerType corner,
bool dcvs_enable,
int latency);
inline DeviceType device_type() const {
return device_type_;
}
......@@ -228,7 +235,13 @@ MaceEngineConfig::Impl::Impl(const DeviceType device_type)
cpu_affinity_policy_(CPUAffinityPolicy::AFFINITY_NONE),
gpu_context_(nullptr),
gpu_priority_hint_(GPUPriorityHint::PRIORITY_LOW),
gpu_perf_hint_(GPUPerfHint::PERF_NORMAL) {}
gpu_perf_hint_(GPUPerfHint::PERF_NORMAL) {
#ifdef MACE_ENABLE_HEXAGON
if (!HexagonDSPWrapper::SetPower(HEXAGON_NN_CORNER_TURBO, true, 100)) {
LOG(WARNING) << "Hexagon set default clocks failed!";
}
#endif
}
MaceStatus MaceEngineConfig::Impl::SetGPUContext(
std::shared_ptr<GPUContext> context) {
......@@ -252,6 +265,20 @@ MaceStatus MaceEngineConfig::Impl::SetCPUThreadPolicy(
return MaceStatus::MACE_SUCCESS;
}
// Forwards the Hexagon power parameters to HexagonDSPWrapper::SetPower when
// the library is built with MACE_ENABLE_HEXAGON.
//
// \param corner voltage corner to request.
// \param dcvs_enable whether DCVS is enabled.
// \param latency sleep latency.
// \return MaceStatus::MACE_SUCCESS when SetPower succeeds;
//         MaceStatus::MACE_RUNTIME_ERROR when it fails or when
//         MACE_ENABLE_HEXAGON is not defined.
MaceStatus MaceEngineConfig::Impl::SetHexagonPower(
    HexagonNNCornerType corner,
    bool dcvs_enable,
    int latency) {
#ifdef MACE_ENABLE_HEXAGON
  if (HexagonDSPWrapper::SetPower(corner, dcvs_enable, latency)) {
    return MaceStatus::MACE_SUCCESS;
  }
#else
  // Without Hexagon support nothing consumes the arguments.
  MACE_UNUSED(corner);
  MACE_UNUSED(dcvs_enable);
  MACE_UNUSED(latency);
#endif
  return MaceStatus::MACE_RUNTIME_ERROR;
}
// Creates the configuration object; all state lives in a freshly allocated
// Impl built for the given device type.
MaceEngineConfig::MaceEngineConfig(const DeviceType device_type)
    : impl_(new Impl(device_type)) {
}
......@@ -275,6 +302,13 @@ MaceStatus MaceEngineConfig::SetCPUThreadPolicy(
return impl_->SetCPUThreadPolicy(num_threads_hint, policy);
}
// Public entry point for the Hexagon power settings; delegates to the Impl
// object and hands its status back unchanged.
MaceStatus MaceEngineConfig::SetHexagonPower(HexagonNNCornerType corner,
                                             bool dcvs_enable,
                                             int latency) {
  MaceStatus status = impl_->SetHexagonPower(corner, dcvs_enable, latency);
  return status;
}
// Mace Tensor
class MaceTensor::Impl {
public:
......
......@@ -19,7 +19,11 @@ cc_binary(
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
] + if_opencl_enabled(["-DMACE_ENABLE_OPENCL"]),
] + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON",
]),
linkstatic = 1,
deps = [
"//external:gflags_nothreads",
......@@ -39,7 +43,11 @@ cc_binary(
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
] + if_opencl_enabled(["-DMACE_ENABLE_OPENCL"]),
] + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON",
]),
linkopts = [
"-lm",
] + if_android([
......
......@@ -196,7 +196,9 @@ bool RunModel(const std::string &model_name,
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
#endif // MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_HEXAGON
config.SetHexagonPower(HEXAGON_NN_CORNER_TURBO, true, 100);
#endif
std::unique_ptr<mace::port::ReadOnlyMemoryRegion> model_graph_data =
make_unique<mace::port::ReadOnlyBufferMemoryRegion>();
if (FLAGS_model_file != "") {
......
......@@ -165,6 +165,8 @@ __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute)(hexagon_nn_nn_id id,
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_teardown)(hexagon_nn_nn_id id) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_powersave_level)(unsigned int level) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_powersave_details)(hexagon_nn_corner_type corner, hexagon_nn_dcvs_type dcvs, unsigned int latency) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_clocks)(hexagon_nn_corner_type corner, hexagon_nn_dcvs_type dcvs, unsigned int latency) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_remove_clocks)() __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_get_perfinfo)(hexagon_nn_nn_id id, hexagon_nn_perfinfo* info_out, int info_outLen, unsigned int* n_items) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_reset_perfinfo)(hexagon_nn_nn_id id, unsigned int event) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_last_execution_cycles)(hexagon_nn_nn_id id, unsigned int* cycles_lo, unsigned int* cycles_hi) __QAIC_HEADER_ATTRIBUTE;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册