提交 c8d89a2b 编写于 作者: H hong19860320

make all ArmContext share the same DeviceInfo, and export SetRunMode to set thread num

test=develop
上级 ce6c24e6
......@@ -28,9 +28,10 @@ double time_diff(Time t1, Time t2) {
return counter.count() / 1000.0;
}
void Run(const char* model_dir, int repeat) {
void Run(const char* model_dir, int repeat, int thread_num) {
#ifdef LITE_WITH_ARM
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, thread_num);
#endif
lite::ExecutorLite predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
......@@ -66,8 +67,8 @@ void Run(const char* model_dir, int repeat) {
} // namespace paddle
int main(int argc, char** argv) {
CHECK_EQ(argc, 3) << "usage: ./cmd <model_dir> <repeat>";
paddle::lite::Run(argv[1], std::stoi(argv[2]));
CHECK_EQ(argc, 4) << "usage: ./cmd <model_dir> <repeat> <thread_num>";
paddle::lite::Run(argv[1], std::stoi(argv[2]), std::stoi(argv[3]));
return 0;
}
......
......@@ -13,322 +13,7 @@
// limitations under the License.
#include "paddle/fluid/lite/core/context.h"
#include "paddle/fluid/lite/core/cpu_info.h"
#ifdef LITE_WITH_LINUX
#include <sys/syscall.h>
#include <unistd.h>
#endif
#if __APPLE__
#include "TargetConditionals.h"
#if TARGET_OS_IPHONE
#include <mach/machine.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif // TARGET_OS_IPHONE
#endif // __APPLE__
#ifdef ARM_WITH_OMP
#include <omp.h>
#endif
namespace paddle {
namespace lite {
#ifdef LITE_WITH_ARM
void Context<TargetType::kARM>::SetCache(int l1size, int l2size, int l3size) {
DeviceInfo& dev = DeviceInfo::Global();
int cpu_count = arm_get_cpucount();
dev.L1_cache_.resize(cpu_count);
dev.L2_cache_.resize(cpu_count);
dev.L3_cache_.resize(cpu_count);
for (int i = 0; i < cpu_count; ++i) {
dev.L1_cache_[i] = l1size;
dev.L2_cache_[i] = l2size;
dev.L3_cache_[i] = l3size;
}
workspace_.Resize({2 * (l1size + l2size)});
}
Context<TargetType::kARM>::Context() {
active_ids_ = {0};
mode_ = LITE_POWER_HIGH;
DeviceInfo& dev = DeviceInfo::Global();
workspace_.Resize(
{static_cast<int64_t>(dev.L2_cache_[active_ids_[0]] / sizeof(float))});
#ifdef TARGET_IOS
arch_ = APPLE; // use 6x8
#else
if (dev.big_core_ids_.size() > 0) {
arch_ = dev.archs_[dev.big_core_ids_[0]];
}
#endif
}
PowerMode Context<TargetType::kARM>::mode() const { return mode_; }
int Context<TargetType::kARM>::threads() const { return active_ids_.size(); }
Context<TargetType::kARM>::Context(const ARMContext& ctx) {
mode_ = ctx.mode_;
active_ids_ = ctx.active_ids_;
workspace_ = ctx.workspace_;
arch_ = ctx.arch_;
count_ = ctx.count_;
}
ARMContext& Context<TargetType::kARM>::operator=(const ARMContext& ctx) {
mode_ = ctx.mode_;
active_ids_ = ctx.active_ids_;
workspace_ = ctx.workspace_;
arch_ = ctx.arch_;
count_ = ctx.count_;
return *this;
}
void Context<TargetType::kARM>::BindDev() {
#ifdef ARM_WITH_OMP
int num_threads = active_ids_.size();
omp_set_num_threads(num_threads);
#ifdef LITE_WITH_LINUX
std::vector<int> ssarets;
for (int j = 0; j < num_threads; ++j) {
ssarets.push_back(0);
}
#pragma omp parallel for
for (int i = 0; i < num_threads; i++) {
ssarets[i] = set_sched_affinity(active_ids_);
}
for (int i = 0; i < num_threads; i++) {
if (ssarets[i] != 0) {
LOG(ERROR) << "set cpu affinity failed, cpuID: " << active_ids_[i];
return;
}
}
#endif // LITE_WITH_LINUX
#else // ARM_WITH_OMP
#ifdef LITE_WITH_LINUX
std::vector<int> cpuid1;
cpuid1.push_back(active_ids_[0]);
int ssaret = set_sched_affinity(cpuid1);
if (ssaret != 0) {
printf("set cpu affinity failed, cpuID: %d\n", active_ids_[0]);
return;
}
#endif // LITE_WITH_LINUX
#endif // ARM_WITH_OMP
}
void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
DeviceInfo& dev = DeviceInfo::Global();
int big_core_size = dev.big_core_ids_.size();
int small_core_size = dev.little_core_ids_.size();
if (threads > big_core_size + small_core_size) {
threads = big_core_size + small_core_size;
}
#ifdef ARM_WITH_OMP
count_++;
int shift_num = (count_ / 10) % big_core_size;
switch (mode) {
case LITE_POWER_FULL:
mode_ = mode;
active_ids_.clear();
for (int i = 0; i < threads; ++i) {
if (i < big_core_size) {
active_ids_.push_back(dev.big_core_ids_[i]);
} else {
active_ids_.push_back(dev.little_core_ids_[i - big_core_size]);
}
}
if (active_ids_.size() == 0) {
active_ids_.push_back(0);
}
break;
case LITE_POWER_HIGH:
active_ids_.clear();
if (big_core_size > 0) {
mode_ = LITE_POWER_HIGH;
if (threads > big_core_size) {
LOG(ERROR) << "threads: " << threads
<< ", exceed the big cores size: " << big_core_size;
active_ids_ = dev.big_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(dev.big_core_ids_[i]);
}
}
} else {
mode_ = LITE_POWER_LOW;
LOG(ERROR) << "HIGH POWER MODE is not support, switch to little cores";
if (threads > small_core_size) {
active_ids_ = dev.little_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(dev.little_core_ids_[i]);
}
}
}
if (active_ids_.size() == 0) {
active_ids_.push_back(0);
}
break;
case LITE_POWER_LOW:
active_ids_.clear();
if (small_core_size > 0) {
mode_ = LITE_POWER_LOW;
if (threads > small_core_size) {
LOG(WARNING) << "threads: " << threads
<< ", exceed the little cores size: " << small_core_size;
active_ids_ = dev.little_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(dev.little_core_ids_[i]);
}
}
} else {
mode_ = LITE_POWER_HIGH;
LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores";
if (threads > big_core_size) {
active_ids_ = dev.big_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(dev.big_core_ids_[i]);
}
}
}
if (active_ids_.size() == 0) {
active_ids_.push_back(0);
}
break;
case LITE_POWER_NO_BIND:
mode_ = LITE_POWER_NO_BIND;
active_ids_.clear();
if (threads > dev.core_ids_.size()) {
active_ids_.resize(dev.core_ids_.size());
} else {
active_ids_.resize(threads);
}
break;
case LITE_POWER_RAND_HIGH:
active_ids_.clear();
if (big_core_size > 0) {
mode_ = LITE_POWER_RAND_HIGH;
if (threads > big_core_size) {
LOG(WARNING) << "threads: " << threads
<< ", exceed the big cores size: " << big_core_size;
active_ids_ = dev.big_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(
dev.big_core_ids_[(i + shift_num) % big_core_size]);
}
}
} else {
mode_ = LITE_POWER_LOW;
LOG(WARNING)
<< "HIGH POWER MODE is not support, switch to little cores";
if (threads > small_core_size) {
active_ids_ = dev.little_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(dev.little_core_ids_[i]);
}
}
}
if (active_ids_.size() == 0) {
active_ids_.push_back(0);
}
break;
case LITE_POWER_RAND_LOW:
active_ids_.clear();
if (small_core_size > 0) {
mode_ = LITE_POWER_RAND_LOW;
if (threads > small_core_size) {
LOG(WARNING) << "threads: " << threads
<< ", exceed the little cores size: " << small_core_size;
active_ids_ = dev.little_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(
dev.little_core_ids_[(i + shift_num) % small_core_size]);
}
}
} else {
mode_ = LITE_POWER_HIGH;
LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores";
if (threads > big_core_size) {
active_ids_ = dev.big_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(dev.big_core_ids_[i]);
}
}
}
if (active_ids_.size() == 0) {
active_ids_.push_back(0);
}
break;
}
//! fix multi-threads LITE_POWER_HIGH mode
if (mode_ == LITE_POWER_NO_BIND || threads > 1) {
int threads = active_ids_.size();
omp_set_num_threads(threads);
} else {
if (check_online(active_ids_)) {
BindDev();
} else {
LOG(ERROR) << "core id " << active_ids_[0]
<< " is offline, switch to NO BIND MODE";
int threads = active_ids_.size();
omp_set_num_threads(threads);
}
}
#else
if (big_core_size > 0) {
active_ids_ = {dev.big_core_ids_[0]};
} else {
active_ids_ = {0};
}
#endif
//! alloc memory for sgemm in this context
int temp_mem_size =
DeviceInfo::Global().L2_cache_[active_ids_[0]] / sizeof(float);
workspace_.Resize({temp_mem_size});
arch_ = DeviceInfo::Global().archs_[active_ids_[0]];
}
ARMArch Context<TargetType::kARM>::arch() const { return arch_; }
void Context<TargetType::kARM>::SetArch(ARMArch arch) { arch_ = arch; }
int Context<TargetType::kARM>::l1_cache_size() const {
DeviceInfo& dev = DeviceInfo::Global();
return dev.L1_cache_[active_ids_[0]];
}
int Context<TargetType::kARM>::l2_cache_size() const {
DeviceInfo& dev = DeviceInfo::Global();
return dev.L2_cache_[active_ids_[0]];
}
int Context<TargetType::kARM>::l3_cache_size() const {
DeviceInfo& dev = DeviceInfo::Global();
return dev.L3_cache_[active_ids_[0]];
}
bool Context<TargetType::kARM>::ExtendWorkspace(DDimLite dims) {
auto count = dims.product();
auto old = workspace_.dims();
if (count == old.product()) {
return false;
}
workspace_.Resize(
{static_cast<int64_t>(count + l2_cache_size() / sizeof(float))});
return true;
}
#endif // LITE_WITH_ARM
} // namespace lite
namespace lite {} // namespace lite
} // namespace paddle
......@@ -61,47 +61,42 @@ class Context<TargetType::kHost> {
template <>
class Context<TargetType::kARM> {
public:
Context();
Context(PowerMode mode, int threads);
Context() {}
explicit Context(const ARMContext& ctx);
ARMContext& operator=(const ARMContext& ctx);
ARMContext& operator=(const ARMContext& ctx) {}
// NOTE: InitOnce should only be used by ContextScheduler
void InitOnce() { DeviceInfo::Init(); }
void InitOnce() {}
void CopyShared(const ARMContext* ctx) {}
void SetRunMode(PowerMode mode, int threads);
void SetCache(int l1size, int l2size, int l3size);
void SetArch(ARMArch arch);
void BindDev();
void SetRunMode(PowerMode mode, int threads) {
return DeviceInfo::Global().SetRunMode(mode, threads);
}
void SetCache(int l1size, int l2size, int l3size) {
return DeviceInfo::Global().SetCache(l1size, l2size, l3size);
}
void SetArch(ARMArch arch) { return DeviceInfo::Global().SetArch(arch); }
void BindDev() { return DeviceInfo::Global().BindDev(); }
PowerMode mode() const;
int threads() const;
ARMArch arch() const;
PowerMode mode() const { return DeviceInfo::Global().mode(); }
int threads() const { return DeviceInfo::Global().threads(); }
ARMArch arch() const { return DeviceInfo::Global().arch(); }
template <typename T>
T* workspace_data() {
return workspace_.mutable_data<T>();
return DeviceInfo::Global().workspace_data<T>();
}
int l1_cache_size() const;
int l2_cache_size() const;
int l3_cache_size() const;
bool ExtendWorkspace(DDimLite dims);
int l1_cache_size() const { return DeviceInfo::Global().l1_cache_size(); }
int l2_cache_size() const { return DeviceInfo::Global().l2_cache_size(); }
int l3_cache_size() const { return DeviceInfo::Global().l3_cache_size(); }
bool ExtendWorkspace(DDimLite dims) {
return DeviceInfo::Global().ExtendWorkspace(dims);
}
std::string name() const { return "ARMContext"; }
private:
// LITE_POWER_HIGH stands for using big cores,
// LITE_POWER_LOW stands for using small core,
// LITE_POWER_FULL stands for using all cores
ARMArch arch_;
PowerMode mode_;
std::vector<int> active_ids_;
TensorLite workspace_;
int64_t count_{0};
};
#endif
......
......@@ -12,8 +12,24 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef LITE_WITH_LINUX
#include <sys/syscall.h>
#include <unistd.h>
#endif
#if __APPLE__
#include "TargetConditionals.h"
#if TARGET_OS_IPHONE
#include <mach/machine.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif // TARGET_OS_IPHONE
#endif // __APPLE__
#ifdef ARM_WITH_OMP
#include <omp.h>
#endif
#include "paddle/fluid/lite/core/cpu_info.h"
#include <cstdarg>
namespace paddle {
namespace lite {
......@@ -73,6 +89,252 @@ void DeviceInfo::InitInternal(DeviceInfo* dev) {
#elif defined(TARGET_IOS)
arm_get_cpu_arch(&dev->archs_);
#endif
dev->active_ids_ = {0};
dev->mode_ = LITE_POWER_HIGH;
dev->workspace_.Resize({static_cast<int64_t>(
dev->L2_cache_[dev->active_ids_[0]] / sizeof(float))});
#ifdef TARGET_IOS
dev->arch_ = APPLE; // use 6x8
#else
if (dev->big_core_ids_.size() > 0) {
dev->arch_ = dev->archs_[dev->big_core_ids_[0]];
}
#endif
}
void DeviceInfo::SetCache(int l1size, int l2size, int l3size) {
int cpu_count = arm_get_cpucount();
L1_cache_.resize(cpu_count);
L2_cache_.resize(cpu_count);
L3_cache_.resize(cpu_count);
for (int i = 0; i < cpu_count; ++i) {
L1_cache_[i] = l1size;
L2_cache_[i] = l2size;
L3_cache_[i] = l3size;
}
workspace_.Resize({2 * (l1size + l2size)});
}
void DeviceInfo::BindDev() {
#ifdef ARM_WITH_OMP
int num_threads = active_ids_.size();
omp_set_num_threads(num_threads);
#ifdef LITE_WITH_LINUX
std::vector<int> ssarets;
for (int j = 0; j < num_threads; ++j) {
ssarets.push_back(0);
}
#pragma omp parallel for
for (int i = 0; i < num_threads; i++) {
ssarets[i] = set_sched_affinity(active_ids_);
}
for (int i = 0; i < num_threads; i++) {
if (ssarets[i] != 0) {
LOG(ERROR) << "set cpu affinity failed, cpuID: " << active_ids_[i];
return;
}
}
#endif // LITE_WITH_LINUX
#else // ARM_WITH_OMP
#ifdef LITE_WITH_LINUX
std::vector<int> cpuid1;
cpuid1.push_back(active_ids_[0]);
int ssaret = set_sched_affinity(cpuid1);
if (ssaret != 0) {
printf("set cpu affinity failed, cpuID: %d\n", active_ids_[0]);
return;
}
#endif // LITE_WITH_LINUX
#endif // ARM_WITH_OMP
}
void DeviceInfo::SetRunMode(PowerMode mode, int threads) {
LOG(INFO) << "ARM SetRunMode called";
int big_core_size = big_core_ids_.size();
int small_core_size = little_core_ids_.size();
if (threads > big_core_size + small_core_size) {
threads = big_core_size + small_core_size;
}
#ifdef ARM_WITH_OMP
count_++;
int shift_num = (count_ / 10) % big_core_size;
switch (mode) {
case LITE_POWER_FULL:
mode_ = mode;
active_ids_.clear();
for (int i = 0; i < threads; ++i) {
if (i < big_core_size) {
active_ids_.push_back(big_core_ids_[i]);
} else {
active_ids_.push_back(little_core_ids_[i - big_core_size]);
}
}
if (active_ids_.size() == 0) {
active_ids_.push_back(0);
}
break;
case LITE_POWER_HIGH:
active_ids_.clear();
if (big_core_size > 0) {
mode_ = LITE_POWER_HIGH;
if (threads > big_core_size) {
LOG(ERROR) << "threads: " << threads
<< ", exceed the big cores size: " << big_core_size;
active_ids_ = big_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(big_core_ids_[i]);
}
}
} else {
mode_ = LITE_POWER_LOW;
LOG(ERROR) << "HIGH POWER MODE is not support, switch to little cores.";
if (threads > small_core_size) {
active_ids_ = little_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(little_core_ids_[i]);
}
}
}
if (active_ids_.size() == 0) {
active_ids_.push_back(0);
}
break;
case LITE_POWER_LOW:
active_ids_.clear();
if (small_core_size > 0) {
mode_ = LITE_POWER_LOW;
if (threads > small_core_size) {
LOG(WARNING) << "threads: " << threads
<< ", exceed the little cores size: " << small_core_size;
active_ids_ = little_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(little_core_ids_[i]);
}
}
} else {
mode_ = LITE_POWER_HIGH;
LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores";
if (threads > big_core_size) {
active_ids_ = big_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(big_core_ids_[i]);
}
}
}
if (active_ids_.size() == 0) {
active_ids_.push_back(0);
}
break;
case LITE_POWER_NO_BIND:
mode_ = LITE_POWER_NO_BIND;
active_ids_.clear();
if (threads > core_ids_.size()) {
active_ids_.resize(core_ids_.size());
} else {
active_ids_.resize(threads);
}
break;
case LITE_POWER_RAND_HIGH:
active_ids_.clear();
if (big_core_size > 0) {
mode_ = LITE_POWER_RAND_HIGH;
if (threads > big_core_size) {
LOG(WARNING) << "threads: " << threads
<< ", exceed the big cores size: " << big_core_size;
active_ids_ = big_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(
big_core_ids_[(i + shift_num) % big_core_size]);
}
}
} else {
mode_ = LITE_POWER_LOW;
LOG(WARNING)
<< "HIGH POWER MODE is not support, switch to little cores.";
if (threads > small_core_size) {
active_ids_ = little_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(little_core_ids_[i]);
}
}
}
if (active_ids_.size() == 0) {
active_ids_.push_back(0);
}
break;
case LITE_POWER_RAND_LOW:
active_ids_.clear();
if (small_core_size > 0) {
mode_ = LITE_POWER_RAND_LOW;
if (threads > small_core_size) {
LOG(WARNING) << "threads: " << threads
<< ", exceed the little cores size: " << small_core_size;
active_ids_ = little_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(
little_core_ids_[(i + shift_num) % small_core_size]);
}
}
} else {
mode_ = LITE_POWER_HIGH;
LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores.";
if (threads > big_core_size) {
active_ids_ = big_core_ids_;
} else {
for (int i = 0; i < threads; ++i) {
active_ids_.push_back(big_core_ids_[i]);
}
}
}
if (active_ids_.size() == 0) {
active_ids_.push_back(0);
}
break;
}
//! fix multi-threads LITE_POWER_HIGH mode
if (mode_ == LITE_POWER_NO_BIND || threads > 1) {
int threads = active_ids_.size();
omp_set_num_threads(threads);
} else {
if (check_online(active_ids_)) {
BindDev();
} else {
LOG(WARNING) << "core id " << active_ids_[0]
<< " is offline, switch to NO BIND MODE";
int threads = active_ids_.size();
omp_set_num_threads(threads);
}
}
#else
if (big_core_size > 0) {
active_ids_ = {big_core_ids_[0]};
} else {
active_ids_ = {0};
}
#endif
//! alloc memory for sgemm in this context
int temp_mem_size = L2_cache_[active_ids_[0]] / sizeof(float);
workspace_.Resize({temp_mem_size});
arch_ = archs_[active_ids_[0]];
}
bool DeviceInfo::ExtendWorkspace(DDimLite dims) {
auto count = dims.product();
auto old = workspace_.dims();
if (count == old.product()) {
return false;
}
workspace_.Resize({static_cast<int64_t>(
count + L2_cache_[active_ids_[0]] / sizeof(float))});
return true;
}
// cache_id : 0 -> L1, 1 -> L2, 2 -> L3
......
......@@ -16,22 +16,9 @@
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/lite_tensor.h"
#include "paddle/fluid/lite/utils/cp_logging.h"
#ifdef LITE_WITH_LINUX
#include <sys/syscall.h>
#include <unistd.h>
#endif
#if __APPLE__
#include "TargetConditionals.h"
#if TARGET_OS_IPHONE
#include <mach/machine.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif // TARGET_OS_IPHONE
#endif // __APPLE__
namespace paddle {
namespace lite {
......@@ -80,6 +67,15 @@ class DeviceInfo {
std::vector<int> cluster_ids_;
std::vector<ARMArch> archs_;
ARMArch arch_;
// LITE_POWER_HIGH stands for using big cores,
// LITE_POWER_LOW stands for using small core,
// LITE_POWER_FULL stands for using all cores
PowerMode mode_;
std::vector<int> active_ids_;
TensorLite workspace_;
int64_t count_{0};
static DeviceInfo& Global() {
static auto* x = new DeviceInfo;
return *x;
......@@ -90,6 +86,25 @@ class DeviceInfo {
InitInternal(&info);
}
void SetRunMode(PowerMode mode, int threads);
void SetCache(int l1size, int l2size, int l3size);
void SetArch(ARMArch arch) { arch_ = arch; }
void BindDev();
PowerMode mode() const { return mode_; }
int threads() const { return active_ids_.size(); }
ARMArch arch() const { return arch_; }
template <typename T>
T* workspace_data() {
return workspace_.mutable_data<T>();
}
int l1_cache_size() const { return L1_cache_[active_ids_[0]]; }
int l2_cache_size() const { return L2_cache_[active_ids_[0]]; }
int l3_cache_size() const { return L3_cache_[active_ids_[0]]; }
bool ExtendWorkspace(DDimLite dims);
private:
DeviceInfo() = default;
static void InitInternal(DeviceInfo* dev);
......
......@@ -28,8 +28,6 @@ void ConvCompute::PrepareForRun() {
auto o_dims = param.output->dims();
auto& ctx = this->ctx_->template As<ARMContext>();
// TODO(xxx): make api and expose it
ctx.SetRunMode(LITE_POWER_HIGH, 4);
int win = x_dims[3]; // nchw
int hin = x_dims[2];
......
......@@ -28,7 +28,6 @@ void FcCompute::PrepareForRun() {
auto w_dims = param.w->dims();
auto& ctx = this->ctx_->template As<ARMContext>();
ctx.SetRunMode(LITE_POWER_HIGH, 4);
CHECK_GE(x_dims.size(), 2UL);
CHECK_EQ(w_dims.size(), 2UL);
......
......@@ -24,7 +24,6 @@ namespace arm {
void MulCompute::PrepareForRun() {
auto& ctx = this->ctx_->template As<ARMContext>();
ctx.SetRunMode(LITE_POWER_HIGH, 4);
}
void MulCompute::Run() {
......
......@@ -26,7 +26,6 @@ namespace arm {
void PoolCompute::PrepareForRun() {
auto& ctx = this->ctx_->template As<ARMContext>();
ctx.SetRunMode(LITE_POWER_HIGH, 4);
}
void PoolCompute::Run() {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册