/*!
 * Copyright (c) 2016 by Contributors
 * \file acl_operator.cc
 * \brief
 * \author Joey
 */
#if USE_ACL == 1
#include "acl_operator.h"

#include <stdio.h>  /* printf */
#include <stdlib.h> /* getenv */
#include <string.h> /* memcpy */

// Bitmask of layer classes for which the ACL path is bypassed; it can be
// overridden at runtime through the BYPASSACL environment variable (parsed
// in the ACLOperator constructor below).
unsigned int bypass_acl_class_layer =
    (0 | FLAGS_ENABLE_ACL_CONCAT |
     /*0xffffffff |*/ /*FLAGS_ENABLE_ACL_FC |*/ /*FLAGS_ENABLE_ACL_LRN |*/
     0);

int enable_schedule = 0;

#ifdef USE_PROFILING

#include "arm_neon.h"

// Profiling log mask: selects which layer types report timing information.
// It can be overridden at runtime through the LOGACL environment variable.
unsigned int acl_log_flags =
    (0 | MASK_LOG_APP_TIME | /*MASK_LOG_ALLOCATE | */ /*MASK_LOG_RUN | */
     /*MASK_LOG_CONFIG | */ /*MASK_LOG_COPY | */
     MASK_LOG_ABSVAL | MASK_LOG_BNLL | MASK_LOG_CONV | MASK_LOG_FC |
     MASK_LOG_LRN | MASK_LOG_POOLING | MASK_LOG_RELU | MASK_LOG_SIGMOID |
     MASK_LOG_SOFTMAX | MASK_LOG_TANH | MASK_LOG_LC | MASK_LOG_BN |
     MASK_LOG_CONCAT | 0);
#endif  // USE_PROFILING

static bool force_enable_gpu = false;

// Turns the ACL scheduler on or off; enabling it also forces operators
// created afterwards onto the GPU path.
bool AclEnableSchedule(int enable) {
  enable_schedule = enable;
  if (enable) {
    force_enable_gpu = true;
  }
  return true;
}

int isScheduleEnable() { return enable_schedule; }

namespace paddle_mobile {
namespace operators {
namespace acl {

bool ACLOperator::init_gpu_env = true;
#ifdef USE_OPENCL
bool ACLOperator::support_opencl_ = false;
bool opencl_is_available() { return arm_compute::opencl_is_available(); }
#elif defined(USE_OPENGLES)
bool ACLOperator::support_opengles_ = false;
#endif

ACLOperator::ACLOperator(bool is_gpu)
    : operator_state_(operator_not_init),
      force_bypass_acl_path_(false),
      target_hint_(TargetHint::DONT_CARE),
      convolution_method_hint_(ConvolutionMethodHint::GEMM),
      _group(1),
      name_(""),
      input_idx_(0),
      output_idx_(0),
      is_gpu_(is_gpu) {
  // Initialize the GPU scheduler once for the whole process.
  if (init_gpu_env) {
#ifdef USE_OPENCL
    try {
      if (opencl_is_available()) {
        arm_compute::CLScheduler::get().default_init();
        support_opencl_ = true;
      }
    } catch (std::exception& e) {
      support_opencl_ = false;
    }
#elif defined(USE_OPENGLES)
    try {
      arm_compute::GCScheduler::get().default_init();
      support_opengles_ = true;
    } catch (std::exception& e) {
      support_opengles_ = false;
    }
#endif
    init_gpu_env = false;
  }
  if (force_enable_gpu) is_gpu_ = true;

  // Allow the bypass mask to be overridden from the environment.
  const char* pBypassACL = getenv("BYPASSACL");
  if (pBypassACL) {
    unsigned int bacl;
    sscanf(pBypassACL, "%i", &bacl);
    if (bacl != bypass_acl_class_layer) {
      bypass_acl_class_layer = bacl;
      printf("BYPASSACL<%s>\n", pBypassACL);
      printf("BYPASSACL: %x\n", bypass_acl_class_layer);
    }
  }

#ifdef USE_PROFILING
  // Allow the profiling log mask to be overridden from the environment.
  const char* pLogACL = getenv("LOGACL");
  if (pLogACL) {
    unsigned int alf;
    sscanf(pLogACL, "%i", &alf);
    if (alf != acl_log_flags) {
      acl_log_flags = alf;
      printf("LOGACL<%s>\n", pLogACL);
      printf("LOGACL: %x\n", acl_log_flags);
    }
  }
#endif  // USE_PROFILING

  // Allow the scheduler to be enabled from the environment.
  const char* pEnableSchedule = getenv("ENABLESCHEDULE");
  if (pEnableSchedule) {
    int bschedule;
    sscanf(pEnableSchedule, "%i", &bschedule);
    if (bschedule != enable_schedule) {
      enable_schedule = bschedule;
      printf("ENABLESCHEDULE<%s>\n", pEnableSchedule);
      printf("ENABLESCHEDULE: %x\n", enable_schedule);
    }
    if (enable_schedule) {
      AclEnableSchedule(1);
    }
  }
}

ACLOperator::~ACLOperator() {}

// Creates an F32 ACL tensor with the given shape and binds it to the host
// memory block `mem`; the data is copied into the tensor when it is committed.
bool ACLOperator::new_tensor(std::unique_ptr<ACLTensor>& tensor,
                             arm_compute::TensorShape& shape, void* mem,
                             bool commit) {
  auto acl_tensor =
      new ACLTensor(arm_compute::TensorInfo(shape, arm_compute::Format::F32));
  acl_tensor->set_target(getTargetHint());
  acl_tensor->bindmem(mem);
  if (commit) acl_tensor->commit();
  tensor.reset(acl_tensor);
  return true;
}

// Creates a sub-tensor that views a region of `parent` starting at `coord`.
bool ACLOperator::new_tensor(std::unique_ptr<ACLSubTensor>& tensor,
                             std::unique_ptr<ACLTensor>& parent,
                             arm_compute::TensorShape& shape,
                             arm_compute::Coordinates& coord) {
  auto acl_tensor = new ACLSubTensor(parent, shape, coord);
  acl_tensor->set_target(getTargetHint());
  tensor.reset(acl_tensor);
  return true;
}

// Allocates backing storage on the first commit and, for non-output tensors,
// copies the bound host memory into the ACL tensor.
void ACLTensor::commit(TensorType type) {
  settensortype(type);
  if (mem_) {
    if (!allocate_) {
#ifdef USE_PROFILING
      logtime_util log_time(ACL_ALLOCATE_INFO);
#endif  // USE_PROFILING
      allocate();
      allocate_ = true;
    }
    if (type_ != tensor_output) {
      tensor_copy(mem_);
    }
    mem_ = nullptr;
  }
}

// Copies data row by row between a contiguous host buffer and an ACL tensor.
// When toTensor is true the host buffer is the source, otherwise the tensor is.
int BaseACLTensor::tensor_copy(arm_compute::ITensor* tensor, void* mem,
                               bool toTensor) {
#ifdef USE_PROFILING
  logtime_util log_time(ACL_COPY_INFO);
#endif  // USE_PROFILING
  arm_compute::Window window;
  // Iterate through the rows (not each element).
  window.use_tensor_dimensions(
      tensor->info()->tensor_shape(),
      /* first_dimension = */ arm_compute::Window::DimY);

  int width = tensor->info()->tensor_shape()[0];
  int height = tensor->info()->tensor_shape()[1];
  int depth = tensor->info()->tensor_shape()[2];
  map();
  // Create an iterator; it works for an arbitrary number of dimensions.
  arm_compute::Iterator it(tensor, window);
  if (toTensor) {  // mem -> tensor
    arm_compute::execute_window_loop(
        window,
        [&](const arm_compute::Coordinates& id) {
          memcpy(it.ptr(),
                 ((char*)mem) +
                     ((id[3] * (width * height * depth) +
                       id.z() * (width * height) + id.y() * width + id.x()) *
                      tensor->info()->element_size()),
                 width * tensor->info()->element_size());
        },
        it);
  } else {  // tensor -> mem
    arm_compute::execute_window_loop(
        window,
        [&](const arm_compute::Coordinates& id) {
          memcpy(((char*)mem) + ((id[3] * (width * height * depth) +
                                  id.z() * (width * height) + id.y() * width) *
                                 tensor->info()->element_size()),
                 it.ptr(), width * tensor->info()->element_size());
        },
        it);
  }
  unmap();

  return 0;
}

}  // namespace acl
}  // namespace operators
}  // namespace paddle_mobile
#endif
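
// ---------------------------------------------------------------------------
// Usage note (illustrative sketch, not part of the upstream implementation):
// the runtime knobs parsed in ACLOperator's constructor can be set from the
// environment before the first operator is constructed, e.g.
//
//   BYPASSACL=0x0 LOGACL=0xffffffff ENABLESCHEDULE=1 ./your_app
//
// where "./your_app" and the chosen mask values are placeholders; the bit
// meanings follow the FLAGS_ENABLE_ACL_* and MASK_LOG_* masks used above.
// From application code, the scheduler can also be toggled directly:
//
//   AclEnableSchedule(1);             // also forces later operators onto the GPU path
//   assert(isScheduleEnable() == 1);
// ---------------------------------------------------------------------------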