diff --git a/mace/core/quantize.cc b/mace/core/quantize.cc index 167c6da356cb975eaed53ce87343fdd3185ce854..ec4c65ac0c9a63a416fe04a549b442bbb026fd68 100644 --- a/mace/core/quantize.cc +++ b/mace/core/quantize.cc @@ -77,7 +77,7 @@ void QuantizeUtil::Dequantize(const uint8_t *input, thread_pool_->Compute1D([=](index_t start, index_t end, index_t step) { for (index_t i = start; i < end; i += step) { uint8x16_t vi = vld1q_u8(input + i * 16); - float32x4x4_t vo = { + float32x4x4_t vo = {{ vmulq_f32(vscale, vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16( vget_low_u16(vmovl_u8(vget_low_u8(vi))))), vzero))), @@ -90,7 +90,7 @@ void QuantizeUtil::Dequantize(const uint8_t *input, vmulq_f32(vscale, vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16( vget_high_u16(vmovl_u8(vget_high_u8(vi))))), vzero))), - }; + }}; vst1q_f32(output + i * 16, vo.val[0]); vst1q_f32(output + i * 16 + 4, vo.val[1]); vst1q_f32(output + i * 16 + 8, vo.val[2]); diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc index e63b252b50be2f31a498d4422688f2f9472252c6..0a42e2957e543f8d604db808a08e2e0dbf29bb64 100644 --- a/mace/core/runtime/opencl/opencl_allocator.cc +++ b/mace/core/runtime/opencl/opencl_allocator.cc @@ -93,6 +93,12 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector &image_shape, << image_shape[0] << ", " << image_shape[1] << "] failed because of " << OpenCLErrorToString(error); + // Many users have doubts at CL_INVALID_IMAGE_SIZE, add some tips. + if (error == CL_INVALID_IMAGE_SIZE) { + auto max_2d_size = opencl_runtime_->GetMaxImage2DSize(); + LOG(WARNING) << "The allowable OpenCL image size is: " + << max_2d_size[0] << "x" << max_2d_size[1]; + } delete cl_image; *result = nullptr; return MaceStatus::MACE_OUT_OF_RESOURCES; diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index bcf264983fad70e8efae4eb5c7fb5a8e692d03d3..e26c604869535ce856c78ea27b1463e36e20b28d 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -713,7 +713,7 @@ std::vector OpenCLRuntime::GetMaxImage2DSize() { LOG(ERROR) << "error: " << OpenCLErrorToString(err); return {}; } - return {max_height, max_width}; + return {max_width, max_height}; } uint64_t OpenCLRuntime::GetKernelMaxWorkGroupSize(const cl::Kernel &kernel) { diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc index c5e16b762a57e6eddcebc269d7f369ffabac28dd..fe6ea48818611aa8bfc1de1ae9f8063e2ac26944 100644 --- a/mace/libmace/mace.cc +++ b/mace/libmace/mace.cc @@ -391,12 +391,12 @@ class MaceEngine::Impl { std::unique_ptr ws_; std::unique_ptr net_; bool is_quantized_model_; -#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA) - std::unique_ptr hexagon_controller_; -#endif std::map input_info_map_; std::map output_info_map_; std::unique_ptr thread_pool_; +#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA) + std::unique_ptr hexagon_controller_; +#endif MACE_DISABLE_COPY_AND_ASSIGN(Impl); }; diff --git a/mace/ops/addn.cc b/mace/ops/addn.cc index ea6458d475751a064cacb118cef64ef498a29e48..1f0fa7a1fcec392d35fc36c6438adda32d2e9af7 100644 --- a/mace/ops/addn.cc +++ b/mace/ops/addn.cc @@ -29,8 +29,6 @@ namespace mace { namespace ops { -static constexpr int kCostPerGroup = 1024; - template class AddNOp; diff --git a/mace/ops/common/transpose.h b/mace/ops/common/transpose.h index 0c0751851f695ac9974bf3e386b32adf2cf28370..b7b42490c64d3c053403125a9c43c77e34ccbffa 100644 --- a/mace/ops/common/transpose.h +++ b/mace/ops/common/transpose.h @@ -132,7 +132,7 @@ inline void TransposeNCHWToNHWCC2(utils::ThreadPool *thread_pool, for (w = 0; w + 3 < width; w += 4) { float32x4_t vi0 = vld1q_f32(input + in_offset); float32x4_t vi1 = vld1q_f32(input + in_offset + image_size); - float32x4x2_t vi = {vi0, vi1}; + float32x4x2_t vi = {{vi0, vi1}}; vst2q_f32(output + out_offset, vi); in_offset += 4; out_offset += 8; diff --git a/mace/ops/opencl/image/split.h b/mace/ops/opencl/image/split.h index 04eaaa8792dd6c64ae738250e4d9d676aae2862c..12755910a75cd812725b02dd76d35c052a6f6826 100644 --- a/mace/ops/opencl/image/split.h +++ b/mace/ops/opencl/image/split.h @@ -133,7 +133,7 @@ MaceStatus SplitKernel::Compute( } } if (context->future() != nullptr) { - context->future()->wait_fn = [runtime, call_stats](CallStats *stats) { + context->future()->wait_fn = [call_stats](CallStats *stats) { if (stats != nullptr) { stats->start_micros = call_stats.start_micros; stats->end_micros = stats->start_micros + call_stats.end_micros; diff --git a/mace/ops/ops_test_util.cc b/mace/ops/ops_test_util.cc index ab61e8c627fd72d4cb8c2c279f9567e92692df23..bcf1282d2211fe5ae022aced1fa5a896c3545b44 100644 --- a/mace/ops/ops_test_util.cc +++ b/mace/ops/ops_test_util.cc @@ -101,11 +101,13 @@ void OpDefBuilder::Finalize(OperatorDef *op_def) const { } namespace { +#ifdef MACE_ENABLE_OPENCL std::string GetStoragePathFromEnv() { char *storage_path_str = getenv("MACE_INTERNAL_STORAGE_PATH"); if (storage_path_str == nullptr) return ""; return storage_path_str; } +#endif } // namespace OpTestContext *OpTestContext::Get(int num_threads, @@ -117,15 +119,21 @@ OpTestContext *OpTestContext::Get(int num_threads, OpTestContext::OpTestContext(int num_threads, CPUAffinityPolicy cpu_affinity_policy) +#ifdef MACE_ENABLE_OPENCL : gpu_context_(std::make_shared(GetStoragePathFromEnv())), opencl_mem_types_({MemoryType::GPU_IMAGE}), thread_pool_(make_unique(num_threads, cpu_affinity_policy)) { +#else + : thread_pool_(make_unique(num_threads, + cpu_affinity_policy)) { +#endif thread_pool_->Init(); device_map_[DeviceType::CPU] = make_unique( num_threads, cpu_affinity_policy, thread_pool_.get()); +#ifdef MACE_ENABLE_OPENCL device_map_[DeviceType::GPU] = make_unique( gpu_context_->opencl_tuner(), gpu_context_->opencl_cache_storage(), @@ -135,16 +143,18 @@ OpTestContext::OpTestContext(int num_threads, num_threads, cpu_affinity_policy, thread_pool_.get()); -} - -std::shared_ptr OpTestContext::gpu_context() const { - return gpu_context_; +#endif } Device *OpTestContext::GetDevice(DeviceType device_type) { return device_map_[device_type].get(); } +#ifdef MACE_ENABLE_OPENCL +std::shared_ptr OpTestContext::gpu_context() const { + return gpu_context_; +} + std::vector OpTestContext::opencl_mem_types() { return opencl_mem_types_; } @@ -160,6 +170,7 @@ void OpTestContext::SetOCLImageTestFlag() { void OpTestContext::SetOCLImageAndBufferTestFlag() { opencl_mem_types_ = {MemoryType::GPU_IMAGE, MemoryType::GPU_BUFFER}; } +#endif // MACE_ENABLE_OPENCL bool OpsTestNet::Setup(mace::DeviceType device) { NetDef net_def; @@ -231,6 +242,7 @@ MaceStatus OpsTestNet::Run() { MaceStatus OpsTestNet::RunOp(mace::DeviceType device) { if (device == DeviceType::GPU) { +#ifdef MACE_ENABLE_OPENCL auto opencl_mem_types = OpTestContext::Get()->opencl_mem_types(); for (auto type : opencl_mem_types) { OpTestContext::Get()->GetDevice(device) @@ -239,6 +251,9 @@ MaceStatus OpsTestNet::RunOp(mace::DeviceType device) { MACE_RETURN_IF_ERROR(Run()); } return MaceStatus::MACE_SUCCESS; +#else + return MaceStatus::MACE_UNSUPPORTED; +#endif // MACE_ENABLE_OPENCL } else { Setup(device); return Run(); diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index e9ef4d90f89807f8b123b5e3cba75c075ab52657..d2212a659078075a60df305db95d5dee1b0cd584 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -29,8 +29,6 @@ #include "mace/core/types.h" #include "mace/core/net.h" #include "mace/core/device_context.h" -#include "mace/core/runtime/opencl/gpu_device.h" -#include "mace/core/runtime/opencl/opencl_util.h" #include "mace/core/tensor.h" #include "mace/core/workspace.h" #include "mace/ops/ops_registry.h" @@ -40,6 +38,11 @@ #include "mace/core/quantize.h" #include "mace/ops/testing/test_utils.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/gpu_device.h" +#include "mace/core/runtime/opencl/opencl_util.h" +#endif + namespace mace { namespace ops { namespace test { @@ -78,12 +81,14 @@ class OpTestContext { static OpTestContext *Get( int num_threads = -1, CPUAffinityPolicy cpu_affinity_policy = AFFINITY_BIG_ONLY); - std::shared_ptr gpu_context() const; Device *GetDevice(DeviceType device_type); +#ifdef MACE_ENABLE_OPENCL + std::shared_ptr gpu_context() const; std::vector opencl_mem_types(); void SetOCLBufferTestFlag(); void SetOCLImageTestFlag(); void SetOCLImageAndBufferTestFlag(); +#endif utils::ThreadPool *thread_pool() { return thread_pool_.get(); } @@ -93,8 +98,10 @@ class OpTestContext { CPUAffinityPolicy cpu_affinity_policy); MACE_DISABLE_COPY_AND_ASSIGN(OpTestContext); +#ifdef MACE_ENABLE_OPENCL std::shared_ptr gpu_context_; std::vector opencl_mem_types_; +#endif std::map> device_map_; std::unique_ptr thread_pool_; }; @@ -424,7 +431,9 @@ class OpsTestBase : public ::testing::Test { } virtual void TearDown() { +#ifdef MACE_ENABLE_OPENCL OpTestContext::Get()->SetOCLImageTestFlag(); +#endif } }; diff --git a/mace/ops/strided_slice.cc b/mace/ops/strided_slice.cc index c10914f27fb87e7e1159749eb990a66bb6506f42..4218d1f78614b487c85d4d645a09495b9c380a6b 100644 --- a/mace/ops/strided_slice.cc +++ b/mace/ops/strided_slice.cc @@ -199,7 +199,7 @@ class StridedSliceOp : public Operation { strides_indices_vec[d] > 0 ? 0 : -1, strides_indices_vec[d] > 0 ? dim_len : dim_len - 1}; - auto format_indices = [valid_range, d, dim_len](index_t indice) { + auto format_indices = [valid_range, dim_len](index_t indice) { index_t forward = indice < 0 ? indice + dim_len : indice; return Clamp(forward, valid_range[0], valid_range[1]); }; diff --git a/tools/bazel.rc b/tools/bazel.rc index 629129cf4cc38015ac49dee5b9f3a97ed77b09da..f0c6d64f27ebc1bc964cd6e487a9f6d1b5112a20 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -19,6 +19,9 @@ build:android --linkopt=-lm build:android --distinct_host_configuration=true build:android --crosstool_top=//external:android/crosstool build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain +build:android --copt -Wall +build:android --copt -Wno-mismatched-tags +build:android --copt -Wno-missing-braces # Linux host build, --config linux build:linux --define linux=true