From 3ec4034215f20fd091ffa109b11b08ce59f4ce8f Mon Sep 17 00:00:00 2001 From: liuqi Date: Fri, 7 Dec 2018 18:05:42 +0800 Subject: [PATCH] Bug: Replace OpenCLRuntime with GPURuntime in GPUDevice. 1. Add GPURuntime for GPUDevice 2. Move OpenCLRuntime to GPURuntime 3. Move ScratchImageManager from OpenCLRuntime to GPURuntime --- mace/core/device.cc | 4 +- mace/core/device.h | 6 +-- mace/core/runtime/opencl/gpu_device.cc | 7 +-- mace/core/runtime/opencl/gpu_device.h | 4 +- mace/core/runtime/opencl/gpu_runtime.cc | 45 +++++++++++++++++++ mace/core/runtime/opencl/gpu_runtime.h | 45 +++++++++++++++++++ mace/core/runtime/opencl/opencl_runtime.cc | 16 +------ mace/core/runtime/opencl/opencl_runtime.h | 6 --- mace/core/workspace.cc | 2 +- mace/libmace/mace.cc | 8 ++-- mace/ops/activation.cc | 2 +- mace/ops/addn.cc | 2 +- mace/ops/batch_norm.cc | 2 +- mace/ops/batch_to_space.cc | 2 +- mace/ops/bias_add.cc | 2 +- mace/ops/channel_shuffle.cc | 2 +- mace/ops/concat.cc | 2 +- mace/ops/conv_2d.cc | 4 +- mace/ops/crop.cc | 2 +- mace/ops/deconv_2d.cc | 2 +- mace/ops/depth_to_space.cc | 2 +- mace/ops/depthwise_conv2d.cc | 2 +- mace/ops/depthwise_deconv2d.cc | 2 +- mace/ops/eltwise.cc | 2 +- mace/ops/fully_connected.cc | 2 +- mace/ops/lstm_cell.cc | 2 +- mace/ops/opencl/buffer/buffer_transform.cc | 6 +-- .../opencl/buffer/buffer_type_transform.cc | 2 +- mace/ops/opencl/buffer/conv_2d_1x1.cc | 2 +- mace/ops/opencl/buffer/conv_2d_general.cc | 2 +- mace/ops/opencl/buffer/depthwise_conv2d.cc | 2 +- mace/ops/opencl/buffer/pooling.h | 2 +- mace/ops/opencl/buffer/softmax.h | 2 +- mace/ops/opencl/buffer/utils.cc | 2 +- mace/ops/opencl/image/activation.h | 2 +- mace/ops/opencl/image/addn.h | 2 +- mace/ops/opencl/image/batch_norm.h | 2 +- mace/ops/opencl/image/batch_to_space.h | 2 +- mace/ops/opencl/image/bias_add.h | 2 +- mace/ops/opencl/image/buffer_to_image.h | 2 +- mace/ops/opencl/image/channel_shuffle.h | 2 +- mace/ops/opencl/image/concat.cc | 4 +- mace/ops/opencl/image/conv_2d_1x1.cc | 2
+- mace/ops/opencl/image/conv_2d_3x3.cc | 2 +- mace/ops/opencl/image/conv_2d_general.cc | 2 +- mace/ops/opencl/image/crop.h | 2 +- mace/ops/opencl/image/deconv_2d.h | 2 +- mace/ops/opencl/image/depth_to_space.h | 2 +- mace/ops/opencl/image/depthwise_conv2d.cc | 2 +- mace/ops/opencl/image/depthwise_deconv2d.h | 2 +- mace/ops/opencl/image/eltwise.h | 2 +- mace/ops/opencl/image/fully_connected.h | 2 +- mace/ops/opencl/image/image_to_buffer.h | 2 +- mace/ops/opencl/image/lstm_cell.h | 2 +- mace/ops/opencl/image/matmul.h | 2 +- mace/ops/opencl/image/pad.h | 2 +- mace/ops/opencl/image/pooling.h | 2 +- mace/ops/opencl/image/reduce_mean.h | 2 +- mace/ops/opencl/image/resize_bicubic.h | 2 +- mace/ops/opencl/image/resize_bilinear.h | 2 +- mace/ops/opencl/image/softmax.h | 2 +- mace/ops/opencl/image/space_to_batch.h | 2 +- mace/ops/opencl/image/space_to_depth.h | 2 +- mace/ops/opencl/image/split.h | 2 +- mace/ops/opencl/image/sqrdiff_mean.h | 2 +- mace/ops/opencl/image/winograd_conv2d.cc | 9 ++-- mace/ops/opencl/out_of_range_check_test.cc | 2 +- mace/ops/ops_test_util.cc | 6 +-- mace/ops/pad.cc | 2 +- mace/ops/pooling.cc | 2 +- mace/ops/reduce_mean.cc | 2 +- mace/ops/resize_bicubic.cc | 2 +- mace/ops/resize_bilinear.cc | 2 +- mace/ops/softmax.cc | 2 +- mace/ops/space_to_batch.cc | 2 +- mace/ops/space_to_depth.cc | 2 +- mace/ops/split.cc | 2 +- mace/ops/sqrdiff_mean.cc | 2 +- 78 files changed, 186 insertions(+), 112 deletions(-) create mode 100644 mace/core/runtime/opencl/gpu_runtime.cc create mode 100644 mace/core/runtime/opencl/gpu_runtime.h diff --git a/mace/core/device.cc b/mace/core/device.cc index 35e8c7af..4eb547c2 100644 --- a/mace/core/device.cc +++ b/mace/core/device.cc @@ -33,8 +33,8 @@ CPURuntime *CPUDevice::cpu_runtime() { } #ifdef MACE_ENABLE_OPENCL -OpenCLRuntime *CPUDevice::opencl_runtime() { - LOG(FATAL) << "CPU device should not call OpenCL Runtime"; +GPURuntime *CPUDevice::gpu_runtime() { + LOG(FATAL) << "CPU device should not call GPU Runtime"; return 
nullptr; } #endif diff --git a/mace/core/device.h b/mace/core/device.h index b7fe5f32..627d46be 100644 --- a/mace/core/device.h +++ b/mace/core/device.h @@ -21,7 +21,7 @@ #include "mace/core/allocator.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/core/runtime/opencl/opencl_runtime.h" +#include "mace/core/runtime/opencl/gpu_runtime.h" #endif namespace mace { @@ -33,7 +33,7 @@ class Device { virtual ~Device() {} #ifdef MACE_ENABLE_OPENCL - virtual OpenCLRuntime *opencl_runtime() = 0; + virtual GPURuntime *gpu_runtime() = 0; #endif // MACE_ENABLE_OPENCL virtual CPURuntime *cpu_runtime() = 0; @@ -50,7 +50,7 @@ class CPUDevice : public Device { virtual ~CPUDevice(); #ifdef MACE_ENABLE_OPENCL - OpenCLRuntime *opencl_runtime() override; + GPURuntime *gpu_runtime() override; #endif CPURuntime *cpu_runtime() override; diff --git a/mace/core/runtime/opencl/gpu_device.cc b/mace/core/runtime/opencl/gpu_device.cc index 09bb9181..caea5767 100644 --- a/mace/core/runtime/opencl/gpu_device.cc +++ b/mace/core/runtime/opencl/gpu_device.cc @@ -30,12 +30,13 @@ GPUDevice::GPUDevice(std::shared_ptr> tuner, runtime_(new OpenCLRuntime(opencl_cache_storage, priority, perf, opencl_binary_storage, tuner)), allocator_(new OpenCLAllocator(runtime_.get())), - scratch_buffer_(new ScratchBuffer(allocator_.get())) {} + scratch_buffer_(new ScratchBuffer(allocator_.get())), + gpu_runtime_(new GPURuntime(runtime_.get())) {} GPUDevice::~GPUDevice() = default; -OpenCLRuntime* GPUDevice::opencl_runtime() { - return runtime_.get(); +GPURuntime* GPUDevice::gpu_runtime() { + return gpu_runtime_.get(); } Allocator *GPUDevice::allocator() { diff --git a/mace/core/runtime/opencl/gpu_device.h b/mace/core/runtime/opencl/gpu_device.h index 1d36461b..d3c7d98e 100644 --- a/mace/core/runtime/opencl/gpu_device.h +++ b/mace/core/runtime/opencl/gpu_device.h @@ -19,6 +19,7 @@ #include "mace/core/device_context.h" #include "mace/core/device.h" +#include "mace/core/runtime/opencl/gpu_runtime.h" #include
"mace/core/runtime/opencl/opencl_allocator.h" namespace mace { @@ -34,7 +35,7 @@ class GPUDevice : public CPUDevice { CPUAffinityPolicy cpu_affinity_policy = AFFINITY_NONE, bool use_gemmlowp = false); ~GPUDevice(); - OpenCLRuntime *opencl_runtime() override; + GPURuntime *gpu_runtime() override; Allocator *allocator() override; DeviceType device_type() const override; ScratchBuffer *scratch_buffer() override; @@ -42,6 +43,7 @@ class GPUDevice : public CPUDevice { std::unique_ptr<OpenCLRuntime> runtime_; std::unique_ptr<OpenCLAllocator> allocator_; std::unique_ptr<ScratchBuffer> scratch_buffer_; + std::unique_ptr<GPURuntime> gpu_runtime_; }; } // namespace mace diff --git a/mace/core/runtime/opencl/gpu_runtime.cc b/mace/core/runtime/opencl/gpu_runtime.cc new file mode 100644 index 00000000..8574ad48 --- /dev/null +++ b/mace/core/runtime/opencl/gpu_runtime.cc @@ -0,0 +1,45 @@ +// Copyright 2018 Xiaomi, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include "mace/core/runtime/opencl/gpu_runtime.h" + +#include "mace/core/runtime/opencl/scratch_image.h" + +namespace mace { + +GPURuntime::GPURuntime(mace::OpenCLRuntime *runtime) + : runtime_(runtime), + scratch_image_manager_(new ScratchImageManager), + mem_type_(MemoryType::GPU_IMAGE) {} + +GPURuntime::~GPURuntime() = default; + +OpenCLRuntime* GPURuntime::opencl_runtime() { + return runtime_; +} + +ScratchImageManager* GPURuntime::scratch_image_manager() const { + return scratch_image_manager_.get(); +} + +bool GPURuntime::UseImageMemory() { + return this->mem_type_ == MemoryType::GPU_IMAGE; +} + +void GPURuntime::set_mem_type(MemoryType type) { + this->mem_type_ = type; +} + + +} // namespace mace diff --git a/mace/core/runtime/opencl/gpu_runtime.h b/mace/core/runtime/opencl/gpu_runtime.h new file mode 100644 index 00000000..fee776ed --- /dev/null +++ b/mace/core/runtime/opencl/gpu_runtime.h @@ -0,0 +1,45 @@ +// Copyright 2018 Xiaomi, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MACE_CORE_RUNTIME_OPENCL_GPU_RUNTIME_H_ +#define MACE_CORE_RUNTIME_OPENCL_GPU_RUNTIME_H_ + +#include <memory> + +#include "mace/proto/mace.pb.h" + +namespace mace { + +class OpenCLRuntime; +class ScratchImageManager; + +class GPURuntime { + public: + explicit GPURuntime(OpenCLRuntime *runtime); + ~GPURuntime(); + OpenCLRuntime *opencl_runtime(); + ScratchImageManager *scratch_image_manager() const; + + // TODO(liuqi): remove this function in the future, make decision at runtime. + bool UseImageMemory(); + void set_mem_type(MemoryType type); + + private: + OpenCLRuntime *runtime_; + std::unique_ptr<ScratchImageManager> scratch_image_manager_; + MemoryType mem_type_; +}; + +} // namespace mace +#endif // MACE_CORE_RUNTIME_OPENCL_GPU_RUNTIME_H_ diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index b552c65a..904e74f6 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -284,9 +284,7 @@ OpenCLRuntime::OpenCLRuntime( is_opencl_avaliable_(false), is_profiling_enabled_(false), opencl_version_(CL_VER_UNKNOWN), - gpu_type_(UNKNOWN), - mem_type_(MemoryType::GPU_IMAGE), - scratch_image_manager_(new ScratchImageManager) { + gpu_type_(UNKNOWN) { std::vector<cl::Platform> all_platforms; cl::Platform::get(&all_platforms); if (all_platforms.size() == 0) { @@ -471,14 +469,6 @@ uint32_t OpenCLRuntime::device_compute_units() const { return device_compute_units_; } -bool OpenCLRuntime::UseImageMemory() { - return this->mem_type_ == MemoryType::GPU_IMAGE; -} - -void OpenCLRuntime::set_mem_type(MemoryType type) { - this->mem_type_ = type; -} - bool OpenCLRuntime::BuildProgramFromCache( const std::string &built_program_key, const std::string &build_options_str, @@ -792,8 +782,4 @@ bool OpenCLRuntime::is_profiling_enabled() const { return is_profiling_enabled_; } -ScratchImageManager* OpenCLRuntime::scratch_image_manager() const { - return scratch_image_manager_.get(); -} - } // namespace mace diff --git
a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h index 546b8008..1e189b8e 100644 --- a/mace/core/runtime/opencl/opencl_runtime.h +++ b/mace/core/runtime/opencl/opencl_runtime.h @@ -83,11 +83,7 @@ class OpenCLRuntime { uint64_t device_global_mem_cache_size() const; uint32_t device_compute_units() const; Tuner *tuner(); - ScratchImageManager *scratch_image_manager() const; bool is_opencl_avaliable(); - // TODO(liuqi): remove this function in the future, make decision at runtime. - bool UseImageMemory(); - void set_mem_type(MemoryType type); void GetCallStats(const cl::Event &event, CallStats *stats); uint64_t GetDeviceMaxWorkGroupSize(); @@ -135,8 +131,6 @@ class OpenCLRuntime { bool is_profiling_enabled_; OpenCLVersion opencl_version_; GPUType gpu_type_; - MemoryType mem_type_; - std::unique_ptr scratch_image_manager_; // All OpenCL object must be a pointer and manually deleted before unloading // OpenCL library. std::shared_ptr context_; diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index bbef2c5d..5123e670 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -109,7 +109,7 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, (!is_quantize_model && HasQuantizedTensor(net_def)))); #ifdef MACE_ENABLE_OPENCL diffused_buffer_ = diffused_buffer_ || (device_type == DeviceType::GPU && - device->opencl_runtime()->GetDeviceMaxMemAllocSize() <= + device->gpu_runtime()->opencl_runtime()->GetDeviceMaxMemAllocSize() <= static_cast(model_data_size)); #endif if (diffused_buffer_) { diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc index 9244d62a..bcaff34d 100644 --- a/mace/libmace/mace.cc +++ b/mace/libmace/mace.cc @@ -69,8 +69,8 @@ void UnloadModelData(const unsigned char *model_data, #ifdef MACE_ENABLE_OPENCL MaceStatus CheckGPUAvalibility(const NetDef *net_def, Device *device) { // Check OpenCL avaliable - auto runtime = device->opencl_runtime(); - if (!runtime->is_opencl_avaliable()) { + 
auto runtime = device->gpu_runtime(); + if (!runtime->opencl_runtime()->is_opencl_avaliable()) { LOG(WARNING) << "The device does not support OpenCL"; return MaceStatus::MACE_OUT_OF_RESOURCES; } @@ -678,8 +678,8 @@ MaceStatus MaceEngine::Impl::Run( #ifdef MACE_ENABLE_OPENCL if (device_type_ == GPU) { - device_->opencl_runtime()->command_queue().finish(); - device_->opencl_runtime()->SaveBuiltCLProgram(); + device_->gpu_runtime()->opencl_runtime()->command_queue().finish(); + device_->gpu_runtime()->opencl_runtime()->SaveBuiltCLProgram(); } #endif for (auto &output : *outputs) { diff --git a/mace/ops/activation.cc b/mace/ops/activation.cc index b904b5c2..fe8862bb 100644 --- a/mace/ops/activation.cc +++ b/mace/ops/activation.cc @@ -81,7 +81,7 @@ class ActivationOp : public Operation { auto relux_max_limit = static_cast( Operation::GetOptionalArg("max_limit", 0.0f)); MemoryType mem_type; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_.reset( new opencl::image::ActivationKernel(type, relux_max_limit)); diff --git a/mace/ops/addn.cc b/mace/ops/addn.cc index 4040de1f..0fe0c7b4 100644 --- a/mace/ops/addn.cc +++ b/mace/ops/addn.cc @@ -106,7 +106,7 @@ class AddNOp : public Operation { public: explicit AddNOp(OpConstructContext *context) : Operation(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::AddNKernel); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/batch_norm.cc b/mace/ops/batch_norm.cc index 1758f79b..3ca5592a 100644 --- a/mace/ops/batch_norm.cc +++ b/mace/ops/batch_norm.cc @@ -149,7 +149,7 @@ class BatchNormOp : public Operation { Operation::GetOptionalArg("activation", "NOOP")); float relux_max_limit = Operation::GetOptionalArg("max_limit", 0.0f); MemoryType mem_type; - if (context->device()->opencl_runtime()->UseImageMemory()) 
{ + if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_.reset(new opencl::image::BatchNormKernel( epsilon, activation, relux_max_limit)); diff --git a/mace/ops/batch_to_space.cc b/mace/ops/batch_to_space.cc index 5cc6a1e0..3aa5acec 100644 --- a/mace/ops/batch_to_space.cc +++ b/mace/ops/batch_to_space.cc @@ -265,7 +265,7 @@ class BatchToSpaceNDOp : public BatchToSpaceOpBase { public: explicit BatchToSpaceNDOp(OpConstructContext *context) : BatchToSpaceOpBase(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::BatchToSpaceKernel); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/bias_add.cc b/mace/ops/bias_add.cc index 59579fa5..9190cf95 100644 --- a/mace/ops/bias_add.cc +++ b/mace/ops/bias_add.cc @@ -101,7 +101,7 @@ class BiasAddOp : public Operation { data_format_(static_cast(Operation::GetOptionalArg( "data_format", NHWC))) { MemoryType mem_type; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_.reset(new opencl::image::BiasAddKernel); } else { diff --git a/mace/ops/channel_shuffle.cc b/mace/ops/channel_shuffle.cc index 04c6a88d..d4404c61 100644 --- a/mace/ops/channel_shuffle.cc +++ b/mace/ops/channel_shuffle.cc @@ -84,7 +84,7 @@ class ChannelShuffleOp : public Operation { explicit ChannelShuffleOp(OpConstructContext *context) : Operation(context) { const int groups = Operation::GetOptionalArg("group", 1); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::ChannelShuffleKernel(groups)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/concat.cc b/mace/ops/concat.cc index eec11e0b..3fa5ef2c 100644 --- a/mace/ops/concat.cc +++ b/mace/ops/concat.cc @@ -196,7 +196,7 @@ class ConcatOp 
: public ConcatOpBase { public: explicit ConcatOp(OpConstructContext *context) : ConcatOpBase(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::ConcatKernel(axis_)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/conv_2d.cc b/mace/ops/conv_2d.cc index a5cbec74..0a0d3bb5 100644 --- a/mace/ops/conv_2d.cc +++ b/mace/ops/conv_2d.cc @@ -963,7 +963,7 @@ class Conv2dOp : public ConvPool2dOpBase { relux_max_limit_(Operation::GetOptionalArg("max_limit", 0.0f)), wino_block_size_(Operation::GetOptionalArg("wino_block_size", 0)) { MemoryType mem_type; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_.reset(new opencl::image::Conv2dKernel); } else { @@ -974,7 +974,7 @@ class Conv2dOp : public ConvPool2dOpBase { // Transform filter tensor to target format if ((wino_block_size_ == 2 || wino_block_size_ == 4) && (kernel_->CheckUseWinograd( - context->device()->opencl_runtime(), + context->device()->gpu_runtime()->opencl_runtime(), context->workspace()->GetTensor( operator_def_->input(1))->shape(), std::vector(operator_def_->output_shape(0).dims().begin(), diff --git a/mace/ops/crop.cc b/mace/ops/crop.cc index b056f21c..7b705069 100644 --- a/mace/ops/crop.cc +++ b/mace/ops/crop.cc @@ -113,7 +113,7 @@ class CropOp : public Operation { explicit CropOp(OpConstructContext *context) : Operation(context) { const int axis = Operation::GetOptionalArg("axis", 2); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::CropKernel( axis, Operation::GetRepeatedArgs("offset"))); } else { diff --git a/mace/ops/deconv_2d.cc b/mace/ops/deconv_2d.cc index 5697c841..575e81ad 100644 --- a/mace/ops/deconv_2d.cc +++ b/mace/ops/deconv_2d.cc @@ -360,7 +360,7 @@ class 
Deconv2dOp : public Deconv2dOpBase { explicit Deconv2dOp(OpConstructContext *context) : Deconv2dOpBase(context) { MemoryType mem_type = MemoryType::GPU_IMAGE; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::Deconv2dKernel); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/depth_to_space.cc b/mace/ops/depth_to_space.cc index e18cc106..ee06075a 100644 --- a/mace/ops/depth_to_space.cc +++ b/mace/ops/depth_to_space.cc @@ -96,7 +96,7 @@ class DepthToSpaceOp : public Operation { explicit DepthToSpaceOp(OpConstructContext *context) : Operation(context) { int block_size = Operation::GetOptionalArg("block_size", 1); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::DepthToSpaceKernel(block_size)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc index 8a85ab46..2f849ef7 100644 --- a/mace/ops/depthwise_conv2d.cc +++ b/mace/ops/depthwise_conv2d.cc @@ -492,7 +492,7 @@ class DepthwiseConv2dOp : public DepthwiseConv2dOpBase { explicit DepthwiseConv2dOp(OpConstructContext *context) : DepthwiseConv2dOpBase(context) { MemoryType mem_type; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_.reset(new opencl::image::DepthwiseConv2dKernel); } else { diff --git a/mace/ops/depthwise_deconv2d.cc b/mace/ops/depthwise_deconv2d.cc index 3f10a514..a4e7148e 100644 --- a/mace/ops/depthwise_deconv2d.cc +++ b/mace/ops/depthwise_deconv2d.cc @@ -410,7 +410,7 @@ class DepthwiseDeconv2dOp : public Deconv2dOpBase { explicit DepthwiseDeconv2dOp(OpConstructContext *context) : Deconv2dOpBase(context) { MemoryType mem_type = MemoryType::GPU_IMAGE; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if 
(context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::DepthwiseDeconv2dKernel); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/eltwise.cc b/mace/ops/eltwise.cc index 863b69ed..1a2e0908 100644 --- a/mace/ops/eltwise.cc +++ b/mace/ops/eltwise.cc @@ -1088,7 +1088,7 @@ class EltwiseOp : public Operation { int32_t scalar_input_index = Operation::GetOptionalArg( "scalar_input_index", 1); MemoryType mem_type; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_.reset(new opencl::image::EltwiseKernel( type, coeff, scalar_input, scalar_input_index)); diff --git a/mace/ops/fully_connected.cc b/mace/ops/fully_connected.cc index ef919d92..31b1fb05 100644 --- a/mace/ops/fully_connected.cc +++ b/mace/ops/fully_connected.cc @@ -194,7 +194,7 @@ class FullyConnectedOp : public FullyConnectedOpBase { explicit FullyConnectedOp(OpConstructContext *context) : FullyConnectedOpBase(context) { MemoryType mem_type; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_.reset(new opencl::image::FullyConnectedKernel); } else { diff --git a/mace/ops/lstm_cell.cc b/mace/ops/lstm_cell.cc index dfbfa155..bc34b969 100644 --- a/mace/ops/lstm_cell.cc +++ b/mace/ops/lstm_cell.cc @@ -34,7 +34,7 @@ class LSTMCellOp : public Operation { Operation::GetOptionalArg("scalar_input", 0.0)); MemoryType mem_type = MemoryType::GPU_IMAGE; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::LSTMCellKernel(forget_bias)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/opencl/buffer/buffer_transform.cc b/mace/ops/opencl/buffer/buffer_transform.cc index 9ba3f81d..5bfc5389 100644 --- a/mace/ops/opencl/buffer/buffer_transform.cc +++ 
b/mace/ops/opencl/buffer/buffer_transform.cc @@ -47,7 +47,7 @@ MaceStatus TransformConv2DFilter( MACE_RETURN_IF_ERROR(output->Resize(transformed_shape)); output->Reshape(input->shape()); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION if (kernel->get() == nullptr) { std::set built_options; @@ -116,7 +116,7 @@ MaceStatus TransformDWConv2DFilter( MACE_RETURN_IF_ERROR(output->Resize(transformed_shape)); output->Reshape(input->shape()); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION if (kernel->get() == nullptr) { std::set built_options; @@ -173,7 +173,7 @@ MaceStatus TransformArgument( MACE_RETURN_IF_ERROR(output->Resize(transformed_shape)); output->Reshape(input->shape()); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION if (kernel->get() == nullptr) { std::set built_options; diff --git a/mace/ops/opencl/buffer/buffer_type_transform.cc b/mace/ops/opencl/buffer/buffer_type_transform.cc index ce405e9f..75728379 100644 --- a/mace/ops/opencl/buffer/buffer_type_transform.cc +++ b/mace/ops/opencl/buffer/buffer_type_transform.cc @@ -31,7 +31,7 @@ MaceStatus BufferTypeTransform( Tensor *output) { MACE_RETURN_IF_ERROR(output->ResizeLike(input)); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION const uint32_t gws = diff --git a/mace/ops/opencl/buffer/conv_2d_1x1.cc b/mace/ops/opencl/buffer/conv_2d_1x1.cc index 62e77b17..abe7d93b 100644 --- a/mace/ops/opencl/buffer/conv_2d_1x1.cc +++ b/mace/ops/opencl/buffer/conv_2d_1x1.cc @@ -43,7 +43,7 @@ MaceStatus Conv2d1x1(OpContext *context, const index_t in_height = padded_input->dim(1); const index_t in_width = 
padded_input->dim(2); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/buffer/conv_2d_general.cc b/mace/ops/opencl/buffer/conv_2d_general.cc index f9cc804d..e8ac509c 100644 --- a/mace/ops/opencl/buffer/conv_2d_general.cc +++ b/mace/ops/opencl/buffer/conv_2d_general.cc @@ -48,7 +48,7 @@ MaceStatus Conv2dGeneral(OpContext *context, const index_t filter_height = filter->dim(2); const index_t filter_width = filter->dim(3); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/buffer/depthwise_conv2d.cc b/mace/ops/opencl/buffer/depthwise_conv2d.cc index 0ba4526c..d2c33599 100644 --- a/mace/ops/opencl/buffer/depthwise_conv2d.cc +++ b/mace/ops/opencl/buffer/depthwise_conv2d.cc @@ -48,7 +48,7 @@ MaceStatus DepthwiseConv2d(OpContext *context, const index_t filter_height = filter->dim(2); const index_t filter_width = filter->dim(3); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/buffer/pooling.h b/mace/ops/opencl/buffer/pooling.h index 4684d687..de7d7610 100644 --- a/mace/ops/opencl/buffer/pooling.h +++ b/mace/ops/opencl/buffer/pooling.h @@ -92,7 +92,7 @@ MaceStatus PoolingKernel::Compute( bool input_changed = !IsVecEqual(input_shape_, input->shape()); input_shape_ = input->shape(); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); // pad input std::vector padded_input_shape = input->shape(); diff --git a/mace/ops/opencl/buffer/softmax.h b/mace/ops/opencl/buffer/softmax.h index 3147a935..248ee0c8 100644 --- 
a/mace/ops/opencl/buffer/softmax.h +++ b/mace/ops/opencl/buffer/softmax.h @@ -75,7 +75,7 @@ MaceStatus SoftmaxKernel::Compute( static_cast(width), static_cast(height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/buffer/utils.cc b/mace/ops/opencl/buffer/utils.cc index b4214a0a..141a96b7 100644 --- a/mace/ops/opencl/buffer/utils.cc +++ b/mace/ops/opencl/buffer/utils.cc @@ -47,7 +47,7 @@ MaceStatus PadInput(OpContext *context, static_cast(padded_height * batch) }; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/image/activation.h b/mace/ops/opencl/image/activation.h index 93944b5b..80713c36 100644 --- a/mace/ops/opencl/image/activation.h +++ b/mace/ops/opencl/image/activation.h @@ -66,7 +66,7 @@ MaceStatus ActivationKernel::Compute( const index_t channel_blocks = RoundUpDiv4(channels); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/addn.h b/mace/ops/opencl/image/addn.h index 7692ac06..48f6d8f8 100644 --- a/mace/ops/opencl/image/addn.h +++ b/mace/ops/opencl/image/addn.h @@ -57,7 +57,7 @@ MaceStatus AddNKernel::Compute( const index_t width = input_tensors[0]->dim(2); const index_t channels = input_tensors[0]->dim(3); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; for (size_t i = 1; i < size; ++i) { diff --git a/mace/ops/opencl/image/batch_norm.h b/mace/ops/opencl/image/batch_norm.h index 5685c514..68908830 100644 --- a/mace/ops/opencl/image/batch_norm.h +++ 
b/mace/ops/opencl/image/batch_norm.h @@ -85,7 +85,7 @@ MaceStatus BatchNormKernel::Compute( static_cast(width), static_cast(height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/batch_to_space.h b/mace/ops/opencl/image/batch_to_space.h index 9d918026..35281c70 100644 --- a/mace/ops/opencl/image/batch_to_space.h +++ b/mace/ops/opencl/image/batch_to_space.h @@ -68,7 +68,7 @@ MaceStatus BatchToSpaceKernel::Compute( chan_blk, static_cast(batch_tensor->dim(2)), static_cast(batch_tensor->dim(0) * batch_tensor->dim(1))}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/bias_add.h b/mace/ops/opencl/image/bias_add.h index 25e2392e..a37ee2b1 100644 --- a/mace/ops/opencl/image/bias_add.h +++ b/mace/ops/opencl/image/bias_add.h @@ -62,7 +62,7 @@ MaceStatus BiasAddKernel::Compute( static_cast(width), static_cast(height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/buffer_to_image.h b/mace/ops/opencl/image/buffer_to_image.h index 14a0ae4b..6ff3284e 100644 --- a/mace/ops/opencl/image/buffer_to_image.h +++ b/mace/ops/opencl/image/buffer_to_image.h @@ -98,7 +98,7 @@ MaceStatus BufferToImage::Compute( } } - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/channel_shuffle.h b/mace/ops/opencl/image/channel_shuffle.h index 53acbf15..f890c0c3 100644 --- 
a/mace/ops/opencl/image/channel_shuffle.h +++ b/mace/ops/opencl/image/channel_shuffle.h @@ -70,7 +70,7 @@ MaceStatus ChannelShuffleKernel::Compute( static_cast(width), static_cast(height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/concat.cc b/mace/ops/opencl/image/concat.cc index aab72c54..5dfe666e 100644 --- a/mace/ops/opencl/image/concat.cc +++ b/mace/ops/opencl/image/concat.cc @@ -65,7 +65,7 @@ MaceStatus Concat2(OpContext *context, static_cast(batch * height), }; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { @@ -126,7 +126,7 @@ MaceStatus ConcatN(OpContext *context, const index_t height = output->dim(1); const index_t width = output->dim(2); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/image/conv_2d_1x1.cc b/mace/ops/opencl/image/conv_2d_1x1.cc index f88882ee..57be0750 100644 --- a/mace/ops/opencl/image/conv_2d_1x1.cc +++ b/mace/ops/opencl/image/conv_2d_1x1.cc @@ -95,7 +95,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context, const index_t width_blocks = RoundUpDiv4(width); const index_t input_channel_blocks = RoundUpDiv4(input_channels); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/image/conv_2d_3x3.cc b/mace/ops/opencl/image/conv_2d_3x3.cc index 3e5aee90..f7905a0c 100644 --- a/mace/ops/opencl/image/conv_2d_3x3.cc +++ b/mace/ops/opencl/image/conv_2d_3x3.cc @@ -83,7 +83,7 @@ extern MaceStatus 
Conv2dK3x3(OpContext *context, const index_t input_channel_blocks = RoundUpDiv4(input_channels); const index_t width_blocks = RoundUpDiv(width); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/image/conv_2d_general.cc b/mace/ops/opencl/image/conv_2d_general.cc index 120a3daa..28bdea6c 100644 --- a/mace/ops/opencl/image/conv_2d_general.cc +++ b/mace/ops/opencl/image/conv_2d_general.cc @@ -91,7 +91,7 @@ extern MaceStatus Conv2d(OpContext *context, const index_t input_channel_blocks = RoundUpDiv4(input_channels); const index_t width_blocks = RoundUpDiv4(width); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/image/crop.h b/mace/ops/opencl/image/crop.h index c8f98a4c..a83349c4 100644 --- a/mace/ops/opencl/image/crop.h +++ b/mace/ops/opencl/image/crop.h @@ -141,7 +141,7 @@ MaceStatus CropKernel::Compute( static_cast(output->dim(0) * output->dim(1)) }; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/deconv_2d.h b/mace/ops/opencl/image/deconv_2d.h index f3d6cbe9..a8dd9c26 100644 --- a/mace/ops/opencl/image/deconv_2d.h +++ b/mace/ops/opencl/image/deconv_2d.h @@ -92,7 +92,7 @@ MaceStatus Deconv2dKernel::Compute( const int align_w = stride_w - 1 - padding_w; const int kernel_size = filter->dim(2) * filter->dim(3); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/depth_to_space.h 
b/mace/ops/opencl/image/depth_to_space.h index 77c4bd53..1783b813 100644 --- a/mace/ops/opencl/image/depth_to_space.h +++ b/mace/ops/opencl/image/depth_to_space.h @@ -87,7 +87,7 @@ MaceStatus DepthToSpaceKernel::Compute( static_cast(output_width), static_cast(output_height * batch) }; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/depthwise_conv2d.cc b/mace/ops/opencl/image/depthwise_conv2d.cc index 02409ebe..57a4415e 100644 --- a/mace/ops/opencl/image/depthwise_conv2d.cc +++ b/mace/ops/opencl/image/depthwise_conv2d.cc @@ -93,7 +93,7 @@ MaceStatus DepthwiseConv2d(OpContext *context, static_cast(width_blocks), static_cast(height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/image/depthwise_deconv2d.h b/mace/ops/opencl/image/depthwise_deconv2d.h index 96fdfa51..d07a1649 100644 --- a/mace/ops/opencl/image/depthwise_deconv2d.h +++ b/mace/ops/opencl/image/depthwise_deconv2d.h @@ -98,7 +98,7 @@ MaceStatus DepthwiseDeconv2dKernel::Compute( const int align_w = stride_w - 1 - padding_w; const int kernel_size = filter->dim(2) * filter->dim(3); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/eltwise.h b/mace/ops/opencl/image/eltwise.h index 2afb3342..9600d501 100644 --- a/mace/ops/opencl/image/eltwise.h +++ b/mace/ops/opencl/image/eltwise.h @@ -117,7 +117,7 @@ MaceStatus EltwiseKernel::Compute( static_cast(width), static_cast(batch_height_pixels)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = 
context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { std::set built_options; diff --git a/mace/ops/opencl/image/fully_connected.h b/mace/ops/opencl/image/fully_connected.h index 962ffaf0..d52e927f 100644 --- a/mace/ops/opencl/image/fully_connected.h +++ b/mace/ops/opencl/image/fully_connected.h @@ -64,7 +64,7 @@ MaceStatus FullyConnectedKernel::Compute( &output_image_shape); MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/image_to_buffer.h b/mace/ops/opencl/image/image_to_buffer.h index 6ca73fa6..f9c3b011 100644 --- a/mace/ops/opencl/image/image_to_buffer.h +++ b/mace/ops/opencl/image/image_to_buffer.h @@ -92,7 +92,7 @@ MaceStatus ImageToBuffer::Compute(OpContext *context, break; } - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/lstm_cell.h b/mace/ops/opencl/image/lstm_cell.h index 546b4a79..265f2e10 100644 --- a/mace/ops/opencl/image/lstm_cell.h +++ b/mace/ops/opencl/image/lstm_cell.h @@ -71,7 +71,7 @@ MaceStatus LSTMCellKernel::Compute( const index_t hidden_units = pre_output->dim(1); const index_t w_blocks = hidden_units >> 2; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/matmul.h b/mace/ops/opencl/image/matmul.h index 763082f6..1681a8f8 100644 --- a/mace/ops/opencl/image/matmul.h +++ b/mace/ops/opencl/image/matmul.h @@ -82,7 +82,7 @@ MaceStatus MatMulKernel::Compute( static_cast(height_blocks * batch), }; - auto 
runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/pad.h b/mace/ops/opencl/image/pad.h index cb0c390b..8d1cae3e 100644 --- a/mace/ops/opencl/image/pad.h +++ b/mace/ops/opencl/image/pad.h @@ -80,7 +80,7 @@ MaceStatus PadKernel::Compute( const index_t channel_blocks = RoundUpDiv4(channels); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/pooling.h b/mace/ops/opencl/image/pooling.h index f246efa4..1af67740 100644 --- a/mace/ops/opencl/image/pooling.h +++ b/mace/ops/opencl/image/pooling.h @@ -112,7 +112,7 @@ MaceStatus PoolingKernel::Compute( &output_image_shape); MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/reduce_mean.h b/mace/ops/opencl/image/reduce_mean.h index 95b51d86..3280691c 100644 --- a/mace/ops/opencl/image/reduce_mean.h +++ b/mace/ops/opencl/image/reduce_mean.h @@ -76,7 +76,7 @@ MaceStatus ReduceMeanKernel::Compute( &output_image_shape); MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/resize_bicubic.h b/mace/ops/opencl/image/resize_bicubic.h index bf5bfcf1..bf72ee78 100644 --- a/mace/ops/opencl/image/resize_bicubic.h +++ b/mace/ops/opencl/image/resize_bicubic.h @@ -102,7 +102,7 @@ MaceStatus ResizeBicubicKernel::Compute( 
static_cast(out_width), static_cast(out_height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/resize_bilinear.h b/mace/ops/opencl/image/resize_bilinear.h index b3f1b09c..1eb599c9 100644 --- a/mace/ops/opencl/image/resize_bilinear.h +++ b/mace/ops/opencl/image/resize_bilinear.h @@ -107,7 +107,7 @@ MaceStatus ResizeBilinearKernel::Compute( static_cast(out_width), static_cast(out_height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/softmax.h b/mace/ops/opencl/image/softmax.h index ffd5ec89..a19d9483 100644 --- a/mace/ops/opencl/image/softmax.h +++ b/mace/ops/opencl/image/softmax.h @@ -102,7 +102,7 @@ MaceStatus SoftmaxKernel::Compute( static_cast(width), static_cast(height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/space_to_batch.h b/mace/ops/opencl/image/space_to_batch.h index f2baaba4..c2190c68 100644 --- a/mace/ops/opencl/image/space_to_batch.h +++ b/mace/ops/opencl/image/space_to_batch.h @@ -66,7 +66,7 @@ MaceStatus SpaceToBatchKernel::Compute( chan_blk, static_cast(batch_tensor->dim(2)), static_cast(batch_tensor->dim(0) * batch_tensor->dim(1))}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/space_to_depth.h b/mace/ops/opencl/image/space_to_depth.h index e225b376..1df75ef8 100644 --- a/mace/ops/opencl/image/space_to_depth.h +++ 
b/mace/ops/opencl/image/space_to_depth.h @@ -79,7 +79,7 @@ MaceStatus SpaceToDepthKernel::Compute( &image_shape); MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, image_shape)); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/split.h b/mace/ops/opencl/image/split.h index 7b7f7905..d0427a4f 100644 --- a/mace/ops/opencl/image/split.h +++ b/mace/ops/opencl/image/split.h @@ -70,7 +70,7 @@ MaceStatus SplitKernel::Compute( output_list[i]->ResizeImage(output_shape, image_shape)); } - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/sqrdiff_mean.h b/mace/ops/opencl/image/sqrdiff_mean.h index d0c217fe..ba84a5ef 100644 --- a/mace/ops/opencl/image/sqrdiff_mean.h +++ b/mace/ops/opencl/image/sqrdiff_mean.h @@ -72,7 +72,7 @@ MaceStatus SqrDiffMeanKernel::Compute( &output_image_shape); MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/winograd_conv2d.cc b/mace/ops/opencl/image/winograd_conv2d.cc index a9bd7171..8d684e59 100644 --- a/mace/ops/opencl/image/winograd_conv2d.cc +++ b/mace/ops/opencl/image/winograd_conv2d.cc @@ -37,7 +37,7 @@ MaceStatus WinogradInputTransform(OpContext *context, Tensor *output_tensor, uint32_t *kwg_size, StatsFuture *future) { - OpenCLRuntime *runtime = context->device()->opencl_runtime(); + OpenCLRuntime *runtime = context->device()->gpu_runtime()->opencl_runtime(); const index_t out_width = output_tensor->dim(2); MACE_OUT_OF_RANGE_DEFINITION; @@ -119,7 +119,7 @@ 
MaceStatus WinogradOutputTransform(OpContext *context, Tensor *output_tensor, uint32_t *kwg_size, StatsFuture *future) { - OpenCLRuntime *runtime = context->device()->opencl_runtime(); + OpenCLRuntime *runtime = context->device()->gpu_runtime()->opencl_runtime(); auto &output_shape = output_tensor->shape(); MACE_OUT_OF_RANGE_DEFINITION; @@ -227,8 +227,9 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context, std::vector *prev_input_shape, Tensor *output, uint32_t *kwg_size[3]) { - OpenCLRuntime *runtime = context->device()->opencl_runtime(); - ScratchImageManager *scratch_manager = runtime->scratch_image_manager(); + OpenCLRuntime *runtime = context->device()->gpu_runtime()->opencl_runtime(); + ScratchImageManager *scratch_manager = + context->device()->gpu_runtime()->scratch_image_manager(); StatsFuture t_input_future, mm_future, t_output_future; bool input_changed = !IsVecEqual(*prev_input_shape, input->shape()); *prev_input_shape = input->shape(); diff --git a/mace/ops/opencl/out_of_range_check_test.cc b/mace/ops/opencl/out_of_range_check_test.cc index eb223693..093e0fb4 100644 --- a/mace/ops/opencl/out_of_range_check_test.cc +++ b/mace/ops/opencl/out_of_range_check_test.cc @@ -35,7 +35,7 @@ MaceStatus BufferToImageOpImpl(OpContext *context, uint32_t gws[2] = {static_cast(image_shape[0]), static_cast(image_shape[1])}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); std::string kernel_name = "in_out_buffer_to_image"; std::string obfuscated_kernel_name = MACE_OBFUSCATE_SYMBOL(kernel_name); diff --git a/mace/ops/ops_test_util.cc b/mace/ops/ops_test_util.cc index 6b08761e..5233ccde 100644 --- a/mace/ops/ops_test_util.cc +++ b/mace/ops/ops_test_util.cc @@ -206,7 +206,7 @@ MaceStatus OpsTestNet::RunOp(mace::DeviceType device) { auto opencl_mem_types = OpTestContext::Get()->opencl_mem_types(); for (auto type : opencl_mem_types) { OpTestContext::Get()->GetDevice(device) - 
->opencl_runtime()->set_mem_type(type); + ->gpu_runtime()->set_mem_type(type); Setup(device); MACE_RETURN_IF_ERROR(Run()); } @@ -242,8 +242,8 @@ MaceStatus OpsTestNet::RunNet(const mace::NetDef &net_def, void OpsTestNet::Sync() { #ifdef MACE_ENABLE_OPENCL if (net_ && device_type_ == DeviceType::GPU) { - OpTestContext::Get()->GetDevice(DeviceType::GPU)->opencl_runtime() - ->command_queue().finish(); + OpTestContext::Get()->GetDevice(DeviceType::GPU)->gpu_runtime() + ->opencl_runtime()->command_queue().finish(); } #endif } diff --git a/mace/ops/pad.cc b/mace/ops/pad.cc index cb797906..aa18b7c1 100644 --- a/mace/ops/pad.cc +++ b/mace/ops/pad.cc @@ -97,7 +97,7 @@ class PadOp : public Operation { std::vector paddings = Operation::GetRepeatedArgs("paddings"); float constant_value = Operation::GetOptionalArg( "constant_value", 0.0); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::PadKernel(paddings, constant_value)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/pooling.cc b/mace/ops/pooling.cc index b2aef666..50372c3c 100644 --- a/mace/ops/pooling.cc +++ b/mace/ops/pooling.cc @@ -429,7 +429,7 @@ class PoolingOp : public PoolingOpBase { public: explicit PoolingOp(OpConstructContext *context) : PoolingOpBase(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::PoolingKernel); } else { context->set_output_mem_type(MemoryType::GPU_BUFFER); diff --git a/mace/ops/reduce_mean.cc b/mace/ops/reduce_mean.cc index 20f7e81c..863103b2 100644 --- a/mace/ops/reduce_mean.cc +++ b/mace/ops/reduce_mean.cc @@ -246,7 +246,7 @@ class ReduceMeanOp : public ReduceMeanOpBase { public: explicit ReduceMeanOp(OpConstructContext *context) : ReduceMeanOpBase(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if 
(context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::ReduceMeanKernel(axis_, keep_dims_)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/resize_bicubic.cc b/mace/ops/resize_bicubic.cc index 40330060..3ccff3e6 100644 --- a/mace/ops/resize_bicubic.cc +++ b/mace/ops/resize_bicubic.cc @@ -195,7 +195,7 @@ class ResizeBicubicOp : public Operation { std::vector size = Operation::GetRepeatedArgs( "size", {-1, -1}); MACE_CHECK(size.size() == 2); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::ResizeBicubicKernel(align_corners, size[0], size[1])); diff --git a/mace/ops/resize_bilinear.cc b/mace/ops/resize_bilinear.cc index 5ce6ef4a..748c2efd 100644 --- a/mace/ops/resize_bilinear.cc +++ b/mace/ops/resize_bilinear.cc @@ -331,7 +331,7 @@ class ResizeBilinearOp : public Operation { std::vector size = Operation::GetRepeatedArgs( "size", {-1, -1}); MACE_CHECK(size.size() == 2); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::ResizeBilinearKernel(align_corners, size[0], size[1])); diff --git a/mace/ops/softmax.cc b/mace/ops/softmax.cc index 4a7505ae..2518b407 100644 --- a/mace/ops/softmax.cc +++ b/mace/ops/softmax.cc @@ -364,7 +364,7 @@ class SoftmaxOp : public Operation { public: explicit SoftmaxOp(OpConstructContext *context) : Operation(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::SoftmaxKernel); } else { context->set_output_mem_type(MemoryType::GPU_BUFFER); diff --git a/mace/ops/space_to_batch.cc b/mace/ops/space_to_batch.cc index 7d422938..7fa9081d 100644 --- a/mace/ops/space_to_batch.cc +++ b/mace/ops/space_to_batch.cc @@ -308,7 +308,7 @@ class SpaceToBatchNDOp : public SpaceToBatchOpBase { public: 
explicit SpaceToBatchNDOp(OpConstructContext *context) : SpaceToBatchOpBase(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::SpaceToBatchKernel); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/space_to_depth.cc b/mace/ops/space_to_depth.cc index 11e5ade3..39e603ae 100644 --- a/mace/ops/space_to_depth.cc +++ b/mace/ops/space_to_depth.cc @@ -94,7 +94,7 @@ class SpaceToDepthOp : public Operation { explicit SpaceToDepthOp(OpConstructContext *context) : Operation(context) { int block_size = Operation::GetOptionalArg("block_size", 1); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::SpaceToDepthKernel(block_size)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/split.cc b/mace/ops/split.cc index 2e096631..0f9dcc04 100644 --- a/mace/ops/split.cc +++ b/mace/ops/split.cc @@ -105,7 +105,7 @@ class SplitOp : public Operation { explicit SplitOp(OpConstructContext *context) : Operation(context) { int32_t axis = Operation::GetOptionalArg("axis", 3); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::SplitKernel(axis)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/sqrdiff_mean.cc b/mace/ops/sqrdiff_mean.cc index 7927da3b..b469a3e3 100644 --- a/mace/ops/sqrdiff_mean.cc +++ b/mace/ops/sqrdiff_mean.cc @@ -82,7 +82,7 @@ class SqrDiffMeanOp : public Operation { public: explicit SqrDiffMeanOp(OpConstructContext *context) : Operation(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::SqrDiffMeanKernel()); } else { MACE_NOT_IMPLEMENTED; -- GitLab