diff --git a/mace/core/device.cc b/mace/core/device.cc index 35e8c7af6bda7ba27faa768cedc0cbdbfecef7f7..4eb547c2f4f4e29d6066cce6da93b32f9ffceeb5 100644 --- a/mace/core/device.cc +++ b/mace/core/device.cc @@ -33,8 +33,8 @@ CPURuntime *CPUDevice::cpu_runtime() { } #ifdef MACE_ENABLE_OPENCL -OpenCLRuntime *CPUDevice::opencl_runtime() { - LOG(FATAL) << "CPU device should not call OpenCL Runtime"; +GPURuntime *CPUDevice::gpu_runtime() { + LOG(FATAL) << "CPU device should not call GPU Runtime"; return nullptr; } #endif diff --git a/mace/core/device.h b/mace/core/device.h index b7fe5f329b99401d31b04af102b2ca1d32d06bff..627d46bede29b6d888813a470a6c1f3a76145561 100644 --- a/mace/core/device.h +++ b/mace/core/device.h @@ -21,7 +21,7 @@ #include "mace/core/allocator.h" #ifdef MACE_ENABLE_OPENCL -#include "mace/core/runtime/opencl/opencl_runtime.h" +#include "mace/core/runtime/opencl/gpu_runtime.h" #endif namespace mace { @@ -33,7 +33,7 @@ class Device { virtual ~Device() {} #ifdef MACE_ENABLE_OPENCL - virtual OpenCLRuntime *opencl_runtime() = 0; + virtual GPURuntime *gpu_runtime() = 0; #endif // MACE_ENABLE_OPENCL virtual CPURuntime *cpu_runtime() = 0; @@ -50,7 +50,7 @@ class CPUDevice : public Device { virtual ~CPUDevice(); #ifdef MACE_ENABLE_OPENCL - OpenCLRuntime *opencl_runtime() override; + GPURuntime *gpu_runtime() override; #endif CPURuntime *cpu_runtime() override; diff --git a/mace/core/runtime/opencl/gpu_device.cc b/mace/core/runtime/opencl/gpu_device.cc index 09bb91816d0ff6aff45b68c85473d4a89b0ddc79..caea576773a8e226831a20ac5e4e5f7899e7ed24 100644 --- a/mace/core/runtime/opencl/gpu_device.cc +++ b/mace/core/runtime/opencl/gpu_device.cc @@ -30,12 +30,13 @@ GPUDevice::GPUDevice(std::shared_ptr> tuner, runtime_(new OpenCLRuntime(opencl_cache_storage, priority, perf, opencl_binary_storage, tuner)), allocator_(new OpenCLAllocator(runtime_.get())), - scratch_buffer_(new ScratchBuffer(allocator_.get())) {} + scratch_buffer_(new ScratchBuffer(allocator_.get())), + gpu_runtime_(new GPURuntime(runtime_.get())) {} GPUDevice::~GPUDevice() = default; -OpenCLRuntime* GPUDevice::opencl_runtime() { - return runtime_.get(); +GPURuntime* GPUDevice::gpu_runtime() { + return gpu_runtime_.get(); } Allocator *GPUDevice::allocator() { diff --git a/mace/core/runtime/opencl/gpu_device.h b/mace/core/runtime/opencl/gpu_device.h index 1d36461b219ce5b28e4efa7ce6f769613eb92634..d3c7d98e88868ab02121a1151f360436894cecc9 100644 --- a/mace/core/runtime/opencl/gpu_device.h +++ b/mace/core/runtime/opencl/gpu_device.h @@ -19,6 +19,7 @@ #include "mace/core/device_context.h" #include "mace/core/device.h" +#include "mace/core/runtime/opencl/gpu_runtime.h" #include "mace/core/runtime/opencl/opencl_allocator.h" namespace mace { @@ -34,7 +35,7 @@ class GPUDevice : public CPUDevice { CPUAffinityPolicy cpu_affinity_policy = AFFINITY_NONE, bool use_gemmlowp = false); ~GPUDevice(); - OpenCLRuntime *opencl_runtime() override; + GPURuntime *gpu_runtime() override; Allocator *allocator() override; DeviceType device_type() const override; ScratchBuffer *scratch_buffer() override; @@ -42,6 +43,7 @@ class GPUDevice : public CPUDevice { std::unique_ptr runtime_; std::unique_ptr allocator_; std::unique_ptr scratch_buffer_; + std::unique_ptr gpu_runtime_; }; } // namespace mace diff --git a/mace/core/runtime/opencl/gpu_runtime.cc b/mace/core/runtime/opencl/gpu_runtime.cc new file mode 100644 index 0000000000000000000000000000000000000000..8574ad48e4857eda4de415fdb17bba94a6bec7e1 --- /dev/null +++ b/mace/core/runtime/opencl/gpu_runtime.cc @@ -0,0 +1,45 @@ +// Copyright 2018 Xiaomi, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/core/runtime/opencl/gpu_runtime.h" + +#include "mace/core/runtime/opencl/scratch_image.h" + +namespace mace { + +GPURuntime::GPURuntime(mace::OpenCLRuntime *runtime) + : runtime_(runtime), + scratch_image_manager_(new ScratchImageManager), + mem_type_(MemoryType::GPU_IMAGE) {} + +GPURuntime::~GPURuntime() = default; + +OpenCLRuntime* GPURuntime::opencl_runtime() { + return runtime_; +} + +ScratchImageManager* GPURuntime::scratch_image_manager() const { + return scratch_image_manager_.get(); +} + +bool GPURuntime::UseImageMemory() { + return this->mem_type_ == MemoryType::GPU_IMAGE; +} + +void GPURuntime::set_mem_type(MemoryType type) { + this->mem_type_ = type; +} + + +} // namespace mace diff --git a/mace/core/runtime/opencl/gpu_runtime.h b/mace/core/runtime/opencl/gpu_runtime.h new file mode 100644 index 0000000000000000000000000000000000000000..fee776edb041c4b4dd2876f11c6bf46b4afe074c --- /dev/null +++ b/mace/core/runtime/opencl/gpu_runtime.h @@ -0,0 +1,45 @@ +// Copyright 2018 Xiaomi, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_CORE_RUNTIME_OPENCL_GPU_RUNTIME_H_ +#define MACE_CORE_RUNTIME_OPENCL_GPU_RUNTIME_H_ + +#include + +#include "mace/proto/mace.pb.h" + +namespace mace { + +class OpenCLRuntime; +class ScratchImageManager; + +class GPURuntime { + public: + explicit GPURuntime(OpenCLRuntime *runtime); + ~GPURuntime(); + OpenCLRuntime *opencl_runtime(); + ScratchImageManager *scratch_image_manager() const; + + // TODO(liuqi): remove this function in the future, make decision at runtime. + bool UseImageMemory(); + void set_mem_type(MemoryType type); + + private: + OpenCLRuntime *runtime_; + std::unique_ptr scratch_image_manager_; + MemoryType mem_type_; +}; + +} // namespace mace +#endif // MACE_CORE_RUNTIME_OPENCL_GPU_RUNTIME_H_ diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index b552c65ab3f663e9e4db9add45f5e04913f0994c..904e74f6cd35d4b172808ba280ff2b53c54405ea 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -284,9 +284,7 @@ OpenCLRuntime::OpenCLRuntime( is_opencl_avaliable_(false), is_profiling_enabled_(false), opencl_version_(CL_VER_UNKNOWN), - gpu_type_(UNKNOWN), - mem_type_(MemoryType::GPU_IMAGE), - scratch_image_manager_(new ScratchImageManager) { + gpu_type_(UNKNOWN) { std::vector all_platforms; cl::Platform::get(&all_platforms); if (all_platforms.size() == 0) { @@ -471,14 +469,6 @@ uint32_t OpenCLRuntime::device_compute_units() const { return device_compute_units_; } -bool OpenCLRuntime::UseImageMemory() { - return this->mem_type_ == MemoryType::GPU_IMAGE; -} - -void OpenCLRuntime::set_mem_type(MemoryType type) { - this->mem_type_ = type; -} - bool OpenCLRuntime::BuildProgramFromCache( const std::string &built_program_key, const std::string &build_options_str, @@ -792,8 +782,4 @@ bool OpenCLRuntime::is_profiling_enabled() const { return is_profiling_enabled_; } -ScratchImageManager* OpenCLRuntime::scratch_image_manager() const { - return scratch_image_manager_.get(); -} - } // namespace mace diff --git a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h index 546b80086949215f82ba7fd831f96c509590a712..1e189b8eeb5f6e347d41680cc3977643757af22f 100644 --- a/mace/core/runtime/opencl/opencl_runtime.h +++ b/mace/core/runtime/opencl/opencl_runtime.h @@ -83,11 +83,7 @@ class OpenCLRuntime { uint64_t device_global_mem_cache_size() const; uint32_t device_compute_units() const; Tuner *tuner(); - ScratchImageManager *scratch_image_manager() const; bool is_opencl_avaliable(); - // TODO(liuqi): remove this function in the future, make decision at runtime. - bool UseImageMemory(); - void set_mem_type(MemoryType type); void GetCallStats(const cl::Event &event, CallStats *stats); uint64_t GetDeviceMaxWorkGroupSize(); @@ -135,8 +131,6 @@ class OpenCLRuntime { bool is_profiling_enabled_; OpenCLVersion opencl_version_; GPUType gpu_type_; - MemoryType mem_type_; - std::unique_ptr scratch_image_manager_; // All OpenCL object must be a pointer and manually deleted before unloading // OpenCL library. std::shared_ptr context_; diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index bbef2c5d5a35331fbcecb5fc7b8197adeb3b2afa..5123e670bc900fb5e6f4be145f8fc64be5105b5d 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -109,7 +109,7 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, (!is_quantize_model && HasQuantizedTensor(net_def)))); #ifdef MACE_ENABLE_OPENCL diffused_buffer_ = diffused_buffer_ || (device_type == DeviceType::GPU && - device->opencl_runtime()->GetDeviceMaxMemAllocSize() <= + device->gpu_runtime()->opencl_runtime()->GetDeviceMaxMemAllocSize() <= static_cast(model_data_size)); #endif if (diffused_buffer_) { diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc index 9244d62a8a9bfae3b7d93c901092ec928970b454..bcaff34da3372019fd1fc4f15b566a3c62402d93 100644 --- a/mace/libmace/mace.cc +++ b/mace/libmace/mace.cc @@ -69,8 +69,8 @@ void UnloadModelData(const unsigned char *model_data, #ifdef MACE_ENABLE_OPENCL MaceStatus CheckGPUAvalibility(const NetDef *net_def, Device *device) { // Check OpenCL avaliable - auto runtime = device->opencl_runtime(); - if (!runtime->is_opencl_avaliable()) { + auto runtime = device->gpu_runtime(); + if (!runtime->opencl_runtime()->is_opencl_avaliable()) { LOG(WARNING) << "The device does not support OpenCL"; return MaceStatus::MACE_OUT_OF_RESOURCES; } @@ -678,8 +678,8 @@ MaceStatus MaceEngine::Impl::Run( #ifdef MACE_ENABLE_OPENCL if (device_type_ == GPU) { - device_->opencl_runtime()->command_queue().finish(); - device_->opencl_runtime()->SaveBuiltCLProgram(); + device_->gpu_runtime()->opencl_runtime()->command_queue().finish(); + device_->gpu_runtime()->opencl_runtime()->SaveBuiltCLProgram(); } #endif for (auto &output : *outputs) { diff --git a/mace/ops/activation.cc b/mace/ops/activation.cc index b904b5c275373e48f59358b8a238f61dd6917bf6..fe8862bb85bfe472ef558411632525ab8e6fd7ef 100644 --- a/mace/ops/activation.cc +++ b/mace/ops/activation.cc @@ -81,7 +81,7 @@ class ActivationOp : public Operation { auto relux_max_limit = static_cast( Operation::GetOptionalArg("max_limit", 0.0f)); MemoryType mem_type; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_.reset( new opencl::image::ActivationKernel(type, relux_max_limit)); diff --git a/mace/ops/addn.cc b/mace/ops/addn.cc index 4040de1fa50eacea06c654b47c0515918b505d61..0fe0c7b4371839f6a9c4449b633925d834f91efe 100644 --- a/mace/ops/addn.cc +++ b/mace/ops/addn.cc @@ -106,7 +106,7 @@ class AddNOp : public Operation { public: explicit AddNOp(OpConstructContext *context) : Operation(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::AddNKernel); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/batch_norm.cc b/mace/ops/batch_norm.cc index 1758f79b799a11df6b075222ffb022be5a71b615..3ca5592a8bebb8126809b50edff8a9ba1a6ef430 100644 --- a/mace/ops/batch_norm.cc +++ b/mace/ops/batch_norm.cc @@ -149,7 +149,7 @@ class BatchNormOp : public Operation { Operation::GetOptionalArg("activation", "NOOP")); float relux_max_limit = Operation::GetOptionalArg("max_limit", 0.0f); MemoryType mem_type; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_.reset(new opencl::image::BatchNormKernel( epsilon, activation, relux_max_limit)); diff --git a/mace/ops/batch_to_space.cc b/mace/ops/batch_to_space.cc index 5cc6a1e025c54b755a61d3e0c5331d0f38aa5450..3aa5acecbd82755dcdfb5aa007e076e3cb950e84 100644 --- a/mace/ops/batch_to_space.cc +++ b/mace/ops/batch_to_space.cc @@ -265,7 +265,7 @@ class BatchToSpaceNDOp : public BatchToSpaceOpBase { public: explicit BatchToSpaceNDOp(OpConstructContext *context) : BatchToSpaceOpBase(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::BatchToSpaceKernel); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/bias_add.cc b/mace/ops/bias_add.cc index 59579fa518bd613700251ee74b2265025337d58d..9190cf95859ea8dd4a0b27d93e70aad8cf47825f 100644 --- a/mace/ops/bias_add.cc +++ b/mace/ops/bias_add.cc @@ -101,7 +101,7 @@ class BiasAddOp : public Operation { data_format_(static_cast(Operation::GetOptionalArg( "data_format", NHWC))) { MemoryType mem_type; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_.reset(new opencl::image::BiasAddKernel); } else { diff --git a/mace/ops/channel_shuffle.cc b/mace/ops/channel_shuffle.cc index 04c6a88dc99c06ac9f401a1839205d349b32ff90..d4404c618d0a06c75892782fab7bcd48866e5ebc 100644 --- a/mace/ops/channel_shuffle.cc +++ b/mace/ops/channel_shuffle.cc @@ -84,7 +84,7 @@ class ChannelShuffleOp : public Operation { explicit ChannelShuffleOp(OpConstructContext *context) : Operation(context) { const int groups = Operation::GetOptionalArg("group", 1); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::ChannelShuffleKernel(groups)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/concat.cc b/mace/ops/concat.cc index eec11e0bb132055238d0dee95091d088729799bc..3fa5ef2c5097e9c2a38f68fac1707a46bb440777 100644 --- a/mace/ops/concat.cc +++ b/mace/ops/concat.cc @@ -196,7 +196,7 @@ class ConcatOp : public ConcatOpBase { public: explicit ConcatOp(OpConstructContext *context) : ConcatOpBase(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::ConcatKernel(axis_)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/conv_2d.cc b/mace/ops/conv_2d.cc index a5cbec7411aaa47f82717e50a71ee1cf3d4d87e6..0a0d3bb51b7e412db06712e401e5268c53bf10b7 100644 --- a/mace/ops/conv_2d.cc +++ b/mace/ops/conv_2d.cc @@ -963,7 +963,7 @@ class Conv2dOp : public ConvPool2dOpBase { relux_max_limit_(Operation::GetOptionalArg("max_limit", 0.0f)), wino_block_size_(Operation::GetOptionalArg("wino_block_size", 0)) { MemoryType mem_type; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_.reset(new opencl::image::Conv2dKernel); } else { @@ -974,7 +974,7 @@ class Conv2dOp : public ConvPool2dOpBase { // Transform filter tensor to target format if ((wino_block_size_ == 2 || wino_block_size_ == 4) && (kernel_->CheckUseWinograd( - context->device()->opencl_runtime(), + context->device()->gpu_runtime()->opencl_runtime(), context->workspace()->GetTensor( operator_def_->input(1))->shape(), std::vector(operator_def_->output_shape(0).dims().begin(), diff --git a/mace/ops/crop.cc b/mace/ops/crop.cc index b056f21c189f862da14481bec3111edf5af8687c..7b705069f14b76fb785907116939144cf9897d18 100644 --- a/mace/ops/crop.cc +++ b/mace/ops/crop.cc @@ -113,7 +113,7 @@ class CropOp : public Operation { explicit CropOp(OpConstructContext *context) : Operation(context) { const int axis = Operation::GetOptionalArg("axis", 2); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::CropKernel( axis, Operation::GetRepeatedArgs("offset"))); } else { diff --git a/mace/ops/deconv_2d.cc b/mace/ops/deconv_2d.cc index 5697c8413544742ad1517154c84511f9031cbabb..575e81addca1333b7481b604d3aaff9ef660719b 100644 --- a/mace/ops/deconv_2d.cc +++ b/mace/ops/deconv_2d.cc @@ -360,7 +360,7 @@ class Deconv2dOp : public Deconv2dOpBase { explicit Deconv2dOp(OpConstructContext *context) : Deconv2dOpBase(context) { MemoryType mem_type = MemoryType::GPU_IMAGE; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::Deconv2dKernel); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/depth_to_space.cc b/mace/ops/depth_to_space.cc index e18cc106f4fba10c4f054cd7d8c219b0ef032118..ee06075a9766bf362051cd202dce75c0014ca5a5 100644 --- a/mace/ops/depth_to_space.cc +++ b/mace/ops/depth_to_space.cc @@ -96,7 +96,7 @@ class DepthToSpaceOp : public Operation { explicit DepthToSpaceOp(OpConstructContext *context) : Operation(context) { int block_size = Operation::GetOptionalArg("block_size", 1); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::DepthToSpaceKernel(block_size)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc index 8a85ab464ca0911b95a3ea4f039e1c61eb60da17..2f849ef7f1252087673427be30f24c40da60c58b 100644 --- a/mace/ops/depthwise_conv2d.cc +++ b/mace/ops/depthwise_conv2d.cc @@ -492,7 +492,7 @@ class DepthwiseConv2dOp : public DepthwiseConv2dOpBase { explicit DepthwiseConv2dOp(OpConstructContext *context) : DepthwiseConv2dOpBase(context) { MemoryType mem_type; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_.reset(new opencl::image::DepthwiseConv2dKernel); } else { diff --git a/mace/ops/depthwise_deconv2d.cc b/mace/ops/depthwise_deconv2d.cc index 3f10a514cec8712b583b1f0fcae2166fe747da46..a4e7148e6159bc5129f84b8dc68d9aa45b46e3fa 100644 --- a/mace/ops/depthwise_deconv2d.cc +++ b/mace/ops/depthwise_deconv2d.cc @@ -410,7 +410,7 @@ class DepthwiseDeconv2dOp : public Deconv2dOpBase { explicit DepthwiseDeconv2dOp(OpConstructContext *context) : Deconv2dOpBase(context) { MemoryType mem_type = MemoryType::GPU_IMAGE; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::DepthwiseDeconv2dKernel); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/eltwise.cc b/mace/ops/eltwise.cc index 863b69edc2033e54866f5935b097d4f93c968395..1a2e09081fd6f3fd302aad96d113417a7d65bcba 100644 --- a/mace/ops/eltwise.cc +++ b/mace/ops/eltwise.cc @@ -1088,7 +1088,7 @@ class EltwiseOp : public Operation { int32_t scalar_input_index = Operation::GetOptionalArg( "scalar_input_index", 1); MemoryType mem_type; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_.reset(new opencl::image::EltwiseKernel( type, coeff, scalar_input, scalar_input_index)); diff --git a/mace/ops/fully_connected.cc b/mace/ops/fully_connected.cc index ef919d9292bab8b2474a40ab30053b587bd79d96..31b1fb058dcae66f9a64c8cd04d8a7cb5dcdd2a1 100644 --- a/mace/ops/fully_connected.cc +++ b/mace/ops/fully_connected.cc @@ -194,7 +194,7 @@ class FullyConnectedOp : public FullyConnectedOpBase { explicit FullyConnectedOp(OpConstructContext *context) : FullyConnectedOpBase(context) { MemoryType mem_type; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_.reset(new opencl::image::FullyConnectedKernel); } else { diff --git a/mace/ops/lstm_cell.cc b/mace/ops/lstm_cell.cc index dfbfa155a31377dbbbd20cbd7d6c6ebe5df48838..bc34b969bb9b7bebf17f10552cc9a55751fdaae2 100644 --- a/mace/ops/lstm_cell.cc +++ b/mace/ops/lstm_cell.cc @@ -34,7 +34,7 @@ class LSTMCellOp : public Operation { Operation::GetOptionalArg("scalar_input", 0.0)); MemoryType mem_type = MemoryType::GPU_IMAGE; - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::LSTMCellKernel(forget_bias)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/opencl/buffer/buffer_transform.cc b/mace/ops/opencl/buffer/buffer_transform.cc index 9ba3f81d1e7b59bd1c7b0b015616da1cec775ac7..5bfc53899e321964c68d812290ccfee3a9ff9b3f 100644 --- a/mace/ops/opencl/buffer/buffer_transform.cc +++ b/mace/ops/opencl/buffer/buffer_transform.cc @@ -47,7 +47,7 @@ MaceStatus TransformConv2DFilter( MACE_RETURN_IF_ERROR(output->Resize(transformed_shape)); output->Reshape(input->shape()); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION if (kernel->get() == nullptr) { std::set built_options; @@ -116,7 +116,7 @@ MaceStatus TransformDWConv2DFilter( MACE_RETURN_IF_ERROR(output->Resize(transformed_shape)); output->Reshape(input->shape()); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION if (kernel->get() == nullptr) { std::set built_options; @@ -173,7 +173,7 @@ MaceStatus TransformArgument( MACE_RETURN_IF_ERROR(output->Resize(transformed_shape)); output->Reshape(input->shape()); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION if (kernel->get() == nullptr) { std::set built_options; diff --git a/mace/ops/opencl/buffer/buffer_type_transform.cc b/mace/ops/opencl/buffer/buffer_type_transform.cc index ce405e9f3da2865c4a2547389f15cdb9434f6996..757283792f016cf58ec2beb711a107ad11c0172c 100644 --- a/mace/ops/opencl/buffer/buffer_type_transform.cc +++ b/mace/ops/opencl/buffer/buffer_type_transform.cc @@ -31,7 +31,7 @@ MaceStatus BufferTypeTransform( Tensor *output) { MACE_RETURN_IF_ERROR(output->ResizeLike(input)); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION const uint32_t gws = diff --git a/mace/ops/opencl/buffer/conv_2d_1x1.cc b/mace/ops/opencl/buffer/conv_2d_1x1.cc index 62e77b17b3fe8b0d80d0d5b8665c17b0fa8ca728..abe7d93be2a24a513f231cfdd36c71107004d33b 100644 --- a/mace/ops/opencl/buffer/conv_2d_1x1.cc +++ b/mace/ops/opencl/buffer/conv_2d_1x1.cc @@ -43,7 +43,7 @@ MaceStatus Conv2d1x1(OpContext *context, const index_t in_height = padded_input->dim(1); const index_t in_width = padded_input->dim(2); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/buffer/conv_2d_general.cc b/mace/ops/opencl/buffer/conv_2d_general.cc index f9cc804d7ca04529eabe16aea9d4f8a453289640..e8ac509ccfe957717c6e206cdff1837211595326 100644 --- a/mace/ops/opencl/buffer/conv_2d_general.cc +++ b/mace/ops/opencl/buffer/conv_2d_general.cc @@ -48,7 +48,7 @@ MaceStatus Conv2dGeneral(OpContext *context, const index_t filter_height = filter->dim(2); const index_t filter_width = filter->dim(3); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/buffer/depthwise_conv2d.cc b/mace/ops/opencl/buffer/depthwise_conv2d.cc index 0ba4526c0199c3262e95a0aace503f0977157e97..d2c335999c6680f8b4ee2c01a28d4c1ca049f87e 100644 --- a/mace/ops/opencl/buffer/depthwise_conv2d.cc +++ b/mace/ops/opencl/buffer/depthwise_conv2d.cc @@ -48,7 +48,7 @@ MaceStatus DepthwiseConv2d(OpContext *context, const index_t filter_height = filter->dim(2); const index_t filter_width = filter->dim(3); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/buffer/pooling.h b/mace/ops/opencl/buffer/pooling.h index 4684d687874fdc37d0227b222201a59e29425e79..de7d76108fd40e65cb745aa4172adcc993cc6302 100644 --- a/mace/ops/opencl/buffer/pooling.h +++ b/mace/ops/opencl/buffer/pooling.h @@ -92,7 +92,7 @@ MaceStatus PoolingKernel::Compute( bool input_changed = !IsVecEqual(input_shape_, input->shape()); input_shape_ = input->shape(); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); // pad input std::vector padded_input_shape = input->shape(); diff --git a/mace/ops/opencl/buffer/softmax.h b/mace/ops/opencl/buffer/softmax.h index 3147a935b8116fbbb1daa1e6cb5433df552087f8..248ee0c85c1ddf3c45a52e74c966c87a372528d7 100644 --- a/mace/ops/opencl/buffer/softmax.h +++ b/mace/ops/opencl/buffer/softmax.h @@ -75,7 +75,7 @@ MaceStatus SoftmaxKernel::Compute( static_cast(width), static_cast(height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/buffer/utils.cc b/mace/ops/opencl/buffer/utils.cc index b4214a0af02f4967374363f88fe54854e80055a8..141a96b748bc5430aff17396bc0661f737a6df40 100644 --- a/mace/ops/opencl/buffer/utils.cc +++ b/mace/ops/opencl/buffer/utils.cc @@ -47,7 +47,7 @@ MaceStatus PadInput(OpContext *context, static_cast(padded_height * batch) }; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/image/activation.h b/mace/ops/opencl/image/activation.h index 93944b5b7810ef6048b623f898fc6c8f69609272..80713c36977b495ae857f0af75c031a424c933ea 100644 --- a/mace/ops/opencl/image/activation.h +++ b/mace/ops/opencl/image/activation.h @@ -66,7 +66,7 @@ MaceStatus ActivationKernel::Compute( const index_t channel_blocks = RoundUpDiv4(channels); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/addn.h b/mace/ops/opencl/image/addn.h index 7692ac06b8e281295381b7ecf77d446784988859..48f6d8f840dc2457ec886842b824c136d1616b1a 100644 --- a/mace/ops/opencl/image/addn.h +++ b/mace/ops/opencl/image/addn.h @@ -57,7 +57,7 @@ MaceStatus AddNKernel::Compute( const index_t width = input_tensors[0]->dim(2); const index_t channels = input_tensors[0]->dim(3); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; for (size_t i = 1; i < size; ++i) { diff --git a/mace/ops/opencl/image/batch_norm.h b/mace/ops/opencl/image/batch_norm.h index 5685c5145814f0428b3fade69a4672349cd19250..689088300e61ab3561eac831147ac0c4d61c9bf2 100644 --- a/mace/ops/opencl/image/batch_norm.h +++ b/mace/ops/opencl/image/batch_norm.h @@ -85,7 +85,7 @@ MaceStatus BatchNormKernel::Compute( static_cast(width), static_cast(height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/batch_to_space.h b/mace/ops/opencl/image/batch_to_space.h index 9d91802627c840538b70d5a4f994d3ca572e8504..35281c7072e96fe05257f143b6624058e764bdc9 100644 --- a/mace/ops/opencl/image/batch_to_space.h +++ b/mace/ops/opencl/image/batch_to_space.h @@ -68,7 +68,7 @@ MaceStatus BatchToSpaceKernel::Compute( chan_blk, static_cast(batch_tensor->dim(2)), static_cast(batch_tensor->dim(0) * batch_tensor->dim(1))}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/bias_add.h b/mace/ops/opencl/image/bias_add.h index 25e2392edc055af6d5630b371e665160eb18b147..a37ee2b18b9c1c1344e21bdf5096d5e18b304fea 100644 --- a/mace/ops/opencl/image/bias_add.h +++ b/mace/ops/opencl/image/bias_add.h @@ -62,7 +62,7 @@ MaceStatus BiasAddKernel::Compute( static_cast(width), static_cast(height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/buffer_to_image.h b/mace/ops/opencl/image/buffer_to_image.h index 14a0ae4b3e474eb464580701446346248f5d1982..6ff3284ea69d8ef1be1d7e9f6c62d62ca8fa8527 100644 --- a/mace/ops/opencl/image/buffer_to_image.h +++ b/mace/ops/opencl/image/buffer_to_image.h @@ -98,7 +98,7 @@ MaceStatus BufferToImage::Compute( } } - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/channel_shuffle.h b/mace/ops/opencl/image/channel_shuffle.h index 53acbf15cdef206bb43b8dac9eb2a7d1b7c1b1ce..f890c0c3309988cad9acc380560c3358f736e775 100644 --- a/mace/ops/opencl/image/channel_shuffle.h +++ b/mace/ops/opencl/image/channel_shuffle.h @@ -70,7 +70,7 @@ MaceStatus ChannelShuffleKernel::Compute( static_cast(width), static_cast(height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/concat.cc b/mace/ops/opencl/image/concat.cc index aab72c5445709049e78543c1b4246f0eec6f2724..5dfe666eb42fa63be298355f3e0428cfb3f05235 100644 --- a/mace/ops/opencl/image/concat.cc +++ b/mace/ops/opencl/image/concat.cc @@ -65,7 +65,7 @@ MaceStatus Concat2(OpContext *context, static_cast(batch * height), }; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { @@ -126,7 +126,7 @@ MaceStatus ConcatN(OpContext *context, const index_t height = output->dim(1); const index_t width = output->dim(2); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/image/conv_2d_1x1.cc b/mace/ops/opencl/image/conv_2d_1x1.cc index f88882ee645814f81d13bef5cd80ef9ebcb5092f..57be075076ffa2cf077049b076475db1d1c67454 100644 --- a/mace/ops/opencl/image/conv_2d_1x1.cc +++ b/mace/ops/opencl/image/conv_2d_1x1.cc @@ -95,7 +95,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context, const index_t width_blocks = RoundUpDiv4(width); const index_t input_channel_blocks = RoundUpDiv4(input_channels); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/image/conv_2d_3x3.cc b/mace/ops/opencl/image/conv_2d_3x3.cc index 3e5aee909c89bbed8e94488c5d38d8be3f93615d..f7905a0c02bc14346cef2cca990d23f2a67d30c1 100644 --- a/mace/ops/opencl/image/conv_2d_3x3.cc +++ b/mace/ops/opencl/image/conv_2d_3x3.cc @@ -83,7 +83,7 @@ extern MaceStatus Conv2dK3x3(OpContext *context, const index_t input_channel_blocks = RoundUpDiv4(input_channels); const index_t width_blocks = RoundUpDiv(width); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/image/conv_2d_general.cc b/mace/ops/opencl/image/conv_2d_general.cc index 120a3daa3067d91118c101e8b95798f7bde84a1d..28bdea6c7f7ffa41d07ccf734c9029251d32cf82 100644 --- a/mace/ops/opencl/image/conv_2d_general.cc +++ b/mace/ops/opencl/image/conv_2d_general.cc @@ -91,7 +91,7 @@ extern MaceStatus Conv2d(OpContext *context, const index_t input_channel_blocks = RoundUpDiv4(input_channels); const index_t width_blocks = RoundUpDiv4(width); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/image/crop.h b/mace/ops/opencl/image/crop.h index c8f98a4ca7a2f2cdf8ba96135444e31e25ed1867..a83349c49ebe16c9ade1356b73a751bfca100d26 100644 --- a/mace/ops/opencl/image/crop.h +++ b/mace/ops/opencl/image/crop.h @@ -141,7 +141,7 @@ MaceStatus CropKernel::Compute( static_cast(output->dim(0) * output->dim(1)) }; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/deconv_2d.h b/mace/ops/opencl/image/deconv_2d.h index f3d6cbe92049380634540ae94419b96a2a1444e1..a8dd9c26c485b38d6852c06c79f9c11b962d3b77 100644 --- a/mace/ops/opencl/image/deconv_2d.h +++ b/mace/ops/opencl/image/deconv_2d.h @@ -92,7 +92,7 @@ MaceStatus Deconv2dKernel::Compute( const int align_w = stride_w - 1 - padding_w; const int kernel_size = filter->dim(2) * filter->dim(3); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/depth_to_space.h b/mace/ops/opencl/image/depth_to_space.h index 77c4bd53dfc661fd23381d9e8ebac3cf33c15017..1783b81316c13fc7d4eec5aa9004c488e6ab707b 100644 --- a/mace/ops/opencl/image/depth_to_space.h +++ b/mace/ops/opencl/image/depth_to_space.h @@ -87,7 +87,7 @@ MaceStatus DepthToSpaceKernel::Compute( static_cast(output_width), static_cast(output_height * batch) }; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/depthwise_conv2d.cc b/mace/ops/opencl/image/depthwise_conv2d.cc index 02409ebeda304dabc78f98c45688b9c4ce4a64de..57a4415e0ff3726dc20ce92024d16cdb48504e5b 100644 --- a/mace/ops/opencl/image/depthwise_conv2d.cc +++ b/mace/ops/opencl/image/depthwise_conv2d.cc @@ -93,7 +93,7 @@ MaceStatus DepthwiseConv2d(OpContext *context, static_cast(width_blocks), static_cast(height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel->get() == nullptr) { diff --git a/mace/ops/opencl/image/depthwise_deconv2d.h b/mace/ops/opencl/image/depthwise_deconv2d.h index 96fdfa51e110395f3028003f3058a029765519f5..d07a164955dcd4e2c54efac3f9fe9b9039d01f90 100644 --- a/mace/ops/opencl/image/depthwise_deconv2d.h +++ b/mace/ops/opencl/image/depthwise_deconv2d.h @@ -98,7 +98,7 @@ MaceStatus DepthwiseDeconv2dKernel::Compute( const int align_w = stride_w - 1 - padding_w; const int kernel_size = filter->dim(2) * filter->dim(3); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/eltwise.h b/mace/ops/opencl/image/eltwise.h index 2afb334233731307582d83ea77d2ec1ad77ce661..9600d501fdb579e3b0e0075c8f13ad28a7ce8705 100644 --- a/mace/ops/opencl/image/eltwise.h +++ b/mace/ops/opencl/image/eltwise.h @@ -117,7 +117,7 @@ MaceStatus EltwiseKernel::Compute( static_cast(width), static_cast(batch_height_pixels)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { std::set built_options; diff --git a/mace/ops/opencl/image/fully_connected.h b/mace/ops/opencl/image/fully_connected.h index 962ffaf082ca93e1f6129fa2f5d123c0e3454603..d52e927fffd8b89c84cb0952864b6f53addc1b62 100644 --- a/mace/ops/opencl/image/fully_connected.h +++ b/mace/ops/opencl/image/fully_connected.h @@ -64,7 +64,7 @@ MaceStatus FullyConnectedKernel::Compute( &output_image_shape); MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/image_to_buffer.h b/mace/ops/opencl/image/image_to_buffer.h index 6ca73fa6af9b8a39c43d6586d9167ca8655d6ffa..f9c3b011d120d697e856fe4b997e73eed63d607c 100644 --- a/mace/ops/opencl/image/image_to_buffer.h +++ b/mace/ops/opencl/image/image_to_buffer.h @@ -92,7 +92,7 @@ MaceStatus ImageToBuffer::Compute(OpContext *context, break; } - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/lstm_cell.h b/mace/ops/opencl/image/lstm_cell.h index 546b4a792de1c892a3fd9d6c0e11f255b9cb7501..265f2e10f9f536db9a692bf15d966153c05949e6 100644 --- a/mace/ops/opencl/image/lstm_cell.h +++ b/mace/ops/opencl/image/lstm_cell.h @@ -71,7 +71,7 @@ MaceStatus LSTMCellKernel::Compute( const index_t hidden_units = pre_output->dim(1); const index_t w_blocks = hidden_units >> 2; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/matmul.h b/mace/ops/opencl/image/matmul.h index 763082f610f5b4a115a76fc55be08c459a278d14..1681a8f8d98843c54b62aec24637924a80be9e1d 100644 --- a/mace/ops/opencl/image/matmul.h +++ b/mace/ops/opencl/image/matmul.h @@ -82,7 +82,7 @@ MaceStatus MatMulKernel::Compute( static_cast(height_blocks * batch), }; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/pad.h b/mace/ops/opencl/image/pad.h index cb0c390b667a46329ab4f9728caeea10f1eea0c7..8d1cae3e6fe3bd2830b9742c6abb0e9b4c0371df 100644 --- a/mace/ops/opencl/image/pad.h +++ b/mace/ops/opencl/image/pad.h @@ -80,7 +80,7 @@ MaceStatus PadKernel::Compute( const index_t channel_blocks = RoundUpDiv4(channels); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/pooling.h b/mace/ops/opencl/image/pooling.h index f246efa426618e9c197f30d253e23338bd11f73d..1af677403bfa3160aedf8266bc24cf45baf04b37 100644 --- a/mace/ops/opencl/image/pooling.h +++ b/mace/ops/opencl/image/pooling.h @@ -112,7 +112,7 @@ MaceStatus PoolingKernel::Compute( &output_image_shape); MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/reduce_mean.h b/mace/ops/opencl/image/reduce_mean.h index 95b51d86f883338fd0e4e57952edfd5965f85a61..3280691c9d303d08048bcb23bce2ab040c72b9e7 100644 --- a/mace/ops/opencl/image/reduce_mean.h +++ b/mace/ops/opencl/image/reduce_mean.h @@ -76,7 +76,7 @@ MaceStatus ReduceMeanKernel::Compute( &output_image_shape); MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/resize_bicubic.h b/mace/ops/opencl/image/resize_bicubic.h index bf5bfcf1921254c3939f77a5f3dc7711ea780289..bf72ee78c9edf67a6de31a0e2c9bae9f3ab35ceb 100644 --- a/mace/ops/opencl/image/resize_bicubic.h +++ b/mace/ops/opencl/image/resize_bicubic.h @@ -102,7 +102,7 @@ MaceStatus ResizeBicubicKernel::Compute( static_cast(out_width), static_cast(out_height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/resize_bilinear.h b/mace/ops/opencl/image/resize_bilinear.h index b3f1b09c6ee08f356f328e9e729c573abd5bb4e4..1eb599c98f27b7adad019540154c1b8bb5c59296 100644 --- a/mace/ops/opencl/image/resize_bilinear.h +++ b/mace/ops/opencl/image/resize_bilinear.h @@ -107,7 +107,7 @@ MaceStatus ResizeBilinearKernel::Compute( static_cast(out_width), static_cast(out_height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/softmax.h b/mace/ops/opencl/image/softmax.h index ffd5ec89a60e90aa57a4192d30022b7cd7586d8d..a19d9483719fee79fdcb3aad9a15191ffb7441a1 100644 --- a/mace/ops/opencl/image/softmax.h +++ b/mace/ops/opencl/image/softmax.h @@ -102,7 +102,7 @@ MaceStatus SoftmaxKernel::Compute( static_cast(width), static_cast(height * batch)}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/space_to_batch.h b/mace/ops/opencl/image/space_to_batch.h index f2baaba48259da64f2f8ed18620da37edd154245..c2190c681d59d7fbf72cc5d4fa821a71a914796d 100644 --- a/mace/ops/opencl/image/space_to_batch.h +++ b/mace/ops/opencl/image/space_to_batch.h @@ -66,7 +66,7 @@ MaceStatus SpaceToBatchKernel::Compute( chan_blk, static_cast(batch_tensor->dim(2)), static_cast(batch_tensor->dim(0) * batch_tensor->dim(1))}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/space_to_depth.h b/mace/ops/opencl/image/space_to_depth.h index e225b37693377acf57f2d91b17cc3269bc8a20a3..1df75ef831f563835317d51241456c2941c55af4 100644 --- a/mace/ops/opencl/image/space_to_depth.h +++ b/mace/ops/opencl/image/space_to_depth.h @@ -79,7 +79,7 @@ MaceStatus SpaceToDepthKernel::Compute( &image_shape); MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, image_shape)); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/split.h b/mace/ops/opencl/image/split.h index 7b7f790597f4daba916a0ab2cc1d103fdf11df26..d0427a4f16ce5b18d37c09ce274e9d1fd621661e 100644 --- a/mace/ops/opencl/image/split.h +++ b/mace/ops/opencl/image/split.h @@ -70,7 +70,7 @@ MaceStatus SplitKernel::Compute( output_list[i]->ResizeImage(output_shape, image_shape)); } - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/sqrdiff_mean.h b/mace/ops/opencl/image/sqrdiff_mean.h index d0c217fe450018d038e2d617fe4bdf5e6c4ba5de..ba84a5ef04fabb85f2943db96fe9a044f796d9a1 100644 --- a/mace/ops/opencl/image/sqrdiff_mean.h +++ b/mace/ops/opencl/image/sqrdiff_mean.h @@ -72,7 +72,7 @@ MaceStatus SqrDiffMeanKernel::Compute( &output_image_shape); MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); MACE_OUT_OF_RANGE_DEFINITION; if (kernel_.get() == nullptr) { diff --git a/mace/ops/opencl/image/winograd_conv2d.cc b/mace/ops/opencl/image/winograd_conv2d.cc index a9bd717155b7cfed5f5a6cac32a64d57fad63545..8d684e59f90d3ced55f632414ad8890b7764452b 100644 --- a/mace/ops/opencl/image/winograd_conv2d.cc +++ b/mace/ops/opencl/image/winograd_conv2d.cc @@ -37,7 +37,7 @@ MaceStatus WinogradInputTransform(OpContext *context, Tensor *output_tensor, uint32_t *kwg_size, StatsFuture *future) { - OpenCLRuntime *runtime = context->device()->opencl_runtime(); + OpenCLRuntime *runtime = context->device()->gpu_runtime()->opencl_runtime(); const index_t out_width = output_tensor->dim(2); MACE_OUT_OF_RANGE_DEFINITION; @@ -119,7 +119,7 @@ MaceStatus WinogradOutputTransform(OpContext *context, Tensor *output_tensor, uint32_t *kwg_size, StatsFuture *future) { - OpenCLRuntime *runtime = context->device()->opencl_runtime(); + OpenCLRuntime *runtime = context->device()->gpu_runtime()->opencl_runtime(); auto &output_shape = output_tensor->shape(); MACE_OUT_OF_RANGE_DEFINITION; @@ -227,8 +227,9 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context, std::vector *prev_input_shape, Tensor *output, uint32_t *kwg_size[3]) { - OpenCLRuntime *runtime = context->device()->opencl_runtime(); - ScratchImageManager *scratch_manager = runtime->scratch_image_manager(); + OpenCLRuntime *runtime = context->device()->gpu_runtime()->opencl_runtime(); + ScratchImageManager *scratch_manager = + context->device()->gpu_runtime()->scratch_image_manager(); StatsFuture t_input_future, mm_future, t_output_future; bool input_changed = !IsVecEqual(*prev_input_shape, input->shape()); *prev_input_shape = input->shape(); diff --git a/mace/ops/opencl/out_of_range_check_test.cc b/mace/ops/opencl/out_of_range_check_test.cc index eb2236931b08561715ef08e3e3194084261004d8..093e0fb47e0440cdaae8531ae2875bf5c1295763 100644 --- a/mace/ops/opencl/out_of_range_check_test.cc +++ b/mace/ops/opencl/out_of_range_check_test.cc @@ -35,7 +35,7 @@ MaceStatus BufferToImageOpImpl(OpContext *context, uint32_t gws[2] = {static_cast(image_shape[0]), static_cast(image_shape[1])}; - auto runtime = context->device()->opencl_runtime(); + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); std::string kernel_name = "in_out_buffer_to_image"; std::string obfuscated_kernel_name = MACE_OBFUSCATE_SYMBOL(kernel_name); diff --git a/mace/ops/ops_test_util.cc b/mace/ops/ops_test_util.cc index 6b08761e34eec22992db490c21740865bdfe3660..5233ccde1e6a7043f864e558045b823bd63c9507 100644 --- a/mace/ops/ops_test_util.cc +++ b/mace/ops/ops_test_util.cc @@ -206,7 +206,7 @@ MaceStatus OpsTestNet::RunOp(mace::DeviceType device) { auto opencl_mem_types = OpTestContext::Get()->opencl_mem_types(); for (auto type : opencl_mem_types) { OpTestContext::Get()->GetDevice(device) - ->opencl_runtime()->set_mem_type(type); + ->gpu_runtime()->set_mem_type(type); Setup(device); MACE_RETURN_IF_ERROR(Run()); } @@ -242,8 +242,8 @@ MaceStatus OpsTestNet::RunNet(const mace::NetDef &net_def, void OpsTestNet::Sync() { #ifdef MACE_ENABLE_OPENCL if (net_ && device_type_ == DeviceType::GPU) { - OpTestContext::Get()->GetDevice(DeviceType::GPU)->opencl_runtime() - ->command_queue().finish(); + OpTestContext::Get()->GetDevice(DeviceType::GPU)->gpu_runtime() + ->opencl_runtime()->command_queue().finish(); } #endif } diff --git a/mace/ops/pad.cc b/mace/ops/pad.cc index cb7979063097a07be88337b5b14db63a7ffe99f4..aa18b7c1c519f5ce2b27967647ddc900199a01f2 100644 --- a/mace/ops/pad.cc +++ b/mace/ops/pad.cc @@ -97,7 +97,7 @@ class PadOp : public Operation { std::vector paddings = Operation::GetRepeatedArgs("paddings"); float constant_value = Operation::GetOptionalArg( "constant_value", 0.0); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::PadKernel(paddings, constant_value)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/pooling.cc b/mace/ops/pooling.cc index b2aef666266dfcd77b06047eab7891fd6cb82cef..50372c3cf1f1603d80eec28bce0d701535b9467d 100644 --- a/mace/ops/pooling.cc +++ b/mace/ops/pooling.cc @@ -429,7 +429,7 @@ class PoolingOp : public PoolingOpBase { public: explicit PoolingOp(OpConstructContext *context) : PoolingOpBase(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::PoolingKernel); } else { context->set_output_mem_type(MemoryType::GPU_BUFFER); diff --git a/mace/ops/reduce_mean.cc b/mace/ops/reduce_mean.cc index 20f7e81c8b54165388de9f5fd2f359c4d42d1862..863103b28fc607aa4003840ee72aefa88b917312 100644 --- a/mace/ops/reduce_mean.cc +++ b/mace/ops/reduce_mean.cc @@ -246,7 +246,7 @@ class ReduceMeanOp : public ReduceMeanOpBase { public: explicit ReduceMeanOp(OpConstructContext *context) : ReduceMeanOpBase(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::ReduceMeanKernel(axis_, keep_dims_)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/resize_bicubic.cc b/mace/ops/resize_bicubic.cc index 403300607cfcb929169a18946eff79085d6c534c..3ccff3e6010fd931afcaf1775e5bc21f88836520 100644 --- a/mace/ops/resize_bicubic.cc +++ b/mace/ops/resize_bicubic.cc @@ -195,7 +195,7 @@ class ResizeBicubicOp : public Operation { std::vector size = Operation::GetRepeatedArgs( "size", {-1, -1}); MACE_CHECK(size.size() == 2); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::ResizeBicubicKernel(align_corners, size[0], size[1])); diff --git a/mace/ops/resize_bilinear.cc b/mace/ops/resize_bilinear.cc index 5ce6ef4a44a4bdb2f9d3b11057e9b317867d62d5..748c2efd13b4271d7f19964a987dcc0e28b9cc6f 100644 --- a/mace/ops/resize_bilinear.cc +++ b/mace/ops/resize_bilinear.cc @@ -331,7 +331,7 @@ class ResizeBilinearOp : public Operation { std::vector size = Operation::GetRepeatedArgs( "size", {-1, -1}); MACE_CHECK(size.size() == 2); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::ResizeBilinearKernel(align_corners, size[0], size[1])); diff --git a/mace/ops/softmax.cc b/mace/ops/softmax.cc index 4a7505ae79bcbc211ae9fa17f65a4f941b8988a2..2518b407301952c4fa1edf16f0a5a128b427a538 100644 --- a/mace/ops/softmax.cc +++ b/mace/ops/softmax.cc @@ -364,7 +364,7 @@ class SoftmaxOp : public Operation { public: explicit SoftmaxOp(OpConstructContext *context) : Operation(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::SoftmaxKernel); } else { context->set_output_mem_type(MemoryType::GPU_BUFFER); diff --git a/mace/ops/space_to_batch.cc b/mace/ops/space_to_batch.cc index 7d422938c77516f3e11ef3cf5e9f8b7bc7c5db15..7fa9081dfc3bb59b8ab975f54588b15299c32e49 100644 --- a/mace/ops/space_to_batch.cc +++ b/mace/ops/space_to_batch.cc @@ -308,7 +308,7 @@ class SpaceToBatchNDOp : public SpaceToBatchOpBase { public: explicit SpaceToBatchNDOp(OpConstructContext *context) : SpaceToBatchOpBase(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::SpaceToBatchKernel); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/space_to_depth.cc b/mace/ops/space_to_depth.cc index 11e5ade322e73fdeeee939f40f6b19243d3afe50..39e603ae615a0c66e0f11174d9c09200b234f003 100644 --- a/mace/ops/space_to_depth.cc +++ b/mace/ops/space_to_depth.cc @@ -94,7 +94,7 @@ class SpaceToDepthOp : public Operation { explicit SpaceToDepthOp(OpConstructContext *context) : Operation(context) { int block_size = Operation::GetOptionalArg("block_size", 1); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::SpaceToDepthKernel(block_size)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/split.cc b/mace/ops/split.cc index 2e09663178c45495b670b75a72ac7a013f478dc0..0f9dcc04bdb9c5b229f08c2b59f3e9551020f7a6 100644 --- a/mace/ops/split.cc +++ b/mace/ops/split.cc @@ -105,7 +105,7 @@ class SplitOp : public Operation { explicit SplitOp(OpConstructContext *context) : Operation(context) { int32_t axis = Operation::GetOptionalArg("axis", 3); - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::SplitKernel(axis)); } else { MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/sqrdiff_mean.cc b/mace/ops/sqrdiff_mean.cc index 7927da3b9a321d417386e2c76c8494e45a3417f2..b469a3e3b83b8d85241e0508b69b1a16fe62fe44 100644 --- a/mace/ops/sqrdiff_mean.cc +++ b/mace/ops/sqrdiff_mean.cc @@ -82,7 +82,7 @@ class SqrDiffMeanOp : public Operation { public: explicit SqrDiffMeanOp(OpConstructContext *context) : Operation(context) { - if (context->device()->opencl_runtime()->UseImageMemory()) { + if (context->device()->gpu_runtime()->UseImageMemory()) { kernel_.reset(new opencl::image::SqrDiffMeanKernel()); } else { MACE_NOT_IMPLEMENTED;