Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
66cf184f
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
66cf184f
编写于
12月 10, 2018
作者:
叶
叶剑武
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'scratch-image-bug' into 'master'
Bug: Replace OPENCLRuntime with GPURuntime in GPUDevice. See merge request !904
上级
b78cc685
3ec40342
变更
78
隐藏空白更改
内联
并排
Showing
78 changed file
with
186 addition
and
112 deletion
+186
-112
mace/core/device.cc
mace/core/device.cc
+2
-2
mace/core/device.h
mace/core/device.h
+3
-3
mace/core/runtime/opencl/gpu_device.cc
mace/core/runtime/opencl/gpu_device.cc
+4
-3
mace/core/runtime/opencl/gpu_device.h
mace/core/runtime/opencl/gpu_device.h
+3
-1
mace/core/runtime/opencl/gpu_runtime.cc
mace/core/runtime/opencl/gpu_runtime.cc
+45
-0
mace/core/runtime/opencl/gpu_runtime.h
mace/core/runtime/opencl/gpu_runtime.h
+45
-0
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+1
-15
mace/core/runtime/opencl/opencl_runtime.h
mace/core/runtime/opencl/opencl_runtime.h
+0
-6
mace/core/workspace.cc
mace/core/workspace.cc
+1
-1
mace/libmace/mace.cc
mace/libmace/mace.cc
+4
-4
mace/ops/activation.cc
mace/ops/activation.cc
+1
-1
mace/ops/addn.cc
mace/ops/addn.cc
+1
-1
mace/ops/batch_norm.cc
mace/ops/batch_norm.cc
+1
-1
mace/ops/batch_to_space.cc
mace/ops/batch_to_space.cc
+1
-1
mace/ops/bias_add.cc
mace/ops/bias_add.cc
+1
-1
mace/ops/channel_shuffle.cc
mace/ops/channel_shuffle.cc
+1
-1
mace/ops/concat.cc
mace/ops/concat.cc
+1
-1
mace/ops/conv_2d.cc
mace/ops/conv_2d.cc
+2
-2
mace/ops/crop.cc
mace/ops/crop.cc
+1
-1
mace/ops/deconv_2d.cc
mace/ops/deconv_2d.cc
+1
-1
mace/ops/depth_to_space.cc
mace/ops/depth_to_space.cc
+1
-1
mace/ops/depthwise_conv2d.cc
mace/ops/depthwise_conv2d.cc
+1
-1
mace/ops/depthwise_deconv2d.cc
mace/ops/depthwise_deconv2d.cc
+1
-1
mace/ops/eltwise.cc
mace/ops/eltwise.cc
+1
-1
mace/ops/fully_connected.cc
mace/ops/fully_connected.cc
+1
-1
mace/ops/lstm_cell.cc
mace/ops/lstm_cell.cc
+1
-1
mace/ops/opencl/buffer/buffer_transform.cc
mace/ops/opencl/buffer/buffer_transform.cc
+3
-3
mace/ops/opencl/buffer/buffer_type_transform.cc
mace/ops/opencl/buffer/buffer_type_transform.cc
+1
-1
mace/ops/opencl/buffer/conv_2d_1x1.cc
mace/ops/opencl/buffer/conv_2d_1x1.cc
+1
-1
mace/ops/opencl/buffer/conv_2d_general.cc
mace/ops/opencl/buffer/conv_2d_general.cc
+1
-1
mace/ops/opencl/buffer/depthwise_conv2d.cc
mace/ops/opencl/buffer/depthwise_conv2d.cc
+1
-1
mace/ops/opencl/buffer/pooling.h
mace/ops/opencl/buffer/pooling.h
+1
-1
mace/ops/opencl/buffer/softmax.h
mace/ops/opencl/buffer/softmax.h
+1
-1
mace/ops/opencl/buffer/utils.cc
mace/ops/opencl/buffer/utils.cc
+1
-1
mace/ops/opencl/image/activation.h
mace/ops/opencl/image/activation.h
+1
-1
mace/ops/opencl/image/addn.h
mace/ops/opencl/image/addn.h
+1
-1
mace/ops/opencl/image/batch_norm.h
mace/ops/opencl/image/batch_norm.h
+1
-1
mace/ops/opencl/image/batch_to_space.h
mace/ops/opencl/image/batch_to_space.h
+1
-1
mace/ops/opencl/image/bias_add.h
mace/ops/opencl/image/bias_add.h
+1
-1
mace/ops/opencl/image/buffer_to_image.h
mace/ops/opencl/image/buffer_to_image.h
+1
-1
mace/ops/opencl/image/channel_shuffle.h
mace/ops/opencl/image/channel_shuffle.h
+1
-1
mace/ops/opencl/image/concat.cc
mace/ops/opencl/image/concat.cc
+2
-2
mace/ops/opencl/image/conv_2d_1x1.cc
mace/ops/opencl/image/conv_2d_1x1.cc
+1
-1
mace/ops/opencl/image/conv_2d_3x3.cc
mace/ops/opencl/image/conv_2d_3x3.cc
+1
-1
mace/ops/opencl/image/conv_2d_general.cc
mace/ops/opencl/image/conv_2d_general.cc
+1
-1
mace/ops/opencl/image/crop.h
mace/ops/opencl/image/crop.h
+1
-1
mace/ops/opencl/image/deconv_2d.h
mace/ops/opencl/image/deconv_2d.h
+1
-1
mace/ops/opencl/image/depth_to_space.h
mace/ops/opencl/image/depth_to_space.h
+1
-1
mace/ops/opencl/image/depthwise_conv2d.cc
mace/ops/opencl/image/depthwise_conv2d.cc
+1
-1
mace/ops/opencl/image/depthwise_deconv2d.h
mace/ops/opencl/image/depthwise_deconv2d.h
+1
-1
mace/ops/opencl/image/eltwise.h
mace/ops/opencl/image/eltwise.h
+1
-1
mace/ops/opencl/image/fully_connected.h
mace/ops/opencl/image/fully_connected.h
+1
-1
mace/ops/opencl/image/image_to_buffer.h
mace/ops/opencl/image/image_to_buffer.h
+1
-1
mace/ops/opencl/image/lstm_cell.h
mace/ops/opencl/image/lstm_cell.h
+1
-1
mace/ops/opencl/image/matmul.h
mace/ops/opencl/image/matmul.h
+1
-1
mace/ops/opencl/image/pad.h
mace/ops/opencl/image/pad.h
+1
-1
mace/ops/opencl/image/pooling.h
mace/ops/opencl/image/pooling.h
+1
-1
mace/ops/opencl/image/reduce_mean.h
mace/ops/opencl/image/reduce_mean.h
+1
-1
mace/ops/opencl/image/resize_bicubic.h
mace/ops/opencl/image/resize_bicubic.h
+1
-1
mace/ops/opencl/image/resize_bilinear.h
mace/ops/opencl/image/resize_bilinear.h
+1
-1
mace/ops/opencl/image/softmax.h
mace/ops/opencl/image/softmax.h
+1
-1
mace/ops/opencl/image/space_to_batch.h
mace/ops/opencl/image/space_to_batch.h
+1
-1
mace/ops/opencl/image/space_to_depth.h
mace/ops/opencl/image/space_to_depth.h
+1
-1
mace/ops/opencl/image/split.h
mace/ops/opencl/image/split.h
+1
-1
mace/ops/opencl/image/sqrdiff_mean.h
mace/ops/opencl/image/sqrdiff_mean.h
+1
-1
mace/ops/opencl/image/winograd_conv2d.cc
mace/ops/opencl/image/winograd_conv2d.cc
+5
-4
mace/ops/opencl/out_of_range_check_test.cc
mace/ops/opencl/out_of_range_check_test.cc
+1
-1
mace/ops/ops_test_util.cc
mace/ops/ops_test_util.cc
+3
-3
mace/ops/pad.cc
mace/ops/pad.cc
+1
-1
mace/ops/pooling.cc
mace/ops/pooling.cc
+1
-1
mace/ops/reduce_mean.cc
mace/ops/reduce_mean.cc
+1
-1
mace/ops/resize_bicubic.cc
mace/ops/resize_bicubic.cc
+1
-1
mace/ops/resize_bilinear.cc
mace/ops/resize_bilinear.cc
+1
-1
mace/ops/softmax.cc
mace/ops/softmax.cc
+1
-1
mace/ops/space_to_batch.cc
mace/ops/space_to_batch.cc
+1
-1
mace/ops/space_to_depth.cc
mace/ops/space_to_depth.cc
+1
-1
mace/ops/split.cc
mace/ops/split.cc
+1
-1
mace/ops/sqrdiff_mean.cc
mace/ops/sqrdiff_mean.cc
+1
-1
未找到文件。
mace/core/device.cc
浏览文件 @
66cf184f
...
...
@@ -33,8 +33,8 @@ CPURuntime *CPUDevice::cpu_runtime() {
}
#ifdef MACE_ENABLE_OPENCL
OpenCLRuntime
*
CPUDevice
::
opencl
_runtime
()
{
LOG
(
FATAL
)
<<
"CPU device should not call
OpenCL
Runtime"
;
GPURuntime
*
CPUDevice
::
gpu
_runtime
()
{
LOG
(
FATAL
)
<<
"CPU device should not call
GPU
Runtime"
;
return
nullptr
;
}
#endif
...
...
mace/core/device.h
浏览文件 @
66cf184f
...
...
@@ -21,7 +21,7 @@
#include "mace/core/allocator.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/
opencl
_runtime.h"
#include "mace/core/runtime/opencl/
gpu
_runtime.h"
#endif
namespace
mace
{
...
...
@@ -33,7 +33,7 @@ class Device {
virtual
~
Device
()
{}
#ifdef MACE_ENABLE_OPENCL
virtual
OpenCLRuntime
*
opencl
_runtime
()
=
0
;
virtual
GPURuntime
*
gpu
_runtime
()
=
0
;
#endif // MACE_ENABLE_OPENCL
virtual
CPURuntime
*
cpu_runtime
()
=
0
;
...
...
@@ -50,7 +50,7 @@ class CPUDevice : public Device {
virtual
~
CPUDevice
();
#ifdef MACE_ENABLE_OPENCL
OpenCLRuntime
*
opencl
_runtime
()
override
;
GPURuntime
*
gpu
_runtime
()
override
;
#endif
CPURuntime
*
cpu_runtime
()
override
;
...
...
mace/core/runtime/opencl/gpu_device.cc
浏览文件 @
66cf184f
...
...
@@ -30,12 +30,13 @@ GPUDevice::GPUDevice(std::shared_ptr<Tuner<uint32_t>> tuner,
runtime_
(
new
OpenCLRuntime
(
opencl_cache_storage
,
priority
,
perf
,
opencl_binary_storage
,
tuner
)),
allocator_
(
new
OpenCLAllocator
(
runtime_
.
get
())),
scratch_buffer_
(
new
ScratchBuffer
(
allocator_
.
get
()))
{}
scratch_buffer_
(
new
ScratchBuffer
(
allocator_
.
get
())),
gpu_runtime_
(
new
GPURuntime
(
runtime_
.
get
()))
{}
GPUDevice
::~
GPUDevice
()
=
default
;
OpenCLRuntime
*
GPUDevice
::
opencl
_runtime
()
{
return
runtime_
.
get
();
GPURuntime
*
GPUDevice
::
gpu
_runtime
()
{
return
gpu_
runtime_
.
get
();
}
Allocator
*
GPUDevice
::
allocator
()
{
...
...
mace/core/runtime/opencl/gpu_device.h
浏览文件 @
66cf184f
...
...
@@ -19,6 +19,7 @@
#include "mace/core/device_context.h"
#include "mace/core/device.h"
#include "mace/core/runtime/opencl/gpu_runtime.h"
#include "mace/core/runtime/opencl/opencl_allocator.h"
namespace
mace
{
...
...
@@ -34,7 +35,7 @@ class GPUDevice : public CPUDevice {
CPUAffinityPolicy
cpu_affinity_policy
=
AFFINITY_NONE
,
bool
use_gemmlowp
=
false
);
~
GPUDevice
();
OpenCLRuntime
*
opencl
_runtime
()
override
;
GPURuntime
*
gpu
_runtime
()
override
;
Allocator
*
allocator
()
override
;
DeviceType
device_type
()
const
override
;
ScratchBuffer
*
scratch_buffer
()
override
;
...
...
@@ -42,6 +43,7 @@ class GPUDevice : public CPUDevice {
std
::
unique_ptr
<
OpenCLRuntime
>
runtime_
;
std
::
unique_ptr
<
OpenCLAllocator
>
allocator_
;
std
::
unique_ptr
<
ScratchBuffer
>
scratch_buffer_
;
std
::
unique_ptr
<
GPURuntime
>
gpu_runtime_
;
};
}
// namespace mace
...
...
mace/core/runtime/opencl/gpu_runtime.cc
0 → 100644
浏览文件 @
66cf184f
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/runtime/opencl/gpu_runtime.h"
#include "mace/core/runtime/opencl/scratch_image.h"
namespace
mace
{
// Constructs a GPURuntime around an already-created OpenCL runtime.
//
// `runtime` is stored as a plain pointer; nothing in this file deletes it
// (the destructor is defaulted), so the caller keeps responsibility for it.
// A new ScratchImageManager is allocated and held by unique_ptr, and the
// memory type starts out as MemoryType::GPU_IMAGE.
GPURuntime::GPURuntime(mace::OpenCLRuntime *runtime)
    : runtime_(runtime),
      scratch_image_manager_(new ScratchImageManager),
      mem_type_(MemoryType::GPU_IMAGE) {}
// Defaulted in the .cc file rather than in the header, which only
// forward-declares ScratchImageManager. Presumably scratch_image.h (included
// at the top of this file) provides the complete type required to destroy
// scratch_image_manager_ -- confirm if the include is ever removed.
GPURuntime::~GPURuntime() = default;
// Returns the OpenCLRuntime pointer that was handed to the constructor.
OpenCLRuntime *GPURuntime::opencl_runtime() {
  return this->runtime_;
}
// Returns a non-owning pointer to the ScratchImageManager held by this
// object; ownership stays with the internal unique_ptr.
ScratchImageManager *GPURuntime::scratch_image_manager() const {
  ScratchImageManager *manager = scratch_image_manager_.get();
  return manager;
}
bool
GPURuntime
::
UseImageMemory
()
{
return
this
->
mem_type_
==
MemoryType
::
GPU_IMAGE
;
}
// Overwrites the stored memory type with `type`. The value is read back by
// UseImageMemory().
void GPURuntime::set_mem_type(MemoryType type) {
  mem_type_ = type;
}
}
// namespace mace
mace/core/runtime/opencl/gpu_runtime.h
0 → 100644
浏览文件 @
66cf184f
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_RUNTIME_OPENCL_GPU_RUNTIME_H_
#define MACE_CORE_RUNTIME_OPENCL_GPU_RUNTIME_H_
#include <memory>
#include "mace/proto/mace.pb.h"
namespace
mace
{
class
OpenCLRuntime
;
class
ScratchImageManager
;
// Bundles an OpenCLRuntime pointer with a ScratchImageManager and a
// MemoryType setting.
//
// The OpenCLRuntime is held as a raw pointer (this class declares no code
// that deletes it), while the ScratchImageManager is held by unique_ptr.
// Because ScratchImageManager is only forward-declared in this header, the
// destructor is declared here and must be defined in a translation unit that
// sees the complete type.
class GPURuntime {
 public:
  // Wraps `runtime`; the pointer is stored as given.
  explicit GPURuntime(OpenCLRuntime *runtime);
  ~GPURuntime();

  // Accessor for the wrapped OpenCL runtime pointer.
  OpenCLRuntime *opencl_runtime();
  // Accessor for the scratch image manager held by this object
  // (non-owning pointer).
  ScratchImageManager *scratch_image_manager() const;

  // TODO(liuqi): remove this function in the future, make decision at runtime.
  bool UseImageMemory();
  // Sets the memory type stored in this object.
  void set_mem_type(MemoryType type);

 private:
  // Declaration order below is the member initialization order; keep it in
  // sync with the constructor's initializer list.
  OpenCLRuntime *runtime_;
  std::unique_ptr<ScratchImageManager> scratch_image_manager_;
  MemoryType mem_type_;
};
}
// namespace mace
#endif // MACE_CORE_RUNTIME_OPENCL_GPU_RUNTIME_H_
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
66cf184f
...
...
@@ -284,9 +284,7 @@ OpenCLRuntime::OpenCLRuntime(
is_opencl_avaliable_
(
false
),
is_profiling_enabled_
(
false
),
opencl_version_
(
CL_VER_UNKNOWN
),
gpu_type_
(
UNKNOWN
),
mem_type_
(
MemoryType
::
GPU_IMAGE
),
scratch_image_manager_
(
new
ScratchImageManager
)
{
gpu_type_
(
UNKNOWN
)
{
std
::
vector
<
cl
::
Platform
>
all_platforms
;
cl
::
Platform
::
get
(
&
all_platforms
);
if
(
all_platforms
.
size
()
==
0
)
{
...
...
@@ -471,14 +469,6 @@ uint32_t OpenCLRuntime::device_compute_units() const {
return
device_compute_units_
;
}
bool
OpenCLRuntime
::
UseImageMemory
()
{
return
this
->
mem_type_
==
MemoryType
::
GPU_IMAGE
;
}
void
OpenCLRuntime
::
set_mem_type
(
MemoryType
type
)
{
this
->
mem_type_
=
type
;
}
bool
OpenCLRuntime
::
BuildProgramFromCache
(
const
std
::
string
&
built_program_key
,
const
std
::
string
&
build_options_str
,
...
...
@@ -792,8 +782,4 @@ bool OpenCLRuntime::is_profiling_enabled() const {
return
is_profiling_enabled_
;
}
ScratchImageManager
*
OpenCLRuntime
::
scratch_image_manager
()
const
{
return
scratch_image_manager_
.
get
();
}
}
// namespace mace
mace/core/runtime/opencl/opencl_runtime.h
浏览文件 @
66cf184f
...
...
@@ -83,11 +83,7 @@ class OpenCLRuntime {
uint64_t
device_global_mem_cache_size
()
const
;
uint32_t
device_compute_units
()
const
;
Tuner
<
uint32_t
>
*
tuner
();
ScratchImageManager
*
scratch_image_manager
()
const
;
bool
is_opencl_avaliable
();
// TODO(liuqi): remove this function in the future, make decision at runtime.
bool
UseImageMemory
();
void
set_mem_type
(
MemoryType
type
);
void
GetCallStats
(
const
cl
::
Event
&
event
,
CallStats
*
stats
);
uint64_t
GetDeviceMaxWorkGroupSize
();
...
...
@@ -135,8 +131,6 @@ class OpenCLRuntime {
bool
is_profiling_enabled_
;
OpenCLVersion
opencl_version_
;
GPUType
gpu_type_
;
MemoryType
mem_type_
;
std
::
unique_ptr
<
ScratchImageManager
>
scratch_image_manager_
;
// All OpenCL object must be a pointer and manually deleted before unloading
// OpenCL library.
std
::
shared_ptr
<
cl
::
Context
>
context_
;
...
...
mace/core/workspace.cc
浏览文件 @
66cf184f
...
...
@@ -109,7 +109,7 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
(
!
is_quantize_model
&&
HasQuantizedTensor
(
net_def
))));
#ifdef MACE_ENABLE_OPENCL
diffused_buffer_
=
diffused_buffer_
||
(
device_type
==
DeviceType
::
GPU
&&
device
->
opencl_runtime
()
->
GetDeviceMaxMemAllocSize
()
<=
device
->
gpu_runtime
()
->
opencl_runtime
()
->
GetDeviceMaxMemAllocSize
()
<=
static_cast
<
uint64_t
>
(
model_data_size
));
#endif
if
(
diffused_buffer_
)
{
...
...
mace/libmace/mace.cc
浏览文件 @
66cf184f
...
...
@@ -69,8 +69,8 @@ void UnloadModelData(const unsigned char *model_data,
#ifdef MACE_ENABLE_OPENCL
MaceStatus
CheckGPUAvalibility
(
const
NetDef
*
net_def
,
Device
*
device
)
{
// Check OpenCL avaliable
auto
runtime
=
device
->
opencl
_runtime
();
if
(
!
runtime
->
is_opencl_avaliable
())
{
auto
runtime
=
device
->
gpu
_runtime
();
if
(
!
runtime
->
opencl_runtime
()
->
is_opencl_avaliable
())
{
LOG
(
WARNING
)
<<
"The device does not support OpenCL"
;
return
MaceStatus
::
MACE_OUT_OF_RESOURCES
;
}
...
...
@@ -678,8 +678,8 @@ MaceStatus MaceEngine::Impl::Run(
#ifdef MACE_ENABLE_OPENCL
if
(
device_type_
==
GPU
)
{
device_
->
opencl_runtime
()
->
command_queue
().
finish
();
device_
->
opencl_runtime
()
->
SaveBuiltCLProgram
();
device_
->
gpu_runtime
()
->
opencl_runtime
()
->
command_queue
().
finish
();
device_
->
gpu_runtime
()
->
opencl_runtime
()
->
SaveBuiltCLProgram
();
}
#endif
for
(
auto
&
output
:
*
outputs
)
{
...
...
mace/ops/activation.cc
浏览文件 @
66cf184f
...
...
@@ -81,7 +81,7 @@ class ActivationOp<DeviceType::GPU, T> : public Operation {
auto
relux_max_limit
=
static_cast
<
T
>
(
Operation
::
GetOptionalArg
<
float
>
(
"max_limit"
,
0.0
f
));
MemoryType
mem_type
;
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
mem_type
=
MemoryType
::
GPU_IMAGE
;
kernel_
.
reset
(
new
opencl
::
image
::
ActivationKernel
<
T
>
(
type
,
relux_max_limit
));
...
...
mace/ops/addn.cc
浏览文件 @
66cf184f
...
...
@@ -106,7 +106,7 @@ class AddNOp<DeviceType::GPU, T> : public Operation {
public:
explicit
AddNOp
(
OpConstructContext
*
context
)
:
Operation
(
context
)
{
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
AddNKernel
<
T
>
);
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
mace/ops/batch_norm.cc
浏览文件 @
66cf184f
...
...
@@ -149,7 +149,7 @@ class BatchNormOp<DeviceType::GPU, T> : public Operation {
Operation
::
GetOptionalArg
<
std
::
string
>
(
"activation"
,
"NOOP"
));
float
relux_max_limit
=
Operation
::
GetOptionalArg
<
float
>
(
"max_limit"
,
0.0
f
);
MemoryType
mem_type
;
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
mem_type
=
MemoryType
::
GPU_IMAGE
;
kernel_
.
reset
(
new
opencl
::
image
::
BatchNormKernel
<
T
>
(
epsilon
,
activation
,
relux_max_limit
));
...
...
mace/ops/batch_to_space.cc
浏览文件 @
66cf184f
...
...
@@ -265,7 +265,7 @@ class BatchToSpaceNDOp<DeviceType::GPU, T> : public BatchToSpaceOpBase {
public:
explicit
BatchToSpaceNDOp
(
OpConstructContext
*
context
)
:
BatchToSpaceOpBase
(
context
)
{
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
BatchToSpaceKernel
<
T
>
);
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
mace/ops/bias_add.cc
浏览文件 @
66cf184f
...
...
@@ -101,7 +101,7 @@ class BiasAddOp<DeviceType::GPU, T> : public Operation {
data_format_
(
static_cast
<
DataFormat
>
(
Operation
::
GetOptionalArg
<
int
>
(
"data_format"
,
NHWC
)))
{
MemoryType
mem_type
;
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
mem_type
=
MemoryType
::
GPU_IMAGE
;
kernel_
.
reset
(
new
opencl
::
image
::
BiasAddKernel
<
T
>
);
}
else
{
...
...
mace/ops/channel_shuffle.cc
浏览文件 @
66cf184f
...
...
@@ -84,7 +84,7 @@ class ChannelShuffleOp<DeviceType::GPU, T> : public Operation {
explicit
ChannelShuffleOp
(
OpConstructContext
*
context
)
:
Operation
(
context
)
{
const
int
groups
=
Operation
::
GetOptionalArg
<
int
>
(
"group"
,
1
);
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
ChannelShuffleKernel
<
T
>
(
groups
));
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
mace/ops/concat.cc
浏览文件 @
66cf184f
...
...
@@ -196,7 +196,7 @@ class ConcatOp<DeviceType::GPU, T> : public ConcatOpBase {
public:
explicit
ConcatOp
(
OpConstructContext
*
context
)
:
ConcatOpBase
(
context
)
{
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
ConcatKernel
<
T
>
(
axis_
));
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
mace/ops/conv_2d.cc
浏览文件 @
66cf184f
...
...
@@ -963,7 +963,7 @@ class Conv2dOp<DeviceType::GPU, T> : public ConvPool2dOpBase {
relux_max_limit_
(
Operation
::
GetOptionalArg
<
float
>
(
"max_limit"
,
0.0
f
)),
wino_block_size_
(
Operation
::
GetOptionalArg
<
int
>
(
"wino_block_size"
,
0
))
{
MemoryType
mem_type
;
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
mem_type
=
MemoryType
::
GPU_IMAGE
;
kernel_
.
reset
(
new
opencl
::
image
::
Conv2dKernel
<
T
>
);
}
else
{
...
...
@@ -974,7 +974,7 @@ class Conv2dOp<DeviceType::GPU, T> : public ConvPool2dOpBase {
// Transform filter tensor to target format
if
((
wino_block_size_
==
2
||
wino_block_size_
==
4
)
&&
(
kernel_
->
CheckUseWinograd
(
context
->
device
()
->
opencl_runtime
(),
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
(),
context
->
workspace
()
->
GetTensor
(
operator_def_
->
input
(
1
))
->
shape
(),
std
::
vector
<
index_t
>
(
operator_def_
->
output_shape
(
0
).
dims
().
begin
(),
...
...
mace/ops/crop.cc
浏览文件 @
66cf184f
...
...
@@ -113,7 +113,7 @@ class CropOp<DeviceType::GPU, T> : public Operation {
explicit
CropOp
(
OpConstructContext
*
context
)
:
Operation
(
context
)
{
const
int
axis
=
Operation
::
GetOptionalArg
<
int
>
(
"axis"
,
2
);
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
CropKernel
<
T
>
(
axis
,
Operation
::
GetRepeatedArgs
<
int
>
(
"offset"
)));
}
else
{
...
...
mace/ops/deconv_2d.cc
浏览文件 @
66cf184f
...
...
@@ -360,7 +360,7 @@ class Deconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase {
explicit
Deconv2dOp
(
OpConstructContext
*
context
)
:
Deconv2dOpBase
(
context
)
{
MemoryType
mem_type
=
MemoryType
::
GPU_IMAGE
;
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
Deconv2dKernel
<
T
>
);
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
mace/ops/depth_to_space.cc
浏览文件 @
66cf184f
...
...
@@ -96,7 +96,7 @@ class DepthToSpaceOp<DeviceType::GPU, T> : public Operation {
explicit
DepthToSpaceOp
(
OpConstructContext
*
context
)
:
Operation
(
context
)
{
int
block_size
=
Operation
::
GetOptionalArg
<
int
>
(
"block_size"
,
1
);
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
DepthToSpaceKernel
<
T
>
(
block_size
));
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
mace/ops/depthwise_conv2d.cc
浏览文件 @
66cf184f
...
...
@@ -492,7 +492,7 @@ class DepthwiseConv2dOp<DeviceType::GPU, T> : public DepthwiseConv2dOpBase {
explicit
DepthwiseConv2dOp
(
OpConstructContext
*
context
)
:
DepthwiseConv2dOpBase
(
context
)
{
MemoryType
mem_type
;
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
mem_type
=
MemoryType
::
GPU_IMAGE
;
kernel_
.
reset
(
new
opencl
::
image
::
DepthwiseConv2dKernel
<
T
>
);
}
else
{
...
...
mace/ops/depthwise_deconv2d.cc
浏览文件 @
66cf184f
...
...
@@ -410,7 +410,7 @@ class DepthwiseDeconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase {
explicit
DepthwiseDeconv2dOp
(
OpConstructContext
*
context
)
:
Deconv2dOpBase
(
context
)
{
MemoryType
mem_type
=
MemoryType
::
GPU_IMAGE
;
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
DepthwiseDeconv2dKernel
<
T
>
);
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
mace/ops/eltwise.cc
浏览文件 @
66cf184f
...
...
@@ -1088,7 +1088,7 @@ class EltwiseOp<DeviceType::GPU, T> : public Operation {
int32_t
scalar_input_index
=
Operation
::
GetOptionalArg
<
int32_t
>
(
"scalar_input_index"
,
1
);
MemoryType
mem_type
;
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
mem_type
=
MemoryType
::
GPU_IMAGE
;
kernel_
.
reset
(
new
opencl
::
image
::
EltwiseKernel
<
T
>
(
type
,
coeff
,
scalar_input
,
scalar_input_index
));
...
...
mace/ops/fully_connected.cc
浏览文件 @
66cf184f
...
...
@@ -194,7 +194,7 @@ class FullyConnectedOp<DeviceType::GPU, T> : public FullyConnectedOpBase {
explicit
FullyConnectedOp
(
OpConstructContext
*
context
)
:
FullyConnectedOpBase
(
context
)
{
MemoryType
mem_type
;
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
mem_type
=
MemoryType
::
GPU_IMAGE
;
kernel_
.
reset
(
new
opencl
::
image
::
FullyConnectedKernel
<
T
>
);
}
else
{
...
...
mace/ops/lstm_cell.cc
浏览文件 @
66cf184f
...
...
@@ -34,7 +34,7 @@ class LSTMCellOp<DeviceType::GPU, T> : public Operation {
Operation
::
GetOptionalArg
<
float
>
(
"scalar_input"
,
0.0
));
MemoryType
mem_type
=
MemoryType
::
GPU_IMAGE
;
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
LSTMCellKernel
<
T
>
(
forget_bias
));
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
mace/ops/opencl/buffer/buffer_transform.cc
浏览文件 @
66cf184f
...
...
@@ -47,7 +47,7 @@ MaceStatus TransformConv2DFilter(
MACE_RETURN_IF_ERROR
(
output
->
Resize
(
transformed_shape
));
output
->
Reshape
(
input
->
shape
());
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
if
(
kernel
->
get
()
==
nullptr
)
{
std
::
set
<
std
::
string
>
built_options
;
...
...
@@ -116,7 +116,7 @@ MaceStatus TransformDWConv2DFilter(
MACE_RETURN_IF_ERROR
(
output
->
Resize
(
transformed_shape
));
output
->
Reshape
(
input
->
shape
());
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
if
(
kernel
->
get
()
==
nullptr
)
{
std
::
set
<
std
::
string
>
built_options
;
...
...
@@ -173,7 +173,7 @@ MaceStatus TransformArgument(
MACE_RETURN_IF_ERROR
(
output
->
Resize
(
transformed_shape
));
output
->
Reshape
(
input
->
shape
());
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
if
(
kernel
->
get
()
==
nullptr
)
{
std
::
set
<
std
::
string
>
built_options
;
...
...
mace/ops/opencl/buffer/buffer_type_transform.cc
浏览文件 @
66cf184f
...
...
@@ -31,7 +31,7 @@ MaceStatus BufferTypeTransform(
Tensor
*
output
)
{
MACE_RETURN_IF_ERROR
(
output
->
ResizeLike
(
input
));
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
const
uint32_t
gws
=
...
...
mace/ops/opencl/buffer/conv_2d_1x1.cc
浏览文件 @
66cf184f
...
...
@@ -43,7 +43,7 @@ MaceStatus Conv2d1x1(OpContext *context,
const
index_t
in_height
=
padded_input
->
dim
(
1
);
const
index_t
in_width
=
padded_input
->
dim
(
2
);
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel
->
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/buffer/conv_2d_general.cc
浏览文件 @
66cf184f
...
...
@@ -48,7 +48,7 @@ MaceStatus Conv2dGeneral(OpContext *context,
const
index_t
filter_height
=
filter
->
dim
(
2
);
const
index_t
filter_width
=
filter
->
dim
(
3
);
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel
->
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/buffer/depthwise_conv2d.cc
浏览文件 @
66cf184f
...
...
@@ -48,7 +48,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
const
index_t
filter_height
=
filter
->
dim
(
2
);
const
index_t
filter_width
=
filter
->
dim
(
3
);
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
if
(
kernel
->
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/buffer/pooling.h
浏览文件 @
66cf184f
...
...
@@ -92,7 +92,7 @@ MaceStatus PoolingKernel<T>::Compute(
bool
input_changed
=
!
IsVecEqual
(
input_shape_
,
input
->
shape
());
input_shape_
=
input
->
shape
();
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
// pad input
std
::
vector
<
index_t
>
padded_input_shape
=
input
->
shape
();
...
...
mace/ops/opencl/buffer/softmax.h
浏览文件 @
66cf184f
...
...
@@ -75,7 +75,7 @@ MaceStatus SoftmaxKernel<T>::Compute(
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/buffer/utils.cc
浏览文件 @
66cf184f
...
...
@@ -47,7 +47,7 @@ MaceStatus PadInput(OpContext *context,
static_cast
<
uint32_t
>
(
padded_height
*
batch
)
};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel
->
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/activation.h
浏览文件 @
66cf184f
...
...
@@ -66,7 +66,7 @@ MaceStatus ActivationKernel<T>::Compute(
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/addn.h
浏览文件 @
66cf184f
...
...
@@ -57,7 +57,7 @@ MaceStatus AddNKernel<T>::Compute(
const
index_t
width
=
input_tensors
[
0
]
->
dim
(
2
);
const
index_t
channels
=
input_tensors
[
0
]
->
dim
(
3
);
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
for
(
size_t
i
=
1
;
i
<
size
;
++
i
)
{
...
...
mace/ops/opencl/image/batch_norm.h
浏览文件 @
66cf184f
...
...
@@ -85,7 +85,7 @@ MaceStatus BatchNormKernel<T>::Compute(
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/batch_to_space.h
浏览文件 @
66cf184f
...
...
@@ -68,7 +68,7 @@ MaceStatus BatchToSpaceKernel<T>::Compute(
chan_blk
,
static_cast
<
uint32_t
>
(
batch_tensor
->
dim
(
2
)),
static_cast
<
uint32_t
>
(
batch_tensor
->
dim
(
0
)
*
batch_tensor
->
dim
(
1
))};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/bias_add.h
浏览文件 @
66cf184f
...
...
@@ -62,7 +62,7 @@ MaceStatus BiasAddKernel<T>::Compute(
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/buffer_to_image.h
浏览文件 @
66cf184f
...
...
@@ -98,7 +98,7 @@ MaceStatus BufferToImage<T>::Compute(
}
}
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/channel_shuffle.h
浏览文件 @
66cf184f
...
...
@@ -70,7 +70,7 @@ MaceStatus ChannelShuffleKernel<T>::Compute(
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/concat.cc
浏览文件 @
66cf184f
...
...
@@ -65,7 +65,7 @@ MaceStatus Concat2(OpContext *context,
static_cast
<
uint32_t
>
(
batch
*
height
),
};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel
->
get
()
==
nullptr
)
{
...
...
@@ -126,7 +126,7 @@ MaceStatus ConcatN(OpContext *context,
const
index_t
height
=
output
->
dim
(
1
);
const
index_t
width
=
output
->
dim
(
2
);
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel
->
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/conv_2d_1x1.cc
浏览文件 @
66cf184f
...
...
@@ -95,7 +95,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
const
index_t
width_blocks
=
RoundUpDiv4
(
width
);
const
index_t
input_channel_blocks
=
RoundUpDiv4
(
input_channels
);
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel
->
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/conv_2d_3x3.cc
浏览文件 @
66cf184f
...
...
@@ -83,7 +83,7 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
const
index_t
input_channel_blocks
=
RoundUpDiv4
(
input_channels
);
const
index_t
width_blocks
=
RoundUpDiv
<
index_t
,
5
>
(
width
);
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel
->
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/conv_2d_general.cc
浏览文件 @
66cf184f
...
...
@@ -91,7 +91,7 @@ extern MaceStatus Conv2d(OpContext *context,
const
index_t
input_channel_blocks
=
RoundUpDiv4
(
input_channels
);
const
index_t
width_blocks
=
RoundUpDiv4
(
width
);
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel
->
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/crop.h
浏览文件 @
66cf184f
...
...
@@ -141,7 +141,7 @@ MaceStatus CropKernel<T>::Compute(
static_cast
<
uint32_t
>
(
output
->
dim
(
0
)
*
output
->
dim
(
1
))
};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/deconv_2d.h
浏览文件 @
66cf184f
...
...
@@ -92,7 +92,7 @@ MaceStatus Deconv2dKernel<T>::Compute(
const
int
align_w
=
stride_w
-
1
-
padding_w
;
const
int
kernel_size
=
filter
->
dim
(
2
)
*
filter
->
dim
(
3
);
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/depth_to_space.h
浏览文件 @
66cf184f
...
...
@@ -87,7 +87,7 @@ MaceStatus DepthToSpaceKernel<T>::Compute(
static_cast
<
uint32_t
>
(
output_width
),
static_cast
<
uint32_t
>
(
output_height
*
batch
)
};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/depthwise_conv2d.cc
浏览文件 @
66cf184f
...
...
@@ -93,7 +93,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
static_cast
<
uint32_t
>
(
width_blocks
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel
->
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/depthwise_deconv2d.h
浏览文件 @
66cf184f
...
...
@@ -98,7 +98,7 @@ MaceStatus DepthwiseDeconv2dKernel<T>::Compute(
const
int
align_w
=
stride_w
-
1
-
padding_w
;
const
int
kernel_size
=
filter
->
dim
(
2
)
*
filter
->
dim
(
3
);
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/eltwise.h
浏览文件 @
66cf184f
...
...
@@ -117,7 +117,7 @@ MaceStatus EltwiseKernel<T>::Compute(
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
batch_height_pixels
)};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
std
::
set
<
std
::
string
>
built_options
;
...
...
mace/ops/opencl/image/fully_connected.h
浏览文件 @
66cf184f
...
...
@@ -64,7 +64,7 @@ MaceStatus FullyConnectedKernel<T>::Compute(
&
output_image_shape
);
MACE_RETURN_IF_ERROR
(
output
->
ResizeImage
(
output_shape
,
output_image_shape
));
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/image_to_buffer.h
浏览文件 @
66cf184f
...
...
@@ -92,7 +92,7 @@ MaceStatus ImageToBuffer<T>::Compute(OpContext *context,
break
;
}
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/lstm_cell.h
浏览文件 @
66cf184f
...
...
@@ -71,7 +71,7 @@ MaceStatus LSTMCellKernel<T>::Compute(
const
index_t
hidden_units
=
pre_output
->
dim
(
1
);
const
index_t
w_blocks
=
hidden_units
>>
2
;
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/matmul.h
浏览文件 @
66cf184f
...
...
@@ -82,7 +82,7 @@ MaceStatus MatMulKernel<T>::Compute(
static_cast
<
uint32_t
>
(
height_blocks
*
batch
),
};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/pad.h
浏览文件 @
66cf184f
...
...
@@ -80,7 +80,7 @@ MaceStatus PadKernel<T>::Compute(
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/pooling.h
浏览文件 @
66cf184f
...
...
@@ -112,7 +112,7 @@ MaceStatus PoolingKernel<T>::Compute(
&
output_image_shape
);
MACE_RETURN_IF_ERROR
(
output
->
ResizeImage
(
output_shape
,
output_image_shape
));
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/reduce_mean.h
浏览文件 @
66cf184f
...
...
@@ -76,7 +76,7 @@ MaceStatus ReduceMeanKernel<T>::Compute(
&
output_image_shape
);
MACE_RETURN_IF_ERROR
(
output
->
ResizeImage
(
output_shape
,
output_image_shape
));
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/resize_bicubic.h
浏览文件 @
66cf184f
...
...
@@ -102,7 +102,7 @@ MaceStatus ResizeBicubicKernel<T>::Compute(
static_cast
<
uint32_t
>
(
out_width
),
static_cast
<
uint32_t
>
(
out_height
*
batch
)};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/resize_bilinear.h
浏览文件 @
66cf184f
...
...
@@ -107,7 +107,7 @@ MaceStatus ResizeBilinearKernel<T>::Compute(
static_cast
<
uint32_t
>
(
out_width
),
static_cast
<
uint32_t
>
(
out_height
*
batch
)};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/softmax.h
浏览文件 @
66cf184f
...
...
@@ -102,7 +102,7 @@ MaceStatus SoftmaxKernel<T>::Compute(
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/space_to_batch.h
浏览文件 @
66cf184f
...
...
@@ -66,7 +66,7 @@ MaceStatus SpaceToBatchKernel<T>::Compute(
chan_blk
,
static_cast
<
uint32_t
>
(
batch_tensor
->
dim
(
2
)),
static_cast
<
uint32_t
>
(
batch_tensor
->
dim
(
0
)
*
batch_tensor
->
dim
(
1
))};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/space_to_depth.h
浏览文件 @
66cf184f
...
...
@@ -79,7 +79,7 @@ MaceStatus SpaceToDepthKernel<T>::Compute(
&
image_shape
);
MACE_RETURN_IF_ERROR
(
output
->
ResizeImage
(
output_shape
,
image_shape
));
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/split.h
浏览文件 @
66cf184f
...
...
@@ -70,7 +70,7 @@ MaceStatus SplitKernel<T>::Compute(
output_list
[
i
]
->
ResizeImage
(
output_shape
,
image_shape
));
}
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/sqrdiff_mean.h
浏览文件 @
66cf184f
...
...
@@ -72,7 +72,7 @@ MaceStatus SqrDiffMeanKernel<T>::Compute(
&
output_image_shape
);
MACE_RETURN_IF_ERROR
(
output
->
ResizeImage
(
output_shape
,
output_image_shape
));
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
...
...
mace/ops/opencl/image/winograd_conv2d.cc
浏览文件 @
66cf184f
...
...
@@ -37,7 +37,7 @@ MaceStatus WinogradInputTransform(OpContext *context,
Tensor
*
output_tensor
,
uint32_t
*
kwg_size
,
StatsFuture
*
future
)
{
OpenCLRuntime
*
runtime
=
context
->
device
()
->
opencl_runtime
();
OpenCLRuntime
*
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
const
index_t
out_width
=
output_tensor
->
dim
(
2
);
MACE_OUT_OF_RANGE_DEFINITION
;
...
...
@@ -119,7 +119,7 @@ MaceStatus WinogradOutputTransform(OpContext *context,
Tensor
*
output_tensor
,
uint32_t
*
kwg_size
,
StatsFuture
*
future
)
{
OpenCLRuntime
*
runtime
=
context
->
device
()
->
opencl_runtime
();
OpenCLRuntime
*
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
auto
&
output_shape
=
output_tensor
->
shape
();
MACE_OUT_OF_RANGE_DEFINITION
;
...
...
@@ -227,8 +227,9 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
uint32_t
*
kwg_size
[
3
])
{
OpenCLRuntime
*
runtime
=
context
->
device
()
->
opencl_runtime
();
ScratchImageManager
*
scratch_manager
=
runtime
->
scratch_image_manager
();
OpenCLRuntime
*
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
ScratchImageManager
*
scratch_manager
=
context
->
device
()
->
gpu_runtime
()
->
scratch_image_manager
();
StatsFuture
t_input_future
,
mm_future
,
t_output_future
;
bool
input_changed
=
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
());
*
prev_input_shape
=
input
->
shape
();
...
...
mace/ops/opencl/out_of_range_check_test.cc
浏览文件 @
66cf184f
...
...
@@ -35,7 +35,7 @@ MaceStatus BufferToImageOpImpl(OpContext *context,
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
image_shape
[
0
]),
static_cast
<
uint32_t
>
(
image_shape
[
1
])};
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
std
::
string
kernel_name
=
"in_out_buffer_to_image"
;
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
kernel_name
);
...
...
mace/ops/ops_test_util.cc
浏览文件 @
66cf184f
...
...
@@ -206,7 +206,7 @@ MaceStatus OpsTestNet::RunOp(mace::DeviceType device) {
auto
opencl_mem_types
=
OpTestContext
::
Get
()
->
opencl_mem_types
();
for
(
auto
type
:
opencl_mem_types
)
{
OpTestContext
::
Get
()
->
GetDevice
(
device
)
->
opencl
_runtime
()
->
set_mem_type
(
type
);
->
gpu
_runtime
()
->
set_mem_type
(
type
);
Setup
(
device
);
MACE_RETURN_IF_ERROR
(
Run
());
}
...
...
@@ -242,8 +242,8 @@ MaceStatus OpsTestNet::RunNet(const mace::NetDef &net_def,
void
OpsTestNet
::
Sync
()
{
#ifdef MACE_ENABLE_OPENCL
if
(
net_
&&
device_type_
==
DeviceType
::
GPU
)
{
OpTestContext
::
Get
()
->
GetDevice
(
DeviceType
::
GPU
)
->
opencl
_runtime
()
->
command_queue
().
finish
();
OpTestContext
::
Get
()
->
GetDevice
(
DeviceType
::
GPU
)
->
gpu
_runtime
()
->
opencl_runtime
()
->
command_queue
().
finish
();
}
#endif
}
...
...
mace/ops/pad.cc
浏览文件 @
66cf184f
...
...
@@ -97,7 +97,7 @@ class PadOp<DeviceType::GPU, T> : public Operation {
std
::
vector
<
int
>
paddings
=
Operation
::
GetRepeatedArgs
<
int
>
(
"paddings"
);
float
constant_value
=
Operation
::
GetOptionalArg
<
float
>
(
"constant_value"
,
0.0
);
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
PadKernel
<
T
>
(
paddings
,
constant_value
));
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
mace/ops/pooling.cc
浏览文件 @
66cf184f
...
...
@@ -429,7 +429,7 @@ class PoolingOp<DeviceType::GPU, T> : public PoolingOpBase {
public:
explicit
PoolingOp
(
OpConstructContext
*
context
)
:
PoolingOpBase
(
context
)
{
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
PoolingKernel
<
T
>
);
}
else
{
context
->
set_output_mem_type
(
MemoryType
::
GPU_BUFFER
);
...
...
mace/ops/reduce_mean.cc
浏览文件 @
66cf184f
...
...
@@ -246,7 +246,7 @@ class ReduceMeanOp<DeviceType::GPU, T> : public ReduceMeanOpBase {
public:
explicit
ReduceMeanOp
(
OpConstructContext
*
context
)
:
ReduceMeanOpBase
(
context
)
{
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
ReduceMeanKernel
<
T
>
(
axis_
,
keep_dims_
));
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
mace/ops/resize_bicubic.cc
浏览文件 @
66cf184f
...
...
@@ -195,7 +195,7 @@ class ResizeBicubicOp<DeviceType::GPU, T> : public Operation {
std
::
vector
<
index_t
>
size
=
Operation
::
GetRepeatedArgs
<
index_t
>
(
"size"
,
{
-
1
,
-
1
});
MACE_CHECK
(
size
.
size
()
==
2
);
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
ResizeBicubicKernel
<
T
>
(
align_corners
,
size
[
0
],
size
[
1
]));
...
...
mace/ops/resize_bilinear.cc
浏览文件 @
66cf184f
...
...
@@ -331,7 +331,7 @@ class ResizeBilinearOp<DeviceType::GPU, T> : public Operation {
std
::
vector
<
index_t
>
size
=
Operation
::
GetRepeatedArgs
<
index_t
>
(
"size"
,
{
-
1
,
-
1
});
MACE_CHECK
(
size
.
size
()
==
2
);
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
ResizeBilinearKernel
<
T
>
(
align_corners
,
size
[
0
],
size
[
1
]));
...
...
mace/ops/softmax.cc
浏览文件 @
66cf184f
...
...
@@ -364,7 +364,7 @@ class SoftmaxOp<DeviceType::GPU, T> : public Operation {
public:
explicit
SoftmaxOp
(
OpConstructContext
*
context
)
:
Operation
(
context
)
{
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
SoftmaxKernel
<
T
>
);
}
else
{
context
->
set_output_mem_type
(
MemoryType
::
GPU_BUFFER
);
...
...
mace/ops/space_to_batch.cc
浏览文件 @
66cf184f
...
...
@@ -308,7 +308,7 @@ class SpaceToBatchNDOp<DeviceType::GPU, T> : public SpaceToBatchOpBase {
public:
explicit
SpaceToBatchNDOp
(
OpConstructContext
*
context
)
:
SpaceToBatchOpBase
(
context
)
{
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
SpaceToBatchKernel
<
T
>
);
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
mace/ops/space_to_depth.cc
浏览文件 @
66cf184f
...
...
@@ -94,7 +94,7 @@ class SpaceToDepthOp<DeviceType::GPU, T> : public Operation {
explicit
SpaceToDepthOp
(
OpConstructContext
*
context
)
:
Operation
(
context
)
{
int
block_size
=
Operation
::
GetOptionalArg
<
int
>
(
"block_size"
,
1
);
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
SpaceToDepthKernel
<
T
>
(
block_size
));
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
mace/ops/split.cc
浏览文件 @
66cf184f
...
...
@@ -105,7 +105,7 @@ class SplitOp<DeviceType::GPU, T> : public Operation {
explicit
SplitOp
(
OpConstructContext
*
context
)
:
Operation
(
context
)
{
int32_t
axis
=
Operation
::
GetOptionalArg
<
int
>
(
"axis"
,
3
);
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
SplitKernel
<
T
>
(
axis
));
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
mace/ops/sqrdiff_mean.cc
浏览文件 @
66cf184f
...
...
@@ -82,7 +82,7 @@ class SqrDiffMeanOp<DeviceType::GPU, T> : public Operation {
public:
explicit
SqrDiffMeanOp
(
OpConstructContext
*
context
)
:
Operation
(
context
)
{
if
(
context
->
device
()
->
opencl
_runtime
()
->
UseImageMemory
())
{
if
(
context
->
device
()
->
gpu
_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
SqrDiffMeanKernel
<
T
>
());
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录