Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
dc364ddc
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
dc364ddc
编写于
3月 01, 2019
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
change new to make_unique
上级
0c3cc381
变更
42
隐藏空白更改
内联
并排
Showing
42 changed file
with
232 addition
and
118 deletion
+232
-118
mace/libmace/mace.cc
mace/libmace/mace.cc
+7
-5
mace/ops/activation.cc
mace/ops/activation.cc
+3
-3
mace/ops/addn.cc
mace/ops/addn.cc
+2
-1
mace/ops/arm/conv_winograd.cc
mace/ops/arm/conv_winograd.cc
+17
-14
mace/ops/arm/conv_winograd.h
mace/ops/arm/conv_winograd.h
+2
-2
mace/ops/arm/conv_winograd_test.cc
mace/ops/arm/conv_winograd_test.cc
+1
-1
mace/ops/batch_norm.cc
mace/ops/batch_norm.cc
+3
-2
mace/ops/batch_to_space.cc
mace/ops/batch_to_space.cc
+2
-1
mace/ops/bias_add.cc
mace/ops/bias_add.cc
+2
-1
mace/ops/channel_shuffle.cc
mace/ops/channel_shuffle.cc
+2
-1
mace/ops/concat.cc
mace/ops/concat.cc
+2
-1
mace/ops/conv_2d.cc
mace/ops/conv_2d.cc
+12
-11
mace/ops/crop.cc
mace/ops/crop.cc
+3
-2
mace/ops/deconv_2d.cc
mace/ops/deconv_2d.cc
+2
-1
mace/ops/depth_to_space.cc
mace/ops/depth_to_space.cc
+2
-1
mace/ops/depthwise_conv2d.cc
mace/ops/depthwise_conv2d.cc
+3
-2
mace/ops/depthwise_deconv2d.cc
mace/ops/depthwise_deconv2d.cc
+2
-1
mace/ops/eltwise.cc
mace/ops/eltwise.cc
+3
-2
mace/ops/fully_connected.cc
mace/ops/fully_connected.cc
+3
-1
mace/ops/lstm_cell.cc
mace/ops/lstm_cell.cc
+2
-1
mace/ops/opencl/buffer/conv_2d.h
mace/ops/opencl/buffer/conv_2d.h
+3
-2
mace/ops/opencl/buffer/depthwise_conv2d.h
mace/ops/opencl/buffer/depthwise_conv2d.h
+3
-2
mace/ops/opencl/buffer/pooling.h
mace/ops/opencl/buffer/pooling.h
+3
-2
mace/ops/opencl/buffer_transformer.h
mace/ops/opencl/buffer_transformer.h
+4
-3
mace/ops/opencl/helper.h
mace/ops/opencl/helper.h
+3
-2
mace/ops/opencl/image/winograd_conv2d.cc
mace/ops/opencl/image/winograd_conv2d.cc
+5
-4
mace/ops/opencl/out_of_range_check_test.cc
mace/ops/opencl/out_of_range_check_test.cc
+3
-1
mace/ops/ops_test_util.cc
mace/ops/ops_test_util.cc
+13
-14
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+5
-4
mace/ops/pad.cc
mace/ops/pad.cc
+3
-2
mace/ops/pooling.cc
mace/ops/pooling.cc
+3
-2
mace/ops/reduce.cc
mace/ops/reduce.cc
+4
-3
mace/ops/resize_bicubic.cc
mace/ops/resize_bicubic.cc
+3
-3
mace/ops/resize_bilinear.cc
mace/ops/resize_bilinear.cc
+3
-3
mace/ops/resize_nearest_neighbor.cc
mace/ops/resize_nearest_neighbor.cc
+3
-2
mace/ops/sgemm.cc
mace/ops/sgemm.cc
+10
-9
mace/ops/softmax.cc
mace/ops/softmax.cc
+4
-2
mace/ops/space_to_batch.cc
mace/ops/space_to_batch.cc
+2
-1
mace/ops/space_to_depth.cc
mace/ops/space_to_depth.cc
+2
-1
mace/ops/split.cc
mace/ops/split.cc
+2
-1
mace/ops/sqrdiff_mean.cc
mace/ops/sqrdiff_mean.cc
+2
-1
mace/utils/memory.h
mace/utils/memory.h
+74
-0
未找到文件。
mace/libmace/mace.cc
浏览文件 @
dc364ddc
...
...
@@ -33,6 +33,8 @@
#include "mace/core/runtime/hexagon/hexagon_device.h"
#endif // MACE_ENABLE_HEXAGON
#include "mace/utils/memory.h"
namespace
mace
{
namespace
{
...
...
@@ -289,7 +291,7 @@ MaceTensor::MaceTensor(const std::vector<int64_t> &shape,
std
::
shared_ptr
<
float
>
data
,
const
DataFormat
format
)
{
MACE_CHECK_NOTNULL
(
data
.
get
());
impl_
=
std
::
unique_ptr
<
MaceTensor
::
Impl
>
(
new
MaceTensor
::
Impl
()
);
impl_
=
make_unique
<
MaceTensor
::
Impl
>
(
);
impl_
->
shape
=
shape
;
impl_
->
data
=
data
;
impl_
->
format
=
format
;
...
...
@@ -298,11 +300,11 @@ MaceTensor::MaceTensor(const std::vector<int64_t> &shape,
}
MaceTensor
::
MaceTensor
()
{
impl_
=
std
::
unique_ptr
<
MaceTensor
::
Impl
>
(
new
MaceTensor
::
Impl
()
);
impl_
=
make_unique
<
MaceTensor
::
Impl
>
(
);
}
MaceTensor
::
MaceTensor
(
const
MaceTensor
&
other
)
{
impl_
=
std
::
unique_ptr
<
MaceTensor
::
Impl
>
(
new
MaceTensor
::
Impl
()
);
impl_
=
make_unique
<
MaceTensor
::
Impl
>
(
);
impl_
->
shape
=
other
.
shape
();
impl_
->
data
=
other
.
data
();
impl_
->
format
=
other
.
data_format
();
...
...
@@ -310,7 +312,7 @@ MaceTensor::MaceTensor(const MaceTensor &other) {
}
MaceTensor
::
MaceTensor
(
const
MaceTensor
&&
other
)
{
impl_
=
std
::
unique_ptr
<
MaceTensor
::
Impl
>
(
new
MaceTensor
::
Impl
()
);
impl_
=
make_unique
<
MaceTensor
::
Impl
>
(
);
impl_
->
shape
=
other
.
shape
();
impl_
->
data
=
other
.
data
();
impl_
->
format
=
other
.
data_format
();
...
...
@@ -725,7 +727,7 @@ MaceStatus MaceEngine::Impl::Run(
}
MaceEngine
::
MaceEngine
(
const
MaceEngineConfig
&
config
)
:
impl_
(
new
MaceEngine
::
Impl
(
config
))
{}
impl_
(
make_unique
<
MaceEngine
::
Impl
>
(
config
))
{}
MaceEngine
::~
MaceEngine
()
=
default
;
...
...
mace/ops/activation.cc
浏览文件 @
dc364ddc
...
...
@@ -22,6 +22,7 @@
#include "mace/ops/opencl/buffer_transformer.h"
#include "mace/ops/opencl/image/activation.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -88,9 +89,8 @@ class ActivationOp<DeviceType::GPU, T> : public Operation {
MemoryType
mem_type
;
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
mem_type
=
MemoryType
::
GPU_IMAGE
;
kernel_
.
reset
(
new
opencl
::
image
::
ActivationKernel
<
T
>
(
type
,
relux_max_limit
,
leakyrelu_coefficient
));
kernel_
=
make_unique
<
opencl
::
image
::
ActivationKernel
<
T
>>
(
type
,
relux_max_limit
,
leakyrelu_coefficient
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/addn.cc
浏览文件 @
dc364ddc
...
...
@@ -24,6 +24,7 @@
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/addn.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -107,7 +108,7 @@ class AddNOp<DeviceType::GPU, T> : public Operation {
explicit
AddNOp
(
OpConstructContext
*
context
)
:
Operation
(
context
)
{
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
AddNKernel
<
T
>
);
kernel_
=
make_unique
<
opencl
::
image
::
AddNKernel
<
T
>>
(
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/arm/conv_winograd.cc
浏览文件 @
dc364ddc
...
...
@@ -15,6 +15,7 @@
#include <algorithm>
#include "mace/ops/arm/conv_winograd.h"
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -607,7 +608,7 @@ void TransformFilter8x8(const float *filter,
}
}
void
Wino
G
radConv3x3s1
(
const
float
*
input
,
void
Wino
g
radConv3x3s1
(
const
float
*
input
,
const
float
*
transformed_filter
,
const
index_t
batch
,
const
index_t
in_height
,
...
...
@@ -659,7 +660,7 @@ void WinoGradConv3x3s1(const float *input,
}
}
void
Wino
G
radConv3x3s1
(
const
float
*
input
,
void
Wino
g
radConv3x3s1
(
const
float
*
input
,
const
float
*
filter
,
const
index_t
batch
,
const
index_t
in_height
,
...
...
@@ -684,28 +685,30 @@ void WinoGradConv3x3s1(const float *input,
index_t
transformed_output_size
=
in_tile_area
*
batch
*
out_channels
*
tile_count
;
float
*
transformed_input
=
new
float
[
transformed_input_size
];
// TNCB
float
*
transformed_filter
=
new
float
[
transformed_filter_size
];
// TOC
float
*
transformed_output
=
new
float
[
transformed_output_size
];
auto
transformed_input
=
make_unique
<
float
[]
>
(
transformed_input_size
);
// TNCB NOLINT
auto
transformed_filter
=
make_unique
<
float
[]
>
(
transformed_filter_size
);
// TOC NOLINT
auto
transformed_output
=
make_unique
<
float
[]
>
(
transformed_output_size
);
// NOLINT
switch
(
out_tile_size
)
{
case
2
:
TransformFilter4x4
(
filter
,
in_channels
,
out_channels
,
transformed_filter
);
TransformFilter4x4
(
filter
,
in_channels
,
out_channels
,
transformed_filter
.
get
());
break
;
case
6
:
TransformFilter8x8
(
filter
,
in_channels
,
out_channels
,
transformed_filter
);
TransformFilter8x8
(
filter
,
in_channels
,
out_channels
,
transformed_filter
.
get
());
break
;
default:
MACE_NOT_IMPLEMENTED
;
}
WinoGradConv3x3s1
(
input
,
transformed_filter
,
batch
,
in_height
,
in_width
,
in_channels
,
out_channels
,
out_tile_size
,
transformed_input
,
transformed_output
,
output
,
sgemm
,
scratch_buffer
);
delete
[]
transformed_input
;
delete
[]
transformed_filter
;
delete
[]
transformed_output
;
WinogradConv3x3s1
(
input
,
transformed_filter
.
get
(),
batch
,
in_height
,
in_width
,
in_channels
,
out_channels
,
out_tile_size
,
transformed_input
.
get
(),
transformed_output
.
get
(),
output
,
sgemm
,
scratch_buffer
);
}
void
ConvRef3x3s1
(
const
float
*
input
,
...
...
mace/ops/arm/conv_winograd.h
浏览文件 @
dc364ddc
...
...
@@ -35,7 +35,7 @@ void TransformFilter8x8(const float *filter,
const
index_t
out_channels
,
float
*
output
);
void
Wino
G
radConv3x3s1
(
const
float
*
input
,
void
Wino
g
radConv3x3s1
(
const
float
*
input
,
const
float
*
filter
,
const
index_t
batch
,
const
index_t
in_height
,
...
...
@@ -47,7 +47,7 @@ void WinoGradConv3x3s1(const float *input,
SGemm
*
sgemm
,
ScratchBuffer
*
scratch_buffer
);
void
Wino
G
radConv3x3s1
(
const
float
*
input
,
void
Wino
g
radConv3x3s1
(
const
float
*
input
,
const
float
*
transformed_filter
,
const
index_t
batch
,
const
index_t
in_height
,
...
...
mace/ops/arm/conv_winograd_test.cc
浏览文件 @
dc364ddc
...
...
@@ -66,7 +66,7 @@ TEST(ConvWinogradTest, winograd) {
in_channels
,
out_channels
,
output_data_ref
);
SGemm
sgemm
;
ops
::
Wino
G
radConv3x3s1
(
input_data
,
filter_data
,
batch
,
in_height
,
ops
::
Wino
g
radConv3x3s1
(
input_data
,
filter_data
,
batch
,
in_height
,
in_width
,
in_channels
,
out_channels
,
6
,
output_data
,
&
sgemm
,
nullptr
);
...
...
mace/ops/batch_norm.cc
浏览文件 @
dc364ddc
...
...
@@ -22,6 +22,7 @@
#include "mace/ops/opencl/buffer_transformer.h"
#include "mace/ops/opencl/image/batch_norm.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -156,8 +157,8 @@ class BatchNormOp<DeviceType::GPU, T> : public Operation {
MemoryType
mem_type
;
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
mem_type
=
MemoryType
::
GPU_IMAGE
;
kernel_
.
reset
(
new
opencl
::
image
::
BatchNormKernel
<
T
>
(
epsilon
,
activation
,
relux_max_limit
,
leakyrelu_coefficient
)
)
;
kernel_
=
make_unique
<
opencl
::
image
::
BatchNormKernel
<
T
>
>
(
epsilon
,
activation
,
relux_max_limit
,
leakyrelu_coefficient
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/batch_to_space.cc
浏览文件 @
dc364ddc
...
...
@@ -19,6 +19,7 @@
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/batch_to_space.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -266,7 +267,7 @@ class BatchToSpaceNDOp<DeviceType::GPU, T> : public BatchToSpaceOpBase {
explicit
BatchToSpaceNDOp
(
OpConstructContext
*
context
)
:
BatchToSpaceOpBase
(
context
)
{
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
BatchToSpaceKernel
<
T
>
);
kernel_
=
make_unique
<
opencl
::
image
::
BatchToSpaceKernel
<
T
>>
(
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/bias_add.cc
浏览文件 @
dc364ddc
...
...
@@ -22,6 +22,7 @@
#include "mace/ops/opencl/buffer_transformer.h"
#include "mace/ops/opencl/image/bias_add.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -103,7 +104,7 @@ class BiasAddOp<DeviceType::GPU, T> : public Operation {
MemoryType
mem_type
;
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
mem_type
=
MemoryType
::
GPU_IMAGE
;
kernel_
.
reset
(
new
opencl
::
image
::
BiasAddKernel
<
T
>
);
kernel_
=
make_unique
<
opencl
::
image
::
BiasAddKernel
<
T
>>
(
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/channel_shuffle.cc
浏览文件 @
dc364ddc
...
...
@@ -18,6 +18,7 @@
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/channel_shuffle.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -83,7 +84,7 @@ class ChannelShuffleOp<DeviceType::GPU, T> : public Operation {
:
Operation
(
context
)
{
const
int
groups
=
Operation
::
GetOptionalArg
<
int
>
(
"group"
,
1
);
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
ChannelShuffleKernel
<
T
>
(
groups
)
);
kernel_
=
make_unique
<
opencl
::
image
::
ChannelShuffleKernel
<
T
>>
(
groups
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/concat.cc
浏览文件 @
dc364ddc
...
...
@@ -16,6 +16,7 @@
#include "mace/core/operator.h"
#include "mace/utils/quantize.h"
#include "mace/utils/memory.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/concat.h"
...
...
@@ -199,7 +200,7 @@ class ConcatOp<DeviceType::GPU, T> : public ConcatOpBase {
explicit
ConcatOp
(
OpConstructContext
*
context
)
:
ConcatOpBase
(
context
)
{
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
ConcatKernel
<
T
>
(
axis_
)
);
kernel_
=
make_unique
<
opencl
::
image
::
ConcatKernel
<
T
>>
(
axis_
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/conv_2d.cc
浏览文件 @
dc364ddc
...
...
@@ -31,6 +31,7 @@
#include "mace/ops/arm/conv_winograd.h"
#include "mace/ops/conv_pool_2d_base.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/utils/memory.h"
#include "mace/utils/utils.h"
#ifdef MACE_ENABLE_NEON
...
...
@@ -129,7 +130,7 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
if
(
filter_h
==
1
&&
filter_w
==
1
&&
stride_h
==
1
&&
stride_w
==
1
&&
dilation_h
==
1
&&
dilation_w
==
1
)
{
if
(
conv2d_delegator_
.
get
()
==
nullptr
)
{
conv2d_delegator_
.
reset
(
new
arm
::
fp32
::
Conv2dK1x1
()
);
conv2d_delegator_
=
make_unique
<
arm
::
fp32
::
Conv2dK1x1
>
(
);
}
conv2d_delegator_
->
Compute
(
context
,
input
,
filter
,
output
);
}
else
{
...
...
@@ -354,7 +355,7 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
*
transformed_output_data
=
transformed_output
.
mutable_data
<
float
>
();
conv_func
=
[
=
](
const
float
*
pad_input
,
float
*
pad_output
)
{
Wino
G
radConv3x3s1
(
pad_input
,
Wino
g
radConv3x3s1
(
pad_input
,
transformed_filter_data
,
batch
,
extra_input_height
,
...
...
@@ -508,12 +509,12 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
}
#else
if
(
conv2d_delegator_
.
get
()
==
nullptr
)
{
conv2d_delegator_
.
reset
(
new
ref
::
Conv2d
<
float
>
(
paddings
[
0
],
paddings
[
1
],
stride_h
,
stride_w
,
dilation_h
,
dilation_w
)
);
conv2d_delegator_
=
make_unique
<
ref
::
Conv2d
<
float
>
>
(
paddings
[
0
],
paddings
[
1
],
stride_h
,
stride_w
,
dilation_h
,
dilation_w
);
}
conv2d_delegator_
->
Compute
(
context
,
input
,
filter
,
output
);
#endif
...
...
@@ -848,7 +849,7 @@ class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase {
ScratchBuffer
*
scratch
=
context
->
device
()
->
scratch_buffer
();
scratch
->
Rewind
();
scratch
->
GrowSize
(
im2col_size
);
im2col
.
reset
(
new
Tensor
(
scratch
->
Scratch
(
im2col_size
),
DT_UINT8
)
);
im2col
=
make_unique
<
Tensor
>
(
scratch
->
Scratch
(
im2col_size
),
DT_UINT8
);
uint8_t
*
im2col_data
=
im2col
->
mutable_data
<
uint8_t
>
();
Im2col
(
input_data
,
input
->
shape
(),
filter_h
,
filter_w
,
stride_h
,
stride_w
,
static_cast
<
uint8_t
>
(
input
->
zero_point
()),
...
...
@@ -993,10 +994,10 @@ class Conv2dOp<DeviceType::GPU, T> : public ConvPool2dOpBase {
MemoryType
mem_type
;
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
mem_type
=
MemoryType
::
GPU_IMAGE
;
kernel_
.
reset
(
new
opencl
::
image
::
Conv2dKernel
<
T
>
);
kernel_
=
make_unique
<
opencl
::
image
::
Conv2dKernel
<
T
>>
(
);
}
else
{
mem_type
=
MemoryType
::
GPU_BUFFER
;
kernel_
.
reset
(
new
opencl
::
buffer
::
Conv2dKernel
<
T
>
);
kernel_
=
make_unique
<
opencl
::
buffer
::
Conv2dKernel
<
T
>>
(
);
}
context
->
set_output_mem_type
(
mem_type
);
// Transform filter tensor to target format
...
...
mace/ops/crop.cc
浏览文件 @
dc364ddc
...
...
@@ -18,6 +18,7 @@
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/crop.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -114,8 +115,8 @@ class CropOp<DeviceType::GPU, T> : public Operation {
:
Operation
(
context
)
{
const
int
axis
=
Operation
::
GetOptionalArg
<
int
>
(
"axis"
,
2
);
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
CropKernel
<
T
>
(
axis
,
Operation
::
GetRepeatedArgs
<
int
>
(
"offset"
))
)
;
kernel_
=
make_unique
<
opencl
::
image
::
CropKernel
<
T
>
>
(
axis
,
Operation
::
GetRepeatedArgs
<
int
>
(
"offset"
));
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/deconv_2d.cc
浏览文件 @
dc364ddc
...
...
@@ -28,6 +28,7 @@
#include "mace/core/tensor.h"
#include "mace/ops/activation.h"
#include "mace/ops/arm/deconv_2d_neon.h"
#include "mace/utils/memory.h"
#include "mace/utils/utils.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/buffer_transformer.h"
...
...
@@ -362,7 +363,7 @@ class Deconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase {
:
Deconv2dOpBase
(
context
)
{
MemoryType
mem_type
=
MemoryType
::
GPU_IMAGE
;
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
Deconv2dKernel
<
T
>
);
kernel_
=
make_unique
<
opencl
::
image
::
Deconv2dKernel
<
T
>>
(
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/depth_to_space.cc
浏览文件 @
dc364ddc
...
...
@@ -19,6 +19,7 @@
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/depth_to_space.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -97,7 +98,7 @@ class DepthToSpaceOp<DeviceType::GPU, T> : public Operation {
:
Operation
(
context
)
{
int
block_size
=
Operation
::
GetOptionalArg
<
int
>
(
"block_size"
,
1
);
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
DepthToSpaceKernel
<
T
>
(
block_size
)
);
kernel_
=
make_unique
<
opencl
::
image
::
DepthToSpaceKernel
<
T
>>
(
block_size
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/depthwise_conv2d.cc
浏览文件 @
dc364ddc
...
...
@@ -33,6 +33,7 @@
#include "mace/ops/arm/depthwise_conv2d_neon.h"
#include "mace/ops/conv_pool_2d_base.h"
#include "mace/public/mace.h"
#include "mace/utils/memory.h"
#include "mace/utils/quantize.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/buffer_transformer.h"
...
...
@@ -493,10 +494,10 @@ class DepthwiseConv2dOp<DeviceType::GPU, T> : public DepthwiseConv2dOpBase {
MemoryType
mem_type
;
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
mem_type
=
MemoryType
::
GPU_IMAGE
;
kernel_
.
reset
(
new
opencl
::
image
::
DepthwiseConv2dKernel
<
T
>
);
kernel_
=
make_unique
<
opencl
::
image
::
DepthwiseConv2dKernel
<
T
>>
(
);
}
else
{
mem_type
=
MemoryType
::
GPU_BUFFER
;
kernel_
.
reset
(
new
opencl
::
buffer
::
DepthwiseConv2dKernel
<
T
>
);
kernel_
=
make_unique
<
opencl
::
buffer
::
DepthwiseConv2dKernel
<
T
>>
(
);
}
context
->
set_output_mem_type
(
mem_type
);
// Transform filter tensor to target format
...
...
mace/ops/depthwise_deconv2d.cc
浏览文件 @
dc364ddc
...
...
@@ -28,6 +28,7 @@
#include "mace/ops/arm/depthwise_deconv2d_neon.h"
#include "mace/utils/utils.h"
#include "mace/public/mace.h"
#include "mace/utils/memory.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/buffer_transformer.h"
#include "mace/ops/opencl/image/depthwise_deconv2d.h"
...
...
@@ -412,7 +413,7 @@ class DepthwiseDeconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase {
:
Deconv2dOpBase
(
context
)
{
MemoryType
mem_type
=
MemoryType
::
GPU_IMAGE
;
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
DepthwiseDeconv2dKernel
<
T
>
);
kernel_
=
make_unique
<
opencl
::
image
::
DepthwiseDeconv2dKernel
<
T
>>
(
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/eltwise.cc
浏览文件 @
dc364ddc
...
...
@@ -30,6 +30,7 @@
#include "mace/core/future.h"
#include "mace/core/operator.h"
#include "mace/core/tensor.h"
#include "mace/utils/memory.h"
#include "mace/utils/quantize.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/buffer_transformer.h"
...
...
@@ -1160,8 +1161,8 @@ class EltwiseOp<DeviceType::GPU, T> : public Operation {
MemoryType
mem_type
;
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
mem_type
=
MemoryType
::
GPU_IMAGE
;
kernel_
.
reset
(
new
opencl
::
image
::
EltwiseKernel
<
T
>
(
type
,
coeff
,
scalar_input
,
scalar_input_index
)
)
;
kernel_
=
make_unique
<
opencl
::
image
::
EltwiseKernel
<
T
>
>
(
type
,
coeff
,
scalar_input
,
scalar_input_index
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/fully_connected.cc
浏览文件 @
dc364ddc
...
...
@@ -38,6 +38,8 @@
#include "mace/ops/opencl/image/fully_connected.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -186,7 +188,7 @@ class FullyConnectedOp<DeviceType::GPU, T> : public FullyConnectedOpBase {
MemoryType
mem_type
;
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
mem_type
=
MemoryType
::
GPU_IMAGE
;
kernel_
.
reset
(
new
opencl
::
image
::
FullyConnectedKernel
<
T
>
);
kernel_
=
make_unique
<
opencl
::
image
::
FullyConnectedKernel
<
T
>>
(
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/lstm_cell.cc
浏览文件 @
dc364ddc
...
...
@@ -18,6 +18,7 @@
#include "mace/core/operator.h"
#include "mace/ops/opencl/buffer_transformer.h"
#include "mace/ops/opencl/image/lstm_cell.h"
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -36,7 +37,7 @@ class LSTMCellOp<DeviceType::GPU, T> : public Operation {
0.0
));
MemoryType
mem_type
=
MemoryType
::
GPU_IMAGE
;
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
LSTMCellKernel
<
T
>
(
forget_bias
)
);
kernel_
=
make_unique
<
opencl
::
image
::
LSTMCellKernel
<
T
>>
(
forget_bias
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/opencl/buffer/conv_2d.h
浏览文件 @
dc364ddc
...
...
@@ -22,6 +22,7 @@
#include "mace/ops/opencl/buffer/utils.h"
#include "mace/ops/opencl/helper.h"
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -211,8 +212,8 @@ MaceStatus Conv2dKernel<T>::Compute(
old_scratch_size_
=
scratch
->
size
();
}
padded_input
.
reset
(
new
Tensor
(
scratch
->
Scratch
(
padded_input_size
),
input
->
dtype
())
)
;
padded_input
=
make_unique
<
Tensor
>
(
scratch
->
Scratch
(
padded_input_size
),
input
->
dtype
());
padded_input
->
Resize
(
padded_input_shape
);
PadInput
(
context
,
&
kernels_
[
0
],
input
,
pad_top
,
pad_left
,
...
...
mace/ops/opencl/buffer/depthwise_conv2d.h
浏览文件 @
dc364ddc
...
...
@@ -22,6 +22,7 @@
#include "mace/ops/opencl/buffer/utils.h"
#include "mace/ops/opencl/helper.h"
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -165,8 +166,8 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute(
old_scratch_size_
=
scratch
->
size
();
}
padded_input
.
reset
(
new
Tensor
(
scratch
->
Scratch
(
padded_input_size
),
input
->
dtype
()
));
padded_input
=
make_unique
<
Tensor
>
(
scratch
->
Scratch
(
padded_input_size
),
input
->
dtype
(
));
padded_input
->
Resize
(
padded_input_shape
);
PadInput
(
context
,
&
kernels_
[
0
],
input
,
pad_top
,
pad_left
,
...
...
mace/ops/opencl/buffer/pooling.h
浏览文件 @
dc364ddc
...
...
@@ -24,6 +24,7 @@
#include "mace/ops/opencl/buffer/utils.h"
#include "mace/ops/opencl/helper.h"
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -124,8 +125,8 @@ MaceStatus PoolingKernel<T>::Compute(
old_scratch_size_
=
scratch
->
size
();
}
padded_input
.
reset
(
new
Tensor
(
scratch
->
Scratch
(
padded_input_size
),
input
->
dtype
()
));
padded_input
=
make_unique
<
Tensor
>
(
scratch
->
Scratch
(
padded_input_size
),
input
->
dtype
(
));
padded_input
->
Resize
(
padded_input_shape
);
PadInput
(
context
,
&
kernels_
[
0
],
input
,
0
,
0
,
...
...
mace/ops/opencl/buffer_transformer.h
浏览文件 @
dc364ddc
...
...
@@ -24,6 +24,7 @@
#include "mace/ops/opencl/image/image_to_buffer.h"
#include "mace/ops/opencl/buffer/buffer_transform.h"
#include "mace/ops/common/transpose.h"
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -34,11 +35,11 @@ class OpenCLBufferTransformer {
OpenCLBufferTransformer
(
const
MemoryType
in_mem_type
,
const
MemoryType
out_mem_type
)
{
if
(
out_mem_type
==
MemoryType
::
GPU_IMAGE
)
{
kernel_
.
reset
(
new
opencl
::
image
::
BufferToImage
<
T
>
);
kernel_
=
make_unique
<
opencl
::
image
::
BufferToImage
<
T
>>
(
);
}
else
if
(
in_mem_type
==
MemoryType
::
GPU_IMAGE
)
{
kernel_
.
reset
(
new
opencl
::
image
::
ImageToBuffer
<
T
>
);
kernel_
=
make_unique
<
opencl
::
image
::
ImageToBuffer
<
T
>>
(
);
}
else
{
kernel_
.
reset
(
new
opencl
::
buffer
::
BufferTransform
<
T
>
);
kernel_
=
make_unique
<
opencl
::
buffer
::
BufferTransform
<
T
>>
(
);
}
}
...
...
mace/ops/opencl/helper.h
浏览文件 @
dc364ddc
...
...
@@ -26,6 +26,7 @@
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/runtime/opencl/opencl_util.h"
#include "mace/core/types.h"
#include "mace/utils/memory.h"
#include "mace/utils/utils.h"
namespace
mace
{
...
...
@@ -41,8 +42,8 @@ namespace ops {
#define MACE_OUT_OF_RANGE_INIT(kernel) \
if (runtime->IsOutOfRangeCheckEnabled()) { \
oorc_flag =
std::move(std::unique_ptr<Buffer>(
\
new Buffer((context)->device()->allocator())));
\
oorc_flag =
make_unique<Buffer>(
\
(context)->device()->allocator());
\
MACE_RETURN_IF_ERROR((oorc_flag)->Allocate(sizeof(int)));\
oorc_flag->Map(nullptr); \
*(oorc_flag->mutable_data<int>()) = 0; \
...
...
mace/ops/opencl/image/winograd_conv2d.cc
浏览文件 @
dc364ddc
...
...
@@ -17,6 +17,7 @@
#include "mace/ops/common/activation_type.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/opencl/helper.h"
#include "mace/utils/memory.h"
#include "mace/utils/utils.h"
namespace
mace
{
...
...
@@ -264,9 +265,9 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context,
OpenCLBufferType
::
IN_OUT_HEIGHT
,
&
t_input_image_shape
);
ScratchImage
transformed_input_image
(
scratch_manager
);
std
::
unique_ptr
<
Tensor
>
transformed_input
(
new
Tensor
(
std
::
unique_ptr
<
Tensor
>
transformed_input
=
make_unique
<
Tensor
>
(
transformed_input_image
.
Scratch
(
context
->
device
()
->
allocator
(),
t_input_image_shape
,
dt
),
dt
)
)
;
t_input_image_shape
,
dt
),
dt
);
MACE_RETURN_IF_ERROR
(
transformed_input
->
ResizeImage
(
t_input_shape
,
t_input_image_shape
));
MACE_RETURN_IF_ERROR
(
WinogradInputTransform
(
...
...
@@ -289,9 +290,9 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context,
&
mm_output_image_shape
);
ScratchImage
mm_output_image
(
scratch_manager
);
std
::
unique_ptr
<
Tensor
>
mm_output
(
new
Tensor
(
std
::
unique_ptr
<
Tensor
>
mm_output
=
make_unique
<
Tensor
>
(
mm_output_image
.
Scratch
(
context
->
device
()
->
allocator
(),
mm_output_image_shape
,
dt
),
dt
)
)
;
mm_output_image_shape
,
dt
),
dt
);
MACE_RETURN_IF_ERROR
(
mm_output
->
ResizeImage
(
mm_output_shape
,
mm_output_image_shape
));
...
...
mace/ops/opencl/out_of_range_check_test.cc
浏览文件 @
dc364ddc
...
...
@@ -22,6 +22,7 @@
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/ops/opencl/helper.h"
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -130,7 +131,8 @@ TEST(OutOfRangeCheckTest, RandomTest) {
index_t
channels
=
11
;
GPUContext
gpu_context
;
std
::
unique_ptr
<
Device
>
device
(
new
GPUDevice
(
gpu_context
.
opencl_tuner
()));
std
::
unique_ptr
<
Device
>
device
=
make_unique
<
GPUDevice
>
(
gpu_context
.
opencl_tuner
());
Workspace
ws
;
OpContext
context
(
&
ws
,
device
.
get
());
...
...
mace/ops/ops_test_util.cc
浏览文件 @
dc364ddc
...
...
@@ -14,6 +14,7 @@
#include "mace/ops/ops_test_util.h"
#include "mace/core/memory_optimizer.h"
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -120,17 +121,15 @@ OpTestContext *OpTestContext::Get(int num_threads,
OpTestContext
::
OpTestContext
(
int
num_threads
,
CPUAffinityPolicy
cpu_affinity_policy
,
bool
use_gemmlowp
)
:
gpu_context_
(
new
GPUContext
(
GetStoragePathFromEnv
())),
:
gpu_context_
(
std
::
make_shared
<
GPUContext
>
(
GetStoragePathFromEnv
())),
opencl_mem_types_
({
MemoryType
::
GPU_IMAGE
})
{
device_map_
[
DeviceType
::
CPU
]
=
std
::
unique_ptr
<
Device
>
(
new
CPUDevice
(
num_threads
,
cpu_affinity_policy
,
use_gemmlowp
));
device_map_
[
DeviceType
::
GPU
]
=
std
::
unique_ptr
<
Device
>
(
new
GPUDevice
(
gpu_context_
->
opencl_tuner
(),
gpu_context_
->
opencl_cache_storage
(),
GPUPriorityHint
::
PRIORITY_NORMAL
));
device_map_
[
DeviceType
::
CPU
]
=
make_unique
<
CPUDevice
>
(
num_threads
,
cpu_affinity_policy
,
use_gemmlowp
);
device_map_
[
DeviceType
::
GPU
]
=
make_unique
<
GPUDevice
>
(
gpu_context_
->
opencl_tuner
(),
gpu_context_
->
opencl_cache_storage
(),
GPUPriorityHint
::
PRIORITY_NORMAL
);
}
std
::
shared_ptr
<
GPUContext
>
OpTestContext
::
gpu_context
()
const
{
...
...
@@ -189,12 +188,12 @@ bool OpsTestNet::Setup(mace::DeviceType device) {
}
}
MemoryOptimizer
mem_optimizer
;
net_
=
std
::
unique_ptr
<
NetBase
>
(
new
SerialNet
(
net_
=
make_unique
<
SerialNet
>
(
op_registry_
.
get
(),
&
net_def
,
&
ws_
,
OpTestContext
::
Get
()
->
GetDevice
(
device
),
&
mem_optimizer
)
)
;
&
mem_optimizer
);
MaceStatus
status
=
(
ws_
.
PreallocateOutputTensor
(
net_def
,
&
mem_optimizer
,
...
...
@@ -236,12 +235,12 @@ MaceStatus OpsTestNet::RunNet(const mace::NetDef &net_def,
const
mace
::
DeviceType
device
)
{
device_type_
=
device
;
MemoryOptimizer
mem_optimizer
;
net_
=
std
::
unique_ptr
<
NetBase
>
(
new
SerialNet
(
net_
=
make_unique
<
SerialNet
>
(
op_registry_
.
get
(),
&
net_def
,
&
ws_
,
OpTestContext
::
Get
()
->
GetDevice
(
device
),
&
mem_optimizer
)
)
;
&
mem_optimizer
);
MACE_RETURN_IF_ERROR
(
ws_
.
PreallocateOutputTensor
(
net_def
,
&
mem_optimizer
,
...
...
mace/ops/ops_test_util.h
浏览文件 @
dc364ddc
...
...
@@ -34,6 +34,7 @@
#include "mace/core/workspace.h"
#include "mace/ops/ops_registry.h"
#include "mace/public/mace.h"
#include "mace/utils/memory.h"
#include "mace/utils/utils.h"
#include "mace/utils/quantize.h"
#include "mace/ops/testing/test_utils.h"
...
...
@@ -97,7 +98,7 @@ class OpTestContext {
class
OpsTestNet
{
public:
OpsTestNet
()
:
op_registry_
(
new
OpRegistry
())
{}
op_registry_
(
make_unique
<
OpRegistry
>
())
{}
template
<
DeviceType
D
,
typename
T
>
void
AddInputFromArray
(
const
std
::
string
&
name
,
...
...
@@ -355,9 +356,9 @@ class OpsTestNet {
std
::
unique_ptr
<
Tensor
>
CreateTensor
(
const
std
::
vector
<
index_t
>
&
shape
=
{},
const
std
::
vector
<
T
>
&
data
=
{})
{
std
::
unique_ptr
<
Tensor
>
res
(
new
Tensor
(
OpTestContext
::
Get
()
->
GetDevice
(
D
)
->
allocator
(),
DataTypeToEnum
<
T
>::
v
()
));
std
::
unique_ptr
<
Tensor
>
res
=
make_unique
<
Tensor
>
(
OpTestContext
::
Get
()
->
GetDevice
(
D
)
->
allocator
(),
DataTypeToEnum
<
T
>::
v
(
));
if
(
!
data
.
empty
())
{
res
->
Resize
(
shape
);
T
*
input_data
=
res
->
mutable_data
<
T
>
();
...
...
mace/ops/pad.cc
浏览文件 @
dc364ddc
...
...
@@ -20,6 +20,7 @@
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/pad.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -182,8 +183,8 @@ class PadOp<DeviceType::GPU, T> : public Operation {
float
constant_value
=
Operation
::
GetOptionalArg
<
float
>
(
"constant_value"
,
0.0
);
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
PadKernel
<
T
>
(
type
,
paddings
,
constant_value
)
)
;
kernel_
=
make_unique
<
opencl
::
image
::
PadKernel
<
T
>
>
(
type
,
paddings
,
constant_value
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/pooling.cc
浏览文件 @
dc364ddc
...
...
@@ -32,6 +32,7 @@
#include "mace/ops/opencl/image/pooling.h"
#include "mace/ops/opencl/buffer/pooling.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -433,10 +434,10 @@ class PoolingOp<DeviceType::GPU, T> : public PoolingOpBase {
explicit
PoolingOp
(
OpConstructContext
*
context
)
:
PoolingOpBase
(
context
)
{
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
PoolingKernel
<
T
>
);
kernel_
=
make_unique
<
opencl
::
image
::
PoolingKernel
<
T
>>
(
);
}
else
{
context
->
set_output_mem_type
(
MemoryType
::
GPU_BUFFER
);
kernel_
.
reset
(
new
opencl
::
buffer
::
PoolingKernel
<
T
>
);
kernel_
=
make_unique
<
opencl
::
buffer
::
PoolingKernel
<
T
>>
(
);
}
}
MaceStatus
Run
(
OpContext
*
context
)
override
{
...
...
mace/ops/reduce.cc
浏览文件 @
dc364ddc
...
...
@@ -25,6 +25,7 @@
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/reduce.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -847,9 +848,9 @@ class ReduceOp<DeviceType::GPU, T> : public ReduceOpBase {
explicit
ReduceOp
(
OpConstructContext
*
context
)
:
ReduceOpBase
(
context
)
{
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
ReduceKernel
<
T
>
(
reduce_type_
,
axis_
,
keep_dims_
)
);
kernel_
=
make_unique
<
opencl
::
image
::
ReduceKernel
<
T
>
>
(
reduce_type_
,
axis_
,
keep_dims_
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/resize_bicubic.cc
浏览文件 @
dc364ddc
...
...
@@ -23,6 +23,7 @@
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/resize_bicubic.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -197,9 +198,8 @@ class ResizeBicubicOp<DeviceType::GPU, T> : public Operation {
"size"
,
{
-
1
,
-
1
});
MACE_CHECK
(
size
.
size
()
==
2
);
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
ResizeBicubicKernel
<
T
>
(
align_corners
,
size
[
0
],
size
[
1
]));
kernel_
=
make_unique
<
opencl
::
image
::
ResizeBicubicKernel
<
T
>>
(
align_corners
,
size
[
0
],
size
[
1
]);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/resize_bilinear.cc
浏览文件 @
dc364ddc
...
...
@@ -19,6 +19,7 @@
#include <vector>
#include "mace/core/operator.h"
#include "mace/utils/memory.h"
#include "mace/utils/quantize.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/resize_bilinear.h"
...
...
@@ -332,9 +333,8 @@ class ResizeBilinearOp<DeviceType::GPU, T> : public Operation {
"size"
,
{
-
1
,
-
1
});
MACE_CHECK
(
size
.
size
()
==
2
);
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
ResizeBilinearKernel
<
T
>
(
align_corners
,
size
[
0
],
size
[
1
]));
kernel_
=
make_unique
<
opencl
::
image
::
ResizeBilinearKernel
<
T
>>
(
align_corners
,
size
[
0
],
size
[
1
]);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/resize_nearest_neighbor.cc
浏览文件 @
dc364ddc
...
...
@@ -22,6 +22,7 @@
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/resize_nearest_neighbor.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -142,8 +143,8 @@ class ResizeNearestNeighborOp<DeviceType::GPU, T> : public Operation {
bool
align_corners
=
Operation
::
GetOptionalArg
<
bool
>
(
"align_corners"
,
false
);
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
ResizeNearestNeighborKernel
<
T
>
(
align_corners
)
)
;
kernel_
=
make_unique
<
opencl
::
image
::
ResizeNearestNeighborKernel
<
T
>
>
(
align_corners
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/sgemm.cc
浏览文件 @
dc364ddc
...
...
@@ -18,6 +18,7 @@
#include "mace/ops/sgemm.h"
#include "mace/core/runtime/cpu/cpu_runtime.h"
#include "mace/utils/memory.h"
#if defined(MACE_ENABLE_NEON)
#include <arm_neon.h>
...
...
@@ -55,27 +56,27 @@ void SGemm::operator()(const SGemmMatrixMap<const float> &lhs,
scratch_buffer
->
GrowSize
(
total_size
*
sizeof
(
float
));
if
(
!
lhs
.
is_const
())
{
packed_lhs_
.
reset
(
new
Tensor
(
scratch_buffer
->
Scratch
(
lhs
.
size
()
*
sizeof
(
float
)),
DT_FLOAT
)
)
;
packed_lhs_
=
make_unique
<
Tensor
>
(
scratch_buffer
->
Scratch
(
lhs
.
size
()
*
sizeof
(
float
)),
DT_FLOAT
);
}
if
(
!
rhs
.
is_const
())
{
packed_rhs_
.
reset
(
new
Tensor
(
scratch_buffer
->
Scratch
(
rhs
.
size
()
*
sizeof
(
float
)),
DT_FLOAT
)
)
;
packed_rhs_
=
make_unique
<
Tensor
>
(
scratch_buffer
->
Scratch
(
rhs
.
size
()
*
sizeof
(
float
)),
DT_FLOAT
);
}
packed_result_
.
reset
(
new
Tensor
(
scratch_buffer
->
Scratch
(
result
->
size
()
*
sizeof
(
float
)),
DT_FLOAT
)
)
;
packed_result_
=
make_unique
<
Tensor
>
(
scratch_buffer
->
Scratch
(
result
->
size
()
*
sizeof
(
float
)),
DT_FLOAT
);
}
if
(
packed_lhs_
.
get
()
==
nullptr
)
{
packed_lhs_
.
reset
(
new
Tensor
(
GetCPUAllocator
(),
DT_FLOAT
)
);
packed_lhs_
=
make_unique
<
Tensor
>
(
GetCPUAllocator
(),
DT_FLOAT
);
packed_lhs_
->
Resize
({
lhs
.
size
()});
}
if
(
packed_rhs_
.
get
()
==
nullptr
)
{
packed_rhs_
.
reset
(
new
Tensor
(
GetCPUAllocator
(),
DT_FLOAT
)
);
packed_rhs_
=
make_unique
<
Tensor
>
(
GetCPUAllocator
(),
DT_FLOAT
);
packed_rhs_
->
Resize
({
rhs
.
size
()});
}
if
(
packed_result_
.
get
()
==
nullptr
)
{
packed_result_
.
reset
(
new
Tensor
(
GetCPUAllocator
(),
DT_FLOAT
)
);
packed_result_
=
make_unique
<
Tensor
>
(
GetCPUAllocator
(),
DT_FLOAT
);
packed_result_
->
Resize
({
result
->
size
()});
}
...
...
mace/ops/softmax.cc
浏览文件 @
dc364ddc
...
...
@@ -30,6 +30,8 @@
#include "mace/ops/opencl/buffer/softmax.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -374,10 +376,10 @@ class SoftmaxOp<DeviceType::GPU, T> : public Operation {
explicit
SoftmaxOp
(
OpConstructContext
*
context
)
:
Operation
(
context
)
{
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
SoftmaxKernel
<
T
>
);
kernel_
=
make_unique
<
opencl
::
image
::
SoftmaxKernel
<
T
>>
(
);
}
else
{
context
->
set_output_mem_type
(
MemoryType
::
GPU_BUFFER
);
kernel_
.
reset
(
new
opencl
::
buffer
::
SoftmaxKernel
<
T
>
);
kernel_
=
make_unique
<
opencl
::
buffer
::
SoftmaxKernel
<
T
>>
(
);
}
}
MaceStatus
Run
(
OpContext
*
context
)
override
{
...
...
mace/ops/space_to_batch.cc
浏览文件 @
dc364ddc
...
...
@@ -19,6 +19,7 @@
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/space_to_batch.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -309,7 +310,7 @@ class SpaceToBatchNDOp<DeviceType::GPU, T> : public SpaceToBatchOpBase {
explicit
SpaceToBatchNDOp
(
OpConstructContext
*
context
)
:
SpaceToBatchOpBase
(
context
)
{
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
SpaceToBatchKernel
<
T
>
);
kernel_
=
make_unique
<
opencl
::
image
::
SpaceToBatchKernel
<
T
>>
(
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/space_to_depth.cc
浏览文件 @
dc364ddc
...
...
@@ -19,6 +19,7 @@
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/space_to_depth.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -95,7 +96,7 @@ class SpaceToDepthOp<DeviceType::GPU, T> : public Operation {
:
Operation
(
context
)
{
int
block_size
=
Operation
::
GetOptionalArg
<
int
>
(
"block_size"
,
1
);
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
SpaceToDepthKernel
<
T
>
(
block_size
)
);
kernel_
=
make_unique
<
opencl
::
image
::
SpaceToDepthKernel
<
T
>>
(
block_size
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/split.cc
浏览文件 @
dc364ddc
...
...
@@ -19,6 +19,7 @@
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/split.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -108,7 +109,7 @@ class SplitOp<DeviceType::GPU, T> : public Operation {
:
Operation
(
context
)
{
int32_t
axis
=
Operation
::
GetOptionalArg
<
int
>
(
"axis"
,
3
);
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
SplitKernel
<
T
>
(
axis
)
);
kernel_
=
make_unique
<
opencl
::
image
::
SplitKernel
<
T
>>
(
axis
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/ops/sqrdiff_mean.cc
浏览文件 @
dc364ddc
...
...
@@ -19,6 +19,7 @@
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/sqrdiff_mean.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace
mace
{
namespace
ops
{
...
...
@@ -83,7 +84,7 @@ class SqrDiffMeanOp<DeviceType::GPU, T> : public Operation {
explicit
SqrDiffMeanOp
(
OpConstructContext
*
context
)
:
Operation
(
context
)
{
if
(
context
->
device
()
->
gpu_runtime
()
->
UseImageMemory
())
{
kernel_
.
reset
(
new
opencl
::
image
::
SqrDiffMeanKernel
<
T
>
()
);
kernel_
=
make_unique
<
opencl
::
image
::
SqrDiffMeanKernel
<
T
>>
(
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/utils/memory.h
0 → 100644
浏览文件 @
dc364ddc
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_UTILS_MEMORY_H_
#define MACE_UTILS_MEMORY_H_
#include <cstddef>
#include <memory>
#include <type_traits>
#include <utility>
namespace mace {

namespace memory_internal {

// Traits to select proper overload and return type for `make_unique<>`.
//
// Each specialization exposes exactly one member alias (`scalar`, `array` or
// `invalid`). The fallback `make_unique` overloads below each name a different
// alias in their return type, so for a given `T` only the overload whose alias
// exists remains viable after template argument substitution.

// Non-array `T`: the result type is `std::unique_ptr<T>`.
template <typename T>
struct MakeUniqueResult {
  using scalar = std::unique_ptr<T>;
};

// Array of unknown bound `T[]`: the result type is `std::unique_ptr<T[]>`.
template <typename T>
struct MakeUniqueResult<T[]> {
  using array = std::unique_ptr<T[]>;
};

// Array of known bound `T[N]`: only used to select the deleted overload.
template <typename T, std::size_t N>
struct MakeUniqueResult<T[N]> {
  using invalid = void;
};

}  // namespace memory_internal

// gcc 4.8 has __cplusplus at 201301 but doesn't define make_unique. Other
// supported compilers either just define __cplusplus as 201103 but have
// make_unique (msvc), or have make_unique whenever __cplusplus > 201103 (clang)
#if (__cplusplus > 201103L || defined(_MSC_VER)) && \
    !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8)

// The standard library provides `make_unique`; re-export it as
// `mace::make_unique` so callers use one spelling on every toolchain.
using std::make_unique;

#else

// `make_unique` overload for non-array types.
// Perfect-forwards `args` to the constructor of `T` and wraps the new object
// in a `std::unique_ptr<T>`.
template <typename T, typename... Args>
typename memory_internal::MakeUniqueResult<T>::scalar make_unique(
    Args &&... args) {
  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}

// `make_unique` overload for an array T[] of unknown bounds.
// The array allocation needs to use the `new T[size]` form and cannot take
// element constructor arguments. The `std::unique_ptr` will manage destructing
// these array elements.
// The trailing `()` value-initializes the `n` elements.
template <typename T>
typename memory_internal::MakeUniqueResult<T>::array make_unique(
    std::size_t n) {
  return std::unique_ptr<T>(new typename std::remove_extent<T>::type[n]());
}

// `make_unique` overload for an array T[N] of known bounds.
// This construction will be rejected.
template <typename T, typename... Args>
typename memory_internal::MakeUniqueResult<T>::invalid make_unique(
    Args &&... /* args */) = delete;

#endif

}  // namespace mace
#endif // MACE_UTILS_MEMORY_H_
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录