慢慢CG / Mace (fork of Xiaomi / Mace)
Commit 344a9fea
Authored October 18, 2017 by Liangliang He
Parent: ebff986d

Add empty conv2d opencl kernel
Showing 17 changed files with 235 additions and 116 deletions (+235 / -116)
mace/core/runtime/opencl/opencl_allocator.cc   +1   -1
mace/core/runtime/opencl/opencl_runtime.cc     +50  -31
mace/core/runtime/opencl/opencl_runtime.h      +3   -3
mace/core/runtime/opencl/opencl_smoketest.cc   +1   -1
mace/core/runtime/opencl/opencl_wrapper.cc     +2   -2
mace/core/tensor.h                             +10  -9
mace/examples/mace_run.cc                      +1   -1
mace/kernels/BUILD                             +3   -2
mace/kernels/conv_2d.h                         +37  -33
mace/kernels/neon/conv_2d_neon.cc              +27  -16
mace/kernels/opencl/conv_2d_opencl.cc          +51  -0
mace/kernels/opencl/conv_2d_opencl_1x1.cc      +36  -0
mace/ops/conv_2d.cc                            +2   -0
mace/ops/conv_2d.h                             +2   -9
mace/ops/conv_2d_benchmark.cc                  +4   -3
mace/ops/ops_test_util.h                       +4   -4
mace/tools/benchmark/benchmark_model.cc        +1   -1
mace/core/runtime/opencl/opencl_allocator.cc

@@ -31,7 +31,7 @@ void OpenCLAllocator::Delete(void *buffer) {
 void *OpenCLAllocator::Map(void *buffer, size_t nbytes) {
   auto cl_buffer = static_cast<cl::Buffer *>(buffer);
   auto queue = OpenCLRuntime::Get()->command_queue();
-  // TODO (heliangliang) Non-blocking call
+  // TODO(heliangliang) Non-blocking call
   cl_int error;
   void *mapped_ptr =
       queue.enqueueMapBuffer(*cl_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0,
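The Map/Unmap pair gives the rest of the runtime a blocking, host-visible view of a device buffer. A minimal usage sketch (illustrative only; it assumes an OpenCLAllocator instance named allocator and a buffer it previously allocated with room for n floats):

// Sketch: host-side round trip through the mapped pointer.
float *host_ptr =
    static_cast<float *>(allocator.Map(buffer, n * sizeof(float)));
for (size_t i = 0; i < n; ++i) {
  host_ptr[i] = 1.0f;               // write while the buffer is mapped
}
allocator.Unmap(buffer, host_ptr);  // hand the data back to the device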
mace/core/runtime/opencl/opencl_runtime.cc

@@ -4,8 +4,12 @@
 #include <cstdlib>
 #include <fstream>
+#include <memory>
 #include <mutex>
 
+#include <dirent.h>
+#include <errno.h>
+
 #include "mace/core/logging.h"
 #include "mace/core/runtime/opencl/opencl_runtime.h"
 #include "mace/core/runtime/opencl/opencl_wrapper.h"

@@ -13,8 +17,7 @@
 namespace mace {
 namespace {
 
-bool ReadSourceFile(const char *filename, std::string *content) {
-  MACE_CHECK_NOTNULL(filename);
+bool ReadSourceFile(const std::string &filename, std::string *content) {
   MACE_CHECK_NOTNULL(content);
   *content = "";
   std::ifstream ifs(filename, std::ifstream::in);

@@ -31,26 +34,50 @@ bool ReadSourceFile(const char *filename, std::string *content) {
 }
 
 bool BuildProgram(OpenCLRuntime *runtime,
-                  const char *filename,
+                  const std::string &path,
                   cl::Program *program) {
-  MACE_CHECK_NOTNULL(filename);
   MACE_CHECK_NOTNULL(program);
 
-  std::string kernel_code;
-  if (!ReadSourceFile(filename, &kernel_code)) {
-    LOG(ERROR) << "Failed to read kernel source " << filename;
-    return false;
-  }
+  auto closer = [](DIR *d) {
+    if (d != nullptr) closedir(d);
+  };
+  std::unique_ptr<DIR, decltype(closer)> dir(opendir(path.c_str()), closer);
+  MACE_CHECK_NOTNULL(dir.get());
 
+  const std::string kSourceSuffix = ".cl";
   cl::Program::Sources sources;
-  sources.push_back({kernel_code.c_str(), kernel_code.length()});
+  errno = 0;
+  dirent *entry = readdir(dir.get());
+  MACE_CHECK(errno == 0);
+  while (entry != nullptr) {
+    if (entry->d_type == DT_REG) {
+      std::string d_name(entry->d_name);
+      if (d_name.size() > kSourceSuffix.size() &&
+          d_name.compare(d_name.size() - kSourceSuffix.size(),
+                         kSourceSuffix.size(), kSourceSuffix) == 0) {
+        std::string filename = path + d_name;
+        std::string kernel_source;
+        MACE_CHECK(ReadSourceFile(filename, &kernel_source));
+        sources.push_back({kernel_source.c_str(), kernel_source.length()});
+      }
+    }
+    entry = readdir(dir.get());
+    MACE_CHECK(errno == 0);
+  };
 
   *program = cl::Program(runtime->context(), sources);
-  if (program->build({runtime->device()}) != CL_SUCCESS) {
-    LOG(INFO) << "Error building: "
-              << program->getBuildInfo<CL_PROGRAM_BUILD_LOG>(runtime->device());
-    return false;
+  std::string build_options = "-Werror -cl-mad-enable -I" + path;
+  // TODO(heliangliang) -cl-unsafe-math-optimizations -cl-fast-relaxed-math
+  if (program->build({runtime->device()}, build_options.c_str()) != CL_SUCCESS) {
+    if (program->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(runtime->device()) ==
+        CL_BUILD_ERROR) {
+      std::string build_log =
+          program->getBuildInfo<CL_PROGRAM_BUILD_LOG>(runtime->device());
+      LOG(INFO) << "Program build log: " << build_log;
+    }
+    LOG(FATAL) << "Build program failed";
   }
 
   return true;
 }

@@ -123,24 +150,16 @@ cl::Device &OpenCLRuntime::device() { return device_; }
 
 cl::CommandQueue &OpenCLRuntime::command_queue() { return command_queue_; }
 
-cl::Program OpenCLRuntime::GetProgram(const std::string &name) {
-  static const char *kernel_source_path = getenv("MACE_KERNEL_SOURCE_PATH");
-  std::string filename = name;
-  if (kernel_source_path != nullptr) {
-    filename = kernel_source_path + name;
-  }
+cl::Program &OpenCLRuntime::program() {
+  // TODO(heliangliang) Support binary format
+  static const char *kernel_path = getenv("MACE_KERNEL_PATH");
+  std::string path(kernel_path == nullptr ? "" : kernel_path);
 
-  std::lock_guard<std::mutex> lock(program_lock_);
-  // TODO (heliangliang) Support binary format
-  auto iter = programs_.find(name);
-  if (iter != programs_.end()) {
-    return iter->second;
-  } else {
-    cl::Program program;
-    MACE_CHECK(BuildProgram(this, filename.c_str(), &program));
-    programs_.emplace(name, program);
-    return program;
-  }
+  std::call_once(build_flag_, [this, &path]() {
+    MACE_CHECK(BuildProgram(this, path, &program_));
+  });
+
+  return program_;
 }
 
 }  // namespace mace
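With this change the runtime no longer builds one program per source file; BuildProgram concatenates every *.cl regular file found under the directory named by MACE_KERNEL_PATH into a single cl::Program and builds it once. A hedged usage sketch (the environment value and kernel name below are assumptions, not part of this commit; note that BuildProgram appends the file name directly to the path, so a trailing slash is needed, and the same path feeds the -I include option):

// Sketch: build-once access to the shared program, then look up a kernel.
setenv("MACE_KERNEL_PATH", "/data/local/tmp/cl/", 1);  // directory of .cl sources
auto runtime = OpenCLRuntime::Get();
cl::Program &program = runtime->program();  // first call runs BuildProgram once
cl::Kernel kernel(program, "simple_add");   // assumes such a kernel exists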
mace/core/runtime/opencl/opencl_runtime.h

@@ -28,14 +28,14 @@ class OpenCLRuntime {
   cl::Context &context();
   cl::Device &device();
   cl::CommandQueue &command_queue();
-  cl::Program GetProgram(const std::string &name);
+  cl::Program &program();
 
  private:
   cl::Context context_;
   cl::CommandQueue command_queue_;
   cl::Device device_;
-  std::map<std::string, cl::Program> programs_;
-  std::mutex program_lock_;
+  cl::Program program_;
+  std::once_flag build_flag_;
 };
 
 }  // namespace mace
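The header swaps the per-name program map plus mutex for a single program_ guarded by a std::once_flag; correctness now rests on std::call_once, which runs its callable exactly once and makes concurrent callers wait for it. A standalone sketch of that guarantee (names here are illustrative, not from the MACE sources):

#include <mutex>

std::once_flag build_flag;  // stand-in for build_flag_
int program_state = 0;      // stand-in for program_

void EnsureBuilt() {
  // The lambda runs exactly once; every later call is a cheap no-op and
  // concurrent first calls block until it has finished.
  std::call_once(build_flag, [] { program_state = 42; });
}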
mace/core/runtime/opencl/opencl_smoketest.cc

@@ -40,7 +40,7 @@ int main() {
     step[0] = step_size;
   }
 
-  auto program = runtime->GetProgram("simple_add.cl");
+  auto program = runtime->program();
   auto simple_add =
       cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl::Buffer>(
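The smoke test then wraps a kernel from the shared program in a typed functor and enqueues it. A hedged sketch of that dispatch, assuming the cl2.hpp bindings and pre-existing cl::Buffer objects gpu_a, gpu_b, gpu_c and gpu_step of N elements:

// Sketch: typed kernel dispatch with cl::KernelFunctor.
cl_int error;
auto simple_add =
    cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl::Buffer>(
        program, "simple_add");
simple_add(cl::EnqueueArgs(runtime->command_queue(), cl::NDRange(N)),
           gpu_a, gpu_b, gpu_c, gpu_step, error);
runtime->command_queue().finish();  // wait for the kernel to complete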
mace/core/runtime/opencl/opencl_wrapper.cc

@@ -195,8 +195,8 @@ OpenCLLibraryImpl &OpenCLLibraryImpl::Get() {
 bool OpenCLLibraryImpl::Load() {
   if (loaded()) return true;
 
-  // TODO (heliangliang) Make this configurable
-  // TODO (heliangliang) Benchmark 64 bit overhead
+  // TODO(heliangliang) Make this configurable
+  // TODO(heliangliang) Benchmark 64 bit overhead
   static const std::vector<std::string> paths = {
 #if defined(__aarch64__)
     // Qualcomm Adreno
mace/core/tensor.h

@@ -88,7 +88,7 @@ class Tensor {
    * Map the device buffer as CPU buffer to access the data, unmap must be
    * called later
    */
-  inline void Map() {
+  inline void Map() const {
     if (!OnHost()) {
       MACE_CHECK(buffer_ != nullptr && data_ == nullptr);
       data_ = alloc_->Map(buffer_, size_ * SizeOfType());

@@ -98,7 +98,7 @@ class Tensor {
   /*
    * Unmap the device buffer
    */
-  inline void Unmap() {
+  inline void Unmap() const {
     if (!OnHost()) {
       MACE_CHECK(buffer_ != nullptr && data_ != nullptr);
       alloc_->Unmap(buffer_, data_);

@@ -187,7 +187,7 @@ class Tensor {
     LOG(INFO) << os.str();
   }
 
-  inline size_t SizeOfType() {
+  inline size_t SizeOfType() const {
     size_t type_size = 0;
     CASES(dtype_, type_size = sizeof(T));
     return type_size;

@@ -203,14 +203,15 @@ class Tensor {
   class MappingGuard {
    public:
-    MappingGuard(Tensor *tensor) : tensor_(tensor) {
-      MACE_ASSERT(tensor_ != nullptr);
-      tensor_->Map();
+    MappingGuard(const Tensor *tensor) : tensor_(tensor) {
+      if (tensor_ != nullptr) tensor_->Map();
     }
-    ~MappingGuard() { tensor_->Unmap(); }
+    ~MappingGuard() {
+      if (tensor_ != nullptr) tensor_->Unmap();
+    }
 
    private:
-    Tensor *tensor_;
+    const Tensor *tensor_;
   };
 
  private:

@@ -233,7 +234,7 @@ class Tensor {
   // read or write
   void *buffer_;
   // Mapped buffer
-  void *data_;
+  mutable void *data_;
   vector<index_t> shape_;
 
   DISABLE_COPY_AND_ASSIGN(Tensor);
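Making Map(), Unmap() and SizeOfType() const (with data_ declared mutable) is what allows MappingGuard to take a const Tensor *, so kernels can map read-only inputs. A minimal usage sketch, assuming a populated input tensor:

// Sketch: RAII mapping of a read-only tensor, as the new kernels do.
void PrintFirstValue(const Tensor *input) {
  Tensor::MappingGuard input_mapper(input);  // Map() in ctor, Unmap() in dtor
  const float *data = input->data<float>();  // valid while the guard is alive
  LOG(INFO) << "first value: " << data[0];
}  // guard leaves scope here and unmaps the device buffer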
mace/examples/mace_run.cc

@@ -84,7 +84,7 @@ int main(int argc, char **argv) {
   Workspace ws;
   ws.LoadModelTensor(net_def, DeviceType::CPU);
 
   Tensor *input_tensor =
-      ws.CreateTensor(input_node + ":0", cpu_allocator(), DT_FLOAT);
+      ws.CreateTensor(input_node + ":0", GetDeviceAllocator(DeviceType::CPU), DT_FLOAT);
   input_tensor->Resize(shape);
   float *input_data = input_tensor->mutable_data<float>();
mace/kernels/BUILD

@@ -11,8 +11,8 @@ load("//mace:mace.bzl", "if_android")
 cc_library(
     name = "kernels",
-    srcs = glob(["*.cc"]) + if_android(glob(["neon/*.cc"])),
-    hdrs = glob(["*.h"]) + if_android(glob(["neon/*.h"])),
+    srcs = glob(["*.cc"]) + if_android(glob(["neon/*.cc", "opencl/*.cc"])),
+    hdrs = glob(["*.h"]) + if_android(glob(["neon/*.h", "opencl/*.h"])),
     copts = [
         "-std=c++11",
         "-fopenmp",

@@ -20,6 +20,7 @@ cc_library(
     linkopts = if_android(["-lm"]),
     deps = [
         "//mace/core",
+        "//mace/core:opencl_runtime",
         "//mace/utils",
     ],
 )
mace/kernels/conv_2d.h

@@ -19,27 +19,26 @@ struct Conv2dFunctor {
                 const int *dilations)
       : strides_(strides), paddings_(paddings), dilations_(dilations) {}
 
-  void operator()(const T *input,  // NCHW
-                  const index_t *input_shape,
-                  const T *filter,  // c_out, c_in, kernel_h, kernel_w
-                  const index_t *filter_shape,
-                  const T *bias,  // c_out
-                  T *output,      // NCHW
-                  const index_t *output_shape) {
+  void operator()(const Tensor *input,
+                  const Tensor *filter,
+                  const Tensor *bias,
+                  Tensor *output) {
+    MACE_CHECK_NOTNULL(input);
+    MACE_CHECK_NOTNULL(filter);
     MACE_CHECK_NOTNULL(output);
 
-    index_t batch = output_shape[0];
-    index_t channels = output_shape[1];
-    index_t height = output_shape[2];
-    index_t width = output_shape[3];
+    index_t batch = output->shape()[0];
+    index_t channels = output->shape()[1];
+    index_t height = output->shape()[2];
+    index_t width = output->shape()[3];
 
-    index_t input_batch = input_shape[0];
-    index_t input_channels = input_shape[1];
-    index_t input_height = input_shape[2];
-    index_t input_width = input_shape[3];
+    index_t input_batch = input->shape()[0];
+    index_t input_channels = input->shape()[1];
+    index_t input_height = input->shape()[2];
+    index_t input_width = input->shape()[3];
 
-    index_t kernel_h = filter_shape[2];
-    index_t kernel_w = filter_shape[3];
+    index_t kernel_h = filter->shape()[2];
+    index_t kernel_w = filter->shape()[3];
 
     int stride_h = strides_[0];
     int stride_w = strides_[1];

@@ -57,17 +56,26 @@ struct Conv2dFunctor {
     index_t kernel_size = input_channels * kernel_h * kernel_w;
 
+    Tensor::MappingGuard input_mapper(input);
+    Tensor::MappingGuard filter_mapper(filter);
+    Tensor::MappingGuard bias_mapper(bias);
+    Tensor::MappingGuard output_mapper(output);
+
+    auto input_data = input->data<T>();
+    auto filter_data = filter->data<T>();
+    auto bias_data = bias == nullptr ? nullptr : bias->data<T>();
+    auto output_data = output->mutable_data<T>();
+
 #pragma omp parallel for collapse(2)
     for (int n = 0; n < batch; ++n) {
       for (int c = 0; c < channels; ++c) {
-        T bias_channel = bias ? bias[c] : 0;
+        T bias_channel = bias_data ? bias_data[c] : 0;
         for (int h = 0; h < height; ++h) {
           for (int w = 0; w < width; ++w) {
             index_t offset = n * channels * height * width +
                              c * height * width + h * width + w;
-            output[offset] = bias_channel;
+            output_data[offset] = bias_channel;
             T sum = 0;
-            const T *filter_ptr = filter + c * kernel_size;
+            const T *filter_ptr = filter_data + c * kernel_size;
             for (int inc = 0; inc < input_channels; ++inc) {
               for (int kh = 0; kh < kernel_h; ++kh) {
                 for (int kw = 0; kw < kernel_w; ++kw) {

@@ -86,13 +94,13 @@ struct Conv2dFunctor {
                       n * input_channels * input_height * input_width +
                       inc * input_height * input_width + inh * input_width +
                       inw;
-                  sum += input[input_offset] * *filter_ptr;
+                  sum += input_data[input_offset] * *filter_ptr;
                 }
                 ++filter_ptr;
               }
             }
-            output[offset] += sum;
+            output_data[offset] += sum;
           }
         }
       }

@@ -105,14 +113,10 @@ struct Conv2dFunctor {
 };
 
 template <>
 void Conv2dFunctor<DeviceType::NEON, float>::operator()(
-    const float *input,
-    const index_t *input_shape,
-    const float *filter,
-    const index_t *filter_shape,
-    const float *bias,
-    float *output,
-    const index_t *output_shape);
+    const Tensor *input,
+    const Tensor *filter,
+    const Tensor *bias,
+    Tensor *output);
 
 }  // namespace kernels
 }  // namespace mace
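The reference loop addresses all NCHW buffers through one flattened offset; a small worked example of that index arithmetic (the helper name is illustrative):

// offset(n, c, h, w) = n*C*H*W + c*H*W + h*W + w  ==  ((n*C + c)*H + h)*W + w
index_t NchwOffset(index_t n, index_t c, index_t h, index_t w,
                   index_t C, index_t H, index_t W) {
  return ((n * C + c) * H + h) * W + w;
}
// For a 1x2x3x4 tensor (C=2, H=3, W=4), element (0, 1, 2, 3) lands at
// 0*24 + 1*12 + 2*4 + 3 = 23, the last slot of the 24-element buffer.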
mace/kernels/neon/conv_2d_neon.cc

@@ -41,14 +41,14 @@ extern void Conv2dNeonK5x5S1(const float *input,
                              const index_t *output_shape);
 
 template <>
 void Conv2dFunctor<DeviceType::NEON, float>::operator()(
-    const float *input,
-    const index_t *input_shape,
-    const float *filter,
-    const index_t *filter_shape,
-    const float *bias,
-    float *output,
-    const index_t *output_shape) {
+    const Tensor *input,
+    const Tensor *filter,
+    const Tensor *bias,
+    Tensor *output) {
+  MACE_CHECK_NOTNULL(input);
+  MACE_CHECK_NOTNULL(filter);
+  MACE_CHECK_NOTNULL(output);
   typedef void (*Conv2dNeonFunction)(
       const float *input, const index_t *input_shape, const float *filter,
       const index_t *filter_shape, const float *bias, float *output,

@@ -61,8 +61,8 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(
       {nullptr, nullptr},
       {Conv2dNeonK5x5S1, nullptr}};
   // not implement yet
 
-  index_t kernel_h = filter_shape[2];
-  index_t kernel_w = filter_shape[3];
+  index_t kernel_h = filter->shape()[2];
+  index_t kernel_w = filter->shape()[3];
   if (kernel_h != kernel_w || kernel_h > 5 || strides_[0] != strides_[1] ||
       strides_[0] > 2 || dilations_[0] != 1 || dilations_[1] != 1 ||
       selector[kernel_h - 1][strides_[0] - 1] == nullptr) {

@@ -71,21 +71,32 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(
         << " stride " << strides_[0] << "x" << strides_[1]
         << " is not implemented yet, using slow version";
     Conv2dFunctor<DeviceType::CPU, float>(strides_, paddings_, dilations_)(
-        input, input_shape, filter, filter_shape, bias, output, output_shape);
+        input, filter, bias, output);
     return;
   }
 
+  Tensor::MappingGuard input_mapper(input);
+  Tensor::MappingGuard filter_mapper(filter);
+  Tensor::MappingGuard bias_mapper(bias);
+  Tensor::MappingGuard output_mapper(output);
+
+  auto input_data = input->data<float>();
+  auto input_shape = input->shape().data();
+  auto filter_data = filter->data<float>();
+  auto bias_data = bias == nullptr ? nullptr : bias->data<float>();
+  auto output_data = output->mutable_data<float>();
+  auto output_shape = output->shape().data();
+
   // Keep this alive during kernel execution
   Tensor padded_input;
   if (paddings_[0] > 0 || paddings_[1] > 0) {
-    ConstructInputWithPadding(input, input_shape, paddings_.data(),
-                              &padded_input);
-    input = padded_input.data<float>();
+    ConstructInputWithPadding(input_data, input->shape().data(),
+                              paddings_.data(), &padded_input);
+    input_data = padded_input.data<float>();
     input_shape = padded_input.shape().data();
   }
   auto conv2d_neon_func = selector[kernel_h - 1][strides_[0] - 1];
-  conv2d_neon_func(input, input_shape, filter, nullptr, bias, output,
-                   output_shape);
+  conv2d_neon_func(input_data, input_shape, filter_data, nullptr,
+                   bias_data, output_data, output_shape);
 }
 
 }  // namespace kernels
mace/kernels/opencl/conv_2d_opencl.cc  (new file, mode 100644)

//
// Copyright (c) 2017 XiaoMi All rights reserved.
//

#include "mace/kernels/conv_2d.h"
#include "mace/kernels/conv_pool_2d_util.h"

namespace mace {
namespace kernels {

extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *filter,
                               const Tensor *bias, Tensor *output);

template <>
void Conv2dFunctor<DeviceType::OPENCL, float>::operator()(const Tensor *input,
                                                          const Tensor *filter,
                                                          const Tensor *bias,
                                                          Tensor *output) {
  typedef void (*Conv2dOpenclFunction)(const Tensor *input,
                                       const Tensor *filter,
                                       const Tensor *bias, Tensor *output);
  // Selection matrix: kernel_size x stride_size
  static const Conv2dOpenclFunction selector[5][2] = {
      {Conv2dOpenclK1x1S1, nullptr},
      {nullptr, nullptr},
      {nullptr, nullptr},
      {nullptr, nullptr},
      {nullptr, nullptr}};

  index_t kernel_h = filter->shape()[2];
  index_t kernel_w = filter->shape()[3];
  if (kernel_h != kernel_w || kernel_h > 5 || strides_[0] != strides_[1] ||
      strides_[0] > 2 || dilations_[0] != 1 || dilations_[1] != 1 ||
      selector[kernel_h - 1][strides_[0] - 1] == nullptr) {
    LOG(WARNING) << "OpenCL conv2d kernel with "
                 << "filter" << kernel_h << "x" << kernel_w << ","
                 << " stride " << strides_[0] << "x" << strides_[1]
                 << " is not implemented yet, using slow version";
    // TODO(heliangliang) The CPU/NEON kernel should map the buffer
    Conv2dFunctor<DeviceType::CPU, float>(strides_, paddings_, dilations_)(
        input, filter, bias, output);
    return;
  }

  MACE_CHECK(paddings_[0] == 1 && paddings_[1] == 1, "Padding not supported");

  auto conv2d_func = selector[kernel_h - 1][strides_[0] - 1];
  conv2d_func(input, filter, bias, output);
}

}  // namespace kernels
}  // namespace mace
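The functor keeps a kernel-size x stride dispatch table and falls back to the CPU functor whenever the table entry is nullptr; in this commit only the 1x1, stride-1 slot is populated. A self-contained sketch of that guard (the helper is illustrative, not part of the commit):

typedef void (*Conv2dOpenclFunction)(const Tensor *, const Tensor *,
                                     const Tensor *, Tensor *);

// Mirrors the fast-path condition in operator() above.
bool HasOpenclFastPath(const Conv2dOpenclFunction (&selector)[5][2],
                       index_t kernel_h, index_t kernel_w,
                       const int *strides, const int *dilations) {
  return kernel_h == kernel_w && kernel_h <= 5 && strides[0] == strides[1] &&
         strides[0] <= 2 && dilations[0] == 1 && dilations[1] == 1 &&
         selector[kernel_h - 1][strides[0] - 1] != nullptr;
}
// A 1x1 filter with stride 1 and dilation 1 selects Conv2dOpenclK1x1S1;
// a 3x3 filter with the same strides returns false, so the functor logs a
// warning and falls back to Conv2dFunctor<DeviceType::CPU, float>.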
mace/kernels/opencl/conv_2d_opencl_1x1.cc  (new file, mode 100644)

//
// Copyright (c) 2017 XiaoMi All rights reserved.
//

#include "mace/core/common.h"
#include "mace/kernels/conv_2d.h"
#include "mace/utils/utils.h"

namespace mace {
namespace kernels {

static constexpr index_t kInputChannelBlockSize = 2;
static constexpr index_t kOutputChannelBlockSize = 4;

extern void Conv2dOpenclK1x1S1(const Tensor *input,
                               const Tensor *filter,
                               const Tensor *bias,
                               Tensor *output) {
  const index_t batch = output->shape()[0];
  const index_t channels = output->shape()[1];
  const index_t height = output->shape()[2];
  const index_t width = output->shape()[3];

  const index_t input_batch = input->shape()[0];
  const index_t input_channels = input->shape()[1];
  const index_t input_height = input->shape()[2];
  const index_t input_width = input->shape()[3];

  MACE_CHECK(input_batch == batch && input_height == height &&
             input_width == width);

  const index_t total_pixels = height * width;
  const index_t round_up_channels = RoundUp(channels, kOutputChannelBlockSize);
};

}  // namespace kernels
}  // namespace mace
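So far the 1x1 kernel only derives its blocking parameters; RoundUp comes from mace/utils/utils.h and is not shown in this diff, but presumably rounds a count up to the next multiple of the block size. A sketch of that presumed behaviour:

// Presumed semantics of RoundUp (illustrative reimplementation, not the
// actual mace/utils/utils.h code): round i up to the next multiple of factor.
template <typename T>
T RoundUpSketch(T i, T factor) {
  return (i + factor - 1) / factor * factor;
}
// With kOutputChannelBlockSize = 4, RoundUpSketch<index_t>(30, 4) == 32,
// i.e. a 30-channel output would be processed as 8 blocks of 4 channels.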
mace/ops/conv_2d.cc

@@ -12,4 +12,6 @@ REGISTER_CPU_OPERATOR(Conv2D, Conv2dOp<DeviceType::CPU, float>);
 REGISTER_NEON_OPERATOR(Conv2D, Conv2dOp<DeviceType::NEON, float>);
 #endif  // __ARM_NEON
 
+REGISTER_OPENCL_OPERATOR(Conv2D, Conv2dOp<DeviceType::OPENCL, float>);
+
 }  // namespace mace
mace/ops/conv_2d.h

@@ -25,12 +25,7 @@ class Conv2dOp : public ConvPool2dOpBase<D, T> {
   bool Run() override {
     const Tensor *input = this->Input(INPUT);
     const Tensor *filter = this->Input(FILTER);
-    const T *bias_data = nullptr;
-    if (this->InputSize() >= 3) {
-      const Tensor *bias = this->Input(BIAS);
-      bias_data = bias->data<T>();
-    }
+    const Tensor *bias = this->InputSize() >= 3 ? this->Input(BIAS) : nullptr;
     Tensor *output = this->Output(OUTPUT);
 
     std::vector<index_t> output_shape(4);

@@ -42,9 +37,7 @@ class Conv2dOp : public ConvPool2dOpBase<D, T> {
     output->Resize(output_shape);
 
     functor_.paddings_ = paddings;
-    functor_(input->data<T>(), input->shape().data(), filter->data<T>(),
-             filter->shape().data(), bias_data, output->mutable_data<T>(),
-             output->shape().data());
+    functor_(input, filter, bias, output);
 
     return true;
   }
mace/ops/conv_2d_benchmark.cc

@@ -69,7 +69,8 @@ static void Conv2d(int iters,
 #define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE)        \
   BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU);  \
-  BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, NEON);
+  BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, NEON); \
+  BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL);
 
 BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float);
 BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float);  // Test bad alignments
mace/ops/ops_test_util.h

@@ -48,7 +48,7 @@ class OpsTestNet {
                 const std::vector<index_t> &shape,
                 const std::vector<T> &data) {
     Tensor *input =
-        ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
+        ws_.CreateTensor(name, GetDeviceAllocator(DeviceType::CPU), DataTypeToEnum<T>::v());
     input->Resize(shape);
     T *input_data = input->mutable_data<T>();
     MACE_CHECK(static_cast<size_t>(input->size()) == data.size());

@@ -60,7 +60,7 @@ class OpsTestNet {
                 const std::vector<index_t> &shape,
                 const T data) {
     Tensor *input =
-        ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
+        ws_.CreateTensor(name, GetDeviceAllocator(DeviceType::CPU), DataTypeToEnum<T>::v());
     input->Resize(shape);
     T *input_data = input->mutable_data<T>();
     std::fill(input_data, input_data + input->size(), data);

@@ -71,7 +71,7 @@ class OpsTestNet {
                 const std::vector<index_t> &shape,
                 bool positive = false) {
     Tensor *input =
-        ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
+        ws_.CreateTensor(name, GetDeviceAllocator(DeviceType::CPU), DataTypeToEnum<T>::v());
     input->Resize(shape);
     float *input_data = input->mutable_data<T>();

@@ -206,7 +206,7 @@ void GenerateRandomIntTypeData(const std::vector<index_t> &shape,
 template <typename T>
 unique_ptr<Tensor> CreateTensor(const std::vector<index_t> &shape,
                                 const std::vector<T> &data) {
-  unique_ptr<Tensor> res(new Tensor(cpu_allocator(), DataTypeToEnum<T>::v()));
+  unique_ptr<Tensor> res(
+      new Tensor(GetDeviceAllocator(DeviceType::CPU), DataTypeToEnum<T>::v()));
   res->Resize(shape);
   T *input_data = res->mutable_data<T>();
   memcpy(input_data, data.data(), data.size() * sizeof(T));
mace/tools/benchmark/benchmark_model.cc

@@ -269,7 +269,7 @@ int Main(int argc, char **argv) {
   // Load inputs
   for (size_t i = 0; i < inputs_count; ++i) {
     Tensor *input_tensor =
-        ws.CreateTensor(input_layers[i], cpu_allocator(), DT_FLOAT);
+        ws.CreateTensor(input_layers[i], GetDeviceAllocator(DeviceType::CPU), DT_FLOAT);
     vector<index_t> shapes;
     str_util::SplitAndParseToInts(input_layer_shapes[i], ',', &shapes);
     input_tensor->Resize(shapes);