Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
3671880b
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
3671880b
编写于
9月 07, 2017
作者:
L
Liangliang He
提交者:
wuchenghui
9月 11, 2017
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add Conv2d implementation
上级
e0ac4622
变更
7
显示空白变更内容
内联
并排
Showing
7 changed files
with
438 additions
and
37 deletions
+438
-37
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+118
-0
mace/ops/BUILD
mace/ops/BUILD
+12
-0
mace/ops/conv_2d.cc
mace/ops/conv_2d.cc
+5
-20
mace/ops/conv_2d.h
mace/ops/conv_2d.h
+32
-17
mace/ops/conv_2d_test.cc
mace/ops/conv_2d_test.cc
+144
-0
mace/ops/conv_pool_2d_base.h
mace/ops/conv_pool_2d_base.h
+83
-0
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+44
-0
未找到文件。
mace/kernels/conv_2d.h
0 → 100644
浏览文件 @
3671880b
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//

#ifndef MACE_KERNELS_CONV_2D_H_
#define MACE_KERNELS_CONV_2D_H_

#include "mace/core/tensor.h"

namespace mace {
namespace kernels {

// Reference (naive) 2D convolution functor.
//
// Layouts: input/output are NCHW; the filter is HWIO
// (kernel_h, kernel_w, in_channels, out_channels), matching TensorFlow.
// The stride/padding/dilation pointers are borrowed [h, w] pairs; the
// caller must keep them alive for the lifetime of the functor.
template <DeviceType D, typename T>
class Conv2dFunctor {
 public:
  Conv2dFunctor(const int *strides,    // [stride_h, stride_w]
                const int *paddings,   // [padding_h, padding_w]
                const int *dilations)  // [dilation_h, dilation_w]
      : strides_(strides), paddings_(paddings), dilations_(dilations) {}

  // Runs the convolution.
  //   input        : NCHW input buffer
  //   input_shape  : [batch, in_channels, in_height, in_width]
  //   filter       : HWIO filter buffer
  //   filter_shape : [kernel_h, kernel_w, in_channels, out_channels]
  //   bias         : per-output-channel bias; may be nullptr (treated as 0)
  //   output       : NCHW output buffer, pre-allocated by the caller
  //   output_shape : [batch, out_channels, out_height, out_width]
  void operator()(const T *input,
                  const index_t *input_shape,
                  const T *filter,
                  const index_t *filter_shape,
                  const T *bias,
                  T *output,
                  const index_t *output_shape) {
    MACE_CHECK_NOTNULL(output);

    index_t batch = output_shape[0];
    index_t channels = output_shape[1];
    index_t height = output_shape[2];
    index_t width = output_shape[3];

    index_t input_batch = input_shape[0];
    index_t input_channels = input_shape[1];
    index_t input_height = input_shape[2];
    index_t input_width = input_shape[3];

    int kernel_h = filter_shape[0];
    int kernel_w = filter_shape[1];

    int stride_h = strides_[0];
    int stride_w = strides_[1];

    int dilation_h = dilations_[0];
    int dilation_w = dilations_[1];

    MACE_CHECK(batch == input_batch, "Input/Output batch size mismatch");
    // Fix: also validate channel agreement between filter and the
    // input/output tensors before indexing with these dimensions.
    MACE_CHECK(input_channels == filter_shape[2],
               "Input/Filter channel mismatch");
    MACE_CHECK(channels == filter_shape[3],
               "Output/Filter channel mismatch");

    // The left-upper most offset of the padded input.
    int padded_h_start = 0 - paddings_[0];
    int padded_w_start = 0 - paddings_[1];
    int padded_h_stop = input_height + paddings_[0];
    int padded_w_stop = input_width + paddings_[1];

    for (int n = 0; n < batch; ++n) {
#pragma omp parallel for
      for (int c = 0; c < channels; ++c) {
        for (int h = 0; h < height; ++h) {
          for (int w = 0; w < width; ++w) {
            index_t offset = n * channels * height * width +
                             c * height * width + h * width + w;
            T sum = 0;
            for (int inc = 0; inc < input_channels; ++inc) {
              for (int kh = 0; kh < kernel_h; ++kh) {
                for (int kw = 0; kw < kernel_w; ++kw) {
                  /*
                   * TODO The tensorflow filter order is HWCiCo.
                   * We should consider other order for different
                   * implementation to optimize memory access.
                   */
                  int filter_offset =
                      kh * kernel_w * input_channels * channels +
                      kw * input_channels * channels + inc * channels + c;
                  int inh = padded_h_start + h * stride_h + dilation_h * kh;
                  int inw = padded_w_start + w * stride_w + dilation_w * kw;
                  if (inh < 0 || inh >= input_height || inw < 0 ||
                      inw >= input_width) {
                    // Outside the real input but (checked) inside the
                    // padded region: contributes zero, nothing to add.
                    MACE_CHECK(inh >= padded_h_start && inh < padded_h_stop &&
                                   inw >= padded_w_start &&
                                   inw < padded_w_stop,
                               "Out of range read from input: ", inh, ", ",
                               inw);
                  } else {
                    index_t input_offset =
                        n * input_channels * input_height * input_width +
                        inc * input_height * input_width +
                        inh * input_width + inw;
                    sum += input[input_offset] * filter[filter_offset];
                  }
                }
              }
            }
            // Generalization: a null bias is treated as all zeros; the
            // original unconditionally dereferenced bias[c].
            output[offset] = (bias == nullptr) ? sum : sum + bias[c];
          }
        }
      }
    }
  }

 private:
  const int *strides_;    // [stride_h, stride_w]
  const int *paddings_;   // [padding_h, padding_w]
  const int *dilations_;  // [dilation_h, dilation_w]
};

}  // namespace kernels
}  // namespace mace

#endif  // MACE_KERNELS_CONV_2D_H_
mace/ops/BUILD
浏览文件 @
3671880b
...
...
@@ -83,3 +83,15 @@ cc_test(
"@gtest//:gtest_main"
,
],
)
# Unit tests for the Conv2d operator.
cc_test(
    name = "conv_2d_test",
    srcs = ["conv_2d_test.cc"],
    copts = ["-std=c++11"],
    linkstatic = 1,
    deps = [
        ":ops",
        ":test",
        "@gtest//:gtest_main",
    ],
)
mace/ops/conv_2d.cc
浏览文件 @
3671880b
...
...
@@ -7,25 +7,10 @@
namespace mace {

// CPU specialization of Conv2dOp::Run.
//
// NOTE(review): the scraped diff shows this body with leftover debug
// logging — it only logs the configured kernel size and the first input
// values and does not compute the convolution here; the real computation
// lives in kernels::Conv2dFunctor. The fetches of filter/bias/output are
// kept (unused) to preserve the original behavior.
template <>
bool Conv2dOp<DeviceType::CPU, float>::Run() {
  const Tensor *input = Input(INPUT);
  const Tensor *filter = Input(FILTER);
  const Tensor *bias = Input(BIAS);
  Tensor *output = Output(OUTPUT);

  // Test
  VLOG(0) << "conv_2d([" << kernels_[0] << ", " << kernels_[1] << "], )";

  const float *input_data = input->data<float>();
  // Fix: the original dumped 6 elements unconditionally, which over-reads
  // when the input tensor holds fewer than 6 values.
  index_t num_elements = 1;
  for (const index_t dim : input->shape()) {
    num_elements *= dim;
  }
  for (index_t i = 0; i < 6 && i < num_elements; ++i) {
    VLOG(0) << input_data[i];
  }
  return true;
}

REGISTER_CPU_OPERATOR(Conv2d, Conv2dOp<DeviceType::CPU, float>);

#if __ARM_NEON
REGISTER_NEON_OPERATOR(Conv2d, Conv2dOp<DeviceType::NEON, float>);
#endif  // __ARM_NEON

}  // namespace mace
mace/ops/conv_2d.h
浏览文件 @
3671880b
...
...
@@ -5,28 +5,43 @@
#ifndef MACE_OPS_CONV_2D_H_
#define MACE_OPS_CONV_2D_H_
#include <memory>
#include "mace/core/operator.h"
#include "mace/kernels/conv_2d.h"
#include "mace/ops/conv_pool_2d_base.h"
namespace
mace
{
// 2D convolution operator: computes output geometry via the shared
// ConvPool2dOpBase helper, then delegates to kernels::Conv2dFunctor.
//
// NOTE(review): the scraped diff interleaved the removed (Operator-based,
// kernels_/strides_-member) and added versions of this class; this is the
// post-commit version, deduplicated.
template <DeviceType D, typename T>
class Conv2dOp : public ConvPool2dOpBase<D, T> {
 public:
  Conv2dOp(const OperatorDef &op_def, Workspace *ws)
      : ConvPool2dOpBase<D, T>(op_def, ws) {}

  bool Run() override {
    const Tensor *input = this->Input(INPUT);
    const Tensor *filter = this->Input(FILTER);
    const Tensor *bias = this->Input(BIAS);
    Tensor *output = this->Output(OUTPUT);

    // Derive the output shape and the implicit zero-padding sizes from the
    // input/filter shapes plus the stride/padding/dilation arguments.
    std::vector<index_t> output_shape;
    std::vector<int> paddings;
    this->CalcPaddingAndOutputSize(input, filter, &output_shape, &paddings);
    output->Resize(output_shape);

    auto conv2d = kernels::Conv2dFunctor<D, T>(
        this->strides_.data(), paddings.data(), this->dilations_.data());
    conv2d(input->data<T>(), input->shape().data(),
           filter->data<T>(), filter->shape().data(),
           bias->data<T>(),
           output->mutable_data<T>(), output->shape().data());
    return true;
  }

 protected:
  OP_INPUT_TAGS(INPUT, FILTER, BIAS);
  OP_OUTPUT_TAGS(OUTPUT);
};
...
...
mace/ops/conv_2d_test.cc
0 → 100644
浏览文件 @
3671880b
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
#include "mace/ops/conv_2d.h"
using
namespace
mace
;
// Test fixture for Conv2d operator tests; inherits the op-building and
// op-running helpers from OpsTestBase.
class Conv2dOpTest : public OpsTestBase {};
// 3x3 VALID convolution over a 3x3 two-channel input: one output pixel.
TEST_F(Conv2dOpTest, Simple_VALID) {
  // Construct graph
  OpDefBuilder("Conv2d", "Conv2dTest")
      .Input("Input")
      .Input("Filter")
      .Input("Bias")
      .Output("Output")
      .Finalize(operator_def());

  // Add args
  AddIntsArg("strides", {1, 1});
  AddIntArg("padding",
            static_cast<int>(
                Conv2dOp<DeviceType::CPU, float>::Padding::VALID));
  AddIntsArg("dilations", {1, 1});

  // Add input data: 1x2x3x3 all-ones input (NCHW).
  AddInputFromArray<float>("Input", {1, 2, 3, 3},
                           {1, 1, 1, 1, 1, 1, 1, 1, 1,
                            1, 1, 1, 1, 1, 1, 1, 1, 1});
  // 3x3 filter, 2 input channels, 1 output channel, all ones (HWIO).
  AddInputFromArray<float>("Filter", {3, 3, 2, 1},
                           {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                            1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                            1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
  AddInputFromArray<float>("Bias", {1}, {0.1f});

  // Run
  RunOp();

  // Check: 18 ones * 1.0 + 0.1 bias = 18.1.
  Tensor expected = CreateTensor<float>({1, 1, 1, 1}, {18.1f});
  ExpectTensorNear<float>(expected, *GetOutput("Output"), 0.001);
}
// 3x3 SAME convolution over a 3x3 input: spatial size is preserved.
TEST_F(Conv2dOpTest, Simple_SAME) {
  // Construct graph
  OpDefBuilder("Conv2d", "Conv2dTest")
      .Input("Input")
      .Input("Filter")
      .Input("Bias")
      .Output("Output")
      .Finalize(operator_def());

  // Add args
  AddIntsArg("strides", {1, 1});
  AddIntArg("padding",
            static_cast<int>(
                Conv2dOp<DeviceType::CPU, float>::Padding::SAME));
  AddIntsArg("dilations", {1, 1});

  // Add input data: 1x2x3x3 all-ones input (NCHW).
  AddInputFromArray<float>("Input", {1, 2, 3, 3},
                           {1, 1, 1, 1, 1, 1, 1, 1, 1,
                            1, 1, 1, 1, 1, 1, 1, 1, 1});
  // 3x3 filter, 2 input channels, 1 output channel, all ones (HWIO).
  AddInputFromArray<float>("Filter", {3, 3, 2, 1},
                           {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                            1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                            1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
  AddInputFromArray<float>("Bias", {1}, {0.1f});

  // Run
  RunOp();

  // Check: corners see 4 taps per channel, edges 6, center 9 (+0.1 bias).
  Tensor expected = CreateTensor<float>(
      {1, 1, 3, 3},
      {8.1f, 12.1f, 8.1f, 12.1f, 18.1f, 12.1f, 8.1f, 12.1f, 8.1f});
  ExpectTensorNear<float>(expected, *GetOutput("Output"), 0.001);
}
// Stride-2 SAME convolution with two output channels (weights 1.0 / 0.5).
TEST_F(Conv2dOpTest, Combined) {
  // Construct graph
  OpDefBuilder("Conv2d", "Conv2dTest")
      .Input("Input")
      .Input("Filter")
      .Input("Bias")
      .Output("Output")
      .Finalize(operator_def());

  // Add args
  AddIntsArg("strides", {2, 2});
  AddIntArg("padding",
            static_cast<int>(
                Conv2dOp<DeviceType::CPU, float>::Padding::SAME));
  AddIntsArg("dilations", {1, 1});

  // Add input data: 1x2x5x5 all-ones input (NCHW).
  AddInputFromArray<float>("Input", {1, 2, 5, 5},
                           {1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                            1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                            1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                            1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                            1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
  // 3x3 filter, 2 input channels, 2 output channels (HWIO): output channel
  // 0 carries all-1.0 weights, output channel 1 all-0.5 weights.
  AddInputFromArray<float>("Filter", {3, 3, 2, 2},
                           {1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f,
                            1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f,
                            1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f,
                            1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f,
                            1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f,
                            1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f});
  AddInputFromArray<float>("Bias", {2}, {0.1f, 0.2f});

  // Run
  RunOp();

  // Check: channel 1 values are half of channel 0 plus its own bias.
  Tensor expected = CreateTensor<float>(
      {1, 2, 3, 3},
      {8.1f, 12.1f, 8.1f, 12.1f, 18.1f, 12.1f, 8.1f, 12.1f, 8.1f,
       4.2f, 6.2f, 4.2f, 6.2f, 9.2f, 6.2f, 4.2f, 6.2f, 4.2f});
  ExpectTensorNear<float>(expected, *GetOutput("Output"), 0.001);
}
// TODO: we need more tests (e.g. strides > 1 with VALID/FULL padding, dilations > 1, batch > 1)
mace/ops/conv_pool_2d_base.h
0 → 100644
浏览文件 @
3671880b
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//

#ifndef MACE_OPS_CONV_POOL_2D_BASE_H_
#define MACE_OPS_CONV_POOL_2D_BASE_H_

#include "mace/core/operator.h"

namespace mace {

// Common base class for convolution and pooling operators: parses the
// shared "strides" / "padding" / "dilations" operator arguments and
// computes the output geometry.
template <DeviceType D, class T>
class ConvPool2dOpBase : public Operator<D, T> {
 public:
  ConvPool2dOpBase(const OperatorDef &op_def, Workspace *ws)
      : Operator<D, T>(op_def, ws),
        strides_(OperatorBase::GetRepeatedArgument<int>("strides")),
        // "padding" defaults to SAME when the argument is absent.
        padding_(static_cast<Padding>(OperatorBase::GetSingleArgument<int>(
            "padding", static_cast<int>(SAME)))),
        dilations_(OperatorBase::GetRepeatedArgument<int>("dilations")) {}

  // Computes the NCHW output shape and the symmetric zero-padding sizes
  // ([pad_h, pad_w]) implied by the configured padding type.
  //   input        : NCHW input tensor
  //   filter       : HWIO filter tensor
  //   output_shape : out-param, resized to 4 (NCHW)
  //   padding_size : out-param, set to [pad_h, pad_w]
  void CalcPaddingAndOutputSize(const Tensor *input,
                                const Tensor *filter,
                                std::vector<index_t> *output_shape,
                                std::vector<int> *padding_size) {
    MACE_CHECK(dilations_[0] > 0 && dilations_[1] > 0,
               "Invalid dilations, must >= 1");
    // Fix: strides were previously unvalidated; a zero stride would divide
    // by zero in the output-size arithmetic below.
    MACE_CHECK(strides_[0] > 0 && strides_[1] > 0,
               "Invalid strides, must >= 1");
    /*
     * Convolution/pooling arithmetic:
     * o = (i + 2 * p - k - (k - 1) * (d - 1)) / s + 1
     * For details, see https://arxiv.org/pdf/1603.07285.pdf or
     * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html
     */
    auto &input_shape = input->shape();
    auto &filter_shape = filter->shape();  // HWIO
    int kernel_h = filter_shape[0];
    int kernel_w = filter_shape[1];
    int output_channel = filter_shape[3];
    MACE_CHECK(input_shape[1] == filter_shape[2], input_shape[1],
               " != ", filter_shape[2]);

    *padding_size = {0, 0};
    switch (padding_) {
      case VALID:
        break;
      case SAME:
        (*padding_size)[0] = kernel_h / 2;
        (*padding_size)[1] = kernel_w / 2;
        break;
      case FULL:
        (*padding_size)[0] = kernel_h - 1;
        (*padding_size)[1] = kernel_w - 1;
        break;
      default:
        MACE_CHECK(false, "Unsupported padding type: ", padding_);
    }

    *output_shape = std::vector<index_t>(4);  // NCHW
    (*output_shape)[0] = input_shape[0];
    (*output_shape)[1] = output_channel;
    (*output_shape)[2] = (input_shape[2] + 2 * (*padding_size)[0] - kernel_h -
                          (kernel_h - 1) * (dilations_[0] - 1)) /
                             strides_[0] +
                         1;
    (*output_shape)[3] = (input_shape[3] + 2 * (*padding_size)[1] - kernel_w -
                          (kernel_w - 1) * (dilations_[1] - 1)) /
                             strides_[1] +
                         1;
  }

  enum Padding {
    VALID = 0,  // No padding
    SAME = 1,   // Pads with half the filter size (rounded down) on both sides
    FULL = 2,   // Pads with one less than the filter size on both sides
  };

 protected:
  std::vector<int> strides_;    // [stride_h, stride_w]
  Padding padding_;
  std::vector<int> dilations_;  // [dilation_h, dilation_w]
};

}  // namespace mace

#endif  // MACE_OPS_CONV_POOL_2D_BASE_H_
mace/ops/ops_test_util.h
浏览文件 @
3671880b
...
...
@@ -5,6 +5,8 @@
#ifndef MACE_OPS_TEST_UTIL_H_
#define MACE_OPS_TEST_UTIL_H_
#include <type_traits>
#include "gtest/gtest.h"
#include "mace/core/common.h"
#include "mace/core/tensor.h"
...
...
@@ -50,6 +52,48 @@ class OpsTestBase : public ::testing::Test {
memcpy
(
input_data
,
data
.
data
(),
data
.
size
()
*
sizeof
(
T
));
}
// Adds a single integer argument to the operator definition under test.
void AddIntArg(const char *name, const int value) {
  auto new_arg = op_def_.add_arg();
  new_arg->set_name(name);
  new_arg->set_i(value);
}
// Adds a single float argument to the operator definition under test.
void AddFloatArg(const char *name, const float value) {
  auto new_arg = op_def_.add_arg();
  new_arg->set_name(name);
  new_arg->set_f(value);
}
// Adds a single string argument to the operator definition under test.
void AddStringArg(const char *name, const char *value) {
  auto new_arg = op_def_.add_arg();
  new_arg->set_name(name);
  new_arg->set_s(value);
}
// Adds a repeated-integer argument to the operator definition under test.
void AddIntsArg(const char *name, const std::vector<int> &values) {
  auto new_arg = op_def_.add_arg();
  new_arg->set_name(name);
  for (const auto value : values) {
    new_arg->add_ints(value);
  }
}
// Adds a repeated-float argument to the operator definition under test.
void AddFloatsArg(const char *name, const std::vector<float> &values) {
  auto new_arg = op_def_.add_arg();
  new_arg->set_name(name);
  for (const auto value : values) {
    new_arg->add_floats(value);
  }
}
// Adds a repeated-string argument to the operator definition under test.
void AddStringsArg(const char *name,
                   const std::vector<const char *> &values) {
  auto new_arg = op_def_.add_arg();
  new_arg->set_name(name);
  for (const auto value : values) {
    new_arg->add_strings(value);
  }
}
// Exposes the OperatorDef being assembled so tests can finalize/extend it.
OperatorDef *operator_def() { return &op_def_; }
bool
RunOp
()
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录