Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
bc576436
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
bc576436
编写于
1月 08, 2018
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix bugs of batch norm and graph transformation.
上级
286d1e21
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
448 addition
and
38 deletion
+448
-38
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+21
-13
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+5
-7
mace/kernels/opencl/cl/batch_norm.cl
mace/kernels/opencl/cl/batch_norm.cl
+1
-1
mace/ops/batch_norm.h
mace/ops/batch_norm.h
+2
-3
mace/ops/folded_batch_norm.h
mace/ops/folded_batch_norm.h
+3
-5
mace/ops/folded_batch_norm_test.cc
mace/ops/folded_batch_norm_test.cc
+393
-0
mace/python/tools/tf_converter.py
mace/python/tools/tf_converter.py
+10
-1
mace/python/tools/tf_converter_lib.py
mace/python/tools/tf_converter_lib.py
+13
-8
未找到文件。
mace/kernels/batch_norm.h
浏览文件 @
bc576436
...
...
@@ -12,16 +12,26 @@
namespace
mace
{
namespace
kernels
{
struct
BatchNormFunctorBase
{
BatchNormFunctorBase
(
bool
folded_constant
,
bool
fused_relu
)
:
folded_constant_
(
folded_constant
),
fused_relu_
(
fused_relu
){}
const
bool
folded_constant_
;
const
bool
fused_relu_
;
};
template
<
DeviceType
D
,
typename
T
>
struct
BatchNormFunctor
{
struct
BatchNormFunctor
:
BatchNormFunctorBase
{
BatchNormFunctor
(
const
bool
folded_constant
,
const
bool
fused_relu
)
:
BatchNormFunctorBase
(
folded_constant
,
fused_relu
)
{}
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
const
Tensor
*
mean
,
const
Tensor
*
var
,
const
float
epsilon
,
const
bool
folded_constant
,
const
bool
fused_relu
,
Tensor
*
output
,
StatsFuture
*
future
)
{
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
...
...
@@ -45,17 +55,17 @@ struct BatchNormFunctor {
const
T
*
input_ptr
=
input
->
data
<
T
>
();
const
T
*
scale_ptr
=
scale
->
data
<
T
>
();
const
T
*
offset_ptr
=
offset
->
data
<
T
>
();
const
T
*
mean_ptr
=
mean
->
data
<
T
>
();
const
T
*
var_ptr
=
var
->
data
<
T
>
();
T
*
output_ptr
=
output
->
mutable_data
<
T
>
();
vector
<
T
>
new_scale
;
vector
<
T
>
new_offset
;
if
(
!
folded_constant
)
{
if
(
!
folded_constant
_
)
{
new_scale
.
resize
(
channels
);
new_offset
.
resize
(
channels
);
Tensor
::
MappingGuard
mean_mapper
(
mean
);
Tensor
::
MappingGuard
var_mapper
(
var
);
const
T
*
mean_ptr
=
mean
->
data
<
T
>
();
const
T
*
var_ptr
=
var
->
data
<
T
>
();
#pragma omp parallel for
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
new_scale
[
c
]
=
scale_ptr
[
c
]
/
std
::
sqrt
(
var_ptr
[
c
]
+
epsilon
);
...
...
@@ -70,12 +80,12 @@ struct BatchNormFunctor {
for
(
index_t
h
=
0
;
h
<
height
;
++
h
)
{
for
(
index_t
w
=
0
;
w
<
width
;
++
w
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
if
(
folded_constant
)
{
if
(
folded_constant
_
)
{
output_ptr
[
pos
]
=
scale_ptr
[
c
]
*
input_ptr
[
pos
]
+
offset_ptr
[
c
];
}
else
{
output_ptr
[
pos
]
=
new_scale
[
c
]
*
input_ptr
[
pos
]
+
new_offset
[
c
];
}
if
(
fused_relu
)
{
if
(
fused_relu
_
)
{
output_ptr
[
pos
]
=
std
::
max
(
output_ptr
[
pos
],
static_cast
<
T
>
(
0
));
}
++
pos
;
...
...
@@ -94,21 +104,19 @@ void BatchNormFunctor<DeviceType::NEON, float>::operator()(
const
Tensor
*
mean
,
const
Tensor
*
var
,
const
float
epsilon
,
const
bool
folded_constant
,
const
bool
fused_relu
,
Tensor
*
output
,
StatsFuture
*
future
);
template
<
typename
T
>
struct
BatchNormFunctor
<
DeviceType
::
OPENCL
,
T
>
{
struct
BatchNormFunctor
<
DeviceType
::
OPENCL
,
T
>
:
BatchNormFunctorBase
{
BatchNormFunctor
(
const
bool
folded_constant
,
const
bool
fused_relu
)
:
BatchNormFunctorBase
(
folded_constant
,
fused_relu
)
{}
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
const
Tensor
*
mean
,
const
Tensor
*
var
,
const
float
epsilon
,
const
bool
folded_constant
,
const
bool
fused_relu
,
Tensor
*
output
,
StatsFuture
*
future
);
};
...
...
mace/kernels/opencl/batch_norm_opencl.cc
浏览文件 @
bc576436
...
...
@@ -20,11 +20,9 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(
const
Tensor
*
mean
,
const
Tensor
*
var
,
const
float
epsilon
,
const
bool
folded_constant
,
const
bool
fused_relu
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_CHECK
(
folded_constant
||
(
mean
!=
nullptr
&&
var
!=
nullptr
));
MACE_CHECK
(
folded_constant
_
||
(
mean
!=
nullptr
&&
var
!=
nullptr
));
const
index_t
batch
=
input
->
dim
(
0
);
const
index_t
height
=
input
->
dim
(
1
);
...
...
@@ -38,10 +36,10 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
if
(
folded_constant
)
{
if
(
folded_constant
_
)
{
built_options
.
emplace
(
"-DFOLDED_CONSTANT"
);
}
if
(
fused_relu
)
{
if
(
fused_relu
_
)
{
built_options
.
emplace
(
"-DFUSED_RELU"
);
}
auto
bm_kernel
=
runtime
->
BuildKernel
(
"batch_norm"
,
"batch_norm"
,
built_options
);
...
...
@@ -50,7 +48,7 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
scale
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
offset
->
buffer
())));
if
(
!
folded_constant
)
{
if
(
!
folded_constant
_
)
{
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
mean
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
var
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
epsilon
);
...
...
@@ -103,7 +101,7 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
)
<<
"_"
<<
folded_constant
;
<<
folded_constant
_
;
OpenCLProfilingTimer
timer
(
&
event
);
Tuner
<
uint32_t
>::
Get
()
->
template
TuneOrRun
<
cl_int
>(
ss
.
str
(),
lws
,
...
...
mace/kernels/opencl/cl/batch_norm.cl
浏览文件 @
bc576436
...
...
@@ -25,7 +25,7 @@ __kernel void batch_norm(__read_only image2d_t input,
//
native_rsqrt
seems
not
faster
than
rsqrt
DATA_TYPE4
bn_scale
=
scale_value
*
rsqrt
(
var_value
+
(
DATA_TYPE4
)
epsilon
)
;
DATA_TYPE4
bn_offset
=
mad
(
0
-
mean_value,
new
_scale,
offset_value
)
;
DATA_TYPE4
bn_offset
=
mad
(
0
-
mean_value,
bn
_scale,
offset_value
)
;
#
endif
const
int
pos
=
mad24
(
ch_blk,
width,
w
)
;
...
...
mace/ops/batch_norm.h
浏览文件 @
bc576436
...
...
@@ -14,7 +14,7 @@ template <DeviceType D, class T>
class
BatchNormOp
:
public
Operator
<
D
,
T
>
{
public:
BatchNormOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
operator_def
,
ws
)
{
:
Operator
<
D
,
T
>
(
operator_def
,
ws
)
,
functor_
(
false
,
false
)
{
epsilon_
=
OperatorBase
::
GetSingleArgument
<
float
>
(
"epsilon"
,
static_cast
<
float
>
(
1e-4
));
}
...
...
@@ -40,8 +40,7 @@ class BatchNormOp : public Operator<D, T> {
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
output
->
ResizeLike
(
input
);
functor_
(
input
,
scale
,
offset
,
mean
,
var
,
epsilon_
,
false
,
false
,
output
,
future
);
functor_
(
input
,
scale
,
offset
,
mean
,
var
,
epsilon_
,
output
,
future
);
return
true
;
}
...
...
mace/ops/folded_batch_norm.h
浏览文件 @
bc576436
...
...
@@ -14,8 +14,8 @@ template <DeviceType D, class T>
class
FoldedBatchNormOp
:
public
Operator
<
D
,
T
>
{
public:
FoldedBatchNormOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
operator_def
,
ws
)
{
fused_relu_
=
OperatorBase
::
GetSingleArgument
<
bool
>
(
"fused_relu"
,
false
);
:
Operator
<
D
,
T
>
(
operator_def
,
ws
)
,
functor_
(
true
,
OperatorBase
::
GetSingleArgument
<
bool
>
(
"fused_relu"
,
false
))
{
}
bool
Run
(
StatsFuture
*
future
)
override
{
...
...
@@ -33,13 +33,11 @@ class FoldedBatchNormOp : public Operator<D, T> {
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
output
->
ResizeLike
(
input
);
functor_
(
input
,
scale
,
offset
,
nullptr
,
nullptr
,
0
,
true
,
fused_relu_
,
output
,
future
);
functor_
(
input
,
scale
,
offset
,
nullptr
,
nullptr
,
0
,
output
,
future
);
return
true
;
}
private:
bool
fused_relu_
;
kernels
::
BatchNormFunctor
<
D
,
T
>
functor_
;
protected:
...
...
mace/ops/folded_batch_norm_test.cc
0 → 100644
浏览文件 @
bc576436
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
namespace
mace
{
class
FoldedBatchNormOpTest
:
public
OpsTestBase
{};
void
CalculateScaleOffset
(
const
std
::
vector
<
float
>
&
gamma
,
const
std
::
vector
<
float
>
&
beta
,
const
std
::
vector
<
float
>
&
mean
,
const
std
::
vector
<
float
>
&
var
,
const
float
epsilon
,
std
::
vector
<
float
>
&
scale
,
std
::
vector
<
float
>
&
offset
)
{
size_t
size
=
gamma
.
size
();
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
scale
[
i
]
=
gamma
[
i
]
/
std
::
sqrt
(
var
[
i
]
+
epsilon
);
offset
[
i
]
=
offset
[
i
]
-
mean
[
i
]
*
scale
[
i
];
}
}
template
<
DeviceType
D
>
void
Simple
()
{
OpsTestNet
net
;
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
6
,
2
,
1
},
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
std
::
vector
<
float
>
scale
(
1
);
std
::
vector
<
float
>
offset
(
1
);
CalculateScaleOffset
({
4.0
f
},
{
2.0
},
{
10
},
{
11.67
f
},
1e-3
,
scale
,
offset
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Scale"
,
{
1
},
scale
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Offset"
,
{
1
},
offset
);
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FoldedBatchNorm"
,
"FoldedBatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"ScaleImage"
)
.
Input
(
"OffsetImage"
)
.
Output
(
"OutputImage"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"FoldedBatchNorm"
,
"FoldedBatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
}
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
6
,
2
,
1
},
{
-
3.86
,
-
3.86
,
-
1.51
,
-
1.51
,
0.83
,
0.83
,
3.17
,
3.17
,
5.51
,
5.51
,
7.86
,
7.86
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
}
TEST_F
(
FoldedBatchNormOpTest
,
SimpleCPU
)
{
Simple
<
DeviceType
::
CPU
>
();
}
/*
TEST_F(FoldedBatchNormOpTest, SimpleNEON) {
Simple<DeviceType::NEON>();
}
*/
TEST_F
(
FoldedBatchNormOpTest
,
SimpleOPENCL
)
{
Simple
<
DeviceType
::
OPENCL
>
();
}
/*
TEST_F(FoldedBatchNormOpTest, SimpleRandomNeon) {
srand(time(NULL));
// generate random input
index_t batch = 1 + rand() % 10;
index_t channels = 3 + rand() % 50;
index_t height = 64;
index_t width = 64;
// Construct graph
OpsTestNet net;
OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest")
.Input("Input")
.Input("Scale")
.Input("Offset")
.Input("Mean")
.Input("Var")
.Input("Epsilon")
.Output("Output")
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height,
width});
net.AddRandomInput<DeviceType::CPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Mean", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Var", {channels}, true);
net.AddInputFromArray<DeviceType::CPU, float>("Epsilon", {}, {1e-3});
// run cpu
net.RunOp();
// Check
Tensor expected;
expected.Copy(*net.GetOutput("Output"));
// Run NEON
net.RunOp(DeviceType::NEON);
ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 1e-2);
}
TEST_F(FoldedBatchNormOpTest, ComplexRandomNeon) {
srand(time(NULL));
// generate random input
index_t batch = 1 + rand() % 10;
index_t channels = 3 + rand() % 50;
index_t height = 103;
index_t width = 113;
// Construct graph
OpsTestNet net;
OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest")
.Input("Input")
.Input("Scale")
.Input("Offset")
.Input("Mean")
.Input("Var")
.Input("Epsilon")
.Output("Output")
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height,
width});
net.AddRandomInput<DeviceType::CPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Mean", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Var", {channels}, true);
net.AddInputFromArray<DeviceType::CPU, float>("Epsilon", {}, {1e-3});
// run cpu
net.RunOp();
// Check
Tensor expected;
expected.Copy(*net.GetOutput("Output"));
// Run NEON
net.RunOp(DeviceType::NEON);
ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 1e-2);
}
*/
TEST_F
(
FoldedBatchNormOpTest
,
SimpleRandomOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
64
;
index_t
width
=
64
;
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"FoldedBatchNorm"
,
"FoldedBatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
// run cpu
net
.
RunOp
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FoldedBatchNorm"
,
"FoldedBatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"ScaleImage"
)
.
Input
(
"OffsetImage"
)
.
Output
(
"OutputImage"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run on opencl
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
TEST_F
(
FoldedBatchNormOpTest
,
SimpleRandomHalfOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
64
;
index_t
width
=
64
;
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"FoldedBatchNorm"
,
"FoldedBatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
// run cpu
net
.
RunOp
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FoldedBatchNorm"
,
"FoldedBatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"ScaleImage"
)
.
Input
(
"OffsetImage"
)
.
Output
(
"OutputImage"
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataType
::
DT_HALF
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run on opencl
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.5
);
}
TEST_F
(
FoldedBatchNormOpTest
,
ComplexRandomOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
103
;
index_t
width
=
113
;
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"FoldedBatchNorm"
,
"FoldedBatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
// run cpu
net
.
RunOp
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FoldedBatchNorm"
,
"FoldedBatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"ScaleImage"
)
.
Input
(
"OffsetImage"
)
.
Output
(
"OutputImage"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run on opencl
net
.
RunOp
(
DeviceType
::
OPENCL
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
TEST_F
(
FoldedBatchNormOpTest
,
ComplexRandomHalfOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
103
;
index_t
width
=
113
;
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"FoldedBatchNorm"
,
"FoldedBatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
// run cpu
net
.
RunOp
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FoldedBatchNorm"
,
"FoldedBatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"ScaleImage"
)
.
Input
(
"OffsetImage"
)
.
Output
(
"OutputImage"
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataType
::
DT_HALF
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run on opencl
net
.
RunOp
(
DeviceType
::
OPENCL
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.5
);
}
}
mace/python/tools/tf_converter.py
浏览文件 @
bc576436
...
...
@@ -39,6 +39,13 @@ def main(unused_args):
f
.
write
(
str
(
output_graph_def
))
print
(
"Model conversion is completed."
)
def
str2bool
(
v
):
if
v
.
lower
()
in
(
'yes'
,
'true'
,
't'
,
'y'
,
'1'
):
return
True
elif
v
.
lower
()
in
(
'no'
,
'false'
,
'f'
,
'n'
,
'0'
):
return
False
else
:
raise
argparse
.
ArgumentTypeError
(
'Boolean value expected.'
)
def
parse_args
():
"""Parses command line arguments."""
...
...
@@ -91,7 +98,9 @@ def parse_args():
help
=
"template path"
)
parser
.
add_argument
(
"--confuse"
,
type
=
bool
,
type
=
str2bool
,
nargs
=
'?'
,
const
=
False
,
default
=
False
,
help
=
"confuse model names"
)
parser
.
add_argument
(
...
...
mace/python/tools/tf_converter_lib.py
浏览文件 @
bc576436
...
...
@@ -228,10 +228,19 @@ class TFConverter(object):
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
data_format_arg
=
op_def
.
arg
.
add
()
data_format_arg
.
name
=
'data_format'
data_format_arg
.
s
=
'NHWC'
op_def
.
name
=
op
.
name
op_def
.
type
=
'FoldedBatchNorm'
gamma_tensor
=
get_input_tensor
(
op
,
1
)
gamma_value
=
gamma_tensor
.
eval
().
astype
(
np
.
float32
)
for
i
in
range
(
1
,
5
):
input_tensor
=
get_input_tensor
(
op
,
i
)
assert
input_tensor
.
shape
==
gamma_tensor
.
shape
self
.
unused_tensor
.
add
(
input_tensor
.
name
)
gamma_value
=
get_input_tensor
(
op
,
1
).
eval
().
astype
(
np
.
float32
)
beta_value
=
get_input_tensor
(
op
,
2
).
eval
().
astype
(
np
.
float32
)
mean_value
=
get_input_tensor
(
op
,
3
).
eval
().
astype
(
np
.
float32
)
var_value
=
get_input_tensor
(
op
,
4
).
eval
().
astype
(
np
.
float32
)
...
...
@@ -241,9 +250,8 @@ class TFConverter(object):
(
1.0
/
np
.
vectorize
(
math
.
sqrt
)(
var_value
+
epsilon_value
))
*
gamma_value
)
offset_value
=
(
-
mean_value
*
scale_value
)
+
beta_value
name_prefix
=
op
.
inputs
[
1
].
name
idx
=
name_prefix
.
rfind
(
'/'
)
name_prefix
=
op
.
inputs
[
1
].
name
[:
idx
]
+
'/'
idx
=
gamma_tensor
.
name
.
rfind
(
'/'
)
name_prefix
=
gamma_tensor
.
name
[:
idx
]
+
'/'
input_names
=
[
name_prefix
+
'scale:0'
,
name_prefix
+
'offset:0'
]
self
.
add_tensor
(
input_names
[
0
],
gamma_value
.
shape
,
gamma_tensor
.
dtype
,
scale_value
)
...
...
@@ -259,8 +267,8 @@ class TFConverter(object):
op_def
.
input
.
extend
([
input
.
name
for
input
in
input_names
])
self
.
resolved_ops
[
op
.
name
]
=
1
final_op
=
op
if
len
(
self
.
tf_graph
[
op
.
name
])
==
1
and
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'Relu'
:
relu_op
=
self
.
tf_graph
[
op
.
name
][
0
]
final_op
=
relu_op
...
...
@@ -272,9 +280,6 @@ class TFConverter(object):
op_def
.
output
.
extend
([
final_op
.
outputs
[
0
].
name
])
self
.
add_output_shape
(
final_op
.
outputs
,
op_def
)
data_format_arg
=
op_def
.
arg
.
add
()
data_format_arg
.
name
=
'data_format'
data_format_arg
.
s
=
'NHWC'
self
.
net_def
.
op
.
extend
([
op_def
])
def
convert_batchnorm
(
self
,
op
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录