Project: 慢慢CG / Mace (forked from Xiaomi / Mace)

Commit 286d1e21
Authored Jan 07, 2018 by liuqi
Parent: 2d50eea4

Add folded batch norm to combine batchnorm and relu.
Showing 9 changed files with 216 additions and 38 deletions (+216 −38).
mace/core/operator.cc                       +2  −0
mace/kernels/batch_norm.h                  +28  −13
mace/kernels/opencl/batch_norm_opencl.cc   +18  −4
mace/kernels/opencl/cl/batch_norm.cl       +15  −3
mace/ops/batch_norm.h                       +9  −7
mace/ops/folded_batch_norm.cc              +37  −0
mace/ops/folded_batch_norm.h               +52  −0
mace/python/tools/tf_converter_lib.py      +54  −10
tools/validate_gcn.sh                       +1  −1
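For reference before the diffs: folding a batch norm means precomputing one per-channel scale and offset from gamma, beta, mean, and var, so inference becomes a single multiply-add per element. A minimal NumPy sketch (all values made up for illustration) checking that the folded constants reproduce the unfolded computation, matching the folding formulas used in tf_converter_lib.py below:

    import numpy as np

    # Toy per-channel batch-norm parameters (hypothetical values).
    rng = np.random.default_rng(0)
    channels = 4
    x = rng.standard_normal((2, 3, 3, channels)).astype(np.float32)  # NHWC
    gamma = rng.standard_normal(channels).astype(np.float32)
    beta = rng.standard_normal(channels).astype(np.float32)
    mean = rng.standard_normal(channels).astype(np.float32)
    var = rng.random(channels).astype(np.float32)
    epsilon = 1e-4

    # Unfolded inference batch norm: y = gamma * (x - mean) / sqrt(var + eps) + beta
    y_unfolded = gamma * (x - mean) / np.sqrt(var + epsilon) + beta

    # Folded form: precompute scale/offset once, then y = scale * x + offset
    scale = gamma / np.sqrt(var + epsilon)
    offset = beta - mean * scale
    y_folded = scale * x + offset

    assert np.allclose(y_unfolded, y_folded, atol=1e-5)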
mace/core/operator.cc
@@ -76,6 +76,7 @@ extern void Register_Relu(OperatorRegistry *op_registry);
 extern void Register_ResizeBilinear(OperatorRegistry *op_registry);
 extern void Register_SpaceToBatchND(OperatorRegistry *op_registry);
 extern void Register_Softmax(OperatorRegistry *op_registry);
+extern void Register_FoldedBatchNorm(OperatorRegistry *op_registry);

 OperatorRegistry::OperatorRegistry() {
   Register_AddN(this);
@@ -95,6 +96,7 @@ OperatorRegistry::OperatorRegistry() {
   Register_ResizeBilinear(this);
   Register_SpaceToBatchND(this);
   Register_Softmax(this);
+  Register_FoldedBatchNorm(this);
 }

 }  // namespace mace
mace/kernels/batch_norm.h
@@ -14,13 +14,14 @@ namespace kernels {
 template <DeviceType D, typename T>
 struct BatchNormFunctor {
-  float epsilon_;
-
   void operator()(const Tensor *input,
                   const Tensor *scale,
                   const Tensor *offset,
                   const Tensor *mean,
                   const Tensor *var,
+                  const float epsilon,
+                  const bool folded_constant,
+                  const bool fused_relu,
                   Tensor *output,
                   StatsFuture *future) {
     // Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
@@ -39,8 +40,6 @@ struct BatchNormFunctor {
     Tensor::MappingGuard input_mapper(input);
     Tensor::MappingGuard scale_mapper(scale);
     Tensor::MappingGuard offset_mapper(offset);
-    Tensor::MappingGuard mean_mapper(mean);
-    Tensor::MappingGuard var_mapper(var);
     Tensor::MappingGuard output_mapper(output);

     const T *input_ptr = input->data<T>();
@@ -50,13 +49,18 @@ struct BatchNormFunctor {
     const T *var_ptr = var->data<T>();
     T *output_ptr = output->mutable_data<T>();

-    vector<T> new_scale(channels);
-    vector<T> new_offset(channels);
+    vector<T> new_scale;
+    vector<T> new_offset;
+    if (!folded_constant) {
+      new_scale.resize(channels);
+      new_offset.resize(channels);
+      Tensor::MappingGuard mean_mapper(mean);
+      Tensor::MappingGuard var_mapper(var);
 #pragma omp parallel for
-    for (index_t c = 0; c < channels; ++c) {
-      new_scale[c] = scale_ptr[c] / std::sqrt(var_ptr[c] + epsilon_);
-      new_offset[c] = offset_ptr[c] - mean_ptr[c] * new_scale[c];
+      for (index_t c = 0; c < channels; ++c) {
+        new_scale[c] = scale_ptr[c] / std::sqrt(var_ptr[c] + epsilon);
+        new_offset[c] = offset_ptr[c] - mean_ptr[c] * new_scale[c];
+      }
     }

     index_t pos = 0;
@@ -66,7 +70,14 @@ struct BatchNormFunctor {
     for (index_t h = 0; h < height; ++h) {
       for (index_t w = 0; w < width; ++w) {
         for (index_t c = 0; c < channels; ++c) {
-          output_ptr[pos] = new_scale[c] * input_ptr[pos] + new_offset[c];
+          if (folded_constant) {
+            output_ptr[pos] = scale_ptr[c] * input_ptr[pos] + offset_ptr[c];
+          } else {
+            output_ptr[pos] = new_scale[c] * input_ptr[pos] + new_offset[c];
+          }
+          if (fused_relu) {
+            output_ptr[pos] = std::max(output_ptr[pos], static_cast<T>(0));
+          }
           ++pos;
         }
       }
@@ -82,18 +93,22 @@ void BatchNormFunctor<DeviceType::NEON, float>::operator()(
     const Tensor *offset,
     const Tensor *mean,
     const Tensor *var,
+    const float epsilon,
+    const bool folded_constant,
+    const bool fused_relu,
     Tensor *output,
     StatsFuture *future);

 template <typename T>
 struct BatchNormFunctor<DeviceType::OPENCL, T> {
-  float epsilon_;
-
   void operator()(const Tensor *input,
                   const Tensor *scale,
                   const Tensor *offset,
                   const Tensor *mean,
                   const Tensor *var,
+                  const float epsilon,
+                  const bool folded_constant,
+                  const bool fused_relu,
                   Tensor *output,
                   StatsFuture *future);
 };
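The revised functor now carries the whole decision inline: when the constants were folded offline, scale and offset are used directly and mean/var are never touched; otherwise the per-channel constants are recomputed, and in either case ReLU can be applied in the same pass. A minimal NumPy sketch of that control flow (illustrative only, not the MACE API):

    import numpy as np

    def batch_norm_ref(x, scale, offset, mean, var, epsilon,
                       folded_constant, fused_relu):
        """NumPy sketch of the revised BatchNormFunctor semantics.

        x is NHWC; scale/offset/mean/var are per-channel vectors.
        """
        if folded_constant:
            # scale/offset already hold the folded constants; mean/var unused.
            out = scale * x + offset
        else:
            new_scale = scale / np.sqrt(var + epsilon)
            new_offset = offset - mean * new_scale
            out = new_scale * x + new_offset
        if fused_relu:
            out = np.maximum(out, 0)
        return out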
mace/kernels/opencl/batch_norm_opencl.cc
@@ -19,8 +19,13 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(
     const Tensor *offset,
     const Tensor *mean,
     const Tensor *var,
+    const float epsilon,
+    const bool folded_constant,
+    const bool fused_relu,
     Tensor *output,
     StatsFuture *future) {
+  MACE_CHECK(folded_constant || (mean != nullptr && var != nullptr));
+
   const index_t batch = input->dim(0);
   const index_t height = input->dim(1);
   const index_t width = input->dim(2);
@@ -33,15 +38,23 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(
   auto dt = DataTypeToEnum<T>::value;
   built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
   built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
+  if (folded_constant) {
+    built_options.emplace("-DFOLDED_CONSTANT");
+  }
+  if (fused_relu) {
+    built_options.emplace("-DFUSED_RELU");
+  }
   auto bm_kernel = runtime->BuildKernel("batch_norm", "batch_norm",
                                         built_options);

   uint32_t idx = 0;
   bm_kernel.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
   bm_kernel.setArg(idx++, *(static_cast<const cl::Image2D *>(scale->buffer())));
   bm_kernel.setArg(idx++, *(static_cast<const cl::Image2D *>(offset->buffer())));
-  bm_kernel.setArg(idx++, *(static_cast<const cl::Image2D *>(mean->buffer())));
-  bm_kernel.setArg(idx++, *(static_cast<const cl::Image2D *>(var->buffer())));
-  bm_kernel.setArg(idx++, epsilon_);
+  if (!folded_constant) {
+    bm_kernel.setArg(idx++, *(static_cast<const cl::Image2D *>(mean->buffer())));
+    bm_kernel.setArg(idx++, *(static_cast<const cl::Image2D *>(var->buffer())));
+    bm_kernel.setArg(idx++, epsilon);
+  }
   bm_kernel.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));

   const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
@@ -89,7 +102,8 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(
        << output->dim(0) << "_" << output->dim(1) << "_" << output->dim(2)
-       << "_" << output->dim(3);
+       << "_" << output->dim(3) << "_" << folded_constant;
     OpenCLProfilingTimer timer(&event);
     Tuner<uint32_t>::Get()->template TuneOrRun<cl_int>(ss.str(), lws,
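Because FOLDED_CONSTANT and FUSED_RELU become compile-time defines, each flag combination is a separately built kernel variant with no runtime branch inside the kernel; note the tuner key above also gains folded_constant so variants don't collide. A rough Python sketch of that variant-selection idea (the cache and build_kernel here are hypothetical stand-ins for runtime->BuildKernel, not MACE code):

    # Sketch: one compiled kernel per set of build options (illustrative).
    _kernel_cache = {}

    def get_batch_norm_kernel(folded_constant, fused_relu, build_kernel):
        options = {"-DDATA_TYPE=float"}
        if folded_constant:
            options.add("-DFOLDED_CONSTANT")
        if fused_relu:
            options.add("-DFUSED_RELU")
        key = frozenset(options)
        if key not in _kernel_cache:
            # Each distinct option set compiles its own specialized program.
            _kernel_cache[key] = build_kernel("batch_norm", key)
        return _kernel_cache[key]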
mace/kernels/opencl/cl/batch_norm.cl
@@ -3,27 +3,39 @@
 __kernel void batch_norm(__read_only image2d_t input,
                          __read_only image2d_t scale,
                          __read_only image2d_t offset,
+#ifndef FOLDED_CONSTANT
                          __read_only image2d_t mean,
                          __read_only image2d_t var,
                          __private const float epsilon,
+#endif
                          __write_only image2d_t output) {
   const int ch_blk = get_global_id(0);
   const int w = get_global_id(1);
   const int hb = get_global_id(2);
   const int width = get_global_size(1);

+#ifdef FOLDED_CONSTANT
+  DATA_TYPE4 bn_scale = READ_IMAGET(scale, SAMPLER, (int2)(ch_blk, 0));
+  DATA_TYPE4 bn_offset = READ_IMAGET(offset, SAMPLER, (int2)(ch_blk, 0));
+#else
   DATA_TYPE4 scale_value = READ_IMAGET(scale, SAMPLER, (int2)(ch_blk, 0));
   DATA_TYPE4 offset_value = READ_IMAGET(offset, SAMPLER, (int2)(ch_blk, 0));
   DATA_TYPE4 mean_value = READ_IMAGET(mean, SAMPLER, (int2)(ch_blk, 0));
   DATA_TYPE4 var_value = READ_IMAGET(var, SAMPLER, (int2)(ch_blk, 0));

   // native_rsqrt seems not faster than rsqrt
-  DATA_TYPE4 new_scale = scale_value * rsqrt(var_value + (DATA_TYPE4)epsilon);
-  DATA_TYPE4 new_offset = mad(0 - mean_value, new_scale, offset_value);
+  DATA_TYPE4 bn_scale = scale_value * rsqrt(var_value + (DATA_TYPE4)epsilon);
+  DATA_TYPE4 bn_offset = mad(0 - mean_value, bn_scale, offset_value);
+#endif

   const int pos = mad24(ch_blk, width, w);
   DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(pos, hb));
-  DATA_TYPE4 out = mad(in, new_scale, new_offset);
+  DATA_TYPE4 out = mad(in, bn_scale, bn_offset);
+#ifdef FUSED_RELU
+  out = fmax(out, 0);
+#endif
   WRITE_IMAGET(output, (int2)(pos, hb), out);
 }
mace/ops/batch_norm.h
@@ -2,8 +2,8 @@
 //
 // Copyright (c) 2017 XiaoMi All rights reserved.
 //

-#ifndef MACE_BATCH_NORM_H_
-#define MACE_BATCH_NORM_H_
+#ifndef MACE_OPS_BATCH_NORM_H_
+#define MACE_OPS_BATCH_NORM_H_

 #include "mace/core/operator.h"
 #include "mace/kernels/batch_norm.h"
@@ -14,9 +14,9 @@ template <DeviceType D, class T>
 class BatchNormOp : public Operator<D, T> {
  public:
   BatchNormOp(const OperatorDef &operator_def, Workspace *ws)
-      : Operator<D, T>(operator_def, ws), functor_() {
-    functor_.epsilon_ = OperatorBase::GetSingleArgument<float>(
-        "epsilon", static_cast<float>(1e-4));
+      : Operator<D, T>(operator_def, ws) {
+    epsilon_ = OperatorBase::GetSingleArgument<float>(
+        "epsilon", static_cast<float>(1e-4));
   }

   bool Run(StatsFuture *future) override {
@@ -40,11 +40,13 @@ class BatchNormOp : public Operator<D, T> {
     Tensor *output = this->Output(OUTPUT);
     output->ResizeLike(input);

-    functor_(input, scale, offset, mean, var, output, future);
+    functor_(input, scale, offset, mean, var, epsilon_, false, false, output,
+             future);
     return true;
   }

  private:
+  float epsilon_;
   kernels::BatchNormFunctor<D, T> functor_;

  protected:
@@ -54,4 +56,4 @@ class BatchNormOp : public Operator<D, T> {
 }  // namespace mace

-#endif  // MACE_BATCH_NORM_H_
+#endif  // MACE_OPS_BATCH_NORM_H_
mace/ops/folded_batch_norm.cc (new file, mode 100644)
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//

#include "mace/ops/folded_batch_norm.h"

namespace mace {

void Register_FoldedBatchNorm(OperatorRegistry *op_registry) {
  REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm")
                                     .Device(DeviceType::CPU)
                                     .TypeConstraint<float>("T")
                                     .Build(),
                    FoldedBatchNormOp<DeviceType::CPU, float>);

#if MACE_ENABLE_NEON
  REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm")
                                     .Device(DeviceType::NEON)
                                     .TypeConstraint<float>("T")
                                     .Build(),
                    FoldedBatchNormOp<DeviceType::NEON, float>);
#endif  // MACE_ENABLE_NEON

  REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm")
                                     .Device(DeviceType::OPENCL)
                                     .TypeConstraint<float>("T")
                                     .Build(),
                    FoldedBatchNormOp<DeviceType::OPENCL, float>);

  REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm")
                                     .Device(DeviceType::OPENCL)
                                     .TypeConstraint<half>("T")
                                     .Build(),
                    FoldedBatchNormOp<DeviceType::OPENCL, half>);
}

}  // namespace mace
mace/ops/folded_batch_norm.h (new file, mode 100644)
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//

#ifndef MACE_OPS_FOLDED_BATCH_NORM_H_
#define MACE_OPS_FOLDED_BATCH_NORM_H_

#include "mace/core/operator.h"
#include "mace/kernels/batch_norm.h"

namespace mace {

template <DeviceType D, class T>
class FoldedBatchNormOp : public Operator<D, T> {
 public:
  FoldedBatchNormOp(const OperatorDef &operator_def, Workspace *ws)
      : Operator<D, T>(operator_def, ws) {
    fused_relu_ = OperatorBase::GetSingleArgument<bool>("fused_relu", false);
  }

  bool Run(StatsFuture *future) override {
    const Tensor *input = this->Input(INPUT);
    const Tensor *scale = this->Input(SCALE);
    const Tensor *offset = this->Input(OFFSET);

    MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional. ",
               input->dim_size());
    MACE_CHECK(scale->dim_size() == 1, "scale must be 1-dimensional. ",
               scale->dim_size());
    MACE_CHECK(offset->dim_size() == 1, "offset must be 1-dimensional. ",
               offset->dim_size());

    Tensor *output = this->Output(OUTPUT);
    output->ResizeLike(input);

    functor_(input, scale, offset, nullptr, nullptr, 0, true, fused_relu_,
             output, future);
    return true;
  }

 private:
  bool fused_relu_;
  kernels::BatchNormFunctor<D, T> functor_;

 protected:
  OP_INPUT_TAGS(INPUT, SCALE, OFFSET, MEAN, VAR);
  OP_OUTPUT_TAGS(OUTPUT);
};

}  // namespace mace

#endif  // MACE_OPS_FOLDED_BATCH_NORM_H_
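As the converter changes below show, the op is driven by a fused_relu argument on its OperatorDef. A minimal sketch of what a converted FoldedBatchNorm node looks like (the field usage mirrors tf_converter_lib.py; the node and tensor names are illustrative):

    from mace.proto import mace_pb2

    # Sketch: a FoldedBatchNorm node as the converter emits it (names illustrative).
    net_def = mace_pb2.NetDef()
    op_def = net_def.op.add()
    op_def.name = 'bn1'
    op_def.type = 'FoldedBatchNorm'
    # Inputs: activation plus the precomputed (folded) scale and offset tensors.
    op_def.input.extend(['input:0', 'bn1/scale:0', 'bn1/offset:0'])
    op_def.output.extend(['bn1/relu:0'])

    fused_relu_arg = op_def.arg.add()
    fused_relu_arg.name = 'fused_relu'
    fused_relu_arg.i = 1  # absorb the trailing Relu into this op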
mace/python/tools/tf_converter_lib.py
 from mace.proto import mace_pb2
 import tensorflow as tf
 import numpy as np
+import math
 from mace.python.tools import memory_optimizer

 # TODO: support NCHW format, now only support NHWC.
@@ -136,6 +137,22 @@ class TFConverter(object):
       output_shapes.append(output_shape)
     op.output_shape.extend(output_shapes)

+  def add_tensor(self, name, shape, tf_dt, value):
+    tensor = self.net_def.tensors.add()
+    tensor.name = name
+
+    shape = list(shape)
+    tensor.dims.extend(shape)
+
+    if tf_dt == tf.float32:
+      tensor.data_type = mace_pb2.DT_FLOAT
+      tensor.float_data.extend(value.flat)
+    elif tf_dt == tf.int32:
+      tensor.data_type = mace_pb2.DT_INT32
+      tensor.int32_data.extend(value.flat)
+    else:
+      raise Exception("Not supported tensor type: " + tf_dt.name)
+
   def convert_tensor(self, op):
     if op.outputs[0].name not in self.unused_tensor:
       tensor = self.net_def.tensors.add()
@@ -212,25 +229,52 @@ class TFConverter(object):
     arg.name = 'T'
     arg.i = self.dt
     op_def.name = op.name
-    op_def.type = 'BatchNorm'
+    op_def.type = 'FoldedBatchNorm'
+
+    gamma_tensor = get_input_tensor(op, 1)
+    gamma_value = gamma_tensor.eval().astype(np.float32)
+    beta_value = get_input_tensor(op, 2).eval().astype(np.float32)
+    mean_value = get_input_tensor(op, 3).eval().astype(np.float32)
+    var_value = get_input_tensor(op, 4).eval().astype(np.float32)
+    epsilon_value = op.get_attr('epsilon')
+
+    scale_value = (
+        (1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) *
+        gamma_value)
+    offset_value = (-mean_value * scale_value) + beta_value
+
+    name_prefix = op.inputs[1].name
+    idx = name_prefix.rfind('/')
+    name_prefix = op.inputs[1].name[:idx] + '/'
+    input_names = [name_prefix + 'scale:0', name_prefix + 'offset:0']
+    self.add_tensor(input_names[0], gamma_value.shape, gamma_tensor.dtype,
+                    scale_value)
+    self.add_tensor(input_names[1], gamma_value.shape, gamma_tensor.dtype,
+                    offset_value)

     if self.device == 'gpu':
       op_def.input.extend([op.inputs[0].name])
-      for i in range(1, len(op.inputs)):
-        output_name = self.add_buffer_to_image(op.inputs[i].name, "ARGUMENT")
+      for name in input_names:
+        output_name = self.add_buffer_to_image(name, "ARGUMENT")
         op_def.input.extend([output_name])
     else:
-      op_def.input.extend([input.name for input in op.inputs])
-    op_def.output.extend([op.outputs[0].name])
-    self.add_output_shape(op.outputs, op_def)
+      op_def.input.extend([op.inputs[0].name] + input_names)

-    epsilon_arg = op_def.arg.add()
-    epsilon_arg.name = 'epsilon'
-    epsilon_arg.f = op.get_attr('epsilon')
+    self.resolved_ops[op.name] = 1
+    final_op = op
+
+    if len(self.tf_graph[op.name]) == 1 and \
+        self.tf_graph[op.name][0].type == 'Relu':
+      relu_op = self.tf_graph[op.name][0]
+      final_op = relu_op
+      fused_relu_arg = op_def.arg.add()
+      fused_relu_arg.name = 'fused_relu'
+      fused_relu_arg.i = 1
+      self.resolved_ops[relu_op.name] = 1
+
+    op_def.output.extend([final_op.outputs[0].name])
+    self.add_output_shape(final_op.outputs, op_def)

     data_format_arg = op_def.arg.add()
     data_format_arg.name = 'data_format'
     data_format_arg.s = 'NHWC'

-    self.resolved_ops[op.name] = 1
     self.net_def.op.extend([op_def])

   def convert_batchnorm(self, op):
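The fusion test above is purely structural: the trailing Relu is absorbed only when it is the sole consumer of the batch-norm output, in which case the FoldedBatchNorm node takes over the Relu's output name. A toy sketch of that check (the tf_graph mapping from op name to consumer ops is illustrative):

    # Toy sketch of the Relu-fusion condition used by the converter.
    def can_fuse_relu(tf_graph, op_name):
        """True when the op's only consumer is a Relu, mirroring the converter's test."""
        consumers = tf_graph[op_name]
        return len(consumers) == 1 and consumers[0].type == 'Relu'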
tools/validate_gcn.sh
@@ -96,7 +96,7 @@ bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
                    --output_type=source \
                    --template=${MACE_SOURCE_DIR}/mace/python/tools/model.template \
                    --model_tag=${MODEL_TAG} \
-                   --confuse=False || exit -1
+                   --confuse=True || exit -1

 echo "Step 3: Generate version source"
 rm -rf ${VERSION_SOURCE_PATH}