Xiaomi / Mace, commit 5967c7ab
Authored Apr 08, 2019 by 李寅
Merge branch 'embedding' into 'master'
Support int32 input data type. See merge request !1047
Parents: c35775c7, 80d1c9dd
Showing 16 changed files with 379 additions and 210 deletions (+379, -210)
mace/benchmark/benchmark_model.cc                          +9    -9
mace/core/memory_optimizer.cc                              +1    -1
mace/examples/cli/example.cc                               +2    -0
mace/libmace/mace.cc                                       +84   -32
mace/ops/common/transpose.cc                               +28   -108
mace/ops/common/transpose.h                                +139  -2
mace/ops/expand_dims.cc                                    +25   -14
mace/public/mace.h                                         +13   -1
mace/python/tools/converter.py                             +21   -8
mace/python/tools/converter_tool/base_converter.py         +9    -0
mace/python/tools/converter_tool/tensorflow_converter.py   +4    -7
mace/python/tools/converter_tool/transformer.py            +2    -2
mace/tools/validation/mace_run.cc                          +15   -12
tools/common.py                                            +1    -0
tools/converter.py                                         +22   -14
tools/sh_commands.py                                       +4    -0
mace/benchmark/benchmark_model.cc

@@ -335,18 +335,17 @@ int Main(int argc, char **argv) {
   std::map<std::string, mace::MaceTensor> inputs;
   std::map<std::string, mace::MaceTensor> outputs;
   for (size_t i = 0; i < input_count; ++i) {
     // Allocate input and output
+    // only support float and int32, use char for generalization
     int64_t input_size =
-        std::accumulate(input_shape_vec[i].begin(), input_shape_vec[i].end(),
-                        1, std::multiplies<int64_t>());
+        std::accumulate(input_shape_vec[i].begin(), input_shape_vec[i].end(),
+                        4, std::multiplies<int64_t>());
-    auto buffer_in = std::shared_ptr<float>(new float[input_size],
-                                            std::default_delete<float[]>());
+    auto buffer_in = std::shared_ptr<char>(new char[input_size],
+                                           std::default_delete<char[]>());
     // load input
     std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]),
                           std::ios::in | std::ios::binary);
     if (in_file.is_open()) {
-      in_file.read(reinterpret_cast<char *>(buffer_in.get()),
-                   input_size * sizeof(float));
+      in_file.read(buffer_in.get(), input_size);
       in_file.close();
     } else {
       LOG(INFO) << "Open input file failed";

@@ -357,12 +356,13 @@ int Main(int argc, char **argv) {
   }
   for (size_t i = 0; i < output_count; ++i) {
+    // only support float and int32, use char for generalization
     int64_t output_size =
-        std::accumulate(output_shape_vec[i].begin(), output_shape_vec[i].end(),
-                        1, std::multiplies<int64_t>());
+        std::accumulate(output_shape_vec[i].begin(), output_shape_vec[i].end(),
+                        4, std::multiplies<int64_t>());
-    auto buffer_out = std::shared_ptr<float>(new float[output_size],
-                                             std::default_delete<float[]>());
+    auto buffer_out = std::shared_ptr<char>(new char[output_size],
+                                            std::default_delete<char[]>());
     outputs[output_names[i]] = mace::MaceTensor(output_shape_vec[i], buffer_out,
                                                 output_data_formats[i]);
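The size arithmetic above is the easy-to-miss part of this hunk: seeding std::accumulate with 4 instead of 1 turns input_size into a byte count (float and int32 are both 4 bytes wide), so a single char buffer can back either element type. Below is a minimal standalone sketch of the same idea; the shape and the file name "input.bin" are hypothetical, not taken from this commit.

#include <cstdint>
#include <cstdio>
#include <fstream>
#include <functional>
#include <memory>
#include <numeric>
#include <vector>

int main() {
  std::vector<int64_t> shape = {1, 3, 4, 4};  // hypothetical input shape

  // Seeding with 4 == sizeof(float) == sizeof(int32_t) makes the product a
  // byte count, so one char buffer serves both element types.
  int64_t byte_size = std::accumulate(shape.begin(), shape.end(),
                                      static_cast<int64_t>(4),
                                      std::multiplies<int64_t>());
  auto buffer = std::shared_ptr<char>(new char[byte_size](),
                                      std::default_delete<char[]>());

  std::ifstream in("input.bin", std::ios::binary);  // hypothetical file
  if (in.is_open()) {
    // The buffer is raw bytes, so no reinterpret_cast or sizeof scaling.
    in.read(buffer.get(), byte_size);
  }

  // Reinterpret the same bytes as whichever type the model declares.
  const int32_t *as_int32 = reinterpret_cast<const int32_t *>(buffer.get());
  std::printf("first element as int32: %d\n", as_int32[0]);
  return 0;
}

The tool then hands the char buffer to MaceTensor unchanged; the engine decides how to interpret the bytes from the model's declared input data type.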
mace/core/memory_optimizer.cc

@@ -33,7 +33,7 @@ namespace mace {
 bool MemoryOptimizer::IsMemoryReuseOp(const std::string &op_type) {
   static const std::unordered_set<std::string> kReuseOp = {
-      "Reshape", "Identity", "Squeeze", "ExpandDims"};
+      "Reshape", "Identity", "Squeeze"};
   return kReuseOp.count(op_type) == 1;
 }
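For context, IsMemoryReuseOp marks ops whose output may alias the input's buffer. ExpandDims leaves the set because, after this commit, it can physically rearrange data (NHWC to NCHW, see mace/ops/expand_dims.cc below) rather than only reshaping in place. A reduced sketch of the check after the change:

#include <iostream>
#include <string>
#include <unordered_set>

// Reduced sketch of MemoryOptimizer::IsMemoryReuseOp after this commit.
bool IsMemoryReuseOp(const std::string &op_type) {
  static const std::unordered_set<std::string> kReuseOp = {
      "Reshape", "Identity", "Squeeze"};
  return kReuseOp.count(op_type) == 1;
}

int main() {
  std::cout << std::boolalpha
            << IsMemoryReuseOp("Reshape") << "\n"      // true
            << IsMemoryReuseOp("ExpandDims") << "\n";  // now false
  return 0;
}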
mace/examples/cli/example.cc

@@ -269,6 +269,7 @@ bool RunModel(const std::vector<std::string> &input_names,
         std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 1,
                         std::multiplies<int64_t>());
     inputs_size[input_names[i]] = input_size;
+    // Only support float and int32 data type
     auto buffer_in = std::shared_ptr<float>(new float[input_size],
                                             std::default_delete<float[]>());
     inputs[input_names[i]] = mace::MaceTensor(input_shapes[i], buffer_in,

@@ -279,6 +280,7 @@ bool RunModel(const std::vector<std::string> &input_names,
     int64_t output_size =
         std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1,
                         std::multiplies<int64_t>());
+    // Only support float and int32 data type
     auto buffer_out = std::shared_ptr<float>(new float[output_size],
                                              std::default_delete<float[]>());
     outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out,
mace/libmace/mace.cc

@@ -284,13 +284,13 @@ MaceStatus MaceEngineConfig::SetCPUThreadPolicy(
 class MaceTensor::Impl {
  public:
   std::vector<int64_t> shape;
-  std::shared_ptr<float> data;
+  std::shared_ptr<void> data;
   DataFormat format;
   int64_t buffer_size;
 };

 MaceTensor::MaceTensor(const std::vector<int64_t> &shape,
-                       std::shared_ptr<float> data,
+                       std::shared_ptr<void> data,
                        const DataFormat format) {
   MACE_CHECK_NOTNULL(data.get());
   MACE_CHECK(format == DataFormat::DF_NONE || format == DataFormat::NHWC

@@ -345,9 +345,21 @@ MaceTensor::~MaceTensor() = default;
 const std::vector<int64_t> &MaceTensor::shape() const { return impl_->shape; }

-const std::shared_ptr<float> MaceTensor::data() const { return impl_->data; }
+const std::shared_ptr<float> MaceTensor::data() const {
+  return std::static_pointer_cast<float>(impl_->data);
+}
+
+std::shared_ptr<float> MaceTensor::data() {
+  return std::static_pointer_cast<float>(impl_->data);
+}
+
+std::shared_ptr<void> MaceTensor::raw_data() const {
+  return impl_->data;
+}

-std::shared_ptr<float> MaceTensor::data() { return impl_->data; }
+std::shared_ptr<void> MaceTensor::raw_mutable_data() {
+  return impl_->data;
+}

 DataFormat MaceTensor::data_format() const {
   return impl_->format;

@@ -466,8 +478,9 @@ MaceStatus MaceEngine::Impl::Init(
                  << "' does not belong to model's inputs: "
                  << MakeString(MapKeys(input_info_map_));
     }
+    DataType input_dt = input_info_map_[input_name].data_type();
     Tensor *input_tensor =
-        ws_->CreateTensor(input_name, device_->allocator(), DT_FLOAT);
+        ws_->CreateTensor(input_name, device_->allocator(), input_dt);
     // Resize to possible largest shape to avoid resize during running.
     std::vector<index_t> shape(input_info_map_[input_name].dims_size());
     for (int i = 0; i < input_info_map_[input_name].dims_size(); ++i) {

@@ -485,8 +498,9 @@ MaceStatus MaceEngine::Impl::Init(
                  << MakeString(MapKeys(output_info_map_));
     }
 #if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
+    DataType output_dt = output_info_map_[output_name].data_type();
     Tensor *output_tensor =
-        ws_->CreateTensor(output_name, device_->allocator(), DT_FLOAT);
+        ws_->CreateTensor(output_name, device_->allocator(), output_dt);
     output_tensor->set_data_format(NHWC);
 #endif
   }

@@ -572,54 +586,71 @@ MaceStatus MaceEngine::Impl::TransposeInput(
     Tensor *input_tensor) {
   bool has_data_format = input_tensor->data_format() != DataFormat::DF_NONE;
   DataFormat data_format = DataFormat::DF_NONE;
+  DataType input_dt = input_tensor->dtype();
   if (has_data_format) {
+    std::vector<int> dst_dims;
     if (device_->device_type() == DeviceType::CPU &&
         input.second.shape().size() == 4 &&
         input.second.data_format() == NHWC &&
         !is_quantized_model_) {
       VLOG(1) << "Transform input " << input.first << " from NHWC to NCHW";
       input_tensor->set_data_format(DataFormat::NCHW);
-      std::vector<int> dst_dims = {0, 3, 1, 2};
-      std::vector<index_t> output_shape =
-          TransposeShape<int64_t, index_t>(input.second.shape(), dst_dims);
-      MACE_RETURN_IF_ERROR(input_tensor->Resize(output_shape));
-      Tensor::MappingGuard input_guard(input_tensor);
-      float *input_data = input_tensor->mutable_data<float>();
-      return ops::Transpose(input.second.data().get(), input.second.shape(),
-                            dst_dims, input_data);
+      dst_dims = {0, 3, 1, 2};
     } else if ((is_quantized_model_ ||
                 device_->device_type() == DeviceType::GPU) &&
                input.second.shape().size() == 4 &&
               input.second.data_format() == DataFormat::NCHW) {
       VLOG(1) << "Transform input " << input.first << " from NCHW to NHWC";
-      std::vector<int> dst_dims = {0, 2, 3, 1};
       input_tensor->set_data_format(DataFormat::NHWC);
+      dst_dims = {0, 2, 3, 1};
     }
+    if (!dst_dims.empty()) {
       std::vector<index_t> output_shape =
           TransposeShape<int64_t, index_t>(input.second.shape(), dst_dims);
       MACE_RETURN_IF_ERROR(input_tensor->Resize(output_shape));
       Tensor::MappingGuard input_guard(input_tensor);
-      float *input_data = input_tensor->mutable_data<float>();
-      return ops::Transpose(input.second.data().get(), input.second.shape(),
-                            dst_dims, input_data);
+      if (input_dt == DataType::DT_FLOAT) {
+        auto input_data = input_tensor->mutable_data<float>();
+        return ops::Transpose(input.second.data<float>().get(),
+                              input.second.shape(), dst_dims, input_data,
+                              input_dt);
+      } else if (input_dt == DataType::DT_INT32) {
+        auto input_data = input_tensor->mutable_data<int>();
+        return ops::Transpose(input.second.data<int>().get(),
+                              input.second.shape(), dst_dims, input_data,
+                              input_dt);
+      } else {
+        LOG(FATAL) << "MACE do not support the input data type: " << input_dt;
+      }
+    }
     data_format = input.second.data_format();
   }
   input_tensor->set_data_format(data_format);
   MACE_RETURN_IF_ERROR(input_tensor->Resize(input.second.shape()));
   Tensor::MappingGuard input_guard(input_tensor);
-  float *input_data = input_tensor->mutable_data<float>();
-  memcpy(input_data, input.second.data().get(),
-         input_tensor->size() * sizeof(float));
+  if (input_dt == DataType::DT_FLOAT) {
+    auto input_data = input_tensor->mutable_data<float>();
+    memcpy(input_data, input.second.data().get(),
+           input_tensor->size() * sizeof(float));
+  } else if (input_dt == DataType::DT_INT32) {
+    auto input_data = input_tensor->mutable_data<int>();
+    memcpy(input_data, input.second.data().get(),
+           input_tensor->size() * sizeof(int));
+  } else {
+    LOG(FATAL) << "MACE do not support the input data type: " << input_dt;
+  }
   return MaceStatus::MACE_SUCCESS;
 }

 MaceStatus MaceEngine::Impl::TransposeOutput(
     const mace::Tensor *output_tensor,
     std::pair<const std::string, mace::MaceTensor> *output) {
+  DataType output_dt = output_tensor->dtype();
   // save output
   if (output_tensor != nullptr && output->second.data() != nullptr) {
     if (output_tensor->data_format() != DataFormat::DF_NONE &&

@@ -655,11 +686,23 @@ MaceStatus MaceEngine::Impl::TransposeOutput(
                  << output->second.impl_->buffer_size;
       output->second.impl_->shape = shape;
       Tensor::MappingGuard output_guard(output_tensor);
-      const float *output_data = output_tensor->data<float>();
-      return ops::Transpose(output_data, output_tensor->shape(), dst_dims,
-                            output->second.data().get());
+      if (output_dt == DataType::DT_FLOAT) {
+        auto output_data = output_tensor->data<float>();
+        return ops::Transpose(output_data, output_tensor->shape(), dst_dims,
+                              output->second.data<float>().get());
+      } else if (output_dt == DataType::DT_INT32) {
+        auto output_data = output_tensor->data<int>();
+        return ops::Transpose(output_data, output_tensor->shape(), dst_dims,
+                              output->second.data<int>().get(), output_dt);
+      } else {
+        LOG(FATAL) << "MACE do not support the output data type: "
+                   << output_dt;
+        return MaceStatus::MACE_INVALID_ARGS;
+      }
     } else {
       Tensor::MappingGuard output_guard(output_tensor);
       auto shape = output_tensor->shape();

@@ -670,8 +713,17 @@ MaceStatus MaceEngine::Impl::TransposeOutput(
                  << MakeString<int64_t>(shape) << " vs buffer size "
                  << output->second.impl_->buffer_size;
       output->second.impl_->shape = shape;
-      std::memcpy(output->second.data().get(), output_tensor->data<float>(),
-                  output_size * sizeof(float));
+      if (output_dt == DataType::DT_FLOAT) {
+        std::memcpy(output->second.data<float>().get(),
+                    output_tensor->data<float>(),
+                    output_size * sizeof(float));
+      } else if (output_dt == DataType::DT_INT32) {
+        std::memcpy(output->second.data<int>().get(),
+                    output_tensor->data<int>(),
+                    output_size * sizeof(int));
+      } else {
+        LOG(FATAL) << "MACE do not support the output data type: "
+                   << output_dt;
+      }
       return MaceStatus::MACE_SUCCESS;
     }
   } else {
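The Impl change above is the heart of the patch: storage becomes std::shared_ptr<void>, and typed views are produced with std::static_pointer_cast, which shares the original control block, so the deleter captured at allocation still runs regardless of the erased pointer type. A self-contained sketch of the pattern, independent of MACE:

#include <cstdint>
#include <iostream>
#include <memory>

int main() {
  // Type-erased storage, as in MaceTensor::Impl::data after this commit.
  // The array deleter is captured in the control block at construction,
  // so ownership through shared_ptr<void> stays well-defined.
  std::shared_ptr<void> data(new int32_t[4]{1, 2, 3, 4},
                             std::default_delete<int32_t[]>());

  // Typed view: shares the control block; no copy, no new allocation.
  std::shared_ptr<int32_t> as_int = std::static_pointer_cast<int32_t>(data);
  std::cout << as_int.get()[2] << "\n";   // prints 3
  std::cout << data.use_count() << "\n";  // prints 2
  return 0;
}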
mace/ops/common/transpose.cc

@@ -14,19 +14,14 @@
 #include "mace/ops/common/transpose.h"
 #include <algorithm>
 #if defined(MACE_ENABLE_NEON)
 #include <arm_neon.h>
 #endif
 #include "mace/core/types.h"
 #include "mace/utils/logging.h"

 namespace mace {
 namespace ops {
-namespace {
+namespace transpose {

 void TransposeNHWCToNCHWC3(const float *input,
                            float *output,
                            const index_t height,

@@ -100,119 +95,44 @@ void TransposeNCHWToNHWCC2(const float *input,
 #endif
   }
 }
-}  // namespace
-
-MaceStatus Transpose(const float *input,
-                     const std::vector<int64_t> &input_shape,
-                     const std::vector<int> &dst_dims,
-                     float *output) {
-  MACE_CHECK((input_shape.size() == 2 && dst_dims.size() == 2) ||
-                 (input_shape.size() == 4 && dst_dims.size() == 4),
-             "Only support 2D or 4D transpose");
...
-  return MaceStatus::MACE_SUCCESS;
-}

+void TransposeNHWCToNCHWC3(const int *input,
+                           int *output,
+                           const index_t height,
+                           const index_t width) {
+  index_t image_size = height * width;
+
+#pragma omp parallel for
+  for (index_t h = 0; h < height; ++h) {
+    index_t in_offset = h * width * 3;
+    index_t out_offset = h * width;
+    for (index_t w = 0; w < width; ++w) {
+      for (index_t c = 0; c < 3; ++c) {
+        output[out_offset + c * image_size + w] =
+            input[in_offset + w * 3 + c];
+      }
+    }
+  }
+}
+
+void TransposeNCHWToNHWCC2(const int *input,
+                           int *output,
+                           const index_t height,
+                           const index_t width) {
+  index_t image_size = height * width;
+
+#pragma omp parallel for
+  for (index_t h = 0; h < height; ++h) {
+    index_t in_offset = h * width;
+    index_t out_offset = h * width * 2;
+    for (index_t w = 0; w < width; ++w) {
+      for (index_t c = 0; c < 2; ++c) {
+        output[out_offset + w * 2 + c] =
+            input[in_offset + c * image_size + w];
+      }
+    }
+  }
+}
+
+}  // namespace transpose
 }  // namespace ops
 }  // namespace mace
mace/ops/common/transpose.h

@@ -15,17 +15,154 @@
 #ifndef MACE_OPS_COMMON_TRANSPOSE_H_
 #define MACE_OPS_COMMON_TRANSPOSE_H_

 #include <algorithm>
 #include <vector>

 #include "mace/public/mace.h"
 #include "mace/core/tensor.h"

 namespace mace {
 namespace ops {
+namespace transpose {

-MaceStatus Transpose(const float *input,
-                     const std::vector<int64_t> &input_shape,
-                     const std::vector<int> &dst_dims,
-                     float *output);
+void TransposeNHWCToNCHWC3(const float *input,
+                           float *output,
+                           const index_t height,
+                           const index_t width);
+
+void TransposeNHWCToNCHWC3(const int *input,
+                           int *output,
+                           const index_t height,
+                           const index_t width);
+
+void TransposeNCHWToNHWCC2(const float *input,
+                           float *output,
+                           const index_t height,
+                           const index_t width);
+
+void TransposeNCHWToNHWCC2(const int *input,
+                           int *output,
+                           const index_t height,
+                           const index_t width);
+
+}  // namespace transpose
+
+template <typename T>
+MaceStatus Transpose(const T *input,
+                     const std::vector<int64_t> &input_shape,
+                     const std::vector<int> &dst_dims,
+                     T *output,
+                     DataType data_type = DataType::DT_FLOAT) {
+  MACE_CHECK((input_shape.size() == 2 && dst_dims.size() == 2) ||
+                 (input_shape.size() == 4 && dst_dims.size() == 4),
+             "Only support 2D or 4D transpose");
+
+  std::vector<index_t> output_shape;
+  for (size_t i = 0; i < dst_dims.size(); ++i) {
+    output_shape.push_back(input_shape[dst_dims[i]]);
+  }
+
+  if (input_shape.size() == 2) {
+    MACE_CHECK(dst_dims[0] == 1 && dst_dims[1] == 0, "no need transform");
+    index_t height = input_shape[0];
+    index_t width = input_shape[1];
+    index_t stride_i = height;
+    index_t stride_j = width;
+    index_t tile_size = height > 512 || width > 512 ? 64 : 32;
+#pragma omp parallel for collapse(2)
+    for (index_t i = 0; i < height; i += tile_size) {
+      for (index_t j = 0; j < width; j += tile_size) {
+        index_t end_i = std::min(i + tile_size, height);
+        index_t end_j = std::min(j + tile_size, width);
+        for (index_t tile_i = i; tile_i < end_i; ++tile_i) {
+          for (index_t tile_j = j; tile_j < end_j; ++tile_j) {
+            output[tile_j * stride_i + tile_i] =
+                input[tile_i * stride_j + tile_j];
+          }
+        }
+      }
+    }
+  } else if (input_shape.size() == 4) {
+    std::vector<int> transpose_order_from_NHWC_to_NCHW{0, 3, 1, 2};
+    std::vector<int> transpose_order_from_NCHW_to_NHWC{0, 2, 3, 1};
+    index_t batch_size = input_shape[1] * input_shape[2] * input_shape[3];
+    bool supported_dt = (data_type == DataType::DT_FLOAT ||
+                         data_type == DataType::DT_INT32);
+    if (dst_dims == transpose_order_from_NHWC_to_NCHW &&
+        input_shape[3] == 3 && supported_dt) {
+      for (index_t b = 0; b < input_shape[0]; ++b) {
+        transpose::TransposeNHWCToNCHWC3(input + b * batch_size,
+                                         output + b * batch_size,
+                                         input_shape[1],
+                                         input_shape[2]);
+      }
+    } else if (dst_dims == transpose_order_from_NCHW_to_NHWC &&
+               input_shape[1] == 2 && supported_dt) {
+      for (index_t b = 0; b < input_shape[0]; ++b) {
+        transpose::TransposeNCHWToNHWCC2(input + b * batch_size,
+                                         output + b * batch_size,
+                                         input_shape[2],
+                                         input_shape[3]);
+      }
+    } else if (dst_dims == std::vector<int>{0, 2, 1, 3}) {
+      index_t height = input_shape[1];
+      index_t width = input_shape[2];
+      index_t channel = input_shape[3];
+      index_t channel_raw_size = channel * sizeof(T);
+      index_t stride_i = height;
+      index_t stride_j = width;
+      index_t tile_size = std::max(static_cast<index_t>(1),
+                                   static_cast<index_t>(
+                                       std::sqrt(8 * 1024 / channel)));
+#pragma omp parallel for collapse(2)
+      for (index_t i = 0; i < height; i += tile_size) {
+        for (index_t j = 0; j < width; j += tile_size) {
+          index_t end_i = std::min(i + tile_size, height);
+          index_t end_j = std::min(j + tile_size, width);
+          for (index_t tile_i = i; tile_i < end_i; ++tile_i) {
+            for (index_t tile_j = j; tile_j < end_j; ++tile_j) {
+              memcpy(output + (tile_j * stride_i + tile_i) * channel,
+                     input + (tile_i * stride_j + tile_j) * channel,
+                     channel_raw_size);
+            }
+          }
+        }
+      }
+    } else {
+      std::vector<index_t> in_stride{
+          input_shape[1] * input_shape[2] * input_shape[3],
+          input_shape[2] * input_shape[3], input_shape[3], 1};
+      std::vector<index_t> out_stride{
+          output_shape[1] * output_shape[2] * output_shape[3],
+          output_shape[2] * output_shape[3], output_shape[3], 1};
+
+      std::vector<index_t> idim(4, 0);
+      std::vector<index_t> odim(4, 0);
+      for (odim[0] = 0; odim[0] < output_shape[0]; ++odim[0]) {
+        for (odim[1] = 0; odim[1] < output_shape[1]; ++odim[1]) {
+          for (odim[2] = 0; odim[2] < output_shape[2]; ++odim[2]) {
+            for (odim[3] = 0; odim[3] < output_shape[3]; ++odim[3]) {
+              idim[dst_dims[0]] = odim[0];
+              idim[dst_dims[1]] = odim[1];
+              idim[dst_dims[2]] = odim[2];
+              idim[dst_dims[3]] = odim[3];
+              output[odim[0] * out_stride[0] + odim[1] * out_stride[1] +
+                     odim[2] * out_stride[2] + odim[3]] =
+                  input[idim[0] * in_stride[0] + idim[1] * in_stride[1] +
+                        idim[2] * in_stride[2] + idim[3]];
+            }
+          }
+        }
+      }
+    }
+  } else {
+    MACE_NOT_IMPLEMENTED;
+  }
+
+  return MaceStatus::MACE_SUCCESS;
+}

 }  // namespace ops
 }  // namespace mace
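The fallback branch of the new Transpose<T> template is plain stride arithmetic: enumerate output coordinates in order and map each back through dst_dims to an input coordinate. Below is a standalone sketch of just that path; the helper name TransposeGeneric, the shape, and the permutation are illustrative, not part of the commit.

#include <cstdint>
#include <iostream>
#include <vector>

// Generic 4-D permutation, mirroring the fallback branch of Transpose<T>:
// idim[dst_dims[k]] = odim[k] recovers the source coordinate for each
// output coordinate, and row-major strides flatten both into offsets.
template <typename T>
void TransposeGeneric(const T *input, const std::vector<int64_t> &in_shape,
                      const std::vector<int> &dst_dims, T *output) {
  std::vector<int64_t> out_shape(4);
  for (int k = 0; k < 4; ++k) out_shape[k] = in_shape[dst_dims[k]];
  std::vector<int64_t> in_stride{in_shape[1] * in_shape[2] * in_shape[3],
                                 in_shape[2] * in_shape[3], in_shape[3], 1};
  std::vector<int64_t> out_stride{out_shape[1] * out_shape[2] * out_shape[3],
                                  out_shape[2] * out_shape[3],
                                  out_shape[3], 1};
  std::vector<int64_t> idim(4), odim(4);
  for (odim[0] = 0; odim[0] < out_shape[0]; ++odim[0])
    for (odim[1] = 0; odim[1] < out_shape[1]; ++odim[1])
      for (odim[2] = 0; odim[2] < out_shape[2]; ++odim[2])
        for (odim[3] = 0; odim[3] < out_shape[3]; ++odim[3]) {
          for (int k = 0; k < 4; ++k) idim[dst_dims[k]] = odim[k];
          output[odim[0] * out_stride[0] + odim[1] * out_stride[1] +
                 odim[2] * out_stride[2] + odim[3]] =
              input[idim[0] * in_stride[0] + idim[1] * in_stride[1] +
                    idim[2] * in_stride[2] + idim[3]];
        }
}

int main() {
  // 1x2x2x3 NHWC -> NCHW (dst_dims {0, 3, 1, 2}) with int32 elements.
  std::vector<int64_t> shape{1, 2, 2, 3};
  int32_t in[12], out[12];
  for (int i = 0; i < 12; ++i) in[i] = i;
  TransposeGeneric(in, shape, {0, 3, 1, 2}, out);
  for (int i = 0; i < 12; ++i) std::cout << out[i] << " ";
  std::cout << "\n";  // 0 3 6 9 1 4 7 10 2 5 8 11
  return 0;
}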
mace/ops/expand_dims.cc

@@ -14,6 +14,8 @@
 #include "mace/core/operator.h"
+#include "mace/ops/common/transpose.h"
+#include "mace/utils/math.h"

 namespace mace {
 namespace ops {

@@ -33,21 +35,35 @@ class ExpandDimsOp<DeviceType::CPU, T> : public Operation {
     const Tensor *input = this->Input(0);
     Tensor *output = this->Output(0);
     index_t input_dims_size = input->dim_size();
     if (axis_ < 0) {
       axis_ += input_dims_size + 1;
     }
     MACE_CHECK(axis_ >= 0 && axis_ <= input_dims_size,
                "axis is out of bound: ", axis_);
     const std::vector<index_t> input_shape = input->shape();
-    std::vector<index_t> output_shape;
-    output_shape.insert(output_shape.end(), input_shape.begin(),
-                        input_shape.begin() + axis_);
-    output_shape.insert(output_shape.end(), 1);
-    output_shape.insert(output_shape.end(), input_shape.begin() + axis_,
-                        input_shape.end());
+    std::vector<index_t> output_shape(input_shape);
+    output_shape.insert(output_shape.begin() + axis_, 1);

-    output->ReuseTensorBuffer(*input);
-    output->Reshape(output_shape);
+    bool has_data_format =
+        Operation::GetOptionalArg<int>("has_data_format", 0) == 1;
+    if (has_data_format && output_shape.size() == 4) {
+      // only tensorflow support expand dim, so the default format is NHWC
+      // transform NHWC to NCHW
+      auto t_output_shape =
+          TransposeShape<int64_t, int64_t>(output_shape, {0, 3, 1, 2});
+      output->Resize(t_output_shape);
+      Tensor::MappingGuard input_guard(input);
+      Tensor::MappingGuard output_guard(output);
+      auto input_data = input->data<T>();
+      auto output_data = output->mutable_data<T>();
+      Transpose(input_data, output_shape, {0, 3, 1, 2}, output_data);
+    } else {
+      output->Resize(output_shape);
+      Tensor::MappingGuard input_guard(input);
+      auto input_data = input->data<T>();
+      output->Copy<T>(input_data, input->size());
+    }

     return MaceStatus::MACE_SUCCESS;
   }

@@ -62,11 +78,6 @@ void RegisterExpandDims(OpRegistryBase *op_registry) {
   MACE_REGISTER_OP(op_registry, "ExpandDims", ExpandDimsOp,
                    DeviceType::CPU, int32_t);

 #ifdef MACE_ENABLE_QUANTIZE
   MACE_REGISTER_OP(op_registry, "ExpandDims", ExpandDimsOp,
                    DeviceType::CPU, uint8_t);
 #endif  // MACE_ENABLE_QUANTIZE
 }

 }  // namespace ops
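The new shape computation collapses the old three-insert sequence into one: normalize a negative axis, then insert a 1 into a copy of the input shape. A reduced sketch of that logic with two worked cases (the helper name ExpandDimsShape is illustrative):

#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

// Reduced sketch of the ExpandDims shape logic after this commit:
// a negative axis counts from the end (inclusive of the new dimension),
// and the output is the input shape with a 1 inserted at axis.
std::vector<int64_t> ExpandDimsShape(std::vector<int64_t> shape, int axis) {
  int64_t dims = static_cast<int64_t>(shape.size());
  if (axis < 0) axis += dims + 1;
  assert(axis >= 0 && axis <= dims && "axis is out of bound");
  shape.insert(shape.begin() + axis, 1);
  return shape;
}

int main() {
  for (int64_t d : ExpandDimsShape({2, 3}, -1)) std::cout << d << " ";
  std::cout << "\n";  // 2 3 1
  for (int64_t d : ExpandDimsShape({2, 3}, 0)) std::cout << d << " ";
  std::cout << "\n";  // 1 2 3
  return 0;
}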
mace/public/mace.h

@@ -326,7 +326,7 @@ class MACE_API MaceTensor {
   // of shared_ptr and manage the life cycle of the buffer by yourself.
   // For example, std::shared_ptr<float>(raw_buffer, [](float *){});
   MaceTensor(const std::vector<int64_t> &shape,
-             std::shared_ptr<float> data,
+             std::shared_ptr<void> data,
              const DataFormat format = DataFormat::NHWC);
   MaceTensor();
   MaceTensor(const MaceTensor &other);

@@ -339,8 +339,20 @@ class MACE_API MaceTensor {
   const std::vector<int64_t> &shape() const;
   const std::shared_ptr<float> data() const;
   std::shared_ptr<float> data();
+  template <typename T>
+  const std::shared_ptr<T> data() const {
+    return std::static_pointer_cast<T>(raw_data());
+  }
+  template <typename T>
+  std::shared_ptr<T> data() {
+    return std::static_pointer_cast<T>(raw_mutable_data());
+  }
   DataFormat data_format() const;

+ private:
+  std::shared_ptr<void> raw_data() const;
+  std::shared_ptr<void> raw_mutable_data();
+
  private:
   class Impl;
   std::unique_ptr<Impl> impl_;
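From the caller's side, the visible change is the shared_ptr<void> constructor plus the template accessors: existing float code compiles unchanged through data(), while int32 models use data<int>(). A hedged usage sketch against the patched header follows; the 1x128 shape, the value written, and the choice of DF_NONE are illustrative, not taken from this commit.

#include <cstdint>
#include <memory>
#include <vector>

#include "mace/public/mace.h"

int main() {
  // An int32 input, e.g. embedding ids; the 1x128 shape is hypothetical.
  std::vector<int64_t> shape = {1, 128};
  auto buffer = std::shared_ptr<int32_t>(new int32_t[128](),
                                         std::default_delete<int32_t[]>());
  buffer.get()[0] = 42;

  // shared_ptr<int32_t> converts implicitly to the new shared_ptr<void>
  // parameter; the array deleter travels with the shared control block.
  mace::MaceTensor tensor(shape, buffer, mace::DataFormat::DF_NONE);

  // Typed view through the new template accessor.
  std::shared_ptr<int> ids = tensor.data<int>();
  return ids.get()[0] == 42 ? 0 : 1;
}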
mace/python/tools/converter.py

@@ -47,6 +47,11 @@ data_format_map = {
     'OIHW': cvt.DataFormat.OIHW,
 }

+data_type_map = {
+    'float32': mace_pb2.DT_FLOAT,
+    'int32': mace_pb2.DT_INT32,
+}
+
 def parse_data_type(data_type, device_type):
     if device_type == cvt.DeviceType.CPU.value or \

@@ -141,6 +146,7 @@ def main(unused_args):
     option.data_type = parse_data_type(FLAGS.data_type, option.device)

     input_node_names = FLAGS.input_node.split(',')
+    input_data_types = FLAGS.input_data_types.split(',')
     input_node_shapes = FLAGS.input_shape.split(':')
     input_node_formats = FLAGS.input_data_formats.split(",")
     if FLAGS.input_range:

@@ -152,10 +158,8 @@ def main(unused_args):
     for i in six.moves.range(len(input_node_names)):
         input_node = cvt.NodeInfo()
         input_node.name = input_node_names[i]
-        if len(input_node_formats) == 1:
-            input_node.data_format = data_format_map[input_node_formats[0]]
-        else:
-            input_node.data_format = data_format_map[input_node_formats[i]]
+        input_node.data_type = data_type_map[input_data_types[i]]
+        input_node.data_format = data_format_map[input_node_formats[i]]
         input_node.shape = parse_int_array_from_str(input_node_shapes[i])
         if input_node.data_format == cvt.DataFormat.NCHW and\
                 len(input_node.shape) == 4:

@@ -166,6 +170,7 @@ def main(unused_args):
         option.add_input_node(input_node)

     output_node_names = FLAGS.output_node.split(',')
+    output_data_types = FLAGS.output_data_types.split(',')
     output_node_shapes = FLAGS.output_shape.split(':')
     output_node_formats = FLAGS.output_data_formats.split(",")
     if len(output_node_names) != len(output_node_shapes):

@@ -173,10 +178,8 @@ def main(unused_args):
     for i in six.moves.range(len(output_node_names)):
         output_node = cvt.NodeInfo()
         output_node.name = output_node_names[i]
-        if len(output_node_formats) == 1:
-            output_node.data_format = data_format_map[output_node_formats[0]]
-        else:
-            output_node.data_format = data_format_map[output_node_formats[i]]
+        output_node.data_type = data_type_map[output_data_types[i]]
+        output_node.data_format = data_format_map[output_node_formats[i]]
         output_node.shape = parse_int_array_from_str(output_node_shapes[i])
         if output_node.data_format == cvt.DataFormat.NCHW and\
                 len(output_node.shape) == 4:

@@ -290,6 +293,11 @@ def parse_args():
         type=str,
         default="input_node",
         help="e.g., input_node")
+    parser.add_argument(
+        "--input_data_types",
+        type=str,
+        default="float32",
+        help="e.g., float32|int32")
     parser.add_argument(
         "--input_data_formats",
         type=str,

@@ -297,6 +305,11 @@ def parse_args():
         help="e.g., NHWC,NONE")
     parser.add_argument(
         "--output_node", type=str, default="softmax", help="e.g., softmax")
+    parser.add_argument(
+        "--output_data_types",
+        type=str,
+        default="float32",
+        help="e.g., float32|int32")
     parser.add_argument(
         "--output_data_formats",
         type=str,
mace/python/tools/converter_tool/base_converter.py

@@ -298,6 +298,7 @@ class NodeInfo(object):
     def __init__(self):
         self._name = None
+        self._data_type = mace_pb2.DT_FLOAT
         self._shape = []
         self._data_format = DataFormat.NHWC
         self._range = [-1.0, 1.0]

@@ -306,6 +307,10 @@ class NodeInfo(object):
     def name(self):
         return self._name

+    @property
+    def data_type(self):
+        return self._data_type
+
     @property
     def shape(self):
         return self._shape

@@ -322,6 +327,10 @@ class NodeInfo(object):
     def name(self, name):
         self._name = name

+    @data_type.setter
+    def data_type(self, data_type):
+        self._data_type = data_type
+
     @shape.setter
     def shape(self, shape):
         self._shape = shape
mace/python/tools/converter_tool/tensorflow_converter.py

@@ -102,6 +102,7 @@ TFSupportedOps = [
     'Mean',
     'Const',
     'Gather',
+    'GatherV2',
     'StridedSlice',
     'Slice',
     'ReverseV2',

@@ -241,6 +242,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
             TFOpType.Mean.name: self.convert_mean,
             TFOpType.Const.name: self.convert_nop,
             TFOpType.Gather.name: self.convert_gather,
+            TFOpType.GatherV2.name: self.convert_gather,
             TFOpType.StridedSlice.name: self.convert_stridedslice,
             TFOpType.Slice.name: self.convert_slice,
             TFOpType.ReverseV2.name: self.convert_reverse,

@@ -838,16 +840,11 @@ class TensorflowConverter(base_converter.ConverterInterface):
         op = self.convert_general_op(tf_op)
         op.type = MaceOp.ExpandDims.name

+        axis_value = tf_op.inputs[1].eval().astype(np.int32)
         axis_arg = op.arg.add()
         axis_arg.name = MaceKeyword.mace_axis_str
-        try:
-            axis_value = tf_op.get_attr('dim')
-        except ValueError:
-            try:
-                axis_value = tf_op.get_attr('axis')
-            except ValueError:
-                axis_value = 0
         axis_arg.i = axis_value
+        del op.input[1]

     def convert_squeeze(self, tf_op):
         op = self.convert_general_op(tf_op)
mace/python/tools/converter_tool/transformer.py

@@ -323,7 +323,7 @@ class Transformer(base_converter.ConverterInterface):
             input_info.name = input_node.name
             input_info.data_format = input_node.data_format.value
             input_info.dims.extend(input_node.shape)
-            input_info.data_type = mace_pb2.DT_FLOAT
+            input_info.data_type = input_node.data_type

         output_nodes = self._option.check_nodes.values()
         for output_node in output_nodes:

@@ -332,7 +332,7 @@ class Transformer(base_converter.ConverterInterface):
             output_info.data_format = output_node.data_format.value
             output_info.dims.extend(
                 self._producer[output_node.name].output_shape[0].dims)
-            output_info.data_type = mace_pb2.DT_FLOAT
+            output_info.data_type = output_node.data_type

         return False
mace/tools/validation/mace_run.cc

@@ -269,17 +269,18 @@ bool RunModel(const std::string &model_name,
   std::map<std::string, mace::MaceTensor> outputs;
   for (size_t i = 0; i < input_count; ++i) {
     // Allocate input and output
+    // only support float and int32, use char for generalization
+    // sizeof(int) == 4, sizeof(float) == 4
     int64_t input_size =
-        std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 1,
-                        std::multiplies<int64_t>());
+        std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 4,
+                        std::multiplies<int64_t>());
-    auto buffer_in = std::shared_ptr<float>(new float[input_size],
-                                            std::default_delete<float[]>());
+    auto buffer_in = std::shared_ptr<char>(new char[input_size],
+                                           std::default_delete<char[]>());
     // load input
     std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]),
                           std::ios::in | std::ios::binary);
     if (in_file.is_open()) {
-      in_file.read(reinterpret_cast<char *>(buffer_in.get()),
-                   input_size * sizeof(float));
+      in_file.read(buffer_in.get(), input_size);
       in_file.close();
     } else {
       LOG(INFO) << "Open input file failed";

@@ -290,11 +291,12 @@ bool RunModel(const std::string &model_name,
   }
   for (size_t i = 0; i < output_count; ++i) {
+    // only support float and int32, use char for generalization
     int64_t output_size =
-        std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1,
-                        std::multiplies<int64_t>());
+        std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 4,
+                        std::multiplies<int64_t>());
-    auto buffer_out = std::shared_ptr<float>(new float[output_size],
-                                             std::default_delete<float[]>());
+    auto buffer_out = std::shared_ptr<char>(new char[output_size],
+                                            std::default_delete<char[]>());
     outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out,
                                                 output_data_formats[i]);
   }

@@ -408,12 +410,12 @@ bool RunModel(const std::string &model_name,
       std::string output_name =
           FLAGS_output_file + "_" + FormatName(output_names[i]);
       std::ofstream out_file(output_name, std::ios::binary);
+      // only support float and int32
       int64_t output_size =
-          std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1,
-                          std::multiplies<int64_t>());
+          std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 4,
+                          std::multiplies<int64_t>());
-      out_file.write(
-          reinterpret_cast<char *>(outputs[output_names[i]].data().get()),
-          output_size * sizeof(float));
+      out_file.write(outputs[output_names[i]].data<char>().get(),
+                     output_size);
       out_file.flush();
       out_file.close();
       LOG(INFO) << "Write output file " << output_name << " with size "

@@ -478,6 +480,7 @@ int Main(int argc, char **argv) {
   // get cpu capability
   Capability cpu_capability = GetCapability(DeviceType::CPU);
+  float cpu_float32_performance = cpu_capability.float32_performance.exec_time;

   bool ret = false;
   for (int i = 0; i < FLAGS_restart_round; ++i) {

@@ -485,7 +488,7 @@ int Main(int argc, char **argv) {
     ret = RunModel(FLAGS_model_name, input_names, input_shape_vec,
                    input_data_formats, output_names, output_shape_vec,
-                   output_data_formats,
-                   cpu_capability.float32_performance.exec_time);
+                   output_data_formats, cpu_float32_performance);
   }
   if (ret) {
     return 0;
tools/common.py

@@ -397,6 +397,7 @@ class YAMLKeyword(object):
     runtime = 'runtime'
     data_type = 'data_type'
     input_data_types = 'input_data_types'
+    output_data_types = 'output_data_types'
     input_data_formats = 'input_data_formats'
     output_data_formats = 'output_data_formats'
     limit_opencl_kernel_time = 'limit_opencl_kernel_time'
tools/converter.py

@@ -65,13 +65,13 @@ RuntimeTypeStrs = [
     "cpu+gpu"
 ]

-InputDataTypeStrs = [
+InOutDataTypeStrs = [
     "int32",
     "float32",
 ]

-InputDataType = Enum('InputDataType',
-                     [(ele, ele) for ele in InputDataTypeStrs],
-                     type=str)
+InOutDataType = Enum('InputDataType',
+                     [(ele, ele) for ele in InOutDataTypeStrs],
+                     type=str)

 FPDataTypeStrs = [

@@ -410,17 +410,23 @@ def format_model_config(flags):
             else:
                 subgraph[key] = []

-            input_data_types = subgraph.get(YAMLKeyword.input_data_types, "")
-            if input_data_types:
-                if not isinstance(input_data_types, list):
-                    subgraph[YAMLKeyword.input_data_types] = [input_data_types]
-                for input_data_type in subgraph[YAMLKeyword.input_data_types]:
-                    mace_check(input_data_type in InputDataTypeStrs,
-                               ModuleName.YAML_CONFIG,
-                               "'input_data_types' must be in "
-                               + str(InputDataTypeStrs))
-            else:
-                subgraph[YAMLKeyword.input_data_types] = []
+            for key in [YAMLKeyword.input_data_types,
+                        YAMLKeyword.output_data_types]:
+                if key == YAMLKeyword.input_data_types:
+                    count = input_size
+                else:
+                    count = output_size
+                data_types = subgraph.get(key, "")
+                if data_types:
+                    if not isinstance(data_types, list):
+                        subgraph[key] = [data_types] * count
+                    for data_type in subgraph[key]:
+                        mace_check(data_type in InOutDataTypeStrs,
+                                   ModuleName.YAML_CONFIG,
+                                   key + " must be in "
+                                   + str(InOutDataTypeStrs))
+                else:
+                    subgraph[key] = [InOutDataType.float32] * count

             input_data_formats = subgraph.get(YAMLKeyword.input_data_formats,
                                               [])

@@ -722,8 +728,10 @@ def convert_model(configs, cl_mem_type):
             model_config[YAMLKeyword.model_sha256_checksum],
             model_config[YAMLKeyword.weight_sha256_checksum],
             ",".join(subgraphs[0][YAMLKeyword.input_tensors]),
+            ",".join(subgraphs[0][YAMLKeyword.input_data_types]),
             ",".join(subgraphs[0][YAMLKeyword.input_data_formats]),
             ",".join(subgraphs[0][YAMLKeyword.output_tensors]),
+            ",".join(subgraphs[0][YAMLKeyword.output_data_types]),
             ",".join(subgraphs[0][YAMLKeyword.output_data_formats]),
             ",".join(subgraphs[0][YAMLKeyword.check_tensors]),
             runtime,
tools/sh_commands.py

@@ -484,8 +484,10 @@ def gen_model_code(model_codegen_dir,
                    model_sha256_checksum,
                    weight_sha256_checksum,
                    input_nodes,
+                   input_data_types,
                    input_data_formats,
                    output_nodes,
+                   output_data_types,
                    output_data_formats,
                    check_nodes,
                    runtime,

@@ -519,8 +521,10 @@ def gen_model_code(model_codegen_dir,
             "--model_checksum=%s" % model_sha256_checksum,
             "--weight_checksum=%s" % weight_sha256_checksum,
             "--input_node=%s" % input_nodes,
+            "--input_data_types=%s" % input_data_types,
             "--input_data_formats=%s" % input_data_formats,
             "--output_node=%s" % output_nodes,
+            "--output_data_types=%s" % output_data_types,
             "--output_data_formats=%s" % output_data_formats,
             "--check_node=%s" % check_nodes,
             "--runtime=%s" % runtime,