Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
a3850281
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
a3850281
编写于
9月 07, 2017
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Update data types and linking opts
上级
68a335f1
变更
28
隐藏空白更改
内联
并排
Showing
28 changed file
with
122 addition
and
150 deletion
+122
-150
mace/core/BUILD
mace/core/BUILD
+6
-0
mace/core/common.h
mace/core/common.h
+1
-2
mace/core/integral_types.h
mace/core/integral_types.h
+0
-19
mace/core/logging.cc
mace/core/logging.cc
+14
-14
mace/core/logging.h
mace/core/logging.h
+1
-3
mace/core/operator.cc
mace/core/operator.cc
+2
-2
mace/core/operator.h
mace/core/operator.h
+1
-1
mace/core/serializer.cc
mace/core/serializer.cc
+9
-9
mace/core/tensor.h
mace/core/tensor.h
+18
-18
mace/core/testing/env_time.h
mace/core/testing/env_time.h
+2
-2
mace/core/testing/test_benchmark.cc
mace/core/testing/test_benchmark.cc
+10
-10
mace/core/testing/test_benchmark.h
mace/core/testing/test_benchmark.h
+2
-2
mace/core/types.h
mace/core/types.h
+7
-7
mace/examples/BUILD
mace/examples/BUILD
+0
-8
mace/examples/benchmark_example.cc
mace/examples/benchmark_example.cc
+2
-2
mace/kernels/BUILD
mace/kernels/BUILD
+5
-9
mace/kernels/addn.h
mace/kernels/addn.h
+2
-2
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+7
-7
mace/kernels/benchmark/addn_benchmark.cc
mace/kernels/benchmark/addn_benchmark.cc
+2
-2
mace/kernels/benchmark/relu_benchmark.cc
mace/kernels/benchmark/relu_benchmark.cc
+2
-2
mace/kernels/neon/addn_neon.cc
mace/kernels/neon/addn_neon.cc
+7
-7
mace/kernels/neon/batch_norm_neon.cc
mace/kernels/neon/batch_norm_neon.cc
+5
-5
mace/kernels/neon/relu_neon.cc
mace/kernels/neon/relu_neon.cc
+3
-3
mace/kernels/relu.h
mace/kernels/relu.h
+2
-2
mace/kernels/test/addn_neon_test.cc
mace/kernels/test/addn_neon_test.cc
+3
-3
mace/kernels/test/relu_neon_test.cc
mace/kernels/test/relu_neon_test.cc
+3
-3
mace/ops/batch_norm.h
mace/ops/batch_norm.h
+3
-3
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+3
-3
未找到文件。
mace/core/BUILD
浏览文件 @
a3850281
...
@@ -7,6 +7,8 @@ package(
...
@@ -7,6 +7,8 @@ package(
licenses
([
"notice"
])
# Apache 2.0
licenses
([
"notice"
])
# Apache 2.0
load
(
"//mace:mace.bzl"
,
"if_android"
)
cc_library
(
cc_library
(
name
=
"core"
,
name
=
"core"
,
srcs
=
glob
([
srcs
=
glob
([
...
@@ -19,6 +21,10 @@ cc_library(
...
@@ -19,6 +21,10 @@ cc_library(
deps
=
[
deps
=
[
"//mace/proto:cc_proto"
,
"//mace/proto:cc_proto"
,
],
],
linkopts
=
if_android
([
"-llog"
,
"-pie"
,
]),
)
)
# Main program for tests
# Main program for tests
...
...
mace/core/common.h
浏览文件 @
a3850281
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
#include <vector>
#include <vector>
#include <algorithm>
#include <algorithm>
#include "mace/core/integral_types.h"
#include "mace/core/logging.h"
#include "mace/core/logging.h"
using
std
::
set
;
using
std
::
set
;
...
@@ -21,7 +20,7 @@ using std::string;
...
@@ -21,7 +20,7 @@ using std::string;
using
std
::
unique_ptr
;
using
std
::
unique_ptr
;
using
std
::
vector
;
using
std
::
vector
;
typedef
int64
TIndex
;
typedef
int64
_t
index_t
;
// Disable the copy and assignment operator for a class.
// Disable the copy and assignment operator for a class.
#ifndef DISABLE_COPY_AND_ASSIGN
#ifndef DISABLE_COPY_AND_ASSIGN
...
...
mace/core/integral_types.h
已删除
100644 → 0
浏览文件 @
68a335f1
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_INTEGRAL_TYPES_H_
#define MACE_CORE_INTEGRAL_TYPES_H_
typedef
int8_t
int8
;
typedef
int16_t
int16
;
typedef
int32_t
int32
;
typedef
int64_t
int64
;
typedef
uint8_t
uint8
;
typedef
uint16_t
uint16
;
typedef
uint32_t
uint32
;
typedef
uint64_t
uint64
;
#endif // MACE_CORE_INTEGRAL_TYPES_H_
mace/core/logging.cc
浏览文件 @
a3850281
...
@@ -69,18 +69,18 @@ void LogMessage::GenerateLogMessage() {
...
@@ -69,18 +69,18 @@ void LogMessage::GenerateLogMessage() {
namespace
{
namespace
{
// Parse log level (int64) from environment variable (char*)
// Parse log level (int64
_t
) from environment variable (char*)
int64
LogLevelStrToInt
(
const
char
*
tf
_env_var_val
)
{
int64
_t
LogLevelStrToInt
(
const
char
*
mace
_env_var_val
)
{
if
(
tf
_env_var_val
==
nullptr
)
{
if
(
mace
_env_var_val
==
nullptr
)
{
return
0
;
return
0
;
}
}
// Ideally we would use env_var / safe_strto64, but it is
// Ideally we would use env_var / safe_strto64, but it is
// hard to use here without pulling in a lot of dependencies,
// hard to use here without pulling in a lot of dependencies,
// so we use std:istringstream instead
// so we use std:istringstream instead
string
min_log_level
(
tf
_env_var_val
);
string
min_log_level
(
mace
_env_var_val
);
std
::
istringstream
ss
(
min_log_level
);
std
::
istringstream
ss
(
min_log_level
);
int64
level
;
int64
_t
level
;
if
(
!
(
ss
>>
level
))
{
if
(
!
(
ss
>>
level
))
{
// Invalid vlog level setting, set level to default (0)
// Invalid vlog level setting, set level to default (0)
level
=
0
;
level
=
0
;
...
@@ -89,26 +89,26 @@ int64 LogLevelStrToInt(const char* tf_env_var_val) {
...
@@ -89,26 +89,26 @@ int64 LogLevelStrToInt(const char* tf_env_var_val) {
return
level
;
return
level
;
}
}
int64
MinLogLevelFromEnv
()
{
int64
_t
MinLogLevelFromEnv
()
{
const
char
*
tf
_env_var_val
=
getenv
(
"MACE_CPP_MIN_LOG_LEVEL"
);
const
char
*
mace
_env_var_val
=
getenv
(
"MACE_CPP_MIN_LOG_LEVEL"
);
return
LogLevelStrToInt
(
tf
_env_var_val
);
return
LogLevelStrToInt
(
mace
_env_var_val
);
}
}
int64
MinVLogLevelFromEnv
()
{
int64
_t
MinVLogLevelFromEnv
()
{
const
char
*
tf
_env_var_val
=
getenv
(
"MACE_CPP_MIN_VLOG_LEVEL"
);
const
char
*
mace
_env_var_val
=
getenv
(
"MACE_CPP_MIN_VLOG_LEVEL"
);
return
LogLevelStrToInt
(
tf
_env_var_val
);
return
LogLevelStrToInt
(
mace
_env_var_val
);
}
}
}
// namespace
}
// namespace
LogMessage
::~
LogMessage
()
{
LogMessage
::~
LogMessage
()
{
// Read the min log level once during the first call to logging.
// Read the min log level once during the first call to logging.
static
int64
min_log_level
=
MinLogLevelFromEnv
();
static
int64
_t
min_log_level
=
MinLogLevelFromEnv
();
if
(
severity_
>=
min_log_level
)
GenerateLogMessage
();
if
(
severity_
>=
min_log_level
)
GenerateLogMessage
();
}
}
int64
LogMessage
::
MinVLogLevel
()
{
int64
_t
LogMessage
::
MinVLogLevel
()
{
static
int64
min_vlog_level
=
MinVLogLevelFromEnv
();
static
int64
_t
min_vlog_level
=
MinVLogLevelFromEnv
();
return
min_vlog_level
;
return
min_vlog_level
;
}
}
...
...
mace/core/logging.h
浏览文件 @
a3850281
...
@@ -9,8 +9,6 @@
...
@@ -9,8 +9,6 @@
#include <limits>
#include <limits>
#include <string>
#include <string>
#include "mace/core/integral_types.h"
#undef ERROR
#undef ERROR
namespace
mace
{
namespace
mace
{
...
@@ -62,7 +60,7 @@ class LogMessage : public std::basic_ostringstream<char> {
...
@@ -62,7 +60,7 @@ class LogMessage : public std::basic_ostringstream<char> {
// Returns the minimum log level for VLOG statements.
// Returns the minimum log level for VLOG statements.
// E.g., if MinVLogLevel() is 2, then VLOG(2) statements will produce output,
// E.g., if MinVLogLevel() is 2, then VLOG(2) statements will produce output,
// but VLOG(3) will not. Defaults to 0.
// but VLOG(3) will not. Defaults to 0.
static
int64
MinVLogLevel
();
static
int64
_t
MinVLogLevel
();
protected:
protected:
void
GenerateLogMessage
();
void
GenerateLogMessage
();
...
...
mace/core/operator.cc
浏览文件 @
a3850281
...
@@ -6,8 +6,8 @@
...
@@ -6,8 +6,8 @@
namespace
mace
{
namespace
mace
{
std
::
map
<
int32
,
OperatorRegistry
*>*
gDeviceTypeRegistry
()
{
std
::
map
<
int32
_t
,
OperatorRegistry
*>*
gDeviceTypeRegistry
()
{
static
std
::
map
<
int32
,
OperatorRegistry
*>
g_device_type_registry
;
static
std
::
map
<
int32
_t
,
OperatorRegistry
*>
g_device_type_registry
;
return
&
g_device_type_registry
;
return
&
g_device_type_registry
;
}
}
...
...
mace/core/operator.h
浏览文件 @
a3850281
...
@@ -44,7 +44,7 @@ class OperatorBase {
...
@@ -44,7 +44,7 @@ class OperatorBase {
*
operator_def_
,
name
,
default_value
);
*
operator_def_
,
name
,
default_value
);
}
}
inline
const
Tensor
*
Input
(
TIndex
idx
)
{
inline
const
Tensor
*
Input
(
index_t
idx
)
{
MACE_CHECK
(
idx
<
inputs_
.
size
());
MACE_CHECK
(
idx
<
inputs_
.
size
());
return
inputs_
[
idx
];
return
inputs_
[
idx
];
}
}
...
...
mace/core/serializer.cc
浏览文件 @
a3850281
...
@@ -17,8 +17,8 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
...
@@ -17,8 +17,8 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
DeviceType
type
)
{
DeviceType
type
)
{
unique_ptr
<
Tensor
>
tensor
(
new
Tensor
(
GetDeviceAllocator
(
type
),
unique_ptr
<
Tensor
>
tensor
(
new
Tensor
(
GetDeviceAllocator
(
type
),
proto
.
data_type
()));
proto
.
data_type
()));
vector
<
TIndex
>
dims
;
vector
<
index_t
>
dims
;
for
(
const
TIndex
d
:
proto
.
dims
())
{
for
(
const
index_t
d
:
proto
.
dims
())
{
dims
.
push_back
(
d
);
dims
.
push_back
(
d
);
}
}
tensor
->
Resize
(
dims
);
tensor
->
Resize
(
dims
);
...
@@ -33,31 +33,31 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
...
@@ -33,31 +33,31 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
proto
.
double_data
().
size
());
proto
.
double_data
().
size
());
break
;
break
;
case
DT_INT32
:
case
DT_INT32
:
tensor
->
template
Copy
<
int32
>(
proto
.
int32_data
().
data
(),
tensor
->
template
Copy
<
int32
_t
>(
proto
.
int32_data
().
data
(),
proto
.
int32_data
().
size
());
proto
.
int32_data
().
size
());
break
;
break
;
case
DT_UINT8
:
case
DT_UINT8
:
tensor
->
CopyWithCast
<
int32
,
uint8
>
(
proto
.
int32_data
().
data
(),
tensor
->
CopyWithCast
<
int32
_t
,
uint8_t
>
(
proto
.
int32_data
().
data
(),
proto
.
int32_data
().
size
());
proto
.
int32_data
().
size
());
break
;
break
;
case
DT_INT16
:
case
DT_INT16
:
tensor
->
CopyWithCast
<
int32
,
int16
>
(
proto
.
int32_data
().
data
(),
tensor
->
CopyWithCast
<
int32
_t
,
int16_t
>
(
proto
.
int32_data
().
data
(),
proto
.
int32_data
().
size
());
proto
.
int32_data
().
size
());
break
;
break
;
case
DT_INT8
:
case
DT_INT8
:
tensor
->
CopyWithCast
<
int32
,
int8
>
(
proto
.
int32_data
().
data
(),
tensor
->
CopyWithCast
<
int32
_t
,
int8_t
>
(
proto
.
int32_data
().
data
(),
proto
.
int32_data
().
size
());
proto
.
int32_data
().
size
());
break
;
break
;
case
DT_INT64
:
case
DT_INT64
:
tensor
->
Copy
<
int64
>
(
proto
.
int64_data
().
data
(),
tensor
->
Copy
<
int64
_t
>
(
proto
.
int64_data
().
data
(),
proto
.
int64_data
().
size
());
proto
.
int64_data
().
size
());
break
;
break
;
case
DT_UINT16
:
case
DT_UINT16
:
tensor
->
CopyWithCast
<
int32
,
uint16
>
(
proto
.
int32_data
().
data
(),
tensor
->
CopyWithCast
<
int32
_t
,
uint16_t
>
(
proto
.
int32_data
().
data
(),
proto
.
int32_data
().
size
());
proto
.
int32_data
().
size
());
break
;
break
;
case
DT_BOOL
:
case
DT_BOOL
:
tensor
->
CopyWithCast
<
int32
,
bool
>
(
proto
.
int32_data
().
data
(),
tensor
->
CopyWithCast
<
int32
_t
,
bool
>
(
proto
.
int32_data
().
data
(),
proto
.
int32_data
().
size
());
proto
.
int32_data
().
size
());
break
;
break
;
case
DT_STRING
:
{
case
DT_STRING
:
{
...
...
mace/core/tensor.h
浏览文件 @
a3850281
...
@@ -25,13 +25,13 @@ namespace mace {
...
@@ -25,13 +25,13 @@ namespace mace {
switch (TYPE_ENUM) { \
switch (TYPE_ENUM) { \
CASE(float, SINGLE_ARG(STMTS)) \
CASE(float, SINGLE_ARG(STMTS)) \
CASE(double, SINGLE_ARG(STMTS)) \
CASE(double, SINGLE_ARG(STMTS)) \
CASE(int32, SINGLE_ARG(STMTS)) \
CASE(int32
_t
, SINGLE_ARG(STMTS)) \
CASE(uint8, SINGLE_ARG(STMTS)) \
CASE(uint8
_t
, SINGLE_ARG(STMTS)) \
CASE(uint16, SINGLE_ARG(STMTS)) \
CASE(uint16
_t
, SINGLE_ARG(STMTS)) \
CASE(int16, SINGLE_ARG(STMTS)) \
CASE(int16
_t
, SINGLE_ARG(STMTS)) \
CASE(int8, SINGLE_ARG(STMTS)) \
CASE(int8
_t
, SINGLE_ARG(STMTS)) \
CASE(string, SINGLE_ARG(STMTS)) \
CASE(string, SINGLE_ARG(STMTS)) \
CASE(int64, SINGLE_ARG(STMTS)) \
CASE(int64
_t
, SINGLE_ARG(STMTS)) \
CASE(bool, SINGLE_ARG(STMTS)) \
CASE(bool, SINGLE_ARG(STMTS)) \
case DT_INVALID: \
case DT_INVALID: \
INVALID; \
INVALID; \
...
@@ -64,17 +64,17 @@ class Tensor {
...
@@ -64,17 +64,17 @@ class Tensor {
inline
DataType
dtype
()
const
{
return
dtype_
;
}
inline
DataType
dtype
()
const
{
return
dtype_
;
}
inline
const
vector
<
TIndex
>&
shape
()
const
{
return
shape_
;
}
inline
const
vector
<
index_t
>&
shape
()
const
{
return
shape_
;
}
inline
TIndex
dim_size
()
const
{
return
shape_
.
size
();
}
inline
index_t
dim_size
()
const
{
return
shape_
.
size
();
}
inline
TIndex
dim
(
TIndex
index
)
const
{
inline
index_t
dim
(
index_t
index
)
const
{
MACE_CHECK
(
index
<
shape_
.
size
(),
"Exceeding ndim limit"
);
MACE_CHECK
(
index
<
shape_
.
size
(),
"Exceeding ndim limit"
);
MACE_CHECK
(
index
>=
0
,
"Cannot have negative dimension index"
);
MACE_CHECK
(
index
>=
0
,
"Cannot have negative dimension index"
);
return
shape_
[
index
];
return
shape_
[
index
];
}
}
inline
TIndex
size
()
const
{
return
size_
;
}
inline
index_t
size
()
const
{
return
size_
;
}
inline
const
void
*
raw_data
()
const
{
inline
const
void
*
raw_data
()
const
{
MACE_CHECK
(
data_
.
get
()
||
size_
==
0
);
MACE_CHECK
(
data_
.
get
()
||
size_
==
0
);
...
@@ -108,9 +108,9 @@ class Tensor {
...
@@ -108,9 +108,9 @@ class Tensor {
return
static_cast
<
T
*>
(
raw_mutable_data
());
return
static_cast
<
T
*>
(
raw_mutable_data
());
}
}
inline
void
Resize
(
const
vector
<
TIndex
>&
shape
)
{
inline
void
Resize
(
const
vector
<
index_t
>&
shape
)
{
shape_
=
shape
;
shape_
=
shape
;
TIndex
size
=
NumElements
();
index_t
size
=
NumElements
();
if
(
size_
!=
size
)
{
if
(
size_
!=
size
)
{
size_
=
size
;
size_
=
size
;
data_
.
reset
();
data_
.
reset
();
...
@@ -126,14 +126,14 @@ class Tensor {
...
@@ -126,14 +126,14 @@ class Tensor {
}
}
template
<
typename
T
>
template
<
typename
T
>
inline
void
Copy
(
const
T
*
src
,
TIndex
size
)
{
inline
void
Copy
(
const
T
*
src
,
index_t
size
)
{
MACE_CHECK
(
size
==
size_
,
"copy src and dst with different size."
);
MACE_CHECK
(
size
==
size_
,
"copy src and dst with different size."
);
CopyBytes
(
static_cast
<
const
void
*>
(
src
),
sizeof
(
T
)
*
size
);
CopyBytes
(
static_cast
<
const
void
*>
(
src
),
sizeof
(
T
)
*
size
);
}
}
template
<
typename
SrcType
,
typename
DstType
>
template
<
typename
SrcType
,
typename
DstType
>
inline
void
CopyWithCast
(
const
SrcType
*
src
,
size_t
size
)
{
inline
void
CopyWithCast
(
const
SrcType
*
src
,
size_t
size
)
{
MACE_CHECK
(
static_cast
<
TIndex
>
(
size
)
==
size_
,
"copy src and dst with different size."
);
MACE_CHECK
(
static_cast
<
index_t
>
(
size
)
==
size_
,
"copy src and dst with different size."
);
unique_ptr
<
DstType
[]
>
buffer
(
new
DstType
[
size
]);
unique_ptr
<
DstType
[]
>
buffer
(
new
DstType
[
size
]);
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
buffer
[
i
]
=
static_cast
<
DstType
>
(
src
[
i
]);
buffer
[
i
]
=
static_cast
<
DstType
>
(
src
[
i
]);
...
@@ -161,15 +161,15 @@ class Tensor {
...
@@ -161,15 +161,15 @@ class Tensor {
}
}
private:
private:
inline
int64
NumElements
()
const
{
inline
int64
_t
NumElements
()
const
{
return
std
::
accumulate
(
shape_
.
begin
(),
shape_
.
end
(),
1
,
std
::
multiplies
<
int64
>
());
return
std
::
accumulate
(
shape_
.
begin
(),
shape_
.
end
(),
1
,
std
::
multiplies
<
int64
_t
>
());
}
}
Allocator
*
alloc_
;
Allocator
*
alloc_
;
TIndex
size_
;
index_t
size_
;
DataType
dtype_
;
DataType
dtype_
;
std
::
shared_ptr
<
void
>
data_
;
std
::
shared_ptr
<
void
>
data_
;
vector
<
TIndex
>
shape_
;
vector
<
index_t
>
shape_
;
};
};
}
// namespace tensor
}
// namespace tensor
...
...
mace/core/testing/env_time.h
浏览文件 @
a3850281
...
@@ -16,10 +16,10 @@ namespace mace {
...
@@ -16,10 +16,10 @@ namespace mace {
namespace
testing
{
namespace
testing
{
inline
int64
NowMicros
()
{
inline
int64
_t
NowMicros
()
{
struct
timeval
tv
;
struct
timeval
tv
;
gettimeofday
(
&
tv
,
nullptr
);
gettimeofday
(
&
tv
,
nullptr
);
return
static_cast
<
int64
>
(
tv
.
tv_sec
)
*
1000000
+
tv
.
tv_usec
;
return
static_cast
<
int64
_t
>
(
tv
.
tv_sec
)
*
1000000
+
tv
.
tv_usec
;
}
}
}
// namespace testing
}
// namespace testing
...
...
mace/core/testing/test_benchmark.cc
浏览文件 @
a3850281
...
@@ -16,10 +16,10 @@ namespace testing {
...
@@ -16,10 +16,10 @@ namespace testing {
static
std
::
vector
<
Benchmark
*>*
all_benchmarks
=
nullptr
;
static
std
::
vector
<
Benchmark
*>*
all_benchmarks
=
nullptr
;
static
std
::
string
label
;
static
std
::
string
label
;
static
int64
bytes_processed
;
static
int64
_t
bytes_processed
;
static
int64
items_processed
;
static
int64
_t
items_processed
;
static
int64
accum_time
=
0
;
static
int64
_t
accum_time
=
0
;
static
int64
start_time
=
0
;
static
int64
_t
start_time
=
0
;
Benchmark
::
Benchmark
(
const
char
*
name
,
void
(
*
fn
)(
int
))
Benchmark
::
Benchmark
(
const
char
*
name
,
void
(
*
fn
)(
int
))
:
name_
(
name
),
num_args_
(
0
),
fn0_
(
fn
)
{
:
name_
(
name
),
num_args_
(
0
),
fn0_
(
fn
)
{
...
@@ -112,10 +112,10 @@ void Benchmark::Register() {
...
@@ -112,10 +112,10 @@ void Benchmark::Register() {
}
}
void
Benchmark
::
Run
(
int
arg1
,
int
arg2
,
int
*
run_count
,
double
*
run_seconds
)
{
void
Benchmark
::
Run
(
int
arg1
,
int
arg2
,
int
*
run_count
,
double
*
run_seconds
)
{
static
const
int64
kMinIters
=
100
;
static
const
int64
_t
kMinIters
=
100
;
static
const
int64
kMaxIters
=
1000000000
;
static
const
int64
_t
kMaxIters
=
1000000000
;
static
const
double
kMinTime
=
0.5
;
static
const
double
kMinTime
=
0.5
;
int64
iters
=
kMinIters
;
int64
_t
iters
=
kMinIters
;
while
(
true
)
{
while
(
true
)
{
accum_time
=
0
;
accum_time
=
0
;
start_time
=
NowMicros
();
start_time
=
NowMicros
();
...
@@ -142,13 +142,13 @@ void Benchmark::Run(int arg1, int arg2, int* run_count, double* run_seconds) {
...
@@ -142,13 +142,13 @@ void Benchmark::Run(int arg1, int arg2, int* run_count, double* run_seconds) {
double
multiplier
=
1.4
*
kMinTime
/
std
::
max
(
seconds
,
1e-9
);
double
multiplier
=
1.4
*
kMinTime
/
std
::
max
(
seconds
,
1e-9
);
multiplier
=
std
::
min
(
10.0
,
multiplier
);
multiplier
=
std
::
min
(
10.0
,
multiplier
);
if
(
multiplier
<=
1.0
)
multiplier
*=
2.0
;
if
(
multiplier
<=
1.0
)
multiplier
*=
2.0
;
iters
=
std
::
max
<
int64
>
(
multiplier
*
iters
,
iters
+
1
);
iters
=
std
::
max
<
int64
_t
>
(
multiplier
*
iters
,
iters
+
1
);
iters
=
std
::
min
(
iters
,
kMaxIters
);
iters
=
std
::
min
(
iters
,
kMaxIters
);
}
}
}
}
void
BytesProcessed
(
int64
n
)
{
bytes_processed
=
n
;
}
void
BytesProcessed
(
int64
_t
n
)
{
bytes_processed
=
n
;
}
void
ItemsProcessed
(
int64
n
)
{
items_processed
=
n
;
}
void
ItemsProcessed
(
int64
_t
n
)
{
items_processed
=
n
;
}
void
StartTiming
()
{
void
StartTiming
()
{
if
(
start_time
==
0
)
start_time
=
NowMicros
();
if
(
start_time
==
0
)
start_time
=
NowMicros
();
}
}
...
...
mace/core/testing/test_benchmark.h
浏览文件 @
a3850281
...
@@ -42,8 +42,8 @@ class Benchmark {
...
@@ -42,8 +42,8 @@ class Benchmark {
};
};
void
RunBenchmarks
();
void
RunBenchmarks
();
void
BytesProcessed
(
int64
);
void
BytesProcessed
(
int64
_t
);
void
ItemsProcessed
(
int64
);
void
ItemsProcessed
(
int64
_t
);
void
StartTiming
();
void
StartTiming
();
void
StopTiming
();
void
StopTiming
();
...
...
mace/core/types.h
浏览文件 @
a3850281
...
@@ -42,16 +42,16 @@ struct EnumToDataType {}; // Specializations below
...
@@ -42,16 +42,16 @@ struct EnumToDataType {}; // Specializations below
MATCH_TYPE_AND_ENUM
(
float
,
DT_FLOAT
);
MATCH_TYPE_AND_ENUM
(
float
,
DT_FLOAT
);
MATCH_TYPE_AND_ENUM
(
double
,
DT_DOUBLE
);
MATCH_TYPE_AND_ENUM
(
double
,
DT_DOUBLE
);
MATCH_TYPE_AND_ENUM
(
int32
,
DT_INT32
);
MATCH_TYPE_AND_ENUM
(
int32
_t
,
DT_INT32
);
MATCH_TYPE_AND_ENUM
(
uint16
,
DT_UINT16
);
MATCH_TYPE_AND_ENUM
(
uint16
_t
,
DT_UINT16
);
MATCH_TYPE_AND_ENUM
(
uint8
,
DT_UINT8
);
MATCH_TYPE_AND_ENUM
(
uint8
_t
,
DT_UINT8
);
MATCH_TYPE_AND_ENUM
(
int16
,
DT_INT16
);
MATCH_TYPE_AND_ENUM
(
int16
_t
,
DT_INT16
);
MATCH_TYPE_AND_ENUM
(
int8
,
DT_INT8
);
MATCH_TYPE_AND_ENUM
(
int8
_t
,
DT_INT8
);
MATCH_TYPE_AND_ENUM
(
string
,
DT_STRING
);
MATCH_TYPE_AND_ENUM
(
string
,
DT_STRING
);
MATCH_TYPE_AND_ENUM
(
int64
,
DT_INT64
);
MATCH_TYPE_AND_ENUM
(
int64
_t
,
DT_INT64
);
MATCH_TYPE_AND_ENUM
(
bool
,
DT_BOOL
);
MATCH_TYPE_AND_ENUM
(
bool
,
DT_BOOL
);
static
const
int32
kint32max
=
((
int32
)
0x7FFFFFFF
);
static
const
int32
_t
kint32_tmax
=
((
int32_t
)
0x7FFFFFFF
);
}
// namespace mace
}
// namespace mace
...
...
mace/examples/BUILD
浏览文件 @
a3850281
...
@@ -7,10 +7,6 @@ cc_binary(
...
@@ -7,10 +7,6 @@ cc_binary(
"helloworld.cc"
,
"helloworld.cc"
,
],
],
copts
=
[
"-std=c++11"
],
copts
=
[
"-std=c++11"
],
linkopts
=
if_android
([
"-pie"
,
"-llog"
,
]),
deps
=
[
deps
=
[
"//mace/core"
,
"//mace/core"
,
"//mace/ops"
,
"//mace/ops"
,
...
@@ -21,10 +17,6 @@ cc_test(
...
@@ -21,10 +17,6 @@ cc_test(
name
=
"benchmark_example"
,
name
=
"benchmark_example"
,
srcs
=
[
"benchmark_example.cc"
],
srcs
=
[
"benchmark_example.cc"
],
copts
=
[
"-std=c++11"
],
copts
=
[
"-std=c++11"
],
linkopts
=
if_android
([
"-pie"
,
"-llog"
,
]),
linkstatic
=
1
,
linkstatic
=
1
,
deps
=
[
deps
=
[
"//mace/core"
,
"//mace/core"
,
...
...
mace/examples/benchmark_example.cc
浏览文件 @
a3850281
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
static
void
foo
(
int
iters
)
{
static
void
foo
(
int
iters
)
{
static
const
int
N
=
32
;
static
const
int
N
=
32
;
const
int64
tot
=
static_cast
<
int64
>
(
iters
)
*
N
;
const
int64
_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
N
;
mace
::
testing
::
ItemsProcessed
(
tot
);
mace
::
testing
::
ItemsProcessed
(
tot
);
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
...
@@ -26,7 +26,7 @@ BENCHMARK(foo);
...
@@ -26,7 +26,7 @@ BENCHMARK(foo);
static
void
bar
(
int
iters
,
int
n
)
{
static
void
bar
(
int
iters
,
int
n
)
{
const
int64
tot
=
static_cast
<
int64
>
(
iters
)
*
n
;
const
int64
_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
n
;
mace
::
testing
::
ItemsProcessed
(
tot
);
mace
::
testing
::
ItemsProcessed
(
tot
);
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
...
...
mace/kernels/BUILD
浏览文件 @
a3850281
...
@@ -18,6 +18,9 @@ cc_library(
...
@@ -18,6 +18,9 @@ cc_library(
"//mace/core:core"
,
"//mace/core:core"
,
],
],
copts
=
[
'-std=c++11'
],
copts
=
[
'-std=c++11'
],
linkopts
=
[
"-fopenmp"
]
+
if_android
([
"-lm"
,
]),
)
)
cc_test
(
cc_test
(
...
@@ -29,11 +32,9 @@ cc_test(
...
@@ -29,11 +32,9 @@ cc_test(
"//mace/core:core"
,
"//mace/core:core"
,
],
],
copts
=
[
'-std=c++11'
],
copts
=
[
'-std=c++11'
],
linkopts
=
[
"-fopenmp"
]
+
if_android
([
linkopts
=
if_android
([
"-pie"
,
"-pie"
,
"-llog"
,
]),
"-lm"
,
]),
linkstatic
=
1
,
linkstatic
=
1
,
testonly
=
1
,
testonly
=
1
,
)
)
...
@@ -47,11 +48,6 @@ cc_test(
...
@@ -47,11 +48,6 @@ cc_test(
"//mace/core:test_benchmark_main"
,
"//mace/core:test_benchmark_main"
,
],
],
copts
=
[
'-std=c++11'
],
copts
=
[
'-std=c++11'
],
linkopts
=
[
"-fopenmp"
]
+
if_android
([
"-pie"
,
"-llog"
,
"-lm"
,
]),
linkstatic
=
1
,
linkstatic
=
1
,
testonly
=
1
,
testonly
=
1
,
)
)
mace/kernels/addn.h
浏览文件 @
a3850281
...
@@ -15,7 +15,7 @@ void AddNFuntion(const vector<const Tensor*>& input_tensor, Tensor *output_tenso
...
@@ -15,7 +15,7 @@ void AddNFuntion(const vector<const Tensor*>& input_tensor, Tensor *output_tenso
int
n
=
input_tensor
.
size
();
int
n
=
input_tensor
.
size
();
MACE_CHECK
(
n
>
1
);
MACE_CHECK
(
n
>
1
);
MACE_CHECK_NOTNULL
(
input_tensor
[
0
]);
MACE_CHECK_NOTNULL
(
input_tensor
[
0
]);
int64
size
=
input_tensor
[
0
]
->
size
();
int64
_t
size
=
input_tensor
[
0
]
->
size
();
vector
<
const
T
*>
inputs
(
n
);
vector
<
const
T
*>
inputs
(
n
);
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
inputs
[
i
]
=
input_tensor
[
i
]
->
data
<
T
>
();
inputs
[
i
]
=
input_tensor
[
i
]
->
data
<
T
>
();
...
@@ -24,7 +24,7 @@ void AddNFuntion(const vector<const Tensor*>& input_tensor, Tensor *output_tenso
...
@@ -24,7 +24,7 @@ void AddNFuntion(const vector<const Tensor*>& input_tensor, Tensor *output_tenso
T
*
output
=
output_tensor
->
mutable_data
<
T
>
();
T
*
output
=
output_tensor
->
mutable_data
<
T
>
();
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
for
(
int64
j
=
0
;
j
<
size
;
++
j
)
{
for
(
int64
_t
j
=
0
;
j
<
size
;
++
j
)
{
output
[
j
]
+=
inputs
[
i
][
j
];
output
[
j
]
+=
inputs
[
i
][
j
];
}
}
}
}
...
...
mace/kernels/batch_norm.h
浏览文件 @
a3850281
...
@@ -30,9 +30,9 @@ struct BatchNormFunctor : public BatchNormFunctorBase<D, T> {
...
@@ -30,9 +30,9 @@ struct BatchNormFunctor : public BatchNormFunctorBase<D, T> {
const
T
*
offset
,
const
T
*
offset
,
const
T
*
mean
,
const
T
*
mean
,
const
T
*
var
,
const
T
*
var
,
const
TIndex
n
,
const
index_t
n
,
const
TIndex
channel
,
const
index_t
channel
,
const
TIndex
sample_size
,
const
index_t
sample_size
,
T
*
output
)
{
T
*
output
)
{
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// The calculation formula for inference is
...
@@ -42,15 +42,15 @@ struct BatchNormFunctor : public BatchNormFunctorBase<D, T> {
...
@@ -42,15 +42,15 @@ struct BatchNormFunctor : public BatchNormFunctorBase<D, T> {
// new_offset = \offset - mean * common_val;
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
// Y = new_scale * X + new_offset;
T
new_scale
,
new_offset
;
T
new_scale
,
new_offset
;
for
(
TIndex
c
=
0
;
c
<
channel
;
++
c
)
{
for
(
index_t
c
=
0
;
c
<
channel
;
++
c
)
{
new_scale
=
scale
[
c
]
/
std
::
sqrt
(
var
[
c
]
+
this
->
variance_epsilon_
);
new_scale
=
scale
[
c
]
/
std
::
sqrt
(
var
[
c
]
+
this
->
variance_epsilon_
);
new_offset
=
offset
[
c
]
-
mean
[
c
]
*
new_scale
;
new_offset
=
offset
[
c
]
-
mean
[
c
]
*
new_scale
;
TIndex
pos
=
c
*
sample_size
;
index_t
pos
=
c
*
sample_size
;
for
(
TIndex
i
=
0
;
i
<
n
;
++
i
)
{
for
(
index_t
i
=
0
;
i
<
n
;
++
i
)
{
const
T
*
input_sample_ptr
=
input
+
pos
;
const
T
*
input_sample_ptr
=
input
+
pos
;
T
*
output_sample_ptr
=
output
+
pos
;
T
*
output_sample_ptr
=
output
+
pos
;
for
(
TIndex
j
=
0
;
j
<
sample_size
;
++
j
)
{
for
(
index_t
j
=
0
;
j
<
sample_size
;
++
j
)
{
output_sample_ptr
[
j
]
=
new_scale
*
input_sample_ptr
[
j
]
+
new_offset
;
output_sample_ptr
[
j
]
=
new_scale
*
input_sample_ptr
[
j
]
+
new_offset
;
}
}
pos
+=
channel
*
sample_size
;
pos
+=
channel
*
sample_size
;
...
...
mace/kernels/benchmark/addn_benchmark.cc
浏览文件 @
a3850281
...
@@ -11,7 +11,7 @@ using namespace mace;
...
@@ -11,7 +11,7 @@ using namespace mace;
using
namespace
mace
::
kernels
;
using
namespace
mace
::
kernels
;
static
void
AddNBenchmark
(
int
iters
,
int
n
,
int
type
)
{
static
void
AddNBenchmark
(
int
iters
,
int
n
,
int
type
)
{
const
int64
tot
=
static_cast
<
int64
>
(
iters
)
*
n
*
3
;
const
int64
_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
n
*
3
;
mace
::
testing
::
ItemsProcessed
(
tot
);
mace
::
testing
::
ItemsProcessed
(
tot
);
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
...
@@ -35,7 +35,7 @@ static void AddNBenchmark(int iters, int n, int type) {
...
@@ -35,7 +35,7 @@ static void AddNBenchmark(int iters, int n, int type) {
float
*
input3
=
input_tensor3
.
mutable_data
<
float
>
();
float
*
input3
=
input_tensor3
.
mutable_data
<
float
>
();
float
*
output
=
output_tensor
.
mutable_data
<
float
>
();
float
*
output
=
output_tensor
.
mutable_data
<
float
>
();
for
(
int64
i
=
0
;
i
<
n
;
++
i
)
{
for
(
int64
_t
i
=
0
;
i
<
n
;
++
i
)
{
input1
[
i
]
=
nd
(
gen
);
input1
[
i
]
=
nd
(
gen
);
input2
[
i
]
=
nd
(
gen
);
input2
[
i
]
=
nd
(
gen
);
input3
[
i
]
=
nd
(
gen
);
input3
[
i
]
=
nd
(
gen
);
...
...
mace/kernels/benchmark/relu_benchmark.cc
浏览文件 @
a3850281
...
@@ -11,7 +11,7 @@ using namespace mace;
...
@@ -11,7 +11,7 @@ using namespace mace;
using
namespace
mace
::
kernels
;
using
namespace
mace
::
kernels
;
static
void
ReluBenchmark
(
int
iters
,
int
n
,
int
type
)
{
static
void
ReluBenchmark
(
int
iters
,
int
n
,
int
type
)
{
const
int64
tot
=
static_cast
<
int64
>
(
iters
)
*
n
;
const
int64
_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
n
;
mace
::
testing
::
ItemsProcessed
(
tot
);
mace
::
testing
::
ItemsProcessed
(
tot
);
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
mace
::
testing
::
BytesProcessed
(
tot
*
(
sizeof
(
float
)));
...
@@ -25,7 +25,7 @@ static void ReluBenchmark(int iters, int n, int type) {
...
@@ -25,7 +25,7 @@ static void ReluBenchmark(int iters, int n, int type) {
output_tensor
.
ResizeLike
(
input_tensor
);
output_tensor
.
ResizeLike
(
input_tensor
);
float
*
input
=
input_tensor
.
mutable_data
<
float
>
();
float
*
input
=
input_tensor
.
mutable_data
<
float
>
();
float
*
output
=
output_tensor
.
mutable_data
<
float
>
();
float
*
output
=
output_tensor
.
mutable_data
<
float
>
();
for
(
int64
i
=
0
;
i
<
n
;
++
i
)
{
for
(
int64
_t
i
=
0
;
i
<
n
;
++
i
)
{
input
[
i
]
=
nd
(
gen
);
input
[
i
]
=
nd
(
gen
);
}
}
...
...
mace/kernels/neon/addn_neon.cc
浏览文件 @
a3850281
...
@@ -14,7 +14,7 @@ void NeonAddNFuntion_float(const vector<const Tensor *> &input_tensor,
...
@@ -14,7 +14,7 @@ void NeonAddNFuntion_float(const vector<const Tensor *> &input_tensor,
int
n
=
input_tensor
.
size
();
int
n
=
input_tensor
.
size
();
MACE_CHECK
(
n
>
1
);
MACE_CHECK
(
n
>
1
);
MACE_CHECK_NOTNULL
(
input_tensor
[
0
]);
MACE_CHECK_NOTNULL
(
input_tensor
[
0
]);
int64
size
=
input_tensor
[
0
]
->
size
();
int64
_t
size
=
input_tensor
[
0
]
->
size
();
output_tensor
->
ResizeLike
(
input_tensor
[
0
]);
output_tensor
->
ResizeLike
(
input_tensor
[
0
]);
float
*
output
=
output_tensor
->
mutable_data
<
float
>
();
float
*
output
=
output_tensor
->
mutable_data
<
float
>
();
vector
<
const
float
*>
inputs
(
n
);
vector
<
const
float
*>
inputs
(
n
);
...
@@ -22,19 +22,19 @@ void NeonAddNFuntion_float(const vector<const Tensor *> &input_tensor,
...
@@ -22,19 +22,19 @@ void NeonAddNFuntion_float(const vector<const Tensor *> &input_tensor,
inputs
[
i
]
=
input_tensor
[
i
]
->
data
<
float
>
();
inputs
[
i
]
=
input_tensor
[
i
]
->
data
<
float
>
();
}
}
int64
cost
=
size
*
n
;
int64
_t
cost
=
size
*
n
;
int64
groups
=
1
;
int64
_t
groups
=
1
;
if
(
cost
>
kCostPerGroup
)
{
if
(
cost
>
kCostPerGroup
)
{
groups
=
cost
/
kCostPerGroup
;
groups
=
cost
/
kCostPerGroup
;
}
}
int64
element_per_group
=
size
/
groups
;
int64
_t
element_per_group
=
size
/
groups
;
#pragma omp parallel for num_threads(1) // no significant performance improve
#pragma omp parallel for num_threads(1) // no significant performance improve
for
(
int64
i
=
0
;
i
<
size
;
i
+=
element_per_group
)
{
for
(
int64
_t
i
=
0
;
i
<
size
;
i
+=
element_per_group
)
{
int64
count
=
std
::
min
(
element_per_group
,
size
-
i
);
int64
_t
count
=
std
::
min
(
element_per_group
,
size
-
i
);
int
nn
=
count
>>
2
;
int
nn
=
count
>>
2
;
int
remain
=
count
-
(
nn
<<
2
);
int
remain
=
count
-
(
nn
<<
2
);
for
(
int64
j
=
0
;
j
<
n
;
++
j
)
{
for
(
int64
_t
j
=
0
;
j
<
n
;
++
j
)
{
const
float
*
inptr
=
inputs
[
j
]
+
i
;
const
float
*
inptr
=
inputs
[
j
]
+
i
;
float
*
outptr
=
output
+
i
;
float
*
outptr
=
output
+
i
;
for
(
int
k
=
0
;
k
<
nn
;
++
k
)
{
for
(
int
k
=
0
;
k
<
nn
;
++
k
)
{
...
...
mace/kernels/neon/batch_norm_neon.cc
浏览文件 @
a3850281
...
@@ -34,18 +34,18 @@ struct BatchNormFunctor<DeviceType::NEON, T> : public BatchNormFunctorBase<Devic
...
@@ -34,18 +34,18 @@ struct BatchNormFunctor<DeviceType::NEON, T> : public BatchNormFunctorBase<Devic
T
new_scale
,
new_offset
;
T
new_scale
,
new_offset
;
int
count
=
sample_size
>>
2
;
int
count
=
sample_size
>>
2
;
int
remain_count
=
sample_size
-
count
;
int
remain_count
=
sample_size
-
count
;
for
(
TIndex
c
=
0
;
c
<
channel
;
++
c
)
{
for
(
index_t
c
=
0
;
c
<
channel
;
++
c
)
{
new_scale
=
scale
[
c
]
/
std
::
sqrt
(
var
[
c
]
+
this
->
variance_epsilon_
);
new_scale
=
scale
[
c
]
/
std
::
sqrt
(
var
[
c
]
+
this
->
variance_epsilon_
);
new_offset
=
offset
[
c
]
-
mean
[
c
]
*
new_scale
;
new_offset
=
offset
[
c
]
-
mean
[
c
]
*
new_scale
;
TIndex
pos
=
c
*
sample_size
;
index_t
pos
=
c
*
sample_size
;
float32x4_t
new_scale_f
=
vdupq_n_f32
(
new_scale
);
float32x4_t
new_scale_f
=
vdupq_n_f32
(
new_scale
);
float32x4_t
new_offset_f
=
vdupq_n_f32
(
new_offset
);
float32x4_t
new_offset_f
=
vdupq_n_f32
(
new_offset
);
for
(
TIndex
i
=
0
;
i
<
n
;
++
i
)
{
for
(
index_t
i
=
0
;
i
<
n
;
++
i
)
{
const
float
*
input_sample_ptr
=
input
+
pos
;
const
float
*
input_sample_ptr
=
input
+
pos
;
float
*
output_sample_ptr
=
output
+
pos
;
float
*
output_sample_ptr
=
output
+
pos
;
for
(
TIndex
j
=
0
;
j
<
count
;
++
j
)
{
for
(
index_t
j
=
0
;
j
<
count
;
++
j
)
{
float32x4_t
input_f
=
vld1q_f32
(
input_sample_ptr
);
float32x4_t
input_f
=
vld1q_f32
(
input_sample_ptr
);
float32x4_t
output_f
=
new_offset_f
;
float32x4_t
output_f
=
new_offset_f
;
output_f
=
vfmaq_f32
(
output_f
,
input_f
,
new_scale_f
);
output_f
=
vfmaq_f32
(
output_f
,
input_f
,
new_scale_f
);
...
@@ -53,7 +53,7 @@ struct BatchNormFunctor<DeviceType::NEON, T> : public BatchNormFunctorBase<Devic
...
@@ -53,7 +53,7 @@ struct BatchNormFunctor<DeviceType::NEON, T> : public BatchNormFunctorBase<Devic
input_sample_ptr
+=
4
;
input_sample_ptr
+=
4
;
output_sample_ptr
+=
4
;
output_sample_ptr
+=
4
;
}
}
for
(
TIndex
j
=
0
;
j
<
remain_count
;
++
j
)
{
for
(
index_t
j
=
0
;
j
<
remain_count
;
++
j
)
{
*
output_sample_ptr
=
new_scale
*
*
input_sample_ptr
+
new_offset
;
*
output_sample_ptr
=
new_scale
*
*
input_sample_ptr
+
new_offset
;
++
output_sample_ptr
;
++
output_sample_ptr
;
++
input_sample_ptr
;
++
input_sample_ptr
;
...
...
mace/kernels/neon/relu_neon.cc
浏览文件 @
a3850281
...
@@ -10,14 +10,14 @@ namespace kernels {
...
@@ -10,14 +10,14 @@ namespace kernels {
void
NeonReluFuntion_float
(
const
Tensor
*
input_tensor
,
void
NeonReluFuntion_float
(
const
Tensor
*
input_tensor
,
Tensor
*
output_tensor
)
{
Tensor
*
output_tensor
)
{
int64
size
=
input_tensor
->
size
();
int64
_t
size
=
input_tensor
->
size
();
output_tensor
->
ResizeLike
(
input_tensor
);
output_tensor
->
ResizeLike
(
input_tensor
);
const
float
*
input
=
input_tensor
->
data
<
float
>
();
const
float
*
input
=
input_tensor
->
data
<
float
>
();
float
*
output
=
output_tensor
->
mutable_data
<
float
>
();
float
*
output
=
output_tensor
->
mutable_data
<
float
>
();
#pragma omp parallel for num_threads(1) // no significant performance improve
#pragma omp parallel for num_threads(1) // no significant performance improve
for
(
int64
i
=
0
;
i
<
size
;
i
+=
kCostPerGroup
)
{
for
(
int64
_t
i
=
0
;
i
<
size
;
i
+=
kCostPerGroup
)
{
int64
count
=
std
::
min
(
static_cast
<
int64
>
(
kCostPerGroup
),
size
-
i
);
int64
_t
count
=
std
::
min
(
static_cast
<
int64_t
>
(
kCostPerGroup
),
size
-
i
);
int
nn
=
count
>>
2
;
int
nn
=
count
>>
2
;
int
remain
=
count
-
(
nn
<<
2
);
int
remain
=
count
-
(
nn
<<
2
);
const
float
*
inptr
=
input
+
i
;
const
float
*
inptr
=
input
+
i
;
...
...
mace/kernels/relu.h
浏览文件 @
a3850281
...
@@ -12,12 +12,12 @@ namespace kernels {
...
@@ -12,12 +12,12 @@ namespace kernels {
template
<
typename
T
>
template
<
typename
T
>
void
ReluFuntion
(
const
Tensor
*
input_tensor
,
Tensor
*
output_tensor
)
{
void
ReluFuntion
(
const
Tensor
*
input_tensor
,
Tensor
*
output_tensor
)
{
int64
size
=
input_tensor
->
size
();
int64
_t
size
=
input_tensor
->
size
();
output_tensor
->
ResizeLike
(
input_tensor
);
output_tensor
->
ResizeLike
(
input_tensor
);
const
T
*
input
=
input_tensor
->
data
<
T
>
();
const
T
*
input
=
input_tensor
->
data
<
T
>
();
T
*
output
=
output_tensor
->
mutable_data
<
T
>
();
T
*
output
=
output_tensor
->
mutable_data
<
T
>
();
for
(
int64
i
=
0
;
i
<
size
;
++
i
)
{
for
(
int64
_t
i
=
0
;
i
<
size
;
++
i
)
{
output
[
i
]
=
std
::
max
(
input
[
i
],
static_cast
<
T
>
(
0
));
output
[
i
]
=
std
::
max
(
input
[
i
],
static_cast
<
T
>
(
0
));
}
}
}
}
...
...
mace/kernels/test/addn_neon_test.cc
浏览文件 @
a3850281
...
@@ -15,7 +15,7 @@ TEST(NeonTest, AddN) {
...
@@ -15,7 +15,7 @@ TEST(NeonTest, AddN) {
std
::
mt19937
gen
(
rd
());
std
::
mt19937
gen
(
rd
());
std
::
normal_distribution
<
float
>
nd
(
0
,
1
);
std
::
normal_distribution
<
float
>
nd
(
0
,
1
);
int64
count
=
100000
;
int64
_t
count
=
100000
;
Tensor
input_tensor1
(
cpu_allocator
(),
DataType
::
DT_FLOAT
);
Tensor
input_tensor1
(
cpu_allocator
(),
DataType
::
DT_FLOAT
);
input_tensor1
.
Resize
({
100
,
1000
});
input_tensor1
.
Resize
({
100
,
1000
});
Tensor
input_tensor2
(
cpu_allocator
(),
DataType
::
DT_FLOAT
);
Tensor
input_tensor2
(
cpu_allocator
(),
DataType
::
DT_FLOAT
);
...
@@ -37,7 +37,7 @@ TEST(NeonTest, AddN) {
...
@@ -37,7 +37,7 @@ TEST(NeonTest, AddN) {
float
*
output
=
output_tensor
.
mutable_data
<
float
>
();
float
*
output
=
output_tensor
.
mutable_data
<
float
>
();
float
*
output_neon
=
output_tensor_neon
.
mutable_data
<
float
>
();
float
*
output_neon
=
output_tensor_neon
.
mutable_data
<
float
>
();
for
(
int64
i
=
0
;
i
<
count
;
++
i
)
{
for
(
int64
_t
i
=
0
;
i
<
count
;
++
i
)
{
input1
[
i
]
=
nd
(
gen
);
input1
[
i
]
=
nd
(
gen
);
input2
[
i
]
=
nd
(
gen
);
input2
[
i
]
=
nd
(
gen
);
input3
[
i
]
=
nd
(
gen
);
input3
[
i
]
=
nd
(
gen
);
...
@@ -48,7 +48,7 @@ TEST(NeonTest, AddN) {
...
@@ -48,7 +48,7 @@ TEST(NeonTest, AddN) {
ASSERT_EQ
(
count
,
output_tensor
.
size
());
ASSERT_EQ
(
count
,
output_tensor
.
size
());
ASSERT_EQ
(
count
,
output_tensor_neon
.
size
());
ASSERT_EQ
(
count
,
output_tensor_neon
.
size
());
for
(
int64
i
=
0
;
i
<
count
;
++
i
)
{
for
(
int64
_t
i
=
0
;
i
<
count
;
++
i
)
{
ASSERT_FLOAT_EQ
(
output
[
i
],
output_neon
[
i
]);
ASSERT_FLOAT_EQ
(
output
[
i
],
output_neon
[
i
]);
}
}
}
}
...
...
mace/kernels/test/relu_neon_test.cc
浏览文件 @
a3850281
...
@@ -15,7 +15,7 @@ TEST(NeonTest, Relu) {
...
@@ -15,7 +15,7 @@ TEST(NeonTest, Relu) {
std
::
mt19937
gen
(
rd
());
std
::
mt19937
gen
(
rd
());
std
::
normal_distribution
<
float
>
nd
(
0
,
1
);
std
::
normal_distribution
<
float
>
nd
(
0
,
1
);
int64
count
=
100000
;
int64
_t
count
=
100000
;
Tensor
input_tensor
(
cpu_allocator
(),
DataType
::
DT_FLOAT
);
Tensor
input_tensor
(
cpu_allocator
(),
DataType
::
DT_FLOAT
);
input_tensor
.
Resize
({
100
,
1000
});
input_tensor
.
Resize
({
100
,
1000
});
Tensor
output_tensor
(
cpu_allocator
(),
DataType
::
DT_FLOAT
);
Tensor
output_tensor
(
cpu_allocator
(),
DataType
::
DT_FLOAT
);
...
@@ -27,7 +27,7 @@ TEST(NeonTest, Relu) {
...
@@ -27,7 +27,7 @@ TEST(NeonTest, Relu) {
float
*
output
=
output_tensor
.
mutable_data
<
float
>
();
float
*
output
=
output_tensor
.
mutable_data
<
float
>
();
float
*
output_neon
=
output_tensor_neon
.
mutable_data
<
float
>
();
float
*
output_neon
=
output_tensor_neon
.
mutable_data
<
float
>
();
for
(
int64
i
=
0
;
i
<
count
;
++
i
)
{
for
(
int64
_t
i
=
0
;
i
<
count
;
++
i
)
{
input
[
i
]
=
nd
(
gen
);
input
[
i
]
=
nd
(
gen
);
}
}
...
@@ -36,7 +36,7 @@ TEST(NeonTest, Relu) {
...
@@ -36,7 +36,7 @@ TEST(NeonTest, Relu) {
ASSERT_EQ
(
count
,
output_tensor
.
size
());
ASSERT_EQ
(
count
,
output_tensor
.
size
());
ASSERT_EQ
(
count
,
output_tensor_neon
.
size
());
ASSERT_EQ
(
count
,
output_tensor_neon
.
size
());
for
(
int64
i
=
0
;
i
<
count
;
++
i
)
{
for
(
int64
_t
i
=
0
;
i
<
count
;
++
i
)
{
ASSERT_FLOAT_EQ
(
output
[
i
],
output_neon
[
i
]);
ASSERT_FLOAT_EQ
(
output
[
i
],
output_neon
[
i
]);
}
}
}
}
...
...
mace/ops/batch_norm.h
浏览文件 @
a3850281
...
@@ -33,9 +33,9 @@ class BatchNormOp : public Operator<D, T> {
...
@@ -33,9 +33,9 @@ class BatchNormOp : public Operator<D, T> {
Tensor
*
output
=
this
->
Output
(
0
);
Tensor
*
output
=
this
->
Output
(
0
);
output
->
ResizeLike
(
input
);
output
->
ResizeLike
(
input
);
const
TIndex
n
=
input
->
dim
(
0
);
const
index_t
n
=
input
->
dim
(
0
);
const
TIndex
channel
=
input
->
dim
(
1
);
const
index_t
channel
=
input
->
dim
(
1
);
const
TIndex
sample_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
const
index_t
sample_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
const
float
*
input_ptr
=
input
->
data
<
float
>
();
const
float
*
input_ptr
=
input
->
data
<
float
>
();
const
float
*
scale_ptr
=
scale
->
data
<
float
>
();
const
float
*
scale_ptr
=
scale
->
data
<
float
>
();
...
...
mace/ops/ops_test_util.h
浏览文件 @
a3850281
...
@@ -43,7 +43,7 @@ class OpsTestBase : public ::testing::Test {
...
@@ -43,7 +43,7 @@ class OpsTestBase : public ::testing::Test {
}
}
public:
public:
template
<
typename
T
>
template
<
typename
T
>
void
AddInputFromArray
(
const
char
*
name
,
const
std
::
vector
<
TIndex
>&
shape
,
const
std
::
vector
<
T
>&
data
)
{
void
AddInputFromArray
(
const
char
*
name
,
const
std
::
vector
<
index_t
>&
shape
,
const
std
::
vector
<
T
>&
data
)
{
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
input
->
Resize
(
shape
);
input
->
Resize
(
shape
);
float
*
input_data
=
input
->
mutable_data
<
float
>
();
float
*
input_data
=
input
->
mutable_data
<
float
>
();
...
@@ -70,7 +70,7 @@ class OpsTestBase : public ::testing::Test {
...
@@ -70,7 +70,7 @@ class OpsTestBase : public ::testing::Test {
};
};
template
<
typename
T
>
template
<
typename
T
>
Tensor
CreateTensor
(
const
std
::
vector
<
TIndex
>&
shape
,
const
std
::
vector
<
T
>&
data
)
{
Tensor
CreateTensor
(
const
std
::
vector
<
index_t
>&
shape
,
const
std
::
vector
<
T
>&
data
)
{
Tensor
res
(
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
Tensor
res
(
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
res
.
Resize
(
shape
);
res
.
Resize
(
shape
);
float
*
input_data
=
res
.
mutable_data
<
float
>
();
float
*
input_data
=
res
.
mutable_data
<
float
>
();
...
@@ -90,7 +90,7 @@ inline std::string ShapeToString(const Tensor& x) {
...
@@ -90,7 +90,7 @@ inline std::string ShapeToString(const Tensor& x) {
std
::
stringstream
stream
;
std
::
stringstream
stream
;
for
(
int
i
=
0
;
i
<
x
.
dim_size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
x
.
dim_size
();
i
++
)
{
if
(
i
>
0
)
stream
<<
","
;
if
(
i
>
0
)
stream
<<
","
;
int64
dim
=
x
.
dim
(
i
);
int64
_t
dim
=
x
.
dim
(
i
);
if
(
dim
<
0
)
{
if
(
dim
<
0
)
{
stream
<<
"?"
;
stream
<<
"?"
;
}
else
{
}
else
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录