Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
f14da1e1
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
332
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f14da1e1
编写于
7月 30, 2018
作者:
qnqinan
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'origin/develop' into develop
上级
cf86fb69
3d3614f0
变更
14
隐藏空白更改
内联
并排
Showing
14 changed file
with
336 addition
and
120 deletion
+336
-120
src/common/types.h
src/common/types.h
+7
-1
src/fpga/api/fpga_api.h
src/fpga/api/fpga_api.h
+59
-27
src/framework/program/program-optimize/fusion_op_register.h
src/framework/program/program-optimize/fusion_op_register.h
+3
-1
src/framework/tensor.h
src/framework/tensor.h
+5
-3
src/io/executor.cpp
src/io/executor.cpp
+2
-2
src/operators/concat_op.cpp
src/operators/concat_op.cpp
+4
-1
src/operators/concat_op.h
src/operators/concat_op.h
+1
-0
src/operators/kernel/fpga/concat_kernel.cpp
src/operators/kernel/fpga/concat_kernel.cpp
+55
-0
src/operators/op_param.h
src/operators/op_param.h
+56
-77
src/operators/resize_op.h
src/operators/resize_op.h
+1
-1
test_gemm.cpp
test_gemm.cpp
+136
-0
tools/android-cmake/android.toolchain.cmake
tools/android-cmake/android.toolchain.cmake
+3
-1
tools/build.sh
tools/build.sh
+4
-4
tools/op.cmake
tools/op.cmake
+0
-2
未找到文件。
src/common/types.h
浏览文件 @
f14da1e1
...
...
@@ -20,7 +20,9 @@ limitations under the License. */
#include <vector>
namespace
paddle_mobile
{
enum
class
Precision
:
int
{
FP32
=
0
};
enum
class
Precision
:
int
{
FP32
=
0
,
FP16
=
1
};
typedef
int16_t
half
;
template
<
Precision
p
>
struct
PrecisionTrait
{
...
...
@@ -31,6 +33,10 @@ template <>
struct
PrecisionTrait
<
Precision
::
FP32
>
{
typedef
float
ptype
;
};
template
<
>
struct
PrecisionTrait
<
Precision
::
FP16
>
{
typedef
half
ptype
;
};
//! device type
enum
DeviceTypeEnum
{
kINVALID
=
-
1
,
kCPU
=
0
,
kFPGA
=
1
,
kGPU_MALI
=
2
};
...
...
src/fpga/api/fpga_api.h
浏览文件 @
f14da1e1
...
...
@@ -14,36 +14,35 @@ limitations under the License. */
#pragma once
#include <stdint.h>
#include <cstddef>
#include <iostream>
#include <limits>
// memory management;
namespace
paddle
{
namespace
mobile
{
namespace
paddle_mobile
{
namespace
fpga
{
namespace
api
{
int
open_device
();
int
close_device
();
void
*
fpga_malloc
(
size_t
size
);
void
fpga_free
(
void
*
ptr
);
void
fpga_copy
(
void
*
dst
,
const
void
*
src
,
size_t
num
);
void
*
fpga_malloc
(
size_t
size
);
void
fpga_free
(
void
*
ptr
);
void
fpga_copy
(
void
*
dst
,
const
void
*
src
,
size_t
num
);
struct
FpgaVersionArgs
{
void
*
buf
;
void
*
buf
;
};
struct
MemoryToPhysicalArgs
{
const
void
*
src
;
const
void
*
src
;
uint64_t
physical
;
};
struct
MemoryCopyArgs
{
void
*
src
;
void
*
dst
;
void
*
src
;
void
*
dst
;
size_t
size
;
};
...
...
@@ -51,38 +50,71 @@ struct FpgaQuantArgs {
float
scale
;
};
struct
FpgaBNArgs
{};
struct
FpgaBNArgs
{
bool
enabled
=
false
;
void
*
bias_addr
;
void
*
scale_addr
;
};
struct
FpgaKernelArgs
{
uint32_t
width
;
uint32_t
height
;
uint32_t
stride_h
;
uint32_t
stride_w
;
};
struct
FpgaImageArgs
{
uint32_t
width
;
uint32_t
height
;
uint32_t
channels
;
uint32_t
pad_h
;
uint32_t
pad_w
;
};
struct
FpgaConvArgs
{
bool
enable_BN
=
false
;
bool
enable_Relu
=
false
;
struct
FpgaBNParam
bn_parm
;
bool
relu_enabled
;
struct
FpgaBNArgs
BNargs
;
void
*
image_addr
;
void
*
filter_addr
;
void
*
bias_addr
;
void
*
output_addr
;
float
quant_scale
;
struct
FpgaImageArgs
image
;
uint32_t
filter_num
;
uint32_t
group_num
;
struct
FpgaKernelArgs
kernel
;
};
struct
FpgaPoolArgs
{
bool
enable_BN
=
false
;
struct
FpgaBNParam
bn_parm
;
void
*
image_addr
;
void
*
output_addr
;
struct
FpgaImageArgs
image
;
struct
FpgaKernelArgs
kernel
;
};
struct
FpgaEWAddArgs
{
// only support X + Y
bool
enable_Relu
=
false
;
struct
FpgaEWAddArgs
{
bool
relu_enabled
;
void
*
image0_addr
;
void
*
image1_addr
;
void
*
result_addr
;
uint32_t
const0
;
uint32_t
const1
;
uint32_t
data_len
;
// aligned element count
};
int
ComputeFpgaConv
(
struct
FpgaConvArgs
);
int
ComputeFpgaPool
(
struct
FpgaPoolArgs
);
int
ComputeFpgaEWAdd
(
struct
FpgaEWAddArgs
);
int
ComputeFpgaConv
(
struct
FpgaConvArgs
args
);
int
ComputeFpgaPool
(
struct
FpgaPoolArgs
args
);
int
ComputeFpgaEWAdd
(
struct
FpgaEWAddArgs
args
);
#define IOCTL_FPGA_MAGIC '
FPGA
'
#define IOCTL_FPGA_MAGIC '
CNN
'
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
#define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaArgs)
#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct Fpga
Quant
Args)
#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
#define IOCTL_MEM_TOPHY _IOW(IOCTL_FPGA_MAGIC, 12, struct MemoryToPhysicalArgs)
#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
}
// namespace api
}
// namespace fpga
}
// namespace mobile
}
// namespace paddle
}
// namespace paddle_mobile
src/framework/program/program-optimize/fusion_op_register.h
浏览文件 @
f14da1e1
...
...
@@ -14,11 +14,13 @@ limitations under the License. */
#pragma once
#include <algorithm>
#include <map>
#include <string>
#include <vector>
#include "framework/operator.h"
#include "node.h"
#include "
framework/program/program-optimize/
node.h"
namespace
paddle_mobile
{
namespace
framework
{
...
...
src/framework/tensor.h
浏览文件 @
f14da1e1
...
...
@@ -16,14 +16,15 @@ limitations under the License. */
#include <cstdint>
#include <cstring>
#include <fstream>
#include <memory>
#include <string>
#include <type_traits>
#include <typeindex>
#include <vector>
#include "common/enforce.h"
#include <fstream>
#include "common/enforce.h"
#include "common/types.h"
#include "framework/data_layout.h"
#include "framework/ddim.h"
#include "memory/t_malloc.h"
...
...
@@ -63,7 +64,8 @@ struct SizeOfTypeFunctor<HEAD, TAIL...> {
};
static
inline
size_t
SizeOfType
(
std
::
type_index
type
)
{
SizeOfTypeFunctor
<
int
,
float
,
double
,
int16_t
,
int64_t
,
bool
,
size_t
>
functor
;
SizeOfTypeFunctor
<
int
,
half
,
float
,
double
,
int16_t
,
int64_t
,
bool
,
size_t
>
functor
;
size_t
size
=
functor
(
type
);
PADDLE_MOBILE_ENFORCE
(
size
!=
0UL
,
"Cannot get size of type %s"
,
type
.
name
());
...
...
src/io/executor.cpp
浏览文件 @
f14da1e1
...
...
@@ -187,7 +187,7 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
memcpy
(
&
max_value
,
*
data
+
sizeof
(
float
),
sizeof
(
float
));
*
data
+=
2
*
sizeof
(
float
);
const
float
factor
=
(
max_value
-
min_value
)
/
255.0
;
uint8_t
*
uint8_data
=
(
uint8_t
*
)
(
*
data
);
uint8_t
*
uint8_data
=
reinterpret_cast
<
uint8_t
*>
(
*
data
);
for
(
int
k
=
0
;
k
<
memory_size
;
++
k
)
{
static_cast
<
float
*>
(
memory
)[
k
]
=
uint8_data
[
k
]
*
factor
+
min_value
;
}
...
...
@@ -419,7 +419,7 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
}
template
class
Executor
<
CPU
,
Precision
::
FP32
>;
template
class
Executor
<
FPGA
,
Precision
::
FP32
>;
template
class
Executor
<
GPU_MALI
,
Precision
::
FP32
>;
template
class
Executor
<
FPGA
,
Precision
::
FP16
>;
}
// namespace paddle_mobile
src/operators/concat_op.cpp
浏览文件 @
f14da1e1
...
...
@@ -14,7 +14,9 @@ limitations under the License. */
#ifdef CONCAT_OP
#include "concat_op.h"
#include <vector>
#include "operators/concat_op.h"
namespace
paddle_mobile
{
namespace
operators
{
...
...
@@ -68,6 +70,7 @@ REGISTER_OPERATOR_CPU(concat, ops::ConcatOp);
REGISTER_OPERATOR_MALI_GPU
(
concat
,
ops
::
ConcatOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
concat
,
ops
::
ConcatOp
);
#endif
#endif
src/operators/concat_op.h
浏览文件 @
f14da1e1
...
...
@@ -53,6 +53,7 @@ USE_OP_CPU(concat);
USE_OP_MALI_GPU
(
concat
);
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA
(
concat
);
#endif
#endif
src/operators/kernel/fpga/concat_kernel.cpp
0 → 100644
浏览文件 @
f14da1e1
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONCAT_OP
#include "operators/kernel/concat_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
/**
 * One-time setup hook for the FPGA half-precision concat kernel.
 *
 * This specialization performs no preparation: the parameter object is not
 * inspected and the function unconditionally reports success.
 *
 * @param param  Operator parameters (unused here).
 * @return Always true.
 */
template <>
bool ConcatKernel<FPGA, half>::Init(ConcatParam *param) {
  return true;
}
/**
 * Concatenates all input tensors into the output tensor along dimension 3.
 *
 * The output is treated as `pixels` consecutive rows of `out_dim[3]` half
 * values, where pixels = out_dim[1] * out_dim[2]. Each input is treated as
 * `pixels` consecutive rows of `input->dims()[3]` values. For every row, the
 * inputs' rows are placed next to each other in the output row, in the order
 * the inputs are listed.
 *
 * NOTE(review): this presumes a channel-last (N, H, W, C) layout with a batch
 * of one, since dimension 0 is not part of the row count -- TODO confirm.
 * NOTE(review): param.Axis() is not consulted; the concatenation axis is
 * always dimension 3 -- confirm callers only configure it that way.
 *
 * @param param  Operator parameters providing Inputs() and Out().
 */
template <>
void ConcatKernel<FPGA, half>::Compute(const ConcatParam &param) const {
  auto inputs = param.Inputs();
  auto *out = param.Out();

  // Make sure the output buffer exists before taking pointers into it.
  out->mutable_data<half>();

  DDim out_dim = out->dims();
  const int pixels = out_dim[1] * out_dim[2];
  const auto out_channel = out_dim[3];

  // Channel position inside an output row where the current input begins.
  int64_t out_offset = 0;
  for (size_t i = 0; i < inputs.size(); ++i) {
    auto input = inputs[i];
    // The channel count comes from the tensor's shape, not from indexing the
    // tensor pointer itself.
    const auto channels = input->dims()[3];
    auto src = input->data<half>();
    auto dst = out->data<half>() + out_offset;

    // Copy one full row of this input per pixel: the source advances by its
    // own channel count, the destination by the output row stride.
    for (int j = 0; j < pixels; ++j) {
      memory::Copy(dst + j * out_channel, src + j * channels,
                   channels * sizeof(half));
    }

    // Advance only after copying so the first input lands at offset 0.
    out_offset += channels;
  }
}
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/op_param.h
浏览文件 @
f14da1e1
...
...
@@ -22,6 +22,9 @@ limitations under the License. */
#include "framework/scope.h"
#include "framework/tensor.h"
#include "framework/variable.h"
#ifdef PADDLE_MOBILE_FPGA
#include "fpga/api/fpga_api.h"
#endif
namespace
paddle_mobile
{
namespace
operators
{
...
...
@@ -256,6 +259,15 @@ class ElementwiseAddParam : OpParam {
Tensor
*
input_y_
;
Tensor
*
out_
;
int
axis_
;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga
::
FpgaEWAddArgs
fpga_EW_add_args
;
public:
const
fpga
::
FpgaEWAddArgs
&
FpgaArgs
()
const
{
return
fpga_EW_add_args
;
}
void
SetFpgaArgs
(
const
fpga
::
FpgaEWAddArgs
&
args
)
{
fpga_EW_add_args
=
args
;
}
#endif
};
#ifdef FUSION_ELEMENTWISEADDRELU_OP
...
...
@@ -450,80 +462,15 @@ class PoolParam : public OpParam {
vector
<
int
>
paddings_
;
bool
ceil_mode_
;
bool
global_pooling_
=
false
;
};
#endif
#ifdef FUSION_POOLBN_OP
class
FusionPoolBNParam
:
OpParam
{
public:
FusionPoolBNParam
(
const
VariableNameMap
&
inputs
,
const
VariableNameMap
&
outputs
,
const
AttributeMap
&
attrs
,
const
Scope
&
scope
)
{
input_
=
InputXFrom
<
LoDTensor
>
(
inputs
,
scope
);
pooling_type_
=
GetAttr
<
string
>
(
"pooling_type"
,
attrs
);
ksize_
=
GetAttr
<
vector
<
int
>>
(
"ksize"
,
attrs
);
strides_
=
GetAttr
<
vector
<
int
>>
(
"strides"
,
attrs
);
paddings_
=
GetAttr
<
vector
<
int
>>
(
"paddings"
,
attrs
);
ceil_mode_
=
GetAttr
<
bool
>
(
"ceil_mode"
,
attrs
);
global_pooling_
=
GetAttr
<
bool
>
(
"global_pooling"
,
attrs
);
output_y_
=
OutputYFrom
<
LoDTensor
>
(
outputs
,
scope
);
input_bias_
=
InputBiasFrom
<
LoDTensor
>
(
inputs
,
scope
);
input_mean_
=
InputMeanFrom
<
LoDTensor
>
(
inputs
,
scope
);
input_scale_
=
InputScaleFrom
<
LoDTensor
>
(
inputs
,
scope
);
input_variance_
=
InputVarianceFrom
<
LoDTensor
>
(
inputs
,
scope
);
epsilon_
=
GetAttr
<
float
>
(
"epsilon"
,
attrs
);
momentum_
=
GetAttr
<
float
>
(
"momentum"
,
attrs
);
// is_test_ = GetAttr<bool>("is_test", attrs);
}
const
Tensor
*
Input
()
const
{
return
input_
;
}
const
string
&
PoolingType
()
const
{
return
pooling_type_
;
}
const
vector
<
int
>
&
Ksize
()
const
{
return
ksize_
;
}
const
vector
<
int
>
&
Strides
()
const
{
return
strides_
;
}
const
vector
<
int
>
&
Paddings
()
const
{
return
paddings_
;
}
bool
isCeilMode
()
const
{
return
ceil_mode_
;
}
bool
isGlobalPooling
()
const
{
return
global_pooling_
;
}
Tensor
*
OutputY
()
const
{
return
output_y_
;
}
const
Tensor
*
InputBias
()
const
{
return
input_bias_
;
}
const
Tensor
*
InputMean
()
const
{
return
input_mean_
;
}
const
Tensor
*
InputScale
()
const
{
return
input_scale_
;
}
const
Tensor
*
InputVariance
()
const
{
return
input_variance_
;
}
const
float
&
Epsilon
()
const
{
return
epsilon_
;
}
const
float
&
Momentum
()
const
{
return
momentum_
;
}
const
bool
&
IsTest
()
const
{
return
is_test_
;
}
const
string
&
DataFormat
()
const
{
return
data_format_
;
}
#ifdef PADDLE_MOBILE_FPGA
private:
Tensor
*
input_
;
string
pooling_type_
;
vector
<
int
>
ksize_
;
vector
<
int
>
strides_
;
vector
<
int
>
paddings_
;
bool
ceil_mode_
;
bool
global_pooling_
=
false
;
Tensor
*
output_y_
;
Tensor
*
input_bias_
;
Tensor
*
input_mean_
;
Tensor
*
input_scale_
;
Tensor
*
input_variance_
;
float
epsilon_
;
float
momentum_
;
bool
is_test_
;
string
data_format_
;
fpga
::
FpgaPoolArgs
fpga_pool_args
;
public:
const
fpga
::
FpgaPoolArgs
&
FpgaArgs
()
const
{
return
fpga_pool_args
;
}
void
SetFpgaArgs
(
const
fpga
::
FpgaPoolArgs
&
args
)
{
fpga_pool_args
=
args
;
}
#endif
};
#endif
...
...
@@ -704,7 +651,7 @@ class MultiClassNMSParam : public OpParam {
class
FeedParam
:
public
OpParam
{
public:
FeedParam
(
const
VariableNameMap
&
inputs
,
const
VariableNameMap
&
outputs
,
const
AttributeMap
&
attrs
,
Scope
&
scope
)
{
const
AttributeMap
&
attrs
,
Scope
const
&
scope
)
{
input_x_
=
InputXFrom
<
LoDTensor
>
(
inputs
,
scope
);
out_
=
OutFrom
<
LoDTensor
>
(
outputs
,
scope
);
auto
var
=
scope
.
Var
(
"batch_size"
);
...
...
@@ -983,6 +930,15 @@ class FusionFcParam : public OpParam {
int
x_num_col_dims_
;
int
y_num_col_dims_
;
int
axis_
;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga
::
FpgaConvArgs
fpga_conv_args
;
public:
const
fpga
::
FpgaConvArgs
&
FpgaArgs
()
const
{
return
fpga_conv_args
;
}
void
SetFpgaArgs
(
const
fpga
::
FpgaConvArgs
&
args
)
{
fpga_conv_args
=
args
;
}
#endif
};
#ifdef FUSION_FCRELU_OP
...
...
@@ -1032,6 +988,15 @@ class FusionConvAddParam : public OpParam {
vector
<
int
>
paddings_
;
vector
<
int
>
dilations_
;
int
groups
;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga
::
FpgaConvArgs
fpga_conv_args
;
public:
const
fpga
::
FpgaConvArgs
&
FpgaArgs
()
const
{
return
fpga_conv_args
;
}
void
SetFpgaArgs
(
const
fpga
::
FpgaConvArgs
&
args
)
{
fpga_conv_args
=
args
;
}
#endif
};
Print
&
operator
<<
(
Print
&
printer
,
const
FusionConvAddParam
&
conv_param
);
...
...
@@ -1128,6 +1093,15 @@ class FusionConvAddBNReluParam : public OpParam {
bool
is_test_
;
Tensor
*
new_bias_
;
Tensor
*
new_scale_
;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga
::
FpgaConvArgs
fpga_conv_args
;
public:
const
fpga
::
FpgaConvArgs
&
FpgaArgs
()
const
{
return
fpga_conv_args
;
}
void
SetFpgaArgs
(
const
fpga
::
FpgaConvArgs
&
args
)
{
fpga_conv_args
=
args
;
}
#endif
};
#endif
...
...
@@ -1213,6 +1187,15 @@ class FusionConvAddBNParam : public OpParam {
bool
is_test_
;
Tensor
*
new_bias_
;
Tensor
*
new_scale_
;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga
::
FpgaConvArgs
fpga_conv_args
;
public:
const
fpga
::
FpgaConvArgs
&
FpgaArgs
()
const
{
return
fpga_conv_args
;
}
void
SetFpgaArgs
(
const
fpga
::
FpgaConvArgs
&
args
)
{
fpga_conv_args
=
args
;
}
#endif
};
#endif
...
...
@@ -1426,9 +1409,5 @@ class DropoutParam : public OpParam {
};
#endif
#ifdef REGION_OP
class
RegionParam
:
public
OpParam
{};
#endif
}
// namespace operators
}
// namespace paddle_mobile
src/operators/resize_op.h
浏览文件 @
f14da1e1
...
...
@@ -33,7 +33,7 @@ class ResizeOp
DeviceType
,
ResizeParam
,
operators
::
ResizeKernel
<
DeviceType
,
T
>>
{
public:
ResizeOp
(
const
std
::
string
&
type
,
const
VariableNameMap
&
inputs
,
const
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
attrs
,
const
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
,
std
::
shared_ptr
<
framework
::
Scope
>
scope
)
:
framework
::
OperatorWithKernel
<
DeviceType
,
ResizeParam
,
operators
::
ResizeKernel
<
DeviceType
,
T
>>
(
...
...
test_gemm.cpp
0 → 100644
浏览文件 @
f14da1e1
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include <cstdlib>
#include <ctime>
#include "../test_helper.h"
#include "common/log.h"
#include "memory/t_malloc.h"
#include "operators/math/gemm.h"
#define a(i, j) a[(i)*lda + (j)]
#define b(i, j) b[(i)*ldb + (j)]
#define c(i, j) c[(i)*ldc + (j)]
#define c1(i, j) c1[(i)*ldc + (j)]
/**
 * Writes an m x n row-major matrix to std::cout.
 *
 * Elements of a row are separated by " | "; every row ends with std::endl and
 * one extra std::endl is written after the last row.
 *
 * @param m    Number of rows to print.
 * @param n    Number of columns to print per row.
 * @param ldc  Distance, in elements, between the starts of consecutive rows.
 * @param c    Pointer to the first element of the matrix.
 */
void print_matirx(int m, int n, int ldc, float *c) {
  for (int row = 0; row < m; ++row) {
    const float *line = c + row * ldc;
    // The first element is written without a leading separator.
    std::cout << line[0];
    for (int col = 1; col < n; ++col) {
      std::cout << " | " << line[col];
    }
    std::cout << std::endl;
  }
  std::cout << std::endl;
}
/**
 * Runs one SgemmWithBn case and compares it with a naive reference.
 *
 * Steps:
 *  1. Allocate A (m x k), B (k x n), two m x n result buffers, and per-row
 *     scale and bias vectors of length m.
 *  2. Fill A, B, scale and bias (in that order) with `t1 + rand() % t2`, after
 *     seeding rand() from the current time.
 *  3. Compute the reference: for each (row, col),
 *     dot(A[row, :], B[:, col]) * scale[row] + bias[row],
 *     clamped to 0 from below when `relu` is set.
 *  4. Call paddle_mobile::operators::math::SgemmWithBn on the same operands.
 *  5. Count elements whose values match after truncation to int, optionally
 *     dump all matrices, and print a one-line summary.
 *
 * @param m, n, k  Matrix dimensions (A is m x k, B is k x n, C is m x n).
 * @param relu     Whether the reference clamps negatives to zero; also
 *                 forwarded to SgemmWithBn.
 * @param t1       Additive offset of the random fill values.
 * @param t2       Modulus of the random fill values.
 * @param pr       When greater than 0, A, B and both results are printed.
 * @return Always 0; mismatches are only reported in the printed summary.
 */
int do_sgemm(int m, int n, int k, bool relu, int t1, int t2, int pr) {
  const int lda = k;
  const int ldb = n;
  const int ldc = n;

  // Allocates a float buffer of the given byte size from the project allocator.
  auto alloc_floats = [](size_t bytes) {
    return static_cast<float *>(paddle_mobile::memory::Alloc(bytes));
  };

  float *lhs = alloc_floats(sizeof(float) * m * k);
  float *rhs = alloc_floats(sizeof(float) * k * n);
  float *actual = alloc_floats(sizeof(float) * m * n);
  float *expected = alloc_floats(sizeof(float) * m * n);
  float *scale = alloc_floats(sizeof(float) * m);
  float *bias = alloc_floats(sizeof(float) * m);

  srand(unsigned(time(0)));

  // Fills `count` elements with t1 + rand() % t2. The order of the calls below
  // fixes which rand() draws go to which buffer.
  auto fill_random = [t1, t2](float *dst, int count) {
    for (int idx = 0; idx < count; ++idx) {
      dst[idx] = t1 + rand() % t2;
    }
  };
  fill_random(lhs, m * k);
  fill_random(rhs, k * n);
  fill_random(scale, m);
  fill_random(bias, m);

  // Naive reference result.
  for (int row = 0; row < m; ++row) {
    for (int col = 0; col < n; ++col) {
      float acc = 0;
      for (int inner = 0; inner < k; inner++) {
        acc += lhs[row * lda + inner] * rhs[inner * ldb + col];
      }
      acc *= scale[row];
      acc += bias[row];
      if (relu && (acc < 0)) {
        acc = 0;
      }
      expected[row * ldc + col] = acc;
    }
  }

  // Implementation under test.
  paddle_mobile::operators::math::SgemmWithBn(m, n, k, 0.9, lhs, lda, rhs, ldb,
                                              0.3, actual, ldc, relu, scale,
                                              bias);

  // Element-wise comparison after truncation to int.
  int eq = 0;
  int neq = 0;
  for (int idx = 0; idx < m * n; ++idx) {
    const bool same =
        static_cast<int>(actual[idx]) == static_cast<int>(expected[idx]);
    if (same) {
      ++eq;
    } else {
      ++neq;
    }
  }

  if (pr > 0) {
    std::cout << "A:" << std::endl;
    print_matirx(m, k, lda, lhs);
    std::cout << "B:" << std::endl;
    print_matirx(k, n, ldb, rhs);
    std::cout << "C:" << std::endl;
    print_matirx(m, n, ldc, actual);
    std::cout << "C1:" << std::endl;
    print_matirx(m, n, ldc, expected);
  }

  std::cout << "mnk=" << m << " " << n << " " << k << " relu=" << relu
            << " eq=" << eq << " neq=" << neq << std::endl;

  paddle_mobile::memory::Free(lhs);
  paddle_mobile::memory::Free(rhs);
  paddle_mobile::memory::Free(actual);
  paddle_mobile::memory::Free(expected);
  paddle_mobile::memory::Free(scale);
  paddle_mobile::memory::Free(bias);

  return 0;
}
/**
 * Test driver: runs do_sgemm over a fixed list of cases, in order.
 *
 * The first case prints its matrices (pr = 10). The next five run without
 * relu and with fill offset t1 = 10. The last five repeat the same shapes
 * with relu enabled and t1 = -4.
 *
 * @return Always 0.
 */
int main() {
  struct GemmCase {
    int rows;
    int cols;
    int depth;
    bool relu;
    int offset;
    int modulus;
    int print;
  };

  const GemmCase cases[] = {
      {9, 9, 9, true, 10, 10, 10},
      {10, 6, 12, false, 10, 10, 0},
      {512, 256, 384, false, 10, 10, 0},
      {1366, 768, 256, false, 10, 10, 0},
      {1255, 755, 333, false, 10, 10, 0},
      {555, 777, 999, false, 10, 10, 0},
      {10, 6, 12, true, -4, 10, 0},
      {512, 256, 384, true, -4, 10, 0},
      {1366, 768, 256, true, -4, 10, 0},
      {1255, 755, 333, true, -4, 10, 0},
      {555, 777, 999, true, -4, 10, 0},
  };

  for (const GemmCase &tc : cases) {
    do_sgemm(tc.rows, tc.cols, tc.depth, tc.relu, tc.offset, tc.modulus,
             tc.print);
  }
  return 0;
}
tools/android-cmake/android.toolchain.cmake
浏览文件 @
f14da1e1
...
...
@@ -65,6 +65,8 @@ endif()
file
(
TO_CMAKE_PATH
"
${
ANDROID_NDK
}
"
ANDROID_NDK
)
# Android NDK revision
message
(
"
${
ANDROID_NDK
}
"
)
file
(
READ
"
${
ANDROID_NDK
}
/source.properties"
ANDROID_NDK_SOURCE_PROPERTIES
)
set
(
ANDROID_NDK_SOURCE_PROPERTIES_REGEX
"^Pkg
\\
.Desc = Android NDK
\n
Pkg
\\
.Revision = ([0-9]+)
\\
."
)
...
...
@@ -159,7 +161,7 @@ endif()
# Default values for configurable variables.
if
(
NOT ANDROID_TOOLCHAIN
)
set
(
ANDROID_TOOLCHAIN
clang
)
set
(
ANDROID_TOOLCHAIN
gcc
)
endif
()
if
(
NOT ANDROID_ABI
)
set
(
ANDROID_ABI armeabi-v7a
)
...
...
tools/build.sh
浏览文件 @
f14da1e1
...
...
@@ -40,8 +40,8 @@ build_for_android() {
fi
if
[
-z
"
$PLATFORM
"
]
;
then
PLATFORM
=
"arm-v7a"
# Users could choose "arm-v8a" platform.
#
PLATFORM="arm-v8a"
#
PLATFORM="arm-v7a" # Users could choose "arm-v8a" platform.
PLATFORM
=
"arm-v8a"
fi
if
[
"
${
PLATFORM
}
"
=
"arm-v7a"
]
;
then
...
...
@@ -63,7 +63,7 @@ build_for_android() {
TOOLCHAIN_FILE
=
"./tools/android-cmake/android.toolchain.cmake"
ANDROID_ARM_MODE
=
"arm"
if
[
"
${#
NETS
}
"
>
1
]
;
then
if
[
"
${#
NETS
}
"
-gt
1
]
;
then
cmake ..
\
-B
"../build/release/
${
PLATFORM
}
"
\
-DANDROID_ABI
=
"
${
ABI
}
"
\
...
...
@@ -99,7 +99,7 @@ build_for_ios() {
BUILD_DIR
=
../build/release/
"
${
PLATFORM
}
"
/
TOOLCHAIN_FILE
=
"./tools/ios-cmake/ios.toolchain.cmake"
mkdir
-p
"
${
BUILD_DIR
}
"
if
[
"
${#
NETS
}
"
>
1
]
;
then
if
[
"
${#
NETS
}
"
-gt
1
]
;
then
cmake ..
\
-B
"
${
BUILD_DIR
}
"
\
-DCMAKE_BUILD_TYPE
=
"
${
MODE
}
"
\
...
...
tools/op.cmake
浏览文件 @
f14da1e1
...
...
@@ -75,11 +75,9 @@ if ("FPGAnets" IN_LIST NET)
set
(
FUSION_CONVADDRELU_OP ON
)
set
(
FUSION_CONVADDBNRELU_OP ON
)
set
(
FUSION_CONVADDBN_OP ON
)
set
(
FUSION_POOLBN_OP ON
)
set
(
FUSION_ELEMENTWISEADDRELU_OP ON
)
set
(
FUSION_FC_OP ON
)
set
(
FUSION_FCRELU_OP ON
)
set
(
REGION_OP ON
)
set
(
POOL_OP ON
)
set
(
CONCAT_OP ON
)
set
(
SOFTMAX_OP ON
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录