Commit 33122506
Authored Aug 02, 2018 by qnqinan

Merge remote-tracking branch 'origin/develop' into develop

Parents: 2c668b4d, b5c14d86

Showing 21 changed files with 166 additions and 250 deletions (+166 -250)
src/fpga/api/fpga_api.cpp  +10 -12
src/fpga/api/fpga_api.h  +96 -54
src/framework/tensor.h  +16 -0
src/io/executor.cpp  +1 -1
src/memory/t_malloc.cpp  +3 -3
src/operators/feed_op.h  +1 -1
src/operators/kernel/arm/dropout_kernel.cpp  +0 -2
src/operators/kernel/dropout_kernel.h  +1 -1
src/operators/kernel/fpga/concat_kernel.cpp  +1 -1
src/operators/kernel/im2sequence_kernel.h  +1 -3
src/operators/kernel/mali/fushion_fc_kernel.cpp  +0 -2
src/operators/kernel/prelu_kernel.h  +1 -1
src/operators/kernel/scale_kernel.h  +1 -1
src/operators/kernel/slice_kernel.h  +1 -1
src/operators/op_param.h  +22 -22
test/framework/test_load.cpp  +5 -5
test/test_helper.h  +1 -0
test_gemm.cpp  +0 -136
tools/build.sh  +2 -2
tools/quantification/convert.cpp  +2 -2
tools/quantification/src/block_desc_local.h  +1 -0
src/fpga/api/fpga_api.cpp
@@ -29,15 +29,15 @@ limitations under the License. */
#include "fpga/api/fpga_api.h"

-namespace paddle {
-namespace mobile {
+namespace paddle_mobile {
namespace fpga {
-namespace api {

static int fd = -1;
static const char *device_path = "/dev/fpgadrv0";

-static inline int do_ioctl(int req, void *arg) { return ioctl(req, arg); }
+static inline int do_ioctl(int req, void *arg) {
+  return ioctl(req, (long unsigned int)arg);
+}

int open_device() {
  if (fd == -1) {
@@ -48,8 +48,8 @@ int open_device() {
// memory management;
void *fpga_malloc(size_t size) {
-  return reinterpret_cast<(void *)> mmap64(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+  return reinterpret_cast<void *>(
+      mmap64(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0));
}

void fpga_free(void *ptr) { munmap(ptr, 0); }
@@ -58,11 +58,9 @@ void fpga_copy(void *dest, const void *src, size_t num) {
  memcpy(dest, src, num);
}

-int ComputeFpgaConv(struct FpgaConvArgs) {}
-int ComputeFpgaPool(struct FpgaPoolArgs) {}
-int ComputeFpgaEWAdd(struct FpgaEWAddArgs) {}
+int ComputeFpgaConv(struct ConvArgs args) {}
+int ComputeFpgaPool(struct PoolingArgs args) {}
+int ComputeFpgaEWAdd(struct EWAddArgs args) {}

-}  // namespace api
}  // namespace fpga
-}  // namespace mobile
-}  // namespace paddle
+}  // namespace paddle_mobile
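
Editorial note, not part of the commit: a minimal usage sketch of the reworked allocator entry points above, assuming open_device() is declared in fpga/api/fpga_api.h alongside fpga_malloc/fpga_free/fpga_copy. The buffer size and the copy are purely illustrative.

#include <cstddef>
#include <cstdint>

#include "fpga/api/fpga_api.h"

int main() {
  using namespace paddle_mobile::fpga;  // namespace introduced by this merge

  open_device();  // opens /dev/fpgadrv0 only if it is not open yet

  size_t len = 1024;             // illustrative size
  void *buf = fpga_malloc(len);  // mmap64-backed buffer shared with the driver

  uint8_t pattern[16] = {0};
  fpga_copy(buf, pattern, sizeof(pattern));  // thin wrapper around memcpy

  fpga_free(buf);  // unmaps the region
  return 0;
}
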
src/fpga/api/fpga_api.h
@@ -31,90 +31,132 @@ void* fpga_malloc(size_t size);
void fpga_free(void *ptr);
void fpga_copy(void *dst, const void *src, size_t num);

-struct FpgaVersionArgs {
-  void *buf;
-};
-struct MemoryToPhysicalArgs {
-  const void *src;
-  uint64_t physical;
+struct VersionArgs {
+  void *buffer;
};

struct MemoryCopyArgs {
  void *src;
-  void *dst;
+  void *dest;
  size_t size;
};

struct FpgaQuantArgs {
  float scale;
};

-struct FpgaBNArgs {
-  bool enabled = false;
-  void *bias_addr;
-  void *scale_addr;
+struct BNArgs {
+  bool enabled;
+  void *bias_address;
+  void *scale_address;
};

-struct FpgaKernelArgs {
+/**
+Conv and Pooling kernel
+*/
+struct KernelArgs {
  uint32_t width;
  uint32_t height;
-  uint32_t stride_h;
  uint32_t stride_w;
+  uint32_t stride_h;
};

-struct FpgaImageArgs {
-  uint32_t width;
-  uint32_t height;
+struct ImageInputArgs {
+  void *address;         // input featuremap virtual address
+  float *scale_address;  // input scale address;
  uint32_t channels;
-  uint32_t pad_h;
-  uint32_t pad_w;
+  uint32_t width;        // featuremap width
+  uint32_t height;
+  uint32_t pad_width;    // padding width;
+  uint32_t pad_height;
};

+struct ImageOutputArgs {
+  void *address;         // output result address;
+  float *scale_address;  // output scale address;
+};

-struct FpgaConvArgs {
+struct ConvArgs {
  bool relu_enabled;
-  struct FpgaBNArgs BNargs;
-  void *image_addr;
-  void *filter_addr;
-  void *bias_addr;
-  void *output_addr;
-  float quant_scale;
-  struct FpgaImageArgs image;
+  void *bias_address;
+  void *filter_address;
+  uint32_t filter_num;
+  uint32_t group_num;
-  struct FpgaKernelArgs kernel;
+  struct BNArgs bn;
+  struct KernelArgs kernel;
+  struct ImageInputArgs image;  // input image;
+  struct ImageOutputArgs output;
};

-struct FpgaPoolArgs {
-  void *image_addr;
-  void *output_addr;
-  struct FpgaImageArgs image;
-  struct FpgaKernelArgs kernel;
+struct PoolingArgs {
+  struct KernelArgs kernel;
+  struct ImageInputArgs image;  // input image;
+  struct ImageOutputArgs output;
};

-struct FpgaEWAddArgs {
+// elementwise add arguments
+struct EWAddArgs {
  bool relu_enabled;
-  void *image0_addr;
-  void *image1_addr;
-  void *result_addr;
-  uint32_t const0;
-  uint32_t const1;
-  uint32_t data_len;  // aligned element count
+  float const0;  // output0 = const0 x input0 + const1 x input1;
+  float const1;
+  struct ImageInputArgs image0;
+  struct ImageInputArgs image1;
+  struct ImageOutputArgs output;
};

struct FpgaRegWriteArgs {
  uint64_t address;  //
  uint64_t value;
};

-int ComputeFpgaConv(struct FpgaConvArgs args);
-int ComputeFpgaPool(struct FpgaPoolArgs args);
-int ComputeFpgaEWAdd(struct FpgaEWAddArgs args);

+struct FpgaRegReadArgs {
+  uint64_t address;
+  uint64_t value;
+};

+#define IOCTL_FPGA_MAGIC 'FPGA'
+#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
+#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 02, struct FpgaRegReadArgs)
+#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 03, struct FpgaRegWriteArgs)
+#define IOCTL_SEPARATOR_0 10
-#define IOCTL_FPGA_MAGIC 'CNN'
-#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
-#define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
-#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
-#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
-#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
-#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
+#define IOCTL_SEPARATOR_1 20
+#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)
+#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)
+#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)

+enum FPGA_ERR_TYPE {
+  ERR_IOCTL_CMD = -1,
+  ERR_TIMEOUT = -2,
+  ERR_COMPLETION_TIMEOUT = -3,
+  ERR_INVALID_FPGA_ADDR = -4,
+  ERR_NOMEM = -5,
+  ERR_NO_RESERVE_MEM = -6,
+  ERR_COPY_FROM_USER = -7,
+  ERR_COPY_TO_USER = -8,
+  ERR_DEL_TIMER = -9,
+  ERR_ENABLE_MSI = -10,
+  ERR_REGISTER_IRQ = -11,
+  ERR_PCIE_REGISTER = -12,
+  ERR_PCIE_PROBE = -13,
+  ERR_REGISTER_BLOCK = -14,
+  ERR_ALLOC_GENDISK = -15,
+  ERR_INIT_QUEUE = -16,
+  ERR_WAIT = -17,
+  ERR_ECC_ERROR = -31,
+  ERR_FPGA_FAIL_STOP = -64,
+  ERR_FPGA_DEBUG_STOP = -113,
+  DEV_TMP_UNAVAILABLE = -128
+};

+//============================== API =============================
+int ComputeFpgaConv(struct ConvArgs args);
+int ComputeFpgaPool(struct PoolingArgs args);
+int ComputeFpgaEWAdd(struct EWAddArgs args);

}  // namespace fpga
}  // namespace paddle_mobile
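
Editorial note, not part of the commit: a hedged sketch of how a caller might populate the new argument structs and invoke ComputeFpgaConv. The struct and field names come from the header above; the helper name, the buffer pointers and the shape/stride numbers are made up for illustration, and ComputeFpgaConv is still an empty stub at this point in the tree.

#include <cstring>

#include "fpga/api/fpga_api.h"

namespace fpga = paddle_mobile::fpga;

// Hypothetical helper: wires externally managed buffers into the new ConvArgs
// layout (KernelArgs + ImageInputArgs + ImageOutputArgs) introduced above.
int RunConv3x3(void *input, float *input_scale, void *filter, void *bias,
               void *output, float *output_scale) {
  fpga::ConvArgs args;
  std::memset(&args, 0, sizeof(args));  // leave unused fields (e.g. bn) zeroed

  args.relu_enabled = true;
  args.filter_address = filter;
  args.bias_address = bias;
  args.filter_num = 32;  // placeholder output channel count
  args.group_num = 1;

  args.kernel.width = 3;  // 3x3 kernel, stride 1
  args.kernel.height = 3;
  args.kernel.stride_w = 1;
  args.kernel.stride_h = 1;

  args.image.address = input;
  args.image.scale_address = input_scale;
  args.image.channels = 16;  // placeholder input shape
  args.image.width = 224;
  args.image.height = 224;
  args.image.pad_width = 1;
  args.image.pad_height = 1;

  args.output.address = output;
  args.output.scale_address = output_scale;

  return fpga::ComputeFpgaConv(args);
}
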
src/framework/tensor.h
@@ -253,6 +253,18 @@ class Tensor {
                        "Tensor's dims_ is out of bound. ");
  }

+#ifdef PADDLE_MOBILE_FPGA
+  struct FPGAArgs {
+    float scale;
+
+    inline float *scale_pointer() { return &scale; }
+  };
+
+  struct FPGAArgs &fpga_args() const { return fpgaArgs_; }
+#endif

 private:
  /**
   * @note Placeholder hides type T, so it doesn't appear as a
@@ -319,6 +331,10 @@ class Tensor {
   * begins.
   */
  size_t offset_;
+#ifdef PADDLE_MOBILE_FPGA
+  FPGAArgs fpgaArgs_;
+#endif
};
#ifdef PADDLE_MOBILE_DEBUG
src/io/executor.cpp
@@ -420,6 +420,6 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
template class Executor<CPU, Precision::FP32>;
template class Executor<GPU_MALI, Precision::FP32>;
-template class Executor<FPGA, Precision::FP16>;
+template class Executor<FPGA, Precision::FP32>;

}  // namespace paddle_mobile
src/memory/t_malloc.cpp
@@ -27,17 +27,17 @@ namespace memory {
const int MALLOC_ALIGN = 64;

#ifdef PADDLE_MOBILE_FPGA
-namespace api = paddle::mobile::fpga::api;
+namespace fpga = paddle_mobile::fpga;

void Copy(void *dst, const void *src, size_t num) {
  std::memcpy(dst, src, num);
}

-void *Alloc(size_t size) { return api::malloc(size); }
+void *Alloc(size_t size) { return fpga::fpga_malloc(size); }

void Free(void *ptr) {
  if (ptr) {
-    api::fpga_free(ptr);
+    fpga::fpga_free(ptr);
  }
}
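
Editorial note, not part of the commit: with PADDLE_MOBILE_FPGA defined, the generic paddle_mobile::memory helpers above now forward to fpga::fpga_malloc / fpga::fpga_free, so existing callers keep using the same interface; on other targets they fall back to the aligned CPU allocator in the same translation unit. A small sketch of that call path; the buffer size is arbitrary.

#include <cstddef>

#include "memory/t_malloc.h"

int main() {
  const size_t kBytes = 64 * 1024;  // arbitrary buffer size

  // Under PADDLE_MOBILE_FPGA these calls are backed by the FPGA driver;
  // otherwise they use the default aligned host allocator.
  void *src = paddle_mobile::memory::Alloc(kBytes);
  void *dst = paddle_mobile::memory::Alloc(kBytes);

  paddle_mobile::memory::Copy(dst, src, kBytes);

  paddle_mobile::memory::Free(src);
  paddle_mobile::memory::Free(dst);
  return 0;
}
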
src/operators/feed_op.h
@@ -29,7 +29,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
         std::shared_ptr<framework::Scope> scope)
      : framework::OperatorBase<DeviceType>(type, inputs, outputs, attrs,
                                            scope),
-        param_(inputs, outputs, attrs, *scope) {}
+        param_(inputs, outputs, attrs, scope.get()) {}

  void RunImpl() const { param_.Out()->ShareDataWith(*param_.InputX()); }

  void Init() {}
src/operators/kernel/arm/dropout_kernel.cpp
@@ -14,8 +14,6 @@ limitations under the License. */
#ifdef DROPOUT_OP
-#pragma once
#include "operators/kernel/dropout_kernel.h"
#include <operators/math/transform.h>
src/operators/kernel/dropout_kernel.h
@@ -17,7 +17,7 @@ limitations under the License. */
#include "framework/operator.h"
#include "operators/op_param.h"
-#pragma once;
+#pragma once

namespace paddle_mobile {
namespace operators {
src/operators/kernel/fpga/concat_kernel.cpp
@@ -39,7 +39,7 @@ void ConcatKernel<FPGA, half>::Compute(const ConcatParam &param) const {
  for (int i = 0; i < inputs.size(); ++i) {
    auto input = inputs[i];
-    auto channels = input[3];
+    auto channels = input->dims()[3];
    out_offset += channels;
    auto src = input->data<half>();
    for (int j = 0; j < pixels; ++j) {
src/operators/kernel/im2sequence_kernel.h
@@ -20,13 +20,11 @@ limitations under the License. */
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
-#pragma once;
+#pragma once

namespace paddle_mobile {
namespace operators {
-using namespace framework;

template <typename DeviceType, typename T>
class Im2SequenceKernel
    : public framework::OpKernelBase<DeviceType, Im2SequenceParam> {
src/operators/kernel/mali/fushion_fc_kernel.cpp
@@ -14,8 +14,6 @@ limitations under the License. */
#ifdef FUSION_FC_OP
-#pragma once
#include "operators/kernel/fusion_fc_kernel.h"

namespace paddle_mobile {
src/operators/kernel/prelu_kernel.h
@@ -15,7 +15,7 @@ limitations under the License. */
#include "framework/operator.h"
#include "operators/op_param.h"
-#pragma once;
+#pragma once

namespace paddle_mobile {
namespace operators {
src/operators/kernel/scale_kernel.h
@@ -15,7 +15,7 @@ limitations under the License. */
#include "framework/operator.h"
#include "operators/op_param.h"
-#pragma once;
+#pragma once

namespace paddle_mobile {
namespace operators {
src/operators/kernel/slice_kernel.h
@@ -15,7 +15,7 @@ limitations under the License. */
#include "framework/operator.h"
#include "operators/op_param.h"
-#pragma once;
+#pragma once

namespace paddle_mobile {
namespace operators {
src/operators/op_param.h
@@ -262,11 +262,11 @@ class ElementwiseAddParam : OpParam {
#ifdef PADDLE_MOBILE_FPGA

 private:
-  fpga::FpgaEWAddArgs fpga_EW_add_args;
+  fpga::EWAddArgs fpga_EW_add_args;

 public:
-  const fpga::FpgaEWAddArgs &FpgaArgs() const { return fpga_EW_add_args; }
-  void SetFpgaArgs(const fpga::FpgaEWAddArgs &args) { fpga_EW_add_args = args; }
+  const fpga::EWAddArgs &FpgaArgs() const { return fpga_EW_add_args; }
+  void SetFpgaArgs(const fpga::EWAddArgs &args) { fpga_EW_add_args = args; }
#endif
};
@@ -465,11 +465,11 @@ class PoolParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA

 private:
-  fpga::FpgaPoolArgs fpga_pool_args;
+  fpga::PoolingArgs fpga_pool_args;

 public:
-  const fpga::FpgaPoolArgs &FpgaArgs() const { return fpga_pool_args; }
-  void SetFpgaArgs(const fpga::FpgaPoolArgs &args) { fpga_pool_args = args; }
+  const fpga::PoolingArgs &FpgaArgs() const { return fpga_pool_args; }
+  void SetFpgaArgs(const fpga::PoolingArgs &args) { fpga_pool_args = args; }
#endif
};
#endif
@@ -651,10 +651,10 @@ class MultiClassNMSParam : public OpParam {
class FeedParam : public OpParam {
 public:
  FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
-            const AttributeMap &attrs, Scope const &scope) {
-    input_x_ = InputXFrom<LoDTensor>(inputs, scope);
-    out_ = OutFrom<LoDTensor>(outputs, scope);
-    auto var = scope.Var("batch_size");
+            const AttributeMap &attrs, Scope *scope) {
+    input_x_ = InputXFrom<LoDTensor>(inputs, *scope);
+    out_ = OutFrom<LoDTensor>(outputs, *scope);
+    auto var = scope->Var("batch_size");
    batch_size = var->GetValue<int>();
  }
  const Tensor *InputX() const { return input_x_; }
@@ -933,11 +933,11 @@ class FusionFcParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA

 private:
-  fpga::FpgaConvArgs fpga_conv_args;
+  fpga::ConvArgs fpga_conv_args;

 public:
-  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif
};
@@ -991,11 +991,11 @@ class FusionConvAddParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA

 private:
-  fpga::FpgaConvArgs fpga_conv_args;
+  fpga::ConvArgs fpga_conv_args;

 public:
-  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif
};
@@ -1096,11 +1096,11 @@ class FusionConvAddBNReluParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA

 private:
-  fpga::FpgaConvArgs fpga_conv_args;
+  fpga::ConvArgs fpga_conv_args;

 public:
-  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
@@ -1190,11 +1190,11 @@ class FusionConvAddBNParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA

 private:
-  fpga::FpgaConvArgs fpga_conv_args;
+  fpga::ConvArgs fpga_conv_args;

 public:
-  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
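
Editorial note, not part of the commit: the pattern repeated across these param classes is the same — under PADDLE_MOBILE_FPGA each op param carries one of the new fpga::*Args structs, presumably filled once when the op is prepared and read back by the FPGA kernel. A hedged sketch of that flow for the elementwise-add case; the two helper functions are invented for illustration, only ElementwiseAddParam::FpgaArgs()/SetFpgaArgs() and fpga::EWAddArgs come from the diff, and the accessors exist only in a PADDLE_MOBILE_FPGA build.

#include "fpga/api/fpga_api.h"
#include "operators/op_param.h"

namespace paddle_mobile {
namespace operators {

// Hypothetical init step: build the driver-side argument block once and
// stash it on the param object.
void PrepareEWAdd(ElementwiseAddParam *param) {
  fpga::EWAddArgs args = {};
  args.relu_enabled = false;
  args.const0 = 1.0f;  // output0 = const0 x input0 + const1 x input1;
  args.const1 = 1.0f;
  // image0 / image1 / output would be filled from the param's tensors here.
  param->SetFpgaArgs(args);
}

// Hypothetical compute step: hand the cached args straight to the API.
int RunEWAdd(const ElementwiseAddParam &param) {
  return fpga::ComputeFpgaEWAdd(param.FpgaArgs());
}

}  // namespace operators
}  // namespace paddle_mobile
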
test/framework/test_load.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

+#include <string>
+
#include "../test_helper.h"
#include "io/loader.h"
@@ -20,12 +22,10 @@ int main() {
  // ../../../test/models/googlenet
  // ../../../test/models/mobilenet
  // auto program = loader.Load(g_googlenet, true);
  // auto program = loader.Load(g_mobilenet_ssd, true);
-  auto program = loader.Load(g_mobilenet_ssd, true);
  // auto program = loader.Load(g_googlenet_combine + "/model",
  // g_googlenet_combine +
  // "/params", true);
+  auto program = loader.Load(std::string(g_ocr) + "/model",
+                             std::string(g_ocr) + "/params", false);
  // program.originProgram->Description("program desc: ");
  return 0;
}
test/test_helper.h
@@ -24,6 +24,7 @@ limitations under the License. */
#include "framework/ddim.h"
#include "framework/tensor.h"

+static const char *g_ocr = "../models/ocr";
static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
static const char *g_squeezenet = "../models/squeezenet";
test_gemm.cpp
Deleted  100644 → 0

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <iostream>
#include <cstdlib>
#include <ctime>
#include "../test_helper.h"
#include "common/log.h"
#include "memory/t_malloc.h"
#include "operators/math/gemm.h"

#define a(i, j) a[(i)*lda + (j)]
#define b(i, j) b[(i)*ldb + (j)]
#define c(i, j) c[(i)*ldc + (j)]
#define c1(i, j) c1[(i)*ldc + (j)]

void print_matirx(int m, int n, int ldc, float *c) {
  for (int i = 0; i < m; ++i) {
    std::cout << c(i, 0);
    for (int j = 1; j < n; ++j) {
      std::cout << " | " << c(i, j);
    }
    std::cout << std::endl;
  }
  std::cout << std::endl;
}

int do_sgemm(int m, int n, int k, bool relu, int t1, int t2, int pr) {
  int lda = k;
  int ldb = n;
  int ldc = n;

  float *a =
      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * k));
  float *b =
      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * k * n));
  float *c =
      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * n));
  float *c1 =
      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * n));
  float *scale =
      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m));
  float *bias =
      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m));

  srand(unsigned(time(0)));
  for (int i = 0; i < m * k; ++i) {
    a[i] = t1 + rand() % t2;
  }
  for (int i = 0; i < k * n; ++i) {
    b[i] = t1 + rand() % t2;
  }
  for (int i = 0; i < m; ++i) {
    scale[i] = t1 + rand() % t2;
  }
  for (int i = 0; i < m; ++i) {
    bias[i] = t1 + rand() % t2;
  }

  for (int i = 0; i < m; ++i) {
    for (int j = 0; j < n; ++j) {
      float r = 0;
      for (int p = 0; p < k; p++) {
        r += a(i, p) * b(p, j);
      }
      r *= scale[i];
      r += bias[i];
      if (relu && (r < 0)) {
        r = 0;
      }
      c1(i, j) = r;
    }
  }

  paddle_mobile::operators::math::SgemmWithBn(m, n, k, 0.9, a, lda, b, ldb, 0.3,
                                              c, ldc, relu, scale, bias);
  int eq = 0;
  int neq = 0;
  for (int i = 0; i < m * n; ++i) {
    if (static_cast<int>(c[i]) == static_cast<int>(c1[i])) {
      ++eq;
    } else {
      ++neq;
    }
  }

  if (pr > 0) {
    std::cout << "A:" << std::endl;
    print_matirx(m, k, lda, a);
    std::cout << "B:" << std::endl;
    print_matirx(k, n, ldb, b);
    std::cout << "C:" << std::endl;
    print_matirx(m, n, ldc, c);
    std::cout << "C1:" << std::endl;
    print_matirx(m, n, ldc, c1);
  }

  std::cout << "mnk=" << m << " " << n << " " << k << " relu=" << relu
            << " eq=" << eq << " neq=" << neq << std::endl;

  paddle_mobile::memory::Free(a);
  paddle_mobile::memory::Free(b);
  paddle_mobile::memory::Free(c);
  paddle_mobile::memory::Free(c1);
  paddle_mobile::memory::Free(scale);
  paddle_mobile::memory::Free(bias);

  return 0;
}

int main() {
  do_sgemm(9, 9, 9, true, 10, 10, 10);
  do_sgemm(10, 6, 12, false, 10, 10, 0);
  do_sgemm(512, 256, 384, false, 10, 10, 0);
  do_sgemm(1366, 768, 256, false, 10, 10, 0);
  do_sgemm(1255, 755, 333, false, 10, 10, 0);
  do_sgemm(555, 777, 999, false, 10, 10, 0);

  do_sgemm(10, 6, 12, true, -4, 10, 0);
  do_sgemm(512, 256, 384, true, -4, 10, 0);
  do_sgemm(1366, 768, 256, true, -4, 10, 0);
  do_sgemm(1255, 755, 333, true, -4, 10, 0);
  do_sgemm(555, 777, 999, true, -4, 10, 0);
  return 0;
}
tools/build.sh
@@ -40,8 +40,8 @@ build_for_android() {
    fi

    if [ -z "$PLATFORM" ]; then
-        # PLATFORM="arm-v7a" # Users could choose "arm-v8a" platform.
-        PLATFORM="arm-v8a"
+        PLATFORM="arm-v7a" # Users could choose "arm-v8a" platform.
+        # PLATFORM="arm-v8a"
    fi

    if [ "${PLATFORM}" = "arm-v7a" ]; then
tools/quantification/convert.cpp
@@ -3,8 +3,8 @@
#include "src/enforce.h"
#include "src/var_desc.h"
#include "src/program_desc.h"
#include <cstring>
#include <cstdlib>
#include <string>
#include <cmath>
#include <iostream>
#include <utility>
@@ -13,7 +13,7 @@
#include "src/protobuf-c.h"
#include <fstream>
#include <iostream>
#include <limits>

const size_t kSize64 = sizeof(uint64_t);
const size_t kSize32 = sizeof(uint32_t);
tools/quantification/src/block_desc_local.h
@@ -19,6 +19,7 @@ limitations under the License. */
#ifndef TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_
#define TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_

#include <memory>
#include <vector>
#include "src/var_desc.h"