Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
5ec7afed
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5ec7afed
编写于
9月 04, 2018
作者:
Z
zhangyang0701
提交者:
GitHub
9月 04, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #903 from zhangyang0701/develop
Format data for FPGA track. close
#900
上级
88960828
735bf697
变更
9
显示空白变更内容
内联
并排
Showing
9 changed file
with
377 addition
and
42 deletion
+377
-42
src/fpga/api.cpp
src/fpga/api.cpp
+60
-0
src/fpga/api.h
src/fpga/api.h
+8
-0
src/fpga/bias_scale.cpp
src/fpga/bias_scale.cpp
+25
-4
src/fpga/bias_scale.h
src/fpga/bias_scale.h
+2
-0
src/fpga/filter.cpp
src/fpga/filter.cpp
+178
-9
src/fpga/filter.h
src/fpga/filter.h
+5
-4
src/fpga/image.cpp
src/fpga/image.cpp
+54
-0
src/fpga/image.h
src/fpga/image.h
+13
-0
src/framework/tensor.h
src/framework/tensor.h
+32
-25
未找到文件。
src/fpga/api.cpp
浏览文件 @
5ec7afed
...
@@ -26,8 +26,14 @@ limitations under the License. */
...
@@ -26,8 +26,14 @@ limitations under the License. */
#include <cmath>
#include <cmath>
#include <cstdio>
#include <cstdio>
#include <cstring>
#include <cstring>
#include <vector>
#include "api.h"
#include "api.h"
#include "bias_scale.h"
#include "common/enforce.h"
#include "common/types.h"
#include "filter.h"
#include "image.h"
#define FPGA_TEST_MODE
#define FPGA_TEST_MODE
#ifdef FPGA_TEST_MODE
#ifdef FPGA_TEST_MODE
...
@@ -164,5 +170,59 @@ int PerformBypass(const struct BypassArgs &args) {
...
@@ -164,5 +170,59 @@ int PerformBypass(const struct BypassArgs &args) {
return
do_ioctl
(
IOCTL_CONFIG_BYPASS
,
&
args
);
return
do_ioctl
(
IOCTL_CONFIG_BYPASS
,
&
args
);
}
}
// Reformats the float data held by `image_tensor` for the FPGA.
//
// Channel, height and width are taken from dims[1], dims[2] and dims[3].
// The tensor's float data is copied into a fresh fpga_malloc buffer,
// image::format_image rewrites that buffer (it receives the pointer by
// address and may replace it), and the tensor is then pointed at the result
// through reset_data_ptr.
void format_image(framework::Tensor *image_tensor) {
  auto shape = image_tensor->dims();
  int channel = shape[1];
  int height = shape[2];
  int width = shape[3];

  auto source = image_tensor->mutable_data<float>();
  size_t memory_size = channel * height * width * sizeof(float);

  // Work on a private copy so the formatting helpers can free and replace it.
  float *formatted = (float *)fpga_malloc(memory_size);
  fpga_copy(formatted, source, memory_size);
  image::format_image(&formatted, channel, height, width);

  image_tensor->reset_data_ptr(formatted);
}
// Allocates the buffer for an output feature map; no data is written.
//
// The size is `height` rows of `channel * width` elements, with each row
// rounded up to IMAGE_ALIGNMENT elements and each element sized as `half`.
// The new buffer is installed on the tensor through reset_data_ptr.
void format_ofm(framework::Tensor *ofm_tensor) {
  auto shape = ofm_tensor->dims();
  int channel = shape[1];
  int height = shape[2];
  int width = shape[3];

  size_t memory_size =
      height * align_to_x(channel * width, IMAGE_ALIGNMENT) * sizeof(half);

  ofm_tensor->reset_data_ptr(fpga_malloc(memory_size));
}
// Reformats the float filter data held by `filter_tensor` for the FPGA.
//
// dims[0..3] are read as filter count, channel, height and width. The data
// is copied into a fresh fpga_malloc buffer, its largest absolute value is
// found with filter::find_max, and filter::format_filter then rewrites the
// buffer (receiving the pointer by address, so it may replace it). The
// resulting pointer is installed on the tensor through reset_data_ptr.
void format_filter(framework::Tensor *filter_tensor, int group_num) {
  auto shape = filter_tensor->dims();
  int num = shape[0];
  int channel = shape[1];
  int height = shape[2];
  int width = shape[3];

  auto source = filter_tensor->mutable_data<float>();
  size_t memory_size = num * channel * height * width * sizeof(float);

  float *formatted = (float *)fpga_malloc(memory_size);
  fpga_copy(formatted, source, memory_size);

  float max_value =
      filter::find_max(formatted, num * channel * height * width);
  filter::format_filter(&formatted, num, channel, height, width, group_num,
                        max_value);

  filter_tensor->reset_data_ptr(formatted);
}
// Reformats a fully-connected weight matrix as if it were a filter.
//
// dims[1] is used as the filter count and dims[0] / height / width as the
// channel count, so dims[0] must be a positive multiple of height * width.
// The data is copied into a fresh fpga_malloc buffer, scanned with
// filter::find_max, rewritten by filter::format_filter, and installed on the
// tensor through reset_data_ptr.
//
// @param filter_tensor tensor whose data pointer is replaced
// @param group_num     forwarded unchanged to filter::format_filter
// @param height        spatial height used to split dims[0]; must be > 0
// @param width         spatial width used to split dims[0]; must be > 0
void format_fc_matrix(framework::Tensor *filter_tensor, int group_num,
                      int height, int width) {
  auto dims = filter_tensor->dims();

  // Reject non-positive sizes up front: they would otherwise reach the
  // modulo and divisions below as a division by zero.
  PADDLE_MOBILE_ENFORCE(height > 0 && width > 0,
                        "Height and width should be positive");
  // The message now describes the condition that is actually checked.
  PADDLE_MOBILE_ENFORCE(dims[0] % (height * width) == 0,
                        "Filter dims[0] should be divisible by height * width");

  int num = dims[1];
  int channel = dims[0] / height / width;

  auto data_ptr = filter_tensor->mutable_data<float>();
  size_t memory_size = num * channel * height * width * sizeof(float);

  float *new_data = (float *)fpga_malloc(memory_size);
  fpga_copy(new_data, data_ptr, memory_size);

  float max_value = filter::find_max(new_data, num * channel * height * width);
  filter::format_filter(&new_data, num, channel, height, width, group_num,
                        max_value);

  filter_tensor->reset_data_ptr(new_data);
}
// Public entry point for bias/scale formatting: forwards all three arguments
// unchanged to bias_scale::format_bias_scale_array, which may replace the
// buffer that `*bias_scale_array` points to.
void format_bias_scale_array(float **bias_scale_array,
                             int element_num_per_division, int num) {
  bias_scale::format_bias_scale_array(bias_scale_array,
                                      element_num_per_division, num);
}
}
// namespace fpga
}
// namespace fpga
}
// namespace paddle_mobile
}
// namespace paddle_mobile
src/fpga/api.h
浏览文件 @
5ec7afed
...
@@ -18,6 +18,7 @@ limitations under the License. */
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include <cstddef>
#include <cstddef>
#include <iostream>
#include <iostream>
#include <limits>
#include <limits>
#include "framework/tensor.h"
// memory management;
// memory management;
...
@@ -175,6 +176,13 @@ int ComputeFpgaPool(const struct PoolingArgs& args);
...
@@ -175,6 +176,13 @@ int ComputeFpgaPool(const struct PoolingArgs& args);
int
ComputeFpgaEWAdd
(
const
struct
EWAddArgs
&
args
);
int
ComputeFpgaEWAdd
(
const
struct
EWAddArgs
&
args
);
static
inline
int
align_to_x
(
int
num
,
int
x
)
{
return
(
num
+
x
-
1
)
/
x
*
x
;
}
static
inline
int
align_to_x
(
int
num
,
int
x
)
{
return
(
num
+
x
-
1
)
/
x
*
x
;
}
void
format_image
(
framework
::
Tensor
*
image_tensor
);
void
format_ofm
(
framework
::
Tensor
*
ofm_tensor
);
// only allocate memory
void
format_filter
(
framework
::
Tensor
*
filter_tensor
,
int
group_num
);
void
format_fc_matrix
(
framework
::
Tensor
*
filter_tensor
,
int
group_num
,
int
height
=
1
,
int
width
=
1
);
void
format_bias_scale_array
(
float
**
bias_scale_array
,
int
element_num_per_division
,
int
num
);
}
// namespace fpga
}
// namespace fpga
}
// namespace paddle_mobile
}
// namespace paddle_mobile
src/fpga/bias_scale.cpp
浏览文件 @
5ec7afed
...
@@ -21,6 +21,7 @@ namespace fpga {
...
@@ -21,6 +21,7 @@ namespace fpga {
namespace
bias_scale
{
namespace
bias_scale
{
void
align_element
(
float
**
data_in
,
int
num_per_div_before_alignment
,
int
num
)
{
void
align_element
(
float
**
data_in
,
int
num_per_div_before_alignment
,
int
num
)
{
int
copynum
=
0
;
float
*
ptr_unaligned
=
*
data_in
;
float
*
ptr_unaligned
=
*
data_in
;
int
div_num
=
int
div_num
=
(
num
+
num_per_div_before_alignment
-
1
)
/
num_per_div_before_alignment
;
(
num
+
num_per_div_before_alignment
-
1
)
/
num_per_div_before_alignment
;
...
@@ -33,8 +34,20 @@ void align_element(float **data_in, int num_per_div_before_alignment, int num) {
...
@@ -33,8 +34,20 @@ void align_element(float **data_in, int num_per_div_before_alignment, int num) {
memset
(
ptr_aligned
,
0
,
num_element
*
sizeof
(
float
));
memset
(
ptr_aligned
,
0
,
num_element
*
sizeof
(
float
));
for
(
int
i
=
0
;
i
<
div_num
;
i
++
)
{
for
(
int
i
=
0
;
i
<
div_num
;
i
++
)
{
memcpy
(
ptr_aligned
+
i
*
num_per_div_after_alignment
,
ptr_unaligned
,
if
(
i
==
div_num
-
1
)
{
num_per_div_before_alignment
*
sizeof
(
float
));
copynum
=
(
num_per_div_after_alignment
*
div_num
>
num
)
?
(
num
%
num_per_div_after_alignment
)
:
(
num_per_div_before_alignment
);
}
else
{
copynum
=
num_per_div_before_alignment
;
}
memcpy
(
ptr_aligned
+
i
*
num_per_div_after_alignment
,
ptr_unaligned
+
num_per_div_before_alignment
*
i
,
copynum
*
sizeof
(
float
));
memcpy
(
ptr_aligned
+
(
div_num
+
i
)
*
num_per_div_after_alignment
,
ptr_unaligned
+
num_per_div_before_alignment
*
i
+
num
,
copynum
*
sizeof
(
float
));
}
}
fpga_free
(
ptr_unaligned
);
fpga_free
(
ptr_unaligned
);
...
@@ -52,14 +65,22 @@ void interleave(float **data_in, int num_after_alignment) {
...
@@ -52,14 +65,22 @@ void interleave(float **data_in, int num_after_alignment) {
memcpy
(
ptr_interleaved
+
8
*
i
,
ptr_uninterleaved
+
4
*
i
,
memcpy
(
ptr_interleaved
+
8
*
i
,
ptr_uninterleaved
+
4
*
i
,
4
*
sizeof
(
float
));
4
*
sizeof
(
float
));
memcpy
(
ptr_interleaved
+
8
*
i
+
4
,
memcpy
(
ptr_interleaved
+
8
*
i
+
4
,
ptr_uninterleaved
+
num_after_alignment
*
sizeof
(
float
)
+
4
*
i
,
ptr_uninterleaved
+
num_after_alignment
+
4
*
i
,
4
*
sizeof
(
float
));
4
*
sizeof
(
float
));
}
}
fpga_free
(
ptr_uninterleaved
);
fpga_free
(
ptr_uninterleaved
);
*
data_in
=
ptr_interleaved
;
*
data_in
=
ptr_interleaved
;
}
}
// Runs the two bias/scale formatting passes on `*bias_scale_array`.
//
// First align_element pads the array per division, then interleave is run
// over the padded length, which is the number of divisions times the
// per-division element count rounded up to BS_NUM_ALIGNMENT.
void format_bias_scale_array(float **bias_scale_array,
                             int element_num_per_division, int num) {
  align_element(bias_scale_array, element_num_per_division, num);

  // Ceiling division: a partially filled last division still counts.
  int division_count =
      (num + element_num_per_division - 1) / element_num_per_division;
  int aligned_elements_per_division =
      align_to_x(element_num_per_division, BS_NUM_ALIGNMENT);

  interleave(bias_scale_array, division_count * aligned_elements_per_division);
}
}
// namespace bias_scale
}
// namespace bias_scale
}
// namespace fpga
}
// namespace fpga
}
// namespace paddle_mobile
}
// namespace paddle_mobile
src/fpga/bias_scale.h
浏览文件 @
5ec7afed
...
@@ -22,6 +22,8 @@ namespace bias_scale {
...
@@ -22,6 +22,8 @@ namespace bias_scale {
void
align_element
(
float
**
data_in
,
int
num_per_div_before_alignment
,
int
num
);
void
align_element
(
float
**
data_in
,
int
num_per_div_before_alignment
,
int
num
);
void
interleave
(
float
**
data_in
,
int
num_after_alignment
);
void
interleave
(
float
**
data_in
,
int
num_after_alignment
);
void
format_bias_scale_array
(
float
**
bias_scale_array
,
int
element_num_per_division
,
int
num
);
}
// namespace bias_scale
}
// namespace bias_scale
}
// namespace fpga
}
// namespace fpga
...
...
src/fpga/filter.cpp
浏览文件 @
5ec7afed
...
@@ -19,21 +19,190 @@ namespace paddle_mobile {
...
@@ -19,21 +19,190 @@ namespace paddle_mobile {
namespace
fpga
{
namespace
fpga
{
namespace
filter
{
namespace
filter
{
void
convert_to_hwc
(
float
**
data_in
,
int
num
,
int
channel
,
int
height
,
// Returns how many filters fit into one division for a filter of `chw`
// elements: 2048 divided by the number of 16-element groups needed for
// `chw` (rounded up), times 32. All arithmetic is integer arithmetic.
int calc_division_capacity(int chw) {
  int groups_of_16 = (chw + 15) / 16;
  int capacity = 2048 / groups_of_16;
  return capacity * 32;
}
int
width
)
{}
float
find_max
(
float
*
data_in
,
int
num
)
{
return
0
;
}
// Returns the number of divisions of size `division_capacity` needed to hold
// `num` filters, i.e. num / division_capacity rounded up.
int calc_split_num(int num, int division_capacity) {
  int rounded_up = num + division_capacity - 1;
  return rounded_up / division_capacity;
}
void
quantize
(
float
*
data_in
,
int
num
)
{}
// Returns the total number of divisions: group_num * split_num.
//
// Enforces that `num` is a multiple of `group_num`, and that grouping and
// splitting are not combined (at least one of group_num / split_num is 1).
int calc_division_number(int num, int group_num, int division_capacity) {
  PADDLE_MOBILE_ENFORCE(num % group_num == 0,
                        "Filter number should be divisible by group number");

  int split_num = calc_split_num(num, division_capacity);
  PADDLE_MOBILE_ENFORCE(group_num == 1 || split_num == 1,
                        "Split number or group number should be 1");

  int division_number = group_num * split_num;
  return division_number;
}
void
align_element
(
float
**
data_in
,
int
num
,
int
chw
)
{}
// Returns the number of filters placed in each division.
//
// With several groups, each division holds num / group_num filters (rounded
// up). With a single group, a division holds all `num` filters unless that
// exceeds `division_capacity`, in which case it holds exactly the capacity.
int calc_num_per_div(int num, int group_num, int division_capacity) {
  if (group_num != 1) {
    return (num + group_num - 1) / group_num;
  }
  return (num > division_capacity) ? division_capacity : num;
}
void
align_num
(
float
**
data_in
,
int
num_per_div_before_alignment
,
int
num
,
void
convert_to_hwc
(
char
**
data_in
,
int
num
,
int
channel
,
int
height
,
int
chw
)
{}
int
width
)
{
char
*
tmp
=
*
data_in
;
int
chw
=
channel
*
height
*
width
;
char
*
data_tmp
=
(
char
*
)
fpga_malloc
(
chw
*
num
*
sizeof
(
char
));
for
(
int
n
=
0
;
n
<
num
;
n
++
)
{
int64_t
amount_per_row
=
width
*
channel
;
for
(
int
c
=
0
;
c
<
channel
;
c
++
)
{
for
(
int
h
=
0
;
h
<
height
;
h
++
)
{
int64_t
offset_height
=
h
*
amount_per_row
;
for
(
int
w
=
0
;
w
<
width
;
w
++
)
{
*
(
data_tmp
+
n
*
chw
+
offset_height
+
w
*
channel
+
c
)
=
*
((
*
data_in
)
++
);
}
}
}
}
void
reorder
(
float
**
data_in
,
int
num_after_alignment
,
int
chw
)
{}
*
data_in
=
data_tmp
;
fpga_free
(
tmp
);
}
void
interleave
(
float
**
data_in
,
int
num_after_alignment
,
int
chw
)
{}
// Returns the largest absolute value among the first `data_size` floats of
// `data_in`, or 0 when `data_size` is not positive.
float find_max(float *data_in, int data_size) {
  float largest = 0.0;
  for (int idx = 0; idx < data_size; ++idx) {
    float element = data_in[idx];
    float magnitude = element > 0 ? element : -element;
    // Same comparison std::max performs: replace only when strictly larger.
    if (largest < magnitude) {
      largest = magnitude;
    }
  }
  return largest;
}
// Quantizes `data_size` floats to 8-bit values and swaps the buffer.
//
// Each value is multiplied by 127 / max and truncated toward zero. The
// result is written to a new fpga_malloc buffer of `data_size` bytes,
// `*data_in` is redirected to that byte buffer (still typed as float*, the
// caller reinterprets it as char*), and the original float buffer is freed.
//
// @param data_in   in: float buffer; out: byte buffer holding the result
// @param data_size number of elements
// @param max       largest absolute value in the input (see find_max)
void quantize(float **data_in, int data_size, float max) {
  float *tmp = *data_in;
  float fix_range = 127;

  // max == 0 means every input is zero. Dividing would give an infinite
  // scale and 0 * inf is NaN, so use a zero scale and emit all zeros.
  float scale = (max != 0) ? fix_range / max : 0;

  char *tmp_data = (char *)fpga_malloc(data_size * sizeof(char));
  for (int i = 0; i < data_size; i++) {
    // Convert through signed char: plain char is unsigned on some targets,
    // and converting a negative float to an unsigned type is undefined.
    tmp_data[i] = (char)(signed char)(tmp[i] * scale);
  }

  *data_in = (float *)tmp_data;
  fpga_free(tmp);
}
// Pads every filter from `chw` bytes to the next multiple of
// FILTER_ELEMENT_ALIGNMENT bytes.
//
// Nothing happens when `chw` is already aligned. Otherwise a zero-filled
// buffer of num * aligned-chw bytes is allocated, each filter is copied to
// the start of its padded slot, `*data_in` is redirected to the new buffer
// and the old buffer is freed.
void align_element(char **data_in, int num, int chw) {
  int align_chw = align_to_x(chw, FILTER_ELEMENT_ALIGNMENT);
  if (align_chw == chw) {
    return;
  }

  printf("align %d\n", align_chw);

  char *unpadded = *data_in;
  char *padded = (char *)fpga_malloc(num * align_chw * sizeof(char));
  memset(padded, 0, num * align_chw);

  for (int filter = 0; filter < num; filter++) {
    memcpy(padded + filter * align_chw, unpadded + filter * chw, chw);
  }

  *data_in = padded;
  fpga_free(unpadded);
}
// Pads each division from `num_per_div_before_alignment` filters to the
// next multiple of FILTER_NUM_ALIGNMENT filters.
//
// Nothing happens when the per-division count is already aligned. Otherwise
// a zero-filled buffer is allocated, each division's filters are copied to
// the start of its padded slot, `*data_in` is redirected to the new buffer
// and the old buffer is freed.
//
// @param data_in  in/out byte buffer; assumed to hold `num` filters of
//                 aligned-chw bytes each, as produced by align_element
//                 (TODO confirm for other callers)
// @param num_per_div_before_alignment filters per division before padding
// @param num      total number of filters
// @param chw      unaligned element count of one filter
void align_num(char **data_in, int num_per_div_before_alignment, int num,
               int chw) {
  int align_chw = align_to_x(chw, FILTER_ELEMENT_ALIGNMENT);
  int num_per_div_after_alignment =
      align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT);

  if (num_per_div_after_alignment != num_per_div_before_alignment) {
    char *tmp = *data_in;
    int div_num =
        (num + num_per_div_before_alignment - 1) / num_per_div_before_alignment;
    int num_element = div_num * num_per_div_after_alignment * align_chw;
    char *data_tmp = (char *)fpga_malloc(num_element * sizeof(char));

    memset(data_tmp, 0, num_element * sizeof(char));

    for (int i = 0; i < div_num; i++) {
      // The last division may be only partly filled when `num` is not a
      // multiple of the per-division count; copying a full division there
      // would read past the end of the source buffer.
      int remaining = num - i * num_per_div_before_alignment;
      int filters_in_div = remaining < num_per_div_before_alignment
                               ? remaining
                               : num_per_div_before_alignment;
      memcpy(data_tmp + num_per_div_after_alignment * align_chw * i,
             tmp + num_per_div_before_alignment * align_chw * i,
             filters_in_div * align_chw);
    }

    *data_in = data_tmp;
    fpga_free(tmp);
  }
}
// Permutes whole filters into a new buffer.
//
// Each filter occupies `chw` rounded up to FILTER_ELEMENT_ALIGNMENT bytes.
// Destination slot `dst` receives the filter found at source slot
//   dst/32*32 + (dst%16/4*8) + (dst%16%4) + (dst/16%2*4)
// (integer arithmetic), so filters only move within their own block of 32.
// `*data_in` is redirected to the new buffer and the old buffer is freed.
void reorder(char **data_in, int num_after_alignment, int chw) {
  int chw_align = align_to_x(chw, FILTER_ELEMENT_ALIGNMENT);

  char *reordered =
      (char *)fpga_malloc(chw_align * num_after_alignment * sizeof(char));
  char *original = *data_in;

  for (int dst = 0; dst < num_after_alignment; dst++) {
    int src = dst / 32 * 32 + (dst % 16 / 4 * 8) + (dst % 16 % 4) +
              (dst / 16 % 2 * 4);
    memcpy(reordered + dst * chw_align, original + src * chw_align, chw_align);
  }

  *data_in = reordered;
  fpga_free(original);
}
// Interleaves each pair of consecutive filters in 16-byte chunks.
//
// Each filter occupies `chw` rounded up to FILTER_ELEMENT_ALIGNMENT bytes.
// For filters (2p, 2p+1) the output region of that pair holds chunk 0 of
// the even filter, chunk 0 of the odd filter, chunk 1 of the even filter,
// chunk 1 of the odd filter, and so on. `*data_in` is redirected to the new
// buffer and the old buffer is freed.
void interleave(char **data_in, int num_after_alignment, int chw) {
  int interleave_per_num = 16;
  int chw_align = align_to_x(chw, FILTER_ELEMENT_ALIGNMENT);

  char *interleaved =
      (char *)fpga_malloc(chw_align * num_after_alignment * sizeof(char));
  char *original = *data_in;

  // Number of 16-byte output slots occupied by one pair of filters.
  int interleave_num = chw_align * 2 / interleave_per_num;

  for (int row = 0; row < num_after_alignment; row += 2) {
    char *pair_out = interleaved + row * chw_align;
    char *even_in = original + row * chw_align;
    char *odd_in = original + (row + 1) * chw_align;

    for (int slot = 0, chunk = 0; slot < interleave_num; slot += 2, chunk++) {
      memcpy(pair_out + interleave_per_num * slot,
             even_in + interleave_per_num * chunk, interleave_per_num);
      memcpy(pair_out + interleave_per_num * (slot + 1),
             odd_in + interleave_per_num * chunk, interleave_per_num);
    }
  }

  *data_in = interleaved;
  fpga_free(original);
}
// Runs the full filter formatting pipeline on `*data_in`.
//
// The float buffer is first quantized to bytes using `max`, then treated as
// a char buffer and passed through convert_to_hwc, align_element, align_num,
// reorder and interleave in that order. Every stage may replace the buffer
// `*data_in` points to.
void format_filter(float **data_in, int num, int channel, int height,
                   int width, int group_num, float max) {
  int chw = channel * height * width;
  int data_size = channel * height * width * num;

  // Derive how the filters are split into divisions and padded.
  int division_capacity = calc_division_capacity(chw);
  int num_per_div_before_alignment =
      calc_num_per_div(num, group_num, division_capacity);
  int num_per_div_after_alignment =
      align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT);
  int div_num =
      (num + num_per_div_before_alignment - 1) / num_per_div_before_alignment;
  int num_after_alignment = num_per_div_after_alignment * div_num;

  quantize(data_in, data_size, max);

  // After quantize the buffer holds bytes, so the same slot is viewed as
  // char** for the remaining stages.
  char **quantize_data = (char **)data_in;

  convert_to_hwc(quantize_data, num, channel, height, width);
  align_element(quantize_data, num, chw);
  align_num(quantize_data, num_per_div_before_alignment, num, chw);
  reorder(quantize_data, num_after_alignment, chw);
  interleave(quantize_data, num_after_alignment, chw);
}
}
// namespace filter
}
// namespace filter
}
// namespace fpga
}
// namespace fpga
...
...
src/fpga/filter.h
浏览文件 @
5ec7afed
...
@@ -22,14 +22,15 @@ namespace fpga {
...
@@ -22,14 +22,15 @@ namespace fpga {
namespace
filter
{
namespace
filter
{
void
convert_to_hwc
(
float
**
data_in
,
int
num
,
int
channel
,
int
height
,
void
convert_to_hwc
(
float
**
data_in
,
int
num
,
int
channel
,
int
height
,
int
width
);
int
width
);
float
find_max
(
float
*
data_in
,
int
num
);
float
find_max
(
float
*
data_in
,
int
data_size
);
void
quantize
(
float
*
data_in
,
int
num
);
void
quantize
(
float
*
*
data_in
,
int
data_size
,
float
max
);
void
align_element
(
float
**
data_in
,
int
num
,
int
chw
);
void
align_element
(
float
**
data_in
,
int
num
,
int
chw
);
void
align_num
(
float
**
data_in
,
int
num_per_div_before_alignment
,
int
num
,
void
align_num
(
char
**
data_in
,
int
num_per_div_before_alignment
,
int
num
,
int
chw
);
int
chw
);
void
reorder
(
float
**
data_in
,
int
num_after_alignment
,
int
chw
);
void
reorder
(
float
**
data_in
,
int
num_after_alignment
,
int
chw
);
void
interleave
(
float
**
data_in
,
int
num_after_alignment
,
int
chw
);
void
interleave
(
float
**
data_in
,
int
num_after_alignment
,
int
chw
);
void
format_filter
(
float
**
data_in
,
int
num
,
int
channel
,
int
height
,
int
width
,
int
group_num
,
float
max
);
}
// namespace filter
}
// namespace filter
}
// namespace fpga
}
// namespace fpga
}
// namespace paddle_mobile
}
// namespace paddle_mobile
src/fpga/image.cpp
浏览文件 @
5ec7afed
...
@@ -11,3 +11,57 @@ distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -11,3 +11,57 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "image.h"
#include <memory.h>
#include "api.h"
namespace
paddle_mobile
{
namespace
fpga
{
namespace
image
{
// Rewrites one image from channel-major order (c, h, w) to (h, w, c) order.
//
// The input is read sequentially with channel as the slowest index and
// width as the fastest; each value is stored at h * (width * channel) +
// w * channel + c in a new fpga_malloc buffer. `*data_in` is redirected to
// the new buffer and the old buffer is freed.
void convert_to_hwc(float **data_in, int channel, int height, int width) {
  float *chw_data = *data_in;
  float *hwc_data =
      (float *)fpga_malloc(channel * height * width * sizeof(float));

  int64_t amount_per_row = width * channel;
  const float *next_in = chw_data;

  for (int c = 0; c < channel; c++) {
    for (int h = 0; h < height; h++) {
      int64_t offset_height = h * amount_per_row;
      for (int w = 0; w < width; w++) {
        hwc_data[offset_height + w * channel + c] = *next_in;
        ++next_in;
      }
    }
  }

  *data_in = hwc_data;
  fpga_free(chw_data);
}
// Pads every row of `cw` floats to the next multiple of IMAGE_ALIGNMENT
// floats.
//
// Nothing happens when `cw` is already aligned. Otherwise a zero-filled
// buffer of height * aligned-cw floats is allocated, each row is copied to
// the start of its padded slot, `*data_in` is redirected to the new buffer
// and the old buffer is freed.
void align_element_conv(float **data_in, int height, int cw) {
  int align_cw = align_to_x(cw, IMAGE_ALIGNMENT);
  if (align_cw == cw) {
    return;
  }

  float *unpadded = *data_in;
  float *padded = (float *)fpga_malloc(height * align_cw * sizeof(float));
  memset(padded, 0, height * align_cw * sizeof(float));

  for (int row = 0; row < height; row++) {
    memcpy(padded + row * align_cw, unpadded + row * cw, cw * sizeof(float));
  }

  *data_in = padded;
  fpga_free(unpadded);
}
// Formats one image buffer in two passes: convert_to_hwc reorders it, then
// align_element_conv pads each of the `height` rows, whose unpadded length
// is channel * width floats. Either pass may replace the buffer that
// `*data_in` points to.
void format_image(float **data_in, int channel, int height, int width) {
  convert_to_hwc(data_in, channel, height, width);

  int row_length = channel * width;
  align_element_conv(data_in, height, row_length);
}
}
// namespace image
}
// namespace fpga
}
// namespace paddle_mobile
src/fpga/image.h
浏览文件 @
5ec7afed
...
@@ -11,3 +11,16 @@ distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -11,3 +11,16 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#define IMAGE_ALIGNMENT 16 // Aligned to 16
namespace
paddle_mobile
{
namespace
fpga
{
namespace
image
{
void
convert_to_hwc
(
float
**
data_in
,
int
channel
,
int
height
,
int
width
);
void
align_element_conv
(
float
**
data_in
,
int
height
,
int
cw
);
void
format_image
(
float
**
data_in
,
int
channel
,
int
height
,
int
width
);
}
// namespace image
}
// namespace fpga
}
// namespace paddle_mobile
src/framework/tensor.h
浏览文件 @
5ec7afed
...
@@ -254,30 +254,6 @@ class Tensor {
...
@@ -254,30 +254,6 @@ class Tensor {
"Tensor's dims_ is out of bound. "
);
"Tensor's dims_ is out of bound. "
);
}
}
#ifdef PADDLE_MOBILE_FPGA
struct
FPGAArgs
{
friend
class
Tensor
;
inline
float
*
scale_pointer
()
{
return
scale_
;
}
inline
float
scale
()
{
return
*
scale_
;
}
private:
float
*
scale_
;
};
struct
FPGAArgs
fpga_args
()
const
{
FPGAArgs
args
;
args
.
scale_
=
scale
.
get
();
return
args
;
}
void
SetFpgaScale
(
float
s
)
{
*
(
scale
.
get
())
=
s
;
}
private:
std
::
shared_ptr
<
float
>
scale
=
std
::
make_shared
<
float
>
(
0
);
#endif
private:
private:
/**
/**
* @note Placeholder hides type T, so it doesn't appear as a
* @note Placeholder hides type T, so it doesn't appear as a
...
@@ -313,9 +289,12 @@ class Tensor {
...
@@ -313,9 +289,12 @@ class Tensor {
virtual
std
::
type_index
type
()
const
{
return
type_
;
}
virtual
std
::
type_index
type
()
const
{
return
type_
;
}
virtual
void
set_type
(
std
::
type_index
type
)
{
type_
=
type
;
}
virtual
void
set_type
(
std
::
type_index
type
)
{
type_
=
type
;
}
#ifndef PADDLE_MOBILE_FPGA
/*! the pointer of memory block. */
/*! the pointer of memory block. */
std
::
unique_ptr
<
uint8_t
,
memory
::
PODDeleter
<
uint8_t
>>
ptr_
;
std
::
unique_ptr
<
uint8_t
,
memory
::
PODDeleter
<
uint8_t
>>
ptr_
;
#else
std
::
shared_ptr
<
uint8_t
>
ptr_
;
#endif
/*! the size of memory block. */
/*! the size of memory block. */
size_t
size_
;
size_t
size_
;
...
@@ -344,6 +323,34 @@ class Tensor {
...
@@ -344,6 +323,34 @@ class Tensor {
* begins.
* begins.
*/
*/
size_t
offset_
;
size_t
offset_
;
#ifdef PADDLE_MOBILE_FPGA
public:
inline
void
reset_data_ptr
(
void
*
p
)
{
((
PlaceholderImpl
*
)(
holder_
.
get
()))
->
ptr_
.
reset
((
uint8_t
*
)
p
);
}
struct
FPGAArgs
{
friend
class
Tensor
;
inline
float
*
scale_pointer
()
{
return
scale_
;
}
inline
float
scale
()
{
return
*
scale_
;
}
private:
float
*
scale_
;
};
struct
FPGAArgs
fpga_args
()
const
{
FPGAArgs
args
;
args
.
scale_
=
scale
.
get
();
return
args
;
}
void
SetFpgaScale
(
float
s
)
{
*
(
scale
.
get
())
=
s
;
}
private:
std
::
shared_ptr
<
float
>
scale
=
std
::
make_shared
<
float
>
(
0
);
#endif
};
};
#ifdef PADDLE_MOBILE_DEBUG
#ifdef PADDLE_MOBILE_DEBUG
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录