PaddlePaddle / Paddle-Lite
Commit 202d5f29
Authored Nov 19, 2018 by qnqinan

add FPGA support op

Parent: 8c4a035f

Showing 40 changed files with 624 additions and 423 deletions (+624, -423)
Changed files:
  src/common/types.cpp                                      +5    -1
  src/common/types.h                                        +3    -0
  src/fpga/V2/api.cpp                                       +0    -74
  src/fpga/V2/api.h                                         +0    -118
  src/fpga/V2/bias_scale.cpp                                +0    -7
  src/fpga/V2/driver/bitmap.cpp                             +0    -5
  src/fpga/V2/driver/bitmap.h                               +0    -4
  src/fpga/V2/driver/driver.cpp                             +0    -50
  src/fpga/V2/driver/driver.h                               +0    -26
  src/fpga/V2/driver/pe.cpp                                 +0    -42
  src/fpga/V2/driver/pe.h                                   +0    -4
  src/fpga/V2/filter.cpp                                    +0    -6
  src/fpga/V2/image.cpp                                     +0    -9
  src/framework/executor.cpp                                +0    -3
  src/io/paddle_mobile.cpp                                  +0    -7
  src/io/paddle_mobile.h                                    +0    -3
  src/operators/fusion_conv_add_op.cpp                      +3    -1
  src/operators/fusion_conv_add_relu_op.h                   +3    -2
  src/operators/fusion_deconv_add_op.cpp                    +33   -0
  src/operators/fusion_deconv_add_op.h                      +108  -0
  src/operators/fusion_deconv_add_relu_op.cpp               +33   -0
  src/operators/fusion_deconv_add_relu_op.h                 +109  -0
  src/operators/kernel/arm/fetch_kernel.cpp                 +0    -8
  src/operators/kernel/cl/cl_kernel/feed_kernel.cl          +0    -9
  src/operators/kernel/cl/feed_kernel.cpp                   +0    -6
  src/operators/kernel/deconv_add_kernel.h                  +39   -0
  src/operators/kernel/deconv_add_relu_kernel.h             +39   -0
  src/operators/kernel/fpga/V2/conv_add_kernel.cpp          +61   -0
  src/operators/kernel/fpga/V2/deconv_add_kernel.cpp        +36   -0
  src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp   +36   -0
  src/operators/kernel/fpga/V2/slice_kernel.cpp             +0    -3
  src/operators/kernel/fpga/V2/softmax_kernel.cpp           +0    -4
  src/operators/kernel/fpga/V2/split_kernel.cpp             +30   -0
  src/operators/kernel/fpga/V2/transpose2_kernel.cpp        +35   -0
  src/operators/op_param.h                                  +35   -4
  src/operators/split_op.cpp                                +4    -1
  src/operators/tanh_op.cpp                                 +1    -1
  src/operators/transpose2_op.cpp                           +3    -0
  test/CMakeLists.txt                                       +8    -8
  test/net/test_yologpu.cpp                                 +0    -17
src/common/types.cpp (View file @ 202d5f29)

@@ -73,6 +73,8 @@ const char *G_OP_TYPE_QUANTIZE = "quantize";
 const char *G_OP_TYPE_DEQUANTIZE = "dequantize";
 extern const char *G_OP_TYPE_TANH = "tanh";
 extern const char *G_OP_TYPE_FUSION_DECONV_RELU = "fusion_deconv_relu";
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD = "fusion_deconv_add";
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD_RELU = "fusion_deconv_add_relu";
 std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
@@ -133,5 +135,7 @@ std::unordered_map<
     {G_OP_TYPE_QUANTIZE, {{"X"}, {"Out", "OutScale"}}},
     {G_OP_TYPE_DEQUANTIZE, {{"X", "Scale"}, {"Out"}}},
     {G_OP_TYPE_TANH, {{"X"}, {"Out"}}},
-    {G_OP_TYPE_FUSION_DECONV_RELU, {{"Input"}, {"Out"}}}};
+    {G_OP_TYPE_FUSION_DECONV_RELU, {{"Input"}, {"Out"}}},
+    {G_OP_TYPE_FUSION_DECONV_ADD, {{"Input"}, {"Out"}}},
+    {G_OP_TYPE_FUSION_DECONV_ADD_RELU, {{"Input"}, {"Out"}}}};
 }  // namespace paddle_mobile
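The two map entries added above give the new fused operators the same canonical variable names ("Input" and "Out") that the framework uses when it rewires fused nodes. As a rough illustration of how such a table is consumed, here is a hedged, self-contained sketch of a lookup against a map with the same shape as op_input_output_key; the local map and its contents are illustrative, not the project's actual table.

```cpp
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// Same type as the op_input_output_key declaration in this diff.
using OpKeyMap = std::unordered_map<
    std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>;

int main() {
  // Mirrors the two entries added by this commit: inputs first, outputs second.
  OpKeyMap op_keys = {
      {"fusion_deconv_add", {{"Input"}, {"Out"}}},
      {"fusion_deconv_add_relu", {{"Input"}, {"Out"}}},
  };

  auto it = op_keys.find("fusion_deconv_add");
  if (it != op_keys.end()) {
    std::cout << "input key: " << it->second.first[0]
              << ", output key: " << it->second.second[0] << std::endl;
  }
  return 0;
}
```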
src/common/types.h (View file @ 202d5f29)

@@ -142,6 +142,9 @@ extern const char *G_OP_TYPE_DEQUANTIZE;
 extern const char *G_OP_TYPE_TANH;
 extern const char *G_OP_TYPE_FUSION_DECONV_RELU;
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD;
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD_RELU;
 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
     op_input_output_key;
src/fpga/V2/api.cpp (View file @ 202d5f29)

@@ -16,47 +16,29 @@ limitations under the License. */
 #include <algorithm>
 #include "fpga/V2/bias_scale.h"
 #include "fpga/V2/config.h"
-<<<<<<< HEAD
-=======
 #include "fpga/V2/driver/driver.h"
->>>>>>> upstream/develop
 #include "fpga/V2/filter.h"
 #include "fpga/V2/image.h"
 namespace paddle_mobile {
 namespace fpga {
-<<<<<<< HEAD
-static std::map<void *, size_t> memory_map;
-int open_device() {
-  int ret = open_device_driver();
-=======
 static std::map<void *, size_t> memory_map;
 int open_device() {
   int ret = driver::open_device_driver();
->>>>>>> upstream/develop
   return ret;
 }
 int close_device() {
-<<<<<<< HEAD
-  int ret = close_device_driver();
-=======
   int ret = driver::close_device_driver();
->>>>>>> upstream/develop
   return ret;
 }
 void *fpga_malloc(size_t size) {
   static uint64_t counter = 0;
 #ifdef PADDLE_MOBILE_ZU5
-<<<<<<< HEAD
-  auto ptr = fpga_malloc_driver(size);
-=======
   auto ptr = driver::fpga_malloc_driver(size);
->>>>>>> upstream/develop
 #else
   auto ptr = malloc(size);
 #endif
@@ -75,11 +57,7 @@ void fpga_free(void *ptr) {
     size = iter->second;
     memory_map.erase(iter);
 #ifdef PADDLE_MOBILE_ZU5
-<<<<<<< HEAD
-    fpga_free_driver(ptr);
-=======
     driver::fpga_free_driver(ptr);
->>>>>>> upstream/develop
 #else
     free(ptr);
 #endif
@@ -90,28 +68,6 @@ void fpga_free(void *ptr) {
     DLOG << "Invalid pointer";
   }
 }
-<<<<<<< HEAD
-half fp32_2_fp16(float fp32_num) {
-  unsigned long tmp = *(unsigned long *)(&fp32_num);  // NOLINT
-  auto t = (half)(((tmp & 0x007fffff) >> 13) | ((tmp & 0x80000000) >> 16) |
-                  (((tmp & 0x7f800000) >> 13) - (112 << 10)));
-  if (tmp & 0x1000) {
-    t++;  // roundoff
-  }
-  return t;
-}
-float fp16_2_fp32(half fp16_num) {
-  int frac = (fp16_num & 0x3ff);
-  int exp = ((fp16_num & 0x7c00) >> 10) + 112;
-  int s = fp16_num & 0x8000;
-  int tmp = 0;
-  float fp32_num;
-  tmp = s << 16 | exp << 23 | frac << 13;
-  fp32_num = *(float *)&tmp;  // NOLINT
-  return fp32_num;
-=======
 void fpga_copy(void *dest, const void *src, size_t num) {
 #ifdef PADDLE_MOBILE_ZU5
   driver::fpga_copy_driver(dest, src, num);
@@ -133,7 +89,6 @@ int fpga_invalidate(void *address, size_t size) {
 #else
   return 0;
 #endif
->>>>>>> upstream/develop
 }
 void format_image(framework::Tensor *image_tensor) {
@@ -288,11 +243,7 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
   arg->filter_num = (uint32_t)filter->dims()[0];
   arg->output.address = out_ptr;
   arg->output.scale_address = out->scale;
-<<<<<<< HEAD
-  arg->conv_args =
-=======
   arg->conv_arg =
->>>>>>> upstream/develop
       (ConvArgs *)fpga_malloc(arg->split_num * sizeof(ConvArgs));  // NOLINT
   arg->concat_arg.image_num = arg->split_num;
@@ -310,30 +261,6 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
       (uint32_t *)fpga_malloc(n * sizeof(uint32_t));  // NOLINT
   for (int i = 0; i < n; i++) {
-<<<<<<< HEAD
-    arg->conv_args[i].relu_enabled = relu_enabled;
-    arg->conv_args[i].sb_address = bs_ptr;
-    arg->conv_args[i].filter_address = (int8_t *)filter_ptr;  // NOLINT
-    arg->conv_args[i].filter_scale_address = filter->scale;
-    arg->conv_args[i].filter_num = arg->filter_num;
-    arg->conv_args[i].group_num = (uint32_t)group_num;
-    arg->conv_args[i].kernel.stride_h = (uint32_t)stride_h;
-    arg->conv_args[i].kernel.stride_w = (uint32_t)stride_w;
-    arg->conv_args[i].kernel.height = (uint32_t)filter->dims()[2];
-    arg->conv_args[i].kernel.width = (uint32_t)filter->dims()[3];
-    arg->conv_args[i].image.address = input_ptr;
-    arg->conv_args[i].image.scale_address = input->scale;
-    arg->conv_args[i].image.channels = (uint32_t)input->dims()[1];
-    arg->conv_args[i].image.height = (uint32_t)input->dims()[2];
-    arg->conv_args[i].image.width = (uint32_t)input->dims()[3];
-    arg->conv_args[i].image.pad_height = (uint32_t)padding_h;
-    arg->conv_args[i].image.pad_width = (uint32_t)padding_w;
-    arg->conv_args[i].output.address = out_ptr;
-    arg->conv_args[i].output.scale_address = out->scale;
-=======
     arg->conv_arg[i].relu_enabled = relu_enabled;
    arg->conv_arg[i].sb_address = bs_ptr;
    arg->conv_arg[i].filter_address = (int8_t *)filter_ptr;  // NOLINT
@@ -361,7 +288,6 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
        filter::calc_aligned_num((int)input->dims()[1], arg->filter_num);
    arg->conv_arg[i].free_space =
        fpga_malloc(num_after_alignment * 2 * sizeof(half));
->>>>>>> upstream/develop
  }
 }
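The HEAD-side block deleted above converted between fp32 and the FPGA's fp16 storage format with raw bit shuffling. The sketch below is not part of the commit; it restates the same conversion under the assumption that half_t is a plain 16-bit storage type, and it uses std::memcpy instead of the original pointer casts so the type punning is well defined. Like the deleted helpers, it ignores NaN, infinity and denormal handling.

```cpp
#include <cstdint>
#include <cstring>

using half_t = uint16_t;  // assumed 16-bit storage type for fp16 values

// fp32 -> fp16: keep the sign, rebias the exponent (127 -> 15),
// truncate the 23-bit mantissa to 10 bits and round on the dropped bit.
half_t fp32_to_fp16(float fp32) {
  uint32_t bits;
  std::memcpy(&bits, &fp32, sizeof(bits));
  auto h = static_cast<half_t>(((bits & 0x007fffff) >> 13) |
                               ((bits & 0x80000000) >> 16) |
                               (((bits & 0x7f800000) >> 13) - (112 << 10)));
  if (bits & 0x1000) {
    h++;  // round up when the first truncated mantissa bit is set
  }
  return h;
}

// fp16 -> fp32: expand the mantissa and rebias the exponent back (15 -> 127).
float fp16_to_fp32(half_t fp16) {
  uint32_t frac = fp16 & 0x3ffu;
  uint32_t exp = ((fp16 & 0x7c00u) >> 10) + 112u;
  uint32_t sign = fp16 & 0x8000u;
  uint32_t bits = (sign << 16) | (exp << 23) | (frac << 13);
  float fp32;
  std::memcpy(&fp32, &bits, sizeof(fp32));
  return fp32;
}
```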
src/fpga/V2/api.h (View file @ 202d5f29)

@@ -14,132 +14,20 @@ limitations under the License. */
 #pragma once
-<<<<<<< HEAD
-#include <stdint.h>
-#include <cstddef>
-#include <iostream>
-#include <limits>
-#include "fpga/V2/driver/driver.h"
-#include "fpga/V2/driver/pe.h"
-=======
 #include "fpga/V2/driver/pe.h"
 #include "fpga/V2/fpga_common.h"
->>>>>>> upstream/develop
 #include "framework/tensor.h"
 namespace paddle_mobile {
 namespace fpga {
-<<<<<<< HEAD
-enum DataType {
-  DATA_TYPE_FP32 = 1,
-  DATA_TYPE_FP16 = 0,
-};
-enum LayoutType {
-  LAYOUT_CHW = 1,
-  LAYOUT_HWC = 0,
-};
-struct KernelArgs {
-  uint32_t width;
-  uint32_t height;
-  uint32_t stride_w;
-  uint32_t stride_h;
-};
-struct ImageInputArgs {
-  void *address;         // input featuremap virtual address
-  float *scale_address;  // input scale address;
-  uint32_t channels;
-  uint32_t width;  // featuremap width
-  uint32_t height;
-  uint32_t pad_width;  // padding width;
-  uint32_t pad_height;
-};
-struct ImageOutputArgs {
-  void *address;         // output result address;
-  float *scale_address;  // output scale address;
-  uint64_t timer_cnt;    // time counter for FPGA computation
-};
-struct ConvArgs {
-  bool relu_enabled;
-  void *sb_address;  // scale and bias are interlaced;
-  void *filter_address;
-  float *filter_scale_address;
-  uint32_t filter_num;
-  uint32_t group_num;
-  struct KernelArgs kernel;
-  struct ImageInputArgs image;  // input image;
-  struct ImageOutputArgs output;
-};
-struct ConcatArgs {
-  uint32_t image_num;
-  half **images_in;
-  float **scales_in;
-  void *image_out;
-  float *scale_out;
-  uint32_t *channel_num;
-  uint32_t *aligned_channel_num;
-  uint32_t out_channel;
-  uint32_t height;
-  uint32_t width;
-};
-struct SplitConvArgs {
-  uint32_t split_num;
-  uint32_t group_num;
-  uint32_t filter_num;
-  struct ImageOutputArgs output;
-  struct ConvArgs *conv_args;
-  struct ConcatArgs concat_arg;
-};
-struct PoolingArgs {
-  int16_t mode;  // mode: 0:max, 1:avg
-  half kernel_reciprocal;
-  struct KernelArgs kernel;
-  struct ImageInputArgs image;  // input image;
-  struct ImageOutputArgs output;
-};
-struct EWAddArgs {
-  bool relu_enabled;
-  uint32_t const0;  // output0 = const0 x input0 + const1 x input1;
-  uint32_t const1;
-  struct ImageInputArgs image0;
-  struct ImageInputArgs image1;
-  struct ImageOutputArgs output;
-};
-struct BypassArgs {
-  enum DataType input_data_type;
-  enum DataType output_data_type;
-  enum LayoutType input_layout_type;
-  enum LayoutType output_layout_type;
-  struct ImageInputArgs image;
-  struct ImageOutputArgs output;
-};
-=======
->>>>>>> upstream/develop
 int open_device();
 int close_device();
 void *fpga_malloc(size_t size);
 void fpga_free(void *ptr);
-<<<<<<< HEAD
-static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x; }
-=======
 void fpga_copy(void *dest, const void *src, size_t num);
 int fpga_flush(void *address, size_t size);
 int fpga_invalidate(void *address, size_t size);
->>>>>>> upstream/develop
 float filter_find_max(framework::Tensor *filter_tensor);
 int get_aligned_channel_num(int channel_num);
@@ -167,11 +55,5 @@ void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input,
                     bool relu_enabled, int group_num, int stride_h,
                     int stride_w, int padding_h, int padding_w, float* bs_ptr);
-<<<<<<< HEAD
-half fp32_2_fp16(float fp32_num);
-float fp16_2_fp32(half fp16_num);
-=======
->>>>>>> upstream/develop
 }  // namespace fpga
 }  // namespace paddle_mobile
src/fpga/V2/bias_scale.cpp (View file @ 202d5f29)

@@ -27,11 +27,7 @@ void align_element(float **data_in, int num, int num_after_alignment) {
       (float *)fpga_malloc(total_element * sizeof(float));  // NOLINT
   memset(ptr_aligned, 0, total_element * sizeof(float));
-<<<<<<< HEAD
-  for (int i = 1; i < num; i++) {
-=======
   for (int i = 0; i < num; i++) {
->>>>>>> upstream/develop
     ptr_aligned[i * 2 + 0] = ptr_unaligned[i];
     ptr_aligned[i * 2 + 1] = ptr_unaligned[i + num];
   }
@@ -43,10 +39,7 @@ void align_element(float **data_in, int num, int num_after_alignment) {
 void format_bias_scale_array(float **data_in, int num,
                              int num_after_alignment) {
   align_element(data_in, num, num_after_alignment);
-<<<<<<< HEAD
-=======
   fpga_flush(*data_in, 2 * num_after_alignment * sizeof(float));
->>>>>>> upstream/develop
 }
 }  // namespace bias_scale
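The loop kept above interleaves per-channel bias and scale values into one array, bias at even slots and scale at odd slots; the discarded HEAD-side loop started at i = 1 and would have skipped channel 0. A standalone sketch of the same packing, assuming the input buffer holds num bias values followed by num scale values (not the project's code):

```cpp
#include <vector>

// data_in: `num` bias values followed by `num` scale values.
// Returns [bias0, scale0, bias1, scale1, ...], the interleaved layout above.
std::vector<float> interleave_bias_scale(const std::vector<float> &data_in,
                                         int num) {
  std::vector<float> aligned(2 * num, 0.0f);
  for (int i = 0; i < num; i++) {           // starting at 1 would drop channel 0
    aligned[i * 2 + 0] = data_in[i];        // bias
    aligned[i * 2 + 1] = data_in[i + num];  // scale
  }
  return aligned;
}
```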
src/fpga/V2/driver/bitmap.cpp (View file @ 202d5f29)

@@ -57,13 +57,8 @@ static uint64_t ffs(uint64_t data) {
   uint64_t bit = 0;
   int i = 0;
-<<<<<<< HEAD
-  for (i = 0; i < sizeof(data); i++) {
-    if (data & (1 << i)) {
-=======
   for (i = 0; i < sizeof(data) * 8; i++) {
     if (data & (1UL << i)) {
->>>>>>> upstream/develop
       bit = i;
       break;
     }
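The kept version of ffs() scans all sizeof(data) * 8 = 64 bit positions and builds the mask with an unsigned-long literal; the removed HEAD version only looked at 8 positions and shifted a plain int, which overflows once i reaches 32. A minimal standalone sketch of the corrected scan, with the same "return 0 when nothing is set" behaviour as the function above:

```cpp
#include <cstdint>

// Index of the least-significant set bit of `data`, or 0 if data == 0.
uint64_t find_first_set(uint64_t data) {
  for (uint64_t i = 0; i < sizeof(data) * 8; i++) {  // 64 iterations, not 8
    if (data & (1ULL << i)) {  // 1ULL keeps the shift within 64-bit range
      return i;
    }
  }
  return 0;
}
```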
src/fpga/V2/driver/bitmap.h (View file @ 202d5f29)

@@ -25,11 +25,7 @@ limitations under the License. */
 #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
 #define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask))
-<<<<<<< HEAD
-#define round_down(x, y) ((x) & ((y)-1))
-=======
 #define round_down(x, y) ((x) & ~((y)-1))
->>>>>>> upstream/develop
 namespace fpga_bitmap {
 void bitmap_set(uint64_t *map, unsigned int start, int len);
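For a power-of-two y, the kept definition (x) & ~((y)-1) clears the low bits of x, i.e. rounds x down to a multiple of y; the removed HEAD definition without the ~ keeps only those low bits and therefore computes x % y instead. A tiny illustration (not from the commit):

```cpp
constexpr unsigned round_down(unsigned x, unsigned y) {
  return x & ~(y - 1);  // y is assumed to be a power of two
}

static_assert(round_down(13, 8) == 8, "13 rounded down to a multiple of 8");
static_assert((13u & (8u - 1)) == 5, "the old expression yields the remainder");
```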
src/fpga/V2/driver/driver.cpp (View file @ 202d5f29)

@@ -17,10 +17,7 @@ limitations under the License. */
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
-<<<<<<< HEAD
-=======
 #include <sys/ioctl.h>
->>>>>>> upstream/develop
 #include <sys/mman.h>
 #include <unistd.h>
 #include <algorithm>
@@ -36,10 +33,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace fpga {
-<<<<<<< HEAD
-=======
 namespace driver {
->>>>>>> upstream/develop
 struct FPGA_INFO g_fpgainfo;
 int open_drvdevice() {
@@ -51,12 +45,8 @@ int open_drvdevice() {
 int open_memdevice() {
   if (g_fpgainfo.fd_mem == -1) {
-<<<<<<< HEAD
-    g_fpgainfo.fd_mem = open(g_fpgainfo.memdevice_path, O_RDWR | O_DSYNC);
-=======
     // g_fpgainfo.fd_mem = open(g_fpgainfo.memdevice_path, O_RDWR | O_DSYNC);
     g_fpgainfo.fd_mem = open(g_fpgainfo.memdevice_path, O_RDWR);
->>>>>>> upstream/develop
   }
   return g_fpgainfo.fd_mem;
 }
@@ -64,10 +54,6 @@ int open_memdevice() {
 void pl_reset() {
   // DLOG << "PL RESET";
-<<<<<<< HEAD
-  // reg_writeq(0x5a, REG_FPGA_RESET);
-=======
->>>>>>> upstream/develop
   usleep(100 * 1000);
 }
@@ -147,11 +133,7 @@ int pl_get_status() { return 0; }
 int fpga_regpoll(uint64_t reg, uint64_t val, int time) {
   uint64_t i = 0;
   /*timeout精确性待确认*/
-<<<<<<< HEAD
-  int64_t timeout = time * CPU_FREQ / 1000000;
-=======
   int64_t timeout = time * 6;
->>>>>>> upstream/develop
   for (i = 0; i < timeout; i++) {
     if (val == reg_readq(reg)) {
@@ -193,11 +175,6 @@ int memory_request(struct fpga_memory *memory, size_t size, uint64_t *addr) {
 }
 void memory_release(struct fpga_memory *memory) {
-<<<<<<< HEAD
-  pthread_mutex_lock(&memory->mutex);
-  fpga_bitmap::bitmap_clear(memory->bitmap, 0, memory->page_num);
-  pthread_mutex_unlock(&memory->mutex);
-=======
   void *ptr = nullptr;
   /*unmap memory*/
@@ -206,7 +183,6 @@ void memory_release(struct fpga_memory *memory) {
   for (iter = map.begin(); iter != map.end(); iter++) {
     fpga_free_driver(ptr);
   }
->>>>>>> upstream/develop
 }
 int create_fpga_memory_inner(struct fpga_memory *memory, size_t memory_size) {
@@ -269,10 +245,6 @@ int init_fpga_memory(struct fpga_memory *memory) {
     return rc;
   }
-<<<<<<< HEAD
-  // spin_lock_init(&memory->spin);
-=======
->>>>>>> upstream/develop
   fpga_bitmap::bitmap_clear(memory->bitmap, 0, memory->page_num);
   fpga_bitmap::bitmap_set(memory->bitmap, 0, 1);  // NOTE reserve fpga page 0.
@@ -327,11 +299,6 @@ void *fpga_reg_malloc(size_t size) {
   return ret;
 }
-<<<<<<< HEAD
-void *fpga_malloc_driver(size_t size) {
-  void *ret = nullptr;
-  uint64_t phy_addr = 0;
-=======
 void *fpga_reg_free(void *ptr) {
   size_t size = 0;
@@ -349,7 +316,6 @@ void *fpga_malloc_driver(size_t size) {
   void *ret = nullptr;
   uint64_t phy_addr = 0;
   int i = 0;
->>>>>>> upstream/develop
   memory_request(g_fpgainfo.memory_info, size, &phy_addr);
@@ -365,19 +331,14 @@ void *fpga_malloc_driver(size_t size) {
 void fpga_free_driver(void *ptr) {
   size_t size = 0;
-<<<<<<< HEAD
-=======
   uint32_t pos = 0;
   uint64_t p_addr = 0;
->>>>>>> upstream/develop
   auto iter = g_fpgainfo.fpga_addr2size_map.find(ptr);
   if (iter != g_fpgainfo.fpga_addr2size_map.end()) {
     size = iter->second;
     g_fpgainfo.fpga_addr2size_map.erase(iter);
     munmap(ptr, size);
-<<<<<<< HEAD
-=======
     p_addr = vaddr_to_paddr(ptr);
     pos = (p_addr - g_fpgainfo.memory_info->mem_start) / FPGA_PAGE_SIZE;
@@ -387,14 +348,11 @@ void fpga_free_driver(void *ptr) {
     fpga_bitmap::bitmap_clear(g_fpgainfo.memory_info->bitmap, pos,
                               g_fpgainfo.memory_info->nr[pos]);
     pthread_mutex_unlock(&g_fpgainfo.memory_info->mutex);
->>>>>>> upstream/develop
   } else {
     DLOG << "Invalid pointer";
   }
 }
-<<<<<<< HEAD
-=======
 static inline int do_ioctl(unsigned long req, const void *arg) {
   return ioctl(g_fpgainfo.fd_mem, req, arg);
 }
@@ -437,7 +395,6 @@ void fpga_copy_driver(void *dest, const void *src, size_t num) {
   return;
 }
->>>>>>> upstream/develop
 int open_device_driver() {
   g_fpgainfo.FpgaRegPhyAddr = FPGA_REG_PHY_ADDR;
   g_fpgainfo.FpgaMemPhyAddr = FPGA_MEM_PHY_ADDR;
@@ -463,20 +420,13 @@ int open_device_driver() {
 int close_device_driver() {
   pl_destroy();
-<<<<<<< HEAD
-  fpga_free_driver(g_fpgainfo.FpgaRegVirAddr);
-=======
   fpga_reg_free(g_fpgainfo.FpgaRegVirAddr);
->>>>>>> upstream/develop
   memory_release(g_fpgainfo.memory_info);
   destroy_fpga_memory(g_fpgainfo.memory_info);
   return 0;
 }
-<<<<<<< HEAD
-=======
 }  // namespace driver
->>>>>>> upstream/develop
 }  // namespace fpga
 }  // namespace paddle_mobile
src/fpga/V2/driver/driver.h (View file @ 202d5f29)

@@ -24,10 +24,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace fpga {
-<<<<<<< HEAD
-=======
 namespace driver {
->>>>>>> upstream/develop
 #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
@@ -51,8 +48,6 @@ const int PE_IDX_BYPASS = 3;
 enum pe_status { IDLE = 0, BUSY = 1 };
-<<<<<<< HEAD
-=======
 struct MemoryCacheArgs {
   void *offset;
   size_t size;
@@ -62,7 +57,6 @@ struct MemoryCacheArgs {
 #define IOCTL_MEMCACHE_INVAL _IOW(IOCTL_FPGA_MAGIC, 12, struct MemoryCacheArgs)
 #define IOCTL_MEMCACHE_FLUSH _IOW(IOCTL_FPGA_MAGIC, 13, struct MemoryCacheArgs)
->>>>>>> upstream/develop
 struct fpga_pe {
   char type_name[MAX_TYPE_NAME_LENTH + 1];
   struct pe_data_s *outer;
@@ -111,39 +105,20 @@ extern struct FPGA_INFO g_fpgainfo;
 inline uint64_t reg_readq(uint32_t offset) {
   // DLOG << "offset : " << offset;
-<<<<<<< HEAD
-  uint64_t value = *(uint64_t *)((uint8_t *)g_fpgainfo.FpgaRegVirAddr + offset);  // NOLINT
-=======
   uint64_t value = *(volatile uint64_t *)((uint8_t *)g_fpgainfo.FpgaRegVirAddr +
                                           offset);  // NOLINT
->>>>>>> upstream/develop
   return value;
 }
 inline void reg_writeq(uint64_t value, uint32_t offset) {
   // DLOG << "offset : " << offset << ", value : " << value;
-<<<<<<< HEAD
-  *(uint64_t *)((uint8_t *)g_fpgainfo.FpgaRegVirAddr + offset) =  // NOLINT
-=======
   *(volatile uint64_t *)((uint8_t *)g_fpgainfo.FpgaRegVirAddr +
                          offset) =  // NOLINT
->>>>>>> upstream/develop
       value;
 }
 int open_device_driver();
-<<<<<<< HEAD
-int close_device_driver();
-void *fpga_malloc_driver(size_t size);
-void fpga_free_driver(void *ptr);
-/*pe*/
-uint64_t vaddr_to_paddr(void *address);
-int fpga_regpoll(uint64_t reg, uint64_t val, int time);
-=======
 int close_device_driver();
@@ -164,6 +139,5 @@ uint64_t vaddr_to_paddr(void *address);
 int fpga_regpoll(uint64_t reg, uint64_t val, int time);
 }  // namespace driver
->>>>>>> upstream/develop
 }  // namespace fpga
 }  // namespace paddle_mobile
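The register accessors kept above read and write through a volatile pointer, which stops the compiler from caching or eliding accesses to the memory-mapped FPGA register window. A minimal sketch of the same pattern, assuming a hypothetical reg_base pointer previously obtained from mmap() (not the project's API):

```cpp
#include <cstdint>

// reg_base is assumed to point at an mmap()'d device register window.
inline uint64_t mmio_read64(volatile uint8_t *reg_base, uint32_t offset) {
  return *reinterpret_cast<volatile uint64_t *>(reg_base + offset);
}

inline void mmio_write64(volatile uint8_t *reg_base, uint32_t offset,
                         uint64_t value) {
  *reinterpret_cast<volatile uint64_t *>(reg_base + offset) = value;
}
```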
src/fpga/V2/driver/pe.cpp (View file @ 202d5f29)

@@ -20,49 +20,29 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace fpga {
-<<<<<<< HEAD
-#define MUL8(x) (x * 8)
-=======
 #define MUL8(x) ((x)*8)
->>>>>>> upstream/develop
 #define BYPASS_DONE 1
 float Findfp16Max() {
   uint16_t abs_vals[16];
   uint64_t max_fp16;
-<<<<<<< HEAD
-  max_fp16 = reg_readq(MUL8(49));
-=======
   max_fp16 = driver::reg_readq(MUL8(49));
->>>>>>> upstream/develop
   abs_vals[0] = (uint16_t)(0x0000007f & (max_fp16));        // NOLINT
   abs_vals[1] = (uint16_t)(0x0000007f & (max_fp16 >> 16));  // NOLINT
   abs_vals[2] = (uint16_t)(0x0000007f & (max_fp16 >> 32));  // NOLINT
   abs_vals[3] = (uint16_t)(0x0000007f & (max_fp16 >> 48));  // NOLINT
-<<<<<<< HEAD
-  max_fp16 = reg_readq(MUL8(50));
-=======
   max_fp16 = driver::reg_readq(MUL8(50));
->>>>>>> upstream/develop
   abs_vals[4] = (uint16_t)(0x0000007f & (max_fp16));        // NOLINT
   abs_vals[5] = (uint16_t)(0x0000007f & (max_fp16 >> 16));  // NOLINT
   abs_vals[6] = (uint16_t)(0x0000007f & (max_fp16 >> 32));  // NOLINT
   abs_vals[7] = (uint16_t)(0x0000007f & (max_fp16 >> 48));  // NOLINT
-<<<<<<< HEAD
-  max_fp16 = reg_readq(MUL8(51));
-=======
   max_fp16 = driver::reg_readq(MUL8(51));
->>>>>>> upstream/develop
   abs_vals[8] = (uint16_t)(0x0000007f & (max_fp16));         // NOLINT
   abs_vals[9] = (uint16_t)(0x0000007f & (max_fp16 >> 16));   // NOLINT
   abs_vals[10] = (uint16_t)(0x0000007f & (max_fp16 >> 32));  // NOLINT
   abs_vals[11] = (uint16_t)(0x0000007f & (max_fp16 >> 48));  // NOLINT
-<<<<<<< HEAD
-  max_fp16 = reg_readq(MUL8(52));
-=======
   max_fp16 = driver::reg_readq(MUL8(52));
->>>>>>> upstream/develop
   abs_vals[12] = (uint16_t)(0x0000007f & (max_fp16));
   abs_vals[13] = (uint16_t)(0x0000007f & (max_fp16 >> 16));  // NOLINT
   abs_vals[14] = (uint16_t)(0x0000007f & (max_fp16 >> 32));  // NOLINT
@@ -78,11 +58,7 @@ float Findfp16Max() {
 }
 int ComputeFpgaConv(const struct SplitConvArgs &args) {
-<<<<<<< HEAD
-  ComputeBasicConv(args.conv_args[0]);
-=======
   ComputeBasicConv(args.conv_arg[0]);
->>>>>>> upstream/develop
 }
 int ComputeBasicConv(const struct ConvArgs &args) {
@@ -190,13 +166,8 @@ int PerformBypass(const struct BypassArgs &args) {
   return 0;
 #endif
-<<<<<<< HEAD
-  uint64_t ifm_src_paddr = vaddr_to_paddr(args.image.address);
-  uint64_t ifm_dst_paddr = vaddr_to_paddr(args.output.address);
-=======
   uint64_t ifm_src_paddr = driver::vaddr_to_paddr(args.image.address);
   uint64_t ifm_dst_paddr = driver::vaddr_to_paddr(args.output.address);
->>>>>>> upstream/develop
   uint64_t bp_enable;
   int64_t length;
   uint64_t pixels;
@@ -225,18 +196,6 @@ int PerformBypass(const struct BypassArgs &args) {
   }
   // start bypass
-<<<<<<< HEAD
-  reg_writeq(ifm_src_paddr, MUL8(27));
-  reg_writeq(ifm_dst_paddr, MUL8(28));
-  reg_writeq(0, MUL8(0));
-  reg_writeq(bp_enable, MUL8(0));
-  // poll
-  int ret = -1;
-  ret = fpga_regpoll(MUL8(48), BYPASS_DONE, 0xffffffff);
-  if (ret != -1) {
-    // clear "irq"
-    reg_readq(MUL8(63));
-=======
   driver::reg_writeq(ifm_src_paddr, MUL8(27));
   driver::reg_writeq(ifm_dst_paddr, MUL8(28));
   driver::reg_writeq(0, MUL8(0));
@@ -247,7 +206,6 @@ int PerformBypass(const struct BypassArgs &args) {
   if (ret != -1) {
     // clear "irq"
     driver::reg_readq(MUL8(63));
->>>>>>> upstream/develop
   }
   // get max value
   if ((!args.input_data_type) && (!args.output_data_type)) {
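The MUL8 change kept above is routine macro hygiene: without parentheses around the argument, passing an expression binds the multiplication incorrectly. A quick illustration (not from the commit):

```cpp
#define MUL8_UNSAFE(x) (x * 8)
#define MUL8(x) ((x)*8)

static_assert(MUL8_UNSAFE(2 + 1) == 10, "expands to (2 + 1 * 8)");
static_assert(MUL8(2 + 1) == 24, "expands to ((2 + 1) * 8)");
```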
src/fpga/V2/driver/pe.h (View file @ 202d5f29)

@@ -12,12 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
-<<<<<<< HEAD
-#include "fpga/V2/api.h"
-=======
 #include "fpga/V2/fpga_common.h"
->>>>>>> upstream/develop
 namespace paddle_mobile {
 namespace fpga {
src/fpga/V2/filter.cpp (View file @ 202d5f29)

@@ -94,10 +94,7 @@ void format_filter(float **data_in, int num, int channel, int height, int width,
   convert_to_hwc(data_in, num, channel, height, width);
   align_filter(data_in, num, channel, height, width);
   int pixel_num = calc_aligned_total_pixel_num(num, channel, height, width);
-<<<<<<< HEAD
-=======
   fpga_flush(*data_in, pixel_num * sizeof(float));
->>>>>>> upstream/develop
 }
 void convert_fc_filter(float **data_in, int num, int chw) {
@@ -117,11 +114,8 @@ void format_fc_filter(float **data_in, int num, int channel, int height,
   int chw = channel * height * width;
   convert_fc_filter(data_in, num, chw);
   align_filter(data_in, num, channel, height, width);
-<<<<<<< HEAD
-=======
   int pixel_num = calc_aligned_total_pixel_num(num, channel, height, width);
   fpga_flush(*data_in, pixel_num * sizeof(float));
->>>>>>> upstream/develop
 }
 float find_max(float *data_in, int data_size) {
src/fpga/V2/image.cpp (View file @ 202d5f29)

@@ -58,10 +58,7 @@ void format_image(float **data_in, int channel, int height, int width,
                   int aligned_channel) {
   convert_to_hwc(data_in, channel, height, width);
   align_image(data_in, channel, height, width, aligned_channel);
-<<<<<<< HEAD
-=======
   fpga_flush(*data_in, aligned_channel * height * width * sizeof(float));
->>>>>>> upstream/develop
 }
 void concat_images(int16_t **images_in, float **scales_in, void *image_out,
@@ -73,11 +70,8 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out,
   scale_out[1] = 0.0;
   for (int i = 0; i < image_num; i++) {
     scale_out[0] = std::max(*scale_out, scales_in[i][0]);
-<<<<<<< HEAD
-=======
     fpga_invalidate(images_in[i],
                     height * width * aligned_channel_num[i] * sizeof(int16_t));
->>>>>>> upstream/develop
   }
   scale_out[1] = 1 / scale_out[0];
@@ -92,10 +86,7 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out,
       tmp_channel_sum += channel_num[i];
     }
   }
-<<<<<<< HEAD
-=======
   fpga_flush(image_out, hw * out_channel * sizeof(int16_t));
->>>>>>> upstream/develop
 }
 }  // namespace image
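The calls kept above follow the usual cache-coherence discipline for buffers shared with a device: flush (write back) CPU-written data before the FPGA reads it, and invalidate stale cache lines before the CPU reads data the FPGA wrote. The sketch below is only an illustration of that ordering; it declares the fpga_* functions from src/fpga/V2/api.h in this diff, and the buffer size and the "launch job" step are placeholders.

```cpp
#include <cstddef>

// Declarations as they appear in src/fpga/V2/api.h in this commit.
namespace paddle_mobile {
namespace fpga {
void *fpga_malloc(size_t size);
void fpga_free(void *ptr);
int fpga_flush(void *address, size_t size);
int fpga_invalidate(void *address, size_t size);
}  // namespace fpga
}  // namespace paddle_mobile

void run_one_job(size_t bytes /* placeholder buffer size */) {
  using namespace paddle_mobile::fpga;
  void *in = fpga_malloc(bytes);
  void *out = fpga_malloc(bytes);

  // ... CPU fills `in` ...
  fpga_flush(in, bytes);        // make CPU writes visible to the device

  // ... launch the FPGA job that reads `in` and writes `out` (placeholder) ...

  fpga_invalidate(out, bytes);  // drop stale cache lines before reading results
  // ... CPU reads `out` ...

  fpga_free(in);
  fpga_free(out);
}
```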
src/framework/executor.cpp (View file @ 202d5f29)

@@ -26,10 +26,7 @@ limitations under the License. */
 #include "framework/program/var_desc.h"
 #include "framework/scope.h"
 #include "framework/tensor.h"
-<<<<<<< HEAD:src/framework/executor.cpp
-=======
 #include "memory/t_malloc.h"
->>>>>>> upstream/develop:src/framework/executor.cpp
 #ifdef PADDLE_EXECUTOR_MULTITHREAD
 #include <queue>
src/io/paddle_mobile.cpp (View file @ 202d5f29)

@@ -21,7 +21,6 @@ limitations under the License. */
 #include "operators/math/gemm.h"
 namespace paddle_mobile {
-static std::mutex lc;
 template <typename Dtype, Precision P>
 void PaddleMobile<Dtype, P>::SetThreadNum(int num) {
 #ifdef _OPENMP
@@ -203,10 +202,7 @@ void PaddleMobile<Dtype, P>::Predict_To(int end) {
 #endif
 #ifdef PADDLE_MOBILE_CL
-<<<<<<< HEAD
-=======
 static std::mutex lc;
->>>>>>> upstream/develop
 template <typename Dtype, Precision P>
 void PaddleMobile<Dtype, P>::SetCLPath(std::string path) {
   std::lock_guard<std::mutex> lock(lc);
@@ -214,8 +210,6 @@ void PaddleMobile<Dtype, P>::SetCLPath(std::string path) {
     framework::CLEngine::Instance()->setClPath(path);
   }
 }
-<<<<<<< HEAD
-=======
 template <>
 double PaddleMobile<GPU_CL, Precision::FP32>::GetPredictTime() {
   cl_int status;
@@ -418,7 +412,6 @@ int PaddleMobile<Dtype, P>::readText(
   return size + 1;
 }
->>>>>>> upstream/develop
 #endif
 template class PaddleMobile<CPU, Precision::FP32>;
src/io/paddle_mobile.h (View file @ 202d5f29)

@@ -81,11 +81,8 @@ class PaddleMobile {
 #ifdef PADDLE_MOBILE_CL
  public:
   void SetCLPath(std::string cl_path);
-<<<<<<< HEAD
-=======
   int readText(const char *kernelPath,
               char **pcode);  // 读取文本文件放入 pcode,返回字符串长度
->>>>>>> upstream/develop
 #endif
  private:
src/operators/fusion_conv_add_op.cpp (View file @ 202d5f29)

@@ -61,5 +61,7 @@ REGISTER_OPERATOR_MALI_GPU(fusion_conv_add, ops::FusionConvAddOp);
 #ifdef PADDLE_MOBILE_CL
 REGISTER_OPERATOR_CL(fusion_conv_add, ops::FusionConvAddOp);
 #endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_conv_add, ops::FusionConvAddOp);
+#endif
 #endif
src/operators/fusion_conv_add_relu_op.h (View file @ 202d5f29)

@@ -29,8 +29,9 @@ namespace operators {
 class FusionConvAddReluOpMatcher : public framework::FusionOpMatcher {
  public:
  FusionConvAddReluOpMatcher() {
-    // node_ = framework::Node(G_OP_TYPE_FUSION_CONV_ADD);
-    // node_ > std::make_shared<framework::Node>(G_OP_TYPE_RELU);
+    node_ = framework::Node(G_OP_TYPE_CONV);
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
+        std::make_shared<framework::Node>(G_OP_TYPE_RELU);
  }
  void FolderNodes(
src/operators/fusion_deconv_add_op.cpp (new file, 0 → 100644; View file @ 202d5f29)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef FUSION_DECONVADD_OP

#include "operators/fusion_deconv_add_op.h"

namespace paddle_mobile {
namespace operators {}
}  // namespace paddle_mobile

namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER(fusion_deconv_add, ops::FusionDeconvAddMatcher);
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_deconv_add, ops::FusionDeconvAddOp);
#endif

#endif
src/operators/fusion_deconv_add_op.h (new file, 0 → 100644; View file @ 202d5f29)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. (Apache License, Version 2.0 header, identical to the one above.) */

#ifdef FUSION_DECONVADD_OP

#pragma once

#include <string>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/deconv_add_kernel.h"

namespace paddle_mobile {
namespace operators {
using std::string;
using std::vector;

class FusionDeconvAddMatcher : public framework::FusionOpMatcher {
 public:
  FusionDeconvAddMatcher() {
    node_ = framework::Node(G_OP_TYPE_CONV_TRANSPOSE);
    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD);
  }

  void FolderNodes(
      framework::Node *node,
      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
    node->Folder(node_.Depth(), Type(),
                 {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}}}, removed_nodes);
  }
  std::string Type() { return G_OP_TYPE_FUSION_DECONV_ADD; }
};

template <typename DeviceType, typename T>
class FusionDeconvAddOp : public framework::OperatorWithKernel<
                              DeviceType, FusionDeconvAddParam<DeviceType>,
                              operators::DeconvAddKernel<DeviceType, T>> {
 public:
  FusionDeconvAddOp(const string &type, const VariableNameMap &inputs,
                    const VariableNameMap &outputs,
                    const framework::AttributeMap &attrs,
                    std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<
            DeviceType, FusionDeconvAddParam<DeviceType>,
            operators::DeconvAddKernel<DeviceType, T>>(type, inputs, outputs,
                                                       attrs, scope) {}

  void InferShape() const {
    auto input = this->param_.Input();
    auto in_dims = input->dims();
    auto filter = this->param_.Filter();
    auto filter_dims = filter->dims();
    std::vector<int> strides = this->param_.Strides();
    std::vector<int> paddings = this->param_.Paddings();
    std::vector<int> dilations = this->param_.Dilations();
    int groups = this->param_.Groups();

    PADDLE_MOBILE_ENFORCE(
        in_dims.size() == 4 || in_dims.size() == 5,
        "ConvTransposeOp intput should be 4-D or 5-D tensor.");
    PADDLE_MOBILE_ENFORCE(
        in_dims.size() == filter_dims.size(),
        "ConvTransposeOp input dimension and filter dimension "
        "should be the same.");
    PADDLE_MOBILE_ENFORCE(
        in_dims.size() - strides.size() == 2U,
        "ConvTransposeOp input dimension and strides dimension should "
        "be consistent.");
    PADDLE_MOBILE_ENFORCE(paddings.size() == strides.size(),
                          "ConvTransposeOp paddings dimension and strides "
                          "dimension should be the same.");
    PADDLE_MOBILE_ENFORCE(paddings.size() == dilations.size(),
                          "ConvTransposeOp paddings dimension and dilations "
                          "dimension should be the same.");
    PADDLE_MOBILE_ENFORCE(
        in_dims[1] == filter_dims[0],
        "In ConvTransposeOp, The number of input channels should "
        "be equal to the number of filter's channels.");

    std::vector<int64_t> output_shape({in_dims[0], filter_dims[1] * groups});
    for (size_t i = 0; i < strides.size(); ++i) {
      auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
      output_shape.push_back((in_dims[i + 2] - 1) * strides[i] -
                             2 * paddings[i] + filter_extent);
    }
    this->param_.Output()->Resize(framework::make_ddim(output_shape));
  }

 protected:
};

}  // namespace operators
}  // namespace paddle_mobile

#endif  // FUSION_DECONV_ADD_OP
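InferShape() above sizes each spatial dimension of the transposed convolution as (in - 1) * stride - 2 * pad + dilation * (k - 1) + 1, where k is the kernel extent. A small helper with a worked example (illustrative only, not part of the commit):

```cpp
// Output extent of a transposed convolution along one spatial axis.
constexpr int deconv_out_dim(int in, int k, int stride, int pad, int dilation) {
  return (in - 1) * stride - 2 * pad + dilation * (k - 1) + 1;
}

// A 4x4 input with a 3x3 kernel, stride 2, padding 1, dilation 1 gives 7x7.
static_assert(deconv_out_dim(4, 3, 2, 1, 1) == 7, "example from the formula");
```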
src/operators/fusion_deconv_add_relu_op.cpp (new file, 0 → 100644; View file @ 202d5f29)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. (Apache License, Version 2.0 header, identical to the one above.) */

#ifdef FUSION_DECONVADDRELU_OP

#include "operators/fusion_deconv_add_relu_op.h"

namespace paddle_mobile {
namespace operators {}
}  // namespace paddle_mobile

namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER(fusion_deconv_add_relu, ops::FusionDeconvAddReluMatcher);
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_deconv_add_relu, ops::FusionDeconvAddReluOp);
#endif

#endif
src/operators/fusion_deconv_add_relu_op.h
0 → 100644
浏览文件 @
202d5f29
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_DECONVADDRELU_OP

#pragma once

#include <string>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/deconv_add_relu_kernel.h"

namespace paddle_mobile {
namespace operators {
using std::string;
using std::vector;

class FusionDeconvAddReluMatcher : public framework::FusionOpMatcher {
 public:
  FusionDeconvAddReluMatcher() {
    node_ = framework::Node(G_OP_TYPE_CONV_TRANSPOSE);
    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
        std::make_shared<framework::Node>(G_OP_TYPE_RELU);
  }

  void FolderNodes(
      framework::Node *node,
      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
    node->Folder(node_.Depth(), Type(),
                 {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}}}, removed_nodes);
  }

  std::string Type() { return G_OP_TYPE_FUSION_DECONV_ADD_RELU; }
};

template <typename DeviceType, typename T>
class FusionDeconvAddReluOp
    : public framework::OperatorWithKernel<
          DeviceType, FusionDeconvAddReluParam<DeviceType>,
          operators::DeconvAddReluKernel<DeviceType, T>> {
 public:
  FusionDeconvAddReluOp(const string &type, const VariableNameMap &inputs,
                        const VariableNameMap &outputs,
                        const framework::AttributeMap &attrs,
                        std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<
            DeviceType, FusionDeconvAddReluParam<DeviceType>,
            operators::DeconvAddReluKernel<DeviceType, T>>(type, inputs,
                                                           outputs, attrs,
                                                           scope) {}

  void InferShape() const {
    auto input = this->param_.Input();
    auto in_dims = input->dims();

    auto filter = this->param_.Filter();
    auto filter_dims = filter->dims();

    std::vector<int> strides = this->param_.Strides();
    std::vector<int> paddings = this->param_.Paddings();
    std::vector<int> dilations = this->param_.Dilations();
    int groups = this->param_.Groups();

    PADDLE_MOBILE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5,
                          "ConvTransposeOp input should be 4-D or 5-D tensor.");
    PADDLE_MOBILE_ENFORCE(
        in_dims.size() == filter_dims.size(),
        "ConvTransposeOp input dimension and filter dimension "
        "should be the same.");
    PADDLE_MOBILE_ENFORCE(
        in_dims.size() - strides.size() == 2U,
        "ConvTransposeOp input dimension and strides dimension should "
        "be consistent.");
    PADDLE_MOBILE_ENFORCE(paddings.size() == strides.size(),
                          "ConvTransposeOp paddings dimension and strides "
                          "dimension should be the same.");
    PADDLE_MOBILE_ENFORCE(paddings.size() == dilations.size(),
                          "ConvTransposeOp paddings dimension and dilations "
                          "dimension should be the same.");
    PADDLE_MOBILE_ENFORCE(in_dims[1] == filter_dims[0],
                          "In ConvTransposeOp, the number of input channels "
                          "should be equal to the number of filter's channels.");

    std::vector<int64_t> output_shape({in_dims[0], filter_dims[1] * groups});
    for (size_t i = 0; i < strides.size(); ++i) {
      auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
      output_shape.push_back((in_dims[i + 2] - 1) * strides[i] -
                             2 * paddings[i] + filter_extent);
    }
    this->param_.Output()->Resize(framework::make_ddim(output_shape));
  }

 protected:
};

}  // namespace operators
}  // namespace paddle_mobile

#endif  // FUSION_DECONV_ADD_RELU_OP
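As a quick sanity check of the shape rule in InferShape above (each spatial output size is (in - 1) * stride - 2 * pad + dilation * (k - 1) + 1), here is a small standalone sketch; the sample dims are illustrative assumptions, not values from the commit:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Illustrative dims only: NCHW input, filter laid out as
  // [C_in, C_out / groups, kH, kW], matching the indexing in InferShape above.
  std::vector<int64_t> in_dims = {1, 16, 32, 32};
  std::vector<int64_t> filter_dims = {16, 8, 4, 4};
  std::vector<int> strides = {2, 2}, paddings = {1, 1}, dilations = {1, 1};
  int groups = 1;

  std::vector<int64_t> output_shape = {in_dims[0], filter_dims[1] * groups};
  for (size_t i = 0; i < strides.size(); ++i) {
    int64_t filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
    output_shape.push_back((in_dims[i + 2] - 1) * strides[i] -
                           2 * paddings[i] + filter_extent);
  }
  for (int64_t d : output_shape) std::printf("%lld ", (long long)d);
  std::printf("\n");  // prints: 1 8 64 64
  return 0;
}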
src/operators/kernel/arm/fetch_kernel.cpp
@@ -8,10 +8,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-<<<<<<< HEAD
-#ifdef FUSION_CONVADD_OP
-=======
->>>>>>> upstream/develop
 #include "operators/kernel/fetch_kernel.h"
 namespace paddle_mobile {
 namespace operators {
...
@@ -26,7 +22,3 @@ void FetchKernel<CPU, float>::Compute(const FetchParam<CPU> &param) {
 template class FetchKernel<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
-<<<<<<< HEAD
-#endif
-=======
->>>>>>> upstream/develop
src/operators/kernel/cl/cl_kernel/feed_kernel.cl
@@ -13,20 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
-<<<<<<< HEAD
-__kernel void feed(__global float *in, __write_only image2d_t outputImage,int h,int w)
-=======
 __kernel void feed(__global float *in, __write_only image2d_t outputImage,int h,int w,int c)
->>>>>>> upstream/develop
 {
   int i = get_global_id(0);
   int j = get_global_id(1);
   half4 pixel;
   pixel.x = convert_half(in[(i * w + j)]);
-<<<<<<< HEAD
-  pixel.y = convert_half(in[h * w + (i * w + j)]);
-  pixel.z = convert_half(in[2 * h * w + (i * w + j)]);
-=======
   if (c >= 2) {
     pixel.y = convert_half(in[h * w + (i * w + j)]);
   } else {
...
@@ -37,7 +29,6 @@ __kernel void feed(__global float *in, __write_only image2d_t outputImage,int h,
   } else {
     pixel.z = 0.0;
   }
->>>>>>> upstream/develop
   pixel.w = 0.0;
   int2 coords;
   coords.x = j;
...
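For reference, a minimal host-side C++ sketch of the packing the updated kernel now performs: each (i, j) pixel becomes a 4-component texel, and channels beyond the new `c` argument are zero-filled instead of read, so 1- or 2-channel inputs no longer read past the end of the input buffer. Names, the float element type, and the `c >= 3` guard are assumptions; the real conversion stays in the OpenCL kernel above.

#include <vector>

// CHW float input -> per-pixel 4-component output, zero-filling missing
// channels, mirroring the guarded reads in feed_kernel.cl.
std::vector<float> PackCHWToPixel4(const std::vector<float> &in, int c, int h,
                                   int w) {
  std::vector<float> out(static_cast<size_t>(h) * w * 4, 0.0f);
  for (int i = 0; i < h; ++i) {
    for (int j = 0; j < w; ++j) {
      const int p = i * w + j;
      out[p * 4 + 0] = in[p];                          // pixel.x
      if (c >= 2) out[p * 4 + 1] = in[h * w + p];      // pixel.y
      if (c >= 3) out[p * 4 + 2] = in[2 * h * w + p];  // pixel.z (guard assumed)
      // pixel.w stays 0.0, as in the kernel.
    }
  }
  return out;
}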
src/operators/kernel/cl/feed_kernel.cpp
@@ -36,10 +36,7 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
   const float *input_data = input->data<float>();
   int numel = input->numel();
   cl_mem cl_image = output->GetCLImage();
-<<<<<<< HEAD
-=======
   int c = input->dims()[1];
->>>>>>> upstream/develop
   int height = output->dims()[2];
   int width = output->dims()[3];
   CLTensor input_cl_tensor(this->cl_helper_.CLContext(),
...
@@ -55,11 +52,8 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
   CL_CHECK_ERRORS(status);
   status = clSetKernelArg(kernel, 3, sizeof(cl_int), &height);
   CL_CHECK_ERRORS(status);
-<<<<<<< HEAD
-=======
   status = clSetKernelArg(kernel, 4, sizeof(cl_int), &c);
   CL_CHECK_ERRORS(status);
->>>>>>> upstream/develop
   size_t global_work_size[2] = {width, height};
...
src/operators/kernel/deconv_add_kernel.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_DECONVADD_OP

#pragma once

#include "framework/operator.h"
#include "operators/op_param.h"

namespace paddle_mobile {
namespace operators {

using framework::OpKernelBase;

template <typename DeviceType, typename T>
class DeconvAddKernel
    : public OpKernelBase<DeviceType, FusionDeconvAddParam<DeviceType>> {
 public:
  void Compute(const FusionDeconvAddParam<DeviceType> &param);
  bool Init(FusionDeconvAddParam<DeviceType> *param);
};

}  // namespace operators
}  // namespace paddle_mobile

#endif
src/operators/kernel/deconv_add_relu_kernel.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_DECONVADDRELU_OP

#pragma once

#include "framework/operator.h"
#include "operators/op_param.h"

namespace paddle_mobile {
namespace operators {

using framework::OpKernelBase;

template <typename DeviceType, typename T>
class DeconvAddReluKernel
    : public OpKernelBase<DeviceType, FusionDeconvAddReluParam<DeviceType>> {
 public:
  void Compute(const FusionDeconvAddReluParam<DeviceType> &param);
  bool Init(FusionDeconvAddReluParam<DeviceType> *param);
};

}  // namespace operators
}  // namespace paddle_mobile

#endif
src/operators/kernel/fpga/V2/conv_add_kernel.cpp
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP

#include "operators/kernel/conv_add_kernel.h"

namespace paddle_mobile {
namespace operators {

template <>
bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
  bool relu_enabled = false;
  auto input = const_cast<Tensor *>(param->Input());
  const Tensor *bias = param->Bias();
  auto bias_ptr = bias->data<float>();
  auto filter = const_cast<Tensor *>(param->Filter());
  auto out = param->Output();

  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                        "Output channel should be equal to bias number");
  int channel = out->dims()[1];
  auto bs_ptr =
      (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
  for (int i = 0; i < channel; i++) {
    bs_ptr[i + channel] = 1;
    bs_ptr[i] = bias_ptr[i];
  }

  fpga::format_conv_data(filter, out, bs_ptr, param->Groups());

  fpga::SplitConvArgs conv_arg = {0};
  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
                       param->Groups(), param->Strides()[0],
                       param->Strides()[1], param->Paddings()[0],
                       param->Paddings()[1], bs_ptr);
  param->SetFpgaArgs(conv_arg);
  return true;
}

template <>
void ConvAddKernel<FPGA, float>::Compute(const FusionConvAddParam<FPGA> &param) {
  fpga::ComputeFpgaConv(param.FpgaArgs());
}

}  // namespace operators
}  // namespace paddle_mobile
#endif
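The Init above packs per-channel bias and scale into one buffer: the first `channel` floats hold the bias values and the next `channel` floats hold the scale factors, which are fixed to 1 here because no batch-norm or scale op is fused. A minimal standalone sketch of that layout, with a hypothetical helper name:

#include <cstddef>
#include <vector>

// Combined bias/scale buffer in the layout used by the FPGA conv-add Init:
// [bias_0 .. bias_{C-1} | scale_0 .. scale_{C-1}].
std::vector<float> BuildBiasScale(const std::vector<float> &bias) {
  const size_t channel = bias.size();
  std::vector<float> bs(2 * channel);
  for (size_t i = 0; i < channel; ++i) {
    bs[i] = bias[i];         // first half: per-channel bias
    bs[i + channel] = 1.0f;  // second half: per-channel scale, 1 when nothing is fused
  }
  return bs;
}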
src/operators/kernel/fpga/V2/deconv_add_kernel.cpp
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_DECONVADD_OP

#include "operators/kernel/deconv_add_kernel.h"
#include "framework/operator.h"
#include "operators/op_param.h"

namespace paddle_mobile {
namespace operators {

template <>
bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
  return true;
}

template <>
void DeconvAddKernel<FPGA, float>::Compute(
    const FusionDeconvAddParam<FPGA> &param) {}

}  // namespace operators
}  // namespace paddle_mobile
#endif
src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_DECONVADDRELU_OP

#include "operators/kernel/deconv_add_relu_kernel.h"
#include "framework/operator.h"
#include "operators/op_param.h"

namespace paddle_mobile {
namespace operators {

template <>
bool DeconvAddReluKernel<FPGA, float>::Init(
    FusionDeconvAddReluParam<FPGA> *param) {
  return true;
}

template <>
void DeconvAddReluKernel<FPGA, float>::Compute(
    const FusionDeconvAddReluParam<FPGA> &param) {}

}  // namespace operators
}  // namespace paddle_mobile
#endif
src/operators/kernel/fpga/V2/slice_kernel.cpp
@@ -24,10 +24,7 @@ bool SliceKernel<FPGA, float>::Init(SliceParam<FPGA>* param) {
 }
 template <>
 void SliceKernel<FPGA, float>::Compute(const SliceParam<FPGA>& param) {}
-<<<<<<< HEAD
-=======
->>>>>>> upstream/develop
 }  // namespace operators
 }  // namespace paddle_mobile
 #endif
src/operators/kernel/fpga/V2/softmax_kernel.cpp
@@ -49,16 +49,12 @@ void SoftmaxKernel<FPGA, float>::Compute(const SoftmaxParam<FPGA> &param) {
   Tensor *out = param.Out();
   fpga::PerformBypass(param.FpgaArgs());
-<<<<<<< HEAD
-  math::SoftmaxFuntor<CPU, float>()(in_x, out);
-=======
   fpga::fpga_invalidate(
       (void *)in_x->data<float>(),  // NOLINT
       fpga::get_aligned_channel_num((int)in_x->dims()[1]) *  // NOLINT
          sizeof(float));
   math::SoftmaxFuntor<CPU, float>()(in_x, out);
   fpga::fpga_flush(out->data<float>(), out->memory_size());
->>>>>>> upstream/develop
 }
 }  // namespace operators
...
src/operators/kernel/fpga/V2/split_kernel.cpp
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SPLIT_OP

#include "operators/kernel/split_kernel.h"

namespace paddle_mobile {
namespace operators {

template <>
bool SplitKernel<FPGA, float>::Init(SplitParam<FPGA>* param) {
  return true;
}

template <>
void SplitKernel<FPGA, float>::Compute(const SplitParam<FPGA>& param) {}

}  // namespace operators
}  // namespace paddle_mobile
#endif
src/operators/kernel/fpga/V2/transpose2_kernel.cpp
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef TRANSPOSE2_OP

#include "operators/kernel/transpose2_kernel.h"
#include "operators/kernel/central-arm-func/transpose2_arm_func.h"

namespace paddle_mobile {
namespace operators {

template <>
bool Transpose2Kernel<FPGA, float>::Init(Transpose2Param<FPGA> *param) {
  return true;
}

template <>
void Transpose2Kernel<FPGA, float>::Compute(
    const Transpose2Param<FPGA> &param) {
  // Transpose2Compute<float>(param);
}

}  // namespace operators
}  // namespace paddle_mobile
#endif
src/operators/op_param.h
@@ -1532,8 +1532,6 @@ class ReluParam<GPU_CL> : public ReluParamBase<GPU_CL> {
 };
 #endif
-<<<<<<< HEAD
-=======
 #endif
 #ifdef TANH_OP
...
@@ -1555,7 +1553,6 @@ class TanhParam : public OpParam {
   RType *input_x_;
   RType *out_;
 };
->>>>>>> upstream/develop
 #endif
 #ifdef PRELU_OP
...
@@ -2224,7 +2221,10 @@ class ConvTransposeParam : public OpParam {
                      const Scope &scope) {
     filter_ = FilterFrom<GType>(inputs, scope);
     input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutputFrom<GType>(outputs, scope);
+    //    output_ = OutputFrom<GType>(outputs, scope);
+    if (outputs.count("Output")) {
+      output_ = OpParam::OutputFrom<GType>(outputs, scope);
+    }
     strides_ = GetAttr<vector<int>>("strides", attrs);
     paddings_ = GetAttr<vector<int>>("paddings", attrs);
     dilations_ = GetAttr<vector<int>>("dilations", attrs);
...
@@ -2265,6 +2265,37 @@ class ConvTransposeParam : public OpParam {
 #endif
 };
 #endif
+
+#ifdef FUSION_DECONVADD_OP
+template <typename Dtype>
+class FusionDeconvAddParam : public ConvTransposeParam<Dtype> {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+
+ public:
+  FusionDeconvAddParam(const VariableNameMap &inputs,
+                       const VariableNameMap &outputs,
+                       const AttributeMap &attrs, const Scope &scope)
+      : ConvTransposeParam<Dtype>(inputs, outputs, attrs, scope) {
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+  }
+
+  RType *Bias() const { return bias_; }
+
+  const int &Axis() const { return axis_; }
+
+  RType *Output() const { return output_; }
+
+ protected:
+  RType *bias_;
+  int axis_;
+  RType *output_;
+};
+#endif
+
+#ifdef FUSION_DECONVADDRELU_OP
+template <typename Dtype>
+using FusionDeconvAddReluParam = FusionDeconvAddParam<Dtype>;
+#endif
+
 #ifdef FUSION_DECONVRELU_OP
 template <typename Dtype>
...
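FusionDeconvAddParam above only extends ConvTransposeParam with the bias input Y, the elementwise-add axis, and the fused Out. Numerically, the folded elementwise_add contributes a per-channel bias broadcast over the deconv output; a minimal standalone sketch, assuming NCHW layout and axis == 1 (the FPGA V2 kernel bodies added in this commit are still stubs, so this is illustrative only):

#include <cstddef>
#include <vector>

// Adds a per-channel bias to an NCHW tensor, i.e. the broadcast that the
// folded elementwise_add (axis == 1) performs on the deconv output.
void AddChannelBias(std::vector<float> *data, const std::vector<float> &bias,
                    int n, int c, int h, int w) {
  for (int ni = 0; ni < n; ++ni) {
    for (int ci = 0; ci < c; ++ci) {
      const size_t base = (static_cast<size_t>(ni) * c + ci) * h * w;
      for (int k = 0; k < h * w; ++k) {
        (*data)[base + k] += bias[ci];
      }
    }
  }
}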
src/operators/split_op.cpp
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #ifdef SPLIT_OP
+#include <vector>
 #include "operators/split_op.h"
 namespace paddle_mobile {
...
@@ -83,5 +83,8 @@ namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
 REGISTER_OPERATOR_CPU(split, ops::SplitOp);
 #endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(split, ops::SplitOp);
+#endif
 #endif  // SPLIT_OP
src/operators/tanh_op.cpp
@@ -29,7 +29,7 @@ void TanhOp<DeviceType, T>::InferShape() const {
 namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_FPGA
-REGISTER_OPERATOR_FPGA(Tanh, ops::TanhOp);
+REGISTER_OPERATOR_FPGA(tanh, ops::TanhOp);
 #endif
 #endif
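The lowercase rename above matters because operators are looked up by the exact op type string recorded in the model, and that lookup is case-sensitive: a kernel registered under "Tanh" would never be found for a "tanh" op. A toy registry (not the framework's actual mechanism) illustrating the point:

#include <cassert>
#include <functional>
#include <map>
#include <string>

int main() {
  // Case-sensitive, string-keyed registry: the key must match the op type
  // string in the program exactly.
  std::map<std::string, std::function<void()>> registry;
  registry["tanh"] = [] { /* would construct ops::TanhOp */ };

  assert(registry.count("tanh") == 1);
  assert(registry.count("Tanh") == 0);  // a "Tanh" registration stays unreachable
  return 0;
}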
src/operators/transpose2_op.cpp
@@ -60,5 +60,8 @@ namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
 REGISTER_OPERATOR_CPU(transpose2, ops::Transpose2Op);
 #endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(transpose2, ops::Transpose2Op);
+#endif
 #endif  // TRANSPOSE_OP
test/CMakeLists.txt
@@ -66,15 +66,12 @@ list(FIND NET "FPGA_NET_V1" CON)
 if (CON GREATER -1)
     ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-resnet50 paddle-mobile)
+    ADD_EXECUTABLE(test-densebox net/test_densebox_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-densebox paddle-mobile)
     set(FOUND_MATCH ON)
 endif ()
-<<<<<<< HEAD
-list(FIND NET "FPGA_NET_V2" CON)
-if (CON GREATER -1)
-    ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h)
-    target_link_libraries(test-resnet50 paddle-mobile)
-=======
 list(FIND NET "FPGA_NET_V2" CON)
 if (CON GREATER -1)
...
@@ -83,7 +80,10 @@ if (CON GREATER -1)
     ADD_EXECUTABLE(test-pe fpga/test_pe.cpp)
     target_link_libraries(test-pe paddle-mobile)
->>>>>>> upstream/develop
+    ADD_EXECUTABLE(test-densebox net/test_densebox_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-densebox paddle-mobile)
     set(FOUND_MATCH ON)
 endif ()
...
test/net/test_yologpu.cpp
@@ -13,19 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include <iostream>
-<<<<<<< HEAD
-#include "../test_helper.h"
-#include "../test_include.h"
-int main() {
-  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
-  //  paddle_mobile.SetThreadNum(4);
-  auto time1 = paddle_mobile::time();
-  //  auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
-  //  std::string(g_mobilenet_detect) + "/params", true);
-  auto isok = paddle_mobile.Load(std::string(g_yolo_mul), true);
-=======
 #include <thread>
 #include "../../src/common/types.h"
 #include "../../src/io/paddle_test_inference_api.h"
...
@@ -99,7 +86,6 @@ void t2() {
                          std::string(g_yolo_mul) + "/params", true);
   //  auto isok = paddle_mobile.Load(std::string(g_yolo_mul), true);
->>>>>>> upstream/develop
   if (isok) {
     auto time2 = paddle_mobile::time();
     std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
...
@@ -138,8 +124,6 @@ void t2() {
   //    std::cout << i << std::endl;
   //  }
 }
-<<<<<<< HEAD
-=======
 }
 void t3() {
...
@@ -201,6 +185,5 @@ int main() {
   //  th2.join();
   //  th3.join();
   th1.join();
->>>>>>> upstream/develop
   return 0;
 }