Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
292e69a8
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
292e69a8
编写于
9月 03, 2018
作者:
Z
zhangyang0701
提交者:
GitHub
9月 03, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into develop
上级
79196cbf
63ee0b8f
变更
4
显示空白变更内容
内联
并排
Showing
4 changed files
with
199 additions
and
112 deletions
+199
-112
src/operators/kernel/central-arm-func/pool_arm_func.h
src/operators/kernel/central-arm-func/pool_arm_func.h
+5
-3
src/operators/math/pool_2x2.cpp
src/operators/math/pool_2x2.cpp
+186
-99
src/operators/math/pool_2x2.h
src/operators/math/pool_2x2.h
+4
-4
src/operators/math/pool_3x3.cpp
src/operators/math/pool_3x3.cpp
+4
-6
未找到文件。
src/operators/kernel/central-arm-func/pool_arm_func.h
浏览文件 @
292e69a8
...
@@ -76,15 +76,17 @@ void PoolCompute(const PoolParam<CPU> &param) {
...
@@ -76,15 +76,17 @@ void PoolCompute(const PoolParam<CPU> &param) {
}
}
}
}
}
else
if
(
ksize
[
0
]
==
2
&&
ksize
[
0
]
==
ksize
[
1
])
{
}
else
if
(
ksize
[
0
]
==
2
&&
ksize
[
0
]
==
ksize
[
1
]
&&
strides
[
0
]
==
2
&&
strides
[
0
]
==
strides
[
1
]
&&
paddings
[
0
]
==
paddings
[
1
]
&&
paddings
[
1
]
==
0
)
{
#if __ARM_NEON
#if __ARM_NEON
#if __aarch64__
#if __aarch64__
PoolBasic
(
pooling_type
,
ksize
,
strides
,
paddings
,
in_x
,
out
);
PoolBasic
(
pooling_type
,
ksize
,
strides
,
paddings
,
in_x
,
out
);
#else
#else
if
(
pooling_type
==
"max"
)
{
if
(
pooling_type
==
"max"
)
{
math
::
Pool2x2Max
(
strides
,
paddings
,
in_x
,
out
);
math
::
Pool2x2Max
s2p0
(
strides
,
paddings
,
in_x
,
out
);
}
else
if
(
pooling_type
==
"avg"
)
{
}
else
if
(
pooling_type
==
"avg"
)
{
math
::
Pool2x2Avg
(
strides
,
paddings
,
in_x
,
out
);
math
::
Pool2x2Avg
s2p0
(
strides
,
paddings
,
in_x
,
out
);
}
}
#endif
#endif
#else
#else
...
...
src/operators/math/pool_2x2.cpp
浏览文件 @
292e69a8
...
@@ -20,21 +20,15 @@ limitations under the License. */
...
@@ -20,21 +20,15 @@ limitations under the License. */
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
#define FLT_MAX __FLT_MAX__
void
Pool2x2Max
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
const
Tensor
*
input
,
void
Pool2x2Maxs2p0
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
Tensor
*
output
)
{
const
Tensor
*
input
,
Tensor
*
output
)
{
#if __ARM_NEON
#if __aarch64__
#else
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
input_height
=
input
->
dims
()[
2
];
const
int
input_height
=
input
->
dims
()[
2
];
const
int
input_width
=
input
->
dims
()[
3
];
const
int
input_width
=
input
->
dims
()[
3
];
const
int
output_channels
=
output
->
dims
()[
1
];
const
int
output_channels
=
output
->
dims
()[
1
];
int
output_height
=
output
->
dims
()[
2
];
int
output_height
=
output
->
dims
()[
2
];
const
int
output_width
=
output
->
dims
()[
3
];
const
int
output_width
=
output
->
dims
()[
3
];
const
int
ksize_height
=
2
;
const
int
ksize_height
=
2
;
...
@@ -47,72 +41,110 @@ void Pool2x2Max(vector<int> strides, vector<int> paddings, const Tensor *input,
...
@@ -47,72 +41,110 @@ void Pool2x2Max(vector<int> strides, vector<int> paddings, const Tensor *input,
const
int
input_channel_stride
=
input_height
*
input_width
;
const
int
input_channel_stride
=
input_height
*
input_width
;
const
int
output_channel_stride
=
output_height
*
output_width
;
const
int
output_channel_stride
=
output_height
*
output_width
;
const
int
input_batch_stride
=
output_channels
*
input_channel_stride
;
const
int
output_batch_stride
=
output_channels
*
output_channel_stride
;
const
float
*
input_data
=
input
->
data
<
float
>
();
const
float
*
input_data
=
input
->
data
<
float
>
();
float
*
output_data
=
output
->
mutable_data
<
float
>
();
float
*
output_data
=
output
->
mutable_data
<
float
>
();
int
out_w_num
=
output_width
>>
2
;
int
w1
=
input_width
/
16
;
const
int
in_h_num
=
output_height
>>
1
;
int
_w1
=
input_width
%
16
;
const
int
input_batch_stride
=
output_channels
*
input_channel_stride
;
int
w2
=
_w1
/
4
;
const
int
output_batch_stride
=
output_channels
*
output_channel_stride
;
int
_w2
=
_w1
%
4
;
int
remain
=
output_width
-
out_w_num
<<
2
;
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
const
float
*
input_data_chanel_row_next
=
input_data
+
input_width
;
for
(
int
ph
=
0
;
ph
<
input_height
;
ph
+=
2
)
{
for
(;
output_height
>
0
;
output_height
--
)
{
const
float
*
in_ptr1
=
input_data
+
i
*
input_batch_stride
+
if
(
out_w_num
>
0
)
{
c
*
input_channel_stride
+
ph
*
input_width
;
const
float
*
in_ptr2
=
in_ptr1
+
input_width
;
if
(
ph
+
1
>=
input_height
)
{
in_ptr2
=
static_cast
<
float
*>
(
paddle_mobile
::
memory
::
Alloc
(
sizeof
(
float
)
*
input_width
));
memset
(
static_cast
<
void
*>
(
const_cast
<
float
*>
(
in_ptr2
)),
-
FLT_MAX
,
sizeof
(
float
)
*
input_width
);
}
float
*
out_ptr
=
output_data
+
i
*
output_batch_stride
+
c
*
output_channel_stride
+
ph
/
2
*
output_width
;
asm
volatile
(
asm
volatile
(
"max_loop:
\n\t
"
"subs %[w1], %[w1], #1
\n\t
"
"vld1.f32 {q0,q1}, [%[in_ptr1]]!
\n\t
"
"blt end_w1_%=
\n\t
"
"vld1.f32 {q2,q3}, [%[in_ptr2]]!
\n\t
"
"loop_w1_%=:
\n\t
"
"pld [%[in_ptr1], #64]
\n\t
"
"pld [%[in_ptr2], #64]
\n\t
"
"vld1.f32 {q0, q1}, [%[in_ptr1]]!
\n\t
"
"vld1.f32 {q2, q3}, [%[in_ptr2]]!
\n\t
"
"vld1.f32 {q6, q7}, [%[in_ptr1]]!
\n\t
"
"vld1.f32 {q8, q9}, [%[in_ptr2]]!
\n\t
"
"vmax.f32 q0, q0, q2
\n\t
"
"vmax.f32 q0, q0, q2
\n\t
"
"vmax.f32 q1, q1, q3
\n\t
"
"vmax.f32 q1, q1, q3
\n\t
"
"vmax.f32 q6, q6, q8
\n\t
"
"vmax.f32 q7, q7, q9
\n\t
"
"vpmax.f32 d8, d0, d1
\n\t
"
"vpmax.f32 d9, d2, d3
\n\t
"
"vpmax.f32 d10, d12, d13
\n\t
"
"vpmax.f32 d11, d14, d15
\n\t
"
"vst1.32 {q4, q5}, [%[out_ptr]]!
\n\t
"
"subs %[w1], %[w1], #1
\n\t
"
"bge loop_w1_%=
\n\t
"
"end_w1_%=:
\n\t
"
"subs %[w2], %[w2], #1
\n\t
"
"blt end_w2_%=
\n\t
"
"loop_w2_%=:
\n\t
"
"vld1.f32 {q0}, [%[in_ptr1]]!
\n\t
"
"vld1.f32 {q1}, [%[in_ptr2]]!
\n\t
"
"vmax.f32 q0, q0, q1
\n\t
"
"vpmax.f32 d4, d0, d1
\n\t
"
"vpmax.f32 d4, d0, d1
\n\t
"
"vpmax.f32 d5, d2, d3
\n\t
"
"vst1.32 {d4}, [%[out_ptr]]!
\n\t
"
"subs %[out_w_num], #1
\n\t
"
"vst1.32 {q2}, [%[out_ptr]]!
\n\t
"
"subs %[w2], %[w2], #1
\n\t
"
"bne max_loop
\n\t
"
"bge loop_w2_%=
\n\t
"
:
[
in_ptr1
]
"+r"
(
input_data
),
"end_w2_%=:
\n\t
"
[
in_ptr2
]
"+r"
(
input_data_chanel_row_next
),
[
out_ptr
]
"+r"
(
output_data
),
[
out_w_num
]
"+r"
(
out_w_num
)
:
:
:
"memory"
,
"q0"
,
"q1"
,
"q2"
,
"q3"
);
:
[
w1
]
"r"
(
w1
),
[
w2
]
"r"
(
w2
),
[
in_ptr1
]
"r"
(
in_ptr1
),
}
[
in_ptr2
]
"r"
(
in_ptr2
),
[
out_ptr
]
"r"
(
out_ptr
)
:
"memory"
,
"q0"
,
"q1"
,
"q2"
,
"q3"
,
"q4"
,
"q5"
,
"q6"
,
"q7"
,
"q8"
,
"q9"
);
for
(;
remain
>
0
;
remain
--
)
{
if
(
_w2
!=
0
)
{
float
max_row1
=
std
::
max
(
input_data
[
0
],
input_data
[
1
]);
in_ptr1
+=
16
*
w1
+
4
*
w2
;
float
max_row2
=
std
::
max
(
input_data_chanel_row_next
[
0
],
in_ptr2
+=
16
*
w1
+
4
*
w2
;
input_data_chanel_row_next
[
1
]);
out_ptr
+=
8
*
w1
+
2
*
w2
;
*
output_data
=
std
::
max
(
max_row1
,
max_row2
);
if
(
_w2
==
1
)
{
input_data
+=
2
;
*
out_ptr
=
(
*
in_ptr1
>
*
in_ptr2
)
?
*
in_ptr1
:
*
in_ptr2
;
input_data_chanel_row_next
+=
2
;
}
else
if
(
_w2
==
2
)
{
output_data
++
;
float
temp
=
(
*
in_ptr1
++
>
*
in_ptr2
++
)
?
*
in_ptr1
++
:
*
in_ptr2
++
;
float
temp1
=
(
*
in_ptr1
>
*
in_ptr2
)
?
*
in_ptr1
:
*
in_ptr2
;
*
out_ptr
=
(
temp
>
temp1
)
?
temp
:
temp1
;
}
else
if
(
_w2
==
3
)
{
float
temp
=
(
*
in_ptr1
++
>
*
in_ptr2
++
)
?
*
in_ptr1
++
:
*
in_ptr2
++
;
float
temp1
=
(
*
in_ptr1
++
>
*
in_ptr2
++
)
?
*
in_ptr1
++
:
*
in_ptr2
++
;
*
out_ptr
++
=
(
temp
>
temp1
)
?
temp
:
temp1
;
*
out_ptr
=
(
*
in_ptr1
>
*
in_ptr2
)
?
*
in_ptr1
:
*
in_ptr2
;
}
}
}
}
}
input_data
+=
input_channel_stride
;
output_data
+=
output_channel_stride
;
}
}
input_data
+=
input_batch_stride
;
output_data
+=
output_batch_stride
;
}
}
#endif
#else
#endif
}
}
void
Pool2x2Avg
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
const
Tensor
*
input
,
void
Pool2x2Avgs2p0
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
Tensor
*
output
)
{
const
Tensor
*
input
,
Tensor
*
output
)
{
#if __ARM_NEON
#if __aarch64__
#else
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
input_height
=
input
->
dims
()[
2
];
const
int
input_height
=
input
->
dims
()[
2
];
const
int
input_width
=
input
->
dims
()[
3
];
const
int
input_width
=
input
->
dims
()[
3
];
const
int
output_channels
=
output
->
dims
()[
1
];
const
int
output_channels
=
output
->
dims
()[
1
];
int
output_height
=
output
->
dims
()[
2
];
int
output_height
=
output
->
dims
()[
2
];
const
int
output_width
=
output
->
dims
()[
3
];
const
int
output_width
=
output
->
dims
()[
3
];
const
int
ksize_height
=
2
;
const
int
ksize_height
=
2
;
...
@@ -125,59 +157,114 @@ void Pool2x2Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
...
@@ -125,59 +157,114 @@ void Pool2x2Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
const
int
input_channel_stride
=
input_height
*
input_width
;
const
int
input_channel_stride
=
input_height
*
input_width
;
const
int
output_channel_stride
=
output_height
*
output_width
;
const
int
output_channel_stride
=
output_height
*
output_width
;
const
int
input_batch_stride
=
output_channels
*
input_channel_stride
;
const
int
output_batch_stride
=
output_channels
*
output_channel_stride
;
const
float
*
input_data
=
input
->
data
<
float
>
();
const
float
*
input_data
=
input
->
data
<
float
>
();
float
*
output_data
=
output
->
mutable_data
<
float
>
();
float
*
output_data
=
output
->
mutable_data
<
float
>
();
int
out_w_num
=
output_width
>>
2
;
int
w1
=
input_width
/
16
;
const
int
input_batch_stride
=
output_channels
*
input_channel_stride
;
int
_w1
=
input_width
%
16
;
const
int
output_batch_stride
=
output_channels
*
output_channel_stride
;
int
w2
=
_w1
/
4
;
float
vqua
[]
=
{
0.25
f
,
0.25
f
,
0.25
f
,
0.25
f
};
int
_w2
=
_w1
%
4
;
int
remain
=
output_width
-
out_w_num
<<
2
;
float
quarter
=
1
/
4
;
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
output_channels
;
++
c
)
{
const
float
*
input_data_chanel_row_next
=
input_data
+
input_width
;
for
(
int
ph
=
0
;
ph
<
input_height
;
ph
+=
2
)
{
for
(;
output_height
>
0
;
output_height
--
)
{
const
float
*
in_ptr1
=
input_data
+
i
*
input_batch_stride
+
if
(
out_w_num
>
0
)
{
c
*
input_channel_stride
+
ph
*
input_width
;
const
float
*
in_ptr2
=
in_ptr1
+
input_width
;
if
(
ph
+
1
>=
input_height
)
{
in_ptr2
=
static_cast
<
float
*>
(
paddle_mobile
::
memory
::
Alloc
(
sizeof
(
float
)
*
input_width
));
memset
(
static_cast
<
void
*>
(
const_cast
<
float
*>
(
in_ptr2
)),
0
,
sizeof
(
float
)
*
input_width
);
}
float
*
out_ptr
=
output_data
+
i
*
output_batch_stride
+
c
*
output_channel_stride
+
ph
/
2
*
output_width
;
asm
volatile
(
asm
volatile
(
"avg_loop:
\n\t
"
"subs %[w1], %[w1], #1
\n\t
"
"vld1.32 {q0,q1}, [%[in_ptr1]]!
\n\t
"
"blt end_w1_%=
\n\t
"
"vld1.32 {q2,q3}, [%[in_ptr2]]!
\n\t
"
"loop_w1_%=:
\n\t
"
"vadd.f32 q0, q0, q2
\n\t
"
"pld [%[in_ptr1], #64]
\n\t
"
"pld [%[in_ptr2], #64]
\n\t
"
"vmov.f32 d0[0], %[quarter]
\n\t
"
"vld1.f32 {q1, q2}, [%[in_ptr1]]!
\n\t
"
"vld1.f32 {q3, q4}, [%[in_ptr2]]!
\n\t
"
"vld1.f32 {q7, q8}, [%[in_ptr1]]!
\n\t
"
"vld1.f32 {q9, q10}, [%[in_ptr2]]!
\n\t
"
"vadd.f32 q1, q1, q3
\n\t
"
"vadd.f32 q1, q1, q3
\n\t
"
"vpadd.f32 d4, d0, d1
\n\t
"
"vadd.f32 q2, q2, q4
\n\t
"
"vpadd.f32 d5, d2, d3
\n\t
"
"vld1.32 {q4}, [%[vqua]]!
\n\t
"
"vadd.f32 q7, q7, q9
\n\t
"
"vmul.f32 q2, q2, q4
\n\t
"
"vadd.f32 q8, q8, q10
\n\t
"
"subs %[out_w_num], #1
\n\t
"
"vst1.32 {q2}, [%[out_ptr]]!
\n\t
"
"bne avg_loop
\n\t
"
:
[
in_ptr1
]
"+r"
(
input_data
),
[
in_ptr2
]
"+r"
(
input_data_chanel_row_next
),
[
out_ptr
]
"+r"
(
output_data
),
[
out_w_num
]
"+r"
(
out_w_num
)
:
[
vqua
]
"r"
(
vqua
)
:
"memory"
,
"q0"
,
"q1"
,
"q2"
,
"q3"
,
"q4"
);
}
for
(;
remain
>
0
;
remain
--
)
{
"vpadd.f32 d10, d2, d3
\n\t
"
float
max_row1
=
std
::
max
(
input_data
[
0
],
input_data
[
1
]);
"vpadd.f32 d11, d4, d5
\n\t
"
float
max_row2
=
std
::
max
(
input_data_chanel_row_next
[
0
],
input_data_chanel_row_next
[
1
]);
"vpadd.f32 d12, d14, d15
\n\t
"
*
output_data
=
std
::
max
(
max_row1
,
max_row2
);
"vpadd.f32 d13, d16, d17
\n\t
"
input_data
+=
2
;
input_data_chanel_row_next
+=
2
;
"vmul.f32 q5, q5, d0[0]
\n\t
"
output_data
++
;
"vmul.f32 q6, q6, d0[0]
\n\t
"
"vst1.32 {q5, q6}, [%[out_ptr]]!
\n\t
"
"subs %[w1], %[w1], #1
\n\t
"
"bge loop_w1_%=
\n\t
"
"end_w1_%=:
\n\t
"
"subs %[w2], %[w2], #1
\n\t
"
"blt end_w2_%=
\n\t
"
"loop_w2_%=:
\n\t
"
"vld1.f32 {q1}, [%[in_ptr1]]!
\n\t
"
"vld1.f32 {q2}, [%[in_ptr2]]!
\n\t
"
"vadd.f32 q1, q1, q2
\n\t
"
"vpadd.f32 d4, d2, d3
\n\t
"
"vmul.f32 d4, d4, d0[0]
\n\t
"
"vst1.32 {d4}, [%[out_ptr]]!
\n\t
"
"subs %[w2], %[w2], #1
\n\t
"
"bge loop_w2_%=
\n\t
"
"end_w2_%=:
\n\t
"
:
:
[
w1
]
"r"
(
w1
),
[
w2
]
"r"
(
w2
),
[
in_ptr1
]
"r"
(
in_ptr1
),
[
in_ptr2
]
"r"
(
in_ptr2
),
[
out_ptr
]
"r"
(
out_ptr
),
[
quarter
]
"r"
(
quarter
)
:
"memory"
,
"q0"
,
"q1"
,
"q2"
,
"q3"
,
"q4"
,
"q5"
,
"q6"
,
"q7"
,
"q8"
,
"q9"
,
"q10"
);
if
(
_w2
!=
0
)
{
in_ptr1
+=
16
*
w1
+
4
*
w2
;
in_ptr2
+=
16
*
w1
+
4
*
w2
;
out_ptr
+=
8
*
w1
+
2
*
w2
;
if
(
_w2
==
1
)
{
*
out_ptr
=
0.5
*
(
*
in_ptr1
+
*
in_ptr2
);
}
else
if
(
_w2
==
2
)
{
float
temp
=
0
;
temp
+=
*
in_ptr1
++
;
temp
+=
*
in_ptr2
++
;
temp
+=
*
in_ptr1
;
temp
+=
*
in_ptr2
;
*
out_ptr
=
0.5
*
temp
;
}
else
if
(
_w2
==
3
)
{
float
temp
=
0
;
temp
+=
*
in_ptr1
++
;
temp
+=
*
in_ptr2
++
;
temp
+=
*
in_ptr1
++
;
temp
+=
*
in_ptr2
++
;
*
out_ptr
++
=
0.5
*
temp
;
*
out_ptr
=
0.5
*
(
*
in_ptr1
+
*
in_ptr2
);
}
}
}
}
}
input_data
+=
input_channel_stride
;
output_data
+=
output_channel_stride
;
}
}
input_data
+=
input_batch_stride
;
output_data
+=
output_batch_stride
;
}
}
#endif
#else
#endif
}
}
//}
//}
...
...
src/operators/math/pool_2x2.h
浏览文件 @
292e69a8
...
@@ -26,11 +26,11 @@ namespace math {
...
@@ -26,11 +26,11 @@ namespace math {
using
framework
::
Tensor
;
using
framework
::
Tensor
;
using
std
::
vector
;
using
std
::
vector
;
void
Pool2x2Max
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
const
Tensor
*
input
,
void
Pool2x2Max
s2p0
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
Tensor
*
output
);
const
Tensor
*
input
,
Tensor
*
output
);
void
Pool2x2Avg
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
const
Tensor
*
in_x
,
void
Pool2x2Avg
s2p0
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
Tensor
*
out
);
const
Tensor
*
in_x
,
Tensor
*
out
);
}
// namespace math
}
// namespace math
}
// namespace operators
}
// namespace operators
}
// namespace paddle_mobile
}
// namespace paddle_mobile
...
...
src/operators/math/pool_3x3.cpp
浏览文件 @
292e69a8
...
@@ -558,15 +558,13 @@ void Pool3x3Max(vector<int> strides, vector<int> paddings, const Tensor *input,
...
@@ -558,15 +558,13 @@ void Pool3x3Max(vector<int> strides, vector<int> paddings, const Tensor *input,
const
float
*
input_seg
=
input_data
+
c
*
input_channel_stride
;
const
float
*
input_seg
=
input_data
+
c
*
input_channel_stride
;
float
*
output_seg
=
output_data
+
c
*
output_channel_stride
;
float
*
output_seg
=
output_data
+
c
*
output_channel_stride
;
for
(
int
ph
=
0
;
ph
<
output_height
;
ph
++
)
{
for
(
int
ph
=
0
;
ph
<
output_height
;
ph
++
)
{
for
(
int
pw
=
0
;
pw
<
output_width
;
pw
++
)
{
int
hstart
=
ph
*
stride
-
padding
;
int
hstart
=
ph
*
stride
-
padding
;
int
wstart
=
pw
*
stride
-
padding
;
int
hend
=
min
(
hstart
+
3
,
input_height
);
int
hend
=
min
(
hstart
+
3
,
input_height
+
padding
);
int
wend
=
min
(
wstart
+
3
,
input_width
+
padding
);
hstart
=
max
(
hstart
,
0
);
hstart
=
max
(
hstart
,
0
);
for
(
int
pw
=
0
;
pw
<
output_width
;
pw
++
)
{
int
wstart
=
pw
*
stride
-
padding
;
int
wend
=
min
(
wstart
+
3
,
input_width
);
wstart
=
max
(
wstart
,
0
);
wstart
=
max
(
wstart
,
0
);
hend
=
min
(
hend
,
input_height
);
wend
=
min
(
wend
,
input_width
);
const
float
*
pos1
=
input_seg
+
hstart
*
input_width
+
wstart
;
const
float
*
pos1
=
input_seg
+
hstart
*
input_width
+
wstart
;
const
float
*
pos2
=
input_seg
+
(
hstart
+
1
)
*
input_width
+
wstart
;
const
float
*
pos2
=
input_seg
+
(
hstart
+
1
)
*
input_width
+
wstart
;
const
float
*
pos3
=
input_seg
+
(
hstart
+
2
)
*
input_width
+
wstart
;
const
float
*
pos3
=
input_seg
+
(
hstart
+
2
)
*
input_width
+
wstart
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录