Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
9ad9635a
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
9ad9635a
编写于
3月 16, 2019
作者:
xiebaiyuan
提交者:
GitHub
3月 16, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1504 from hjchen2/backup
Fix no effect if setting thread count in another thread
上级
88ede1e9
33de575e
变更
11
显示空白变更内容
内联
并排
Showing
11 changed files
with
220 additions
and
76 deletions
+220
-76
src/framework/context.h
src/framework/context.h
+38
-14
src/framework/executor.cpp
src/framework/executor.cpp
+19
-6
src/framework/executor.h
src/framework/executor.h
+2
-0
src/io/paddle_mobile.cpp
src/io/paddle_mobile.cpp
+1
-3
src/operators/kernel/arm/density_prior_box_kernel.cpp
src/operators/kernel/arm/density_prior_box_kernel.cpp
+36
-0
src/operators/kernel/arm/sequence_pool_kernel.cpp
src/operators/kernel/arm/sequence_pool_kernel.cpp
+7
-7
src/operators/kernel/prior_box_kernel.h
src/operators/kernel/prior_box_kernel.h
+59
-11
src/operators/math/gemm/cblas.cc
src/operators/math/gemm/cblas.cc
+0
-1
src/operators/math/gemm/executor.h
src/operators/math/gemm/executor.h
+7
-7
src/operators/prior_box_op.cpp
src/operators/prior_box_op.cpp
+45
-5
src/operators/prior_box_op.h
src/operators/prior_box_op.h
+6
-22
未找到文件。
src/
operators/math/gemm/cpu_info
.h
→
src/
framework/context
.h
浏览文件 @
9ad9635a
...
@@ -14,42 +14,66 @@ limitations under the License. */
...
@@ -14,42 +14,66 @@ limitations under the License. */
#pragma once
#pragma once
#if _OPENMP
#include <omp.h>
#endif
#define MOBILE_MAX_CPU_NUM 8
#define MOBILE_MAX_CPU_NUM 8
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
operators
{
namespace
framework
{
namespace
math
{
struct
CPU
Info
{
struct
CPU
Context
{
private:
private:
CPU
Info
(
)
{
CPU
Context
()
:
num_cpus
(
4
),
num_threads
(
1
)
{
// TODO(hjchen2)
// TODO(hjchen2)
num_cpus
=
4
;
for
(
int
i
=
0
;
i
<
num_cpus
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_cpus
;
++
i
)
{
cpu_frequenc
y
[
i
]
=
2400
;
// 2400 MHz
cpu_frequenc
ies
[
i
]
=
2400
;
// 2400 MHz
max_cpu_frequenc
y
[
i
]
=
2400
;
// 2400 MHz
max_cpu_frequenc
ies
[
i
]
=
2400
;
// 2400 MHz
}
}
// L1_cache = 32000; // 32K
// L1_cache = 32000; // 32K
L1_cache
=
32
*
1024
;
L1_cache
=
32
*
1024
;
L2_cache
=
2000000
;
// 2M
L2_cache
=
2000000
;
// 2M
// L2_cache = 512000;
// L2_cache = 512000;
}
}
virtual
~
CPUInfo
()
{}
public:
public:
static
CPUInfo
*
Info
()
{
// Records the number of threads to use in this context.
//
// With OpenMP enabled, the request is forwarded to omp_set_num_threads() and
// the stored value is capped at omp_get_max_threads(). Without OpenMP the
// argument is ignored and the stored value is only clamped to at least 1.
//
// @param threads requested thread count; values below 1 are treated as 1.
void set_num_threads(int threads) {
#if _OPENMP
  // omp_set_num_threads() expects a positive value, so normalise the request
  // before handing it to the runtime.
  if (threads < 1) {
    threads = 1;
  }
  omp_set_num_threads(threads);
  const int max_threads = omp_get_max_threads();
  num_threads = (threads <= max_threads) ? threads : max_threads;
#endif
  // Never report fewer than one thread, whichever branch ran above.
  num_threads = (num_threads > 1) ? num_threads : 1;
}
virtual
~
CPUContext
()
{}
public:
static
CPUContext
*
Context
()
{
static
CPUContext
*
ctx
=
new
CPUContext
;
return
ctx
;
return
ctx
;
}
}
int
num_cpus
;
int
num_cpus
;
int
cpu_frequency
[
MOBILE_MAX_CPU_NUM
];
int
num_threads
;
int
max_cpu_frequency
[
MOBILE_MAX_CPU_NUM
];
int
cpu_frequencies
[
MOBILE_MAX_CPU_NUM
];
int
max_cpu_frequencies
[
MOBILE_MAX_CPU_NUM
];
int
L1_cache
;
int
L1_cache
;
int
L2_cache
;
int
L2_cache
;
};
};
}
// namespace math
inline
void
set_global_num_threads
(
int
threads
)
{
}
// namespace operators
CPUContext
::
Context
()
->
set_num_threads
(
threads
);
}
inline
int
get_global_num_threads
()
{
return
CPUContext
::
Context
()
->
num_threads
;
}
}
// namespace framework
}
// namespace paddle_mobile
}
// namespace paddle_mobile
src/framework/executor.cpp
浏览文件 @
9ad9635a
...
@@ -18,6 +18,7 @@ limitations under the License. */
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include <vector>
#include <vector>
#include "common/enforce.h"
#include "common/enforce.h"
#include "common/log.h"
#include "common/log.h"
#include "framework/context.h"
#include "framework/framework.pb-c.h"
#include "framework/framework.pb-c.h"
#include "framework/lod_tensor.h"
#include "framework/lod_tensor.h"
#include "framework/operator.h"
#include "framework/operator.h"
...
@@ -37,6 +38,11 @@ namespace framework {
...
@@ -37,6 +38,11 @@ namespace framework {
#pragma mark - executor
#pragma mark - executor
// Forwards the requested thread count to set_global_num_threads(), so the
// value is stored globally rather than on this executor.
template <typename Device, typename T>
void Executor<Device, T>::SetThreadNum(int threads) {
  set_global_num_threads(threads);
}
template
<
typename
Device
,
typename
T
>
template
<
typename
Device
,
typename
T
>
Executor
<
Device
,
T
>::
Executor
(
const
Program
<
Device
>
&
program
,
Executor
<
Device
,
T
>::
Executor
(
const
Program
<
Device
>
&
program
,
paddle_mobile
::
PaddleMobileConfigInternal
config
,
paddle_mobile
::
PaddleMobileConfigInternal
config
,
...
@@ -444,6 +450,9 @@ std::shared_ptr<LoDTensor> Executor<Device, T>::GetOutput(
...
@@ -444,6 +450,9 @@ std::shared_ptr<LoDTensor> Executor<Device, T>::GetOutput(
template
<
typename
Device
,
typename
T
>
template
<
typename
Device
,
typename
T
>
PMStatus
Executor
<
Device
,
T
>::
Predict
()
{
PMStatus
Executor
<
Device
,
T
>::
Predict
()
{
#if _OPENMP
omp_set_num_threads
(
get_global_num_threads
());
#endif
#ifdef PADDLE_MOBILE_PROFILE
#ifdef PADDLE_MOBILE_PROFILE
std
::
vector
<
ProfInfo
>
profile
(
ops_of_block0_
.
size
());
std
::
vector
<
ProfInfo
>
profile
(
ops_of_block0_
.
size
());
struct
timespec
ts
;
struct
timespec
ts
;
...
@@ -654,14 +663,18 @@ void Executor<GPU_CL, float>::InitNoPersistableMemory(
...
@@ -654,14 +663,18 @@ void Executor<GPU_CL, float>::InitNoPersistableMemory(
output
->
Resize
(
input_tensor
.
dims
());
output
->
Resize
(
input_tensor
.
dims
());
output
->
mutable_data
<
float
>
();
output
->
mutable_data
<
float
>
();
}
}
template
<
>
template
<
>
void
Executor
<
GPU_CL
,
float
>::
SetInput
(
const
Tensor
&
input
,
void
Executor
<
GPU_CL
,
float
>::
SetInput
(
const
Tensor
&
input
,
const
std
::
string
&
var_name
)
{
const
std
::
string
&
var_name
)
{
auto
*
target_var
=
program_
.
scope
->
FindVar
(
var_name
);
int
index
=
0
;
PADDLE_MOBILE_ENFORCE
(
target_var
!=
nullptr
,
"Variable %s is not exist"
,
if
(
feed_indices_
.
find
(
var_name
)
!=
feed_indices_
.
end
())
{
var_name
.
c_str
());
index
=
feed_indices_
.
find
(
var_name
)
->
second
;
}
auto
*
feed_var
=
program_
.
scope
->
Var
(
"feed"
);
framework
::
LoDTensor
*
target_tensor
=
&
(
feed_var
->
template
GetMutable
<
framework
::
LoDTensorArray
>()
->
at
(
index
));
auto
*
target_tensor
=
target_var
->
template
GetMutable
<
LoDTensor
>();
DLOG
<<
"config_.load_when_predict "
<<
config_
.
load_when_predict
;
DLOG
<<
"config_.load_when_predict "
<<
config_
.
load_when_predict
;
DLOG
<<
"target_tensor->IsInitialized() "
<<
target_tensor
->
IsInitialized
();
DLOG
<<
"target_tensor->IsInitialized() "
<<
target_tensor
->
IsInitialized
();
DLOG
<<
"target_tensor->dims() "
<<
target_tensor
->
dims
();
DLOG
<<
"target_tensor->dims() "
<<
target_tensor
->
dims
();
...
@@ -772,7 +785,7 @@ void Executor<GPU_CL, float>::InitMemory() {
...
@@ -772,7 +785,7 @@ void Executor<GPU_CL, float>::InitMemory() {
if
(
var_desc
->
Persistable
())
{
if
(
var_desc
->
Persistable
())
{
CLImage
*
cl_image
=
nullptr
;
CLImage
*
cl_image
=
nullptr
;
if
(
var_desc
->
Name
()
==
"feed"
||
var_desc
->
Name
()
==
"fetch"
)
{
if
(
var_desc
->
Name
()
==
"feed"
||
var_desc
->
Name
()
==
"fetch"
)
{
var
->
template
GetMutable
<
LoDTensor
>();
var
->
template
GetMutable
<
framework
::
LoDTensorArray
>();
continue
;
continue
;
}
else
{
}
else
{
cl_image
=
var
->
template
GetMutable
<
CLImage
>();
cl_image
=
var
->
template
GetMutable
<
CLImage
>();
...
@@ -840,7 +853,7 @@ void Executor<GPU_CL, float>::InitCombineMemory() {
...
@@ -840,7 +853,7 @@ void Executor<GPU_CL, float>::InitCombineMemory() {
if
(
var_desc
->
Persistable
())
{
if
(
var_desc
->
Persistable
())
{
CLImage
*
cl_image
=
nullptr
;
CLImage
*
cl_image
=
nullptr
;
if
(
var_desc
->
Name
()
==
"feed"
||
var_desc
->
Name
()
==
"fetch"
)
{
if
(
var_desc
->
Name
()
==
"feed"
||
var_desc
->
Name
()
==
"fetch"
)
{
var
->
template
GetMutable
<
LoDTensor
>();
var
->
template
GetMutable
<
framework
::
LoDTensorArray
>();
continue
;
continue
;
}
else
{
}
else
{
cl_image
=
var
->
template
GetMutable
<
CLImage
>();
cl_image
=
var
->
template
GetMutable
<
CLImage
>();
...
...
src/framework/executor.h
浏览文件 @
9ad9635a
...
@@ -36,6 +36,8 @@ class Executor {
...
@@ -36,6 +36,8 @@ class Executor {
paddle_mobile
::
PaddleMobileConfigInternal
config
,
int
batch_size
=
1
,
paddle_mobile
::
PaddleMobileConfigInternal
config
,
int
batch_size
=
1
,
const
bool
use_optimize
=
true
,
const
bool
lod_mode
=
false
);
const
bool
use_optimize
=
true
,
const
bool
lod_mode
=
false
);
void
SetThreadNum
(
int
threads
);
PMStatus
Predict
(
const
std
::
vector
<
std
::
pair
<
std
::
string
,
Tensor
>>
&
inputs
);
PMStatus
Predict
(
const
std
::
vector
<
std
::
pair
<
std
::
string
,
Tensor
>>
&
inputs
);
PMStatus
Predict
(
PMStatus
Predict
(
const
std
::
vector
<
std
::
pair
<
std
::
string
,
LoDTensor
>>
&
inputs
);
const
std
::
vector
<
std
::
pair
<
std
::
string
,
LoDTensor
>>
&
inputs
);
...
...
src/io/paddle_mobile.cpp
浏览文件 @
9ad9635a
...
@@ -28,9 +28,7 @@ namespace paddle_mobile {
...
@@ -28,9 +28,7 @@ namespace paddle_mobile {
template
<
typename
Device
,
typename
T
>
template
<
typename
Device
,
typename
T
>
void
PaddleMobile
<
Device
,
T
>::
SetThreadNum
(
int
num
)
{
void
PaddleMobile
<
Device
,
T
>::
SetThreadNum
(
int
num
)
{
#ifdef _OPENMP
executor_
->
SetThreadNum
(
num
);
omp_set_num_threads
(
num
);
#endif
}
}
template
<
typename
Device
,
typename
T
>
template
<
typename
Device
,
typename
T
>
...
...
src/operators/kernel/arm/density_prior_box_kernel.cpp
0 → 100644
浏览文件 @
9ad9635a
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef DENSITY_PRIORBOX_OP
#include "operators/kernel/prior_box_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
// CPU/float specialisation of the kernel initialiser. It does no setup and
// always reports success; `param` is not inspected.
template <>
bool DensityPriorBoxKernel<CPU, float>::Init(DensityPriorBoxParam<CPU> *param) {
  return true;
}
// CPU/float specialisation of the kernel body. Currently a stub: it performs
// no computation and leaves the outputs in `param` untouched.
template <>
void DensityPriorBoxKernel<CPU, float>::Compute(
    const DensityPriorBoxParam<CPU> &param) {
  // TODO(hjchen2)
}
}
// namespace operators
}
// namespace paddle_mobile
#endif // DENSITY_PRIORBOX_OP
src/operators/kernel/arm/sequence_pool_kernel.cpp
浏览文件 @
9ad9635a
...
@@ -21,7 +21,7 @@ limitations under the License. */
...
@@ -21,7 +21,7 @@ limitations under the License. */
#include "common/types.h"
#include "common/types.h"
#include "operators/kernel/sequence_kernels.h"
#include "operators/kernel/sequence_kernels.h"
#include "operators/math/pooling.h"
#include "operators/math/pooling.h"
#if
defined(__ARM_NEON__) || defined(__ARM_NEON)
#if
def __ARM_NEON__
#include <arm_neon.h>
#include <arm_neon.h>
#endif // __ARM_NEON__
#endif // __ARM_NEON__
...
@@ -44,7 +44,7 @@ void SequencePoolImpl(const framework::LoDTensor &input,
...
@@ -44,7 +44,7 @@ void SequencePoolImpl(const framework::LoDTensor &input,
if
(
width
==
1
)
{
if
(
width
==
1
)
{
float
max
=
-
std
::
numeric_limits
<
float
>::
max
();
float
max
=
-
std
::
numeric_limits
<
float
>::
max
();
int
remain_h
=
height
;
int
remain_h
=
height
;
#if
defined(__ARM_NEON__) || defined(__ARM_NEON)
#if
def __ARM_NEON__
int
loop
=
remain_h
>>
2
;
int
loop
=
remain_h
>>
2
;
remain_h
=
remain_h
&
0x3
;
remain_h
=
remain_h
&
0x3
;
float32x4_t
__max4
=
math
::
vPoolInitq_f32
<
MAX
>
();
float32x4_t
__max4
=
math
::
vPoolInitq_f32
<
MAX
>
();
...
@@ -67,11 +67,11 @@ void SequencePoolImpl(const framework::LoDTensor &input,
...
@@ -67,11 +67,11 @@ void SequencePoolImpl(const framework::LoDTensor &input,
in_ptr
+=
width
;
in_ptr
+=
width
;
int
remain_h
=
height
-
1
;
int
remain_h
=
height
-
1
;
int
remain_w_start
=
0
;
int
remain_w_start
=
0
;
#if
defined(__ARM_NEON__) || defined(__ARM_NEON)
#if
def __ARM_NEON__
remain_w_start
=
width
&
0xfffc
;
remain_w_start
=
width
&
0xfffc
;
#endif // __ARM_NEON__
#endif // __ARM_NEON__
for
(
int
h
=
0
;
h
<
remain_h
;
++
h
)
{
for
(
int
h
=
0
;
h
<
remain_h
;
++
h
)
{
#if
defined(__ARM_NEON__) || defined(__ARM_NEON)
#if
def __ARM_NEON__
for
(
int
w
=
0
;
w
<
width
;
w
+=
4
)
{
for
(
int
w
=
0
;
w
<
width
;
w
+=
4
)
{
float32x4_t
__in
=
vld1q_f32
(
in_ptr
+
w
);
float32x4_t
__in
=
vld1q_f32
(
in_ptr
+
w
);
float32x4_t
__out
=
vld1q_f32
(
out_ptr
+
w
);
float32x4_t
__out
=
vld1q_f32
(
out_ptr
+
w
);
...
@@ -104,7 +104,7 @@ void SequencePoolImpl<SUM, float>(const framework::LoDTensor &input,
...
@@ -104,7 +104,7 @@ void SequencePoolImpl<SUM, float>(const framework::LoDTensor &input,
if
(
width
==
1
)
{
if
(
width
==
1
)
{
float
sum
=
0.
f
;
float
sum
=
0.
f
;
int
remain_h
=
height
;
int
remain_h
=
height
;
#if
defined(__ARM_NEON__) || defined(__ARM_NEON)
#if
def __ARM_NEON__
int
loop
=
remain_h
>>
2
;
int
loop
=
remain_h
>>
2
;
remain_h
=
remain_h
&
0x3
;
remain_h
=
remain_h
&
0x3
;
float32x4_t
__sum4
=
vdupq_n_f32
(
0.
f
);
float32x4_t
__sum4
=
vdupq_n_f32
(
0.
f
);
...
@@ -126,12 +126,12 @@ void SequencePoolImpl<SUM, float>(const framework::LoDTensor &input,
...
@@ -126,12 +126,12 @@ void SequencePoolImpl<SUM, float>(const framework::LoDTensor &input,
in_ptr
+=
width
;
in_ptr
+=
width
;
int
remain_h
=
height
-
1
;
int
remain_h
=
height
-
1
;
int
remain_w_start
=
0
;
int
remain_w_start
=
0
;
#if
defined(__ARM_NEON__) || defined(__ARM_NEON)
#if
def __ARM_NEON__
int
loop_w
=
width
>>
2
;
int
loop_w
=
width
>>
2
;
remain_w_start
=
width
&
0xfffc
;
remain_w_start
=
width
&
0xfffc
;
#endif // __ARM_NEON__
#endif // __ARM_NEON__
for
(
int
h
=
0
;
h
<
remain_h
;
++
h
)
{
for
(
int
h
=
0
;
h
<
remain_h
;
++
h
)
{
#if
defined(__ARM_NEON__) || defined(__ARM_NEON)
#if
def __ARM_NEON__
for
(
int
w
=
0
;
w
<
width
-
3
;
w
+=
4
)
{
for
(
int
w
=
0
;
w
<
width
-
3
;
w
+=
4
)
{
float32x4_t
__in
=
vld1q_f32
(
in_ptr
+
w
);
float32x4_t
__in
=
vld1q_f32
(
in_ptr
+
w
);
float32x4_t
__out
=
vld1q_f32
(
out_ptr
+
w
);
float32x4_t
__out
=
vld1q_f32
(
out_ptr
+
w
);
...
...
src/operators/kernel/prior_box_kernel.h
浏览文件 @
9ad9635a
...
@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef PRIORBOX_OP
#pragma once
#pragma once
#include <algorithm>
#include <algorithm>
...
@@ -26,9 +24,10 @@ limitations under the License. */
...
@@ -26,9 +24,10 @@ limitations under the License. */
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
operators
{
namespace
operators
{
inline
void
ExpandAspectRatios
(
const
std
::
vector
<
float
>&
input_aspect_ratior
,
#ifdef PRIORBOX_OP
inline
void
ExpandAspectRatios
(
const
std
::
vector
<
float
>
&
input_aspect_ratior
,
bool
flip
,
bool
flip
,
std
::
vector
<
float
>
*
output_aspect_ratior
)
{
std
::
vector
<
float
>
*
output_aspect_ratior
)
{
constexpr
float
epsilon
=
1e-6
;
constexpr
float
epsilon
=
1e-6
;
output_aspect_ratior
->
clear
();
output_aspect_ratior
->
clear
();
output_aspect_ratior
->
push_back
(
1.0
f
);
output_aspect_ratior
->
push_back
(
1.0
f
);
...
@@ -50,14 +49,63 @@ inline void ExpandAspectRatios(const std::vector<float>& input_aspect_ratior,
...
@@ -50,14 +49,63 @@ inline void ExpandAspectRatios(const std::vector<float>& input_aspect_ratior,
}
}
}
}
template
<
typename
DeviceType
,
typename
T
>
DECLARE_KERNEL
(
PriorBox
,
PriorBoxParam
);
class
PriorBoxKernel
#endif // PRIORBOX_OP
:
public
framework
::
OpKernelBase
<
DeviceType
,
PriorBoxParam
<
DeviceType
>>
{
#ifdef DENSITY_PRIORBOX_OP
template
<
typename
Dtype
>
class
DensityPriorBoxParam
:
public
OpParam
{
typedef
typename
DtypeTensorTrait
<
Dtype
>::
gtype
GType
;
public:
DensityPriorBoxParam
(
const
VariableNameMap
&
inputs
,
const
VariableNameMap
&
outputs
,
const
AttributeMap
&
attrs
,
Scope
*
scope
)
:
OpParam
(
inputs
,
outputs
,
attrs
,
scope
)
{
input_
=
InputFrom
<
GType
>
(
inputs
,
*
scope
);
input_image_
=
InputImageFrom
<
GType
>
(
inputs
,
*
scope
);
output_boxes_
=
OutputBoxesFrom
<
GType
>
(
outputs
,
*
scope
);
output_variances_
=
OutputVariancesFrom
<
GType
>
(
outputs
,
*
scope
);
variances_
=
GetAttr
<
vector
<
float
>>
(
"variances"
,
attrs
);
clip_
=
GetAttr
<
bool
>
(
"clip"
,
attrs
);
flatten_to_2d_
=
GetAttr
<
bool
>
(
"flatten_to_2d"
,
attrs
);
step_w_
=
GetAttr
<
float
>
(
"step_w"
,
attrs
);
step_h_
=
GetAttr
<
float
>
(
"step_h"
,
attrs
);
offset_
=
GetAttr
<
float
>
(
"offset"
,
attrs
);
fixed_sizes_
=
GetAttr
<
vector
<
float
>>
(
"fixed_sizes"
,
attrs
);
fixed_ratios_
=
GetAttr
<
vector
<
float
>>
(
"fixed_ratios"
,
attrs
);
densities_
=
GetAttr
<
vector
<
int
>>
(
"densities"
,
attrs
);
}
const
GType
*
Input
()
const
{
return
input_
;
}
const
GType
*
InputImage
()
const
{
return
input_image_
;
}
GType
*
OutputBoxes
()
const
{
return
output_boxes_
;
}
GType
*
OutputVariances
()
const
{
return
output_variances_
;
}
const
bool
Clip
()
const
{
return
clip_
;
}
const
bool
FlattenTo2d
()
const
{
return
flatten_to_2d_
;
}
const
float
StepW
()
const
{
return
step_w_
;
}
const
float
StepH
()
const
{
return
step_h_
;
}
const
float
Offset
()
const
{
return
offset_
;
}
const
vector
<
float
>
&
FixedSizes
()
const
{
return
fixed_sizes_
;
}
const
vector
<
float
>
&
FixedRatios
()
const
{
return
fixed_ratios_
;
}
const
vector
<
int
>
&
Densities
()
const
{
return
densities_
;
}
public:
public:
void
Compute
(
const
PriorBoxParam
<
DeviceType
>&
param
);
GType
*
input_
;
bool
Init
(
PriorBoxParam
<
DeviceType
>*
param
);
GType
*
input_image_
;
GType
*
output_boxes_
GType
*
output_variances_
;
bool
clip_
;
bool
flatten_to_2d_
;
float
step_w_
;
float
step_h_
;
float
offset_
;
vector
<
float
>
fixed_sizes_
;
vector
<
float
>
fixed_ratios_
;
vector
<
int
>
densities_
;
};
};
DECLARE_KERNEL
(
DensityPriorBox
,
DensityPriorBoxParam
);
#endif // DENSITY_PRIORBOX_OP
}
// namespace operators
}
// namespace operators
}
// namespace paddle_mobile
}
// namespace paddle_mobile
#endif
src/operators/math/gemm/cblas.cc
浏览文件 @
9ad9635a
...
@@ -17,7 +17,6 @@ limitations under the License. */
...
@@ -17,7 +17,6 @@ limitations under the License. */
#pragma once
#pragma once
#include "operators/math/gemm/cblas.h"
#include "operators/math/gemm/cblas.h"
#include "operators/math/gemm/cpu_info.h"
#include "operators/math/gemm/executor.h"
#include "operators/math/gemm/executor.h"
#include "operators/math/gemm/strategy.h"
#include "operators/math/gemm/strategy.h"
...
...
src/operators/math/gemm/executor.h
浏览文件 @
9ad9635a
...
@@ -19,17 +19,17 @@ limitations under the License. */
...
@@ -19,17 +19,17 @@ limitations under the License. */
#include <omp.h>
#include <omp.h>
#endif
#endif
// #include <sys/time.h>
// #include <sys/time.h>
//
#include <iostream>
#include <iostream>
#include "common/log.h"
#include "common/log.h"
#include "framework/context.h"
#include "memory/t_malloc.h"
#include "memory/t_malloc.h"
#include "operators/math/gemm/cpu_info.h"
#include "operators/math/gemm/gemm_kernel.h"
#include "operators/math/gemm/gemm_kernel.h"
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
static
CPUInfo
*
info
=
CPUInfo
::
Info
();
static
framework
::
CPUContext
*
g_cpu_ctx
=
framework
::
CPUContext
::
Context
();
int
CeilDiv
(
const
int
&
x
,
const
int
&
y
)
{
return
(
x
+
y
-
1
)
/
y
;
}
int
CeilDiv
(
const
int
&
x
,
const
int
&
y
)
{
return
(
x
+
y
-
1
)
/
y
;
}
unsigned
int
ResetL1Cache
(
const
unsigned
int
L1_size
,
const
int
thread_num
,
unsigned
int
ResetL1Cache
(
const
unsigned
int
L1_size
,
const
int
thread_num
,
...
@@ -70,11 +70,11 @@ class GemmExecutor : public Executor {
...
@@ -70,11 +70,11 @@ class GemmExecutor : public Executor {
unsigned
int
L1_size
=
0
;
unsigned
int
L1_size
=
0
;
unsigned
int
L2_size
=
0
;
unsigned
int
L2_size
=
0
;
if
(
M_
>
N_
)
{
if
(
M_
>
N_
)
{
L2_size
=
ResetL1Cache
(
info
->
L1_cache
,
num_threads_
,
M_
,
K_
);
L2_size
=
ResetL1Cache
(
g_cpu_ctx
->
L1_cache
,
num_threads_
,
M_
,
K_
);
L1_size
=
info
->
L2_cache
;
L1_size
=
g_cpu_ctx
->
L2_cache
;
}
else
{
}
else
{
L1_size
=
ResetL1Cache
(
info
->
L1_cache
,
num_threads_
,
N_
,
K_
);
L1_size
=
ResetL1Cache
(
g_cpu_ctx
->
L1_cache
,
num_threads_
,
N_
,
K_
);
L2_size
=
info
->
L2_cache
;
L2_size
=
g_cpu_ctx
->
L2_cache
;
}
}
rhs_tile_num_
=
L1_size
/
(
K_
*
sizeof
(
Itype
));
rhs_tile_num_
=
L1_size
/
(
K_
*
sizeof
(
Itype
));
...
...
src/operators/prior_box_op.cpp
浏览文件 @
9ad9635a
...
@@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef PRIORBOX_OP
#include "operators/prior_box_op.h"
#include "operators/prior_box_op.h"
#include <vector>
#include <vector>
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
operators
{
namespace
operators
{
#ifdef PRIORBOX_OP
template
<
typename
Dtype
,
typename
T
>
template
<
typename
Dtype
,
typename
T
>
void
PriorBoxOp
<
Dtype
,
T
>::
InferShape
()
const
{
void
PriorBoxOp
<
Dtype
,
T
>::
InferShape
()
const
{
auto
input_dims
=
this
->
param_
.
Input
()
->
dims
();
auto
input_dims
=
this
->
param_
.
Input
()
->
dims
();
...
@@ -44,15 +44,55 @@ void PriorBoxOp<Dtype, T>::InferShape() const {
...
@@ -44,15 +44,55 @@ void PriorBoxOp<Dtype, T>::InferShape() const {
this
->
param_
.
OutputBoxes
()
->
Resize
(
framework
::
make_ddim
(
dim_vec
));
this
->
param_
.
OutputBoxes
()
->
Resize
(
framework
::
make_ddim
(
dim_vec
));
this
->
param_
.
OutputVariances
()
->
Resize
(
framework
::
make_ddim
(
dim_vec
));
this
->
param_
.
OutputVariances
()
->
Resize
(
framework
::
make_ddim
(
dim_vec
));
}
}
#endif // PRIORBOX_OP
#ifdef DENSITY_PRIORBOX_OP
// Computes the shapes of the Boxes and Variances outputs.
//
// The number of priors per position is
//   sum over i of fixed_ratios.size() * densities[i]^2.
// When flatten_to_2d is false both outputs are resized to
//   {input_dims[2], input_dims[3], num_priors, 4};
// otherwise they are resized to
//   {input_dims[2] * input_dims[3] * num_priors, 4}.
template <typename Dtype, typename T>
void DensityPriorBoxOp<Dtype, T>::InferShape() const {
  auto input_dims = this->param_.Input()->dims();
  auto &fixed_ratios = this->param_.FixedRatios();
  auto &densities = this->param_.Densities();
  bool flatten = this->param_.FlattenTo2d();

  // Square each density with integer arithmetic: this keeps the count exact
  // and avoids a floating-point pow() round trip through double.
  size_t num_priors = 0;
  for (size_t i = 0; i < densities.size(); ++i) {
    const int64_t density = densities[i];
    num_priors += fixed_ratios.size() * static_cast<size_t>(density * density);
  }

  if (!flatten) {
    std::vector<int64_t> dim_vec(4);
    dim_vec[0] = input_dims[2];
    dim_vec[1] = input_dims[3];
    dim_vec[2] = static_cast<int64_t>(num_priors);
    dim_vec[3] = 4;
    this->param_.OutputBoxes()->Resize(framework::make_ddim(dim_vec));
    this->param_.OutputVariances()->Resize(framework::make_ddim(dim_vec));
  } else {
    int64_t dim0 =
        input_dims[2] * input_dims[3] * static_cast<int64_t>(num_priors);
    this->param_.OutputBoxes()->Resize(framework::make_ddim({dim0, 4}));
    this->param_.OutputVariances()->Resize(framework::make_ddim({dim0, 4}));
  }
}
#endif // DENSITY_PRIORBOX_OP
}
// namespace operators
}
// namespace operators
}
// namespace paddle_mobile
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
namespace
ops
=
paddle_mobile
::
operators
;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
#ifdef PRIORBOX_OP
REGISTER_OPERATOR_CPU
(
prior_box
,
ops
::
PriorBoxOp
);
REGISTER_OPERATOR_CPU
(
prior_box
,
ops
::
PriorBoxOp
);
#endif
#endif // PRIORBOX_OP
#ifdef DENSITY_PRIORBOX_OP
REGISTER_OPERATOR_CPU
(
density_prior_box
,
ops
::
DensityPriorBoxOp
);
#endif // DENSITY_PRIORBOX_OP
#endif // PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CL
#ifdef PADDLE_MOBILE_CL
#ifdef PRIORBOX_OP
REGISTER_OPERATOR_CL
(
prior_box
,
ops
::
PriorBoxOp
);
REGISTER_OPERATOR_CL
(
prior_box
,
ops
::
PriorBoxOp
);
#endif
#endif
// PRIORBOX_OP
#endif
#endif
// PADDLE_MOBILE_CL
src/operators/prior_box_op.h
浏览文件 @
9ad9635a
...
@@ -12,12 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,12 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef PRIORBOX_OP
#pragma once
#pragma once
#include <string>
#include <string>
#include "framework/operator.h"
#include "framework/operator.h"
#include "operators/kernel/prior_box_kernel.h"
#include "operators/kernel/prior_box_kernel.h"
#include "operators/op_param.h"
#include "operators/op_param.h"
...
@@ -25,26 +22,13 @@ limitations under the License. */
...
@@ -25,26 +22,13 @@ limitations under the License. */
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
operators
{
namespace
operators
{
using
paddle_mobile
::
framework
::
Tensor
;
#ifdef PRIORBOX_OP
DECLARE_OPERATOR
(
PriorBox
,
PriorBoxParam
,
PriorBoxKernel
);
template
<
typename
DeviceType
,
typename
T
>
#endif
class
PriorBoxOp
:
public
framework
::
OperatorWithKernel
<
DeviceType
,
PriorBoxParam
<
DeviceType
>
,
operators
::
PriorBoxKernel
<
DeviceType
,
T
>>
{
public:
PriorBoxOp
(
const
std
::
string
&
type
,
const
VariableNameMap
&
inputs
,
const
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
,
framework
::
Scope
*
scope
)
:
framework
::
OperatorWithKernel
<
DeviceType
,
PriorBoxParam
<
DeviceType
>
,
operators
::
PriorBoxKernel
<
DeviceType
,
T
>>
(
type
,
inputs
,
outputs
,
attrs
,
scope
)
{}
void
InferShape
()
const
override
;
protected:
#ifdef DENSITY_PRIORBOX_OP
};
DECLARE_OPERATOR
(
DensityPriorBox
,
DensityPriorBoxParam
,
DensityPriorBoxKernel
);
#endif
}
// namespace operators
}
// namespace operators
}
// namespace paddle_mobile
}
// namespace paddle_mobile
#endif
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录