Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
a3e4c8de
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a3e4c8de
编写于
10月 15, 2018
作者:
R
Ray Liu
提交者:
GitHub
10月 15, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1084 from codeWorm2015/opencl
Opencl
上级
306c1284
09545cd9
变更
21
隐藏空白更改
内联
并排
Showing
21 changed file
with
167 addition
and
281 deletion
+167
-281
CMakeLists.txt
CMakeLists.txt
+1
-1
src/framework/cl/cl_engine.h
src/framework/cl/cl_engine.h
+1
-1
src/framework/cl/cl_half.cpp
src/framework/cl/cl_half.cpp
+14
-2
src/framework/cl/cl_half.h
src/framework/cl/cl_half.h
+5
-0
src/framework/cl/cl_helper.h
src/framework/cl/cl_helper.h
+7
-1
src/framework/cl/cl_image.h
src/framework/cl/cl_image.h
+33
-24
src/framework/cl/cl_scope.h
src/framework/cl/cl_scope.h
+6
-2
src/framework/executor.cpp
src/framework/executor.cpp
+20
-8
src/framework/operator.cpp
src/framework/operator.cpp
+5
-1
src/operators/kernel/cl/cl_kernel/conv_kernel.cl
src/operators/kernel/cl/cl_kernel/conv_kernel.cl
+3
-1
src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
+11
-4
src/operators/kernel/cl/conv_add_kernel.cpp
src/operators/kernel/cl/conv_add_kernel.cpp
+3
-0
src/operators/kernel/cl/conv_kernel.cpp
src/operators/kernel/cl/conv_kernel.cpp
+38
-3
src/operators/kernel/cl/depthwise_conv_kernel.cpp
src/operators/kernel/cl/depthwise_conv_kernel.cpp
+1
-0
src/operators/op_param.h
src/operators/op_param.h
+1
-0
test/net/test_mobilenet_GPU.cpp
test/net/test_mobilenet_GPU.cpp
+18
-17
tools/web-exporter/CMakeLists.txt
tools/web-exporter/CMakeLists.txt
+0
-20
tools/web-exporter/export-nodejs.cpp
tools/web-exporter/export-nodejs.cpp
+0
-49
tools/web-exporter/export-scope.cpp
tools/web-exporter/export-scope.cpp
+0
-34
tools/web-exporter/export.cpp
tools/web-exporter/export.cpp
+0
-52
tools/web-exporter/export.h
tools/web-exporter/export.h
+0
-61
未找到文件。
CMakeLists.txt
浏览文件 @
a3e4c8de
...
...
@@ -4,7 +4,7 @@ option(USE_OPENMP "openmp support" OFF)
project
(
paddle-mobile
)
option
(
DEBUGING
"enable debug mode"
ON
)
option
(
USE_EXCEPTION
"use std exception"
O
FF
)
option
(
USE_EXCEPTION
"use std exception"
O
N
)
option
(
LOG_PROFILE
"log profile"
OFF
)
# select the platform to build
option
(
CPU
"armv7 with neon"
OFF
)
...
...
src/framework/cl/cl_engine.h
浏览文件 @
a3e4c8de
...
...
@@ -52,7 +52,7 @@ class CLEngine {
cl_context
context
,
std
::
string
file_name
)
{
FILE
*
file
=
fopen
(
file_name
.
c_str
(),
"rb"
);
PADDLE_MOBILE_ENFORCE
(
file
!=
nullptr
,
"can't open file: %s "
,
filename
.
c_str
());
file
_
name
.
c_str
());
fseek
(
file
,
0
,
SEEK_END
);
int64_t
size
=
ftell
(
file
);
PADDLE_MOBILE_ENFORCE
(
size
>
0
,
"size is too small"
);
...
...
src/framework/cl/cl_half.cpp
浏览文件 @
a3e4c8de
...
...
@@ -488,7 +488,7 @@ static const uint8_t shifttable[512] = {
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x0d
};
half_t
float2half
(
float
f
)
{
uint32_t
v
=
*
reinterpret_cast
<
uint32_t
*>
(
&
f
);
uint32_t
v
=
*
reinterpret_cast
<
uint32_t
*>
(
&
f
);
return
basetable
[(
v
>>
23
)
&
0x1ff
]
+
((
v
&
0x007fffff
)
>>
shifttable
[(
v
>>
23
)
&
0x1ff
]);
}
...
...
@@ -496,5 +496,17 @@ half_t float2half(float f) {
float
half2float
(
half_t
h
)
{
uint32_t
v
=
mantissatable
[
offsettable
[
h
>>
10
]
+
(
h
&
0x3ff
)]
+
exponenttable
[
h
>>
10
];
return
*
reinterpret_cast
<
float
*>
(
&
v
);
return
*
reinterpret_cast
<
float
*>
(
&
v
);
}
void
FloatArray2HalfArray
(
float
*
f_array
,
half_t
*
h_array
,
int
count
)
{
for
(
int
i
=
0
;
i
<
count
;
++
i
)
{
h_array
[
i
]
=
float2half
(
f_array
[
i
]);
}
}
// Converts `count` consecutive half_t values starting at `h_array` into
// floats stored at `f_array`, one element at a time via half2float().
// Nothing is written when `count` is zero or negative.
void HalfArray2FloatArray(half_t *h_array, float *f_array, int count) {
  for (int i = 0; i < count; ++i) {
    // Must be half2float: the previous float2half call implicitly widened the
    // half bits to a float, re-encoded them as a half, and stored that
    // integer as a float -- i.e. garbage rather than the decoded value.
    f_array[i] = half2float(h_array[i]);
  }
}
src/framework/cl/cl_half.h
浏览文件 @
a3e4c8de
...
...
@@ -18,4 +18,9 @@ limitations under the License. */
typedef
uint16_t
half_t
;
half_t
float2half
(
float
f
);
float
half2float
(
half_t
h
);
void
FloatArray2HalfArray
(
float
*
f_array
,
half_t
*
h_array
,
int
count
);
void
HalfArray2FloatArray
(
half_t
*
h_array
,
float
*
f_array
,
int
count
);
src/framework/cl/cl_helper.h
浏览文件 @
a3e4c8de
...
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include <type_traits>
#include <vector>
#include "common/log.h"
#include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_image.h"
#include "framework/cl/cl_scope.h"
...
...
@@ -32,11 +33,16 @@ class CLHelper {
explicit
CLHelper
(
CLScope
*
scope
)
:
scope_
(
scope
)
{}
void
AddKernel
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
file_name
)
{
DLOG
<<
" begin add kernel "
;
auto
kernel
=
scope_
->
GetKernel
(
kernel_name
,
file_name
);
DLOG
<<
" add kernel ing "
;
kernels
.
emplace_back
(
std
::
move
(
kernel
));
}
cl_kernel
KernelAt
(
const
int
index
)
{
return
kernels
[
index
].
get
();
}
cl_kernel
KernelAt
(
const
int
index
)
{
DLOG
<<
" kernel count: "
<<
kernels
.
size
();
return
kernels
[
index
].
get
();
}
cl_command_queue
CLCommandQueue
()
{
return
scope_
->
CommandQueue
();
}
...
...
src/framework/cl/cl_image.h
浏览文件 @
a3e4c8de
...
...
@@ -17,7 +17,9 @@ limitations under the License. */
#include <vector>
#include "CL/cl.h"
#include "framework/cl/cl_half.h"
#include "framework/cl/cl_tool.h"
#include "framework/ddim.h"
#include "framework/tensor.h"
...
...
@@ -59,6 +61,7 @@ class CLImage {
PADDLE_MOBILE_THROW_EXCEPTION
(
" empty image tensor data shouldn't have value"
);
}
DLOG
<<
" init empty image "
;
InitCLImage
(
context
,
nullptr
,
dim
);
initialized_
=
true
;
}
...
...
@@ -98,7 +101,8 @@ class CLImage {
T
*
data
()
const
{
if
(
initialized_
)
{
PADDLE_MOBILE_THROW_EXCEPTION
(
" cl image has initialized, tensor data has been deleted "
);
" cl image has initialized, tensor data has been deleted, can't use "
"tensor data"
);
}
return
reinterpret_cast
<
T
*>
(
tensor_data_
);
}
...
...
@@ -115,6 +119,7 @@ class CLImage {
private:
void
InitCLImage
(
cl_context
context
,
float
*
tensor_data
,
const
DDim
&
dim
)
{
DLOG
<<
" tensor dim: "
<<
dim
;
cl_image_format
cf
=
{.
image_channel_order
=
CL_RGBA
,
.
image_channel_data_type
=
CL_HALF_FLOAT
};
// NCHW -> [W * (C+3)/4, H * N]
...
...
@@ -132,29 +137,23 @@ class CLImage {
tensor_data_
[
i
]
=
0
;
}
}
size_t
N
,
C
,
H
,
W
;
if
(
tensor_dims_
.
size
()
==
4
)
{
N
=
tensor_dims_
[
0
];
if
(
N
<
0
)
{
N
=
1
;
}
C
=
tensor_dims_
[
1
];
H
=
tensor_dims_
[
2
];
W
=
tensor_dims_
[
3
];
width_of_one_block_
=
W
;
height_of_one_block_
=
H
;
size_t
new_dims
[]
=
{
1
,
1
,
1
,
1
};
}
else
if
(
tensor_dims_
.
size
()
==
1
)
{
N
=
1
;
C
=
tensor_dims_
[
0
];
H
=
1
;
W
=
1
;
width_of_one_block_
=
W
;
height_of_one_block_
=
H
;
for
(
int
j
=
0
;
j
<
dim
.
size
();
++
j
)
{
new_dims
[
4
-
dim
.
size
()
+
j
]
=
dim
[
j
];
}
size_t
N
,
C
,
H
,
W
;
N
=
new_dims
[
0
];
C
=
new_dims
[
1
];
H
=
new_dims
[
2
];
W
=
new_dims
[
3
];
width_of_one_block_
=
W
;
height_of_one_block_
=
H
;
size_t
width
=
W
*
((
C
+
3
)
/
4
);
size_t
height
=
H
*
N
;
...
...
@@ -193,9 +192,12 @@ class CLImage {
}
}
cl_int
err
;
DLOG
<<
" image width: "
<<
width
;
DLOG
<<
" image height: "
<<
height
;
cl_image_
=
clCreateImage2D
(
context
,
// cl_context context
CL_MEM_READ_WRITE
|
CL_MEM_COPY_HOST_PTR
,
// cl_mem_flags flags
context
,
// cl_context context
CL_MEM_READ_WRITE
|
(
imageData
?
CL_MEM_COPY_HOST_PTR
:
0
),
// cl_mem_flags flags
&
cf
,
// const cl_image_format *image_format
width
,
// size_t image_width
height
,
// size_t image_height
...
...
@@ -205,6 +207,7 @@ class CLImage {
if
(
err
!=
CL_SUCCESS
)
{
// TODO(HaiPeng): error handling
CL_CHECK_ERRORS
(
err
);
PADDLE_MOBILE_THROW_EXCEPTION
(
" create image 2d error "
);
}
}
...
...
@@ -222,9 +225,15 @@ class CLImage {
cl_context
context_
;
};
void
TensorToCLImage
(
Tensor
*
tensor
,
CLImage
*
image
,
cl_command_queue
commandQueue
);
void
TensorToCLImage
(
Tensor
*
tensor
,
CLImage
*
image
,
cl_command_queue
commandQueue
);
void
CLImageToTensor
(
CLImage
*
image
,
Tensor
*
tensor
,
cl_command_queue
commandQueue
);
void
CLImageToTensor
(
CLImage
*
image
,
Tensor
*
tensor
,
cl_command_queue
commandQueue
);
#ifdef PADDLE_MOBILE_DEBUG
Print
&
operator
<<
(
Print
&
printer
,
const
CLImage
&
image
);
#endif
}
// namespace framework
}
// namespace paddle_mobile
src/framework/cl/cl_scope.h
浏览文件 @
a3e4c8de
...
...
@@ -40,8 +40,11 @@ class CLScope {
std
::
unique_ptr
<
_cl_kernel
,
CLKernelDeleter
>
GetKernel
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
file_name
)
{
auto
program
=
Program
(
file_name
);
DLOG
<<
" get program ~ "
;
std
::
unique_ptr
<
_cl_kernel
,
CLKernelDeleter
>
kernel
(
clCreateKernel
(
program
,
kernel_name
.
c_str
(),
NULL
));
clCreateKernel
(
program
,
kernel_name
.
c_str
(),
&
status_
));
CL_CHECK_ERRORS
(
status_
);
DLOG
<<
" create kernel ~ "
;
return
std
::
move
(
kernel
);
}
...
...
@@ -58,11 +61,12 @@ class CLScope {
status_
=
clBuildProgram
(
program
.
get
(),
0
,
0
,
"-cl-fast-relaxed-math"
,
0
,
0
);
CL_CHECK_ERRORS
(
status_
);
programs_
[
file_name
]
=
std
::
move
(
program
);
return
program
.
get
();
return
program
s_
[
file_name
]
.
get
();
}
private:
...
...
src/framework/executor.cpp
浏览文件 @
a3e4c8de
...
...
@@ -37,6 +37,8 @@ limitations under the License. */
#include "framework/cl/cl_image.h"
#endif
int
debug_to
=
2
;
namespace
paddle_mobile
{
namespace
framework
{
...
...
@@ -85,7 +87,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
for
(
int
i
=
0
;
i
<
blocks
.
size
();
++
i
)
{
std
::
shared_ptr
<
framework
::
BlockDesc
>
block_desc
=
blocks
[
i
];
std
::
vector
<
std
::
shared_ptr
<
framework
::
OpDesc
>>
ops
=
block_desc
->
Ops
();
for
(
int
j
=
0
;
j
<
ops
.
size
()
;
++
j
)
{
for
(
int
j
=
0
;
j
<
debug_to
;
++
j
)
{
std
::
shared_ptr
<
framework
::
OpDesc
>
op
=
ops
[
j
];
DLOG
<<
"create op: "
<<
j
<<
" "
<<
op
->
Type
();
auto
op_base
=
framework
::
OpRegistry
<
Dtype
>::
CreateOp
(
...
...
@@ -414,7 +416,7 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
}
}
#else
for
(
int
i
=
0
;
i
<
ops
.
size
()
;
i
++
)
{
for
(
int
i
=
0
;
i
<
debug_to
;
i
++
)
{
#ifdef PADDLE_MOBILE_PROFILE
struct
timespec
ts
;
clock_gettime
(
CLOCK_MONOTONIC
,
&
ts
);
...
...
@@ -428,6 +430,11 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
#endif
}
#endif
DLOG
<<
" predict return nullptr"
;
return
nullptr
;
auto
last_op
=
ops
.
rbegin
();
auto
output_map
=
(
*
last_op
)
->
Outputs
();
std
::
vector
<
std
::
string
>
out_keys
=
(
*
last_op
)
->
GetOutKeys
();
...
...
@@ -647,13 +654,18 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
const
std
::
vector
<
Ptype
>
&
input
,
const
std
::
vector
<
int64_t
>
&
dims
)
{
framework
::
Tensor
tensor
(
input
,
framework
::
make_ddim
(
dims
));
std
::
shared_ptr
<
framework
::
Tensor
>
output_tensor
=
Predict
(
tensor
,
0
);
Executor
<
Dtype
,
P
>::
Ptype
*
output_ptr
=
output_tensor
->
data
<
typename
Executor
<
Dtype
,
P
>::
Ptype
>
();
std
::
vector
<
typename
Executor
<
Dtype
,
P
>::
Ptype
>
result_vector
;
for
(
int
j
=
0
;
j
<
output_tensor
->
numel
();
++
j
)
{
result_vector
.
push_back
(
output_ptr
[
j
]);
if
(
output_tensor
!=
nullptr
)
{
Executor
<
Dtype
,
P
>::
Ptype
*
output_ptr
=
output_tensor
->
data
<
typename
Executor
<
Dtype
,
P
>::
Ptype
>
();
std
::
vector
<
typename
Executor
<
Dtype
,
P
>::
Ptype
>
result_vector
;
for
(
int
j
=
0
;
j
<
output_tensor
->
numel
();
++
j
)
{
result_vector
.
push_back
(
output_ptr
[
j
]);
}
return
result_vector
;
}
else
{
DLOG
<<
"return empty vector"
;
return
{};
}
return
result_vector
;
}
#ifdef PADDLE_MOBILE_FPGA
...
...
src/framework/operator.cpp
浏览文件 @
a3e4c8de
...
...
@@ -57,7 +57,10 @@ void OperatorBase<Dtype>::CheckAllInputOutputSet() const {}
template
<
typename
Dtype
>
void
OperatorBase
<
Dtype
>::
Run
()
{
DLOG
<<
" begin run "
<<
type_
;
RunImpl
();
DLOG
<<
" end run "
<<
type_
;
return
;
#ifdef PADDLE_MOBILE_DEBUG
DLOG
<<
"-------------"
<<
type_
<<
"----------------------------"
;
vector
<
string
>
input_keys
=
GetInputKeys
();
...
...
@@ -100,8 +103,9 @@ void OperatorBase<Dtype>::Run() {
#ifdef PADDLE_MOBILE_CL
if
(
type_
==
"fetch"
)
{
Tensor
*
tensor
=
vari
->
template
GetMutable
<
framework
::
LoDTensor
>();
if
(
tensor
)
if
(
tensor
)
{
DLOG
<<
type_
<<
" output- "
<<
key
<<
"="
<<
tensor
->
dims
();
}
}
else
{
CLImage
*
cl_image
=
vari
->
template
GetMutable
<
framework
::
CLImage
>();
// cl_command_queue commandQueue =
...
...
src/operators/kernel/cl/cl_kernel/conv_kernel.cl
浏览文件 @
a3e4c8de
...
...
@@ -12,4 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
#
include
"conv_kernel.inc.cl"
//#include
"conv_kernel.inc.cl"
__kernel
void
conv_3x3
()
{}
\ No newline at end of file
src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
浏览文件 @
a3e4c8de
...
...
@@ -24,9 +24,16 @@ namespace operators {
template
<
>
bool
ConvAddBNReluKernel
<
GPU_CL
,
float
>::
Init
(
FusionConvAddBNReluParam
<
GPU_CL
>
*
param
)
{
PADDLE_MOBILE_ENFORCE
(
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
"need equal"
);
param
->
Filter
()
->
InitCLImage
(
cl_helper_
.
CLContext
());
param
->
Bias
()
->
InitCLImage
(
cl_helper_
.
CLContext
());
// const CL *mean = param->InputMean();
const
framework
::
CLImage
*
mean
=
param
->
InputMean
();
const
framework
::
CLImage
*
variance
=
param
->
InputVariance
();
const
framework
::
CLImage
*
scale
=
param
->
InputScale
();
const
framework
::
CLImage
*
bias
=
param
->
InputBias
();
...
...
@@ -52,9 +59,6 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
new_bias_ptr
[
i
]
=
bias_ptr
[
i
]
-
mean_ptr
[
i
]
*
inv_std_ptr
[
i
]
*
scale_ptr
[
i
];
}
delete
[](
new_scale_ptr
);
delete
[](
new_bias_ptr
);
framework
::
CLImage
*
new_scale
=
new
framework
::
CLImage
();
new_scale
->
SetTensorData
(
new_scale_ptr
,
variance
->
dims
());
...
...
@@ -68,6 +72,9 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
param
->
SetNewScale
(
new_scale
);
param
->
SetNewBias
(
new_bias
);
delete
[](
new_scale_ptr
);
delete
[](
new_bias_ptr
);
PADDLE_MOBILE_ENFORCE
(
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
...
...
src/operators/kernel/cl/conv_add_kernel.cpp
浏览文件 @
a3e4c8de
...
...
@@ -25,6 +25,9 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
"need equal"
);
param
->
Filter
()
->
InitCLImage
(
cl_helper_
.
CLContext
());
param
->
Bias
()
->
InitCLImage
(
cl_helper_
.
CLContext
());
int
offset
=
static_cast
<
int
>
(
param
->
Filter
()
->
dims
()[
2
])
/
2
-
static_cast
<
int
>
(
param
->
Paddings
()[
1
]);
param
->
SetOffset
(
offset
);
...
...
src/operators/kernel/cl/conv_kernel.cpp
浏览文件 @
a3e4c8de
...
...
@@ -26,18 +26,32 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
"need equal"
);
param
->
Filter
()
->
InitCLImage
(
cl_helper_
.
CLContext
());
int
offset
=
static_cast
<
int
>
(
param
->
Filter
()
->
dims
()[
2
])
/
2
-
static_cast
<
int
>
(
param
->
Paddings
()[
1
]);
param
->
SetOffset
(
offset
);
DLOG
<<
" init helper: "
<<
&
cl_helper_
;
DLOG
<<
" conv kernel add kernel ~ "
;
DLOG
<<
" width of one block: "
<<
param
->
Filter
()
->
WidthOfOneBlock
();
DLOG
<<
" height of one block: "
<<
param
->
Filter
()
->
HeightOfOneBlock
();
DLOG
<<
" filter dims: "
<<
param
->
Filter
()
->
dims
();
if
(
param
->
Filter
()
->
WidthOfOneBlock
()
==
1
&&
param
->
Filter
()
->
HeightOfOneBlock
()
==
1
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1"
,
"conv_add_bn_relu_kernel.cl"
);
DLOG
<<
" here1 "
;
this
->
cl_helper_
.
AddKernel
(
"conv_1x1"
,
"conv_kernel.cl"
);
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
)
{
this
->
cl_helper_
.
AddKernel
(
"depth_conv_3x3"
,
"conv_add_bn_relu_kernel.cl"
);
DLOG
<<
" here2 "
;
this
->
cl_helper_
.
AddKernel
(
"depth_conv_3x3"
,
"conv_kernel.cl"
);
}
else
if
(
param
->
Filter
()
->
WidthOfOneBlock
()
==
3
&&
param
->
Filter
()
->
HeightOfOneBlock
()
==
3
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_3x3"
,
"conv_add_bn_relu_kernel.cl"
);
DLOG
<<
" here3 "
;
this
->
cl_helper_
.
AddKernel
(
"conv_3x3"
,
"conv_kernel.cl"
);
}
else
{
PADDLE_MOBILE_THROW_EXCEPTION
(
" not support "
);
}
...
...
@@ -47,14 +61,27 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
template
<
>
void
ConvKernel
<
GPU_CL
,
float
>::
Compute
(
const
ConvParam
<
GPU_CL
>
&
param
)
{
DLOG
<<
" Compute helper: "
<<
&
cl_helper_
;
DLOG
<<
" begin compute "
;
auto
kernel
=
this
->
cl_helper_
.
KernelAt
(
0
);
DLOG
<<
" get work size "
;
auto
default_work_size
=
this
->
cl_helper_
.
DefaultWorkSize
(
*
param
.
Output
());
DLOG
<<
" end work size "
;
int
c_block
=
default_work_size
[
0
];
int
w
=
default_work_size
[
1
];
int
nh
=
default_work_size
[
2
];
auto
input
=
param
.
Input
()
->
GetCLImage
();
DLOG
<<
" get Input "
;
auto
filter
=
param
.
Filter
()
->
GetCLImage
();
DLOG
<<
" get Filter "
;
auto
output
=
param
.
Output
();
DLOG
<<
" get Output "
;
int
stride
=
param
.
Strides
()[
0
];
int
offset
=
param
.
Offset
();
int
input_c
=
param
.
Input
()
->
CBlock
();
...
...
@@ -64,6 +91,8 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> ¶m) {
cl_int
status
;
DLOG
<<
" begin set kernel arg "
;
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
status
=
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
status
=
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
...
...
@@ -77,12 +106,18 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> ¶m) {
status
=
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
input_width
);
status
=
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_height
);
DLOG
<<
" end set kernel arg "
;
CL_CHECK_ERRORS
(
status
);
DLOG
<<
" begin enqueue "
;
status
=
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
DLOG
<<
" end enqueue "
;
CL_CHECK_ERRORS
(
status
);
}
...
...
src/operators/kernel/cl/depthwise_conv_kernel.cpp
浏览文件 @
a3e4c8de
...
...
@@ -27,6 +27,7 @@ bool DepthwiseConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
"need equal"
);
param
->
Filter
()
->
InitCLImage
(
cl_helper_
.
CLContext
());
int
offset
=
static_cast
<
int
>
(
param
->
Filter
()
->
dims
()[
2
])
/
2
-
static_cast
<
int
>
(
param
->
Paddings
()[
1
]);
param
->
SetOffset
(
offset
);
...
...
src/operators/op_param.h
浏览文件 @
a3e4c8de
...
...
@@ -948,6 +948,7 @@ class FetchParam : public OpParam {
input_x_
=
InputXFrom
<
GType
>
(
inputs
,
scope
);
out_
=
OutFrom
(
outputs
,
scope
);
}
const
RType
*
InputX
()
const
{
return
input_x_
;
}
Tensor
*
Out
()
const
{
return
out_
;
}
...
...
test/net/test_mobilenet_GPU.cpp
浏览文件 @
a3e4c8de
...
...
@@ -34,23 +34,24 @@ int main() {
GetInput
<
float
>
(
g_test_image_1x3x224x224_banana
,
&
input
,
dims
);
auto
vec_result
=
paddle_mobile
.
Predict
(
input
,
dims
);
std
::
vector
<
float
>::
iterator
biggest
=
std
::
max_element
(
std
::
begin
(
vec_result
),
std
::
end
(
vec_result
));
std
::
cout
<<
" Max element is "
<<
*
biggest
<<
" at position "
<<
std
::
distance
(
std
::
begin
(
vec_result
),
biggest
)
<<
std
::
endl
;
// 预热十次
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
auto
vec_result
=
paddle_mobile
.
Predict
(
input
,
dims
);
}
auto
time3
=
paddle_mobile
::
time
();
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
auto
vec_result
=
paddle_mobile
.
Predict
(
input
,
dims
);
}
DLOG
<<
vec_result
;
auto
time4
=
paddle_mobile
::
time
();
std
::
cout
<<
"predict cost :"
<<
paddle_mobile
::
time_diff
(
time3
,
time4
)
/
10
<<
"ms"
<<
std
::
endl
;
// std::vector<float>::iterator biggest =
// std::max_element(std::begin(vec_result), std::end(vec_result));
// std::cout << " Max element is " << *biggest << " at position "
// << std::distance(std::begin(vec_result), biggest) <<
// std::endl;
// for (int i = 0; i < 10; ++i) {
// auto vec_result = paddle_mobile.Predict(input, dims);
// }
// auto time3 = paddle_mobile::time();
// for (int i = 0; i < 10; ++i) {
// auto vec_result = paddle_mobile.Predict(input, dims);
// }
// DLOG << vec_result;
// auto time4 = paddle_mobile::time();
// std::cout << "predict cost :" << paddle_mobile::time_diff(time3,
// time4) / 10 << "ms"
// << std::endl;
}
std
::
cout
<<
"如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
...
...
tools/web-exporter/CMakeLists.txt
已删除
100644 → 0
浏览文件 @
306c1284
cmake_minimum_required
(
VERSION 3.6
)
project
(
web-exporter
)
set
(
CMAKE_CXX_STANDARD 11
)
file
(
GLOB PADDLE_MOBILE_CPP_FILES
"../../src/common/*.c"
"../../src/common/*.cpp"
"../../src/memory/*.cpp"
"../../src/framework/*.c"
"../../src/framework/*.cpp"
"../../src/framework/program/*.cpp"
"../../src/framework/program/program-optimize/*.cpp"
)
file
(
GLOB EXPORT_CPP_FILES
"*.cpp"
)
add_executable
(
web-exporter
${
PADDLE_MOBILE_CPP_FILES
}
${
EXPORT_CPP_FILES
}
)
target_include_directories
(
web-exporter PRIVATE
"../../src"
)
target_link_libraries
(
web-exporter
)
\ No newline at end of file
tools/web-exporter/export-nodejs.cpp
已删除
100644 → 0
浏览文件 @
306c1284
#include "export.h"
// Returns a string made of `i` space characters, used to indent the
// generated JavaScript.
inline std::string indent(int i) {
  std::string padding;
  padding.assign(i, ' ');
  return padding;
}
void
export_nodejs
(
ProgramPtr
program
,
ScopePtr
scope
,
std
::
ostream
&
os
)
{
os
<<
"module.exports.program = {
\n
"
;
os
<<
indent
(
2
)
<<
var2str
(
"blocks"
)
<<
": [
\n
"
;
for
(
const
auto
&
block
:
program
->
Blocks
())
{
os
<<
indent
(
4
)
<<
"{
\n
"
;
os
<<
indent
(
6
)
<<
var2str
(
"vars"
)
<<
": {
\n
"
;
for
(
const
auto
&
var
:
block
->
Vars
())
{
const
auto
&
dim
=
var
->
Tensor_desc
().
Dims
();
os
<<
indent
(
8
)
<<
var2str
(
var
->
Name
())
<<
": {
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"dim"
)
<<
": "
<<
var2str
(
dim
)
<<
",
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"persistable"
)
<<
": "
<<
var2str
(
var
->
Persistable
())
<<
"
\n
"
;
os
<<
indent
(
8
)
<<
"},
\n
"
;
}
os
<<
indent
(
6
)
<<
"},
\n
"
;
os
<<
indent
(
6
)
<<
var2str
(
"ops"
)
<<
": [
\n
"
;
for
(
const
auto
&
op
:
block
->
Ops
())
{
os
<<
indent
(
8
)
<<
"{
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"type"
)
<<
": "
<<
var2str
(
op
->
Type
())
<<
",
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"inputs"
)
<<
": {
\n
"
;
for
(
const
auto
&
kv
:
op
->
GetInputs
())
{
os
<<
indent
(
12
)
<<
var2str
(
kv
.
first
)
<<
": "
<<
var2str
(
kv
.
second
)
<<
",
\n
"
;
}
os
<<
indent
(
10
)
<<
"},
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"outputs"
)
<<
": {
\n
"
;
for
(
const
auto
&
kv
:
op
->
GetInputs
())
{
os
<<
indent
(
12
)
<<
var2str
(
kv
.
first
)
<<
": "
<<
var2str
(
kv
.
second
)
<<
",
\n
"
;
}
os
<<
indent
(
10
)
<<
"},
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"attrs"
)
<<
": {
\n
"
;
for
(
const
auto
&
kv
:
op
->
GetAttrMap
())
{
os
<<
indent
(
12
)
<<
var2str
(
kv
.
first
)
<<
": "
;
os
<<
decltype
(
kv
.
second
)
::
ApplyVistor
(
VarVisitor
(),
kv
.
second
)
<<
",
\n
"
;
}
os
<<
indent
(
10
)
<<
"},
\n
"
;
os
<<
indent
(
8
)
<<
"},
\n
"
;
}
os
<<
indent
(
6
)
<<
"],
\n
"
;
os
<<
indent
(
4
)
<<
"},
\n
"
;
}
os
<<
indent
(
2
)
<<
"]
\n
"
;
os
<<
"}
\n
"
;
}
tools/web-exporter/export-scope.cpp
已删除
100644 → 0
浏览文件 @
306c1284
#include <cstdio>
#include "export.h"
// Dumps every persistable variable of `program` (except "feed" and "fetch")
// as raw float data into a file named after the variable inside `dirname`.
//
// A file that cannot be opened or fully written is reported on stderr and
// skipped, so one bad parameter does not abort the whole export.
void export_scope(ProgramPtr program, ScopePtr scope,
                  const std::string &dirname) {
  for (const auto &block : program->Blocks()) {
    for (const auto &var : block->Vars()) {
      if (var->Name() == "feed" || var->Name() == "fetch") {
        continue;
      }
      if (!var->Persistable()) {
        continue;
      }

      auto *v = scope->FindVar(var->Name());
      assert(v != nullptr);

      // Element count is the product of all dims; accumulate in size_t so a
      // large tensor does not overflow an int.
      size_t count = 1;
      for (auto n : var->Tensor_desc().Dims()) {
        count *= static_cast<size_t>(n);
      }

      auto *tensor = v->GetMutable<paddle_mobile::framework::LoDTensor>();
      const float *p = tensor->mutable_data<float>();
      assert(p != nullptr);

      const std::string para_file_name = dirname + '/' + var->Name();
      // "wb": the payload is binary, so text-mode newline translation must
      // not be applied on platforms that distinguish the two modes.
      FILE *para_file = fopen(para_file_name.c_str(), "wb");
      if (para_file == nullptr) {
        // Previously a null FILE* went straight into fwrite/fclose.
        std::cerr << "export_scope: cannot open " << para_file_name
                  << " for writing\n";
        continue;
      }
      const size_t written = fwrite(p, sizeof(float), count, para_file);
      if (written != count) {
        std::cerr << "export_scope: short write to " << para_file_name << " ("
                  << written << " of " << count << " floats)\n";
      }
      fclose(para_file);
    }
  }
}
tools/web-exporter/export.cpp
已删除
100644 → 0
浏览文件 @
306c1284
#include "export.h"
#include <sys/stat.h>
#include <sys/types.h>
// Executor subclass whose constructor only fills in the members the exporter
// relies on (program, batch size, optimize/lod flags, program to predict,
// the "batch_size" scope variable) and then runs the memory initialisation
// matching the model layout.
class FakeExecutor
    : public paddle_mobile::framework::Executor<
          paddle_mobile::CPU, paddle_mobile::Precision::FP32> {
 public:
  FakeExecutor(const paddle_mobile::framework::Program<paddle_mobile::CPU> p) {
    program_ = p;
    batch_size_ = 1;
    use_optimize_ = true;
    loddable_ = false;

    // Pick the optimized program description when optimisation is enabled.
    to_predict_program_ =
        use_optimize_ ? program_.optimizeProgram : program_.originProgram;

    // Publish the batch size through the scope variable "batch_size".
    auto *batch_size_var = program_.scope->Var("batch_size");
    batch_size_var->SetValue<int>(1);

    // Combined models need the combined initialisation path.
    if (!program_.combined) {
      InitMemory();
    } else {
      InitCombineMemory();
    }
  }
};
// Entry point of the web exporter.
//
// Usage: <exe> <combined-model-dir> <output-dir>
//   - reads  <combined-model-dir>/model and <combined-model-dir>/params;
//   - writes <output-dir>/model.js (program description) and one raw float
//     file per persistable variable under <output-dir>/paras.
//
// Returns 0 on success, -1 on bad arguments or when model.js cannot be
// opened for writing.
int main(int argc, char **argv) {
  if (argc != 3) {
    std::cout << "Usage: " << argv[0]
              << " <combined-model-dir> <output-dir>\n";
    return -1;
  }

  std::string model_dir = argv[1];
  std::string model_path = model_dir + "/model";
  std::string para_path = model_dir + "/params";

  std::string out_dir = argv[2];
  std::string out_model_js = out_dir + "/model.js";
  std::string out_para_dir = out_dir + "/paras";

  // The directories may already exist, so the mkdir results are deliberately
  // not treated as fatal; a real failure surfaces when the output files are
  // opened below.
  mkdir(out_dir.c_str(), S_IRWXU | S_IRWXG | S_IRWXO);
  mkdir(out_para_dir.c_str(), S_IRWXU | S_IRWXG | S_IRWXO);

  std::cout << "loading " << model_path << " & " << para_path << "\n";
  paddle_mobile::framework::Loader<> loader;
  auto program = loader.Load(model_path, para_path, true);

  // Constructed for its side effects: the constructor initialises the
  // parameter memory that export_scope() reads from the scope.
  FakeExecutor executor(program);
  auto optimizedProgram = program.optimizeProgram;
  export_scope(optimizedProgram, program.scope, out_para_dir);

  std::ofstream fs(out_model_js.c_str());
  if (!fs) {
    // Previously a failed open was ignored and the tool reported success
    // without producing model.js.
    std::cerr << "cannot open " << out_model_js << " for writing\n";
    return -1;
  }
  export_nodejs(optimizedProgram, program.scope, fs);
  fs.close();
  return 0;
}
tools/web-exporter/export.h
已删除
100644 → 0
浏览文件 @
306c1284
#pragma once
#include <iostream>
#include <vector>
#include <memory>
#include <string>
#include <ostream>
#include <fstream>
#include "framework/loader.h"
#include "framework/executor.h"
#include "framework/scope.h"
#include "framework/program/program_desc.h"
// using paddle_mobile::framework::ProgramDesc;
// using paddle_mobile::framework::Scope;
using
ProgramPtr
=
std
::
shared_ptr
<
paddle_mobile
::
framework
::
ProgramDesc
>
;
using
ScopePtr
=
std
::
shared_ptr
<
paddle_mobile
::
framework
::
Scope
>
;
void
export_nodejs
(
ProgramPtr
program
,
ScopePtr
scope
,
std
::
ostream
&
os
=
std
::
cout
);
void
export_scope
(
ProgramPtr
program
,
ScopePtr
scope
,
const
std
::
string
&
dirname
=
"."
);
// var2str overload set: renders a value as JavaScript source text.

// Generic fallback: any type accepted by std::to_string.
template <typename T>
inline std::string var2str(const T &v) {
  return std::to_string(v);
}

// Strings are wrapped in double quotes. The content is emitted verbatim; no
// escaping of embedded quotes or backslashes is performed.
template <>
inline std::string var2str(const std::string &v) {
  return "\"" + v + "\"";
}

// C strings are rendered exactly like std::string.
inline std::string var2str(const char *v) { return var2str<std::string>(v); }

// Booleans become the JavaScript literals true / false.
inline std::string var2str(const bool v) { return v ? "true" : "false"; }

// Vectors become a JavaScript array literal with ", " between the elements,
// each element rendered through var2str.
template <typename T>
std::string var2str(const std::vector<T> &v) {
  std::string r = "[";
  // Range-for replaces the former `int i < v.size()` loop, which compared a
  // signed index against an unsigned size.
  bool first = true;
  for (const auto &item : v) {
    if (!first) {
      r += ", ";
    }
    first = false;
    r += var2str(item);
  }
  return r + "]";
}
// Visitor that renders whichever value it is handed through the var2str
// overload set. `type_t` is the visitor's result type (the type var2str
// returns).
struct VarVisitor {
  using type_t = decltype(var2str(0));

  // const: the visitor holds no state, so it can be invoked on const
  // instances and temporaries alike.
  template <typename T>
  type_t operator()(const T &v) const {
    return var2str(v);
  }
};
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录