Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
6ab45d2d
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6ab45d2d
编写于
10月 15, 2018
作者:
Y
yangfei
浏览文件
操作
浏览文件
下载
差异文件
add some function
上级
7fbea0e2
df230944
变更
29
隐藏空白更改
内联
并排
Showing
29 changed file
with
861 addition
and
139 deletion
+861
-139
src/common/common.h
src/common/common.h
+4
-0
src/framework/cl/cl_engine.h
src/framework/cl/cl_engine.h
+1
-1
src/framework/cl/cl_tool.h
src/framework/cl/cl_tool.h
+7
-6
src/framework/executor.cpp
src/framework/executor.cpp
+17
-7
src/operators/kernel/cl/batchnorm_kernel.cpp
src/operators/kernel/cl/batchnorm_kernel.cpp
+56
-1
src/operators/kernel/cl/cl_kernel/batchnorm_kernel.cl
src/operators/kernel/cl/cl_kernel/batchnorm_kernel.cl
+24
-0
src/operators/kernel/cl/cl_kernel/fetch_kernel.cl
src/operators/kernel/cl/cl_kernel/fetch_kernel.cl
+27
-0
src/operators/kernel/cl/cl_kernel/pool_kernel.cl
src/operators/kernel/cl/cl_kernel/pool_kernel.cl
+75
-0
src/operators/kernel/cl/cl_kernel/relu.cl
src/operators/kernel/cl/cl_kernel/relu.cl
+25
-0
src/operators/kernel/cl/cl_kernel/reshape.cl
src/operators/kernel/cl/cl_kernel/reshape.cl
+49
-0
src/operators/kernel/cl/cl_kernel/softmax.cl
src/operators/kernel/cl/cl_kernel/softmax.cl
+41
-0
src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
+31
-24
src/operators/kernel/cl/conv_add_kernel.cpp
src/operators/kernel/cl/conv_add_kernel.cpp
+25
-18
src/operators/kernel/cl/conv_kernel.cpp
src/operators/kernel/cl/conv_kernel.cpp
+57
-51
src/operators/kernel/cl/depthwise_conv_kernel.cpp
src/operators/kernel/cl/depthwise_conv_kernel.cpp
+24
-17
src/operators/kernel/cl/feed_kernel.cpp
src/operators/kernel/cl/feed_kernel.cpp
+14
-0
src/operators/kernel/cl/fetch_kernel.cpp
src/operators/kernel/cl/fetch_kernel.cpp
+35
-1
src/operators/kernel/cl/pool_kernel.cpp
src/operators/kernel/cl/pool_kernel.cpp
+41
-1
src/operators/kernel/cl/relu_kernel.cpp
src/operators/kernel/cl/relu_kernel.cpp
+17
-2
src/operators/kernel/cl/reshape_kernel.cpp
src/operators/kernel/cl/reshape_kernel.cpp
+26
-1
src/operators/kernel/cl/softmax_kernel.cpp
src/operators/kernel/cl/softmax_kernel.cpp
+20
-1
src/operators/op_param.h
src/operators/op_param.h
+21
-0
test/net/test_googlenet.cpp
test/net/test_googlenet.cpp
+2
-2
test/net/test_mobilenet_GPU.cpp
test/net/test_mobilenet_GPU.cpp
+6
-6
tools/web-exporter/CMakeLists.txt
tools/web-exporter/CMakeLists.txt
+20
-0
tools/web-exporter/export-nodejs.cpp
tools/web-exporter/export-nodejs.cpp
+49
-0
tools/web-exporter/export-scope.cpp
tools/web-exporter/export-scope.cpp
+34
-0
tools/web-exporter/export.cpp
tools/web-exporter/export.cpp
+52
-0
tools/web-exporter/export.h
tools/web-exporter/export.h
+61
-0
未找到文件。
src/common/common.h
浏览文件 @
6ab45d2d
...
...
@@ -15,6 +15,8 @@ limitations under the License. */
#pragma once
#include <chrono>
namespace
paddle_mobile
{
using
Time
=
decltype
(
std
::
chrono
::
high_resolution_clock
::
now
());
inline
Time
time
()
{
return
std
::
chrono
::
high_resolution_clock
::
now
();
}
...
...
@@ -25,3 +27,5 @@ inline double time_diff(Time t1, Time t2) {
ms
counter
=
std
::
chrono
::
duration_cast
<
ms
>
(
diff
);
return
counter
.
count
()
/
1000.0
;
}
}
src/framework/cl/cl_engine.h
浏览文件 @
6ab45d2d
...
...
@@ -18,8 +18,8 @@ limitations under the License. */
#include <string>
#include "CL/cl.h"
#include "common/log.h"
#include "common/enforce.h"
#include "common/log.h"
#include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_tool.h"
...
...
src/framework/cl/cl_tool.h
浏览文件 @
6ab45d2d
...
...
@@ -21,12 +21,13 @@ namespace framework {
const
char
*
opencl_error_to_str
(
cl_int
error
);
#define CL_CHECK_ERRORS(ERR) \
if (ERR != CL_SUCCESS) { \
printf( \
"OpenCL error with code %s happened in file %s at line %d. " \
"Exiting.\n", \
opencl_error_to_str(ERR), __FILE__, __LINE__); \
#define CL_CHECK_ERRORS(ERR) \
if (ERR != CL_SUCCESS) { \
printf( \
"OpenCL error with code %s happened in file %s at line %d. " \
"Exiting.\n", \
paddle_mobile::framework::opencl_error_to_str(ERR), __FILE__, \
__LINE__); \
}
}
// namespace framework
...
...
src/framework/executor.cpp
浏览文件 @
6ab45d2d
...
...
@@ -908,10 +908,14 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() {
for
(
const
auto
&
var_desc
:
block
->
Vars
())
{
auto
var
=
program_
.
scope
->
Var
(
var_desc
->
Name
());
if
(
var_desc
->
Persistable
())
{
auto
cl_image
=
var
->
template
GetMutable
<
framework
::
CLImage
>()
;
CLImage
*
cl_image
=
nullptr
;
if
(
var_desc
->
Name
()
==
"feed"
||
var_desc
->
Name
()
==
"fetch"
)
{
var
->
template
GetMutable
<
framework
::
LoDTensor
>();
continue
;
}
else
{
cl_image
=
var
->
template
GetMutable
<
framework
::
CLImage
>();
}
char
*
origin_data
=
Get_binary_data
(
program_
.
model_path
+
"/"
+
var_desc
->
Name
());
char
*
data
=
origin_data
;
...
...
@@ -928,7 +932,8 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() {
framework
::
DDim
ddim
=
framework
::
make_ddim
(
desc
.
Dims
());
cl_image
->
Init
(
context
,
tensorInput
,
ddim
);
// has not init
cl_image
->
SetTensorData
(
tensorInput
,
ddim
);
delete
origin_data
;
// paddle_mobile::memory::Free(tensorInput);
...
...
@@ -941,7 +946,7 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() {
// framework::DDim ddim = framework::make_ddim(desc.Dims());
framework
::
DDim
ddim
=
cl_image
->
dims
();
DLOG
<<
var_desc
->
Name
();
cl_image
->
Init
(
context
,
ddim
);
cl_image
->
Init
EmptyImage
(
context
,
ddim
);
}
}
}
...
...
@@ -965,9 +970,12 @@ void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
for
(
const
auto
&
var_desc
:
block
->
Vars
())
{
auto
var
=
program_
.
scope
->
Var
(
var_desc
->
Name
());
if
(
var_desc
->
Persistable
())
{
auto
cl_image
=
var
->
template
GetMutable
<
framework
::
CLImage
>()
;
CLImage
*
cl_image
=
nullptr
;
if
(
var_desc
->
Name
()
==
"feed"
||
var_desc
->
Name
()
==
"fetch"
)
{
var
->
template
GetMutable
<
framework
::
LoDTensor
>();
continue
;
}
else
{
cl_image
=
var
->
template
GetMutable
<
framework
::
CLImage
>();
}
cl_context
context
=
program_
.
scope
->
GetCLScpoe
()
->
Context
();
...
...
@@ -982,7 +990,10 @@ void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
float
*
tensorInput
=
static_cast
<
float
*>
(
paddle_mobile
::
memory
::
Alloc
(
sizeof
(
float
)
*
numel
));
LoadMemory
(
*
var_desc
,
tensorInput
,
&
origin_data
);
cl_image
->
Init
(
context
,
tensorInput
,
ddim
);
// has not init
cl_image
->
SetTensorData
(
tensorInput
,
ddim
);
paddle_mobile
::
memory
::
Free
(
tensorInput
);
}
else
{
auto
cl_image
=
var
->
template
GetMutable
<
framework
::
CLImage
>();
...
...
@@ -991,8 +1002,7 @@ void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
const
framework
::
TensorDesc
&
desc
=
var_desc
->
Tensor_desc
();
framework
::
DDim
ddim
=
cl_image
->
dims
();
// framework::DDim ddim = framework::make_ddim(desc.Dims());
cl_image
->
Init
(
context
,
ddim
);
cl_image
->
InitEmptyImage
(
context
,
ddim
);
}
}
}
...
...
src/operators/kernel/cl/batchnorm_kernel.cpp
浏览文件 @
6ab45d2d
...
...
@@ -21,12 +21,67 @@ namespace operators {
template
<
>
bool
BatchNormKernel
<
GPU_CL
,
float
>::
Init
(
BatchNormParam
<
GPU_CL
>
*
param
)
{
this
->
cl_helper_
.
AddKernel
(
"batchnorm"
,
"batchnorm_kernel.cl"
);
const
framework
::
CLImage
*
mean
=
param
->
InputMean
();
const
framework
::
CLImage
*
variance
=
param
->
InputVariance
();
const
framework
::
CLImage
*
scale
=
param
->
InputScale
();
const
framework
::
CLImage
*
bias
=
param
->
InputBias
();
const
float
epsilon
=
param
->
Epsilon
();
auto
mean_ptr
=
mean
->
data
<
float
>
();
auto
variance_ptr
=
variance
->
data
<
float
>
();
auto
scale_ptr
=
scale
->
data
<
float
>
();
auto
bias_ptr
=
bias
->
data
<
float
>
();
const
int
C
=
mean
->
numel
();
float
inv_std_ptr
[
C
];
for
(
int
i
=
0
;
i
<
C
;
i
++
)
{
inv_std_ptr
[
i
]
=
1
/
static_cast
<
float
>
(
pow
((
variance_ptr
[
i
]
+
epsilon
),
0.5
));
}
float
*
new_scale_ptr
=
new
float
[
C
];
float
*
new_bias_ptr
=
new
float
[
C
];
for
(
int
i
=
0
;
i
<
C
;
i
++
)
{
new_scale_ptr
[
i
]
=
inv_std_ptr
[
i
]
*
scale_ptr
[
i
];
new_bias_ptr
[
i
]
=
bias_ptr
[
i
]
-
mean_ptr
[
i
]
*
inv_std_ptr
[
i
]
*
scale_ptr
[
i
];
}
delete
[](
new_scale_ptr
);
delete
[](
new_bias_ptr
);
framework
::
CLImage
*
new_scale
=
new
framework
::
CLImage
();
framework
::
CLImage
*
new_bias
=
new
framework
::
CLImage
();
param
->
SetNewScale
(
new_scale
);
param
->
SetNewBias
(
new_bias
);
return
true
;
}
template
<
>
void
BatchNormKernel
<
GPU_CL
,
float
>::
Compute
(
const
BatchNormParam
<
GPU_CL
>
&
param
)
{}
const
BatchNormParam
<
GPU_CL
>
&
param
)
{
auto
kernel
=
this
->
cl_helper_
.
KernelAt
(
0
);
auto
default_work_size
=
this
->
cl_helper_
.
DefaultWorkSize
(
*
param
.
OutputY
());
auto
input
=
param
.
InputX
()
->
GetCLImage
();
auto
out
=
param
.
OutputY
()
->
GetCLImage
();
auto
new_scale
=
param
.
NewScale
()
->
GetCLImage
();
auto
new_bias
=
param
.
NewBias
()
->
GetCLImage
();
const
int
out_height
=
param
.
OutputY
()
->
HeightOfOneBlock
();
const
int
out_width
=
param
.
OutputY
()
->
WidthOfOneBlock
();
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
out_height
);
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
out_width
);
clSetKernelArg
(
kernel
,
2
,
sizeof
(
cl_mem
),
&
input
);
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
new_scale
);
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
new_bias
);
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
out
);
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
}
template
class
BatchNormKernel
<
GPU_CL
,
float
>;
...
...
src/operators/kernel/cl/cl_kernel/batchnorm_kernel.cl
0 → 100644
浏览文件 @
6ab45d2d
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
__kernel
void
batchnorm
(
__private
const
int
out_height,
__private
const
int
out_width,
__read_only
image2d_t
input,
__read_only
image2d_t
new_scale,
__read_only
image2d_t
new_bias,
__write_only
image2d_t
output
)
{
const
int
out_c
=
get_global_id
(
0
)
;
const
int
out_w
=
get_global_id
(
1
)
;
const
int
out_nh
=
get_global_id
(
2
)
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
half4
new_scale
=
read_imageh
(
bn_scale,
sampler,
(
int2
)(
out_c,
0
))
;
half4
new_bias
=
read_imageh
(
bn_bias,
sampler,
(
int2
)(
out_c,
0
))
;
int
pos_x
=
mad24
(
out_c,
out_width,
out_w
)
;
half4
in
=
read_imageh
(
input,
sampler,
(
int2
)(
pos_x,
out_nh
))
;
half4
out
=
mad
(
in,
new_scale,
new_bias
)
;
write_imageh
(
output,
(
int2
)(
pos_x,
nh
)
,
out
)
;
}
src/operators/kernel/cl/cl_kernel/fetch_kernel.cl
0 → 100644
浏览文件 @
6ab45d2d
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
__kernel
void
fetch
(
__private
const
int
in_height,
__private
const
int
in_width,
__private
const
int
size_ch,
__private
const
int
size_block,
__private
const
int
size_batch,
__read_only
image2d_t
input,
__global
float*
out
)
{
const
int
in_c
=
get_global_id
(
0
)
;
const
int
in_w
=
get_global_id
(
1
)
;
const
int
in_nh
=
get_global_id
(
2
)
;
const
int
in_n
=
in_nh
/
in_height
;
const
int
in_h
=
in_nh
%
in_height
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
const
int
pos_x
=
mad24
(
in_c,
in_width,
in_w
)
;
half4
in
=
read_imageh
(
input,
sampler,
(
int2
)(
pos_x,
in_nh
))
;
const
int
index
=
in_n
*
size_batch
+
in_c
*
size_block
+
in_h
*
in_width
+
in_w
;
out[index]
=
convert_float
(
in.x
)
;
out[index
+
size_ch]
=
convert_float
(
in.y
)
;
out[index
+
size_ch
*
2]
=
convert_float
(
in.z
)
;
out[index
+
size_ch
*
3]
=
convert_float
(
in.w
)
;
}
src/operators/kernel/cl/cl_kernel/pool_kernel.cl
0 → 100644
浏览文件 @
6ab45d2d
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
define
MIN_VALUE
-FLT_MAX
__kernel
void
pool_max
(
__private
const
int
in_height,
__private
const
int
in_width,
__private
const
int
out_height,
__private
const
int
out_width,
__private
const
int
pad_top,
__private
const
int
pad_left,
__private
const
int
stride_h,
__private
const
int
stride_w,
__private
const
int
ksize_h,
__private
const
int
ksize_w,
__read_only
image2d_t
input,
__write_only
image2d_t
output
)
{
const
int
out_c
=
get_global_id
(
0
)
;
const
int
out_w
=
get_global_id
(
1
)
;
const
int
out_nh
=
get_global_id
(
2
)
;
const
int
out_n
=
out_nh
/
out_height
;
const
int
out_h
=
out_nh
%
out_height
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
int
start_h
=
max
(
out_h
*
stride_h
-
pad_top,
0
)
;
int
end_h
=
min
(
start_h
+
ksize_h,
in_height
)
;
int
start_w
=
max
(
out_w
*
stride_w
-
pad_left,
0
)
;
int
end_w
=
min
(
start_w
+
ksize_w,
in_width
)
;
const
int
pos_in_x
=
out_c
*
in_width
;
const
int
pos_in_y
=
out_n
*
in_height
;
half4
max_value
=
(
half4
)(
MIN_VALUE
)
;
for
(
int
y
=
start_h
; y < end_h; ++y) {
for
(
int
x
=
start_w
; x < end_w; ++x) {
half4
tmp
=
read_imageh
(
input,
sampler,
(
int2
)(
pos_in_x
+
x,
pos_in_y
+
y
))
;
max_value
=
max
(
max_value,
tmp
)
;
}
}
const
int
pos_out_x
=
mad24
(
out_c,
out_width,
out_w
)
;
write_imageh
(
output,
(
int2
)(
pos_out_x,
out_nh
)
,
max_value
)
;
}
__kernel
void
pool_avg
(
__private
const
int
in_height,
__private
const
int
in_width,
__private
const
int
out_height,
__private
const
int
out_width,
__private
const
int
pad_top,
__private
const
int
pad_left,
__private
const
int
stride_h,
__private
const
int
stride_w,
__private
const
int
ksize_h,
__private
const
int
ksize_w,
__read_only
image2d_t
input,
__write_only
image2d_t
output
)
{
const
int
out_c
=
get_global_id
(
0
)
;
const
int
out_w
=
get_global_id
(
1
)
;
const
int
out_nh
=
get_global_id
(
2
)
;
const
int
out_n
=
out_nh
/
out_height
;
const
int
out_h
=
out_nh
%
out_height
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
int
start_h
=
max
(
out_h
*
stride_h
-
pad_top,
0
)
;
int
end_h
=
min
(
start_h
+
ksize_h,
in_height
)
;
int
start_w
=
max
(
out_w
*
stride_w
-
pad_left,
0
)
;
int
end_w
=
min
(
start_w
+
ksize_w,
in_width
)
;
const
int
pos_in_x
=
out_c
*
in_width
;
const
int
pos_in_y
=
out_n
*
in_height
;
half4
sum
=
(
half4
)(
0.0f
)
;
int
num
=
0
;
for
(
int
y
=
start_h
; y < end_h; ++y) {
for
(
int
x
=
start_w
; x < end_w; ++x) {
sum
+=
read_imageh
(
input,
sampler,
(
int2
)(
pos_in_x
+
x,
pos_in_y
+
y
))
;
num++
;
}
}
half4
avg
=
sum
/
num
;
const
int
pos_out_x
=
mad24
(
out_c,
out_width,
out_w
)
;
write_imageh
(
output,
(
int2
)(
pos_out_x,
out_nh
)
,
avg
)
;
}
\ No newline at end of file
src/operators/kernel/cl/cl_kernel/relu.cl
0 → 100644
浏览文件 @
6ab45d2d
/*
Copyright
(
c
)
2018
PaddlePaddle
Authors.
All
Rights
Reserved.
Licensed
under
the
Apache
License,
Version
2.0
(
the
"License"
)
;
you
may
not
use
this
file
except
in
compliance
with
the
License.
You
may
obtain
a
copy
of
the
License
at
http://www.apache.org/licenses/LICENSE-2.0
Unless
required
by
applicable
law
or
agreed
to
in
writing,
software
distributed
under
the
License
is
distributed
on
an
"AS IS"
BASIS,
WITHOUT
WARRANTIES
OR
CONDITIONS
OF
ANY
KIND,
either
express
or
implied.
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
__kernel
void
relu
(
__read_only
image2d_t
input,
__write_only
image2d_t
output
)
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
|
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
half4
r
=
read_imageh
(
input,
sampler,
int2
(
x,
y
))
;
r
=
max
(
half4
(
0
,
0
,
0
,
0
)
,
r
)
;
write_imageh
(
output,
int2
(
x,
y
)
,
r
)
;
}
\ No newline at end of file
src/operators/kernel/cl/cl_kernel/reshape.cl
0 → 100644
浏览文件 @
6ab45d2d
/*
Copyright
(
c
)
2018
PaddlePaddle
Authors.
All
Rights
Reserved.
Licensed
under
the
Apache
License,
Version
2.0
(
the
"License"
)
;
you
may
not
use
this
file
except
in
compliance
with
the
License.
You
may
obtain
a
copy
of
the
License
at
http://www.apache.org/licenses/LICENSE-2.0
Unless
required
by
applicable
law
or
agreed
to
in
writing,
software
distributed
under
the
License
is
distributed
on
an
"AS IS"
BASIS,
WITHOUT
WARRANTIES
OR
CONDITIONS
OF
ANY
KIND,
either
express
or
implied.
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
__kernel
void
reshape
(
__read_only
image2d_t
input,
__write_only
image2d_t
output,
__private
const
int
d0,
__private
const
int
d1,
__private
const
int
d2,
__private
const
int
d3,
__private
const
int
x0,
__private
const
int
x1,
__private
const
int
x2,
__private
const
int
x3
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
int
obx
=
x
/
x3
;
int
oby
=
y
/
x2
;
int
ox
=
x
%
x3
;
int
oy
=
y
%
x2
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
|
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
half4
r
;
for
(
int
i
=
0
; i < 4; i++) {
int
t
=
obx
*
4
+
i
;
if
(
t
>
x1
)
break
;
int
oindex
=
oby
*
x1
*
x2
*
x3
+
t
*
x2
*
x3
+
ox
*
x3
+
oy
;
int
i0,
i1,
i2,
i3
;
int
i3
=
oindex
%
d3
; oindex /= d3;
int
i2
=
oindex
%
d2
; oindex /= d2;
int
i1
=
oindex
%
d1
; oindex /= d1;
int
i0
=
oindex
;
int
ix
=
(
i1
/
4
)
*
d3
+
i3
;
int
iy
=
i0
*
d2
+
i2
;
r[i]
=
read_imageh
(
input,
sampler,
int2
(
ix,
iy
))
[i1%4]
;
}
write_imageh
(
output,
int2
(
x,
y
)
,
r
)
;
}
\ No newline at end of file
src/operators/kernel/cl/cl_kernel/softmax.cl
0 → 100644
浏览文件 @
6ab45d2d
/*
Copyright
(
c
)
2018
PaddlePaddle
Authors.
All
Rights
Reserved.
Licensed
under
the
Apache
License,
Version
2.0
(
the
"License"
)
;
you
may
not
use
this
file
except
in
compliance
with
the
License.
You
may
obtain
a
copy
of
the
License
at
http://www.apache.org/licenses/LICENSE-2.0
Unless
required
by
applicable
law
or
agreed
to
in
writing,
software
distributed
under
the
License
is
distributed
on
an
"AS IS"
BASIS,
WITHOUT
WARRANTIES
OR
CONDITIONS
OF
ANY
KIND,
either
express
or
implied.
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
__kernel
void
softmax
(
__read_only
image2d_t
input,
__write_only
image2d_t
output,
__private
const
int
d0,
__private
const
int
d1,
__private
const
int
d2,
__private
const
int
d3
)
{
const
int
z
=
get_global_id
(
0
)
;
const
int
x
=
get_global_id
(
1
)
;
const
int
y
=
get_global_id
(
2
)
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
|
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
half4
maxv
=
read_imageh
(
input,
sampler,
int2
(
z
*
d3,
y
))
;
half4
buf[d3]
=
{piece}
;
for
(
int
i
=
1
; i < d3; i++) {
buf[i]
=
read_imageh
(
input,
sampler,
int2
(
z
*
d3
+
i,
y
))
;
maxv
=
max
(
maxv,
buf[i]
)
;
}
float4
sum
=
0
;
for
(
int
i
=
0
; i < d3; i++) {
buf[i]
=
exp
(
buf[i]
-
maxv
)
;
sum
+=
buf[i]
;
}
half4
r
=
buf[x]
/
sum
;
write_imageh
(
output,
int2
(
z
*
d3
+
x,
y
)
,
r
)
;
}
src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
浏览文件 @
6ab45d2d
...
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include "operators/kernel/conv_add_bn_relu_kernel.h"
#include "framework/cl/cl_image.h"
#include "framework/cl/cl_tool.h"
namespace
paddle_mobile
{
namespace
operators
{
...
...
@@ -56,15 +57,15 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
framework
::
CLImage
*
new_scale
=
new
framework
::
CLImage
();
new_scale
->
Init
(
this
->
cl_helper_
.
CLContext
(),
new_scale_ptr
,
variance
->
dims
());
new_scale
->
SetTensorData
(
new_scale_ptr
,
variance
->
dims
());
new_scale
->
InitCLImage
(
this
->
cl_helper_
.
CLContext
());
framework
::
CLImage
*
new_bias
=
new
framework
::
CLImage
();
new_bias
->
Init
(
this
->
cl_helper_
.
CLContext
(),
new_bias_ptr
,
variance
->
dims
());
new_bias
->
SetTensorData
(
new_bias_ptr
,
variance
->
dims
());
new_bias
->
InitCLImage
(
this
->
cl_helper_
.
CLContext
());
param
->
SetNewScale
(
new_scale
);
param
->
SetNewBias
(
new_bias
);
PADDLE_MOBILE_ENFORCE
(
...
...
@@ -115,26 +116,32 @@ void ConvAddBNReluKernel<GPU_CL, float>::Compute(
int
output_width
=
param
.
Output
()
->
WidthOfOneBlock
();
int
output_height
=
param
.
Output
()
->
HeightOfOneBlock
();
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
input
);
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
biase
);
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_mem
),
&
new_scale
);
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_mem
),
&
new_bias
);
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_mem
),
&
output
);
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
stride
);
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
offset
);
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_c
);
clSetKernelArg
(
kernel
,
12
,
sizeof
(
int
),
&
dilation
);
clSetKernelArg
(
kernel
,
13
,
sizeof
(
int
),
&
input_width
);
clSetKernelArg
(
kernel
,
14
,
sizeof
(
int
),
&
input_height
);
clSetKernelArg
(
kernel
,
15
,
sizeof
(
int
),
&
output_width
);
clSetKernelArg
(
kernel
,
16
,
sizeof
(
int
),
&
output_height
);
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
cl_int
status
;
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
status
=
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
status
=
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
status
=
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
input
);
status
=
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
status
=
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
biase
);
status
=
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_mem
),
&
new_scale
);
status
=
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_mem
),
&
new_bias
);
status
=
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_mem
),
&
output
);
status
=
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
stride
);
status
=
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
offset
);
status
=
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_c
);
status
=
clSetKernelArg
(
kernel
,
12
,
sizeof
(
int
),
&
dilation
);
status
=
clSetKernelArg
(
kernel
,
13
,
sizeof
(
int
),
&
input_width
);
status
=
clSetKernelArg
(
kernel
,
14
,
sizeof
(
int
),
&
input_height
);
status
=
clSetKernelArg
(
kernel
,
15
,
sizeof
(
int
),
&
output_width
);
status
=
clSetKernelArg
(
kernel
,
16
,
sizeof
(
int
),
&
output_height
);
CL_CHECK_ERRORS
(
status
);
status
=
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
CL_CHECK_ERRORS
(
status
);
}
template
class
ConvAddBNReluKernel
<
GPU_CL
,
float
>;
...
...
src/operators/kernel/cl/conv_add_kernel.cpp
浏览文件 @
6ab45d2d
...
...
@@ -65,24 +65,31 @@ void ConvAddKernel<GPU_CL, float>::Compute(
int
output_width
=
param
.
Output
()
->
WidthOfOneBlock
();
int
output_height
=
param
.
Output
()
->
HeightOfOneBlock
();
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
input
);
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
biase
);
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_mem
),
&
output
);
clSetKernelArg
(
kernel
,
7
,
sizeof
(
int
),
&
stride
);
clSetKernelArg
(
kernel
,
8
,
sizeof
(
int
),
&
offset
);
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
input_c
);
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
dilation
);
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_width
);
clSetKernelArg
(
kernel
,
12
,
sizeof
(
int
),
&
input_height
);
clSetKernelArg
(
kernel
,
13
,
sizeof
(
int
),
&
output_width
);
clSetKernelArg
(
kernel
,
14
,
sizeof
(
int
),
&
output_height
);
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
cl_int
status
;
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
status
=
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
status
=
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
status
=
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
input
);
status
=
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
status
=
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
biase
);
status
=
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_mem
),
&
output
);
status
=
clSetKernelArg
(
kernel
,
7
,
sizeof
(
int
),
&
stride
);
status
=
clSetKernelArg
(
kernel
,
8
,
sizeof
(
int
),
&
offset
);
status
=
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
input_c
);
status
=
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
dilation
);
status
=
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_width
);
status
=
clSetKernelArg
(
kernel
,
12
,
sizeof
(
int
),
&
input_height
);
status
=
clSetKernelArg
(
kernel
,
13
,
sizeof
(
int
),
&
output_width
);
status
=
clSetKernelArg
(
kernel
,
14
,
sizeof
(
int
),
&
output_height
);
CL_CHECK_ERRORS
(
status
);
status
=
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
CL_CHECK_ERRORS
(
status
);
}
template
class
ConvAddKernel
<
GPU_CL
,
float
>;
...
...
src/operators/kernel/cl/conv_kernel.cpp
浏览文件 @
6ab45d2d
...
...
@@ -21,63 +21,69 @@ namespace operators {
template
<
>
bool
ConvKernel
<
GPU_CL
,
float
>::
Init
(
ConvParam
<
GPU_CL
>
*
param
)
{
//
PADDLE_MOBILE_ENFORCE(
//
param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
//
param->Paddings()[0] == param->Paddings()[1],
//
"need equal");
// int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
// static_cast<int>(param->Paddings()[1]);
// param->SetOffset(offset
);
//
// if (param->Filter()->WidthOfOneBlock() == 1 &&
// param->Filter()->HeightOfOneBlock() == 1) {
// this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl");
// } else if (param->Filter()->dims()[1] == 1) {
// this->cl_helper_.AddKernel("depth_conv_3x3",
//
"conv_add_bn_relu_kernel.cl");
//
} else if (param->Filter()->WidthOfOneBlock() == 3 &&
//
param->Filter()->HeightOfOneBlock() == 3) {
//
this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
//
} else {
//
PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
//
}
PADDLE_MOBILE_ENFORCE
(
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
"need equal"
);
int
offset
=
static_cast
<
int
>
(
param
->
Filter
()
->
dims
()[
2
])
/
2
-
static_cast
<
int
>
(
param
->
Paddings
()[
1
]
);
param
->
SetOffset
(
offset
);
if
(
param
->
Filter
()
->
WidthOfOneBlock
()
==
1
&&
param
->
Filter
()
->
HeightOfOneBlock
()
==
1
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1"
,
"conv_add_bn_relu_kernel.cl"
);
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
)
{
this
->
cl_helper_
.
AddKernel
(
"depth_conv_3x3"
,
"conv_add_bn_relu_kernel.cl"
);
}
else
if
(
param
->
Filter
()
->
WidthOfOneBlock
()
==
3
&&
param
->
Filter
()
->
HeightOfOneBlock
()
==
3
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_3x3"
,
"conv_add_bn_relu_kernel.cl"
);
}
else
{
PADDLE_MOBILE_THROW_EXCEPTION
(
" not support "
);
}
return
true
;
}
template
<
>
void
ConvKernel
<
GPU_CL
,
float
>::
Compute
(
const
ConvParam
<
GPU_CL
>
&
param
)
{
// auto kernel = this->cl_helper_.KernelAt(0);
// auto default_work_size =
// this->cl_helper_.DefaultWorkSize(*param.Output()); int c_block =
// default_work_size[0]; int w = default_work_size[1]; int nh =
// default_work_size[2]; auto input = param.Input()->GetCLImage(); auto
// filter = param.Filter()->GetCLImage(); auto output = param.Output(); int
// stride = param.Strides()[0]; int offset = param.Offset(); int input_c =
// param.Input()->CBlock(); int dilation = param.Dilations()[0]; int
// input_width = param.Input()->WidthOfOneBlock(); int input_height =
// param.Input()->HeightOfOneBlock();
//
// clSetKernelArg(kernel, 0, sizeof(int), &c_block);
// clSetKernelArg(kernel, 1, sizeof(int), &w);
// clSetKernelArg(kernel, 2, sizeof(int), &nh);
// clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
// clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
// clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
// clSetKernelArg(kernel, 6, sizeof(int), &stride);
// clSetKernelArg(kernel, 7, sizeof(int), &offset);
// clSetKernelArg(kernel, 8, sizeof(int), &input_c);
// clSetKernelArg(kernel, 9, sizeof(int), &dilation);
// clSetKernelArg(kernel, 10, sizeof(int), &input_width);
// clSetKernelArg(kernel, 11, sizeof(int), &input_height);
//
// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
// default_work_size.data(), NULL, 0, NULL, NULL);
// auto kernel = this->cl_helper_.KernelAt(0);
// size_t global_work_size[3] = {1, 2, 3};
// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
// global_work_size, NULL, 0, NULL, NULL);
auto
kernel
=
this
->
cl_helper_
.
KernelAt
(
0
);
auto
default_work_size
=
this
->
cl_helper_
.
DefaultWorkSize
(
*
param
.
Output
());
int
c_block
=
default_work_size
[
0
];
int
w
=
default_work_size
[
1
];
int
nh
=
default_work_size
[
2
];
auto
input
=
param
.
Input
()
->
GetCLImage
();
auto
filter
=
param
.
Filter
()
->
GetCLImage
();
auto
output
=
param
.
Output
();
int
stride
=
param
.
Strides
()[
0
];
int
offset
=
param
.
Offset
();
int
input_c
=
param
.
Input
()
->
CBlock
();
int
dilation
=
param
.
Dilations
()[
0
];
int
input_width
=
param
.
Input
()
->
WidthOfOneBlock
();
int
input_height
=
param
.
Input
()
->
HeightOfOneBlock
();
cl_int
status
;
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
status
=
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
status
=
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
status
=
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
input
);
status
=
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
status
=
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
output
);
status
=
clSetKernelArg
(
kernel
,
6
,
sizeof
(
int
),
&
stride
);
status
=
clSetKernelArg
(
kernel
,
7
,
sizeof
(
int
),
&
offset
);
status
=
clSetKernelArg
(
kernel
,
8
,
sizeof
(
int
),
&
input_c
);
status
=
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
dilation
);
status
=
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
input_width
);
status
=
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_height
);
CL_CHECK_ERRORS
(
status
);
status
=
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
CL_CHECK_ERRORS
(
status
);
}
template
class
ConvKernel
<
GPU_CL
,
float
>;
...
...
src/operators/kernel/cl/depthwise_conv_kernel.cpp
浏览文件 @
6ab45d2d
...
...
@@ -55,23 +55,30 @@ void DepthwiseConvKernel<GPU_CL, float>::Compute(
int
output_width
=
param
.
Output
()
->
WidthOfOneBlock
();
int
output_height
=
param
.
Output
()
->
HeightOfOneBlock
();
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
input
);
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
output
);
clSetKernelArg
(
kernel
,
6
,
sizeof
(
int
),
&
stride
);
clSetKernelArg
(
kernel
,
7
,
sizeof
(
int
),
&
offset
);
clSetKernelArg
(
kernel
,
8
,
sizeof
(
int
),
&
input_c
);
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
dilation
);
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
input_width
);
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_height
);
clSetKernelArg
(
kernel
,
12
,
sizeof
(
int
),
&
output_width
);
clSetKernelArg
(
kernel
,
13
,
sizeof
(
int
),
&
output_height
);
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
cl_int
status
;
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
status
=
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
status
=
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
status
=
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
input
);
status
=
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
status
=
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
output
);
status
=
clSetKernelArg
(
kernel
,
6
,
sizeof
(
int
),
&
stride
);
status
=
clSetKernelArg
(
kernel
,
7
,
sizeof
(
int
),
&
offset
);
status
=
clSetKernelArg
(
kernel
,
8
,
sizeof
(
int
),
&
input_c
);
status
=
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
dilation
);
status
=
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
input_width
);
status
=
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_height
);
status
=
clSetKernelArg
(
kernel
,
12
,
sizeof
(
int
),
&
output_width
);
status
=
clSetKernelArg
(
kernel
,
13
,
sizeof
(
int
),
&
output_height
);
CL_CHECK_ERRORS
(
status
);
status
=
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
CL_CHECK_ERRORS
(
status
);
}
template
class
DepthwiseConvKernel
<
GPU_CL
,
float
>;
...
...
src/operators/kernel/cl/feed_kernel.cpp
浏览文件 @
6ab45d2d
...
...
@@ -27,6 +27,7 @@ bool FeedKernel<GPU_CL, float>::Init(FeedParam<GPU_CL> *param) {
template
<
>
void
FeedKernel
<
GPU_CL
,
float
>::
Compute
(
const
FeedParam
<
GPU_CL
>
&
param
)
{
<<<<<<<
HEAD
auto
kernel
=
this
->
cl_helper_
.
KernelAt
(
0
);
cl_int
status
;
auto
output
=
param
.
Out
();
...
...
@@ -38,6 +39,19 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> ¶m) {
int
height
=
output
->
dims
()[
2
];
int
width
=
output
->
dims
()[
3
];
DLOG
<<
output
->
dims
();
=======
DLOG
<<
"feed_kernel"
;
auto
kernel
=
this
->
cl_helper_
.
KernelAt
(
0
);
cl_int
status
;
auto
output
=
param
.
Out
();
auto
input
=
param
.
InputX
();
DLOG
<<
" input: "
<<
input
;
const
float
*
input_data
=
input
->
data
<
float
>
();
cl_mem
cl_image
=
output
->
GetCLImage
();
int
height
=
output
->
dims
()[
2
];
int
width
=
output
->
dims
()[
3
];
>>>>>>>
df230944d11f0f09aea4c2c6bc0489d8667fa8ca
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_mem
),
&
input_data
);
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_mem
),
&
cl_image
);
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_mem
),
&
width
);
...
...
src/operators/kernel/cl/fetch_kernel.cpp
浏览文件 @
6ab45d2d
...
...
@@ -19,11 +19,45 @@ namespace operators {
template
<
>
bool
FetchKernel
<
GPU_CL
,
float
>::
Init
(
FetchParam
<
GPU_CL
>
*
param
)
{
this
->
cl_helper_
.
AddKernel
(
"fetch"
,
"fetch_kernel.cl"
);
return
true
;
}
template
<
>
void
FetchKernel
<
GPU_CL
,
float
>::
Compute
(
const
FetchParam
<
GPU_CL
>
&
param
)
{}
void
FetchKernel
<
GPU_CL
,
float
>::
Compute
(
const
FetchParam
<
GPU_CL
>
&
param
)
{
auto
kernel
=
this
->
cl_helper_
.
KernelAt
(
0
);
auto
default_work_size
=
this
->
cl_helper_
.
DefaultWorkSize
(
*
param
.
InputX
());
auto
input
=
param
.
InputX
()
->
GetCLImage
();
auto
*
out
=
param
.
Out
();
const
auto
&
dims
=
param
.
InputX
()
->
dims
();
const
int
N
=
dims
[
0
];
const
int
C
=
dims
[
1
];
const
int
in_height
=
dims
[
2
];
const
int
in_width
=
dims
[
3
];
int
size_ch
=
in_height
*
in_width
;
int
size_block
=
size_ch
*
4
;
int
size_batch
=
size_ch
*
C
;
// need create outputBuffer
cl_image_format
imageFormat
;
imageFormat
.
image_channel_order
=
CL_RGBA
;
imageFormat
.
image_channel_data_type
=
CL_FLOAT
;
cl_mem
outputBuffer
;
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
in_height
);
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
in_width
);
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
size_ch
);
clSetKernelArg
(
kernel
,
3
,
sizeof
(
int
),
&
size_block
);
clSetKernelArg
(
kernel
,
4
,
sizeof
(
int
),
&
size_batch
);
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
input
);
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_mem
),
&
outputBuffer
);
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
}
template
class
FetchKernel
<
GPU_CL
,
float
>;
...
...
src/operators/kernel/cl/pool_kernel.cpp
浏览文件 @
6ab45d2d
...
...
@@ -21,11 +21,51 @@ namespace operators {
template
<
>
bool
PoolKernel
<
GPU_CL
,
float
>::
Init
(
PoolParam
<
GPU_CL
>
*
param
)
{
std
::
string
pooling_type
=
param
->
PoolingType
();
this
->
cl_helper_
.
AddKernel
(
"pool_"
+
pooling_type
,
"pool_kernel.cl"
);
return
true
;
}
template
<
>
void
PoolKernel
<
GPU_CL
,
float
>::
Compute
(
const
PoolParam
<
GPU_CL
>
&
param
)
{}
void
PoolKernel
<
GPU_CL
,
float
>::
Compute
(
const
PoolParam
<
GPU_CL
>
&
param
)
{
auto
kernel
=
this
->
cl_helper_
.
KernelAt
(
0
);
auto
default_work_size
=
this
->
cl_helper_
.
DefaultWorkSize
(
*
param
.
Output
());
auto
input
=
param
.
Input
()
->
GetCLImage
();
auto
out
=
param
.
Output
()
->
GetCLImage
();
const
int
in_height
=
param
.
Input
()
->
HeightOfOneBlock
();
const
int
in_width
=
param
.
Input
()
->
WidthOfOneBlock
();
const
int
out_height
=
param
.
Output
()
->
HeightOfOneBlock
();
const
int
out_width
=
param
.
Output
()
->
WidthOfOneBlock
();
std
::
string
pooling_type
=
param
.
PoolingType
();
std
::
vector
<
int
>
ksize
=
param
.
Ksize
();
std
::
vector
<
int
>
strides
=
param
.
Strides
();
std
::
vector
<
int
>
paddings
=
param
.
Paddings
();
const
int
pad_top
=
paddings
[
0
];
const
int
pad_left
=
paddings
[
1
];
const
int
stride_h
=
strides
[
0
];
const
int
stride_w
=
strides
[
1
];
const
int
ksize_h
=
ksize
[
0
];
const
int
ksize_w
=
ksize
[
1
];
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_int
),
&
in_height
);
clSetKernelArg
(
kernel
,
1
,
sizeof
(
cl_int
),
&
in_width
);
clSetKernelArg
(
kernel
,
2
,
sizeof
(
cl_int
),
&
out_height
);
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_int
),
&
out_width
);
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_int
),
&
pad_top
);
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_int
),
&
pad_left
);
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_int
),
&
stride_h
);
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_int
),
&
stride_w
);
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_int
),
&
ksize_h
);
clSetKernelArg
(
kernel
,
9
,
sizeof
(
cl_int
),
&
ksize_w
);
clSetKernelArg
(
kernel
,
10
,
sizeof
(
cl_mem
),
&
input
);
clSetKernelArg
(
kernel
,
11
,
sizeof
(
cl_mem
),
&
out
);
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
}
template
class
PoolKernel
<
GPU_CL
,
float
>;
...
...
src/operators/kernel/cl/relu_kernel.cpp
浏览文件 @
6ab45d2d
...
...
@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RELU_OP
#include "operators/kernel/relu_kernel.h"
...
...
@@ -18,14 +19,28 @@ namespace paddle_mobile {
namespace
operators
{
template
<
>
bool
ReluKernel
<
GPU_CL
,
float
>::
Init
(
ReluParam
<
GPU_CL
>
*
param
)
{
bool
ReluKernel
<
GPU_CL
,
float
>::
Init
(
ReluParam
<
GPU_CL
>*
param
)
{
this
->
cl_helper_
.
AddKernel
(
"relu"
,
"relu.cl"
);
return
true
;
}
template
<
>
void
ReluKernel
<
GPU_CL
,
float
>::
Compute
(
const
ReluParam
<
GPU_CL
>
&
param
)
{}
void
ReluKernel
<
GPU_CL
,
float
>::
Compute
(
const
ReluParam
<
GPU_CL
>&
param
)
{
auto
kernel
=
this
->
cl_helper_
.
KernelAt
(
0
);
const
auto
*
input
=
param
.
InputX
();
auto
*
output
=
param
.
Out
();
auto
default_work_size
=
this
->
cl_helper_
.
DefaultWorkSize
(
*
output
);
auto
inputImage
=
input
->
GetCLImage
();
auto
outputImage
=
output
->
GetCLImage
();
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_mem
),
&
inputImage
);
clSetKernelArg
(
kernel
,
1
,
sizeof
(
cl_mem
),
&
outputImage
);
const
size_t
work_size
[
2
]
=
{
input
->
ImageWidth
(),
input
->
ImageHeight
()};
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
work_size
,
NULL
,
0
,
NULL
,
NULL
);
}
template
class
ReluKernel
<
GPU_CL
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/cl/reshape_kernel.cpp
浏览文件 @
6ab45d2d
...
...
@@ -19,11 +19,36 @@ namespace operators {
template
<
>
bool
ReshapeKernel
<
GPU_CL
,
float
>::
Init
(
ReshapeParam
<
GPU_CL
>
*
param
)
{
this
->
cl_helper_
.
AddKernel
(
"reshape"
,
"reshape.cl"
);
return
true
;
}
template
<
>
void
ReshapeKernel
<
GPU_CL
,
float
>::
Compute
(
const
ReshapeParam
<
GPU_CL
>
&
param
)
{}
void
ReshapeKernel
<
GPU_CL
,
float
>::
Compute
(
const
ReshapeParam
<
GPU_CL
>
&
param
)
{
auto
kernel
=
this
->
cl_helper_
.
KernelAt
(
0
);
const
auto
*
input
=
param
.
InputX
();
auto
*
output
=
param
.
Out
();
auto
inputImage
=
input
->
GetCLImage
();
auto
outputImage
=
output
->
GetCLImage
();
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_mem
),
&
inputImage
);
clSetKernelArg
(
kernel
,
1
,
sizeof
(
cl_mem
),
&
outputImage
);
const
auto
&
inputDim
=
input
->
dims
();
const
auto
&
outputDim
=
output
->
dims
();
int
dims
[
4
]
=
{
inputDim
[
0
],
inputDim
[
1
],
inputDim
[
2
],
inputDim
[
3
]};
int
odims
[
4
]
=
{
outputDim
[
0
],
outputDim
[
1
],
outputDim
[
2
],
outputDim
[
3
]};
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
dims
);
clSetKernelArg
(
kernel
,
3
,
sizeof
(
int
),
dims
+
1
);
clSetKernelArg
(
kernel
,
4
,
sizeof
(
int
),
dims
+
2
);
clSetKernelArg
(
kernel
,
5
,
sizeof
(
int
),
dims
+
3
);
clSetKernelArg
(
kernel
,
6
,
sizeof
(
int
),
odims
);
clSetKernelArg
(
kernel
,
7
,
sizeof
(
int
),
odims
+
1
);
clSetKernelArg
(
kernel
,
8
,
sizeof
(
int
),
odims
+
2
);
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
odims
+
3
);
const
size_t
work_size
[
2
]
=
{
output
->
ImageWidth
(),
output
->
ImageHeight
()};
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
2
,
NULL
,
work_size
,
NULL
,
0
,
NULL
,
NULL
);
}
template
class
ReshapeKernel
<
GPU_CL
,
float
>;
...
...
src/operators/kernel/cl/softmax_kernel.cpp
浏览文件 @
6ab45d2d
...
...
@@ -21,11 +21,30 @@ namespace operators {
template
<
>
bool
SoftmaxKernel
<
GPU_CL
,
float
>::
Init
(
SoftmaxParam
<
GPU_CL
>
*
param
)
{
this
->
cl_helper_
.
AddKernel
(
"softmax"
,
"softmax.cl"
);
return
true
;
}
template
<
>
void
SoftmaxKernel
<
GPU_CL
,
float
>::
Compute
(
const
SoftmaxParam
<
GPU_CL
>
&
param
)
{}
void
SoftmaxKernel
<
GPU_CL
,
float
>::
Compute
(
const
SoftmaxParam
<
GPU_CL
>
&
param
)
{
auto
kernel
=
this
->
cl_helper_
.
KernelAt
(
0
);
auto
default_work_size
=
this
->
cl_helper_
.
DefaultWorkSize
(
*
(
param
.
Out
()));
const
auto
*
input
=
param
.
InputX
();
auto
*
output
=
param
.
Out
();
auto
inputImage
=
input
->
GetCLImage
();
auto
outputImage
=
output
->
GetCLImage
();
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_mem
),
&
inputImage
);
clSetKernelArg
(
kernel
,
1
,
sizeof
(
cl_mem
),
&
outputImage
);
const
auto
&
inputDim
=
input
->
dims
();
int
dims
[
4
]
=
{
inputDim
[
0
],
inputDim
[
1
],
inputDim
[
2
],
inputDim
[
3
]};
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
dims
);
clSetKernelArg
(
kernel
,
3
,
sizeof
(
int
),
dims
+
1
);
clSetKernelArg
(
kernel
,
4
,
sizeof
(
int
),
dims
+
2
);
clSetKernelArg
(
kernel
,
5
,
sizeof
(
int
),
dims
+
3
);
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
}
template
class
SoftmaxKernel
<
GPU_CL
,
float
>;
...
...
src/operators/op_param.h
浏览文件 @
6ab45d2d
...
...
@@ -614,6 +614,14 @@ class BatchNormParam : OpParam {
const
string
&
DataFormat
()
const
{
return
data_format_
;
}
void
SetNewScale
(
RType
*
new_scale
)
{
new_scale_
=
new_scale
;
}
void
SetNewBias
(
RType
*
new_bias
)
{
new_bias_
=
new_bias
;
}
const
RType
*
NewScale
()
const
{
return
new_scale_
;
}
const
RType
*
NewBias
()
const
{
return
new_bias_
;
}
private:
RType
*
input_x_
;
RType
*
output_y_
;
...
...
@@ -625,6 +633,8 @@ class BatchNormParam : OpParam {
float
momentum_
;
bool
is_test_
;
string
data_format_
;
RType
*
new_bias_
;
RType
*
new_scale_
;
};
#endif
...
...
@@ -936,10 +946,21 @@ class FetchParam : public OpParam {
FetchParam
(
const
VariableNameMap
&
inputs
,
const
VariableNameMap
&
outputs
,
const
AttributeMap
&
attrs
,
const
Scope
&
scope
)
{
input_x_
=
InputXFrom
<
GType
>
(
inputs
,
scope
);
<<<<<<<
HEAD
out_
=
OutFrom
<
LoDTensor
>
(
outputs
,
scope
);
}
const
RType
*
InputX
()
const
{
return
input_x_
;
}
Tensor
*
Out
()
const
{
return
out_
;
}
=======
out_
=
OutFrom
(
outputs
,
scope
);
}
const
RType
*
InputX
()
const
{
return
input_x_
;
}
Tensor
*
Out
()
const
{
return
out_
;
}
static
Tensor
*
OutFrom
(
const
VariableNameMap
&
outputs
,
const
Scope
&
scope
)
{
return
GetVarValue
<
Tensor
>
(
"Out"
,
outputs
,
scope
);
}
>>>>>>>
df230944d11f0f09aea4c2c6bc0489d8667fa8ca
private:
RType
*
input_x_
;
...
...
test/net/test_googlenet.cpp
浏览文件 @
6ab45d2d
...
...
@@ -29,8 +29,8 @@ int main() {
bool
optimize
=
true
;
auto
time1
=
time
();
if
(
paddle_mobile
.
Load
(
g_googlenet
,
optimize
))
{
auto
time2
=
time
();
std
::
cout
<<
"load cost :"
<<
time_diff
(
time1
,
time2
)
<<
"ms"
<<
std
::
endl
;
auto
time2
=
paddle_mobile
::
time
();
std
::
cout
<<
"load cost :"
<<
paddle_mobile
::
time_diff
(
time1
,
time2
)
<<
"ms"
<<
std
::
endl
;
std
::
vector
<
float
>
input
;
std
::
vector
<
int64_t
>
dims
{
1
,
3
,
224
,
224
};
GetInput
<
float
>
(
g_test_image_1x3x224x224
,
&
input
,
dims
);
...
...
test/net/test_mobilenet_GPU.cpp
浏览文件 @
6ab45d2d
...
...
@@ -19,14 +19,14 @@ limitations under the License. */
int
main
()
{
paddle_mobile
::
PaddleMobile
<
paddle_mobile
::
GPU_CL
>
paddle_mobile
;
// paddle_mobile.SetThreadNum(4);
auto
time1
=
time
();
auto
time1
=
paddle_mobile
::
time
();
// auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
// std::string(g_mobilenet_detect) + "/params", true);
auto
isok
=
paddle_mobile
.
Load
(
g_mobilenet
,
false
);
if
(
isok
)
{
auto
time2
=
time
();
std
::
cout
<<
"load cost :"
<<
time_diff
(
time1
,
time1
)
<<
"ms"
<<
std
::
endl
;
auto
time2
=
paddle_mobile
::
time
();
std
::
cout
<<
"load cost :"
<<
paddle_mobile
::
time_diff
(
time1
,
time1
)
<<
"ms"
<<
std
::
endl
;
std
::
vector
<
float
>
input
;
std
::
vector
<
int64_t
>
dims
{
1
,
3
,
224
,
224
};
...
...
@@ -42,13 +42,13 @@ int main() {
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
auto
vec_result
=
paddle_mobile
.
Predict
(
input
,
dims
);
}
auto
time3
=
time
();
auto
time3
=
paddle_mobile
::
time
();
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
auto
vec_result
=
paddle_mobile
.
Predict
(
input
,
dims
);
}
DLOG
<<
vec_result
;
auto
time4
=
time
();
std
::
cout
<<
"predict cost :"
<<
time_diff
(
time3
,
time4
)
/
10
<<
"ms"
auto
time4
=
paddle_mobile
::
time
();
std
::
cout
<<
"predict cost :"
<<
paddle_mobile
::
time_diff
(
time3
,
time4
)
/
10
<<
"ms"
<<
std
::
endl
;
}
...
...
tools/web-exporter/CMakeLists.txt
0 → 100644
浏览文件 @
6ab45d2d
cmake_minimum_required
(
VERSION 3.6
)
project
(
web-exporter
)
set
(
CMAKE_CXX_STANDARD 11
)
file
(
GLOB PADDLE_MOBILE_CPP_FILES
"../../src/common/*.c"
"../../src/common/*.cpp"
"../../src/memory/*.cpp"
"../../src/framework/*.c"
"../../src/framework/*.cpp"
"../../src/framework/program/*.cpp"
"../../src/framework/program/program-optimize/*.cpp"
)
file
(
GLOB EXPORT_CPP_FILES
"*.cpp"
)
add_executable
(
web-exporter
${
PADDLE_MOBILE_CPP_FILES
}
${
EXPORT_CPP_FILES
}
)
target_include_directories
(
web-exporter PRIVATE
"../../src"
)
target_link_libraries
(
web-exporter
)
\ No newline at end of file
tools/web-exporter/export-nodejs.cpp
0 → 100644
浏览文件 @
6ab45d2d
#include "export.h"
inline
std
::
string
indent
(
int
i
)
{
return
std
::
string
(
i
,
' '
);
}
void
export_nodejs
(
ProgramPtr
program
,
ScopePtr
scope
,
std
::
ostream
&
os
)
{
os
<<
"module.exports.program = {
\n
"
;
os
<<
indent
(
2
)
<<
var2str
(
"blocks"
)
<<
": [
\n
"
;
for
(
const
auto
&
block
:
program
->
Blocks
())
{
os
<<
indent
(
4
)
<<
"{
\n
"
;
os
<<
indent
(
6
)
<<
var2str
(
"vars"
)
<<
": {
\n
"
;
for
(
const
auto
&
var
:
block
->
Vars
())
{
const
auto
&
dim
=
var
->
Tensor_desc
().
Dims
();
os
<<
indent
(
8
)
<<
var2str
(
var
->
Name
())
<<
": {
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"dim"
)
<<
": "
<<
var2str
(
dim
)
<<
",
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"persistable"
)
<<
": "
<<
var2str
(
var
->
Persistable
())
<<
"
\n
"
;
os
<<
indent
(
8
)
<<
"},
\n
"
;
}
os
<<
indent
(
6
)
<<
"},
\n
"
;
os
<<
indent
(
6
)
<<
var2str
(
"ops"
)
<<
": [
\n
"
;
for
(
const
auto
&
op
:
block
->
Ops
())
{
os
<<
indent
(
8
)
<<
"{
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"type"
)
<<
": "
<<
var2str
(
op
->
Type
())
<<
",
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"inputs"
)
<<
": {
\n
"
;
for
(
const
auto
&
kv
:
op
->
GetInputs
())
{
os
<<
indent
(
12
)
<<
var2str
(
kv
.
first
)
<<
": "
<<
var2str
(
kv
.
second
)
<<
",
\n
"
;
}
os
<<
indent
(
10
)
<<
"},
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"outputs"
)
<<
": {
\n
"
;
for
(
const
auto
&
kv
:
op
->
GetInputs
())
{
os
<<
indent
(
12
)
<<
var2str
(
kv
.
first
)
<<
": "
<<
var2str
(
kv
.
second
)
<<
",
\n
"
;
}
os
<<
indent
(
10
)
<<
"},
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"attrs"
)
<<
": {
\n
"
;
for
(
const
auto
&
kv
:
op
->
GetAttrMap
())
{
os
<<
indent
(
12
)
<<
var2str
(
kv
.
first
)
<<
": "
;
os
<<
decltype
(
kv
.
second
)
::
ApplyVistor
(
VarVisitor
(),
kv
.
second
)
<<
",
\n
"
;
}
os
<<
indent
(
10
)
<<
"},
\n
"
;
os
<<
indent
(
8
)
<<
"},
\n
"
;
}
os
<<
indent
(
6
)
<<
"],
\n
"
;
os
<<
indent
(
4
)
<<
"},
\n
"
;
}
os
<<
indent
(
2
)
<<
"]
\n
"
;
os
<<
"}
\n
"
;
}
tools/web-exporter/export-scope.cpp
0 → 100644
浏览文件 @
6ab45d2d
#include <cstdio>
#include "export.h"
void
export_scope
(
ProgramPtr
program
,
ScopePtr
scope
,
const
std
::
string
&
dirname
)
{
for
(
const
auto
&
block
:
program
->
Blocks
())
{
for
(
const
auto
&
var
:
block
->
Vars
())
{
if
(
var
->
Name
()
==
"feed"
||
var
->
Name
()
==
"fetch"
)
{
continue
;
}
if
(
var
->
Persistable
())
{
auto
*
v
=
scope
->
FindVar
(
var
->
Name
());
assert
(
v
!=
nullptr
);
int
count
=
1
;
for
(
auto
n
:
var
->
Tensor_desc
().
Dims
())
{
count
*=
n
;
}
auto
*
tensor
=
v
->
GetMutable
<
paddle_mobile
::
framework
::
LoDTensor
>
();
const
float
*
p
=
tensor
->
mutable_data
<
float
>
();
std
::
string
para_file_name
=
dirname
+
'/'
+
var
->
Name
();
FILE
*
para_file
=
fopen
(
para_file_name
.
c_str
(),
"w"
);
assert
(
p
!=
nullptr
);
fwrite
(
p
,
sizeof
(
float
),
count
,
para_file
);
fclose
(
para_file
);
// std::cout << "==> " << var->Name() << " " << count << "\n";
// for (int i = 0; i < count; i++) {
// std::cout << p[i] << ", ";
// }
// std::cout << "\n";
}
}
}
}
tools/web-exporter/export.cpp
0 → 100644
浏览文件 @
6ab45d2d
#include "export.h"
#include <sys/stat.h>
#include <sys/types.h>
class
FakeExecutor
:
public
paddle_mobile
::
framework
::
Executor
<
paddle_mobile
::
CPU
,
paddle_mobile
::
Precision
::
FP32
>
{
public:
FakeExecutor
(
const
paddle_mobile
::
framework
::
Program
<
paddle_mobile
::
CPU
>
p
)
{
program_
=
p
;
batch_size_
=
1
;
use_optimize_
=
true
;
loddable_
=
false
;
if
(
use_optimize_
)
{
to_predict_program_
=
program_
.
optimizeProgram
;
}
else
{
to_predict_program_
=
program_
.
originProgram
;
}
auto
*
variable_ptr
=
program_
.
scope
->
Var
(
"batch_size"
);
variable_ptr
[
0
].
SetValue
<
int
>
(
1
);
if
(
program_
.
combined
)
{
InitCombineMemory
();
}
else
{
InitMemory
();
}
}
};
int
main
(
int
argc
,
char
**
argv
)
{
if
(
argc
!=
3
)
{
std
::
cout
<<
"Usage: "
<<
argv
[
0
]
<<
" <combined-modle-dir> <output-dir>
\n
"
;
return
-
1
;
}
std
::
string
model_dir
=
argv
[
1
];
std
::
string
model_path
=
model_dir
+
"/model"
;
std
::
string
para_path
=
model_dir
+
"/params"
;
std
::
string
out_dir
=
argv
[
2
];
std
::
string
out_model_js
=
out_dir
+
"/model.js"
;
std
::
string
out_para_dir
=
out_dir
+
"/paras"
;
mkdir
(
out_dir
.
c_str
(),
S_IRWXU
|
S_IRWXG
|
S_IRWXO
);
mkdir
(
out_para_dir
.
c_str
(),
S_IRWXU
|
S_IRWXG
|
S_IRWXO
);
std
::
cout
<<
"loading "
<<
model_path
<<
" & "
<<
para_path
<<
"
\n
"
;
paddle_mobile
::
framework
::
Loader
<>
loader
;
auto
program
=
loader
.
Load
(
model_path
,
para_path
,
true
);
FakeExecutor
executor
(
program
);
auto
optimizedProgram
=
program
.
optimizeProgram
;
export_scope
(
optimizedProgram
,
program
.
scope
,
out_para_dir
);
std
::
ofstream
fs
(
out_model_js
.
c_str
());
export_nodejs
(
optimizedProgram
,
program
.
scope
,
fs
);
fs
.
close
();
return
0
;
}
tools/web-exporter/export.h
0 → 100644
浏览文件 @
6ab45d2d
#pragma once
#include <iostream>
#include <vector>
#include <memory>
#include <string>
#include <ostream>
#include <fstream>
#include "framework/loader.h"
#include "framework/executor.h"
#include "framework/scope.h"
#include "framework/program/program_desc.h"
// using paddle_mobile::framework::ProgramDesc;
// using paddle_mobile::framework::Scope;
using
ProgramPtr
=
std
::
shared_ptr
<
paddle_mobile
::
framework
::
ProgramDesc
>
;
using
ScopePtr
=
std
::
shared_ptr
<
paddle_mobile
::
framework
::
Scope
>
;
void
export_nodejs
(
ProgramPtr
program
,
ScopePtr
scope
,
std
::
ostream
&
os
=
std
::
cout
);
void
export_scope
(
ProgramPtr
program
,
ScopePtr
scope
,
const
std
::
string
&
dirname
=
"."
);
template
<
typename
T
>
inline
std
::
string
var2str
(
const
T
&
v
)
{
return
std
::
to_string
(
v
);
}
template
<
>
inline
std
::
string
var2str
(
const
std
::
string
&
v
)
{
return
"
\"
"
+
v
+
"
\"
"
;
}
inline
std
::
string
var2str
(
const
char
*
v
)
{
return
var2str
<
std
::
string
>
(
v
);
}
inline
std
::
string
var2str
(
const
bool
v
)
{
return
v
?
"true"
:
"false"
;
}
template
<
typename
T
>
std
::
string
var2str
(
const
std
::
vector
<
T
>
&
v
)
{
std
::
string
r
=
"["
;
auto
s
=
v
.
size
();
for
(
int
i
=
0
;
i
<
s
;
i
++
)
{
if
(
i
)
r
+=
", "
;
r
+=
var2str
(
v
[
i
]);
}
return
r
+
"]"
;
}
struct
VarVisitor
{
using
type_t
=
decltype
(
var2str
(
0
));
template
<
typename
T
>
type_t
operator
()(
const
T
&
v
)
{
return
var2str
(
v
);
}
};
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录