Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
3eef149e
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3eef149e
编写于
10月 15, 2018
作者:
R
Ray Liu
提交者:
GitHub
10月 15, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1084 from codeWorm2015/opencl
Opencl
上级
761cc7f1
2fa2871d
变更
21
显示空白变更内容
内联
并排
Showing
21 changed file
with
167 addition
and
281 deletion
+167
-281
CMakeLists.txt
CMakeLists.txt
+1
-1
src/framework/cl/cl_engine.h
src/framework/cl/cl_engine.h
+1
-1
src/framework/cl/cl_half.cpp
src/framework/cl/cl_half.cpp
+14
-2
src/framework/cl/cl_half.h
src/framework/cl/cl_half.h
+5
-0
src/framework/cl/cl_helper.h
src/framework/cl/cl_helper.h
+7
-1
src/framework/cl/cl_image.h
src/framework/cl/cl_image.h
+33
-24
src/framework/cl/cl_scope.h
src/framework/cl/cl_scope.h
+6
-2
src/framework/executor.cpp
src/framework/executor.cpp
+20
-8
src/framework/operator.cpp
src/framework/operator.cpp
+5
-1
src/operators/kernel/cl/cl_kernel/conv_kernel.cl
src/operators/kernel/cl/cl_kernel/conv_kernel.cl
+3
-1
src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
+11
-4
src/operators/kernel/cl/conv_add_kernel.cpp
src/operators/kernel/cl/conv_add_kernel.cpp
+3
-0
src/operators/kernel/cl/conv_kernel.cpp
src/operators/kernel/cl/conv_kernel.cpp
+38
-3
src/operators/kernel/cl/depthwise_conv_kernel.cpp
src/operators/kernel/cl/depthwise_conv_kernel.cpp
+1
-0
src/operators/op_param.h
src/operators/op_param.h
+1
-0
test/net/test_mobilenet_GPU.cpp
test/net/test_mobilenet_GPU.cpp
+18
-17
tools/web-exporter/CMakeLists.txt
tools/web-exporter/CMakeLists.txt
+0
-20
tools/web-exporter/export-nodejs.cpp
tools/web-exporter/export-nodejs.cpp
+0
-49
tools/web-exporter/export-scope.cpp
tools/web-exporter/export-scope.cpp
+0
-34
tools/web-exporter/export.cpp
tools/web-exporter/export.cpp
+0
-52
tools/web-exporter/export.h
tools/web-exporter/export.h
+0
-61
未找到文件。
CMakeLists.txt
浏览文件 @
3eef149e
...
@@ -4,7 +4,7 @@ option(USE_OPENMP "openmp support" OFF)
...
@@ -4,7 +4,7 @@ option(USE_OPENMP "openmp support" OFF)
project
(
paddle-mobile
)
project
(
paddle-mobile
)
option
(
DEBUGING
"enable debug mode"
ON
)
option
(
DEBUGING
"enable debug mode"
ON
)
option
(
USE_EXCEPTION
"use std exception"
O
FF
)
option
(
USE_EXCEPTION
"use std exception"
O
N
)
option
(
LOG_PROFILE
"log profile"
OFF
)
option
(
LOG_PROFILE
"log profile"
OFF
)
# select the platform to build
# select the platform to build
option
(
CPU
"armv7 with neon"
OFF
)
option
(
CPU
"armv7 with neon"
OFF
)
...
...
src/framework/cl/cl_engine.h
浏览文件 @
3eef149e
...
@@ -52,7 +52,7 @@ class CLEngine {
...
@@ -52,7 +52,7 @@ class CLEngine {
cl_context
context
,
std
::
string
file_name
)
{
cl_context
context
,
std
::
string
file_name
)
{
FILE
*
file
=
fopen
(
file_name
.
c_str
(),
"rb"
);
FILE
*
file
=
fopen
(
file_name
.
c_str
(),
"rb"
);
PADDLE_MOBILE_ENFORCE
(
file
!=
nullptr
,
"can't open file: %s "
,
PADDLE_MOBILE_ENFORCE
(
file
!=
nullptr
,
"can't open file: %s "
,
filename
.
c_str
());
file
_
name
.
c_str
());
fseek
(
file
,
0
,
SEEK_END
);
fseek
(
file
,
0
,
SEEK_END
);
int64_t
size
=
ftell
(
file
);
int64_t
size
=
ftell
(
file
);
PADDLE_MOBILE_ENFORCE
(
size
>
0
,
"size is too small"
);
PADDLE_MOBILE_ENFORCE
(
size
>
0
,
"size is too small"
);
...
...
src/framework/cl/cl_half.cpp
浏览文件 @
3eef149e
...
@@ -488,7 +488,7 @@ static const uint8_t shifttable[512] = {
...
@@ -488,7 +488,7 @@ static const uint8_t shifttable[512] = {
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x0d
};
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x0d
};
half_t
float2half
(
float
f
)
{
half_t
float2half
(
float
f
)
{
uint32_t
v
=
*
reinterpret_cast
<
uint32_t
*>
(
&
f
);
uint32_t
v
=
*
reinterpret_cast
<
uint32_t
*>
(
&
f
);
return
basetable
[(
v
>>
23
)
&
0x1ff
]
+
return
basetable
[(
v
>>
23
)
&
0x1ff
]
+
((
v
&
0x007fffff
)
>>
shifttable
[(
v
>>
23
)
&
0x1ff
]);
((
v
&
0x007fffff
)
>>
shifttable
[(
v
>>
23
)
&
0x1ff
]);
}
}
...
@@ -496,5 +496,17 @@ half_t float2half(float f) {
...
@@ -496,5 +496,17 @@ half_t float2half(float f) {
float
half2float
(
half_t
h
)
{
float
half2float
(
half_t
h
)
{
uint32_t
v
=
mantissatable
[
offsettable
[
h
>>
10
]
+
(
h
&
0x3ff
)]
+
uint32_t
v
=
mantissatable
[
offsettable
[
h
>>
10
]
+
(
h
&
0x3ff
)]
+
exponenttable
[
h
>>
10
];
exponenttable
[
h
>>
10
];
return
*
reinterpret_cast
<
float
*>
(
&
v
);
return
*
reinterpret_cast
<
float
*>
(
&
v
);
}
void
FloatArray2HalfArray
(
float
*
f_array
,
half_t
*
h_array
,
int
count
)
{
for
(
int
i
=
0
;
i
<
count
;
++
i
)
{
h_array
[
i
]
=
float2half
(
f_array
[
i
]);
}
}
void
HalfArray2FloatArray
(
half_t
*
h_array
,
float
*
f_array
,
int
count
)
{
for
(
int
i
=
0
;
i
<
count
;
++
i
)
{
f_array
[
i
]
=
float2half
(
h_array
[
i
]);
}
}
}
src/framework/cl/cl_half.h
浏览文件 @
3eef149e
...
@@ -18,4 +18,9 @@ limitations under the License. */
...
@@ -18,4 +18,9 @@ limitations under the License. */
typedef
uint16_t
half_t
;
typedef
uint16_t
half_t
;
half_t
float2half
(
float
f
);
half_t
float2half
(
float
f
);
float
half2float
(
half_t
h
);
float
half2float
(
half_t
h
);
void
FloatArray2HalfArray
(
float
*
f_array
,
half_t
*
h_array
,
int
count
);
void
HalfArray2FloatArray
(
half_t
*
h_array
,
float
*
f_array
,
int
count
);
src/framework/cl/cl_helper.h
浏览文件 @
3eef149e
...
@@ -18,6 +18,7 @@ limitations under the License. */
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include <type_traits>
#include <type_traits>
#include <vector>
#include <vector>
#include "common/log.h"
#include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_image.h"
#include "framework/cl/cl_image.h"
#include "framework/cl/cl_scope.h"
#include "framework/cl/cl_scope.h"
...
@@ -32,11 +33,16 @@ class CLHelper {
...
@@ -32,11 +33,16 @@ class CLHelper {
explicit
CLHelper
(
CLScope
*
scope
)
:
scope_
(
scope
)
{}
explicit
CLHelper
(
CLScope
*
scope
)
:
scope_
(
scope
)
{}
void
AddKernel
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
file_name
)
{
void
AddKernel
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
file_name
)
{
DLOG
<<
" begin add kernel "
;
auto
kernel
=
scope_
->
GetKernel
(
kernel_name
,
file_name
);
auto
kernel
=
scope_
->
GetKernel
(
kernel_name
,
file_name
);
DLOG
<<
" add kernel ing "
;
kernels
.
emplace_back
(
std
::
move
(
kernel
));
kernels
.
emplace_back
(
std
::
move
(
kernel
));
}
}
cl_kernel
KernelAt
(
const
int
index
)
{
return
kernels
[
index
].
get
();
}
cl_kernel
KernelAt
(
const
int
index
)
{
DLOG
<<
" kernel count: "
<<
kernels
.
size
();
return
kernels
[
index
].
get
();
}
cl_command_queue
CLCommandQueue
()
{
return
scope_
->
CommandQueue
();
}
cl_command_queue
CLCommandQueue
()
{
return
scope_
->
CommandQueue
();
}
...
...
src/framework/cl/cl_image.h
浏览文件 @
3eef149e
...
@@ -17,7 +17,9 @@ limitations under the License. */
...
@@ -17,7 +17,9 @@ limitations under the License. */
#include <vector>
#include <vector>
#include "CL/cl.h"
#include "CL/cl.h"
#include "framework/cl/cl_half.h"
#include "framework/cl/cl_half.h"
#include "framework/cl/cl_tool.h"
#include "framework/ddim.h"
#include "framework/ddim.h"
#include "framework/tensor.h"
#include "framework/tensor.h"
...
@@ -59,6 +61,7 @@ class CLImage {
...
@@ -59,6 +61,7 @@ class CLImage {
PADDLE_MOBILE_THROW_EXCEPTION
(
PADDLE_MOBILE_THROW_EXCEPTION
(
" empty image tensor data shouldn't have value"
);
" empty image tensor data shouldn't have value"
);
}
}
DLOG
<<
" init empty image "
;
InitCLImage
(
context
,
nullptr
,
dim
);
InitCLImage
(
context
,
nullptr
,
dim
);
initialized_
=
true
;
initialized_
=
true
;
}
}
...
@@ -98,7 +101,8 @@ class CLImage {
...
@@ -98,7 +101,8 @@ class CLImage {
T
*
data
()
const
{
T
*
data
()
const
{
if
(
initialized_
)
{
if
(
initialized_
)
{
PADDLE_MOBILE_THROW_EXCEPTION
(
PADDLE_MOBILE_THROW_EXCEPTION
(
" cl image has initialized, tensor data has been deleted "
);
" cl image has initialized, tensor data has been deleted, can't use "
"tensor data"
);
}
}
return
reinterpret_cast
<
T
*>
(
tensor_data_
);
return
reinterpret_cast
<
T
*>
(
tensor_data_
);
}
}
...
@@ -115,6 +119,7 @@ class CLImage {
...
@@ -115,6 +119,7 @@ class CLImage {
private:
private:
void
InitCLImage
(
cl_context
context
,
float
*
tensor_data
,
const
DDim
&
dim
)
{
void
InitCLImage
(
cl_context
context
,
float
*
tensor_data
,
const
DDim
&
dim
)
{
DLOG
<<
" tensor dim: "
<<
dim
;
cl_image_format
cf
=
{.
image_channel_order
=
CL_RGBA
,
cl_image_format
cf
=
{.
image_channel_order
=
CL_RGBA
,
.
image_channel_data_type
=
CL_HALF_FLOAT
};
.
image_channel_data_type
=
CL_HALF_FLOAT
};
// NCHW -> [W * (C+3)/4, H * N]
// NCHW -> [W * (C+3)/4, H * N]
...
@@ -132,28 +137,22 @@ class CLImage {
...
@@ -132,28 +137,22 @@ class CLImage {
tensor_data_
[
i
]
=
0
;
tensor_data_
[
i
]
=
0
;
}
}
}
}
size_t
N
,
C
,
H
,
W
;
if
(
tensor_dims_
.
size
()
==
4
)
{
size_t
new_dims
[]
=
{
1
,
1
,
1
,
1
};
N
=
tensor_dims_
[
0
];
if
(
N
<
0
)
{
for
(
int
j
=
0
;
j
<
dim
.
size
();
++
j
)
{
N
=
1
;
new_dims
[
4
-
dim
.
size
()
+
j
]
=
dim
[
j
]
;
}
}
C
=
tensor_dims_
[
1
];
H
=
tensor_dims_
[
2
];
W
=
tensor_dims_
[
3
];
width_of_one_block_
=
W
;
size_t
N
,
C
,
H
,
W
;
height_of_one_block_
=
H
;
}
else
if
(
tensor_dims_
.
size
()
==
1
)
{
N
=
new_dims
[
0
];
N
=
1
;
C
=
new_dims
[
1
];
C
=
tensor_dims_
[
0
];
H
=
new_dims
[
2
];
H
=
1
;
W
=
new_dims
[
3
];
W
=
1
;
width_of_one_block_
=
W
;
width_of_one_block_
=
W
;
height_of_one_block_
=
H
;
height_of_one_block_
=
H
;
}
size_t
width
=
W
*
((
C
+
3
)
/
4
);
size_t
width
=
W
*
((
C
+
3
)
/
4
);
size_t
height
=
H
*
N
;
size_t
height
=
H
*
N
;
...
@@ -193,9 +192,12 @@ class CLImage {
...
@@ -193,9 +192,12 @@ class CLImage {
}
}
}
}
cl_int
err
;
cl_int
err
;
DLOG
<<
" image width: "
<<
width
;
DLOG
<<
" image height: "
<<
height
;
cl_image_
=
clCreateImage2D
(
cl_image_
=
clCreateImage2D
(
context
,
// cl_context context
context
,
// cl_context context
CL_MEM_READ_WRITE
|
CL_MEM_COPY_HOST_PTR
,
// cl_mem_flags flags
CL_MEM_READ_WRITE
|
(
imageData
?
CL_MEM_COPY_HOST_PTR
:
0
),
// cl_mem_flags flags
&
cf
,
// const cl_image_format *image_format
&
cf
,
// const cl_image_format *image_format
width
,
// size_t image_width
width
,
// size_t image_width
height
,
// size_t image_height
height
,
// size_t image_height
...
@@ -205,6 +207,7 @@ class CLImage {
...
@@ -205,6 +207,7 @@ class CLImage {
if
(
err
!=
CL_SUCCESS
)
{
if
(
err
!=
CL_SUCCESS
)
{
// TODO(HaiPeng): error handling
// TODO(HaiPeng): error handling
CL_CHECK_ERRORS
(
err
);
PADDLE_MOBILE_THROW_EXCEPTION
(
" create image 2d error "
);
PADDLE_MOBILE_THROW_EXCEPTION
(
" create image 2d error "
);
}
}
}
}
...
@@ -222,9 +225,15 @@ class CLImage {
...
@@ -222,9 +225,15 @@ class CLImage {
cl_context
context_
;
cl_context
context_
;
};
};
void
TensorToCLImage
(
Tensor
*
tensor
,
CLImage
*
image
,
cl_command_queue
commandQueue
);
void
TensorToCLImage
(
Tensor
*
tensor
,
CLImage
*
image
,
cl_command_queue
commandQueue
);
void
CLImageToTensor
(
CLImage
*
image
,
Tensor
*
tensor
,
cl_command_queue
commandQueue
);
void
CLImageToTensor
(
CLImage
*
image
,
Tensor
*
tensor
,
cl_command_queue
commandQueue
);
#ifdef PADDLE_MOBILE_DEBUG
Print
&
operator
<<
(
Print
&
printer
,
const
CLImage
&
image
);
#endif
}
// namespace framework
}
// namespace framework
}
// namespace paddle_mobile
}
// namespace paddle_mobile
src/framework/cl/cl_scope.h
浏览文件 @
3eef149e
...
@@ -40,8 +40,11 @@ class CLScope {
...
@@ -40,8 +40,11 @@ class CLScope {
std
::
unique_ptr
<
_cl_kernel
,
CLKernelDeleter
>
GetKernel
(
std
::
unique_ptr
<
_cl_kernel
,
CLKernelDeleter
>
GetKernel
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
file_name
)
{
const
std
::
string
&
kernel_name
,
const
std
::
string
&
file_name
)
{
auto
program
=
Program
(
file_name
);
auto
program
=
Program
(
file_name
);
DLOG
<<
" get program ~ "
;
std
::
unique_ptr
<
_cl_kernel
,
CLKernelDeleter
>
kernel
(
std
::
unique_ptr
<
_cl_kernel
,
CLKernelDeleter
>
kernel
(
clCreateKernel
(
program
,
kernel_name
.
c_str
(),
NULL
));
clCreateKernel
(
program
,
kernel_name
.
c_str
(),
&
status_
));
CL_CHECK_ERRORS
(
status_
);
DLOG
<<
" create kernel ~ "
;
return
std
::
move
(
kernel
);
return
std
::
move
(
kernel
);
}
}
...
@@ -58,11 +61,12 @@ class CLScope {
...
@@ -58,11 +61,12 @@ class CLScope {
status_
=
status_
=
clBuildProgram
(
program
.
get
(),
0
,
0
,
"-cl-fast-relaxed-math"
,
0
,
0
);
clBuildProgram
(
program
.
get
(),
0
,
0
,
"-cl-fast-relaxed-math"
,
0
,
0
);
CL_CHECK_ERRORS
(
status_
);
CL_CHECK_ERRORS
(
status_
);
programs_
[
file_name
]
=
std
::
move
(
program
);
programs_
[
file_name
]
=
std
::
move
(
program
);
return
program
.
get
();
return
program
s_
[
file_name
]
.
get
();
}
}
private:
private:
...
...
src/framework/executor.cpp
浏览文件 @
3eef149e
...
@@ -37,6 +37,8 @@ limitations under the License. */
...
@@ -37,6 +37,8 @@ limitations under the License. */
#include "framework/cl/cl_image.h"
#include "framework/cl/cl_image.h"
#endif
#endif
int
debug_to
=
2
;
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
framework
{
namespace
framework
{
...
@@ -85,7 +87,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
...
@@ -85,7 +87,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
for
(
int
i
=
0
;
i
<
blocks
.
size
();
++
i
)
{
for
(
int
i
=
0
;
i
<
blocks
.
size
();
++
i
)
{
std
::
shared_ptr
<
framework
::
BlockDesc
>
block_desc
=
blocks
[
i
];
std
::
shared_ptr
<
framework
::
BlockDesc
>
block_desc
=
blocks
[
i
];
std
::
vector
<
std
::
shared_ptr
<
framework
::
OpDesc
>>
ops
=
block_desc
->
Ops
();
std
::
vector
<
std
::
shared_ptr
<
framework
::
OpDesc
>>
ops
=
block_desc
->
Ops
();
for
(
int
j
=
0
;
j
<
ops
.
size
()
;
++
j
)
{
for
(
int
j
=
0
;
j
<
debug_to
;
++
j
)
{
std
::
shared_ptr
<
framework
::
OpDesc
>
op
=
ops
[
j
];
std
::
shared_ptr
<
framework
::
OpDesc
>
op
=
ops
[
j
];
DLOG
<<
"create op: "
<<
j
<<
" "
<<
op
->
Type
();
DLOG
<<
"create op: "
<<
j
<<
" "
<<
op
->
Type
();
auto
op_base
=
framework
::
OpRegistry
<
Dtype
>::
CreateOp
(
auto
op_base
=
framework
::
OpRegistry
<
Dtype
>::
CreateOp
(
...
@@ -414,7 +416,7 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
...
@@ -414,7 +416,7 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
}
}
}
}
#else
#else
for
(
int
i
=
0
;
i
<
ops
.
size
()
;
i
++
)
{
for
(
int
i
=
0
;
i
<
debug_to
;
i
++
)
{
#ifdef PADDLE_MOBILE_PROFILE
#ifdef PADDLE_MOBILE_PROFILE
struct
timespec
ts
;
struct
timespec
ts
;
clock_gettime
(
CLOCK_MONOTONIC
,
&
ts
);
clock_gettime
(
CLOCK_MONOTONIC
,
&
ts
);
...
@@ -428,6 +430,11 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
...
@@ -428,6 +430,11 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
#endif
#endif
}
}
#endif
#endif
DLOG
<<
" predict return nullptr"
;
return
nullptr
;
auto
last_op
=
ops
.
rbegin
();
auto
last_op
=
ops
.
rbegin
();
auto
output_map
=
(
*
last_op
)
->
Outputs
();
auto
output_map
=
(
*
last_op
)
->
Outputs
();
std
::
vector
<
std
::
string
>
out_keys
=
(
*
last_op
)
->
GetOutKeys
();
std
::
vector
<
std
::
string
>
out_keys
=
(
*
last_op
)
->
GetOutKeys
();
...
@@ -647,6 +654,7 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
...
@@ -647,6 +654,7 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
const
std
::
vector
<
Ptype
>
&
input
,
const
std
::
vector
<
int64_t
>
&
dims
)
{
const
std
::
vector
<
Ptype
>
&
input
,
const
std
::
vector
<
int64_t
>
&
dims
)
{
framework
::
Tensor
tensor
(
input
,
framework
::
make_ddim
(
dims
));
framework
::
Tensor
tensor
(
input
,
framework
::
make_ddim
(
dims
));
std
::
shared_ptr
<
framework
::
Tensor
>
output_tensor
=
Predict
(
tensor
,
0
);
std
::
shared_ptr
<
framework
::
Tensor
>
output_tensor
=
Predict
(
tensor
,
0
);
if
(
output_tensor
!=
nullptr
)
{
Executor
<
Dtype
,
P
>::
Ptype
*
output_ptr
=
Executor
<
Dtype
,
P
>::
Ptype
*
output_ptr
=
output_tensor
->
data
<
typename
Executor
<
Dtype
,
P
>::
Ptype
>
();
output_tensor
->
data
<
typename
Executor
<
Dtype
,
P
>::
Ptype
>
();
std
::
vector
<
typename
Executor
<
Dtype
,
P
>::
Ptype
>
result_vector
;
std
::
vector
<
typename
Executor
<
Dtype
,
P
>::
Ptype
>
result_vector
;
...
@@ -654,6 +662,10 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
...
@@ -654,6 +662,10 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
result_vector
.
push_back
(
output_ptr
[
j
]);
result_vector
.
push_back
(
output_ptr
[
j
]);
}
}
return
result_vector
;
return
result_vector
;
}
else
{
DLOG
<<
"return empty vector"
;
return
{};
}
}
}
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
...
...
src/framework/operator.cpp
浏览文件 @
3eef149e
...
@@ -57,7 +57,10 @@ void OperatorBase<Dtype>::CheckAllInputOutputSet() const {}
...
@@ -57,7 +57,10 @@ void OperatorBase<Dtype>::CheckAllInputOutputSet() const {}
template
<
typename
Dtype
>
template
<
typename
Dtype
>
void
OperatorBase
<
Dtype
>::
Run
()
{
void
OperatorBase
<
Dtype
>::
Run
()
{
DLOG
<<
" begin run "
<<
type_
;
RunImpl
();
RunImpl
();
DLOG
<<
" end run "
<<
type_
;
return
;
#ifdef PADDLE_MOBILE_DEBUG
#ifdef PADDLE_MOBILE_DEBUG
DLOG
<<
"-------------"
<<
type_
<<
"----------------------------"
;
DLOG
<<
"-------------"
<<
type_
<<
"----------------------------"
;
vector
<
string
>
input_keys
=
GetInputKeys
();
vector
<
string
>
input_keys
=
GetInputKeys
();
...
@@ -100,8 +103,9 @@ void OperatorBase<Dtype>::Run() {
...
@@ -100,8 +103,9 @@ void OperatorBase<Dtype>::Run() {
#ifdef PADDLE_MOBILE_CL
#ifdef PADDLE_MOBILE_CL
if
(
type_
==
"fetch"
)
{
if
(
type_
==
"fetch"
)
{
Tensor
*
tensor
=
vari
->
template
GetMutable
<
framework
::
LoDTensor
>();
Tensor
*
tensor
=
vari
->
template
GetMutable
<
framework
::
LoDTensor
>();
if
(
tensor
)
if
(
tensor
)
{
DLOG
<<
type_
<<
" output- "
<<
key
<<
"="
<<
tensor
->
dims
();
DLOG
<<
type_
<<
" output- "
<<
key
<<
"="
<<
tensor
->
dims
();
}
}
else
{
}
else
{
CLImage
*
cl_image
=
vari
->
template
GetMutable
<
framework
::
CLImage
>();
CLImage
*
cl_image
=
vari
->
template
GetMutable
<
framework
::
CLImage
>();
// cl_command_queue commandQueue =
// cl_command_queue commandQueue =
...
...
src/operators/kernel/cl/cl_kernel/conv_kernel.cl
浏览文件 @
3eef149e
...
@@ -12,4 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,4 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See
the
License
for
the
specific
language
governing
permissions
and
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
limitations
under
the
License.
*/
#
include
"conv_kernel.inc.cl"
//#include
"conv_kernel.inc.cl"
__kernel
void
conv_3x3
()
{}
\ No newline at end of file
src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
浏览文件 @
3eef149e
...
@@ -24,9 +24,16 @@ namespace operators {
...
@@ -24,9 +24,16 @@ namespace operators {
template
<
>
template
<
>
bool
ConvAddBNReluKernel
<
GPU_CL
,
float
>::
Init
(
bool
ConvAddBNReluKernel
<
GPU_CL
,
float
>::
Init
(
FusionConvAddBNReluParam
<
GPU_CL
>
*
param
)
{
FusionConvAddBNReluParam
<
GPU_CL
>
*
param
)
{
PADDLE_MOBILE_ENFORCE
(
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
"need equal"
);
param
->
Filter
()
->
InitCLImage
(
cl_helper_
.
CLContext
());
param
->
Bias
()
->
InitCLImage
(
cl_helper_
.
CLContext
());
// const CL *mean = param->InputMean();
// const CL *mean = param->InputMean();
const
framework
::
CLImage
*
mean
=
param
->
InputMean
();
const
framework
::
CLImage
*
mean
=
param
->
InputMean
();
const
framework
::
CLImage
*
variance
=
param
->
InputVariance
();
const
framework
::
CLImage
*
variance
=
param
->
InputVariance
();
const
framework
::
CLImage
*
scale
=
param
->
InputScale
();
const
framework
::
CLImage
*
scale
=
param
->
InputScale
();
const
framework
::
CLImage
*
bias
=
param
->
InputBias
();
const
framework
::
CLImage
*
bias
=
param
->
InputBias
();
...
@@ -52,9 +59,6 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
...
@@ -52,9 +59,6 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
new_bias_ptr
[
i
]
=
bias_ptr
[
i
]
-
mean_ptr
[
i
]
*
inv_std_ptr
[
i
]
*
scale_ptr
[
i
];
new_bias_ptr
[
i
]
=
bias_ptr
[
i
]
-
mean_ptr
[
i
]
*
inv_std_ptr
[
i
]
*
scale_ptr
[
i
];
}
}
delete
[](
new_scale_ptr
);
delete
[](
new_bias_ptr
);
framework
::
CLImage
*
new_scale
=
new
framework
::
CLImage
();
framework
::
CLImage
*
new_scale
=
new
framework
::
CLImage
();
new_scale
->
SetTensorData
(
new_scale_ptr
,
variance
->
dims
());
new_scale
->
SetTensorData
(
new_scale_ptr
,
variance
->
dims
());
...
@@ -68,6 +72,9 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
...
@@ -68,6 +72,9 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
param
->
SetNewScale
(
new_scale
);
param
->
SetNewScale
(
new_scale
);
param
->
SetNewBias
(
new_bias
);
param
->
SetNewBias
(
new_bias
);
delete
[](
new_scale_ptr
);
delete
[](
new_bias_ptr
);
PADDLE_MOBILE_ENFORCE
(
PADDLE_MOBILE_ENFORCE
(
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
...
...
src/operators/kernel/cl/conv_add_kernel.cpp
浏览文件 @
3eef149e
...
@@ -25,6 +25,9 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
...
@@ -25,6 +25,9 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
"need equal"
);
"need equal"
);
param
->
Filter
()
->
InitCLImage
(
cl_helper_
.
CLContext
());
param
->
Bias
()
->
InitCLImage
(
cl_helper_
.
CLContext
());
int
offset
=
static_cast
<
int
>
(
param
->
Filter
()
->
dims
()[
2
])
/
2
-
int
offset
=
static_cast
<
int
>
(
param
->
Filter
()
->
dims
()[
2
])
/
2
-
static_cast
<
int
>
(
param
->
Paddings
()[
1
]);
static_cast
<
int
>
(
param
->
Paddings
()[
1
]);
param
->
SetOffset
(
offset
);
param
->
SetOffset
(
offset
);
...
...
src/operators/kernel/cl/conv_kernel.cpp
浏览文件 @
3eef149e
...
@@ -26,18 +26,32 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
...
@@ -26,18 +26,32 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
"need equal"
);
"need equal"
);
param
->
Filter
()
->
InitCLImage
(
cl_helper_
.
CLContext
());
int
offset
=
static_cast
<
int
>
(
param
->
Filter
()
->
dims
()[
2
])
/
2
-
int
offset
=
static_cast
<
int
>
(
param
->
Filter
()
->
dims
()[
2
])
/
2
-
static_cast
<
int
>
(
param
->
Paddings
()[
1
]);
static_cast
<
int
>
(
param
->
Paddings
()[
1
]);
param
->
SetOffset
(
offset
);
param
->
SetOffset
(
offset
);
DLOG
<<
" init helper: "
<<
&
cl_helper_
;
DLOG
<<
" conv kernel add kernel ~ "
;
DLOG
<<
" width of one block: "
<<
param
->
Filter
()
->
WidthOfOneBlock
();
DLOG
<<
" height of one block: "
<<
param
->
Filter
()
->
HeightOfOneBlock
();
DLOG
<<
" filter dims: "
<<
param
->
Filter
()
->
dims
();
if
(
param
->
Filter
()
->
WidthOfOneBlock
()
==
1
&&
if
(
param
->
Filter
()
->
WidthOfOneBlock
()
==
1
&&
param
->
Filter
()
->
HeightOfOneBlock
()
==
1
)
{
param
->
Filter
()
->
HeightOfOneBlock
()
==
1
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1"
,
"conv_add_bn_relu_kernel.cl"
);
DLOG
<<
" here1 "
;
this
->
cl_helper_
.
AddKernel
(
"conv_1x1"
,
"conv_kernel.cl"
);
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
)
{
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
)
{
this
->
cl_helper_
.
AddKernel
(
"depth_conv_3x3"
,
"conv_add_bn_relu_kernel.cl"
);
DLOG
<<
" here2 "
;
this
->
cl_helper_
.
AddKernel
(
"depth_conv_3x3"
,
"conv_kernel.cl"
);
}
else
if
(
param
->
Filter
()
->
WidthOfOneBlock
()
==
3
&&
}
else
if
(
param
->
Filter
()
->
WidthOfOneBlock
()
==
3
&&
param
->
Filter
()
->
HeightOfOneBlock
()
==
3
)
{
param
->
Filter
()
->
HeightOfOneBlock
()
==
3
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_3x3"
,
"conv_add_bn_relu_kernel.cl"
);
DLOG
<<
" here3 "
;
this
->
cl_helper_
.
AddKernel
(
"conv_3x3"
,
"conv_kernel.cl"
);
}
else
{
}
else
{
PADDLE_MOBILE_THROW_EXCEPTION
(
" not support "
);
PADDLE_MOBILE_THROW_EXCEPTION
(
" not support "
);
}
}
...
@@ -47,14 +61,27 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
...
@@ -47,14 +61,27 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
template
<
>
template
<
>
void
ConvKernel
<
GPU_CL
,
float
>::
Compute
(
const
ConvParam
<
GPU_CL
>
&
param
)
{
void
ConvKernel
<
GPU_CL
,
float
>::
Compute
(
const
ConvParam
<
GPU_CL
>
&
param
)
{
DLOG
<<
" Compute helper: "
<<
&
cl_helper_
;
DLOG
<<
" begin compute "
;
auto
kernel
=
this
->
cl_helper_
.
KernelAt
(
0
);
auto
kernel
=
this
->
cl_helper_
.
KernelAt
(
0
);
DLOG
<<
" get work size "
;
auto
default_work_size
=
this
->
cl_helper_
.
DefaultWorkSize
(
*
param
.
Output
());
auto
default_work_size
=
this
->
cl_helper_
.
DefaultWorkSize
(
*
param
.
Output
());
DLOG
<<
" end work size "
;
int
c_block
=
default_work_size
[
0
];
int
c_block
=
default_work_size
[
0
];
int
w
=
default_work_size
[
1
];
int
w
=
default_work_size
[
1
];
int
nh
=
default_work_size
[
2
];
int
nh
=
default_work_size
[
2
];
auto
input
=
param
.
Input
()
->
GetCLImage
();
auto
input
=
param
.
Input
()
->
GetCLImage
();
DLOG
<<
" get Input "
;
auto
filter
=
param
.
Filter
()
->
GetCLImage
();
auto
filter
=
param
.
Filter
()
->
GetCLImage
();
DLOG
<<
" get Filter "
;
auto
output
=
param
.
Output
();
auto
output
=
param
.
Output
();
DLOG
<<
" get Output "
;
int
stride
=
param
.
Strides
()[
0
];
int
stride
=
param
.
Strides
()[
0
];
int
offset
=
param
.
Offset
();
int
offset
=
param
.
Offset
();
int
input_c
=
param
.
Input
()
->
CBlock
();
int
input_c
=
param
.
Input
()
->
CBlock
();
...
@@ -64,6 +91,8 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> ¶m) {
...
@@ -64,6 +91,8 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> ¶m) {
cl_int
status
;
cl_int
status
;
DLOG
<<
" begin set kernel arg "
;
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
status
=
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
status
=
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
status
=
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
status
=
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
...
@@ -77,12 +106,18 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> ¶m) {
...
@@ -77,12 +106,18 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> ¶m) {
status
=
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
input_width
);
status
=
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
input_width
);
status
=
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_height
);
status
=
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_height
);
DLOG
<<
" end set kernel arg "
;
CL_CHECK_ERRORS
(
status
);
CL_CHECK_ERRORS
(
status
);
DLOG
<<
" begin enqueue "
;
status
=
status
=
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
DLOG
<<
" end enqueue "
;
CL_CHECK_ERRORS
(
status
);
CL_CHECK_ERRORS
(
status
);
}
}
...
...
src/operators/kernel/cl/depthwise_conv_kernel.cpp
浏览文件 @
3eef149e
...
@@ -27,6 +27,7 @@ bool DepthwiseConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
...
@@ -27,6 +27,7 @@ bool DepthwiseConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
"need equal"
);
"need equal"
);
param
->
Filter
()
->
InitCLImage
(
cl_helper_
.
CLContext
());
int
offset
=
static_cast
<
int
>
(
param
->
Filter
()
->
dims
()[
2
])
/
2
-
int
offset
=
static_cast
<
int
>
(
param
->
Filter
()
->
dims
()[
2
])
/
2
-
static_cast
<
int
>
(
param
->
Paddings
()[
1
]);
static_cast
<
int
>
(
param
->
Paddings
()[
1
]);
param
->
SetOffset
(
offset
);
param
->
SetOffset
(
offset
);
...
...
src/operators/op_param.h
浏览文件 @
3eef149e
...
@@ -948,6 +948,7 @@ class FetchParam : public OpParam {
...
@@ -948,6 +948,7 @@ class FetchParam : public OpParam {
input_x_
=
InputXFrom
<
GType
>
(
inputs
,
scope
);
input_x_
=
InputXFrom
<
GType
>
(
inputs
,
scope
);
out_
=
OutFrom
(
outputs
,
scope
);
out_
=
OutFrom
(
outputs
,
scope
);
}
}
const
RType
*
InputX
()
const
{
return
input_x_
;
}
const
RType
*
InputX
()
const
{
return
input_x_
;
}
Tensor
*
Out
()
const
{
return
out_
;
}
Tensor
*
Out
()
const
{
return
out_
;
}
...
...
test/net/test_mobilenet_GPU.cpp
浏览文件 @
3eef149e
...
@@ -34,23 +34,24 @@ int main() {
...
@@ -34,23 +34,24 @@ int main() {
GetInput
<
float
>
(
g_test_image_1x3x224x224_banana
,
&
input
,
dims
);
GetInput
<
float
>
(
g_test_image_1x3x224x224_banana
,
&
input
,
dims
);
auto
vec_result
=
paddle_mobile
.
Predict
(
input
,
dims
);
auto
vec_result
=
paddle_mobile
.
Predict
(
input
,
dims
);
std
::
vector
<
float
>::
iterator
biggest
=
// std::vector<float>::iterator biggest =
std
::
max_element
(
std
::
begin
(
vec_result
),
std
::
end
(
vec_result
));
// std::max_element(std::begin(vec_result), std::end(vec_result));
std
::
cout
<<
" Max element is "
<<
*
biggest
<<
" at position "
// std::cout << " Max element is " << *biggest << " at position "
<<
std
::
distance
(
std
::
begin
(
vec_result
),
biggest
)
<<
std
::
endl
;
// << std::distance(std::begin(vec_result), biggest) <<
// std::endl;
// 预热十次
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
// for (int i = 0; i < 10; ++i) {
auto
vec_result
=
paddle_mobile
.
Predict
(
input
,
dims
);
// auto vec_result = paddle_mobile.Predict(input, dims);
}
// }
auto
time3
=
paddle_mobile
::
time
();
// auto time3 = paddle_mobile::time();
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
// for (int i = 0; i < 10; ++i) {
auto
vec_result
=
paddle_mobile
.
Predict
(
input
,
dims
);
// auto vec_result = paddle_mobile.Predict(input, dims);
}
// }
DLOG
<<
vec_result
;
// DLOG << vec_result;
auto
time4
=
paddle_mobile
::
time
();
// auto time4 = paddle_mobile::time();
std
::
cout
<<
"predict cost :"
<<
paddle_mobile
::
time_diff
(
time3
,
time4
)
/
10
// std::cout << "predict cost :" << paddle_mobile::time_diff(time3,
<<
"ms"
<<
std
::
endl
;
// time4) / 10 << "ms"
// << std::endl;
}
}
std
::
cout
<<
"如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
std
::
cout
<<
"如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
...
...
tools/web-exporter/CMakeLists.txt
已删除
100644 → 0
浏览文件 @
761cc7f1
cmake_minimum_required
(
VERSION 3.6
)
project
(
web-exporter
)
set
(
CMAKE_CXX_STANDARD 11
)
file
(
GLOB PADDLE_MOBILE_CPP_FILES
"../../src/common/*.c"
"../../src/common/*.cpp"
"../../src/memory/*.cpp"
"../../src/framework/*.c"
"../../src/framework/*.cpp"
"../../src/framework/program/*.cpp"
"../../src/framework/program/program-optimize/*.cpp"
)
file
(
GLOB EXPORT_CPP_FILES
"*.cpp"
)
add_executable
(
web-exporter
${
PADDLE_MOBILE_CPP_FILES
}
${
EXPORT_CPP_FILES
}
)
target_include_directories
(
web-exporter PRIVATE
"../../src"
)
target_link_libraries
(
web-exporter
)
\ No newline at end of file
tools/web-exporter/export-nodejs.cpp
已删除
100644 → 0
浏览文件 @
761cc7f1
#include "export.h"
// Return a string of `i` spaces, used to pretty-print the generated JS.
// FIX: a negative width previously flowed through the int -> size_t
// conversion in std::string's fill constructor, requesting an enormous
// allocation (throws std::length_error / std::bad_alloc). Clamp to zero.
inline std::string indent(int i) {
  return std::string(static_cast<std::string::size_type>(i < 0 ? 0 : i), ' ');
}
void
export_nodejs
(
ProgramPtr
program
,
ScopePtr
scope
,
std
::
ostream
&
os
)
{
os
<<
"module.exports.program = {
\n
"
;
os
<<
indent
(
2
)
<<
var2str
(
"blocks"
)
<<
": [
\n
"
;
for
(
const
auto
&
block
:
program
->
Blocks
())
{
os
<<
indent
(
4
)
<<
"{
\n
"
;
os
<<
indent
(
6
)
<<
var2str
(
"vars"
)
<<
": {
\n
"
;
for
(
const
auto
&
var
:
block
->
Vars
())
{
const
auto
&
dim
=
var
->
Tensor_desc
().
Dims
();
os
<<
indent
(
8
)
<<
var2str
(
var
->
Name
())
<<
": {
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"dim"
)
<<
": "
<<
var2str
(
dim
)
<<
",
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"persistable"
)
<<
": "
<<
var2str
(
var
->
Persistable
())
<<
"
\n
"
;
os
<<
indent
(
8
)
<<
"},
\n
"
;
}
os
<<
indent
(
6
)
<<
"},
\n
"
;
os
<<
indent
(
6
)
<<
var2str
(
"ops"
)
<<
": [
\n
"
;
for
(
const
auto
&
op
:
block
->
Ops
())
{
os
<<
indent
(
8
)
<<
"{
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"type"
)
<<
": "
<<
var2str
(
op
->
Type
())
<<
",
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"inputs"
)
<<
": {
\n
"
;
for
(
const
auto
&
kv
:
op
->
GetInputs
())
{
os
<<
indent
(
12
)
<<
var2str
(
kv
.
first
)
<<
": "
<<
var2str
(
kv
.
second
)
<<
",
\n
"
;
}
os
<<
indent
(
10
)
<<
"},
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"outputs"
)
<<
": {
\n
"
;
for
(
const
auto
&
kv
:
op
->
GetInputs
())
{
os
<<
indent
(
12
)
<<
var2str
(
kv
.
first
)
<<
": "
<<
var2str
(
kv
.
second
)
<<
",
\n
"
;
}
os
<<
indent
(
10
)
<<
"},
\n
"
;
os
<<
indent
(
10
)
<<
var2str
(
"attrs"
)
<<
": {
\n
"
;
for
(
const
auto
&
kv
:
op
->
GetAttrMap
())
{
os
<<
indent
(
12
)
<<
var2str
(
kv
.
first
)
<<
": "
;
os
<<
decltype
(
kv
.
second
)
::
ApplyVistor
(
VarVisitor
(),
kv
.
second
)
<<
",
\n
"
;
}
os
<<
indent
(
10
)
<<
"},
\n
"
;
os
<<
indent
(
8
)
<<
"},
\n
"
;
}
os
<<
indent
(
6
)
<<
"],
\n
"
;
os
<<
indent
(
4
)
<<
"},
\n
"
;
}
os
<<
indent
(
2
)
<<
"]
\n
"
;
os
<<
"}
\n
"
;
}
tools/web-exporter/export-scope.cpp
已删除
100644 → 0
浏览文件 @
761cc7f1
#include <cstdio>
#include "export.h"
// Dump every persistable variable in `program` (except the special
// "feed"/"fetch" vars) as a raw float32 blob, one file per variable,
// into directory `dirname`.
void export_scope(ProgramPtr program, ScopePtr scope,
                  const std::string& dirname) {
  for (const auto& block : program->Blocks()) {
    for (const auto& var : block->Vars()) {
      if (var->Name() == "feed" || var->Name() == "fetch") {
        continue;
      }
      if (var->Persistable()) {
        auto* v = scope->FindVar(var->Name());
        assert(v != nullptr);

        // Element count = product of the declared dims.
        int count = 1;
        for (auto n : var->Tensor_desc().Dims()) {
          count *= n;
        }

        auto* tensor = v->GetMutable<paddle_mobile::framework::LoDTensor>();
        // NOTE(review): mutable_data<float>() is used read-only here;
        // presumably the tensor is already materialized by the executor's
        // init path — confirm a const accessor isn't available.
        const float* p = tensor->mutable_data<float>();
        assert(p != nullptr);

        std::string para_file_name = dirname + '/' + var->Name();
        // BUG FIX: open in binary mode ("wb"); text mode ("w") would
        // mangle the raw float bytes on platforms that translate newlines.
        FILE* para_file = fopen(para_file_name.c_str(), "wb");
        assert(para_file != nullptr);  // was previously unchecked
        size_t written = fwrite(p, sizeof(float), count, para_file);
        assert(written == static_cast<size_t>(count));  // detect short writes
        fclose(para_file);
      }
    }
  }
}
tools/web-exporter/export.cpp
已删除
100644 → 0
浏览文件 @
761cc7f1
#include "export.h"
#include <sys/stat.h>
#include <sys/types.h>
// Minimal Executor subclass whose only purpose is to drive the base
// class's parameter-loading path (InitMemory / InitCombineMemory) so the
// program's scope gets populated with weights; it never runs Predict.
class FakeExecutor
    : public paddle_mobile::framework::Executor<
          paddle_mobile::CPU, paddle_mobile::Precision::FP32> {
 public:
  // FIX: take the Program by const reference — the original signature
  // took it by value, forcing an extra copy before the member assignment.
  // `explicit` prevents accidental implicit conversion from Program.
  explicit FakeExecutor(
      const paddle_mobile::framework::Program<paddle_mobile::CPU>& p) {
    program_ = p;
    batch_size_ = 1;
    use_optimize_ = true;
    loddable_ = false;
    // use_optimize_ is hard-coded true above; the else branch is retained
    // only for parity with the real Executor's constructor.
    if (use_optimize_) {
      to_predict_program_ = program_.optimizeProgram;
    } else {
      to_predict_program_ = program_.originProgram;
    }
    auto* variable_ptr = program_.scope->Var("batch_size");
    variable_ptr[0].SetValue<int>(1);
    if (program_.combined) {
      InitCombineMemory();
    } else {
      InitMemory();
    }
  }
};
// Entry point: load a combined paddle-mobile model and emit
//   <output-dir>/model.js  — program structure as a nodejs module
//   <output-dir>/paras/*   — raw float32 parameter blobs
int main(int argc, char** argv) {
  if (argc != 3) {
    // FIX: typo "modle" -> "model" in the usage string.
    std::cout << "Usage: " << argv[0] << " <combined-model-dir> <output-dir>\n";
    return -1;
  }
  std::string model_dir = argv[1];
  std::string model_path = model_dir + "/model";
  std::string para_path = model_dir + "/params";

  std::string out_dir = argv[2];
  std::string out_model_js = out_dir + "/model.js";
  std::string out_para_dir = out_dir + "/paras";
  // NOTE(review): mkdir failures (other than "already exists") are not
  // fatal here; export_scope will assert when it cannot write.
  mkdir(out_dir.c_str(), S_IRWXU | S_IRWXG | S_IRWXO);
  mkdir(out_para_dir.c_str(), S_IRWXU | S_IRWXG | S_IRWXO);

  std::cout << "loading " << model_path << " & " << para_path << "\n";
  paddle_mobile::framework::Loader<> loader;
  auto program = loader.Load(model_path, para_path, true);
  // Runs the base Executor's init path to populate program.scope.
  FakeExecutor executor(program);
  auto optimizedProgram = program.optimizeProgram;
  export_scope(optimizedProgram, program.scope, out_para_dir);

  std::ofstream fs(out_model_js.c_str());
  if (!fs) {
    // FIX: previously an unopenable output file silently produced no model.js.
    std::cout << "cannot open " << out_model_js << " for writing\n";
    return -1;
  }
  export_nodejs(optimizedProgram, program.scope, fs);
  fs.close();
  return 0;
}
tools/web-exporter/export.h
已删除
100644 → 0
浏览文件 @
761cc7f1
#pragma once
#include <iostream>
#include <vector>
#include <memory>
#include <string>
#include <ostream>
#include <fstream>
#include "framework/loader.h"
#include "framework/executor.h"
#include "framework/scope.h"
#include "framework/program/program_desc.h"
// using paddle_mobile::framework::ProgramDesc;
// using paddle_mobile::framework::Scope;
// Shared aliases for the exporter tools: both export-nodejs.cpp and
// export-scope.cpp operate on a loaded ProgramDesc plus its Scope.
using ProgramPtr = std::shared_ptr<paddle_mobile::framework::ProgramDesc>;
using ScopePtr = std::shared_ptr<paddle_mobile::framework::Scope>;

// Write the program structure as a nodejs module to `os` (defaults to stdout).
void export_nodejs(ProgramPtr program, ScopePtr scope,
                   std::ostream& os = std::cout);
// Dump persistable parameters as binary files under `dirname`
// (defaults to the current directory).
void export_scope(ProgramPtr program, ScopePtr scope,
                  const std::string& dirname = ".");
// Fallback conversion: anything std::to_string understands (arithmetic
// types) is rendered verbatim, without quotes.
template <typename T>
inline std::string var2str(const T& value) {
  return std::to_string(value);
}
// Quote a string for embedding in the generated JS source.
// FIX: escape backslashes and double quotes inside the value; the
// previous version emitted them raw, producing syntactically broken
// output for any name containing '"' or '\'.
template <>
inline std::string var2str(const std::string& v) {
  std::string out;
  out.reserve(v.size() + 2);
  out += '"';
  for (char c : v) {
    if (c == '"' || c == '\\') {
      out += '\\';
    }
    out += c;
  }
  out += '"';
  return out;
}
inline
std
::
string
var2str
(
const
char
*
v
)
{
return
var2str
<
std
::
string
>
(
v
);
}
// Booleans become the JS literals "true"/"false" (unquoted).
inline std::string var2str(const bool v) {
  if (v) {
    return "true";
  }
  return "false";
}
// Render a vector as a JS array literal, e.g. [1, 2, 3], recursing into
// var2str for each element.
// FIX: the loop compared a signed `int` index against v.size() (size_t),
// a signed/unsigned mismatch that overflows for huge vectors; use the
// container's size_type throughout.
template <typename T>
std::string var2str(const std::vector<T>& v) {
  std::string r = "[";
  for (typename std::vector<T>::size_type i = 0; i < v.size(); i++) {
    if (i) r += ", ";
    r += var2str(v[i]);
  }
  return r + "]";
}
// Visitor handed to Attribute's ApplyVistor [sic]: converts whichever
// concrete attribute type the variant currently holds into its string
// form via the var2str overload set.
struct VarVisitor {
  // Result type of every conversion (std::string, deduced from var2str(0)).
  using type_t = decltype(var2str(0));

  template <typename T>
  type_t operator()(const T& v) {
    return var2str(v);
  }
};
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录