Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
c74a68b7
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c74a68b7
编写于
10月 21, 2019
作者:
myq406450149
提交者:
GitHub
10月 21, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into gpu
上级
317bd91c
305130fc
变更
24
隐藏空白更改
内联
并排
Showing
24 changed file
with
377 addition
and
101 deletion
+377
-101
cmake/cross_compiling/android.cmake
cmake/cross_compiling/android.cmake
+1
-0
lite/api/cxx_api.cc
lite/api/cxx_api.cc
+24
-21
lite/api/cxx_api.h
lite/api/cxx_api.h
+4
-5
lite/api/cxx_api_impl.cc
lite/api/cxx_api_impl.cc
+4
-4
lite/api/light_api.cc
lite/api/light_api.cc
+24
-21
lite/api/light_api.h
lite/api/light_api.h
+4
-5
lite/api/light_api_impl.cc
lite/api/light_api_impl.cc
+4
-4
lite/api/light_api_test.cc
lite/api/light_api_test.cc
+4
-2
lite/api/paddle_api.h
lite/api/paddle_api.h
+2
-2
lite/api/paddle_api_test.cc
lite/api/paddle_api_test.cc
+6
-6
lite/core/device_info.cc
lite/core/device_info.cc
+23
-5
lite/core/types.cc
lite/core/types.cc
+4
-0
lite/core/types.h
lite/core/types.h
+6
-1
lite/kernels/arm/conv_compute.cc
lite/kernels/arm/conv_compute.cc
+10
-3
lite/kernels/cuda/CMakeLists.txt
lite/kernels/cuda/CMakeLists.txt
+2
-0
lite/kernels/cuda/pool_compute_test.cc
lite/kernels/cuda/pool_compute_test.cc
+3
-3
lite/kernels/cuda/softmax_compute_test.cc
lite/kernels/cuda/softmax_compute_test.cc
+134
-0
lite/kernels/cuda/yolo_box_compute_test.cc
lite/kernels/cuda/yolo_box_compute_test.cc
+6
-6
lite/model_parser/model_parser.cc
lite/model_parser/model_parser.cc
+2
-5
lite/model_parser/naive_buffer/naive_buffer.h
lite/model_parser/naive_buffer/naive_buffer.h
+65
-0
lite/model_parser/naive_buffer/param_desc.cc
lite/model_parser/naive_buffer/param_desc.cc
+7
-7
lite/model_parser/naive_buffer/proto/framework.nb.h
lite/model_parser/naive_buffer/proto/framework.nb.h
+1
-1
lite/operators/conv_op.cc
lite/operators/conv_op.cc
+32
-0
lite/operators/conv_op.h
lite/operators/conv_op.h
+5
-0
未找到文件。
cmake/cross_compiling/android.cmake
浏览文件 @
c74a68b7
...
@@ -18,6 +18,7 @@ endif()
...
@@ -18,6 +18,7 @@ endif()
set
(
ANDROID TRUE
)
set
(
ANDROID TRUE
)
add_definitions
(
-DLITE_WITH_LINUX
)
add_definitions
(
-DLITE_WITH_LINUX
)
add_definitions
(
-DLITE_WITH_ANDROID
)
if
(
NOT DEFINED ANDROID_NDK
)
if
(
NOT DEFINED ANDROID_NDK
)
set
(
ANDROID_NDK $ENV{NDK_ROOT}
)
set
(
ANDROID_NDK $ENV{NDK_ROOT}
)
...
...
lite/api/cxx_api.cc
浏览文件 @
c74a68b7
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
// limitations under the License.
// limitations under the License.
#include "lite/api/cxx_api.h"
#include "lite/api/cxx_api.h"
#include <algorithm>
#include <memory>
#include <memory>
#include <string>
#include <string>
#include <utility>
#include <utility>
...
@@ -52,35 +53,36 @@ lite::Tensor *Predictor::GetInput(size_t offset) {
...
@@ -52,35 +53,36 @@ lite::Tensor *Predictor::GetInput(size_t offset) {
}
}
// get inputs names
// get inputs names
std
::
vector
<
std
::
string
>
Predictor
::
GetInputNames
()
{
const
std
::
vector
<
std
::
string
>
&
Predictor
::
GetInputNames
()
{
std
::
vector
<
std
::
string
>
input_names
;
return
input_names_
;
for
(
auto
&
item
:
input_names_
)
{
input_names
.
push_back
(
item
.
second
);
}
return
input_names
;
}
}
// get outputnames
// get outputnames
std
::
vector
<
std
::
string
>
Predictor
::
GetOutputNames
()
{
const
std
::
vector
<
std
::
string
>
&
Predictor
::
GetOutputNames
()
{
std
::
vector
<
std
::
string
>
output_names
;
return
output_names_
;
for
(
auto
&
item
:
output_names_
)
{
output_names
.
push_back
(
item
.
second
);
}
return
output_names
;
}
}
// append the names of inputs and outputs into input_names_ and output_names_
// append the names of inputs and outputs into input_names_ and output_names_
void
Predictor
::
PrepareFeedFetch
()
{
void
Predictor
::
PrepareFeedFetch
()
{
auto
current_block
=
program_desc_
.
GetBlock
<
cpp
::
BlockDesc
>
(
0
);
auto
current_block
=
program_desc_
.
GetBlock
<
cpp
::
BlockDesc
>
(
0
);
std
::
vector
<
cpp
::
OpDesc
*>
feeds
;
std
::
vector
<
cpp
::
OpDesc
*>
fetchs
;
for
(
int
i
=
0
;
i
<
current_block
->
OpsSize
();
i
++
)
{
for
(
int
i
=
0
;
i
<
current_block
->
OpsSize
();
i
++
)
{
auto
op
=
current_block
->
GetOp
<
cpp
::
OpDesc
>
(
i
);
auto
op
=
current_block
->
GetOp
<
cpp
::
OpDesc
>
(
i
);
if
(
op
->
Type
()
==
"feed"
)
{
if
(
op
->
Type
()
==
"feed"
)
{
int
idx
=
op
->
GetAttr
<
int
>
(
"col"
);
feeds
.
push_back
(
op
);
input_names_
[
idx
]
=
op
->
Output
(
"Out"
).
front
();
idx2feeds_
[
op
->
Output
(
"Out"
).
front
()]
=
idx
;
}
else
if
(
op
->
Type
()
==
"fetch"
)
{
}
else
if
(
op
->
Type
()
==
"fetch"
)
{
int
idx
=
op
->
GetAttr
<
int
>
(
"col"
);
fetchs
.
push_back
(
op
);
output_names_
[
idx
]
=
op
->
Input
(
"X"
).
front
();
}
}
}
}
input_names_
.
resize
(
feeds
.
size
());
output_names_
.
resize
(
fetchs
.
size
());
for
(
int
i
=
0
;
i
<
feeds
.
size
();
i
++
)
{
input_names_
[
feeds
[
i
]
->
GetAttr
<
int
>
(
"col"
)]
=
feeds
[
i
]
->
Output
(
"Out"
).
front
();
}
for
(
int
i
=
0
;
i
<
fetchs
.
size
();
i
++
)
{
output_names_
[
fetchs
[
i
]
->
GetAttr
<
int
>
(
"col"
)]
=
fetchs
[
i
]
->
Input
(
"X"
).
front
();
}
}
}
const
lite
::
Tensor
*
Predictor
::
GetOutput
(
size_t
offset
)
const
{
const
lite
::
Tensor
*
Predictor
::
GetOutput
(
size_t
offset
)
const
{
...
@@ -189,16 +191,17 @@ const lite::Tensor *Predictor::GetTensor(const std::string &name) const {
...
@@ -189,16 +191,17 @@ const lite::Tensor *Predictor::GetTensor(const std::string &name) const {
}
}
// get input by name
// get input by name
lite
::
Tensor
*
Predictor
::
GetInputByName
(
const
std
::
string
&
name
)
{
lite
::
Tensor
*
Predictor
::
GetInputByName
(
const
std
::
string
&
name
)
{
if
(
idx2feeds_
.
find
(
name
)
==
idx2feeds_
.
end
())
{
auto
element
=
std
::
find
(
input_names_
.
begin
(),
input_names_
.
end
(),
name
);
if
(
element
==
input_names_
.
end
())
{
LOG
(
ERROR
)
<<
"Model do not have input named with: ["
<<
name
LOG
(
ERROR
)
<<
"Model do not have input named with: ["
<<
name
<<
"], model's inputs include:"
;
<<
"], model's inputs include:"
;
for
(
int
i
=
0
;
i
<
input_names_
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
input_names_
.
size
();
i
++
)
{
LOG
(
ERROR
)
<<
"["
<<
input_names_
[
i
]
<<
"]"
;
LOG
(
ERROR
)
<<
"["
<<
input_names_
[
i
]
<<
"]"
;
}
}
return
NULL
;
return
nullptr
;
}
else
{
}
else
{
int
idx
=
idx2feeds_
[
name
]
;
int
position
=
std
::
distance
(
input_names_
.
begin
(),
element
)
;
return
GetInput
(
idx
);
return
GetInput
(
position
);
}
}
}
}
...
...
lite/api/cxx_api.h
浏览文件 @
c74a68b7
...
@@ -74,8 +74,8 @@ class LITE_API Predictor {
...
@@ -74,8 +74,8 @@ class LITE_API Predictor {
// get input by name.
// get input by name.
lite
::
Tensor
*
GetInputByName
(
const
std
::
string
&
name
);
lite
::
Tensor
*
GetInputByName
(
const
std
::
string
&
name
);
// get inputnames and get outputnames.
// get inputnames and get outputnames.
std
::
vector
<
std
::
string
>
GetInputNames
();
const
std
::
vector
<
std
::
string
>&
GetInputNames
();
std
::
vector
<
std
::
string
>
GetOutputNames
();
const
std
::
vector
<
std
::
string
>&
GetOutputNames
();
void
PrepareFeedFetch
();
void
PrepareFeedFetch
();
// Get offset-th col of fetch results.
// Get offset-th col of fetch results.
...
@@ -107,9 +107,8 @@ class LITE_API Predictor {
...
@@ -107,9 +107,8 @@ class LITE_API Predictor {
const
Scope
*
exec_scope_
;
const
Scope
*
exec_scope_
;
std
::
unique_ptr
<
RuntimeProgram
>
program_
;
std
::
unique_ptr
<
RuntimeProgram
>
program_
;
bool
program_generated_
{
false
};
bool
program_generated_
{
false
};
std
::
map
<
size_t
,
std
::
string
>
input_names_
;
std
::
vector
<
std
::
string
>
input_names_
;
std
::
map
<
std
::
string
,
size_t
>
idx2feeds_
;
std
::
vector
<
std
::
string
>
output_names_
;
std
::
map
<
size_t
,
std
::
string
>
output_names_
;
};
};
/*
/*
...
...
lite/api/cxx_api_impl.cc
浏览文件 @
c74a68b7
...
@@ -37,8 +37,8 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
...
@@ -37,8 +37,8 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
std
::
string
GetVersion
()
const
override
;
std
::
string
GetVersion
()
const
override
;
// get inputs names and get outputs names
// get inputs names and get outputs names
std
::
vector
<
std
::
string
>
GetInputNames
()
override
;
const
std
::
vector
<
std
::
string
>
&
GetInputNames
()
override
;
std
::
vector
<
std
::
string
>
GetOutputNames
()
override
;
const
std
::
vector
<
std
::
string
>
&
GetOutputNames
()
override
;
std
::
unique_ptr
<
const
lite_api
::
Tensor
>
GetTensor
(
std
::
unique_ptr
<
const
lite_api
::
Tensor
>
GetTensor
(
const
std
::
string
&
name
)
const
override
;
const
std
::
string
&
name
)
const
override
;
...
@@ -76,11 +76,11 @@ std::unique_ptr<const lite_api::Tensor> CxxPaddleApiImpl::GetOutput(
...
@@ -76,11 +76,11 @@ std::unique_ptr<const lite_api::Tensor> CxxPaddleApiImpl::GetOutput(
return
std
::
unique_ptr
<
lite_api
::
Tensor
>
(
new
lite_api
::
Tensor
(
x
));
return
std
::
unique_ptr
<
lite_api
::
Tensor
>
(
new
lite_api
::
Tensor
(
x
));
}
}
std
::
vector
<
std
::
string
>
CxxPaddleApiImpl
::
GetInputNames
()
{
const
std
::
vector
<
std
::
string
>
&
CxxPaddleApiImpl
::
GetInputNames
()
{
return
raw_predictor_
.
GetInputNames
();
return
raw_predictor_
.
GetInputNames
();
}
}
std
::
vector
<
std
::
string
>
CxxPaddleApiImpl
::
GetOutputNames
()
{
const
std
::
vector
<
std
::
string
>
&
CxxPaddleApiImpl
::
GetOutputNames
()
{
return
raw_predictor_
.
GetOutputNames
();
return
raw_predictor_
.
GetOutputNames
();
}
}
...
...
lite/api/light_api.cc
浏览文件 @
c74a68b7
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
// limitations under the License.
// limitations under the License.
#include "lite/api/light_api.h"
#include "lite/api/light_api.h"
#include <algorithm>
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
@@ -56,16 +57,17 @@ Tensor* LightPredictor::GetInput(size_t offset) {
...
@@ -56,16 +57,17 @@ Tensor* LightPredictor::GetInput(size_t offset) {
// get input by name
// get input by name
Tensor
*
LightPredictor
::
GetInputByName
(
const
std
::
string
&
name
)
{
Tensor
*
LightPredictor
::
GetInputByName
(
const
std
::
string
&
name
)
{
if
(
idx2feeds_
.
find
(
name
)
==
idx2feeds_
.
end
())
{
auto
element
=
std
::
find
(
input_names_
.
begin
(),
input_names_
.
end
(),
name
);
if
(
element
==
input_names_
.
end
())
{
LOG
(
ERROR
)
<<
"Model do not have input named with: ["
<<
name
LOG
(
ERROR
)
<<
"Model do not have input named with: ["
<<
name
<<
"], model's inputs include:"
;
<<
"], model's inputs include:"
;
for
(
int
i
=
0
;
i
<
input_names_
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
input_names_
.
size
();
i
++
)
{
LOG
(
ERROR
)
<<
"["
<<
input_names_
[
i
]
<<
"]"
;
LOG
(
ERROR
)
<<
"["
<<
input_names_
[
i
]
<<
"]"
;
}
}
return
NULL
;
return
nullptr
;
}
else
{
}
else
{
int
idx
=
idx2feeds_
[
name
]
;
int
position
=
std
::
distance
(
input_names_
.
begin
(),
element
)
;
return
GetInput
(
idx
);
return
GetInput
(
position
);
}
}
}
}
...
@@ -79,35 +81,36 @@ const Tensor* LightPredictor::GetOutput(size_t offset) {
...
@@ -79,35 +81,36 @@ const Tensor* LightPredictor::GetOutput(size_t offset) {
return
out_var
->
GetMutable
<
lite
::
Tensor
>
();
return
out_var
->
GetMutable
<
lite
::
Tensor
>
();
}
}
// get inputs names
// get inputs names
std
::
vector
<
std
::
string
>
LightPredictor
::
GetInputNames
()
{
const
std
::
vector
<
std
::
string
>&
LightPredictor
::
GetInputNames
()
{
std
::
vector
<
std
::
string
>
input_names
;
return
input_names_
;
for
(
auto
&
item
:
input_names_
)
{
input_names
.
push_back
(
item
.
second
);
}
return
input_names
;
}
}
// get outputnames
// get outputnames
std
::
vector
<
std
::
string
>
LightPredictor
::
GetOutputNames
()
{
const
std
::
vector
<
std
::
string
>&
LightPredictor
::
GetOutputNames
()
{
std
::
vector
<
std
::
string
>
output_names
;
return
output_names_
;
for
(
auto
&
item
:
output_names_
)
{
output_names
.
push_back
(
item
.
second
);
}
return
output_names
;
}
}
// append the names of inputs and outputs into input_names_ and output_names_
// append the names of inputs and outputs into input_names_ and output_names_
void
LightPredictor
::
PrepareFeedFetch
()
{
void
LightPredictor
::
PrepareFeedFetch
()
{
auto
current_block
=
cpp_program_desc_
.
GetBlock
<
cpp
::
BlockDesc
>
(
0
);
auto
current_block
=
cpp_program_desc_
.
GetBlock
<
cpp
::
BlockDesc
>
(
0
);
std
::
vector
<
cpp
::
OpDesc
*>
feeds
;
std
::
vector
<
cpp
::
OpDesc
*>
fetchs
;
for
(
int
i
=
0
;
i
<
current_block
->
OpsSize
();
i
++
)
{
for
(
int
i
=
0
;
i
<
current_block
->
OpsSize
();
i
++
)
{
auto
op
=
current_block
->
GetOp
<
cpp
::
OpDesc
>
(
i
);
auto
op
=
current_block
->
GetOp
<
cpp
::
OpDesc
>
(
i
);
if
(
op
->
Type
()
==
"feed"
)
{
if
(
op
->
Type
()
==
"feed"
)
{
int
idx
=
op
->
GetAttr
<
int
>
(
"col"
);
feeds
.
push_back
(
op
);
input_names_
[
idx
]
=
op
->
Output
(
"Out"
).
front
();
idx2feeds_
[
op
->
Output
(
"Out"
).
front
()]
=
idx
;
}
else
if
(
op
->
Type
()
==
"fetch"
)
{
}
else
if
(
op
->
Type
()
==
"fetch"
)
{
int
idx
=
op
->
GetAttr
<
int
>
(
"col"
);
fetchs
.
push_back
(
op
);
output_names_
[
idx
]
=
op
->
Input
(
"X"
).
front
();
}
}
}
}
input_names_
.
resize
(
feeds
.
size
());
output_names_
.
resize
(
fetchs
.
size
());
for
(
int
i
=
0
;
i
<
feeds
.
size
();
i
++
)
{
input_names_
[
feeds
[
i
]
->
GetAttr
<
int
>
(
"col"
)]
=
feeds
[
i
]
->
Output
(
"Out"
).
front
();
}
for
(
int
i
=
0
;
i
<
fetchs
.
size
();
i
++
)
{
output_names_
[
fetchs
[
i
]
->
GetAttr
<
int
>
(
"col"
)]
=
fetchs
[
i
]
->
Input
(
"X"
).
front
();
}
}
}
void
LightPredictor
::
BuildRuntimeProgram
(
const
cpp
::
ProgramDesc
&
prog
)
{
void
LightPredictor
::
BuildRuntimeProgram
(
const
cpp
::
ProgramDesc
&
prog
)
{
...
...
lite/api/light_api.h
浏览文件 @
c74a68b7
...
@@ -64,8 +64,8 @@ class LITE_API LightPredictor {
...
@@ -64,8 +64,8 @@ class LITE_API LightPredictor {
}
}
// get inputnames and get outputnames.
// get inputnames and get outputnames.
std
::
vector
<
std
::
string
>
GetInputNames
();
const
std
::
vector
<
std
::
string
>&
GetInputNames
();
std
::
vector
<
std
::
string
>
GetOutputNames
();
const
std
::
vector
<
std
::
string
>&
GetOutputNames
();
void
PrepareFeedFetch
();
void
PrepareFeedFetch
();
private:
private:
...
@@ -82,9 +82,8 @@ class LITE_API LightPredictor {
...
@@ -82,9 +82,8 @@ class LITE_API LightPredictor {
std
::
shared_ptr
<
Scope
>
scope_
;
std
::
shared_ptr
<
Scope
>
scope_
;
std
::
unique_ptr
<
RuntimeProgram
>
program_
;
std
::
unique_ptr
<
RuntimeProgram
>
program_
;
cpp
::
ProgramDesc
cpp_program_desc_
;
cpp
::
ProgramDesc
cpp_program_desc_
;
std
::
map
<
size_t
,
std
::
string
>
input_names_
;
std
::
vector
<
std
::
string
>
input_names_
;
std
::
map
<
std
::
string
,
size_t
>
idx2feeds_
;
std
::
vector
<
std
::
string
>
output_names_
;
std
::
map
<
size_t
,
std
::
string
>
output_names_
;
};
};
}
// namespace lite
}
// namespace lite
...
...
lite/api/light_api_impl.cc
浏览文件 @
c74a68b7
...
@@ -32,8 +32,8 @@ class LightPredictorImpl : public PaddlePredictor {
...
@@ -32,8 +32,8 @@ class LightPredictorImpl : public PaddlePredictor {
void
Run
()
override
;
void
Run
()
override
;
std
::
string
GetVersion
()
const
override
;
std
::
string
GetVersion
()
const
override
;
std
::
vector
<
std
::
string
>
GetInputNames
()
override
;
const
std
::
vector
<
std
::
string
>&
GetInputNames
()
override
;
std
::
vector
<
std
::
string
>
GetOutputNames
()
override
;
const
std
::
vector
<
std
::
string
>&
GetOutputNames
()
override
;
std
::
unique_ptr
<
const
Tensor
>
GetTensor
(
std
::
unique_ptr
<
const
Tensor
>
GetTensor
(
const
std
::
string
&
name
)
const
override
;
const
std
::
string
&
name
)
const
override
;
...
@@ -78,11 +78,11 @@ std::unique_ptr<Tensor> LightPredictorImpl::GetInputByName(
...
@@ -78,11 +78,11 @@ std::unique_ptr<Tensor> LightPredictorImpl::GetInputByName(
new
Tensor
(
raw_predictor_
->
GetInputByName
(
name
)));
new
Tensor
(
raw_predictor_
->
GetInputByName
(
name
)));
}
}
std
::
vector
<
std
::
string
>
LightPredictorImpl
::
GetInputNames
()
{
const
std
::
vector
<
std
::
string
>&
LightPredictorImpl
::
GetInputNames
()
{
return
raw_predictor_
->
GetInputNames
();
return
raw_predictor_
->
GetInputNames
();
}
}
std
::
vector
<
std
::
string
>
LightPredictorImpl
::
GetOutputNames
()
{
const
std
::
vector
<
std
::
string
>&
LightPredictorImpl
::
GetOutputNames
()
{
return
raw_predictor_
->
GetOutputNames
();
return
raw_predictor_
->
GetOutputNames
();
}
}
...
...
lite/api/light_api_test.cc
浏览文件 @
c74a68b7
...
@@ -36,12 +36,14 @@ TEST(LightAPI, load) {
...
@@ -36,12 +36,14 @@ TEST(LightAPI, load) {
data
[
i
]
=
i
;
data
[
i
]
=
i
;
}
}
std
::
vector
<
std
::
string
>
inputs
=
predictor
.
GetInputNames
();
predictor
.
PrepareFeedFetch
();
const
std
::
vector
<
std
::
string
>&
inputs
=
predictor
.
GetInputNames
();
LOG
(
INFO
)
<<
"input size: "
<<
inputs
.
size
();
LOG
(
INFO
)
<<
"input size: "
<<
inputs
.
size
();
for
(
int
i
=
0
;
i
<
inputs
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
inputs
.
size
();
i
++
)
{
LOG
(
INFO
)
<<
"inputnames: "
<<
inputs
[
i
];
LOG
(
INFO
)
<<
"inputnames: "
<<
inputs
[
i
];
}
}
std
::
vector
<
std
::
string
>
outputs
=
predictor
.
GetOutputNames
();
const
std
::
vector
<
std
::
string
>&
outputs
=
predictor
.
GetOutputNames
();
for
(
int
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
LOG
(
INFO
)
<<
"outputnames: "
<<
outputs
[
i
];
LOG
(
INFO
)
<<
"outputnames: "
<<
outputs
[
i
];
}
}
...
...
lite/api/paddle_api.h
浏览文件 @
c74a68b7
...
@@ -75,9 +75,9 @@ class LITE_API PaddlePredictor {
...
@@ -75,9 +75,9 @@ class LITE_API PaddlePredictor {
virtual
std
::
string
GetVersion
()
const
=
0
;
virtual
std
::
string
GetVersion
()
const
=
0
;
// Get input names
// Get input names
virtual
std
::
vector
<
std
::
string
>
GetInputNames
()
=
0
;
virtual
const
std
::
vector
<
std
::
string
>&
GetInputNames
()
=
0
;
// Get output names
// Get output names
virtual
std
::
vector
<
std
::
string
>
GetOutputNames
()
=
0
;
virtual
const
std
::
vector
<
std
::
string
>&
GetOutputNames
()
=
0
;
// Get Input by name
// Get Input by name
virtual
std
::
unique_ptr
<
Tensor
>
GetInputByName
(
const
std
::
string
&
name
)
=
0
;
virtual
std
::
unique_ptr
<
Tensor
>
GetInputByName
(
const
std
::
string
&
name
)
=
0
;
...
...
lite/api/paddle_api_test.cc
浏览文件 @
c74a68b7
...
@@ -37,12 +37,12 @@ TEST(CxxApi, run) {
...
@@ -37,12 +37,12 @@ TEST(CxxApi, run) {
LOG
(
INFO
)
<<
"Version: "
<<
predictor
->
GetVersion
();
LOG
(
INFO
)
<<
"Version: "
<<
predictor
->
GetVersion
();
std
::
vector
<
std
::
string
>
inputs
=
predictor
->
GetInputNames
();
auto
&
inputs
=
predictor
->
GetInputNames
();
LOG
(
INFO
)
<<
"input size: "
<<
inputs
.
size
();
LOG
(
INFO
)
<<
"input size: "
<<
inputs
.
size
();
for
(
int
i
=
0
;
i
<
inputs
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
inputs
.
size
();
i
++
)
{
LOG
(
INFO
)
<<
"inputnames: "
<<
inputs
[
i
];
LOG
(
INFO
)
<<
"inputnames: "
<<
inputs
[
i
];
}
}
std
::
vector
<
std
::
string
>
outputs
=
predictor
->
GetOutputNames
();
auto
&
outputs
=
predictor
->
GetOutputNames
();
for
(
int
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
LOG
(
INFO
)
<<
"outputnames: "
<<
outputs
[
i
];
LOG
(
INFO
)
<<
"outputnames: "
<<
outputs
[
i
];
}
}
...
@@ -76,14 +76,14 @@ TEST(LightApi, run) {
...
@@ -76,14 +76,14 @@ TEST(LightApi, run) {
auto
predictor
=
lite_api
::
CreatePaddlePredictor
(
config
);
auto
predictor
=
lite_api
::
CreatePaddlePredictor
(
config
);
std
::
vector
<
std
::
string
>
inputs
=
predictor
->
GetInputNames
();
auto
&
inputs
=
predictor
->
GetInputNames
();
LOG
(
INFO
)
<<
"input size: "
<<
inputs
.
size
();
LOG
(
INFO
)
<<
"input size: "
<<
inputs
.
size
();
for
(
int
i
=
0
;
i
<
inputs
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
inputs
.
size
();
i
++
)
{
LOG
(
INFO
)
<<
"inputnames: "
<<
inputs
[
i
]
;
LOG
(
INFO
)
<<
"inputnames: "
<<
inputs
.
at
(
i
)
;
}
}
std
::
vector
<
std
::
string
>
outputs
=
predictor
->
GetOutputNames
();
auto
&
outputs
=
predictor
->
GetOutputNames
();
for
(
int
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
LOG
(
INFO
)
<<
"outputnames: "
<<
outputs
[
i
]
;
LOG
(
INFO
)
<<
"outputnames: "
<<
outputs
.
at
(
i
)
;
}
}
LOG
(
INFO
)
<<
"Version: "
<<
predictor
->
GetVersion
();
LOG
(
INFO
)
<<
"Version: "
<<
predictor
->
GetVersion
();
...
...
lite/core/device_info.cc
浏览文件 @
c74a68b7
...
@@ -35,6 +35,9 @@
...
@@ -35,6 +35,9 @@
#include <sys/syscall.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <unistd.h>
#endif
#endif
#ifdef LITE_WITH_ANDROID
#include <sys/system_properties.h>
#endif
#if __APPLE__
#if __APPLE__
#include "TargetConditionals.h"
#include "TargetConditionals.h"
#if LITE_WITH_IPHONE
#if LITE_WITH_IPHONE
...
@@ -218,6 +221,7 @@ void get_cpu_arch(std::vector<ARMArch>* archs, const int cpu_num) {
...
@@ -218,6 +221,7 @@ void get_cpu_arch(std::vector<ARMArch>* archs, const int cpu_num) {
#ifdef LITE_WITH_LINUX
#ifdef LITE_WITH_LINUX
std
::
string
get_cpu_name
()
{
std
::
string
get_cpu_name
()
{
std
::
string
cpu_name
;
FILE
*
fp
=
fopen
(
"/proc/cpuinfo"
,
"rb"
);
FILE
*
fp
=
fopen
(
"/proc/cpuinfo"
,
"rb"
);
if
(
!
fp
)
{
if
(
!
fp
)
{
return
""
;
return
""
;
...
@@ -229,12 +233,23 @@ std::string get_cpu_name() {
...
@@ -229,12 +233,23 @@ std::string get_cpu_name() {
break
;
break
;
}
}
if
(
strstr
(
line
,
"Hardware"
)
!=
NULL
)
{
if
(
strstr
(
line
,
"Hardware"
)
!=
NULL
)
{
fclose
(
fp
);
cpu_name
=
std
::
string
(
line
);
return
std
::
string
(
line
);
}
}
}
}
#ifdef LITE_WITH_ANDROID
// cpu name concat board name, platform name and chip name
char
board_name
[
128
];
char
platform_name
[
128
];
char
chip_name
[
128
];
__system_property_get
(
"ro.product.board"
,
board_name
);
__system_property_get
(
"ro.board.platform"
,
platform_name
);
__system_property_get
(
"ro.chipname"
,
chip_name
);
cpu_name
=
cpu_name
+
"_"
+
board_name
+
"_"
+
platform_name
+
"_"
+
chip_name
;
#endif
std
::
transform
(
cpu_name
.
begin
(),
cpu_name
.
end
(),
cpu_name
.
begin
(),
::
toupper
);
fclose
(
fp
);
fclose
(
fp
);
return
""
;
return
cpu_name
;
}
}
int
get_min_freq_khz
(
int
cpuid
)
{
int
get_min_freq_khz
(
int
cpuid
)
{
...
@@ -780,7 +795,9 @@ bool DeviceInfo::SetCPUInfoByName() {
...
@@ -780,7 +795,9 @@ bool DeviceInfo::SetCPUInfoByName() {
cluster_ids_
=
{
0
,
0
,
0
,
0
};
cluster_ids_
=
{
0
,
0
,
0
,
0
};
SetArchInfo
(
1
,
kA53
);
SetArchInfo
(
1
,
kA53
);
return
true
;
return
true
;
}
else
if
(
dev_name_
.
find
(
"KIRIN980"
)
!=
std
::
string
::
npos
)
{
// Kirin 980
}
else
if
(
dev_name_
.
find
(
"KIRIN980"
)
!=
std
::
string
::
npos
||
dev_name_
.
find
(
"KIRIN990"
)
!=
std
::
string
::
npos
)
{
// Kirin 980, Kirin 990
core_num_
=
8
;
core_num_
=
8
;
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
core_ids_
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
big_core_ids_
=
{
4
,
5
,
6
,
7
};
...
@@ -1109,7 +1126,8 @@ void DeviceInfo::SetCache(int l1size, int l2size, int l3size) {
...
@@ -1109,7 +1126,8 @@ void DeviceInfo::SetCache(int l1size, int l2size, int l3size) {
}
}
bool
DeviceInfo
::
ExtendWorkspace
(
size_t
size
)
{
bool
DeviceInfo
::
ExtendWorkspace
(
size_t
size
)
{
workspace_
.
Resize
({
size
+
llc_size
()});
workspace_
.
Resize
(
{
static_cast
<
int64_t
>
(
size
+
static_cast
<
size_t
>
(
llc_size
()))});
return
workspace_
.
mutable_data
<
int8_t
>
()
!=
nullptr
;
return
workspace_
.
mutable_data
<
int8_t
>
()
!=
nullptr
;
}
}
...
...
lite/core/types.cc
浏览文件 @
c74a68b7
...
@@ -82,6 +82,10 @@ Type StdTypeToRepr<double>() {
...
@@ -82,6 +82,10 @@ Type StdTypeToRepr<double>() {
return
Type
::
_float64
;
return
Type
::
_float64
;
}
}
template
<
>
template
<
>
Type
StdTypeToRepr
<
std
::
vector
<
char
>>
()
{
return
Type
::
_char_list
;
}
template
<
>
Type
StdTypeToRepr
<
std
::
string
>
()
{
Type
StdTypeToRepr
<
std
::
string
>
()
{
return
Type
::
_string
;
return
Type
::
_string
;
}
}
...
...
lite/core/types.h
浏览文件 @
c74a68b7
...
@@ -16,6 +16,7 @@
...
@@ -16,6 +16,7 @@
#include <stack>
#include <stack>
#include <string>
#include <string>
#include <vector>
#include "lite/api/paddle_place.h"
#include "lite/api/paddle_place.h"
#include "lite/utils/all.h"
#include "lite/utils/all.h"
...
@@ -36,7 +37,9 @@ enum class Type {
...
@@ -36,7 +37,9 @@ enum class Type {
_float64
,
_float64
,
_bool
,
_bool
,
_string
,
_string
,
// primary list types
// primary list type
_char_list
,
// list types
_list
,
_list
,
// enum type
// enum type
_enum
,
_enum
,
...
@@ -89,6 +92,8 @@ Type StdTypeToRepr<float>();
...
@@ -89,6 +92,8 @@ Type StdTypeToRepr<float>();
template
<
>
template
<
>
Type
StdTypeToRepr
<
bool
>
();
Type
StdTypeToRepr
<
bool
>
();
template
<
>
template
<
>
Type
StdTypeToRepr
<
std
::
vector
<
char
>>
();
template
<
>
Type
StdTypeToRepr
<
std
::
string
>
();
Type
StdTypeToRepr
<
std
::
string
>
();
// Factors that impact the kernel picking strategy. Multiple factors can be
// Factors that impact the kernel picking strategy. Multiple factors can be
...
...
lite/kernels/arm/conv_compute.cc
浏览文件 @
c74a68b7
...
@@ -39,6 +39,13 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
...
@@ -39,6 +39,13 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
int
pad
=
param
.
paddings
[
0
];
int
pad
=
param
.
paddings
[
0
];
int
stride
=
param
.
strides
[
0
];
int
stride
=
param
.
strides
[
0
];
int
chin
=
param
.
x
->
dims
()[
1
];
int
hin
=
param
.
x
->
dims
()[
2
];
int
win
=
param
.
x
->
dims
()[
3
];
int
chout
=
param
.
output
->
dims
()[
1
];
int
hout
=
param
.
output
->
dims
()[
2
];
int
wout
=
param
.
output
->
dims
()[
3
];
bool
kps_equal
=
(
param
.
paddings
[
0
]
==
param
.
paddings
[
1
])
&&
bool
kps_equal
=
(
param
.
paddings
[
0
]
==
param
.
paddings
[
1
])
&&
(
param
.
strides
[
0
]
==
param
.
strides
[
1
])
&&
(
kw
==
kh
);
(
param
.
strides
[
0
]
==
param
.
strides
[
1
])
&&
(
kw
==
kh
);
bool
no_dilation
=
(
param
.
dilations
[
0
]
==
1
)
&&
(
param
.
dilations
[
1
]
==
1
);
bool
no_dilation
=
(
param
.
dilations
[
0
]
==
1
)
&&
(
param
.
dilations
[
1
]
==
1
);
...
@@ -54,7 +61,7 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
...
@@ -54,7 +61,7 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
VLOG
(
3
)
<<
"invoking dw conv"
;
VLOG
(
3
)
<<
"invoking dw conv"
;
}
else
if
(
param
.
groups
==
1
&&
kw
==
3
&&
stride
==
1
&&
kps_equal
&&
}
else
if
(
param
.
groups
==
1
&&
kw
==
3
&&
stride
==
1
&&
kps_equal
&&
no_dilation
)
{
no_dilation
)
{
if
(
ic
>=
32
&&
oc
>=
32
)
{
if
(
ic
>=
32
&&
oc
>=
32
&&
hout
>
16
&&
wout
>
16
)
{
/// winograd conv impl
/// winograd conv impl
impl_
=
new
WinogradConv
<
PRECISION
(
kFloat
),
PRECISION
(
kFloat
)
>
;
impl_
=
new
WinogradConv
<
PRECISION
(
kFloat
),
PRECISION
(
kFloat
)
>
;
VLOG
(
3
)
<<
"invoking winograd conv"
;
VLOG
(
3
)
<<
"invoking winograd conv"
;
...
@@ -63,8 +70,8 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
...
@@ -63,8 +70,8 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
impl_
=
new
DirectConv
<
PRECISION
(
kFloat
),
PRECISION
(
kFloat
)
>
;
impl_
=
new
DirectConv
<
PRECISION
(
kFloat
),
PRECISION
(
kFloat
)
>
;
VLOG
(
3
)
<<
"invoking direct conv"
;
VLOG
(
3
)
<<
"invoking direct conv"
;
}
}
}
else
if
(
param
.
groups
==
1
&&
kw
==
3
&&
stride
==
2
&&
kps_equal
&&
}
else
if
(
param
.
groups
==
1
&&
kw
==
3
&&
stride
==
2
&&
no_dilation
)
{
chin
*
chout
<
4
*
hin
*
win
&&
kps_equal
&&
no_dilation
)
{
/// direct conv impl
/// direct conv impl
impl_
=
new
DirectConv
<
PRECISION
(
kFloat
),
PRECISION
(
kFloat
)
>
;
impl_
=
new
DirectConv
<
PRECISION
(
kFloat
),
PRECISION
(
kFloat
)
>
;
VLOG
(
3
)
<<
"invoking direct conv"
;
VLOG
(
3
)
<<
"invoking direct conv"
;
...
...
lite/kernels/cuda/CMakeLists.txt
浏览文件 @
c74a68b7
...
@@ -32,6 +32,8 @@ nv_test(yolo_box_compute_cuda_test SRCS yolo_box_compute_test.cc DEPS yolo_box_c
...
@@ -32,6 +32,8 @@ nv_test(yolo_box_compute_cuda_test SRCS yolo_box_compute_test.cc DEPS yolo_box_c
nv_test
(
transpose_compute_cuda_test SRCS transpose_compute_test.cc DEPS transpose_compute_cuda
)
nv_test
(
transpose_compute_cuda_test SRCS transpose_compute_test.cc DEPS transpose_compute_cuda
)
nv_test
(
concat_compute_cuda_test SRCS concat_compute_test.cc DEPS concat_compute_cuda
)
nv_test
(
concat_compute_cuda_test SRCS concat_compute_test.cc DEPS concat_compute_cuda
)
nv_test
(
elementwise_add_compute_cuda_test SRCS elementwise_add_compute_test.cc DEPS elementwise_add_compute_cuda
)
nv_test
(
elementwise_add_compute_cuda_test SRCS elementwise_add_compute_test.cc DEPS elementwise_add_compute_cuda
)
nv_test
(
softmax_compute_cuda_test SRCS softmax_compute_test.cc DEPS softmax_compute_cuda
)
nv_test
(
pool_compute_cuda_test SRCS pool_compute_test.cc DEPS pool_compute_cuda
)
#nv_test(layout_cuda_test SRCS layout_compute_test.cc DEPS layout_compute_cuda)
#nv_test(layout_cuda_test SRCS layout_compute_test.cc DEPS layout_compute_cuda)
nv_test
(
mul_compute_cuda_test SRCS mul_compute_test.cc DEPS mul_compute_cuda
)
nv_test
(
mul_compute_cuda_test SRCS mul_compute_test.cc DEPS mul_compute_cuda
)
nv_test
(
dropout_compute_cuda_test SRCS dropout_compute_test.cc DEPS dropout_compute_cuda
)
nv_test
(
dropout_compute_cuda_test SRCS dropout_compute_test.cc DEPS dropout_compute_cuda
)
...
...
lite/kernels/cuda/pool_compute_test.cc
浏览文件 @
c74a68b7
...
@@ -194,9 +194,9 @@ TEST(pool_cuda, compute) {
...
@@ -194,9 +194,9 @@ TEST(pool_cuda, compute) {
for
(
auto
stride
:
{
1
,
2
})
{
for
(
auto
stride
:
{
1
,
2
})
{
for
(
auto
pad
:
{
0
,
1
})
{
for
(
auto
pad
:
{
0
,
1
})
{
for
(
auto
n
:
{
1
,
2
})
{
for
(
auto
n
:
{
1
,
2
})
{
for
(
auto
c
:
{
1
,
3
,
256
})
{
for
(
auto
c
:
{
1
,
3
})
{
for
(
auto
h
:
{
2
,
3
,
4
,
6
,
13
})
{
for
(
auto
h
:
{
2
,
3
,
4
,
11
})
{
for
(
auto
w
:
{
2
,
3
,
4
,
6
,
13
})
{
for
(
auto
w
:
{
2
,
3
,
4
,
11
})
{
VLOG
(
3
)
<<
"n:"
<<
n
<<
" c:"
<<
c
<<
" h:"
<<
h
VLOG
(
3
)
<<
"n:"
<<
n
<<
" c:"
<<
c
<<
" h:"
<<
h
<<
" w:"
<<
w
<<
" ksize:"
<<
ksize
<<
" w:"
<<
w
<<
" ksize:"
<<
ksize
<<
" stride:"
<<
stride
<<
" pad:"
<<
pad
<<
" stride:"
<<
stride
<<
" pad:"
<<
pad
...
...
lite/kernels/cuda/softmax_compute_test.cc
0 → 100644
浏览文件 @
c74a68b7
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/cuda/softmax_compute.h"
#include <gtest/gtest.h>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
cuda
{
using
Tensor
=
lite
::
Tensor
;
using
DDim
=
lite
::
DDim
;
// Host-side reference softmax used to validate the CUDA kernel.
//
// Reads param.x, writes param.output, and normalizes along param.axis
// (a negative axis is offset by the rank of x). For every (outer, inner)
// position the slice along the axis is processed in three passes: find the
// maximum, write exp(x - max) while accumulating the sum, then divide each
// written value by that sum.
template <typename dtype>
static void softmax_compute_ref(const operators::SoftmaxParam& param) {
  const dtype* in = param.x->mutable_data<const dtype>();
  dtype* out = param.output->mutable_data<dtype>();
  DDim in_dims = param.x->dims();
  // Input and output are required to have equal dims data.
  ASSERT_EQ(in_dims.data(), param.output->dims().data());

  auto rank = in_dims.size();
  int axis = param.axis;
  if (axis < 0) {
    axis += rank;
  }

  int axis_len = in_dims[axis];
  int outer_cnt = in_dims.Slice(0, axis).production();
  int inner_cnt = in_dims.Slice(axis + 1, rank).production();

  for (int outer = 0; outer < outer_cnt; ++outer) {
    for (int inner = 0; inner < inner_cnt; ++inner) {
      // Index of the first element of this slice; successive elements of the
      // slice are inner_cnt apart.
      const int first = outer * axis_len * inner_cnt + inner;

      // Pass 1: maximum of the slice.
      dtype slice_max = std::numeric_limits<dtype>::lowest();
      for (int k = 0; k < axis_len; ++k) {
        const int idx = first + k * inner_cnt;
        if (in[idx] > slice_max) {
          slice_max = in[idx];
        }
      }

      // Pass 2: shifted exponentials and their sum.
      dtype slice_sum = static_cast<dtype>(0);
      for (int k = 0; k < axis_len; ++k) {
        const int idx = first + k * inner_cnt;
        out[idx] = exp(in[idx] - slice_max);
        slice_sum += out[idx];
      }

      // Pass 3: normalize.
      for (int k = 0; k < axis_len; ++k) {
        out[first + k * inner_cnt] /= slice_sum;
      }
    }
  }
}
// Runs the CUDA SoftmaxCompute kernel over a sweep of 4-D shapes {n, c, h, w}
// and axes, and compares every output element against softmax_compute_ref
// evaluated on the host.
TEST(softmax_cuda, compute) {
  std::unique_ptr<KernelContext> ctx(new KernelContext);
  auto& context = ctx->As<CUDAContext>();
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  context.SetExecStream(stream);

  SoftmaxCompute softmax;
  operators::SoftmaxParam param;
  softmax.SetContext(std::move(ctx));

  lite::Tensor x;           // kernel input (filled via Assign with kCUDA)
  lite::Tensor x_cpu;       // host copy of the input
  lite::Tensor output;      // kernel output (kCUDA)
  lite::Tensor output_cpu;  // host copy of the kernel output
  lite::Tensor output_ref;  // host reference result

  for (auto n : {1, 3}) {
    for (auto c : {1, 4}) {
      for (auto h : {5, 1, 112}) {
        for (auto w : {1, 6, 112}) {
          for (auto axis : {-2, -1, 0, 1, 2}) {
            x.Resize({n, c, h, w});
            x_cpu.Resize({n, c, h, w});
            output.Resize({n, c, h, w});
            output_cpu.Resize({n, c, h, w});
            output_ref.Resize({n, c, h, w});

            auto* x_cpu_data = x_cpu.mutable_data<float>();
            auto* output_data = output.mutable_data<float>(TARGET(kCUDA));
            // The kernel result must be copied into its own host buffer.
            // Taking this pointer from output_ref would make the copy below
            // overwrite the reference values and turn the comparison into a
            // buffer compared with itself.
            auto* output_cpu_data = output_cpu.mutable_data<float>();
            auto* output_ref_data = output_ref.mutable_data<float>();

            for (int i = 0; i < x.dims().production(); i++) {
              x_cpu_data[i] = i;
            }
            x.Assign<float, lite::DDim, TARGET(kCUDA)>(x_cpu_data,
                                                       x_cpu.dims());

            // Kernel under test.
            param.x = &x;
            param.axis = axis;
            param.output = &output;
            softmax.SetParam(param);
            softmax.Launch();

            // Host reference on the same input values.
            param.x = &x_cpu;
            param.output = &output_ref;
            softmax_compute_ref<float>(param);

            cudaDeviceSynchronize();
            CopySync<TARGET(kCUDA)>(output_cpu_data,
                                    output_data,
                                    sizeof(float) * output.numel(),
                                    IoDirection::DtoH);

            for (int i = 0; i < output.dims().production(); i++) {
              EXPECT_NEAR(output_cpu_data[i], output_ref_data[i], 1e-5);
            }
          }
        }
      }
    }
  }
}
}
// namespace cuda
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
lite/kernels/cuda/yolo_box_compute_test.cc
浏览文件 @
c74a68b7
...
@@ -89,7 +89,7 @@ inline static void calc_label_score(float* scores,
...
@@ -89,7 +89,7 @@ inline static void calc_label_score(float* scores,
template
<
typename
T
>
template
<
typename
T
>
static
void
YoloBoxRef
(
const
T
*
input
,
static
void
YoloBoxRef
(
const
T
*
input
,
const
T
*
imgsize
,
const
int
*
imgsize
,
T
*
boxes
,
T
*
boxes
,
T
*
scores
,
T
*
scores
,
const
float
conf_thresh
,
const
float
conf_thresh
,
...
@@ -106,8 +106,8 @@ static void YoloBoxRef(const T* input,
...
@@ -106,8 +106,8 @@ static void YoloBoxRef(const T* input,
float
box
[
4
];
float
box
[
4
];
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
int
img_height
=
static_cast
<
int
>
(
imgsize
[
2
*
i
])
;
int
img_height
=
imgsize
[
2
*
i
]
;
int
img_width
=
static_cast
<
int
>
(
imgsize
[
2
*
i
+
1
])
;
int
img_width
=
imgsize
[
2
*
i
+
1
]
;
for
(
int
j
=
0
;
j
<
an_num
;
j
++
)
{
for
(
int
j
=
0
;
j
<
an_num
;
j
++
)
{
for
(
int
k
=
0
;
k
<
h
;
k
++
)
{
for
(
int
k
=
0
;
k
<
h
;
k
++
)
{
...
@@ -184,12 +184,12 @@ TEST(yolo_box, normal) {
...
@@ -184,12 +184,12 @@ TEST(yolo_box, normal) {
auto
*
scores_data
=
scores
.
mutable_data
<
float
>
(
TARGET
(
kCUDA
));
auto
*
scores_data
=
scores
.
mutable_data
<
float
>
(
TARGET
(
kCUDA
));
float
*
x_cpu_data
=
x_cpu
.
mutable_data
<
float
>
();
float
*
x_cpu_data
=
x_cpu
.
mutable_data
<
float
>
();
float
*
sz_cpu_data
=
sz_cpu
.
mutable_data
<
floa
t
>
();
int
*
sz_cpu_data
=
sz_cpu
.
mutable_data
<
in
t
>
();
float
*
boxes_cpu_data
=
boxes_cpu
.
mutable_data
<
float
>
();
float
*
boxes_cpu_data
=
boxes_cpu
.
mutable_data
<
float
>
();
float
*
scores_cpu_data
=
scores_cpu
.
mutable_data
<
float
>
();
float
*
scores_cpu_data
=
scores_cpu
.
mutable_data
<
float
>
();
float
*
x_ref_data
=
x_ref
.
mutable_data
<
float
>
();
float
*
x_ref_data
=
x_ref
.
mutable_data
<
float
>
();
float
*
sz_ref_data
=
sz_ref
.
mutable_data
<
floa
t
>
();
int
*
sz_ref_data
=
sz_ref
.
mutable_data
<
in
t
>
();
float
*
boxes_ref_data
=
boxes_ref
.
mutable_data
<
float
>
();
float
*
boxes_ref_data
=
boxes_ref
.
mutable_data
<
float
>
();
float
*
scores_ref_data
=
scores_ref
.
mutable_data
<
float
>
();
float
*
scores_ref_data
=
scores_ref
.
mutable_data
<
float
>
();
...
@@ -203,7 +203,7 @@ TEST(yolo_box, normal) {
...
@@ -203,7 +203,7 @@ TEST(yolo_box, normal) {
sz_ref_data
[
1
]
=
32
;
sz_ref_data
[
1
]
=
32
;
x
.
Assign
<
float
,
lite
::
DDim
,
TARGET
(
kCUDA
)
>
(
x_cpu_data
,
x_cpu
.
dims
());
x
.
Assign
<
float
,
lite
::
DDim
,
TARGET
(
kCUDA
)
>
(
x_cpu_data
,
x_cpu
.
dims
());
sz
.
Assign
<
floa
t
,
lite
::
DDim
,
TARGET
(
kCUDA
)
>
(
sz_cpu_data
,
sz_cpu
.
dims
());
sz
.
Assign
<
in
t
,
lite
::
DDim
,
TARGET
(
kCUDA
)
>
(
sz_cpu_data
,
sz_cpu
.
dims
());
param
.
X
=
&
x
;
param
.
X
=
&
x
;
param
.
ImgSize
=
&
sz
;
param
.
ImgSize
=
&
sz
;
...
...
lite/model_parser/model_parser.cc
浏览文件 @
c74a68b7
...
@@ -727,10 +727,8 @@ void LoadModelNaiveFromMemory(const std::string &model_buffer,
...
@@ -727,10 +727,8 @@ void LoadModelNaiveFromMemory(const std::string &model_buffer,
// Load model
// Load model
std
::
string
prog_path
=
model_buffer
;
naive_buffer
::
BinaryTable
table
;
naive_buffer
::
BinaryTable
table
;
table
.
LoadFromMemory
(
prog_path
.
c_str
(),
prog_path
.
length
());
table
.
LoadFromMemory
(
model_buffer
.
c_str
(),
model_buffer
.
length
());
naive_buffer
::
proto
::
ProgramDesc
nb_proto_prog
(
&
table
);
naive_buffer
::
proto
::
ProgramDesc
nb_proto_prog
(
&
table
);
nb_proto_prog
.
Load
();
nb_proto_prog
.
Load
();
...
@@ -742,8 +740,7 @@ void LoadModelNaiveFromMemory(const std::string &model_buffer,
...
@@ -742,8 +740,7 @@ void LoadModelNaiveFromMemory(const std::string &model_buffer,
// Load Params
// Load Params
// NOTE: Only main block be used now.
// NOTE: Only main block be used now.
// only combined Params are supported in Loading Model from memory
// only combined Params are supported in Loading Model from memory
std
::
string
combined_params_path
=
param_buffer
;
LoadCombinedParamsNaive
(
param_buffer
,
scope
,
*
cpp_prog
,
true
);
LoadCombinedParamsNaive
(
combined_params_path
,
scope
,
*
cpp_prog
,
true
);
VLOG
(
4
)
<<
"Load model from naive buffer memory successfully"
;
VLOG
(
4
)
<<
"Load model from naive buffer memory successfully"
;
}
}
...
...
lite/model_parser/naive_buffer/naive_buffer.h
浏览文件 @
c74a68b7
...
@@ -126,6 +126,41 @@ using UInt64Builder = PrimaryBuilder<uint64_t>;
...
@@ -126,6 +126,41 @@ using UInt64Builder = PrimaryBuilder<uint64_t>;
using
Float32Builder
=
PrimaryBuilder
<
float
>
;
using
Float32Builder
=
PrimaryBuilder
<
float
>
;
using
Float64Builder
=
PrimaryBuilder
<
double
>
;
using
Float64Builder
=
PrimaryBuilder
<
double
>
;
/*
 * Field builder that holds a whole list of primary values in one
 * std::vector<Primary>. Save() and Load() are declared here and move the
 * list to and from the BinaryTable given at construction.
 */
template <typename Primary>
class PrimaryListBuilder : public FieldBuilder {
 public:
  using value_type = Primary;

  /// Creates an empty list bound to `table`.
  explicit PrimaryListBuilder(BinaryTable* table) : FieldBuilder(table) {}

  /// Creates a list bound to `table` holding a copy of `initial`.
  PrimaryListBuilder(BinaryTable* table, const std::vector<Primary>& initial)
      : FieldBuilder(table), data_(initial) {}

  ~PrimaryListBuilder() = default;

  /// Replaces the stored elements with a copy of `values`.
  void set(const std::vector<Primary>& values) { data_ = values; }

  /// Read-only access to the stored elements.
  const std::vector<Primary>& data() const { return data_; }

  /// Number of stored elements.
  size_t size() const { return data_.size(); }

  /// Removes all stored elements.
  void Clear() { data_.clear(); }

  /// Type tag for a vector of Primary.
  Type type() const override {
    return core::StdTypeToRepr<std::vector<Primary>>();
  }

  /// Save information to the corresponding BinaryTable.
  void Save() override;

  /// Load information from the corresponding BinaryTable.
  void Load() override;

 private:
  std::vector<Primary> data_;
};
/*
/*
* Builder for all the primary types. int32, float, bool and so on.
* Builder for all the primary types. int32, float, bool and so on.
*/
*/
...
@@ -344,6 +379,36 @@ void PrimaryBuilder<Primary>::Load() {
...
@@ -344,6 +379,36 @@ void PrimaryBuilder<Primary>::Load() {
table
()
->
Consume
(
sizeof
(
value_type
));
table
()
->
Consume
(
sizeof
(
value_type
));
}
}
/// Reads the list from the table cursor: first a uint64_t element count,
/// then that many value_type elements, consuming the bytes as they are read.
/// The builder must be empty when this is called.
template <typename Primary>
void PrimaryListBuilder<Primary>::Load() {
  CHECK(data_.empty()) << "Duplicate load";

  // The element count is stored ahead of the elements.
  uint64_t count{};
  memcpy(&count, table()->cursor(), sizeof(uint64_t));
  table()->Consume(sizeof(uint64_t));

  // Size the storage, then fill each slot from the cursor in order.
  data_.resize(count);
  for (auto& slot : data_) {
    memcpy(&slot, table()->cursor(), sizeof(value_type));
    table()->Consume(sizeof(value_type));
  }
}
/// Writes the list at the table cursor: a uint64_t element count followed by
/// the elements as one contiguous run of value_type.
template <typename Primary>
void PrimaryListBuilder<Primary>::Save() {
  // Store the number of elements in the head.
  uint64_t num_elems = size();
  table()->Require(sizeof(uint64_t));
  memcpy(table()->cursor(), &num_elems, sizeof(uint64_t));
  table()->Consume(sizeof(uint64_t));

  // Store the elements themselves in a single copy.
  const auto payload_bytes = num_elems * sizeof(value_type);
  table()->Require(payload_bytes);
  if (num_elems != 0) {
    // data_.data() replaces &data_[0]: indexing an empty vector is undefined
    // behaviour, and an empty list is a valid thing to save. The copy is
    // skipped for an empty list so memcpy never receives a null source.
    memcpy(table()->cursor(), data_.data(), payload_bytes);
  }
  table()->Consume(payload_bytes);
}
template
<
typename
EnumType
>
template
<
typename
EnumType
>
void
EnumBuilder
<
EnumType
>::
Save
()
{
void
EnumBuilder
<
EnumType
>::
Save
()
{
value_type
holder
=
static_cast
<
value_type
>
(
data_
);
value_type
holder
=
static_cast
<
value_type
>
(
data_
);
...
...
lite/model_parser/naive_buffer/param_desc.cc
浏览文件 @
c74a68b7
...
@@ -149,15 +149,16 @@ void ParamDesc::SetDim(const std::vector<int64_t>& dim) {
...
@@ -149,15 +149,16 @@ void ParamDesc::SetDim(const std::vector<int64_t>& dim) {
CHECK(GetDataType() == VarDescAPI::VarDataType::type__) \
CHECK(GetDataType() == VarDescAPI::VarDataType::type__) \
<< "Data Type mismatch"; \
<< "Data Type mismatch"; \
std::vector<T> res; \
std::vector<T> res; \
auto& data_builder = desc_->GetField<
ListBuilder<CharBuilde
r>>("data"); \
auto& data_builder = desc_->GetField<
PrimaryListBuilder<cha
r>>("data"); \
auto
data = RepeatedToVector<char, CharBuilder>(data_builder);
\
auto
& data = data_builder.data();
\
size_t size = data.size() / sizeof(T); \
size_t size = data.size() / sizeof(T); \
auto* data_ptr = reinterpret_cast<
T*>(&data[0]);
\
auto* data_ptr = reinterpret_cast<
const T*>(&data[0]);
\
for (size_t i = 0; i < size; ++i) { \
for (size_t i = 0; i < size; ++i) { \
res.push_back(data_ptr[i]); \
res.push_back(data_ptr[i]); \
} \
} \
return res; \
return res; \
}
}
GET_DATA_IMPL
(
uint8_t
,
UINT8
);
GET_DATA_IMPL
(
uint8_t
,
UINT8
);
GET_DATA_IMPL
(
int8_t
,
INT8
);
GET_DATA_IMPL
(
int8_t
,
INT8
);
GET_DATA_IMPL
(
int16_t
,
INT16
);
GET_DATA_IMPL
(
int16_t
,
INT16
);
...
@@ -172,14 +173,13 @@ GET_DATA_IMPL(double, FP64);
...
@@ -172,14 +173,13 @@ GET_DATA_IMPL(double, FP64);
CHECK(GetDataType() == VarDescAPI::VarDataType::type__) \
CHECK(GetDataType() == VarDescAPI::VarDataType::type__) \
<< "Data Type mismatch, call SetDataType first."; \
<< "Data Type mismatch, call SetDataType first."; \
auto* data_builder = \
auto* data_builder = \
desc_->GetMutableField<
ListBuilder<CharBuilde
r>>("data"); \
desc_->GetMutableField<
PrimaryListBuilder<cha
r>>("data"); \
CHECK(data_builder); \
CHECK(data_builder); \
data_builder->Clear(); \
data_builder->Clear(); \
size_t size = size__ * sizeof(T); \
size_t size = size__ * sizeof(T); \
auto* data_ptr = reinterpret_cast<const char*>(data_ptr__); \
auto* data_ptr = reinterpret_cast<const char*>(data_ptr__); \
for (size_t i = 0; i < size; ++i) { \
std::vector<char> data_vec(data_ptr, data_ptr + size); \
data_builder->New()->set(data_ptr[i]); \
data_builder->set(data_vec);
}
#define SET_DATA_IMPL(T, type__) \
#define SET_DATA_IMPL(T, type__) \
template <> \
template <> \
...
...
lite/model_parser/naive_buffer/proto/framework.nb.h
浏览文件 @
c74a68b7
...
@@ -191,7 +191,7 @@ class ParamDesc : public StructBuilder {
...
@@ -191,7 +191,7 @@ class ParamDesc : public StructBuilder {
New
<
lod_type
>
(
"lod"
);
New
<
lod_type
>
(
"lod"
);
NewUInt32
(
"tensor_version"
);
NewUInt32
(
"tensor_version"
);
New
<
TensorDesc
>
(
"tensor_desc"
);
New
<
TensorDesc
>
(
"tensor_desc"
);
New
<
ListBuilder
<
CharBuilde
r
>>
(
"data"
);
New
<
PrimaryListBuilder
<
cha
r
>>
(
"data"
);
}
}
};
};
...
...
lite/operators/conv_op.cc
浏览文件 @
c74a68b7
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
// limitations under the License.
// limitations under the License.
#include "lite/operators/conv_op.h"
#include "lite/operators/conv_op.h"
#include <algorithm>
#include <vector>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/op_registry.h"
...
@@ -51,10 +52,41 @@ inline int ConvOutputSize(
...
@@ -51,10 +52,41 @@ inline int ConvOutputSize(
return
output_size
;
return
output_size
;
}
}
inline
void
UpdatePaddingAndDilation
(
std
::
vector
<
int
>*
paddings
,
std
::
vector
<
int
>*
dilations
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
string
padding_algorithm
,
const
lite
::
DDim
data_dims
,
const
lite
::
DDim
&
ksize
)
{
// when padding_desc is "VALID" or "SAME"
if
(
padding_algorithm
==
"SAME"
)
{
for
(
size_t
i
=
0
;
i
<
strides
.
size
();
++
i
)
{
int
out_size
=
(
data_dims
[
i
+
2
]
+
strides
[
i
]
-
1
)
/
strides
[
i
];
int
pad_sum
=
std
::
max
((
out_size
-
1
)
*
strides
[
i
]
+
ksize
[
i
]
-
data_dims
[
i
+
2
],
(
int64_t
)
0
);
// pad
*
(
paddings
->
begin
()
+
i
)
=
pad_sum
/
2
;
// dilation
*
(
dilations
->
begin
()
+
i
)
=
1
;
}
}
else
if
(
padding_algorithm
==
"VALID"
)
{
for
(
auto
&
it
:
*
paddings
)
{
it
=
0
;
}
}
}
bool
ConvOpLite
::
InferShape
()
const
{
bool
ConvOpLite
::
InferShape
()
const
{
const
auto
in_dims
=
param_
.
x
->
dims
();
const
auto
in_dims
=
param_
.
x
->
dims
();
const
auto
filter_dims
=
param_
.
filter
->
dims
();
const
auto
filter_dims
=
param_
.
filter
->
dims
();
UpdatePaddingAndDilation
(
&
param_
.
paddings
,
&
param_
.
dilations
,
param_
.
strides
,
padding_algorithm_
,
in_dims
,
filter_dims
);
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
filter_dims
[
0
]});
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
filter_dims
[
0
]});
for
(
size_t
i
=
0
;
i
<
param_
.
strides
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
param_
.
strides
.
size
();
++
i
)
{
output_shape
.
push_back
(
ConvOutputSize
(
in_dims
[
i
+
2
],
output_shape
.
push_back
(
ConvOutputSize
(
in_dims
[
i
+
2
],
...
...
lite/operators/conv_op.h
浏览文件 @
c74a68b7
...
@@ -93,6 +93,10 @@ class ConvOpLite : public OpLite {
...
@@ -93,6 +93,10 @@ class ConvOpLite : public OpLite {
<<
"The fused conv only supports fuse with relu and leaky relu"
;
<<
"The fused conv only supports fuse with relu and leaky relu"
;
}
}
}
}
if
(
op_desc
.
HasAttr
(
"padding_algorithm"
))
{
padding_algorithm_
=
op_desc
.
GetAttr
<
std
::
string
>
(
"padding_algorithm"
);
}
// For Int8
// For Int8
if
(
op_desc
.
HasAttr
(
"enable_int8"
))
{
if
(
op_desc
.
HasAttr
(
"enable_int8"
))
{
param_
.
enable_int8
=
op_desc
.
GetAttr
<
bool
>
(
"enable_int8"
);
param_
.
enable_int8
=
op_desc
.
GetAttr
<
bool
>
(
"enable_int8"
);
...
@@ -114,6 +118,7 @@ class ConvOpLite : public OpLite {
...
@@ -114,6 +118,7 @@ class ConvOpLite : public OpLite {
private:
private:
mutable
ConvParam
param_
;
mutable
ConvParam
param_
;
std
::
string
padding_algorithm_
{
""
};
};
};
}
// namespace operators
}
// namespace operators
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录