Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
1b84c0bf
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
1b84c0bf
编写于
9月 11, 2020
作者:
W
Wilber
提交者:
GitHub
9月 11, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Lite subgraph refine predictor (#27167)
上级
2e597696
变更
13
显示空白变更内容
内联
并排
Showing
13 changed file
with
176 addition
and
75 deletion
+176
-75
cmake/external/lite.cmake
cmake/external/lite.cmake
+1
-1
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+4
-0
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+2
-0
paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
.../fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
+6
-4
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+2
-0
paddle/fluid/inference/lite/engine.cc
paddle/fluid/inference/lite/engine.cc
+29
-17
paddle/fluid/inference/lite/engine.h
paddle/fluid/inference/lite/engine.h
+16
-11
paddle/fluid/inference/lite/tensor_utils.cc
paddle/fluid/inference/lite/tensor_utils.cc
+71
-26
paddle/fluid/inference/lite/test_engine.cc
paddle/fluid/inference/lite/test_engine.cc
+4
-4
paddle/fluid/inference/lite/test_tensor_utils.cc
paddle/fluid/inference/lite/test_tensor_utils.cc
+33
-4
paddle/fluid/inference/tests/api/lite_resnet50_test.cc
paddle/fluid/inference/tests/api/lite_resnet50_test.cc
+1
-1
paddle/fluid/operators/lite/lite_engine_op.h
paddle/fluid/operators/lite/lite_engine_op.h
+4
-4
paddle/fluid/operators/lite/lite_engine_op_test.cc
paddle/fluid/operators/lite/lite_engine_op_test.cc
+3
-3
未找到文件。
cmake/external/lite.cmake
浏览文件 @
1b84c0bf
...
@@ -34,7 +34,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
...
@@ -34,7 +34,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
set
(
LITE_INSTALL_DIR
${
THIRD_PARTY_PATH
}
/install/lite
)
set
(
LITE_INSTALL_DIR
${
THIRD_PARTY_PATH
}
/install/lite
)
if
(
NOT LITE_GIT_TAG
)
if
(
NOT LITE_GIT_TAG
)
set
(
LITE_GIT_TAG
dfdfa6440c83bf0b415f9f5a9ff84842ce0bb0fa
)
set
(
LITE_GIT_TAG
6d2b2a4028a58715b01887b04eb9bff8432eb184
)
endif
()
endif
()
if
(
NOT CUDA_ARCH_NAME
)
if
(
NOT CUDA_ARCH_NAME
)
...
...
paddle/fluid/inference/analysis/argument.h
浏览文件 @
1b84c0bf
...
@@ -218,6 +218,10 @@ struct Argument {
...
@@ -218,6 +218,10 @@ struct Argument {
DECL_ARGUMENT_FIELD
(
fusion_statis
,
FusionStatis
,
fusion_statis_t
);
DECL_ARGUMENT_FIELD
(
fusion_statis
,
FusionStatis
,
fusion_statis_t
);
// Only used in paddle-lite subgraph.
DECL_ARGUMENT_FIELD
(
cpu_math_library_num_threads
,
CpuMathLibraryNumThreads
,
int
);
private:
private:
std
::
unordered_set
<
std
::
string
>
valid_fields_
;
std
::
unordered_set
<
std
::
string
>
valid_fields_
;
};
};
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
1b84c0bf
...
@@ -150,6 +150,8 @@ void IRPassManager::CreatePasses(Argument *argument,
...
@@ -150,6 +150,8 @@ void IRPassManager::CreatePasses(Argument *argument,
pass
->
Set
(
"use_xpu"
,
new
bool
(
argument
->
use_xpu
()));
pass
->
Set
(
"use_xpu"
,
new
bool
(
argument
->
use_xpu
()));
pass
->
Set
(
"xpu_l3_workspace_size"
,
pass
->
Set
(
"xpu_l3_workspace_size"
,
new
int
(
argument
->
xpu_l3_workspace_size
()));
new
int
(
argument
->
xpu_l3_workspace_size
()));
pass
->
Set
(
"cpu_math_library_num_threads"
,
new
int
(
argument
->
cpu_math_library_num_threads
()));
}
}
disable_logs_
=
argument
->
disable_logs
();
disable_logs_
=
argument
->
disable_logs
();
if
(
pass_name
==
"fc_fuse_pass"
)
{
if
(
pass_name
==
"fc_fuse_pass"
)
{
...
...
paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
浏览文件 @
1b84c0bf
...
@@ -244,6 +244,7 @@ void LiteSubgraphPass::SetUpEngine(
...
@@ -244,6 +244,7 @@ void LiteSubgraphPass::SetUpEngine(
bool
enable_int8
=
Get
<
bool
>
(
"enable_int8"
);
bool
enable_int8
=
Get
<
bool
>
(
"enable_int8"
);
bool
use_xpu
=
Get
<
bool
>
(
"use_xpu"
);
bool
use_xpu
=
Get
<
bool
>
(
"use_xpu"
);
int
xpu_l3_workspace_size
=
Get
<
int
>
(
"xpu_l3_workspace_size"
);
int
xpu_l3_workspace_size
=
Get
<
int
>
(
"xpu_l3_workspace_size"
);
int
cpu_math_library_num_threads
=
Get
<
int
>
(
"cpu_math_library_num_threads"
);
lite_api
::
TargetType
target_type
;
lite_api
::
TargetType
target_type
;
if
(
use_gpu
)
{
if
(
use_gpu
)
{
...
@@ -263,11 +264,12 @@ void LiteSubgraphPass::SetUpEngine(
...
@@ -263,11 +264,12 @@ void LiteSubgraphPass::SetUpEngine(
// Notice: The ordering here determines the device where the
// Notice: The ordering here determines the device where the
// input tensor of the Lite engine is located, and then affects
// input tensor of the Lite engine is located, and then affects
// whether tensor sharing is feasible.
// whether tensor sharing is feasible.
paddle
::
lite
::
Place
({
target_type
,
precision_type
}),
paddle
::
lite
_api
::
Place
({
target_type
,
precision_type
}),
paddle
::
lite
::
Place
({
target_type
,
PRECISION
(
kInt64
)}),
paddle
::
lite
_api
::
Place
({
target_type
,
PRECISION
(
kInt64
)}),
paddle
::
lite
::
Place
({
target_type
,
PRECISION
(
kFloat
)}),
paddle
::
lite
_api
::
Place
({
target_type
,
PRECISION
(
kFloat
)}),
paddle
::
lite
::
Place
({
TARGET
(
kHost
),
PRECISION
(
kFloat
)}),
paddle
::
lite
_api
::
Place
({
TARGET
(
kHost
),
PRECISION
(
kFloat
)}),
};
};
config
.
cpu_math_library_num_threads
=
cpu_math_library_num_threads
;
config
.
xpu_l3_workspace_size
=
xpu_l3_workspace_size
;
config
.
xpu_l3_workspace_size
=
xpu_l3_workspace_size
;
if
(
dump_model
)
{
if
(
dump_model
)
{
lite
::
StrToBinaryFile
(
"./model.bin"
,
config
.
model
);
lite
::
StrToBinaryFile
(
"./model.bin"
,
config
.
model
);
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
1b84c0bf
...
@@ -461,6 +461,8 @@ void AnalysisPredictor::PrepareArgument() {
...
@@ -461,6 +461,8 @@ void AnalysisPredictor::PrepareArgument() {
}
}
if
(
config_
.
lite_engine_enabled
())
{
if
(
config_
.
lite_engine_enabled
())
{
argument_
.
SetCpuMathLibraryNumThreads
(
config_
.
cpu_math_library_num_threads
());
argument_
.
SetLitePrecisionMode
(
config_
.
lite_precision_mode_
);
argument_
.
SetLitePrecisionMode
(
config_
.
lite_precision_mode_
);
argument_
.
SetLitePassesFilter
(
config_
.
lite_passes_filter_
);
argument_
.
SetLitePassesFilter
(
config_
.
lite_passes_filter_
);
argument_
.
SetLiteOpsFilter
(
config_
.
lite_ops_filter_
);
argument_
.
SetLiteOpsFilter
(
config_
.
lite_ops_filter_
);
...
...
paddle/fluid/inference/lite/engine.cc
浏览文件 @
1b84c0bf
...
@@ -20,8 +20,12 @@
...
@@ -20,8 +20,12 @@
#define LITE_WITH_XPU 1
#define LITE_WITH_XPU 1
#endif
#endif
#ifndef PADDLE_WITH_ARM
#define LITE_WITH_X86 1
#endif
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/lite/engine.h"
#include
"lite/api/paddle_use_passes.h"
#include
<utility>
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
...
@@ -36,32 +40,40 @@ bool EngineManager::Has(const std::string& name) const {
...
@@ -36,32 +40,40 @@ bool EngineManager::Has(const std::string& name) const {
return
engines_
.
at
(
name
).
get
()
!=
nullptr
;
return
engines_
.
at
(
name
).
get
()
!=
nullptr
;
}
}
paddle
::
lite
::
Predictor
*
EngineManager
::
Get
(
const
std
::
string
&
name
)
const
{
paddle
::
lite_api
::
PaddlePredictor
*
EngineManager
::
Get
(
const
std
::
string
&
name
)
const
{
return
engines_
.
at
(
name
).
get
();
return
engines_
.
at
(
name
).
get
();
}
}
paddle
::
lite
::
Predictor
*
EngineManager
::
Create
(
const
std
::
string
&
name
,
paddle
::
lite_api
::
PaddlePredictor
*
EngineManager
::
Create
(
const
EngineConfig
&
cfg
)
{
const
std
::
string
&
name
,
const
EngineConfig
&
cfg
)
{
if
(
cfg
.
valid_places
.
front
().
target
==
TARGET
(
kCUDA
))
{
// config info for predictor.
#ifdef PADDLE_WITH_CUDA
paddle
::
lite_api
::
CxxConfig
lite_cxx_config
;
paddle
::
lite
::
Env
<
TARGET
(
kCUDA
)
>::
Init
();
lite_cxx_config
.
set_model_buffer
(
cfg
.
model
.
c_str
(),
cfg
.
model
.
size
(),
cfg
.
param
.
c_str
(),
cfg
.
param
.
size
());
lite_cxx_config
.
set_valid_places
(
cfg
.
valid_places
);
#ifdef PADDLE_WITH_ARM
set_threads
.
set_threads
(
cfg
.
cpu_math_library_num_threads
);
#else
lite_cxx_config
.
set_x86_math_library_num_threads
(
cfg
.
cpu_math_library_num_threads
);
#endif
#endif
}
else
if
(
cfg
.
valid_places
.
front
().
target
==
TARGET
(
kXPU
))
{
#ifdef PADDLE_WITH_XPU
#ifdef PADDLE_WITH_XPU
paddle
::
lite
::
TargetWrapper
<
TARGET
(
kXPU
)
>::
workspace_l3_size_per_thread
=
lite_cxx_config
.
set_xpu_workspace_l3_size_per_thread
(
cfg
.
xpu_l3_workspace_size
;
cfg
.
xpu_l3_workspace_size
)
;
#endif
#endif
}
auto
*
p
=
new
paddle
::
lite
::
Predictor
();
// create predictor
p
->
Build
(
""
,
cfg
.
model
,
cfg
.
param
,
cfg
.
valid_places
,
cfg
.
neglected_passes
,
std
::
shared_ptr
<
paddle
::
lite_api
::
PaddlePredictor
>
p
=
cfg
.
model_type
,
cfg
.
model_from_memory
);
paddle
::
lite_api
::
CreatePaddlePredictor
(
lite_cxx_config
);
engines_
[
name
]
.
reset
(
p
);
engines_
[
name
]
=
std
::
move
(
p
);
return
p
;
return
engines_
[
name
].
get
()
;
}
}
void
EngineManager
::
DeleteAll
()
{
void
EngineManager
::
DeleteAll
()
{
for
(
auto
&
item
:
engines_
)
{
for
(
auto
&
item
:
engines_
)
{
item
.
second
.
reset
(
nullptr
);
item
.
second
.
reset
();
}
}
}
}
...
...
paddle/fluid/inference/lite/engine.h
浏览文件 @
1b84c0bf
...
@@ -23,12 +23,9 @@
...
@@ -23,12 +23,9 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wall"
#pragma GCC diagnostic ignored "-Wall"
#include "lite/api/cxx_api.h"
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_place.h"
#include "lite/api/paddle_place.h"
#include "lite/core/context.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/core/device_info.h"
#include "lite/core/memory.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#pragma GCC diagnostic pop
#pragma GCC diagnostic pop
namespace
paddle
{
namespace
paddle
{
...
@@ -38,25 +35,33 @@ namespace lite {
...
@@ -38,25 +35,33 @@ namespace lite {
struct
EngineConfig
{
struct
EngineConfig
{
std
::
string
model
;
std
::
string
model
;
std
::
string
param
;
std
::
string
param
;
paddle
::
lite
::
Place
prefer_place
;
std
::
vector
<
paddle
::
lite_api
::
Place
>
valid_places
;
std
::
vector
<
paddle
::
lite
::
Place
>
valid_places
;
std
::
vector
<
std
::
string
>
neglected_passes
;
std
::
vector
<
std
::
string
>
neglected_passes
;
lite_api
::
LiteModelType
model_type
{
lite_api
::
LiteModelType
::
kProtobuf
};
lite_api
::
LiteModelType
model_type
{
lite_api
::
LiteModelType
::
kProtobuf
};
bool
model_from_memory
{
true
};
bool
model_from_memory
{
true
};
// for xpu
size_t
xpu_l3_workspace_size
;
size_t
xpu_l3_workspace_size
;
// for x86 or arm
int
cpu_math_library_num_threads
{
1
};
// for cuda
bool
use_multi_stream
{
false
};
};
};
class
EngineManager
{
class
EngineManager
{
public:
public:
bool
Empty
()
const
;
bool
Empty
()
const
;
bool
Has
(
const
std
::
string
&
name
)
const
;
bool
Has
(
const
std
::
string
&
name
)
const
;
paddle
::
lite
::
Predictor
*
Get
(
const
std
::
string
&
name
)
const
;
paddle
::
lite
_api
::
Paddle
Predictor
*
Get
(
const
std
::
string
&
name
)
const
;
paddle
::
lite
::
Predictor
*
Create
(
const
std
::
string
&
name
,
paddle
::
lite
_api
::
Paddle
Predictor
*
Create
(
const
std
::
string
&
name
,
const
EngineConfig
&
cfg
);
const
EngineConfig
&
cfg
);
void
DeleteAll
();
void
DeleteAll
();
private:
private:
std
::
unordered_map
<
std
::
string
,
std
::
unique_ptr
<
paddle
::
lite
::
Predictor
>>
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
paddle
::
lite_api
::
PaddlePredictor
>>
engines_
;
engines_
;
};
};
...
...
paddle/fluid/inference/lite/tensor_utils.cc
浏览文件 @
1b84c0bf
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/inference/lite/tensor_utils.h"
#include "paddle/fluid/inference/lite/tensor_utils.h"
#include <functional>
#include <map>
#include <map>
#include <memory>
#include <memory>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/data_type.h"
...
@@ -144,16 +145,55 @@ void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data,
...
@@ -144,16 +145,55 @@ void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data,
}
}
}
}
void
InitDstTensor
(
paddle
::
lite
::
Tensor
*
dst
,
const
framework
::
LoDTensor
&
src
)
{
void
*
GetLiteTensorDataPtr
(
paddle
::
lite_api
::
Tensor
*
src
,
PrecisionType
precision_type
,
TargetType
target_type
)
{
void
*
res
{
nullptr
};
switch
(
precision_type
)
{
case
PrecisionType
::
kFloat
:
res
=
static_cast
<
void
*>
(
src
->
mutable_data
<
float
>
(
target_type
));
break
;
case
PrecisionType
::
kInt8
:
res
=
static_cast
<
void
*>
(
src
->
mutable_data
<
int8_t
>
(
target_type
));
break
;
case
PrecisionType
::
kInt32
:
res
=
static_cast
<
void
*>
(
src
->
mutable_data
<
int32_t
>
(
target_type
));
break
;
case
PrecisionType
::
kInt64
:
res
=
static_cast
<
void
*>
(
src
->
mutable_data
<
int64_t
>
(
target_type
));
break
;
default:
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Unsupported precision type. Now only supports FP32, INT8, INT32 and "
"INT64."
));
break
;
}
return
res
;
}
int64_t
GetLiteTensorNumel
(
const
paddle
::
lite_api
::
Tensor
&
tensor
)
{
auto
shape
=
tensor
.
shape
();
int64_t
numel
=
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
int64_t
>
());
return
numel
;
}
void
InitDstTensor
(
paddle
::
lite_api
::
Tensor
*
dst
,
const
framework
::
LoDTensor
&
src
)
{
// Currently, Lite needs to explicitly specify the target type of
// Currently, Lite needs to explicitly specify the target type of
// the input tensor.
// the input tensor.
constexpr
int
empty_size
=
0
;
constexpr
int
empty_size
=
0
;
dst
->
mutable_data
(
GetLiteTargetType
(
src
.
place
()),
empty_size
);
dst
->
Resize
({
empty_size
});
dst
->
set_precision
(
GetLitePrecisionType
(
src
.
type
()));
GetLiteTensorDataPtr
(
dst
,
GetLitePrecisionType
(
src
.
type
()),
SetLoD
(
dst
->
mutable_lod
(),
src
.
lod
());
GetLiteTargetType
(
src
.
place
()));
dst
->
SetPrecision
(
GetLitePrecisionType
(
src
.
type
()));
paddle
::
lite
::
LoD
lite_lod
;
SetLoD
(
&
lite_lod
,
src
.
lod
());
dst
->
SetLoD
(
lite_lod
);
}
}
void
InitDstTensor
(
framework
::
LoDTensor
*
dst
,
const
paddle
::
lite
::
Tensor
&
src
)
{
void
InitDstTensor
(
framework
::
LoDTensor
*
dst
,
const
paddle
::
lite_api
::
Tensor
&
src
)
{
constexpr
framework
::
proto
::
VarType
::
Type
dtype
=
constexpr
framework
::
proto
::
VarType
::
Type
dtype
=
framework
::
proto
::
VarType_Type_FP32
;
framework
::
proto
::
VarType_Type_FP32
;
dst
->
mutable_data
(
inference
::
lite
::
utils
::
GetNativePlace
(
src
.
target
()),
dst
->
mutable_data
(
inference
::
lite
::
utils
::
GetNativePlace
(
src
.
target
()),
...
@@ -162,7 +202,8 @@ void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
...
@@ -162,7 +202,8 @@ void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
}
}
template
<
>
template
<
>
void
TensorCopyAsync
(
paddle
::
lite
::
Tensor
*
dst
,
const
framework
::
LoDTensor
&
src
,
void
TensorCopyAsync
(
paddle
::
lite_api
::
Tensor
*
dst
,
const
framework
::
LoDTensor
&
src
,
const
platform
::
DeviceContext
&
ctx
)
{
const
platform
::
DeviceContext
&
ctx
)
{
InitDstTensor
(
dst
,
src
);
InitDstTensor
(
dst
,
src
);
const
platform
::
Place
&
src_place
=
src
.
place
();
const
platform
::
Place
&
src_place
=
src
.
place
();
...
@@ -171,52 +212,56 @@ void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src,
...
@@ -171,52 +212,56 @@ void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src,
static_cast
<
size_t
>
(
src
.
numel
())
*
framework
::
SizeOfType
(
src
.
type
());
static_cast
<
size_t
>
(
src
.
numel
())
*
framework
::
SizeOfType
(
src
.
type
());
dst
->
Resize
(
framework
::
vectorize
(
src
.
dims
()));
dst
->
Resize
(
framework
::
vectorize
(
src
.
dims
()));
const
void
*
src_data
=
src
.
data
<
void
>
();
const
void
*
src_data
=
src
.
data
<
void
>
();
void
*
dst_data
=
dst
->
mutable_data
(
bytes
);
void
*
dst_data
{
nullptr
};
dst_data
=
GetLiteTensorDataPtr
(
dst
,
GetLitePrecisionType
(
src
.
type
()),
GetLiteTargetType
(
src
.
place
()));
VLOG
(
3
)
<<
"[CopyAsync fluid -> lite] Bytes = "
<<
bytes
<<
", src = "
<<
&
src
VLOG
(
3
)
<<
"[CopyAsync fluid -> lite] Bytes = "
<<
bytes
<<
", src = "
<<
&
src
<<
", dst = "
<<
dst
<<
", src_type = "
<<
src
.
type
();
<<
", dst = "
<<
dst
<<
", src_type = "
<<
src
.
type
();
MemoryCopyAsync
(
dst_place
,
dst_data
,
src_place
,
src_data
,
bytes
,
ctx
);
MemoryCopyAsync
(
dst_place
,
dst_data
,
src_place
,
src_data
,
bytes
,
ctx
);
VLOG
(
3
)
<<
"[Lite memory size] Bytes = "
<<
dst
->
memory_size
()
;
VLOG
(
3
)
<<
"[Lite memory size] Bytes = "
<<
bytes
;
}
}
template
<
>
template
<
>
void
TensorCopyAsync
(
framework
::
LoDTensor
*
dst
,
const
paddle
::
lite
::
Tensor
&
src
,
void
TensorCopyAsync
(
framework
::
LoDTensor
*
dst
,
const
paddle
::
lite_api
::
Tensor
&
src
,
const
platform
::
DeviceContext
&
ctx
)
{
const
platform
::
DeviceContext
&
ctx
)
{
dst
->
Resize
(
paddle
::
framework
::
make_ddim
(
src
.
dims
().
Vectoriz
e
()));
dst
->
Resize
(
paddle
::
framework
::
make_ddim
(
src
.
shap
e
()));
InitDstTensor
(
dst
,
src
);
InitDstTensor
(
dst
,
src
);
const
platform
::
Place
&
src_place
=
GetNativePlace
(
src
.
target
());
const
platform
::
Place
&
src_place
=
GetNativePlace
(
src
.
target
());
const
platform
::
Place
&
dst_place
=
dst
->
place
();
const
platform
::
Place
&
dst_place
=
dst
->
place
();
const
size_t
bytes
=
int64_t
src_numel
=
GetLiteTensorNumel
(
src
);
static_cast
<
size_t
>
(
src
.
numel
())
*
framework
::
SizeOfType
(
dst
->
type
());
const
size_t
bytes
=
src_numel
*
framework
::
SizeOfType
(
dst
->
type
());
const
void
*
src_data
=
src
.
raw_data
();
const
void
*
src_data
=
src
.
data
<
void
>
();
// When Lite is ready, the source type needs to be modified here.
// When Lite is ready, the source type needs to be modified here.
void
*
dst_data
=
dst
->
mutable_data
(
dst_place
,
dst
->
type
());
void
*
dst_data
=
dst
->
mutable_data
(
dst_place
,
dst
->
type
());
VLOG
(
3
)
<<
"[CopyAsync lite -> fluid] Bytes = "
<<
bytes
<<
", src = "
<<
&
src
VLOG
(
3
)
<<
"[CopyAsync lite -> fluid] Bytes = "
<<
bytes
<<
", src = "
<<
&
src
<<
", dst = "
<<
dst
<<
", src_type = "
<<
dst
->
type
();
<<
", dst = "
<<
dst
<<
", src_type = "
<<
dst
->
type
();
MemoryCopyAsync
(
dst_place
,
dst_data
,
src_place
,
src_data
,
bytes
,
ctx
);
MemoryCopyAsync
(
dst_place
,
dst_data
,
src_place
,
src_data
,
bytes
,
ctx
);
VLOG
(
3
)
<<
"[Lite memory size] Bytes = "
<<
src
.
memory_size
()
;
VLOG
(
3
)
<<
"[Lite memory size] Bytes = "
<<
bytes
;
}
}
template
<
>
template
<
>
void
TensorDataShare
(
paddle
::
lite
::
Tensor
*
dst
,
framework
::
LoDTensor
*
src
)
{
void
TensorDataShare
(
paddle
::
lite_api
::
Tensor
*
dst
,
framework
::
LoDTensor
*
src
)
{
const
size_t
bytes
=
static_cast
<
size_t
>
(
src
->
numel
())
*
framework
::
SizeOfType
(
src
->
type
());
auto
buf
=
std
::
make_shared
<
paddle
::
lite
::
Buffer
>
(
paddle
::
lite
::
Buffer
(
src
->
data
<
void
>
(),
GetLiteTargetType
(
src
->
place
()),
src
->
memory_size
()));
dst
->
Resize
(
framework
::
vectorize
(
src
->
dims
()));
dst
->
Resize
(
framework
::
vectorize
(
src
->
dims
()));
dst
->
set_precision
(
GetLitePrecisionType
(
src
->
type
()));
dst
->
ShareExternalMemory
(
src
->
data
<
void
>
(),
src
->
memory_size
(),
SetLoD
(
dst
->
mutable_lod
(),
src
->
lod
());
GetLiteTargetType
(
src
->
place
()));
dst
->
ResetBuffer
(
buf
,
bytes
);
dst
->
SetPrecision
(
GetLitePrecisionType
(
src
->
type
()));
paddle
::
lite
::
LoD
lite_lod
;
SetLoD
(
&
lite_lod
,
src
->
lod
());
dst
->
SetLoD
(
lite_lod
);
}
}
template
<
>
template
<
>
void
TensorDataShare
(
framework
::
LoDTensor
*
dst
,
paddle
::
lite
::
Tensor
*
src
)
{
void
TensorDataShare
(
framework
::
LoDTensor
*
dst
,
paddle
::
lite
_api
::
Tensor
*
src
)
{
constexpr
framework
::
proto
::
VarType
::
Type
dtype
=
constexpr
framework
::
proto
::
VarType
::
Type
dtype
=
framework
::
proto
::
VarType_Type_FP32
;
framework
::
proto
::
VarType_Type_FP32
;
void
*
src_raw_data
=
src
->
raw_data
();
void
*
src_raw_data
=
GetLiteTensorDataPtr
(
src
,
GetLitePrecisionType
(
dtype
),
src
->
target
());
size_t
memory_size
=
GetLiteTensorNumel
(
*
src
)
*
sizeof
(
float
);
std
::
shared_ptr
<
memory
::
allocation
::
Allocation
>
holder
(
std
::
shared_ptr
<
memory
::
allocation
::
Allocation
>
holder
(
new
memory
::
allocation
::
Allocation
(
src_raw_data
,
src
->
memory_size
()
,
new
memory
::
allocation
::
Allocation
(
src_raw_data
,
memory_size
,
GetNativePlace
(
src
->
target
())));
GetNativePlace
(
src
->
target
())));
dst
->
Resize
(
paddle
::
framework
::
make_ddim
(
src
->
dims
().
Vectoriz
e
()));
dst
->
Resize
(
paddle
::
framework
::
make_ddim
(
src
->
shap
e
()));
SetLoD
(
dst
->
mutable_lod
(),
src
->
lod
());
SetLoD
(
dst
->
mutable_lod
(),
src
->
lod
());
dst
->
ResetHolderWithType
(
holder
,
dtype
);
dst
->
ResetHolderWithType
(
holder
,
dtype
);
}
}
...
...
paddle/fluid/inference/lite/test_engine.cc
浏览文件 @
1b84c0bf
...
@@ -102,10 +102,10 @@ TEST(EngineManager, engine) {
...
@@ -102,10 +102,10 @@ TEST(EngineManager, engine) {
config
.
model_from_memory
=
true
;
config
.
model_from_memory
=
true
;
config
.
valid_places
=
{
config
.
valid_places
=
{
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
paddle
::
lite
::
Place
({
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)}),
paddle
::
lite
_api
::
Place
({
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)}),
#endif
#endif
paddle
::
lite
::
Place
({
TARGET
(
kX86
),
PRECISION
(
kFloat
)}),
paddle
::
lite
_api
::
Place
({
TARGET
(
kX86
),
PRECISION
(
kFloat
)}),
paddle
::
lite
::
Place
({
TARGET
(
kHost
),
PRECISION
(
kAny
)}),
paddle
::
lite
_api
::
Place
({
TARGET
(
kHost
),
PRECISION
(
kAny
)}),
};
};
LOG
(
INFO
)
<<
"Create EngineManager"
;
LOG
(
INFO
)
<<
"Create EngineManager"
;
...
@@ -118,7 +118,7 @@ TEST(EngineManager, engine) {
...
@@ -118,7 +118,7 @@ TEST(EngineManager, engine) {
ASSERT_EQ
(
inference
::
Singleton
<
inference
::
lite
::
EngineManager
>::
Global
().
Has
(
ASSERT_EQ
(
inference
::
Singleton
<
inference
::
lite
::
EngineManager
>::
Global
().
Has
(
unique_key
),
unique_key
),
true
);
true
);
paddle
::
lite
::
Predictor
*
engine_0
=
paddle
::
lite
_api
::
Paddle
Predictor
*
engine_0
=
inference
::
Singleton
<
inference
::
lite
::
EngineManager
>::
Global
().
Get
(
inference
::
Singleton
<
inference
::
lite
::
EngineManager
>::
Global
().
Get
(
unique_key
);
unique_key
);
CHECK_NOTNULL
(
engine_0
);
CHECK_NOTNULL
(
engine_0
);
...
...
paddle/fluid/inference/lite/test_tensor_utils.cc
浏览文件 @
1b84c0bf
...
@@ -73,6 +73,33 @@ TEST(LiteEngineOp, GetNativeLayoutType) {
...
@@ -73,6 +73,33 @@ TEST(LiteEngineOp, GetNativeLayoutType) {
EXPECT_ANY_THROW
(
GetNativeLayoutType
(
DataLayoutType
::
kNHWC
));
EXPECT_ANY_THROW
(
GetNativeLayoutType
(
DataLayoutType
::
kNHWC
));
}
}
template
<
typename
T
>
void
test_lite_tensor_data_ptr
(
PrecisionType
precision_type
)
{
void
*
GetLiteTensorDataPtr
(
paddle
::
lite_api
::
Tensor
*
src
,
PrecisionType
precision_type
,
TargetType
target_type
);
const
int
count
=
4
;
paddle
::
lite
::
Tensor
lite_tensor
;
lite_tensor
.
Resize
({
count
});
auto
*
lite_tensor_data
=
lite_tensor
.
mutable_data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
count
;
++
i
)
{
lite_tensor_data
[
i
]
=
i
;
}
paddle
::
lite_api
::
Tensor
lite_api_tensor
(
&
lite_tensor
);
T
*
data
=
static_cast
<
T
*>
(
GetLiteTensorDataPtr
(
&
lite_api_tensor
,
precision_type
,
TargetType
::
kHost
));
for
(
size_t
i
=
0
;
i
<
count
;
++
i
)
{
CHECK_EQ
(
data
[
i
],
static_cast
<
T
>
(
i
))
<<
"the i-th num is not correct."
;
}
}
TEST
(
LiteEngineOp
,
GetLiteTensorDataPtr
)
{
test_lite_tensor_data_ptr
<
int64_t
>
(
PrecisionType
::
kInt64
);
test_lite_tensor_data_ptr
<
int32_t
>
(
PrecisionType
::
kInt32
);
test_lite_tensor_data_ptr
<
int8_t
>
(
PrecisionType
::
kInt8
);
EXPECT_ANY_THROW
(
test_lite_tensor_data_ptr
<
double
>
(
PrecisionType
::
kUnk
));
}
void
test_tensor_copy
(
const
platform
::
DeviceContext
&
ctx
)
{
void
test_tensor_copy
(
const
platform
::
DeviceContext
&
ctx
)
{
// Create LoDTensor.
// Create LoDTensor.
std
::
vector
<
float
>
vector
({
1
,
2
,
3
,
4
});
std
::
vector
<
float
>
vector
({
1
,
2
,
3
,
4
});
...
@@ -83,10 +110,11 @@ void test_tensor_copy(const platform::DeviceContext& ctx) {
...
@@ -83,10 +110,11 @@ void test_tensor_copy(const platform::DeviceContext& ctx) {
lod_tensor
.
set_lod
(
lod
);
lod_tensor
.
set_lod
(
lod
);
// Create lite::Tensor and copy.
// Create lite::Tensor and copy.
paddle
::
lite
::
Tensor
lite_tensor
;
paddle
::
lite
::
Tensor
lite_tensor
;
TensorCopyAsync
(
&
lite_tensor
,
lod_tensor
,
ctx
);
paddle
::
lite_api
::
Tensor
lite_api_tensor
(
&
lite_tensor
);
TensorCopyAsync
(
&
lite_api_tensor
,
lod_tensor
,
ctx
);
// Copy to LoDTensor.
// Copy to LoDTensor.
framework
::
LoDTensor
lod_tensor_n
;
framework
::
LoDTensor
lod_tensor_n
;
TensorCopyAsync
(
&
lod_tensor_n
,
lite_tensor
,
ctx
);
TensorCopyAsync
(
&
lod_tensor_n
,
lite_
api_
tensor
,
ctx
);
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
platform
::
GpuStreamSync
(
platform
::
GpuStreamSync
(
...
@@ -108,10 +136,11 @@ void test_tensor_share(const platform::DeviceContext& ctx) {
...
@@ -108,10 +136,11 @@ void test_tensor_share(const platform::DeviceContext& ctx) {
lod_tensor
.
set_lod
(
lod
);
lod_tensor
.
set_lod
(
lod
);
// Create lite::Tensor and share.
// Create lite::Tensor and share.
paddle
::
lite
::
Tensor
lite_tensor
;
paddle
::
lite
::
Tensor
lite_tensor
;
TensorDataShare
(
&
lite_tensor
,
&
lod_tensor
);
paddle
::
lite_api
::
Tensor
lite_api_tensor
(
&
lite_tensor
);
TensorDataShare
(
&
lite_api_tensor
,
&
lod_tensor
);
// Copy to LoDTensor.
// Copy to LoDTensor.
framework
::
LoDTensor
lod_tensor_n
;
framework
::
LoDTensor
lod_tensor_n
;
TensorCopyAsync
(
&
lod_tensor_n
,
lite_tensor
,
ctx
);
TensorCopyAsync
(
&
lod_tensor_n
,
lite_
api_
tensor
,
ctx
);
std
::
vector
<
float
>
result
;
std
::
vector
<
float
>
result
;
TensorToVector
(
lod_tensor_n
,
ctx
,
&
result
);
TensorToVector
(
lod_tensor_n
,
ctx
,
&
result
);
ASSERT_EQ
(
result
,
vector
);
ASSERT_EQ
(
result
,
vector
);
...
...
paddle/fluid/inference/tests/api/lite_resnet50_test.cc
浏览文件 @
1b84c0bf
...
@@ -27,7 +27,7 @@ TEST(AnalysisPredictor, use_gpu) {
...
@@ -27,7 +27,7 @@ TEST(AnalysisPredictor, use_gpu) {
AnalysisConfig
config
;
AnalysisConfig
config
;
config
.
EnableUseGpu
(
100
,
0
);
config
.
EnableUseGpu
(
100
,
0
);
config
.
SetModel
(
model_dir
+
"/model"
,
model_dir
+
"/params"
);
config
.
SetModel
(
model_dir
+
"/model"
,
model_dir
+
"/params"
);
config
.
EnableLiteEngine
(
paddle
::
AnalysisConfig
::
Precision
::
kFloat32
);
config
.
EnableLiteEngine
(
paddle
::
AnalysisConfig
::
Precision
::
kFloat32
,
true
);
std
::
vector
<
PaddleTensor
>
inputs
;
std
::
vector
<
PaddleTensor
>
inputs
;
auto
predictor
=
CreatePaddlePredictor
(
config
);
auto
predictor
=
CreatePaddlePredictor
(
config
);
...
...
paddle/fluid/operators/lite/lite_engine_op.h
浏览文件 @
1b84c0bf
...
@@ -39,7 +39,7 @@ class LiteEngineOp : public framework::OperatorBase {
...
@@ -39,7 +39,7 @@ class LiteEngineOp : public framework::OperatorBase {
private:
private:
std
::
vector
<
std
::
string
>
in_names_
;
std
::
vector
<
std
::
string
>
in_names_
;
std
::
vector
<
std
::
string
>
out_names_
;
std
::
vector
<
std
::
string
>
out_names_
;
paddle
::
lite
::
Predictor
*
engine_
;
paddle
::
lite
_api
::
Paddle
Predictor
*
engine_
;
framework
::
proto
::
VarType
::
Type
precision_
;
framework
::
proto
::
VarType
::
Type
precision_
;
bool
use_gpu_
;
bool
use_gpu_
;
bool
zero_copy_
;
bool
zero_copy_
;
...
@@ -78,10 +78,10 @@ class LiteEngineOp : public framework::OperatorBase {
...
@@ -78,10 +78,10 @@ class LiteEngineOp : public framework::OperatorBase {
framework
::
LoDTensor
src_t
=
framework
::
LoDTensor
src_t
=
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
scope
,
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
scope
,
in_names_
[
i
]);
in_names_
[
i
]);
paddle
::
lite
::
Tensor
*
dst_t
=
engine_
->
GetInput
(
i
);
paddle
::
lite
_api
::
Tensor
dst_t
=
*
(
engine_
->
GetInput
(
i
)
);
VLOG
(
3
)
<<
"== fluid -> lite ("
<<
in_names_
[
i
]
<<
" -> "
VLOG
(
3
)
<<
"== fluid -> lite ("
<<
in_names_
[
i
]
<<
" -> "
<<
engine_
->
GetInputNames
()[
i
]
<<
")"
;
<<
engine_
->
GetInputNames
()[
i
]
<<
")"
;
inference
::
lite
::
utils
::
TensorCopy
(
dst_t
,
&
src_t
,
*
ctx
,
zero_copy_
);
inference
::
lite
::
utils
::
TensorCopy
(
&
dst_t
,
&
src_t
,
*
ctx
,
zero_copy_
);
}
}
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
is_gpu_place
(
dev_place
))
{
if
(
platform
::
is_gpu_place
(
dev_place
))
{
...
@@ -93,7 +93,7 @@ class LiteEngineOp : public framework::OperatorBase {
...
@@ -93,7 +93,7 @@ class LiteEngineOp : public framework::OperatorBase {
engine_
->
Run
();
engine_
->
Run
();
VLOG
(
3
)
<<
"lite engine run done"
;
VLOG
(
3
)
<<
"lite engine run done"
;
for
(
size_t
i
=
0
;
i
<
out_names_
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
out_names_
.
size
();
i
++
)
{
paddle
::
lite
::
Tensor
src_t
=
*
(
engine_
->
GetOutput
(
i
));
paddle
::
lite
_api
::
Tensor
src_t
=
*
(
engine_
->
GetOutput
(
i
));
framework
::
LoDTensor
*
dst_t
=
framework
::
LoDTensor
*
dst_t
=
&
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
&
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
scope
,
out_names_
[
i
]);
scope
,
out_names_
[
i
]);
...
...
paddle/fluid/operators/lite/lite_engine_op_test.cc
浏览文件 @
1b84c0bf
...
@@ -84,10 +84,10 @@ TEST(LiteEngineOp, engine_op) {
...
@@ -84,10 +84,10 @@ TEST(LiteEngineOp, engine_op) {
inference
::
lite
::
EngineConfig
config
;
inference
::
lite
::
EngineConfig
config
;
config
.
valid_places
=
{
config
.
valid_places
=
{
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
paddle
::
lite
::
Place
({
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)}),
paddle
::
lite
_api
::
Place
({
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)}),
#endif
#endif
paddle
::
lite
::
Place
({
TARGET
(
kHost
),
PRECISION
(
kAny
)}),
paddle
::
lite
_api
::
Place
({
TARGET
(
kX86
),
PRECISION
(
kFloat
)}),
paddle
::
lite
::
Place
({
TARGET
(
kX86
),
PRECISION
(
kFloat
)}),
paddle
::
lite
_api
::
Place
({
TARGET
(
kHost
),
PRECISION
(
kAny
)}),
};
};
serialize_params
(
&
(
config
.
param
),
&
scope
,
repetitive_params
);
serialize_params
(
&
(
config
.
param
),
&
scope
,
repetitive_params
);
config
.
model
=
program
.
Proto
()
->
SerializeAsString
();
config
.
model
=
program
.
Proto
()
->
SerializeAsString
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录