Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
b31905c5
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b31905c5
编写于
9月 28, 2018
作者:
T
Tao Luo
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into resnet50_ut
上级
1dcd6ee5
7a5f3f75
变更
43
展开全部
隐藏空白更改
内联
并排
Showing
43 changed file
with
2369 addition
and
133 deletion
+2369
-133
cmake/external/anakin.cmake
cmake/external/anakin.cmake
+1
-0
paddle/fluid/API.spec
paddle/fluid/API.spec
+6
-2
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+5
-2
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+6
-6
paddle/fluid/framework/naive_executor.cc
paddle/fluid/framework/naive_executor.cc
+150
-0
paddle/fluid/framework/naive_executor.h
paddle/fluid/framework/naive_executor.h
+63
-0
paddle/fluid/framework/naive_executor_test.cc
paddle/fluid/framework/naive_executor_test.cc
+70
-0
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+8
-2
paddle/fluid/framework/scope.cc
paddle/fluid/framework/scope.cc
+31
-0
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+1
-1
paddle/fluid/inference/analysis/CMakeLists.txt
paddle/fluid/inference/analysis/CMakeLists.txt
+1
-1
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+13
-7
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+211
-31
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+49
-10
paddle/fluid/inference/api/analysis_predictor_tester.cc
paddle/fluid/inference/api/analysis_predictor_tester.cc
+67
-0
paddle/fluid/inference/api/api.cc
paddle/fluid/inference/api/api.cc
+22
-16
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+1
-1
paddle/fluid/inference/api/api_impl.h
paddle/fluid/inference/api/api_impl.h
+13
-9
paddle/fluid/inference/api/api_impl_tester.cc
paddle/fluid/inference/api/api_impl_tester.cc
+3
-3
paddle/fluid/inference/api/details/zero_copy_tensor.cc
paddle/fluid/inference/api/details/zero_copy_tensor.cc
+111
-0
paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc
paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc
+46
-0
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+138
-0
paddle/fluid/inference/api/paddle_inference_api.h
paddle/fluid/inference/api/paddle_inference_api.h
+53
-1
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+6
-1
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+4
-1
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+282
-3
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
+2
-1
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
+3
-1
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+2
-4
paddle/fluid/memory/malloc.cc
paddle/fluid/memory/malloc.cc
+21
-0
paddle/fluid/string/pretty_log.h
paddle/fluid/string/pretty_log.h
+4
-4
python/CMakeLists.txt
python/CMakeLists.txt
+1
-0
python/paddle/fluid/contrib/__init__.py
python/paddle/fluid/contrib/__init__.py
+3
-0
python/paddle/fluid/contrib/quantize/__init__.py
python/paddle/fluid/contrib/quantize/__init__.py
+20
-0
python/paddle/fluid/contrib/quantize/quantize_transpiler.py
python/paddle/fluid/contrib/quantize/quantize_transpiler.py
+557
-0
python/paddle/fluid/contrib/tests/CMakeLists.txt
python/paddle/fluid/contrib/tests/CMakeLists.txt
+6
-0
python/paddle/fluid/contrib/tests/test_quantize_transpiler.py
...on/paddle/fluid/contrib/tests/test_quantize_transpiler.py
+272
-0
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+0
-1
python/paddle/fluid/tests/unittests/test_dist_base.py
python/paddle/fluid/tests/unittests/test_dist_base.py
+2
-4
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
+7
-8
python/paddle/fluid/transpiler/__init__.py
python/paddle/fluid/transpiler/__init__.py
+6
-2
python/paddle/fluid/transpiler/memory_optimization_transpiler.py
...paddle/fluid/transpiler/memory_optimization_transpiler.py
+101
-11
python/setup.py.in
python/setup.py.in
+1
-0
未找到文件。
cmake/external/anakin.cmake
浏览文件 @
b31905c5
...
@@ -52,6 +52,7 @@ ExternalProject_Add(
...
@@ -52,6 +52,7 @@ ExternalProject_Add(
PREFIX
${
ANAKIN_SOURCE_DIR
}
PREFIX
${
ANAKIN_SOURCE_DIR
}
UPDATE_COMMAND
""
UPDATE_COMMAND
""
CMAKE_ARGS
${
CMAKE_ARGS_PREFIX
}
CMAKE_ARGS
${
CMAKE_ARGS_PREFIX
}
-DUSE_LOGGER=YES
-DUSE_X86_PLACE=YES
-DUSE_X86_PLACE=YES
-DBUILD_WITH_UNIT_TEST=NO
-DBUILD_WITH_UNIT_TEST=NO
-DPROTOBUF_ROOT=
${
THIRD_PARTY_PATH
}
/install/protobuf
-DPROTOBUF_ROOT=
${
THIRD_PARTY_PATH
}
/install/protobuf
...
...
paddle/fluid/API.spec
浏览文件 @
b31905c5
...
@@ -21,7 +21,7 @@ paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'en
...
@@ -21,7 +21,7 @@ paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'en
paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'
], varargs=None, keywords=None, defaults=(None, False, 0
))
paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'
, 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False
))
paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
...
@@ -299,13 +299,17 @@ paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init',
...
@@ -299,13 +299,17 @@ paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init',
paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.op_freq_statistic ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.op_freq_statistic ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000))
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'
], varargs=None, keywords=None, defaults=(None, False, 0
))
paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'
, 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False
))
paddle.fluid.transpiler.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.HashName.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.HashName.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.HashName.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.HashName.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)
...
...
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
b31905c5
...
@@ -56,9 +56,9 @@ else()
...
@@ -56,9 +56,9 @@ else()
cc_test
(
mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor
)
cc_test
(
mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor
)
endif
()
endif
()
if
(
NOT WIN32
)
if
(
NOT WIN32
)
cc_library
(
lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio version
)
cc_library
(
lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio version
)
else
()
else
()
cc_library
(
lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version
)
cc_library
(
lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version
)
endif
(
NOT WIN32
)
endif
(
NOT WIN32
)
cc_test
(
lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory
)
cc_test
(
lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory
)
...
@@ -141,12 +141,15 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
...
@@ -141,12 +141,15 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library
(
feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog
)
cc_library
(
feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog
)
cc_library
(
naive_executor SRCS naive_executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass
)
if
(
WITH_DISTRIBUTE
)
if
(
WITH_DISTRIBUTE
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc cares grpc++_unsecure grpc_unsecure gpr graph_to_program_pass
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc cares grpc++_unsecure grpc_unsecure gpr graph_to_program_pass
)
set
(
DISTRIBUTE_COMPILE_FLAGS
"-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor"
)
set
(
DISTRIBUTE_COMPILE_FLAGS
"-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor"
)
set_source_files_properties
(
executor.cc PROPERTIES COMPILE_FLAGS
${
DISTRIBUTE_COMPILE_FLAGS
}
)
set_source_files_properties
(
executor.cc PROPERTIES COMPILE_FLAGS
${
DISTRIBUTE_COMPILE_FLAGS
}
)
else
()
else
()
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass
)
cc_test
(
test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass elementwise_add_op
)
endif
()
endif
()
if
(
NOT WIN32
)
if
(
NOT WIN32
)
...
...
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
b31905c5
...
@@ -28,9 +28,9 @@ cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph grap
...
@@ -28,9 +28,9 @@ cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph grap
pass_library
(
graph_to_program_pass base
)
pass_library
(
graph_to_program_pass base
)
pass_library
(
graph_viz_pass base
)
pass_library
(
graph_viz_pass base
)
pass_library
(
fc_fuse_pass inference
)
pass_library
(
fc_fuse_pass inference
)
if
(
WITH_MKLDNN
)
if
(
WITH_MKLDNN
)
pass_library
(
conv_relu_mkldnn_fuse_pass inference
)
pass_library
(
conv_relu_mkldnn_fuse_pass inference
)
endif
()
endif
()
pass_library
(
attention_lstm_fuse_pass inference
)
pass_library
(
attention_lstm_fuse_pass inference
)
pass_library
(
infer_clean_graph_pass inference
)
pass_library
(
infer_clean_graph_pass inference
)
pass_library
(
fc_lstm_fuse_pass inference
)
pass_library
(
fc_lstm_fuse_pass inference
)
...
@@ -49,6 +49,6 @@ cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_r
...
@@ -49,6 +49,6 @@ cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_r
cc_test
(
graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass
)
cc_test
(
graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass
)
cc_test
(
test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector
)
cc_test
(
test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector
)
cc_test
(
test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto
)
cc_test
(
test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto
)
if
(
WITH_MKLDNN
)
if
(
WITH_MKLDNN
)
cc_test
(
test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass
)
cc_test
(
test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass
)
endif
()
endif
()
paddle/fluid/framework/naive_executor.cc
0 → 100644
浏览文件 @
b31905c5
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/string/pretty_log.h"
namespace
paddle
{
namespace
framework
{
// These code can be shared with Executor.
static
void
InitializeVariable
(
Variable
*
var
,
proto
::
VarType
::
Type
var_type
)
{
if
(
var_type
==
proto
::
VarType
::
LOD_TENSOR
)
{
var
->
GetMutable
<
LoDTensor
>
();
}
else
if
(
var_type
==
proto
::
VarType
::
SELECTED_ROWS
)
{
var
->
GetMutable
<
SelectedRows
>
();
}
else
if
(
var_type
==
proto
::
VarType
::
FEED_MINIBATCH
)
{
var
->
GetMutable
<
FeedFetchList
>
();
}
else
if
(
var_type
==
proto
::
VarType
::
FETCH_LIST
)
{
var
->
GetMutable
<
FeedFetchList
>
();
}
else
if
(
var_type
==
proto
::
VarType
::
STEP_SCOPES
)
{
var
->
GetMutable
<
std
::
vector
<
framework
::
Scope
>>
();
}
else
if
(
var_type
==
proto
::
VarType
::
LOD_RANK_TABLE
)
{
var
->
GetMutable
<
LoDRankTable
>
();
}
else
if
(
var_type
==
proto
::
VarType
::
LOD_TENSOR_ARRAY
)
{
var
->
GetMutable
<
LoDTensorArray
>
();
}
else
if
(
var_type
==
proto
::
VarType
::
PLACE_LIST
)
{
var
->
GetMutable
<
platform
::
PlaceList
>
();
}
else
if
(
var_type
==
proto
::
VarType
::
READER
)
{
var
->
GetMutable
<
ReaderHolder
>
();
}
else
if
(
var_type
==
proto
::
VarType
::
CHANNEL
)
{
var
->
GetMutable
<
ChannelHolder
>
();
}
else
if
(
var_type
==
proto
::
VarType
::
RAW
)
{
// GetMutable will be called in operator
}
else
{
PADDLE_THROW
(
"Variable type %d is not in "
"[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
"LOD_RANK_TABLE, PLACE_LIST, READER, CHANNEL, RAW]"
,
var_type
);
}
}
void
NaiveExecutor
::
Prepare
(
Scope
*
parent_scope
,
const
ProgramDesc
&
program_desc
,
int
block_id
,
bool
with_feed_fetch_ops
)
{
if
(
!
parent_scope
)
{
scope_
=
new
framework
::
Scope
;
}
else
{
scope_
=
&
parent_scope
->
NewScope
();
}
CreateVariables
(
program_desc
,
scope_
,
block_id
);
CreateOps
(
program_desc
,
block_id
,
with_feed_fetch_ops
);
}
void
NaiveExecutor
::
Run
()
{
for
(
auto
&
op
:
ops_
)
{
VLOG
(
4
)
<<
"run "
<<
op
->
Type
();
op
->
Run
(
*
scope_
,
place_
);
}
}
void
NaiveExecutor
::
CreateVariables
(
const
ProgramDesc
&
desc
,
Scope
*
scope
,
int
block_id
)
{
PADDLE_ENFORCE
(
scope
);
auto
&
global_block
=
desc
.
Block
(
block_id
);
const
Scope
*
ancestor_scope
=
scope
;
while
(
ancestor_scope
->
parent
())
{
ancestor_scope
=
ancestor_scope
->
parent
();
}
if
(
ancestor_scope
!=
scope
)
{
for
(
auto
&
var
:
global_block
.
AllVars
())
{
if
(
var
->
Name
()
==
framework
::
kEmptyVarName
)
{
continue
;
}
// Create persistable vars in ancestor scope.
if
(
var
->
Persistable
())
{
auto
*
ptr
=
const_cast
<
Scope
*>
(
ancestor_scope
)
->
Var
(
var
->
Name
());
InitializeVariable
(
ptr
,
var
->
GetType
());
VLOG
(
3
)
<<
"Create Variable "
<<
var
->
Name
()
<<
" global, which pointer is "
<<
ptr
;
}
else
{
// Create temporary variables in local scope.
auto
*
ptr
=
scope
->
Var
(
var
->
Name
());
InitializeVariable
(
ptr
,
var
->
GetType
());
VLOG
(
3
)
<<
"Create Variable "
<<
var
->
Name
()
<<
" locally, which pointer is "
<<
ptr
;
}
}
}
else
{
for
(
auto
&
var
:
global_block
.
AllVars
())
{
auto
*
ptr
=
scope
->
Var
(
var
->
Name
());
InitializeVariable
(
ptr
,
var
->
GetType
());
VLOG
(
3
)
<<
"Create variable "
<<
var
->
Name
()
<<
", which pointer is "
<<
ptr
;
}
}
}
void
NaiveExecutor
::
CreateOps
(
const
ProgramDesc
&
desc
,
int
block_id
,
bool
with_feed_fetch_ops
)
{
for
(
const
auto
&
op_desc
:
desc
.
Block
(
block_id
).
AllOps
())
{
if
(
!
with_feed_fetch_ops
&&
(
op_desc
->
Type
()
==
"feed"
||
op_desc
->
Type
()
==
"fetch"
))
{
string
::
PrettyLogEndl
(
string
::
Style
::
detail
(),
"--- skip [%s], %s -> %s"
,
op_desc
->
Input
(
"X"
)[
0
],
op_desc
->
Type
(),
op_desc
->
Output
(
"Out"
)[
0
]);
continue
;
}
ops_
.
emplace_back
(
OpRegistry
::
CreateOp
(
*
op_desc
));
}
}
LoDTensor
*
NaiveExecutor
::
FindTensor
(
const
std
::
string
&
name
)
{
PADDLE_ENFORCE
(
scope_
,
"Need to init scope first"
);
auto
*
var
=
scope_
->
FindVar
(
name
);
PADDLE_ENFORCE
(
var
,
"No variable [%s] in the scope"
);
auto
*
tensor
=
const_cast
<
LoDTensor
*>
(
&
var
->
Get
<
LoDTensor
>
());
return
tensor
;
}
void
NaiveExecutor
::
CleanFeedFetchOps
()
{
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>
ops
;
for
(
auto
&
op
:
ops_
)
{
if
(
op
->
Type
()
!=
"feed"
&&
op
->
Type
()
!=
"fetch"
)
{
ops
.
emplace_back
(
std
::
move
(
op
));
}
}
ops_
.
swap
(
ops
);
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/naive_executor.h
0 → 100644
浏览文件 @
b31905c5
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"
namespace
paddle
{
namespace
framework
{
/*
* Simple, intuitive and effective. Only single thread is supported, and
* currently designed for inference.
*/
class
NaiveExecutor
{
public:
explicit
NaiveExecutor
(
const
platform
::
Place
&
place
)
:
place_
(
place
)
{}
// Create child scope.
// Create variables.
// @with_feed_fetch_ops: whether to work with the feed and fetch operators.
void
Prepare
(
Scope
*
parent_scope
,
const
ProgramDesc
&
program_desc
,
int
block_id
,
bool
with_feed_fetch_ops
);
// Run all the operators.
void
Run
();
// Get an tensor to operating directly, without the need for feed_ops.
LoDTensor
*
FindTensor
(
const
std
::
string
&
name
);
Scope
*
scope
()
{
return
scope_
;
}
void
CleanFeedFetchOps
();
protected:
void
CreateVariables
(
const
ProgramDesc
&
desc
,
Scope
*
scope
,
int
block_id
);
void
CreateOps
(
const
ProgramDesc
&
desc
,
int
block_id
,
bool
with_feed_fetch_ops
);
private:
const
platform
::
Place
place_
;
// Catch the required resource to avoid recreate.
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>
ops_
;
Scope
*
scope_
;
};
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/naive_executor_test.cc
0 → 100644
浏览文件 @
b31905c5
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/naive_executor.h"
#include <gtest/gtest.h>
#include <algorithm>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
namespace
paddle
{
namespace
framework
{
TEST
(
NaiveExecutor
,
Basic
)
{
ProgramDesc
program
;
auto
*
main_block
=
program
.
MutableBlock
(
0
);
auto
*
a
=
main_block
->
Var
(
"a"
);
// input
auto
*
b
=
main_block
->
Var
(
"b"
);
// input
auto
*
c
=
main_block
->
Var
(
"c"
);
// input
a
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
b
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
c
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
auto
*
add
=
main_block
->
AppendOp
();
add
->
SetType
(
"elementwise_add"
);
add
->
SetInput
(
"X"
,
{
"a"
});
add
->
SetInput
(
"Y"
,
{
"b"
});
add
->
SetOutput
(
"Out"
,
{
"c"
});
auto
place
=
platform
::
CPUPlace
();
NaiveExecutor
exe
(
place
);
exe
.
Prepare
(
nullptr
,
program
,
0
,
false
/*with feed fetch ops*/
);
auto
*
a_tensor
=
exe
.
FindTensor
(
"a"
);
auto
*
b_tensor
=
exe
.
FindTensor
(
"b"
);
auto
*
c_tensor
=
exe
.
FindTensor
(
"c"
);
a_tensor
->
Resize
({
1
,
4
});
b_tensor
->
Resize
({
1
,
4
});
c_tensor
->
Resize
({
1
,
4
});
b_tensor
->
mutable_data
<
float
>
(
place
);
a_tensor
->
mutable_data
<
float
>
(
place
);
float
a_arr
[]
=
{
0
,
1
,
2
,
3
};
float
b_arr
[]
=
{
0.0
,
.1
,
.2
,
.3
};
std
::
copy_n
(
a_arr
,
4
,
a_tensor
->
mutable_data
<
float
>
(
place
));
std
::
copy_n
(
b_arr
,
4
,
b_tensor
->
mutable_data
<
float
>
(
place
));
exe
.
Run
();
auto
*
c_data
=
c_tensor
->
mutable_data
<
float
>
(
place
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
EXPECT_NEAR
(
c_data
[
i
],
1.1
*
i
,
1e-3
);
}
}
}
// namespace framework
}
// namespace paddle
USE_OP
(
elementwise_add
);
paddle/fluid/framework/operator.cc
浏览文件 @
b31905c5
...
@@ -154,9 +154,15 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
...
@@ -154,9 +154,15 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
platform
::
SetDeviceId
(
dev_id
);
platform
::
SetDeviceId
(
dev_id
);
#endif
#endif
}
}
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
if
(
platform
::
IsProfileEnabled
())
{
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
}
RunImpl
(
scope
,
place
);
RunImpl
(
scope
,
place
);
if
(
VLOG_IS_ON
(
3
))
{
if
(
VLOG_IS_ON
(
3
))
{
VLOG
(
3
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
VLOG
(
3
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
}
}
...
...
paddle/fluid/framework/scope.cc
浏览文件 @
b31905c5
...
@@ -20,6 +20,13 @@ limitations under the License. */
...
@@ -20,6 +20,13 @@ limitations under the License. */
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/printf.h"
// The mutex is not needed by training and inference, only for distribution.
#if PADDLE_WITH_DISTRIBUTE
#define WITH_LOCK 1
#else
#define WITH_LOCK 0
#endif
DEFINE_bool
(
benchmark
,
false
,
DEFINE_bool
(
benchmark
,
false
,
"Doing memory benchmark. It will make deleting scope synchronized, "
"Doing memory benchmark. It will make deleting scope synchronized, "
"and add some memory usage logs."
"and add some memory usage logs."
...
@@ -49,18 +56,24 @@ int64_t GetEagerDeletionThreshold() {
...
@@ -49,18 +56,24 @@ int64_t GetEagerDeletionThreshold() {
Scope
::~
Scope
()
{
DropKids
();
}
Scope
::~
Scope
()
{
DropKids
();
}
Scope
&
Scope
::
NewScope
()
const
{
Scope
&
Scope
::
NewScope
()
const
{
#if WITH_LOCK
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
#endif
kids_
.
push_back
(
new
Scope
(
this
));
kids_
.
push_back
(
new
Scope
(
this
));
return
*
kids_
.
back
();
return
*
kids_
.
back
();
}
}
Variable
*
Scope
::
Var
(
const
std
::
string
&
name
)
{
Variable
*
Scope
::
Var
(
const
std
::
string
&
name
)
{
#if WITH_LOCK
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
#endif
return
VarInternal
(
name
);
return
VarInternal
(
name
);
}
}
Variable
*
Scope
::
Var
(
std
::
string
*
name
)
{
Variable
*
Scope
::
Var
(
std
::
string
*
name
)
{
#if WITH_LOCK
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
#endif
auto
new_name
=
string
::
Sprintf
(
"%p.%d"
,
this
,
vars_
.
size
());
auto
new_name
=
string
::
Sprintf
(
"%p.%d"
,
this
,
vars_
.
size
());
if
(
name
!=
nullptr
)
{
if
(
name
!=
nullptr
)
{
*
name
=
new_name
;
*
name
=
new_name
;
...
@@ -69,29 +82,39 @@ Variable* Scope::Var(std::string* name) {
...
@@ -69,29 +82,39 @@ Variable* Scope::Var(std::string* name) {
}
}
Variable
*
Scope
::
FindVar
(
const
std
::
string
&
name
)
const
{
Variable
*
Scope
::
FindVar
(
const
std
::
string
&
name
)
const
{
#if WITH_LOCK
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
#endif
return
FindVarInternal
(
name
);
return
FindVarInternal
(
name
);
}
}
const
Scope
*
Scope
::
FindScope
(
const
Variable
*
var
)
const
{
const
Scope
*
Scope
::
FindScope
(
const
Variable
*
var
)
const
{
#if WITH_LOCK
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
#endif
return
FindScopeInternal
(
var
);
return
FindScopeInternal
(
var
);
}
}
void
Scope
::
DropKids
()
{
void
Scope
::
DropKids
()
{
#if WITH_LOCK
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
#endif
for
(
Scope
*
s
:
kids_
)
delete
s
;
for
(
Scope
*
s
:
kids_
)
delete
s
;
kids_
.
clear
();
kids_
.
clear
();
}
}
bool
Scope
::
HasKid
(
const
Scope
*
scope
)
const
{
bool
Scope
::
HasKid
(
const
Scope
*
scope
)
const
{
#if WITH_LOCK
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
#endif
auto
it
=
std
::
find
(
this
->
kids_
.
begin
(),
this
->
kids_
.
end
(),
scope
);
auto
it
=
std
::
find
(
this
->
kids_
.
begin
(),
this
->
kids_
.
end
(),
scope
);
return
it
!=
this
->
kids_
.
end
();
return
it
!=
this
->
kids_
.
end
();
}
}
std
::
vector
<
std
::
string
>
Scope
::
LocalVarNames
()
const
{
std
::
vector
<
std
::
string
>
Scope
::
LocalVarNames
()
const
{
#if WITH_LOCK
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
#endif
std
::
vector
<
std
::
string
>
known_vars
;
std
::
vector
<
std
::
string
>
known_vars
;
known_vars
.
reserve
(
this
->
vars_
.
size
());
known_vars
.
reserve
(
this
->
vars_
.
size
());
for
(
auto
&
p
:
vars_
)
{
for
(
auto
&
p
:
vars_
)
{
...
@@ -101,7 +124,9 @@ std::vector<std::string> Scope::LocalVarNames() const {
...
@@ -101,7 +124,9 @@ std::vector<std::string> Scope::LocalVarNames() const {
}
}
void
Scope
::
DeleteScope
(
Scope
*
scope
)
const
{
void
Scope
::
DeleteScope
(
Scope
*
scope
)
const
{
#if WITH_LOCK
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
#endif
auto
it
=
std
::
find
(
this
->
kids_
.
begin
(),
this
->
kids_
.
end
(),
scope
);
auto
it
=
std
::
find
(
this
->
kids_
.
begin
(),
this
->
kids_
.
end
(),
scope
);
PADDLE_ENFORCE
(
it
!=
this
->
kids_
.
end
(),
"Cannot find %p as kid scope"
,
scope
);
PADDLE_ENFORCE
(
it
!=
this
->
kids_
.
end
(),
"Cannot find %p as kid scope"
,
scope
);
this
->
kids_
.
erase
(
it
);
this
->
kids_
.
erase
(
it
);
...
@@ -114,7 +139,9 @@ void Scope::DeleteScope(Scope* scope) const {
...
@@ -114,7 +139,9 @@ void Scope::DeleteScope(Scope* scope) const {
}
}
void
Scope
::
EraseVars
(
const
std
::
vector
<
std
::
string
>&
var_names
)
{
void
Scope
::
EraseVars
(
const
std
::
vector
<
std
::
string
>&
var_names
)
{
#if WITH_LOCK
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
#endif
std
::
set
<
std
::
string
>
var_set
(
var_names
.
begin
(),
var_names
.
end
());
std
::
set
<
std
::
string
>
var_set
(
var_names
.
begin
(),
var_names
.
end
());
for
(
auto
it
=
vars_
.
begin
();
it
!=
vars_
.
end
();)
{
for
(
auto
it
=
vars_
.
begin
();
it
!=
vars_
.
end
();)
{
if
(
var_set
.
find
(
it
->
first
)
!=
var_set
.
end
())
{
if
(
var_set
.
find
(
it
->
first
)
!=
var_set
.
end
())
{
...
@@ -127,12 +154,16 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
...
@@ -127,12 +154,16 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
void
Scope
::
Rename
(
const
std
::
string
&
origin_name
,
void
Scope
::
Rename
(
const
std
::
string
&
origin_name
,
const
std
::
string
&
new_name
)
const
{
const
std
::
string
&
new_name
)
const
{
#if WITH_LOCK
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
#endif
RenameInternal
(
origin_name
,
new_name
);
RenameInternal
(
origin_name
,
new_name
);
}
}
std
::
string
Scope
::
Rename
(
const
std
::
string
&
origin_name
)
const
{
std
::
string
Scope
::
Rename
(
const
std
::
string
&
origin_name
)
const
{
#if WITH_LOCK
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
#endif
auto
new_name
=
string
::
Sprintf
(
"%p.%d"
,
this
,
vars_
.
size
());
auto
new_name
=
string
::
Sprintf
(
"%p.%d"
,
this
,
vars_
.
size
());
RenameInternal
(
origin_name
,
new_name
);
RenameInternal
(
origin_name
,
new_name
);
return
new_name
;
return
new_name
;
...
...
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
b31905c5
...
@@ -53,7 +53,7 @@ if(NOT APPLE)
...
@@ -53,7 +53,7 @@ if(NOT APPLE)
endif
()
endif
()
if
(
WITH_TESTING
)
if
(
WITH_TESTING
)
# tests/book depends the models that generated by python/paddle/fluid/tests/book
# tests/book depends the models that generated by python/paddle/fluid/tests/book
add_subdirectory
(
tests/book
)
add_subdirectory
(
tests/book
)
if
(
WITH_INFERENCE_API_TEST
)
if
(
WITH_INFERENCE_API_TEST
)
add_subdirectory
(
tests/api
)
add_subdirectory
(
tests/api
)
...
...
paddle/fluid/inference/analysis/CMakeLists.txt
浏览文件 @
b31905c5
cc_library
(
ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass
)
cc_library
(
ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass
)
set
(
analysis_deps
set
(
analysis_deps
framework_proto proto_desc ir_pass_manager graph pass paddle_fluid_api executor pretty_log
)
framework_proto proto_desc ir_pass_manager graph pass paddle_fluid_api executor pretty_log
)
cc_library
(
analysis SRCS pass_manager.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc
cc_library
(
analysis SRCS pass_manager.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc
analyzer.cc
analyzer.cc
...
...
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
b31905c5
...
@@ -18,10 +18,10 @@ if(APPLE)
...
@@ -18,10 +18,10 @@ if(APPLE)
endif
(
APPLE
)
endif
(
APPLE
)
set
(
inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager
${
GLOB_PASS_LIB
}
)
set
(
inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager
naive_executor
${
GLOB_PASS_LIB
}
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
set
(
inference_deps
${
inference_deps
}
paddle_inference_tensorrt_subgraph_engine
)
set
(
inference_deps
${
inference_deps
}
paddle_inference_tensorrt_subgraph_engine
analysis_predictor
)
endif
()
endif
()
function
(
inference_api_test TARGET_NAME
)
function
(
inference_api_test TARGET_NAME
)
...
@@ -43,8 +43,10 @@ function(inference_api_test TARGET_NAME)
...
@@ -43,8 +43,10 @@ function(inference_api_test TARGET_NAME)
endif
(
WITH_TESTING
)
endif
(
WITH_TESTING
)
endfunction
(
inference_api_test
)
endfunction
(
inference_api_test
)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor
)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor
)
cc_library
(
zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS paddle_inference_api
)
cc_library
(
zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc DEPS paddle_inference_api
)
cc_test
(
test_paddle_inference_api
cc_test
(
test_paddle_inference_api
SRCS api_tester.cc
SRCS api_tester.cc
DEPS paddle_inference_api
)
DEPS paddle_inference_api
)
...
@@ -52,18 +54,22 @@ cc_test(test_paddle_inference_api
...
@@ -52,18 +54,22 @@ cc_test(test_paddle_inference_api
inference_api_test
(
test_api_impl SRC api_impl_tester.cc
inference_api_test
(
test_api_impl SRC api_impl_tester.cc
ARGS test_word2vec test_image_classification
)
ARGS test_word2vec test_image_classification
)
set
(
PYTHON_TESTS_DIR
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/tests
)
cc_test
(
test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_predictor
${
inference_deps
}
paddle_inference_api
ARGS --dirname=
${
PYTHON_TESTS_DIR
}
/book
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
cc_library
(
paddle_inference_tensorrt_subgraph_engine
cc_library
(
paddle_inference_tensorrt_subgraph_engine
SRCS api_tensorrt_subgraph_engine.cc
SRCS api_tensorrt_subgraph_engine.cc
DEPS paddle_inference_api analysis tensorrt_engine paddle_inference_api paddle_fluid_api tensorrt_converter
)
DEPS paddle_inference_api analysis tensorrt_engine paddle_inference_api paddle_fluid_api tensorrt_converter
zero_copy_tensor_dummy
)
inference_api_test
(
test_api_tensorrt_subgraph_engine SRC api_tensorrt_subgraph_engine_tester.cc ARGS test_word2vec
)
inference_api_test
(
test_api_tensorrt_subgraph_engine SRC api_tensorrt_subgraph_engine_tester.cc ARGS test_word2vec
)
endif
()
endif
()
if
(
WITH_ANAKIN AND WITH_MKL
)
# only needed in CI
if
(
WITH_ANAKIN AND WITH_MKL
)
# only needed in CI
# compile the libinference_anakin_api.a and anakin.so.
# compile the libinference_anakin_api.a and anakin.so.
cc_library
(
inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml
)
cc_library
(
inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml
scope zero_copy_tensor_dummy
)
cc_library
(
inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber
)
cc_library
(
inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber
scope
)
function
(
anakin_target target_name
)
function
(
anakin_target target_name
)
target_compile_options
(
${
target_name
}
BEFORE PUBLIC
${
ANAKIN_COMPILE_EXTRA_FLAGS
}
)
target_compile_options
(
${
target_name
}
BEFORE PUBLIC
${
ANAKIN_COMPILE_EXTRA_FLAGS
}
)
endfunction
()
endfunction
()
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
b31905c5
...
@@ -16,11 +16,15 @@
...
@@ -16,11 +16,15 @@
#include <memory>
#include <memory>
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/api/timer.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
...
@@ -28,8 +32,11 @@ DECLARE_bool(profile);
...
@@ -28,8 +32,11 @@ DECLARE_bool(profile);
namespace
paddle
{
namespace
paddle
{
using
contrib
::
AnalysisConfig
;
bool
AnalysisPredictor
::
Init
(
bool
AnalysisPredictor
::
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>&
parent_scope
)
{
const
std
::
shared_ptr
<
framework
::
Scope
>
&
parent_scope
,
const
std
::
shared_ptr
<
framework
::
ProgramDesc
>
&
program
)
{
VLOG
(
3
)
<<
"Predictor::init()"
;
VLOG
(
3
)
<<
"Predictor::init()"
;
#if !defined(_WIN32)
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
if
(
FLAGS_profile
)
{
...
@@ -43,7 +50,8 @@ bool AnalysisPredictor::Init(
...
@@ -43,7 +50,8 @@ bool AnalysisPredictor::Init(
if
(
config_
.
use_gpu
)
{
if
(
config_
.
use_gpu
)
{
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
LOG
(
WARNING
)
<<
"ir optimize only supports CPU currently"
;
LOG
(
WARNING
)
<<
"ir optimize only supports CPU currently, enable_ir_optim "
"is turned false."
;
config_
.
enable_ir_optim
=
false
;
config_
.
enable_ir_optim
=
false
;
}
else
{
}
else
{
place_
=
paddle
::
platform
::
CPUPlace
();
place_
=
paddle
::
platform
::
CPUPlace
();
...
@@ -56,37 +64,134 @@ bool AnalysisPredictor::Init(
...
@@ -56,37 +64,134 @@ bool AnalysisPredictor::Init(
scope_
.
reset
(
new
paddle
::
framework
::
Scope
());
scope_
.
reset
(
new
paddle
::
framework
::
Scope
());
}
}
executor_
.
reset
(
new
paddle
::
framework
::
Executor
(
place_
));
executor_
.
reset
(
new
paddle
::
framework
::
Naive
Executor
(
place_
));
// Initialize the inference program
if
(
!
program
)
{
if
(
!
config_
.
model_dir
.
empty
())
{
if
(
!
LoadProgramDesc
())
return
false
;
// Parameters are saved in separate files sited in
OptimizeInferenceProgram
();
// the specified `dirname`.
inference_program_
=
paddle
::
inference
::
Load
(
executor_
.
get
(),
scope_
.
get
(),
config_
.
model_dir
);
}
else
if
(
!
config_
.
prog_file
.
empty
()
&&
!
config_
.
param_file
.
empty
())
{
// All parameters are saved in a single file.
// The file names should be consistent with that used
// in Python API `fluid.io.save_inference_model`.
inference_program_
=
paddle
::
inference
::
Load
(
executor_
.
get
(),
scope_
.
get
(),
config_
.
prog_file
,
config_
.
param_file
);
}
else
{
}
else
{
LOG
(
ERROR
)
<<
"fail to load inference model from "
<<
config_
.
model_dir
;
inference_program_
=
program
;
}
executor_
->
Prepare
(
scope_
.
get
(),
*
inference_program_
,
0
,
config_
.
use_feed_fetch_ops
);
// Get the feed_target_names and fetch_target_names
PrepareFeedFetch
();
return
true
;
}
bool
AnalysisPredictor
::
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
output_data
,
int
batch_size
)
{
VLOG
(
3
)
<<
"Predictor::predict"
;
inference
::
Timer
timer
;
timer
.
tic
();
// set feed variable
std
::
vector
<
framework
::
LoDTensor
>
feeds
;
framework
::
Scope
*
scope
=
sub_scope_
?
sub_scope_
:
scope_
.
get
();
if
(
!
SetFeed
(
inputs
,
scope
))
{
LOG
(
ERROR
)
<<
"fail to set feed"
;
return
false
;
return
false
;
}
}
// Run the inference program
// if share variables, we need not create variables
executor_
->
Run
();
OptimizeInferenceProgram
();
// get fetch variable
if
(
config_
.
_use_mkldnn
)
{
if
(
!
GetFetch
(
output_data
,
scope
))
{
executor_
->
EnableMKLDNN
(
*
inference_program_
);
LOG
(
ERROR
)
<<
"fail to get fetches"
;
return
false
;
}
}
ctx_
=
executor_
->
Prepare
(
*
inference_program_
,
0
);
VLOG
(
3
)
<<
"predict cost: "
<<
timer
.
toc
()
<<
"ms"
;
return
true
;
}
VLOG
(
5
)
<<
"to create variables"
;
bool
AnalysisPredictor
::
SetFeed
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
PADDLE_ENFORCE
(
scope_
.
get
());
framework
::
Scope
*
scope
)
{
executor_
->
CreateVariables
(
*
inference_program_
,
VLOG
(
3
)
<<
"Predictor::set_feed"
;
sub_scope_
?
sub_scope_
:
scope_
.
get
(),
0
);
if
(
inputs
.
size
()
!=
feeds_
.
size
())
{
// Get the feed_target_names and fetch_target_names
LOG
(
ERROR
)
<<
"wrong feed input size, need "
<<
feeds_
.
size
()
<<
" but get "
PrepareFeedFetch
();
<<
inputs
.
size
();
return
false
;
}
// Cache the inputs memory for better concurrency performance.
feed_tensors_
.
resize
(
inputs
.
size
());
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
++
i
)
{
auto
&
input
=
feed_tensors_
[
i
];
framework
::
DDim
ddim
=
framework
::
make_ddim
(
inputs
[
i
].
shape
);
void
*
input_ptr
;
if
(
inputs
[
i
].
dtype
==
PaddleDType
::
INT64
)
{
input_ptr
=
input
.
mutable_data
<
int64_t
>
(
ddim
,
platform
::
CPUPlace
());
}
else
if
(
inputs
[
i
].
dtype
==
PaddleDType
::
FLOAT32
)
{
input_ptr
=
input
.
mutable_data
<
float
>
(
ddim
,
platform
::
CPUPlace
());
}
else
{
LOG
(
ERROR
)
<<
"unsupported feed type "
<<
inputs
[
i
].
dtype
;
return
false
;
}
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std
::
memcpy
(
static_cast
<
void
*>
(
input_ptr
),
inputs
[
i
].
data
.
data
(),
inputs
[
i
].
data
.
length
());
// TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
framework
::
LoD
lod
;
for
(
auto
&
level
:
inputs
[
i
].
lod
)
{
lod
.
emplace_back
(
level
);
}
input
.
set_lod
(
lod
);
int
idx
=
-
1
;
if
(
config_
.
specify_input_name
)
{
idx
=
feed_names_
[
inputs
[
i
].
name
];
}
else
{
idx
=
boost
::
get
<
int
>
(
feeds_
[
i
]
->
GetAttr
(
"col"
));
}
framework
::
SetFeedVariable
(
scope
,
input
,
"feed"
,
idx
);
}
return
true
;
}
template
<
typename
T
>
void
AnalysisPredictor
::
GetFetchOne
(
const
framework
::
LoDTensor
&
fetch
,
PaddleTensor
*
output
)
{
// set shape.
auto
shape
=
framework
::
vectorize
(
fetch
.
dims
());
output
->
shape
.
assign
(
shape
.
begin
(),
shape
.
end
());
// set data.
const
T
*
data
=
fetch
.
data
<
T
>
();
int
num_elems
=
inference
::
VecReduceToInt
(
shape
);
output
->
data
.
Resize
(
num_elems
*
sizeof
(
T
));
// The fetched tensor output by fetch op, should always in CPU memory, so just
// copy.
memcpy
(
output
->
data
.
data
(),
data
,
num_elems
*
sizeof
(
T
));
// set lod
output
->
lod
.
clear
();
for
(
auto
&
level
:
fetch
.
lod
())
{
output
->
lod
.
emplace_back
(
level
.
begin
(),
level
.
end
());
}
}
bool
AnalysisPredictor
::
GetFetch
(
std
::
vector
<
PaddleTensor
>
*
outputs
,
framework
::
Scope
*
scope
)
{
VLOG
(
3
)
<<
"Predictor::get_fetch"
;
outputs
->
resize
(
fetchs_
.
size
());
for
(
size_t
i
=
0
;
i
<
fetchs_
.
size
();
++
i
)
{
int
idx
=
boost
::
get
<
int
>
(
fetchs_
[
i
]
->
GetAttr
(
"col"
));
PADDLE_ENFORCE
((
size_t
)
idx
==
i
);
framework
::
LoDTensor
&
fetch
=
framework
::
GetFetchVariable
(
*
scope
,
"fetch"
,
idx
);
auto
type
=
fetch
.
type
();
auto
output
=
&
(
outputs
->
at
(
i
));
if
(
type
==
typeid
(
float
))
{
GetFetchOne
<
float
>
(
fetch
,
output
);
output
->
dtype
=
PaddleDType
::
FLOAT32
;
}
else
if
(
type
==
typeid
(
int64_t
))
{
GetFetchOne
<
int64_t
>
(
fetch
,
output
);
output
->
dtype
=
PaddleDType
::
INT64
;
}
else
{
LOG
(
ERROR
)
<<
"unknown type, only support float32 and int64 now."
;
}
}
return
true
;
return
true
;
}
}
...
@@ -107,6 +212,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
...
@@ -107,6 +212,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
new
std
::
string
(
config_
.
prog_file
));
new
std
::
string
(
config_
.
prog_file
));
argument_
.
fluid_model_param_path
.
reset
(
new
std
::
string
(
config_
.
param_file
));
argument_
.
fluid_model_param_path
.
reset
(
new
std
::
string
(
config_
.
param_file
));
}
}
argument_
.
origin_program_desc
.
reset
(
argument_
.
origin_program_desc
.
reset
(
new
ProgramDesc
(
*
inference_program_
->
Proto
()));
new
ProgramDesc
(
*
inference_program_
->
Proto
()));
PADDLE_ENFORCE
(
PADDLE_ENFORCE
(
...
@@ -127,9 +233,8 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
...
@@ -127,9 +233,8 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
}
}
template
<
>
template
<
>
std
::
unique_ptr
<
PaddlePredictor
>
std
::
unique_ptr
<
PaddlePredictor
>
CreatePaddlePredictor
<
CreatePaddlePredictor
<
contrib
::
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
const
AnalysisConfig
&
config
)
{
const
contrib
::
AnalysisConfig
&
config
)
{
VLOG
(
3
)
<<
"create AnalysisConfig"
;
VLOG
(
3
)
<<
"create AnalysisConfig"
;
if
(
config
.
use_gpu
)
{
if
(
config
.
use_gpu
)
{
// 1. GPU memeroy
// 1. GPU memeroy
...
@@ -150,15 +255,90 @@ CreatePaddlePredictor<contrib::AnalysisConfig, PaddleEngineKind::kAnalysis>(
...
@@ -150,15 +255,90 @@ CreatePaddlePredictor<contrib::AnalysisConfig, PaddleEngineKind::kAnalysis>(
}
}
std
::
unique_ptr
<
PaddlePredictor
>
predictor
(
new
AnalysisPredictor
(
config
));
std
::
unique_ptr
<
PaddlePredictor
>
predictor
(
new
AnalysisPredictor
(
config
));
if
(
!
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
())
->
Init
(
nullptr
))
{
if
(
!
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
())
->
Init
(
nullptr
))
{
return
nullptr
;
return
nullptr
;
}
}
return
predictor
;
return
predictor
;
}
}
void
AnalysisPredictor
::
PrepareFeedFetch
()
{
for
(
auto
*
op
:
inference_program_
->
Block
(
0
).
AllOps
())
{
if
(
op
->
Type
()
==
"feed"
)
{
int
idx
=
boost
::
get
<
int
>
(
op
->
GetAttr
(
"col"
));
if
(
feeds_
.
size
()
<=
static_cast
<
size_t
>
(
idx
))
{
feeds_
.
resize
(
idx
+
1
);
}
feeds_
[
idx
]
=
op
;
feed_names_
[
op
->
Output
(
"Out"
)[
0
]]
=
idx
;
}
else
if
(
op
->
Type
()
==
"fetch"
)
{
int
idx
=
boost
::
get
<
int
>
(
op
->
GetAttr
(
"col"
));
if
(
fetchs_
.
size
()
<=
static_cast
<
size_t
>
(
idx
))
{
fetchs_
.
resize
(
idx
+
1
);
}
fetchs_
[
idx
]
=
op
;
}
}
}
std
::
unique_ptr
<
ZeroCopyTensor
>
AnalysisPredictor
::
GetInputTensor
(
const
std
::
string
&
name
)
{
PADDLE_ENFORCE
(
executor_
->
scope
()
->
FindVar
(
name
),
"no name called %s"
,
name
);
std
::
unique_ptr
<
ZeroCopyTensor
>
res
(
new
ZeroCopyTensor
(
static_cast
<
void
*>
(
executor_
->
scope
())));
res
->
input_or_output_
=
true
;
res
->
SetName
(
name
);
return
res
;
}
std
::
unique_ptr
<
ZeroCopyTensor
>
AnalysisPredictor
::
GetOutputTensor
(
const
std
::
string
&
name
)
{
PADDLE_ENFORCE
(
executor_
->
scope
()
->
FindVar
(
name
),
"no name called %s"
,
name
);
std
::
unique_ptr
<
ZeroCopyTensor
>
res
(
new
ZeroCopyTensor
(
static_cast
<
void
*>
(
executor_
->
scope
())));
res
->
input_or_output_
=
false
;
res
->
SetName
(
name
);
return
res
;
}
bool
AnalysisPredictor
::
ZeroCopyRun
()
{
executor_
->
Run
();
return
true
;
}
bool
AnalysisPredictor
::
LoadProgramDesc
()
{
// Initialize the inference program
std
::
unique_ptr
<
framework
::
Executor
>
tmp_exe
(
new
framework
::
Executor
(
platform
::
CPUPlace
()));
if
(
!
config_
.
model_dir
.
empty
())
{
// Parameters are saved in separate files sited in
// the specified `dirname`.
inference_program_
=
paddle
::
inference
::
Load
(
static_cast
<
framework
::
Executor
*>
(
tmp_exe
.
get
()),
scope_
.
get
(),
config_
.
model_dir
);
}
else
if
(
!
config_
.
prog_file
.
empty
()
&&
!
config_
.
param_file
.
empty
())
{
// All parameters are saved in a single file.
// The file names should be consistent with that used
// in Python API `fluid.io.save_inference_model`.
inference_program_
=
paddle
::
inference
::
Load
(
static_cast
<
framework
::
Executor
*>
(
tmp_exe
.
get
()),
scope_
.
get
(),
config_
.
prog_file
,
config_
.
param_file
);
}
else
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"not valid model path '%s' or program path '%s'."
,
config_
.
model_dir
,
config_
.
param_file
);
return
false
;
}
return
true
;
}
std
::
unique_ptr
<
PaddlePredictor
>
AnalysisPredictor
::
Clone
()
{
auto
*
x
=
new
AnalysisPredictor
(
config_
);
x
->
Init
(
scope_
,
inference_program_
);
return
std
::
unique_ptr
<
PaddlePredictor
>
(
x
);
}
template
<
>
template
<
>
std
::
unique_ptr
<
PaddlePredictor
>
CreatePaddlePredictor
<
contrib
::
AnalysisConfig
>
(
std
::
unique_ptr
<
PaddlePredictor
>
CreatePaddlePredictor
<
contrib
::
AnalysisConfig
>
(
const
contrib
::
AnalysisConfig
&
config
)
{
const
contrib
::
AnalysisConfig
&
config
)
{
return
CreatePaddlePredictor
<
contrib
::
AnalysisConfig
,
return
CreatePaddlePredictor
<
contrib
::
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
PaddleEngineKind
::
kAnalysis
>
(
config
);
}
}
...
...
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
b31905c5
...
@@ -12,42 +12,81 @@
...
@@ -12,42 +12,81 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#pragma once
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/string/printf.h"
namespace
paddle
{
namespace
paddle
{
using
inference
::
analysis
::
Argument
;
using
inference
::
analysis
::
Argument
;
using
inference
::
analysis
::
Analyzer
;
using
inference
::
analysis
::
Analyzer
;
using
framework
::
proto
::
ProgramDesc
;
using
framework
::
proto
::
ProgramDesc
;
using
framework
::
NaiveExecutor
;
using
contrib
::
AnalysisConfig
;
/* This predictor is based on the original native predictor with IR and Analysis
/* This predictor is based on the original native predictor with IR and Analysis
* support. It will optimize IR and Parameters in the runtime.
* support. It will optimize IR and Parameters in the runtime.
* TODO(Superjomn) Replace the Navive predictor?
* TODO(Superjomn) Replace the Navive predictor?
*/
*/
class
AnalysisPredictor
:
public
Native
PaddlePredictor
{
class
AnalysisPredictor
:
public
PaddlePredictor
{
public:
public:
explicit
AnalysisPredictor
(
const
contrib
::
AnalysisConfig
&
config
)
explicit
AnalysisPredictor
(
const
AnalysisConfig
&
config
)
:
config_
(
config
)
{}
:
NativePaddlePredictor
(
config
),
config_
(
config
)
{}
bool
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>&
parent_scope
);
bool
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>
&
parent_scope
,
const
std
::
shared_ptr
<
framework
::
ProgramDesc
>
&
program
=
nullptr
);
bool
Run
(
const
std
::
vector
<
PaddleTensor
>&
inputs
,
bool
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
std
::
vector
<
PaddleTensor
>*
output_data
,
std
::
vector
<
PaddleTensor
>
*
output_data
,
int
batch_size
=
-
1
)
override
{
int
batch_size
=
-
1
)
override
;
return
NativePaddlePredictor
::
Run
(
inputs
,
output_data
,
batch_size
);
}
std
::
unique_ptr
<
ZeroCopyTensor
>
GetInputTensor
(
const
std
::
string
&
name
)
override
;
std
::
unique_ptr
<
ZeroCopyTensor
>
GetOutputTensor
(
const
std
::
string
&
name
)
override
;
bool
ZeroCopyRun
()
override
;
void
PrepareFeedFetch
();
void
OptimizeInferenceProgram
();
void
OptimizeInferenceProgram
();
Argument
&
analysis_argument
()
{
return
argument_
;
}
Argument
&
analysis_argument
()
{
return
argument_
;
}
std
::
unique_ptr
<
PaddlePredictor
>
Clone
()
override
;
framework
::
Scope
*
scope
()
{
return
executor_
->
scope
();
}
framework
::
ProgramDesc
&
program
()
{
return
*
inference_program_
;
}
protected:
bool
LoadProgramDesc
();
bool
SetFeed
(
const
std
::
vector
<
PaddleTensor
>
&
input_datas
,
framework
::
Scope
*
scope
);
bool
GetFetch
(
std
::
vector
<
PaddleTensor
>
*
output_data
,
framework
::
Scope
*
scope
);
template
<
typename
T
>
void
GetFetchOne
(
const
framework
::
LoDTensor
&
fetchs
,
PaddleTensor
*
output_data
);
private:
private:
contrib
::
AnalysisConfig
config_
;
contrib
::
AnalysisConfig
config_
;
Argument
argument_
;
Argument
argument_
;
std
::
unique_ptr
<
NaiveExecutor
>
executor_
;
platform
::
Place
place_
;
std
::
shared_ptr
<
framework
::
Scope
>
scope_
;
framework
::
Scope
*
sub_scope_
{
nullptr
};
std
::
shared_ptr
<
framework
::
ProgramDesc
>
inference_program_
;
std
::
vector
<
framework
::
OpDesc
*>
feeds_
;
std
::
map
<
std
::
string
,
size_t
>
feed_names_
;
std
::
vector
<
framework
::
OpDesc
*>
fetchs_
;
// Memory buffer for feed inputs. The temporary LoDTensor will cause serious
// concurrency problems, so cache them.
std
::
vector
<
framework
::
LoDTensor
>
feed_tensors_
;
};
};
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/api/analysis_predictor_tester.cc
0 → 100644
浏览文件 @
b31905c5
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
DEFINE_string
(
dirname
,
""
,
"dirname to tests."
);
namespace
paddle
{
namespace
inference
{
using
contrib
::
AnalysisConfig
;
TEST
(
AnalysisPredictor
,
ZeroCopy
)
{
AnalysisConfig
config
;
config
.
model_dir
=
FLAGS_dirname
+
"/word2vec.inference.model"
;
config
.
use_feed_fetch_ops
=
false
;
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
auto
w0
=
predictor
->
GetInputTensor
(
"firstw"
);
auto
w1
=
predictor
->
GetInputTensor
(
"secondw"
);
auto
w2
=
predictor
->
GetInputTensor
(
"thirdw"
);
auto
w3
=
predictor
->
GetInputTensor
(
"forthw"
);
w0
->
Reshape
({
4
,
1
});
w1
->
Reshape
({
4
,
1
});
w2
->
Reshape
({
4
,
1
});
w3
->
Reshape
({
4
,
1
});
auto
*
w0_data
=
w0
->
mutable_data
<
int64_t
>
(
PaddlePlace
::
kCPU
);
auto
*
w1_data
=
w1
->
mutable_data
<
int64_t
>
(
PaddlePlace
::
kCPU
);
auto
*
w2_data
=
w2
->
mutable_data
<
int64_t
>
(
PaddlePlace
::
kCPU
);
auto
*
w3_data
=
w3
->
mutable_data
<
int64_t
>
(
PaddlePlace
::
kCPU
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
w0_data
[
i
]
=
i
;
w1_data
[
i
]
=
i
;
w2_data
[
i
]
=
i
;
w3_data
[
i
]
=
i
;
}
predictor
->
ZeroCopyRun
();
auto
out
=
predictor
->
GetOutputTensor
(
"fc_1.tmp_2"
);
PaddlePlace
place
;
int
size
=
0
;
auto
*
out_data
=
out
->
data
<
float
>
(
&
place
,
&
size
);
LOG
(
INFO
)
<<
"output size: "
<<
size
/
sizeof
(
float
);
LOG
(
INFO
)
<<
"output_data: "
<<
out_data
;
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/api/api.cc
浏览文件 @
b31905c5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
//
you may not use this file except in compliance with the License.
// Licensed under the Apache License, Version 2.0 (the "License");
You may obtain a copy of the License at
// you may not use this file except in compliance with the License.
http://www.apache.org/licenses/LICENSE-2.0
// You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
//
distributed under the License is distributed on an "AS IS" BASIS,
// http://www.apache.org/licenses/LICENSE-2.0
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
See the License for the specific language governing permissions and
// Unless required by applicable law or agreed to in writing, software
limitations under the License. */
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle_inference_api.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -26,7 +32,7 @@ int PaddleDtypeSize(PaddleDType dtype) {
...
@@ -26,7 +32,7 @@ int PaddleDtypeSize(PaddleDType dtype) {
}
}
}
}
PaddleBuf
::
PaddleBuf
(
PaddleBuf
&&
other
)
PaddleBuf
::
PaddleBuf
(
PaddleBuf
&&
other
)
:
data_
(
other
.
data_
),
:
data_
(
other
.
data_
),
length_
(
other
.
length_
),
length_
(
other
.
length_
),
memory_owned_
(
other
.
memory_owned_
)
{
memory_owned_
(
other
.
memory_owned_
)
{
...
@@ -35,9 +41,9 @@ PaddleBuf::PaddleBuf(PaddleBuf&& other)
...
@@ -35,9 +41,9 @@ PaddleBuf::PaddleBuf(PaddleBuf&& other)
other
.
length_
=
0
;
other
.
length_
=
0
;
}
}
PaddleBuf
::
PaddleBuf
(
const
PaddleBuf
&
other
)
{
*
this
=
other
;
}
PaddleBuf
::
PaddleBuf
(
const
PaddleBuf
&
other
)
{
*
this
=
other
;
}
PaddleBuf
&
PaddleBuf
::
operator
=
(
const
PaddleBuf
&
other
)
{
PaddleBuf
&
PaddleBuf
::
operator
=
(
const
PaddleBuf
&
other
)
{
if
(
!
other
.
memory_owned_
)
{
if
(
!
other
.
memory_owned_
)
{
data_
=
other
.
data_
;
data_
=
other
.
data_
;
length_
=
other
.
length_
;
length_
=
other
.
length_
;
...
@@ -51,7 +57,7 @@ PaddleBuf& PaddleBuf::operator=(const PaddleBuf& other) {
...
@@ -51,7 +57,7 @@ PaddleBuf& PaddleBuf::operator=(const PaddleBuf& other) {
return
*
this
;
return
*
this
;
}
}
PaddleBuf
&
PaddleBuf
::
operator
=
(
PaddleBuf
&&
other
)
{
PaddleBuf
&
PaddleBuf
::
operator
=
(
PaddleBuf
&&
other
)
{
// only the buffer with external memory can be copied
// only the buffer with external memory can be copied
data_
=
other
.
data_
;
data_
=
other
.
data_
;
length_
=
other
.
length_
;
length_
=
other
.
length_
;
...
@@ -75,7 +81,7 @@ void PaddleBuf::Resize(size_t length) {
...
@@ -75,7 +81,7 @@ void PaddleBuf::Resize(size_t length) {
}
}
}
}
void
PaddleBuf
::
Reset
(
void
*
data
,
size_t
length
)
{
void
PaddleBuf
::
Reset
(
void
*
data
,
size_t
length
)
{
Free
();
Free
();
memory_owned_
=
false
;
memory_owned_
=
false
;
data_
=
data
;
data_
=
data
;
...
@@ -85,7 +91,7 @@ void PaddleBuf::Reset(void* data, size_t length) {
...
@@ -85,7 +91,7 @@ void PaddleBuf::Reset(void* data, size_t length) {
void
PaddleBuf
::
Free
()
{
void
PaddleBuf
::
Free
()
{
if
(
memory_owned_
&&
data_
)
{
if
(
memory_owned_
&&
data_
)
{
PADDLE_ENFORCE_GT
(
length_
,
0
);
PADDLE_ENFORCE_GT
(
length_
,
0
);
free
(
static_cast
<
char
*>
(
data_
));
free
(
static_cast
<
char
*>
(
data_
));
data_
=
nullptr
;
data_
=
nullptr
;
length_
=
0
;
length_
=
0
;
}
}
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
b31905c5
...
@@ -145,7 +145,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
...
@@ -145,7 +145,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
VLOG
(
4
)
<<
"Run prepared context"
;
VLOG
(
4
)
<<
"Run prepared context"
;
executor_
->
RunPreparedContext
(
ctx_
.
get
(),
scope
,
executor_
->
RunPreparedContext
(
ctx_
.
get
(),
scope
,
false
,
/* don't create local scope each time*/
false
,
/* don't create local scope each time*/
false
/* don't create variable ea
t
ch time */
);
false
/* don't create variable each time */
);
VLOG
(
4
)
<<
"Finish prepared context"
;
VLOG
(
4
)
<<
"Finish prepared context"
;
// get fetch variable
// get fetch variable
if
(
!
GetFetch
(
output_data
,
scope
))
{
if
(
!
GetFetch
(
output_data
,
scope
))
{
...
...
paddle/fluid/inference/api/api_impl.h
浏览文件 @
b31905c5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
...
@@ -30,6 +30,8 @@
...
@@ -30,6 +30,8 @@
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
...
@@ -52,6 +54,8 @@ class NativePaddlePredictor : public PaddlePredictor {
...
@@ -52,6 +54,8 @@ class NativePaddlePredictor : public PaddlePredictor {
~
NativePaddlePredictor
()
override
;
~
NativePaddlePredictor
()
override
;
framework
::
Scope
*
scope
()
{
return
sub_scope_
?
sub_scope_
:
scope_
.
get
();
}
protected:
protected:
bool
SetFeed
(
const
std
::
vector
<
PaddleTensor
>
&
input_datas
,
bool
SetFeed
(
const
std
::
vector
<
PaddleTensor
>
&
input_datas
,
framework
::
Scope
*
scope
);
framework
::
Scope
*
scope
);
...
...
paddle/fluid/inference/api/api_impl_tester.cc
浏览文件 @
b31905c5
...
@@ -43,7 +43,7 @@ PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
...
@@ -43,7 +43,7 @@ PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
NativeConfig
GetConfig
()
{
NativeConfig
GetConfig
()
{
NativeConfig
config
;
NativeConfig
config
;
config
.
model_dir
=
FLAGS_dirname
+
"word2vec.inference.model"
;
config
.
model_dir
=
FLAGS_dirname
+
"
/
word2vec.inference.model"
;
LOG
(
INFO
)
<<
"dirname "
<<
config
.
model_dir
;
LOG
(
INFO
)
<<
"dirname "
<<
config
.
model_dir
;
config
.
fraction_of_gpu_memory
=
0.15
;
config
.
fraction_of_gpu_memory
=
0.15
;
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
...
@@ -110,7 +110,7 @@ void MainImageClassification(bool use_gpu) {
...
@@ -110,7 +110,7 @@ void MainImageClassification(bool use_gpu) {
NativeConfig
config
=
GetConfig
();
NativeConfig
config
=
GetConfig
();
config
.
use_gpu
=
use_gpu
;
config
.
use_gpu
=
use_gpu
;
config
.
model_dir
=
config
.
model_dir
=
FLAGS_dirname
+
"image_classification_resnet.inference.model"
;
FLAGS_dirname
+
"
/
image_classification_resnet.inference.model"
;
const
bool
is_combined
=
false
;
const
bool
is_combined
=
false
;
std
::
vector
<
std
::
vector
<
int64_t
>>
feed_target_shapes
=
std
::
vector
<
std
::
vector
<
int64_t
>>
feed_target_shapes
=
...
@@ -214,7 +214,7 @@ void MainThreadsImageClassification(bool use_gpu) {
...
@@ -214,7 +214,7 @@ void MainThreadsImageClassification(bool use_gpu) {
NativeConfig
config
=
GetConfig
();
NativeConfig
config
=
GetConfig
();
config
.
use_gpu
=
use_gpu
;
config
.
use_gpu
=
use_gpu
;
config
.
model_dir
=
config
.
model_dir
=
FLAGS_dirname
+
"image_classification_resnet.inference.model"
;
FLAGS_dirname
+
"
/
image_classification_resnet.inference.model"
;
auto
main_predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
auto
main_predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
std
::
vector
<
framework
::
LoDTensor
>
jobs
(
num_jobs
);
std
::
vector
<
framework
::
LoDTensor
>
jobs
(
num_jobs
);
...
...
paddle/fluid/inference/api/details/zero_copy_tensor.cc
0 → 100644
浏览文件 @
b31905c5
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
void
ZeroCopyTensor
::
Reshape
(
const
std
::
vector
<
int
>
&
shape
)
{
PADDLE_ENFORCE
(
!
name_
.
empty
(),
"Need to SetName first, so that the corresponding tensor can "
"be retrieved."
);
PADDLE_ENFORCE
(
input_or_output_
,
"Can't reshape the output tensor, it is readonly"
);
PADDLE_ENFORCE
(
scope_
);
auto
*
scope
=
static_cast
<
framework
::
Scope
*>
(
scope_
);
auto
*
var
=
scope
->
FindVar
(
name_
);
PADDLE_ENFORCE
(
var
,
"No tensor called [%s] in the runtime scope"
,
name_
);
auto
*
tensor
=
var
->
GetMutable
<
framework
::
LoDTensor
>
();
tensor
->
Resize
(
framework
::
make_ddim
(
shape
));
}
template
<
typename
T
>
T
*
ZeroCopyTensor
::
mutable_data
(
PaddlePlace
place
)
{
auto
*
tensor
=
static_cast
<
framework
::
LoDTensor
*>
(
FindTensor
());
switch
(
static_cast
<
int
>
(
place
))
{
case
static_cast
<
int
>
(
PaddlePlace
::
kCPU
):
{
return
tensor
->
mutable_data
<
T
>
(
platform
::
CPUPlace
());
}
case
static_cast
<
int
>
(
PaddlePlace
::
kGPU
):
{
return
tensor
->
mutable_data
<
T
>
(
platform
::
CUDAPlace
());
}
default:
PADDLE_THROW
(
"Unsupported place: %d"
,
static_cast
<
int
>
(
place
));
break
;
}
return
nullptr
;
}
template
<
typename
T
>
T
*
ZeroCopyTensor
::
data
(
PaddlePlace
*
place
,
int
*
size
)
{
auto
*
tensor
=
static_cast
<
framework
::
LoDTensor
*>
(
FindTensor
());
auto
*
res
=
tensor
->
data
<
T
>
();
if
(
platform
::
is_cpu_place
(
tensor
->
place
()))
{
*
place
=
PaddlePlace
::
kCPU
;
}
else
if
(
platform
::
is_gpu_place
(
tensor
->
place
()))
{
*
place
=
PaddlePlace
::
kGPU
;
}
else
{
*
place
=
PaddlePlace
::
kUNK
;
}
*
size
=
tensor
->
numel
();
return
res
;
}
template
float
*
ZeroCopyTensor
::
data
<
float
>(
PaddlePlace
*
place
,
int
*
size
);
template
int64_t
*
ZeroCopyTensor
::
data
<
int64_t
>(
PaddlePlace
*
place
,
int
*
size
);
template
float
*
ZeroCopyTensor
::
mutable_data
<
float
>(
PaddlePlace
place
);
template
int64_t
*
ZeroCopyTensor
::
mutable_data
<
int64_t
>(
PaddlePlace
place
);
void
*
ZeroCopyTensor
::
FindTensor
()
const
{
PADDLE_ENFORCE
(
!
name_
.
empty
(),
"Need to SetName first, so that the corresponding tensor can "
"be retrieved."
);
PADDLE_ENFORCE
(
scope_
);
auto
*
scope
=
static_cast
<
framework
::
Scope
*>
(
scope_
);
auto
*
var
=
scope
->
FindVar
(
name_
);
PADDLE_ENFORCE
(
var
,
"No tensor called [%s] in the runtime scope"
,
name_
);
auto
*
tensor
=
var
->
GetMutable
<
framework
::
LoDTensor
>
();
return
tensor
;
}
std
::
vector
<
int64_t
>
ZeroCopyTensor
::
shape
()
{
auto
*
tensor
=
static_cast
<
framework
::
LoDTensor
*>
(
FindTensor
());
PADDLE_ENFORCE
(
tensor
,
"not found tensor called %s in the scope"
,
name_
);
return
framework
::
vectorize
(
tensor
->
dims
());
}
void
ZeroCopyTensor
::
SetLoD
(
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
x
)
{
auto
*
tensor
=
static_cast
<
framework
::
LoDTensor
*>
(
FindTensor
());
framework
::
LoD
lod
;
for
(
auto
&
level
:
x
)
{
lod
.
emplace_back
(
level
);
}
tensor
->
set_lod
(
lod
);
}
std
::
vector
<
std
::
vector
<
size_t
>>
ZeroCopyTensor
::
lod
()
const
{
std
::
vector
<
std
::
vector
<
size_t
>>
res
;
auto
*
tensor
=
static_cast
<
framework
::
LoDTensor
*>
(
FindTensor
());
for
(
auto
&
level
:
tensor
->
lod
())
{
res
.
emplace_back
(
level
);
}
return
res
;
}
}
// namespace paddle
paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc
0 → 100644
浏览文件 @
b31905c5
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace
paddle
{
void
ZeroCopyTensor
::
Reshape
(
const
std
::
vector
<
int
>
&
shape
)
{}
template
<
typename
T
>
T
*
ZeroCopyTensor
::
mutable_data
(
PaddlePlace
place
)
{
return
nullptr
;
}
template
<
typename
T
>
T
*
ZeroCopyTensor
::
data
(
PaddlePlace
*
place
,
int
*
size
)
{
return
nullptr
;
}
template
float
*
ZeroCopyTensor
::
data
<
float
>(
PaddlePlace
*
place
,
int
*
size
);
template
int64_t
*
ZeroCopyTensor
::
data
<
int64_t
>(
PaddlePlace
*
place
,
int
*
size
);
template
float
*
ZeroCopyTensor
::
mutable_data
(
PaddlePlace
place
);
template
int64_t
*
ZeroCopyTensor
::
mutable_data
(
PaddlePlace
place
);
void
*
ZeroCopyTensor
::
FindTensor
()
const
{
return
nullptr
;
}
std
::
vector
<
int64_t
>
ZeroCopyTensor
::
shape
()
{
return
{};
}
void
ZeroCopyTensor
::
SetLoD
(
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
x
)
{}
std
::
vector
<
std
::
vector
<
size_t
>>
ZeroCopyTensor
::
lod
()
const
{
return
std
::
vector
<
std
::
vector
<
size_t
>>
();
}
}
// namespace paddle
paddle/fluid/inference/api/helper.h
浏览文件 @
b31905c5
...
@@ -21,8 +21,10 @@
...
@@ -21,8 +21,10 @@
#include <sstream>
#include <sstream>
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/timer.h"
#include "paddle/fluid/inference/api/timer.h"
#include "paddle/fluid/string/printf.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
...
@@ -93,6 +95,20 @@ static void TensorAssignData(PaddleTensor *tensor,
...
@@ -93,6 +95,20 @@ static void TensorAssignData(PaddleTensor *tensor,
}
}
}
}
template
<
typename
T
>
static
int
ZeroCopyTensorAssignData
(
ZeroCopyTensor
*
tensor
,
const
std
::
vector
<
std
::
vector
<
T
>>
&
data
)
{
int
size
{
0
};
auto
*
ptr
=
tensor
->
mutable_data
<
T
>
(
PaddlePlace
::
kCPU
);
int
c
=
0
;
for
(
const
auto
&
f
:
data
)
{
for
(
T
v
:
f
)
{
ptr
[
c
++
]
=
v
;
}
}
return
size
;
}
static
std
::
string
DescribeTensor
(
const
PaddleTensor
&
tensor
)
{
static
std
::
string
DescribeTensor
(
const
PaddleTensor
&
tensor
)
{
std
::
stringstream
os
;
std
::
stringstream
os
;
os
<<
"Tensor ["
<<
tensor
.
name
<<
"]
\n
"
;
os
<<
"Tensor ["
<<
tensor
.
name
<<
"]
\n
"
;
...
@@ -138,5 +154,127 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
...
@@ -138,5 +154,127 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
}
}
}
}
template
<
typename
T
>
std
::
string
LoDTensorSummary
(
const
framework
::
LoDTensor
&
tensor
)
{
std
::
stringstream
ss
;
ss
<<
"
\n
---- tensor ---"
<<
'\n'
;
ss
<<
"lod: ["
;
for
(
const
auto
&
level
:
tensor
.
lod
())
{
ss
<<
"[ "
;
for
(
auto
i
:
level
)
{
ss
<<
i
<<
", "
;
}
ss
<<
"]"
;
}
ss
<<
"]
\n
"
;
ss
<<
"shape: ["
;
int
size
=
1
;
for
(
int
i
=
0
;
i
<
tensor
.
dims
().
size
();
i
++
)
{
int
dim
=
tensor
.
dims
()[
i
];
ss
<<
dim
<<
", "
;
size
*=
dim
;
}
ss
<<
"]
\n
"
;
ss
<<
"data: "
;
for
(
int
i
=
0
;
i
<
std
::
min
(
20
,
size
);
i
++
)
{
ss
<<
tensor
.
data
<
T
>
()[
i
]
<<
" "
;
}
ss
<<
"
\n
"
;
return
ss
.
str
();
}
static
bool
CompareLoD
(
const
framework
::
LoD
&
a
,
const
framework
::
LoD
&
b
)
{
if
(
a
.
size
()
!=
b
.
size
())
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"lod size not match %d != %d"
,
a
.
size
(),
b
.
size
());
return
false
;
}
for
(
size_t
i
=
0
;
i
<
a
.
size
();
i
++
)
{
auto
&
al
=
a
[
i
];
auto
&
bl
=
b
[
i
];
if
(
al
.
size
()
!=
bl
.
size
())
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"level size %d != %d"
,
al
.
size
(),
bl
.
size
());
return
false
;
}
}
return
true
;
}
static
bool
CompareShape
(
const
std
::
vector
<
int64_t
>
&
a
,
const
std
::
vector
<
int64_t
>
&
b
)
{
if
(
a
.
size
()
!=
b
.
size
())
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"shape size not match %d != %d"
,
a
.
size
(),
b
.
size
());
return
false
;
}
for
(
size_t
i
=
0
;
i
<
a
.
size
();
i
++
)
{
if
(
a
[
i
]
!=
b
[
i
])
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"shape %d-th element not match %d != %d"
,
i
,
a
[
i
],
b
[
i
]);
return
false
;
}
}
return
true
;
}
static
bool
CompareTensorData
(
const
framework
::
LoDTensor
&
a
,
const
framework
::
LoDTensor
&
b
)
{
auto
a_shape
=
framework
::
vectorize
(
a
.
dims
());
auto
b_shape
=
framework
::
vectorize
(
b
.
dims
());
size_t
a_size
=
std
::
accumulate
(
a_shape
.
begin
(),
a_shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
size_t
b_size
=
std
::
accumulate
(
b_shape
.
begin
(),
b_shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
if
(
a_size
!=
b_size
)
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"tensor data size not match, %d != %d"
,
a_size
,
b_size
);
}
for
(
size_t
i
=
0
;
i
<
a_size
;
i
++
)
{
if
(
a
.
type
()
==
typeid
(
float
))
{
const
auto
*
a_data
=
a
.
data
<
float
>
();
const
auto
*
b_data
=
b
.
data
<
float
>
();
if
(
std
::
abs
(
a_data
[
i
]
-
b_data
[
i
])
>
1e-3
)
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"tensor data %d-th element not match, %f != %f"
,
i
,
a_data
[
i
],
b_data
[
i
]);
return
false
;
}
}
else
if
(
a
.
type
()
==
typeid
(
int64_t
))
{
const
auto
*
a_data
=
a
.
data
<
int64_t
>
();
const
auto
*
b_data
=
b
.
data
<
int64_t
>
();
if
(
std
::
abs
(
a_data
[
i
]
-
b_data
[
i
])
>
1e-3
)
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"tensor data %d-th element not match, %f != %f"
,
i
,
a_data
[
i
],
b_data
[
i
]);
return
false
;
}
}
}
return
true
;
}
static
bool
CompareTensor
(
const
framework
::
LoDTensor
&
a
,
const
framework
::
LoDTensor
&
b
)
{
if
(
!
CompareLoD
(
a
.
lod
(),
b
.
lod
()))
{
return
false
;
}
if
(
!
CompareShape
(
framework
::
vectorize
(
a
.
dims
()),
framework
::
vectorize
(
b
.
dims
())))
{
return
false
;
}
if
(
!
CompareTensorData
(
a
,
b
))
{
return
false
;
}
return
true
;
}
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/api/paddle_inference_api.h
浏览文件 @
b31905c5
...
@@ -101,6 +101,40 @@ struct PaddleTensor {
...
@@ -101,6 +101,40 @@ struct PaddleTensor {
std
::
vector
<
std
::
vector
<
size_t
>>
lod
;
// Tensor+LoD equals LoDTensor
std
::
vector
<
std
::
vector
<
size_t
>>
lod
;
// Tensor+LoD equals LoDTensor
};
};
enum
class
PaddlePlace
{
kUNK
=
-
1
,
kCPU
,
kGPU
};
// Tensor without copy, currently only supports AnalysisPredictor.
class
ZeroCopyTensor
{
public:
void
Reshape
(
const
std
::
vector
<
int
>&
shape
);
// Get the memory in CPU or GPU with specific data type, should Reshape first
// to tell the data size.
// Once can directly call this data to feed the data.
// This is for write the input tensor.
template
<
typename
T
>
T
*
mutable_data
(
PaddlePlace
place
);
// Get the memory directly, will return the place and memory size by pointer.
// This is for reading the output tensor.
template
<
typename
T
>
T
*
data
(
PaddlePlace
*
place
,
int
*
size
);
std
::
vector
<
int64_t
>
shape
();
void
SetLoD
(
const
std
::
vector
<
std
::
vector
<
size_t
>>&
x
);
std
::
vector
<
std
::
vector
<
size_t
>>
lod
()
const
;
protected:
ZeroCopyTensor
(
void
*
scope
)
:
scope_
{
scope
}
{}
void
SetName
(
const
std
::
string
&
name
)
{
name_
=
name
;
}
void
*
FindTensor
()
const
;
private:
std
::
string
name_
;
bool
input_or_output_
;
friend
class
AnalysisPredictor
;
void
*
scope_
{
nullptr
};
};
/*
/*
* A simple Inference API for Paddle.
* A simple Inference API for Paddle.
*/
*/
...
@@ -120,6 +154,19 @@ class PaddlePredictor {
...
@@ -120,6 +154,19 @@ class PaddlePredictor {
std
::
vector
<
PaddleTensor
>*
output_data
,
std
::
vector
<
PaddleTensor
>*
output_data
,
int
batch_size
=
-
1
)
=
0
;
int
batch_size
=
-
1
)
=
0
;
// Zero copy input and output optimization.
// Get the input or output tensors, and operate on their memory directly,
// without copy.
virtual
std
::
unique_ptr
<
ZeroCopyTensor
>
GetInputTensor
(
const
std
::
string
&
name
)
{
return
nullptr
;
}
virtual
std
::
unique_ptr
<
ZeroCopyTensor
>
GetOutputTensor
(
const
std
::
string
&
name
)
{
return
nullptr
;
}
virtual
bool
ZeroCopyRun
()
{
return
false
;
}
// Clone a predictor that share the model weights, the Cloned predictor should
// Clone a predictor that share the model weights, the Cloned predictor should
// be thread-safe.
// be thread-safe.
virtual
std
::
unique_ptr
<
PaddlePredictor
>
Clone
()
=
0
;
virtual
std
::
unique_ptr
<
PaddlePredictor
>
Clone
()
=
0
;
...
@@ -218,7 +265,12 @@ struct AnalysisConfig : public NativeConfig {
...
@@ -218,7 +265,12 @@ struct AnalysisConfig : public NativeConfig {
IrPassMode
ir_mode
{
IrPassMode
::
kExclude
};
IrPassMode
ir_mode
{
IrPassMode
::
kExclude
};
std
::
vector
<
std
::
string
>
ir_passes
;
std
::
vector
<
std
::
string
>
ir_passes
;
// NOTE this is just for internal development, please not use it.
// NOT stable yet.
bool
use_feed_fetch_ops
{
true
};
// NOTE this is just for internal development, please not use it. NOT
// stable
// yet.
bool
_use_mkldnn
{
false
};
bool
_use_mkldnn
{
false
};
};
};
...
...
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
浏览文件 @
b31905c5
...
@@ -18,6 +18,8 @@ namespace paddle {
...
@@ -18,6 +18,8 @@ namespace paddle {
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
using
contrib
::
AnalysisConfig
;
struct
DataRecord
{
struct
DataRecord
{
std
::
vector
<
int64_t
>
data
;
std
::
vector
<
int64_t
>
data
;
std
::
vector
<
size_t
>
lod
;
std
::
vector
<
size_t
>
lod
;
...
@@ -78,6 +80,7 @@ struct DataRecord {
...
@@ -78,6 +80,7 @@ struct DataRecord {
}
}
}
}
}
}
DataRecord
NextBatch
()
{
DataRecord
NextBatch
()
{
DataRecord
data
;
DataRecord
data
;
data
.
data
=
batched_datas
[
batch_iter
];
data
.
data
=
batched_datas
[
batch_iter
];
...
@@ -155,7 +158,9 @@ TEST(Analyzer_LAC, fuse_statis) {
...
@@ -155,7 +158,9 @@ TEST(Analyzer_LAC, fuse_statis) {
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
int
num_ops
;
int
num_ops
;
auto
fuse_statis
=
GetFuseStatis
(
cfg
,
&
num_ops
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
cfg
);
auto
fuse_statis
=
GetFuseStatis
(
static_cast
<
AnalysisPredictor
*>
(
predictor
.
get
()),
&
num_ops
);
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
...
...
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
浏览文件 @
b31905c5
...
@@ -16,6 +16,7 @@
...
@@ -16,6 +16,7 @@
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
using
contrib
::
AnalysisConfig
;
struct
DataRecord
{
struct
DataRecord
{
std
::
vector
<
std
::
vector
<
int64_t
>>
word_data_all
,
mention_data_all
;
std
::
vector
<
std
::
vector
<
int64_t
>>
word_data_all
,
mention_data_all
;
...
@@ -145,7 +146,9 @@ TEST(Analyzer_Chinese_ner, fuse_statis) {
...
@@ -145,7 +146,9 @@ TEST(Analyzer_Chinese_ner, fuse_statis) {
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
int
num_ops
;
int
num_ops
;
auto
fuse_statis
=
GetFuseStatis
(
cfg
,
&
num_ops
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
cfg
);
auto
fuse_statis
=
GetFuseStatis
(
static_cast
<
AnalysisPredictor
*>
(
predictor
.
get
()),
&
num_ops
);
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
...
...
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
浏览文件 @
b31905c5
...
@@ -12,12 +12,16 @@
...
@@ -12,12 +12,16 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
DEFINE_bool
(
with_precision_check
,
true
,
"turn on test"
);
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
using
namespace
framework
;
// NOLINT
using
namespace
framework
;
// NOLINT
using
namespace
contrib
;
// NOLINT
struct
DataRecord
{
struct
DataRecord
{
std
::
vector
<
std
::
vector
<
std
::
vector
<
float
>>>
link_step_data_all
;
std
::
vector
<
std
::
vector
<
std
::
vector
<
float
>>>
link_step_data_all
;
...
@@ -29,10 +33,12 @@ struct DataRecord {
...
@@ -29,10 +33,12 @@ struct DataRecord {
size_t
batch_iter
{
0
};
size_t
batch_iter
{
0
};
size_t
batch_size
{
1
};
size_t
batch_size
{
1
};
DataRecord
()
=
default
;
DataRecord
()
=
default
;
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
:
batch_size
(
batch_size
)
{
:
batch_size
(
batch_size
)
{
Load
(
path
);
Load
(
path
);
}
}
DataRecord
NextBatch
()
{
DataRecord
NextBatch
()
{
DataRecord
data
;
DataRecord
data
;
size_t
batch_end
=
batch_iter
+
batch_size
;
size_t
batch_end
=
batch_iter
+
batch_size
;
...
@@ -101,6 +107,7 @@ struct DataRecord {
...
@@ -101,6 +107,7 @@ struct DataRecord {
num_samples
=
num_lines
;
num_samples
=
num_lines
;
}
}
};
};
void
PrepareInputs
(
std
::
vector
<
PaddleTensor
>
*
input_slots
,
DataRecord
*
data
,
void
PrepareInputs
(
std
::
vector
<
PaddleTensor
>
*
input_slots
,
DataRecord
*
data
,
int
batch_size
)
{
int
batch_size
)
{
PaddleTensor
lod_attention_tensor
,
init_zero_tensor
,
lod_tensor_tensor
,
PaddleTensor
lod_attention_tensor
,
init_zero_tensor
,
lod_tensor_tensor
,
...
@@ -149,7 +156,55 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
...
@@ -149,7 +156,55 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
}
}
}
}
void
SetConfig
(
contrib
::
AnalysisConfig
*
cfg
)
{
void
PrepareZeroCopyInputs
(
ZeroCopyTensor
*
lod_attention_tensor
,
ZeroCopyTensor
*
cell_init_tensor
,
ZeroCopyTensor
*
data_tensor
,
ZeroCopyTensor
*
hidden_init_tensor
,
ZeroCopyTensor
*
week_tensor
,
ZeroCopyTensor
*
minute_tensor
,
DataRecord
*
data_record
,
int
batch_size
)
{
auto
one_batch
=
data_record
->
NextBatch
();
std
::
vector
<
int
>
rnn_link_data_shape
(
{
static_cast
<
int
>
(
one_batch
.
rnn_link_data
.
size
()),
static_cast
<
int
>
(
one_batch
.
rnn_link_data
.
front
().
size
())});
lod_attention_tensor
->
Reshape
({
1
,
2
});
lod_attention_tensor
->
SetLoD
({
one_batch
.
lod1
,
one_batch
.
lod2
});
cell_init_tensor
->
Reshape
({
batch_size
,
15
});
cell_init_tensor
->
SetLoD
({
one_batch
.
lod3
});
hidden_init_tensor
->
Reshape
({
batch_size
,
15
});
hidden_init_tensor
->
SetLoD
({
one_batch
.
lod3
});
data_tensor
->
Reshape
(
rnn_link_data_shape
);
data_tensor
->
SetLoD
({
one_batch
.
lod1
});
week_tensor
->
Reshape
(
{
static_cast
<
int
>
(
one_batch
.
rnn_week_datas
.
size
()),
static_cast
<
int
>
(
one_batch
.
rnn_week_datas
.
front
().
size
())});
week_tensor
->
SetLoD
({
one_batch
.
lod3
});
minute_tensor
->
Reshape
(
{
static_cast
<
int
>
(
one_batch
.
rnn_minute_datas
.
size
()),
static_cast
<
int
>
(
one_batch
.
rnn_minute_datas
.
front
().
size
())});
minute_tensor
->
SetLoD
({
one_batch
.
lod3
});
// assign data
float
arr0
[]
=
{
0
,
0
};
std
::
vector
<
float
>
zeros
(
batch_size
*
15
,
0
);
std
::
copy_n
(
arr0
,
2
,
lod_attention_tensor
->
mutable_data
<
float
>
(
PaddlePlace
::
kCPU
));
std
::
copy_n
(
arr0
,
2
,
data_tensor
->
mutable_data
<
float
>
(
PaddlePlace
::
kCPU
));
std
::
copy_n
(
zeros
.
begin
(),
zeros
.
size
(),
cell_init_tensor
->
mutable_data
<
float
>
(
PaddlePlace
::
kCPU
));
std
::
copy_n
(
zeros
.
begin
(),
zeros
.
size
(),
hidden_init_tensor
->
mutable_data
<
float
>
(
PaddlePlace
::
kCPU
));
ZeroCopyTensorAssignData
(
data_tensor
,
one_batch
.
rnn_link_data
);
ZeroCopyTensorAssignData
(
week_tensor
,
one_batch
.
rnn_week_datas
);
ZeroCopyTensorAssignData
(
minute_tensor
,
one_batch
.
rnn_minute_datas
);
}
void
SetConfig
(
AnalysisConfig
*
cfg
)
{
cfg
->
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
cfg
->
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
cfg
->
param_file
=
FLAGS_infer_model
+
"/param"
;
cfg
->
param_file
=
FLAGS_infer_model
+
"/param"
;
cfg
->
use_gpu
=
false
;
cfg
->
use_gpu
=
false
;
...
@@ -187,7 +242,9 @@ TEST(Analyzer_rnn1, fuse_statis) {
...
@@ -187,7 +242,9 @@ TEST(Analyzer_rnn1, fuse_statis) {
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
int
num_ops
;
int
num_ops
;
auto
fuse_statis
=
GetFuseStatis
(
cfg
,
&
num_ops
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
cfg
);
auto
fuse_statis
=
GetFuseStatis
(
static_cast
<
AnalysisPredictor
*>
(
predictor
.
get
()),
&
num_ops
);
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_nobias_lstm_fuse"
),
2
);
// bi-directional LSTM
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_nobias_lstm_fuse"
),
2
);
// bi-directional LSTM
...
@@ -214,7 +271,229 @@ TEST(Analyzer_rnn1, multi_thread) {
...
@@ -214,7 +271,229 @@ TEST(Analyzer_rnn1, multi_thread) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
4
/* num_threads */
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
}
bool
CompareTensors
(
framework
::
Scope
&
a_scope
,
framework
::
Scope
&
b_scope
,
const
std
::
vector
<
std
::
string
>
&
tensors
)
{
for
(
auto
&
x
:
tensors
)
{
auto
*
a_var
=
a_scope
.
FindVar
(
x
);
auto
*
b_var
=
b_scope
.
FindVar
(
x
);
if
(
a_var
&&
b_var
)
{
if
(
a_var
->
Type
()
==
typeid
(
framework
::
LoDTensor
)
||
a_var
->
Type
()
==
typeid
(
framework
::
Tensor
))
{
LOG
(
INFO
)
<<
"comparing tensor "
<<
x
;
auto
&
a_t
=
a_var
->
Get
<
framework
::
LoDTensor
>
();
auto
&
b_t
=
b_var
->
Get
<
framework
::
LoDTensor
>
();
if
(
!
inference
::
CompareTensor
(
a_t
,
b_t
))
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"tensor %s not match in two scopes"
,
x
);
}
}
else
{
LOG
(
INFO
)
<<
"skip no tensor "
<<
x
;
}
}
else
{
LOG
(
INFO
)
<<
"skip tensor "
<<
x
;
}
}
return
true
;
}
// Validate that the AnalysisPredictor + ZeroCopyTensor really works by testing
// on the complex RNN1 model.
TEST
(
Analyzer_rnn1
,
ZeroCopy
)
{
AnalysisConfig
config
;
SetConfig
(
&
config
);
config
.
use_feed_fetch_ops
=
false
;
PaddlePlace
place
;
int
output_size
{
0
};
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
config
.
use_feed_fetch_ops
=
true
;
auto
native_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
config
.
use_feed_fetch_ops
=
true
;
// the analysis predictor needs feed/fetch.
auto
analysis_predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
#define NEW_TENSOR(name__) \
auto name__##_tensor = predictor->GetInputTensor(#name__);
NEW_TENSOR
(
data_lod_attention
);
NEW_TENSOR
(
cell_init
);
NEW_TENSOR
(
data
);
NEW_TENSOR
(
week
);
NEW_TENSOR
(
minute
);
NEW_TENSOR
(
hidden_init
);
// Prepare data for AnalysisPredictor
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
PrepareZeroCopyInputs
(
data_lod_attention_tensor
.
get
(),
cell_init_tensor
.
get
(),
data_tensor
.
get
(),
hidden_init_tensor
.
get
(),
week_tensor
.
get
(),
minute_tensor
.
get
(),
&
data
,
FLAGS_batch_size
);
// Prepare data for NativePredictor
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
native_inputs
;
SetInput
(
&
native_inputs
);
std
::
vector
<
PaddleTensor
>
native_outputs
;
std
::
vector
<
PaddleTensor
>
analysis_outputs
;
auto
output_tensor
=
predictor
->
GetOutputTensor
(
"final_output.tmp_1"
);
// Run analysis predictor
int
num_ops
;
auto
fuse_statis
=
GetFuseStatis
(
predictor
.
get
(),
&
num_ops
);
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
ASSERT_EQ
(
fuse_statis
.
at
(
"fc_nobias_lstm_fuse"
),
2
);
// bi-directional LSTM
ASSERT_EQ
(
fuse_statis
.
at
(
"seq_concat_fc_fuse"
),
1
);
ASSERT_EQ
(
num_ops
,
13
);
// After graph optimization, only 13 operators exists.
Timer
timer
;
double
total_time
{
0
};
double
native_total_time
{
0
};
double
analysis_total_time
{
0.
};
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
timer
.
tic
();
predictor
->
ZeroCopyRun
();
total_time
+=
timer
.
toc
();
}
auto
*
output_data
=
output_tensor
->
data
<
float
>
(
&
place
,
&
output_size
);
ASSERT_GT
(
output_size
,
0
);
// more than one output!
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
// Run native predictor.
timer
.
tic
();
ASSERT_TRUE
(
native_predictor
->
Run
(
native_inputs
.
front
(),
&
native_outputs
));
native_total_time
+=
timer
.
toc
();
}
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
timer
.
tic
();
ASSERT_TRUE
(
analysis_predictor
->
Run
(
native_inputs
.
front
(),
&
analysis_outputs
));
analysis_total_time
+=
timer
.
toc
();
}
if
(
!
FLAGS_with_precision_check
)
{
return
;
}
int
native_output_size
=
VecReduceToInt
(
native_outputs
.
front
().
shape
);
EXPECT_EQ
(
native_output_size
,
output_size
);
// Compare tensors between analysis and zerocopy
auto
*
p0
=
static_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
auto
*
p1
=
static_cast
<
AnalysisPredictor
*>
(
analysis_predictor
.
get
());
auto
*
p2
=
static_cast
<
NativePaddlePredictor
*>
(
native_predictor
.
get
());
std
::
vector
<
std
::
string
>
tensor_names
;
for
(
auto
&
var_desc
:
p0
->
program
().
Block
(
0
).
AllVars
())
{
tensor_names
.
push_back
(
var_desc
->
Name
());
}
LOG
(
INFO
)
<<
"Comparing tensors"
;
ASSERT_TRUE
(
CompareTensors
(
*
p0
->
scope
(),
*
p1
->
scope
(),
{
"final_output.tmp_1"
}));
ASSERT_TRUE
(
CompareTensors
(
*
p0
->
scope
(),
*
p2
->
scope
(),
{
"final_output.tmp_1"
}));
LOG
(
INFO
)
<<
"output1 "
<<
inference
::
LoDTensorSummary
<
float
>
(
p0
->
scope
()
->
FindVar
(
"final_output.tmp_1"
)
->
Get
<
framework
::
LoDTensor
>
());
LOG
(
INFO
)
<<
"output2 "
<<
inference
::
LoDTensorSummary
<
float
>
(
p1
->
scope
()
->
FindVar
(
"final_output.tmp_1"
)
->
Get
<
framework
::
LoDTensor
>
());
LOG
(
INFO
)
<<
"output3 "
<<
inference
::
LoDTensorSummary
<
float
>
(
p2
->
scope
()
->
FindVar
(
"final_output.tmp_1"
)
->
Get
<
framework
::
LoDTensor
>
());
for
(
int
i
=
0
;
i
<
output_size
;
i
++
)
{
LOG
(
INFO
)
<<
output_data
[
i
]
<<
" "
<<
static_cast
<
float
*>
(
native_outputs
.
front
().
data
.
data
())[
i
]
<<
" "
<<
static_cast
<
float
*>
(
analysis_outputs
.
front
().
data
.
data
())[
i
];
EXPECT_NEAR
(
output_data
[
i
],
static_cast
<
float
*>
(
native_outputs
.
front
().
data
.
data
())[
i
],
1e-3
);
}
LOG
(
INFO
)
<<
"batch_size: "
<<
FLAGS_batch_size
;
LOG
(
INFO
)
<<
"zero average time: "
<<
total_time
/
(
FLAGS_repeat
*
FLAGS_batch_size
);
LOG
(
INFO
)
<<
"analysis average time: "
<<
analysis_total_time
/
(
FLAGS_repeat
*
FLAGS_batch_size
);
LOG
(
INFO
)
<<
"native average time: "
<<
native_total_time
/
(
FLAGS_repeat
*
FLAGS_batch_size
);
}
TEST
(
Analyzer_rnn1
,
ZeroCopyMultiThread
)
{
AnalysisConfig
config
;
SetConfig
(
&
config
);
config
.
use_feed_fetch_ops
=
false
;
#define NEW_TENSOR(name__) \
auto name__##_tensor = predictor->GetInputTensor(#name__);
auto
base_predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
config
);
double
total_time_of_threads
{
0
};
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
unique_ptr
<
PaddlePredictor
>>
predictors
;
for
(
int
tid
=
0
;
tid
<
FLAGS_num_threads
;
tid
++
)
{
predictors
.
emplace_back
(
CreatePaddlePredictor
<
AnalysisConfig
>
(
config
));
}
for
(
int
tid
=
0
;
tid
<
FLAGS_num_threads
;
tid
++
)
{
threads
.
emplace_back
([
config
,
&
total_time_of_threads
,
&
predictors
,
tid
]
{
// auto predictor = base_predictor->Clone();
auto
&
predictor
=
predictors
[
tid
];
NEW_TENSOR
(
data_lod_attention
);
NEW_TENSOR
(
cell_init
);
NEW_TENSOR
(
data
);
NEW_TENSOR
(
week
);
NEW_TENSOR
(
minute
);
NEW_TENSOR
(
hidden_init
);
// Prepare data for AnalysisPredictor
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
Timer
timer
;
double
total_time
{
0
};
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
PrepareZeroCopyInputs
(
data_lod_attention_tensor
.
get
(),
cell_init_tensor
.
get
(),
data_tensor
.
get
(),
hidden_init_tensor
.
get
(),
week_tensor
.
get
(),
minute_tensor
.
get
(),
&
data
,
FLAGS_batch_size
);
timer
.
tic
();
predictor
->
ZeroCopyRun
();
total_time
+=
timer
.
toc
();
}
total_time_of_threads
+=
total_time
;
LOG
(
INFO
)
<<
"thread time: "
<<
total_time
/
FLAGS_repeat
;
});
}
for
(
auto
&
t
:
threads
)
{
t
.
join
();
}
LOG
(
INFO
)
<<
"average time: "
<<
total_time_of_threads
/
FLAGS_num_threads
/
FLAGS_repeat
;
}
}
}
// namespace inference
}
// namespace inference
...
...
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
浏览文件 @
b31905c5
...
@@ -182,7 +182,8 @@ TEST(Analyzer_seq_conv1, fuse_statis) {
...
@@ -182,7 +182,8 @@ TEST(Analyzer_seq_conv1, fuse_statis) {
AnalysisConfig
cfg
;
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
int
num_ops
;
int
num_ops
;
auto
fuse_statis
=
GetFuseStatis
(
cfg
,
&
num_ops
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
cfg
);
GetFuseStatis
(
predictor
.
get
(),
&
num_ops
);
}
}
// Compare result of NativeConfig and AnalysisConfig
// Compare result of NativeConfig and AnalysisConfig
...
...
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
浏览文件 @
b31905c5
...
@@ -19,6 +19,7 @@ limitations under the License. */
...
@@ -19,6 +19,7 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
using
contrib
::
AnalysisConfig
;
struct
Record
{
struct
Record
{
std
::
vector
<
float
>
data
;
std
::
vector
<
float
>
data
;
...
@@ -114,7 +115,8 @@ TEST(Analyzer_vis, fuse_statis) {
...
@@ -114,7 +115,8 @@ TEST(Analyzer_vis, fuse_statis) {
AnalysisConfig
cfg
;
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
int
num_ops
;
int
num_ops
;
GetFuseStatis
(
cfg
,
&
num_ops
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
cfg
);
GetFuseStatis
(
predictor
.
get
(),
&
num_ops
);
}
}
// Compare result of NativeConfig and AnalysisConfig
// Compare result of NativeConfig and AnalysisConfig
...
...
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
b31905c5
...
@@ -86,11 +86,9 @@ std::unique_ptr<PaddlePredictor> CreateTestPredictor(
...
@@ -86,11 +86,9 @@ std::unique_ptr<PaddlePredictor> CreateTestPredictor(
size_t
GetSize
(
const
PaddleTensor
&
out
)
{
return
VecReduceToInt
(
out
.
shape
);
}
size_t
GetSize
(
const
PaddleTensor
&
out
)
{
return
VecReduceToInt
(
out
.
shape
);
}
std
::
unordered_map
<
std
::
string
,
int
>
GetFuseStatis
(
AnalysisConfig
config
,
std
::
unordered_map
<
std
::
string
,
int
>
GetFuseStatis
(
PaddlePredictor
*
predictor
,
int
*
num_ops
)
{
int
*
num_ops
)
{
auto
predictor
=
CreateTestPredictor
(
config
);
auto
*
analysis_predictor
=
static_cast
<
AnalysisPredictor
*>
(
predictor
);
AnalysisPredictor
*
analysis_predictor
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
auto
&
fuse_statis
=
analysis_predictor
->
analysis_argument
()
auto
&
fuse_statis
=
analysis_predictor
->
analysis_argument
()
.
Get
<
std
::
unordered_map
<
std
::
string
,
int
>>
(
.
Get
<
std
::
unordered_map
<
std
::
string
,
int
>>
(
framework
::
ir
::
kFuseStatisAttr
);
framework
::
ir
::
kFuseStatisAttr
);
...
...
paddle/fluid/memory/malloc.cc
浏览文件 @
b31905c5
...
@@ -36,6 +36,8 @@ namespace memory {
...
@@ -36,6 +36,8 @@ namespace memory {
using
BuddyAllocator
=
detail
::
BuddyAllocator
;
using
BuddyAllocator
=
detail
::
BuddyAllocator
;
BuddyAllocator
*
GetCPUBuddyAllocator
()
{
BuddyAllocator
*
GetCPUBuddyAllocator
()
{
// We tried thread_local for inference::RNN1 model, but that not works much
// for multi-thread test.
static
std
::
once_flag
init_flag
;
static
std
::
once_flag
init_flag
;
static
detail
::
BuddyAllocator
*
a
=
nullptr
;
static
detail
::
BuddyAllocator
*
a
=
nullptr
;
...
@@ -48,6 +50,25 @@ BuddyAllocator* GetCPUBuddyAllocator() {
...
@@ -48,6 +50,25 @@ BuddyAllocator* GetCPUBuddyAllocator() {
return
a
;
return
a
;
}
}
// We compared the NaiveAllocator with BuddyAllocator in CPU memory allocation,
// seems they are almost the same overhead.
struct
NaiveAllocator
{
void
*
Alloc
(
size_t
size
)
{
return
malloc
(
size
);
}
void
Free
(
void
*
p
)
{
PADDLE_ENFORCE
(
p
);
free
(
p
);
}
static
NaiveAllocator
*
Instance
()
{
static
NaiveAllocator
x
;
return
&
x
;
}
private:
std
::
mutex
lock_
;
};
template
<
>
template
<
>
void
*
Alloc
<
platform
::
CPUPlace
>
(
platform
::
CPUPlace
place
,
size_t
size
)
{
void
*
Alloc
<
platform
::
CPUPlace
>
(
platform
::
CPUPlace
place
,
size_t
size
)
{
VLOG
(
10
)
<<
"Allocate "
<<
size
<<
" bytes on "
<<
platform
::
Place
(
place
);
VLOG
(
10
)
<<
"Allocate "
<<
size
<<
" bytes on "
<<
platform
::
Place
(
place
);
...
...
paddle/fluid/string/pretty_log.h
浏览文件 @
b31905c5
...
@@ -56,13 +56,13 @@ struct Style {
...
@@ -56,13 +56,13 @@ struct Style {
};
};
template
<
typename
...
Args
>
template
<
typename
...
Args
>
static
void
PrettyLogEndl
(
const
std
::
string
&
style
,
const
char
*
fmt
,
static
void
PrettyLogEndl
(
const
std
::
string
&
style
,
const
char
*
fmt
,
const
Args
&
...
args
)
{
const
Args
&
...
args
)
{
std
::
cerr
<<
style
<<
Sprintf
(
fmt
,
args
...)
<<
reset
()
<<
std
::
endl
;
std
::
cerr
<<
style
<<
Sprintf
(
fmt
,
args
...)
<<
reset
()
<<
std
::
endl
;
}
}
template
<
typename
...
Args
>
template
<
typename
...
Args
>
static
void
PrettyLog
(
const
std
::
string
&
style
,
const
char
*
fmt
,
static
void
PrettyLog
(
const
std
::
string
&
style
,
const
char
*
fmt
,
const
Args
&
...
args
)
{
const
Args
&
...
args
)
{
std
::
cerr
<<
style
<<
Sprintf
(
fmt
,
args
...)
<<
reset
();
std
::
cerr
<<
style
<<
Sprintf
(
fmt
,
args
...)
<<
reset
();
}
}
...
...
python/CMakeLists.txt
浏览文件 @
b31905c5
...
@@ -87,6 +87,7 @@ if (WITH_TESTING)
...
@@ -87,6 +87,7 @@ if (WITH_TESTING)
endif
()
endif
()
endif
()
endif
()
add_subdirectory
(
paddle/fluid/tests
)
add_subdirectory
(
paddle/fluid/tests
)
add_subdirectory
(
paddle/fluid/contrib/tests
)
endif
()
endif
()
install
(
DIRECTORY
${
PADDLE_PYTHON_PACKAGE_DIR
}
install
(
DIRECTORY
${
PADDLE_PYTHON_PACKAGE_DIR
}
DESTINATION opt/paddle/share/wheels
DESTINATION opt/paddle/share/wheels
...
...
python/paddle/fluid/contrib/__init__.py
浏览文件 @
b31905c5
...
@@ -20,8 +20,11 @@ from . import memory_usage_calc
...
@@ -20,8 +20,11 @@ from . import memory_usage_calc
from
.memory_usage_calc
import
*
from
.memory_usage_calc
import
*
from
.
import
op_frequence
from
.
import
op_frequence
from
.op_frequence
import
*
from
.op_frequence
import
*
from
.
import
quantize
from
.quantize
import
*
__all__
=
[]
__all__
=
[]
__all__
+=
decoder
.
__all__
__all__
+=
decoder
.
__all__
__all__
+=
memory_usage_calc
.
__all__
__all__
+=
memory_usage_calc
.
__all__
__all__
+=
op_frequence
.
__all__
__all__
+=
op_frequence
.
__all__
__all__
+=
quantize
.
__all__
python/paddle/fluid/contrib/quantize/__init__.py
0 → 100644
浏览文件 @
b31905c5
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
from
.
import
quantize_transpiler
from
.quantize_transpiler
import
*
__all__
=
quantize_transpiler
.
__all__
python/paddle/fluid/contrib/quantize/quantize_transpiler.py
0 → 100644
浏览文件 @
b31905c5
此差异已折叠。
点击以展开。
python/paddle/fluid/contrib/tests/CMakeLists.txt
0 → 100644
浏览文件 @
b31905c5
file
(
GLOB TEST_OPS RELATIVE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
"
"test_*.py"
)
string
(
REPLACE
".py"
""
TEST_OPS
"
${
TEST_OPS
}
"
)
foreach
(
src
${
TEST_OPS
}
)
py_test
(
${
src
}
SRCS
${
src
}
.py
)
endforeach
()
python/paddle/fluid/contrib/tests/test_quantize_transpiler.py
0 → 100644
浏览文件 @
b31905c5
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import
numpy
as
np
import
six
import
unittest
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.contrib.quantize.quantize_transpiler
import
_original_var_name
from
paddle.fluid.contrib.quantize.quantize_transpiler
import
QuantizeTranspiler
def
linear_fc
(
num
):
data
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
1
,
32
,
32
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
hidden
=
data
for
_
in
six
.
moves
.
xrange
(
num
):
hidden
=
fluid
.
layers
.
fc
(
hidden
,
size
=
128
,
act
=
'relu'
)
loss
=
fluid
.
layers
.
cross_entropy
(
input
=
hidden
,
label
=
label
)
loss
=
fluid
.
layers
.
mean
(
loss
)
return
loss
def
residual_block
(
num
):
def
conv_bn_layer
(
input
,
ch_out
,
filter_size
,
stride
,
padding
,
act
=
'relu'
,
bias_attr
=
False
):
tmp
=
fluid
.
layers
.
conv2d
(
input
=
input
,
filter_size
=
filter_size
,
num_filters
=
ch_out
,
stride
=
stride
,
padding
=
padding
,
act
=
None
,
bias_attr
=
bias_attr
)
return
fluid
.
layers
.
batch_norm
(
input
=
tmp
,
act
=
act
)
data
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
1
,
32
,
32
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
hidden
=
data
for
_
in
six
.
moves
.
xrange
(
num
):
conv
=
conv_bn_layer
(
hidden
,
16
,
3
,
1
,
1
,
act
=
None
,
bias_attr
=
True
)
short
=
conv_bn_layer
(
hidden
,
16
,
1
,
1
,
0
,
act
=
None
)
hidden
=
fluid
.
layers
.
elementwise_add
(
x
=
conv
,
y
=
short
,
act
=
'relu'
)
fc
=
fluid
.
layers
.
fc
(
input
=
hidden
,
size
=
10
)
loss
=
fluid
.
layers
.
cross_entropy
(
input
=
fc
,
label
=
label
)
loss
=
fluid
.
layers
.
mean
(
loss
)
return
loss
def
conv_net
(
img
,
label
):
conv_pool_1
=
fluid
.
nets
.
simple_img_conv_pool
(
input
=
img
,
filter_size
=
5
,
num_filters
=
20
,
pool_size
=
2
,
pool_stride
=
2
,
act
=
"relu"
)
conv_pool_1
=
fluid
.
layers
.
batch_norm
(
conv_pool_1
)
conv_pool_2
=
fluid
.
nets
.
simple_img_conv_pool
(
input
=
conv_pool_1
,
filter_size
=
5
,
num_filters
=
50
,
pool_size
=
2
,
pool_stride
=
2
,
act
=
"relu"
)
prediction
=
fluid
.
layers
.
fc
(
input
=
conv_pool_2
,
size
=
10
,
act
=
'softmax'
)
loss
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_loss
=
fluid
.
layers
.
mean
(
loss
)
return
avg_loss
class
TestQuantizeTranspiler
(
unittest
.
TestCase
):
def
setUp
(
self
):
# since quant_op and dequant_op is not ready, use cos and sin for test
self
.
weight_quant_op_type
=
'fake_quantize_abs_max'
self
.
dequant_op_type
=
'fake_dequantize_max_abs'
self
.
quantizable_op_and_inputs
=
{
'conv2d'
:
[
'Input'
,
'Filter'
],
'depthwise_conv2d'
:
[
'Input'
,
'Filter'
],
'mul'
:
[
'X'
,
'Y'
]
}
self
.
quantizable_op_grad_and_inputs
=
{
'conv2d_grad'
:
[
'Input'
,
'Filter'
],
'depthwise_conv2d_grad'
:
[
'Input'
,
'Filter'
],
'mul_grad'
:
[
'X'
,
'Y'
]
}
def
check_program
(
self
,
program
):
quantized_ops
=
{}
persistable_vars
=
[
v
.
name
for
v
in
filter
(
lambda
var
:
var
.
persistable
,
program
.
list_vars
())
]
for
block
in
program
.
blocks
:
for
idx
,
op
in
enumerate
(
block
.
ops
):
# check forward
if
op
.
type
in
self
.
quantizable_op_and_inputs
:
for
i
,
arg_name
in
enumerate
(
op
.
input_arg_names
):
quant_op_type
=
self
.
weight_quant_op_type
if
\
_original_var_name
(
arg_name
)
\
in
persistable_vars
else
self
.
act_quant_op_type
self
.
assertTrue
(
arg_name
.
endswith
(
'.quantized.dequantized'
))
if
arg_name
not
in
quantized_ops
:
self
.
assertEqual
(
block
.
ops
[
idx
-
2
*
i
-
1
].
type
,
self
.
dequant_op_type
)
self
.
assertEqual
(
block
.
ops
[
idx
-
2
*
i
-
2
].
type
,
quant_op_type
)
quantized_ops
[
arg_name
]
=
block
.
ops
[
idx
-
2
*
i
-
2
]
else
:
op_idx
=
block
.
ops
.
index
(
quantized_ops
[
arg_name
])
self
.
assertLess
(
op_idx
,
idx
)
# check backward
if
op
.
type
in
self
.
quantizable_op_grad_and_inputs
:
for
pname
in
self
.
quantizable_op_grad_and_inputs
[
op
.
type
]:
arg_name
=
op
.
input
(
pname
)[
0
]
self
.
assertTrue
(
arg_name
.
endswith
(
'.quantized.dequantized'
))
self
.
assertTrue
(
arg_name
in
quantized_ops
)
def
linear_fc_quant
(
self
,
quant_type
):
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main
,
startup
):
loss
=
linear_fc
(
3
)
opt
=
fluid
.
optimizer
.
Adam
(
learning_rate
=
0.001
)
opt
.
minimize
(
loss
)
t
=
QuantizeTranspiler
(
activation_quantize_type
=
quant_type
)
t
.
training_transpile
(
main
)
self
.
check_program
(
main
)
def
test_linear_fc_quant_abs_max
(
self
):
self
.
act_quant_op_type
=
'fake_quantize_abs_max'
self
.
linear_fc_quant
(
'abs_max'
)
def
test_linear_fc_quant_range_abs_max
(
self
):
self
.
act_quant_op_type
=
'fake_quantize_range_abs_max'
self
.
linear_fc_quant
(
'range_abs_max'
)
def
residual_block_quant
(
self
,
quant_type
):
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main
,
startup
):
loss
=
residual_block
(
2
)
opt
=
fluid
.
optimizer
.
Adam
(
learning_rate
=
0.001
)
opt
.
minimize
(
loss
)
t
=
QuantizeTranspiler
(
activation_quantize_type
=
quant_type
)
t
.
training_transpile
(
main
)
self
.
check_program
(
main
)
def
test_residual_block_abs_max
(
self
):
self
.
act_quant_op_type
=
'fake_quantize_abs_max'
self
.
residual_block_quant
(
'abs_max'
)
def
test_residual_block_range_abs_max
(
self
):
self
.
act_quant_op_type
=
'fake_quantize_range_abs_max'
self
.
residual_block_quant
(
'range_abs_max'
)
def
freeze_program
(
self
,
use_cuda
):
def
build_program
(
main
,
startup
,
is_test
):
with
fluid
.
unique_name
.
guard
():
with
fluid
.
program_guard
(
main
,
startup
):
img
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
1
,
28
,
28
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
loss
=
conv_net
(
img
,
label
)
if
not
is_test
:
opt
=
fluid
.
optimizer
.
Adam
(
learning_rate
=
0.001
)
opt
.
minimize
(
loss
)
return
[
img
,
label
],
loss
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
test_program
=
fluid
.
Program
()
feeds
,
loss
=
build_program
(
main
,
startup
,
False
)
build_program
(
test_program
,
startup
,
True
)
test_program
=
test_program
.
clone
(
for_test
=
True
)
quant_transpiler
=
QuantizeTranspiler
()
quant_transpiler
.
training_transpile
(
main
)
quant_transpiler
.
training_transpile
(
test_program
)
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
iter
=
5
batch_size
=
8
class_num
=
10
exe
.
run
(
startup
)
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
mnist
.
train
(),
buf_size
=
500
),
batch_size
=
batch_size
)
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
test
(),
batch_size
=
batch_size
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
feeds
,
place
=
place
)
with
fluid
.
program_guard
(
main
):
for
_
in
range
(
iter
):
data
=
next
(
train_reader
())
loss_v
=
exe
.
run
(
program
=
main
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
loss
])
with
fluid
.
program_guard
(
test_program
):
test_data
=
next
(
test_reader
())
w_var
=
fluid
.
framework
.
_get_var
(
'conv2d_1.w_0.quantized'
,
test_program
)
# Testing during training
test_loss1
,
w_quant
=
exe
.
run
(
program
=
test_program
,
feed
=
feeder
.
feed
(
test_data
),
fetch_list
=
[
loss
,
w_var
])
# Freeze program for inference, but the weight of fc/conv is still float type.
quant_transpiler
.
freeze_program
(
test_program
,
place
)
test_loss2
,
=
exe
.
run
(
program
=
test_program
,
feed
=
feeder
.
feed
(
test_data
),
fetch_list
=
[
loss
])
self
.
assertAlmostEqual
(
test_loss1
,
test_loss2
,
delta
=
1e-3
)
w_freeze
=
np
.
array
(
fluid
.
global_scope
().
find_var
(
'conv2d_1.w_0'
)
.
get_tensor
())
self
.
assertEqual
(
np
.
sum
(
w_freeze
),
np
.
sum
(
w_quant
))
# Convert parameter to 8-bit.
quant_transpiler
.
convert_to_int8
(
test_program
,
place
)
# Save the 8-bit parameter and model file.
fluid
.
io
.
save_inference_model
(
'model_8bit'
,
[
'image'
,
'label'
],
[
loss
],
exe
,
test_program
)
# Test whether the 8-bit parameter and model file can be loaded successfully.
[
infer
,
feed
,
fetch
]
=
fluid
.
io
.
load_inference_model
(
'model_8bit'
,
exe
)
# Check the loaded 8-bit weight.
w_8bit
=
np
.
array
(
fluid
.
global_scope
().
find_var
(
'conv2d_1.w_0.int8'
)
.
get_tensor
())
self
.
assertEqual
(
w_8bit
.
dtype
,
np
.
int8
)
self
.
assertEqual
(
np
.
sum
(
w_8bit
),
np
.
sum
(
w_freeze
))
def
test_freeze_program_cuda
(
self
):
if
fluid
.
core
.
is_compiled_with_cuda
():
with
fluid
.
unique_name
.
guard
():
self
.
freeze_program
(
True
)
def
test_freeze_program_cpu
(
self
):
with
fluid
.
unique_name
.
guard
():
self
.
freeze_program
(
False
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
b31905c5
...
@@ -28,7 +28,6 @@ list(REMOVE_ITEM TEST_OPS test_cond_op) # FIXME(qijun): https://github.com/Paddl
...
@@ -28,7 +28,6 @@ list(REMOVE_ITEM TEST_OPS test_cond_op) # FIXME(qijun): https://github.com/Paddl
list
(
REMOVE_ITEM TEST_OPS op_test
)
# op_test is a helper python file, not a test
list
(
REMOVE_ITEM TEST_OPS op_test
)
# op_test is a helper python file, not a test
list
(
REMOVE_ITEM TEST_OPS decorators
)
# decorators is a helper python file, not a test
list
(
REMOVE_ITEM TEST_OPS decorators
)
# decorators is a helper python file, not a test
if
(
APPLE
)
if
(
APPLE
)
if
(
NOT WITH_DISTRIBUTE
)
if
(
NOT WITH_DISTRIBUTE
)
list
(
REMOVE_ITEM TEST_OPS test_desc_clone
)
list
(
REMOVE_ITEM TEST_OPS test_desc_clone
)
...
...
python/paddle/fluid/tests/unittests/test_dist_base.py
浏览文件 @
b31905c5
...
@@ -50,9 +50,7 @@ class TestDistRunnerBase(object):
...
@@ -50,9 +50,7 @@ class TestDistRunnerBase(object):
def
run_pserver
(
self
,
args
):
def
run_pserver
(
self
,
args
):
self
.
get_model
(
batch_size
=
2
)
self
.
get_model
(
batch_size
=
2
)
# NOTE: pserver should not call memory optimize
if
args
.
mem_opt
:
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
fluid
.
default_main_program
(),
args
.
endpoints
,
fluid
.
default_main_program
(),
args
.
endpoints
,
args
.
trainers
,
args
.
sync_mode
)
args
.
trainers
,
args
.
sync_mode
)
...
@@ -70,7 +68,7 @@ class TestDistRunnerBase(object):
...
@@ -70,7 +68,7 @@ class TestDistRunnerBase(object):
self
.
get_model
(
batch_size
=
2
)
self
.
get_model
(
batch_size
=
2
)
if
args
.
mem_opt
:
if
args
.
mem_opt
:
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
fluid
.
memory_optimize
(
fluid
.
default_main_program
()
,
skip_grads
=
True
)
if
args
.
is_dist
:
if
args
.
is_dist
:
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
fluid
.
default_main_program
(),
fluid
.
default_main_program
(),
...
...
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
浏览文件 @
b31905c5
...
@@ -26,14 +26,13 @@ class TestDistSeResneXt2x2(TestDistBase):
...
@@ -26,14 +26,13 @@ class TestDistSeResneXt2x2(TestDistBase):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
# TODO(typhoonzero): fix this test
class
TestDistseResnXt2x2WithMemopt
(
TestDistBase
):
# class TestDistseResnXt2x2WithMemopt(TestDistBase):
def
_setup_config
(
self
):
# def _setup_config(self):
self
.
_sync_mode
=
True
# self._sync_mode = True
self
.
_mem_opt
=
True
# self._mem_opt = True
def
test_dist_train
(
self
):
# def test_dist_train(self):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
# self.check_with_place("dist_se_resnext.py", delta=1e-7)
class
TestDistSeResneXt2x2Async
(
TestDistBase
):
class
TestDistSeResneXt2x2Async
(
TestDistBase
):
...
...
python/paddle/fluid/transpiler/__init__.py
浏览文件 @
b31905c5
...
@@ -20,6 +20,10 @@ from .memory_optimization_transpiler import memory_optimize, release_memory
...
@@ -20,6 +20,10 @@ from .memory_optimization_transpiler import memory_optimize, release_memory
from
.ps_dispatcher
import
HashName
,
RoundRobin
from
.ps_dispatcher
import
HashName
,
RoundRobin
__all__
=
[
__all__
=
[
"DistributeTranspiler"
,
"memory_optimize"
,
"release_memory"
,
"HashName"
,
"DistributeTranspiler"
,
"RoundRobin"
,
"DistributeTranspilerConfig"
"memory_optimize"
,
"release_memory"
,
"HashName"
,
"RoundRobin"
,
"DistributeTranspilerConfig"
,
]
]
python/paddle/fluid/transpiler/memory_optimization_transpiler.py
浏览文件 @
b31905c5
...
@@ -14,10 +14,10 @@
...
@@ -14,10 +14,10 @@
from
__future__
import
print_function
from
__future__
import
print_function
from
collections
import
defaultdict
,
OrderedDict
,
Callable
from
collections
import
defaultdict
,
MutableSet
from
..
import
core
from
..
import
core
from
...
import
compat
as
cpt
from
...
import
compat
as
cpt
from
..framework
import
Program
,
default_main_program
,
Parameter
,
Variable
from
..framework
import
Program
,
default_main_program
,
Parameter
,
Variable
,
core
from
..backward
import
_rename_arg_
from
..backward
import
_rename_arg_
from
functools
import
reduce
from
functools
import
reduce
from
six.moves
import
range
from
six.moves
import
range
...
@@ -44,17 +44,82 @@ SUB_BLOCK_PAIR = [("while", "while_grad"), ("parallel_do", "parallel_do_grad"),
...
@@ -44,17 +44,82 @@ SUB_BLOCK_PAIR = [("while", "while_grad"), ("parallel_do", "parallel_do_grad"),
PRINT_LOG
=
False
PRINT_LOG
=
False
class
OrderedSet
(
MutableSet
):
def
__init__
(
self
,
iterable
=
None
):
self
.
end
=
end
=
[]
end
+=
[
None
,
end
,
end
]
# sentinel node for doubly linked list
self
.
map
=
{}
# key --> [key, prev, next]
if
iterable
is
not
None
:
self
|=
iterable
def
__len__
(
self
):
return
len
(
self
.
map
)
def
__contains__
(
self
,
key
):
return
key
in
self
.
map
def
add
(
self
,
key
):
if
key
not
in
self
.
map
:
end
=
self
.
end
curr
=
end
[
1
]
curr
[
2
]
=
end
[
1
]
=
self
.
map
[
key
]
=
[
key
,
curr
,
end
]
def
update
(
self
,
other
):
for
e
in
other
:
self
.
add
(
e
)
def
discard
(
self
,
key
):
if
key
in
self
.
map
:
key
,
prev
,
next
=
self
.
map
.
pop
(
key
)
prev
[
2
]
=
next
next
[
1
]
=
prev
def
remove
(
self
,
key
):
self
.
discard
(
key
)
def
__iter__
(
self
):
end
=
self
.
end
curr
=
end
[
2
]
while
curr
is
not
end
:
yield
curr
[
0
]
curr
=
curr
[
2
]
def
__reversed__
(
self
):
end
=
self
.
end
curr
=
end
[
1
]
while
curr
is
not
end
:
yield
curr
[
0
]
curr
=
curr
[
1
]
def
pop
(
self
,
last
=
True
):
if
not
self
:
raise
KeyError
(
'set is empty'
)
key
=
self
.
end
[
1
][
0
]
if
last
else
self
.
end
[
2
][
0
]
self
.
discard
(
key
)
return
key
def
__repr__
(
self
):
if
not
self
:
return
'%s()'
%
(
self
.
__class__
.
__name__
,
)
return
'%s(%r)'
%
(
self
.
__class__
.
__name__
,
list
(
self
))
def
__eq__
(
self
,
other
):
if
isinstance
(
other
,
OrderedSet
):
return
len
(
self
)
==
len
(
other
)
and
list
(
self
)
==
list
(
other
)
return
set
(
self
)
==
set
(
other
)
class
ControlFlowGraph
(
object
):
class
ControlFlowGraph
(
object
):
def
__init__
(
self
,
program
,
ops
,
forward_num
,
skip_opt
):
def
__init__
(
self
,
program
,
ops
,
forward_num
,
skip_opt
):
self
.
_program
=
program
self
.
_program
=
program
self
.
_ops
=
ops
self
.
_ops
=
ops
self
.
_forward_num
=
forward_num
self
.
_forward_num
=
forward_num
self
.
_successors
=
defaultdict
(
s
et
)
self
.
_successors
=
defaultdict
(
OrderedS
et
)
self
.
_presuccessors
=
defaultdict
(
s
et
)
self
.
_presuccessors
=
defaultdict
(
OrderedS
et
)
self
.
_uses
=
defaultdict
(
s
et
)
self
.
_uses
=
defaultdict
(
OrderedS
et
)
self
.
_defs
=
defaultdict
(
s
et
)
self
.
_defs
=
defaultdict
(
OrderedS
et
)
self
.
_live_in
=
defaultdict
(
s
et
)
self
.
_live_in
=
defaultdict
(
OrderedS
et
)
self
.
_live_out
=
defaultdict
(
s
et
)
self
.
_live_out
=
defaultdict
(
OrderedS
et
)
self
.
_skip_opt
=
skip_opt
self
.
_skip_opt
=
skip_opt
self
.
pool
=
[]
self
.
pool
=
[]
...
@@ -116,7 +181,7 @@ class ControlFlowGraph(object):
...
@@ -116,7 +181,7 @@ class ControlFlowGraph(object):
# NOTE: must sort the in_diff set for cases that get different cache var.
# NOTE: must sort the in_diff set for cases that get different cache var.
# FIXME(typhoonzero): maybe use a "sorted set" is better than this.
# FIXME(typhoonzero): maybe use a "sorted set" is better than this.
can_optimize
=
[
can_optimize
=
[
x
for
x
in
sorted
(
list
(
in_diff
))
x
for
x
in
in_diff
if
self
.
_check_var_validity
(
block_desc
,
x
,
is_forward
)
if
self
.
_check_var_validity
(
block_desc
,
x
,
is_forward
)
]
]
if
can_optimize
:
if
can_optimize
:
...
@@ -224,7 +289,7 @@ class ControlFlowGraph(object):
...
@@ -224,7 +289,7 @@ class ControlFlowGraph(object):
if
self
.
pool
:
if
self
.
pool
:
# NOTE: must sort the in_diff set for cases that get different cache var.
# NOTE: must sort the in_diff set for cases that get different cache var.
defs_can_optimize
=
[
defs_can_optimize
=
[
x
for
x
in
s
orted
(
list
(
self
.
_defs
[
i
]))
x
for
x
in
s
elf
.
_defs
[
i
]
if
self
.
_check_var_validity
(
block_desc
,
x
,
is_forward
)
if
self
.
_check_var_validity
(
block_desc
,
x
,
is_forward
)
]
]
out_pair
=
[
out_pair
=
[
...
@@ -381,7 +446,19 @@ def _get_cfgs(input_program):
...
@@ -381,7 +446,19 @@ def _get_cfgs(input_program):
return
cfgs
return
cfgs
def
memory_optimize
(
input_program
,
skip_opt_set
=
None
,
print_log
=
False
,
level
=
0
):
def
_is_opt_role_op
(
op
):
op_maker
=
core
.
op_proto_and_checker_maker
optimize_role
=
core
.
op_proto_and_checker_maker
.
OpRole
.
Optimize
if
op_maker
.
kOpRoleAttrName
()
in
op
.
attr_names
and
\
int
(
op
.
all_attrs
()[
op_maker
.
kOpRoleAttrName
()])
==
int
(
optimize_role
):
return
True
def
memory_optimize
(
input_program
,
skip_opt_set
=
None
,
print_log
=
False
,
level
=
0
,
skip_grads
=
False
):
"""Optimize memory by reusing var memory.
"""Optimize memory by reusing var memory.
Note: it doesn't not support subblock nested in subblock.
Note: it doesn't not support subblock nested in subblock.
...
@@ -398,6 +475,19 @@ def memory_optimize(input_program, skip_opt_set=None, print_log=False, level=0):
...
@@ -398,6 +475,19 @@ def memory_optimize(input_program, skip_opt_set=None, print_log=False, level=0):
raise
ValueError
(
"only support opt_level 0 or 1."
)
raise
ValueError
(
"only support opt_level 0 or 1."
)
global
PRINT_LOG
global
PRINT_LOG
PRINT_LOG
=
print_log
PRINT_LOG
=
print_log
if
skip_grads
:
grad_set
=
set
()
OP_ROLE_VAR
=
core
.
op_proto_and_checker_maker
.
kOpRoleVarAttrName
()
for
op
in
input_program
.
global_block
().
ops
:
if
_is_opt_role_op
(
op
):
if
op
.
attr
(
OP_ROLE_VAR
):
grad_name
=
op
.
attr
(
OP_ROLE_VAR
)[
1
]
grad_set
.
add
(
grad_name
)
if
not
skip_opt_set
:
skip_opt_set
=
grad_set
else
:
skip_opt_set
.
update
(
grad_set
)
cfgs
=
_get_cfgs
(
input_program
)
cfgs
=
_get_cfgs
(
input_program
)
for
cfg
in
cfgs
:
for
cfg
in
cfgs
:
cfg
.
memory_optimize
(
skip_opt_set
=
skip_opt_set
,
level
=
level
)
cfg
.
memory_optimize
(
skip_opt_set
=
skip_opt_set
,
level
=
level
)
...
...
python/setup.py.in
浏览文件 @
b31905c5
...
@@ -106,6 +106,7 @@ packages=['paddle',
...
@@ -106,6 +106,7 @@ packages=['paddle',
'paddle.fluid.layers',
'paddle.fluid.layers',
'paddle.fluid.contrib',
'paddle.fluid.contrib',
'paddle.fluid.contrib.decoder',
'paddle.fluid.contrib.decoder',
'paddle.fluid.contrib.quantize',
'paddle.fluid.transpiler',
'paddle.fluid.transpiler',
'paddle.fluid.transpiler.details']
'paddle.fluid.transpiler.details']
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录