From ad0dfb17c138ab1454ec4f1a370e6e8b8bb76780 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9F=B3=E6=99=93=E4=BC=9F?= <39303645+Shixiaowei02@users.noreply.github.com> Date: Thu, 9 Jan 2020 12:15:31 +0800 Subject: [PATCH] [Feature] Lite subgraph (#22114) --- CMakeLists.txt | 1 + cmake/external/lite.cmake | 87 +++++ cmake/flags.cmake | 5 - cmake/system.cmake | 5 + cmake/third_party.cmake | 4 + paddle/fluid/inference/CMakeLists.txt | 4 + paddle/fluid/inference/analysis/argument.h | 6 + .../inference/analysis/ir_pass_manager.cc | 11 + .../analysis/ir_passes/CMakeLists.txt | 9 + .../analysis/ir_passes/lite_subgraph_pass.cc | 336 ++++++++++++++++++ .../analysis/ir_passes/lite_subgraph_pass.h | 45 +++ .../ir_passes/lite_subgraph_pass_tester.cc | 59 +++ .../analysis/ir_passes/subgraph_util.cc | 52 ++- .../analysis/ir_passes/subgraph_util.h | 15 +- paddle/fluid/inference/api/analysis_config.cc | 33 ++ .../fluid/inference/api/analysis_predictor.cc | 7 + .../inference/api/paddle_analysis_config.h | 14 + .../inference/api/paddle_pass_builder.cc | 8 +- .../fluid/inference/api/paddle_pass_builder.h | 1 + paddle/fluid/inference/lite/CMakeLists.txt | 5 + paddle/fluid/inference/lite/engine.cc | 64 ++++ paddle/fluid/inference/lite/engine.h | 55 +++ paddle/fluid/inference/lite/op_teller.cc | 92 +++++ paddle/fluid/inference/lite/op_teller.h | 70 ++++ paddle/fluid/inference/lite/tensor_utils.cc | 181 ++++++++++ paddle/fluid/inference/lite/tensor_utils.h | 33 ++ paddle/fluid/inference/lite/test_engine.cc | 132 +++++++ .../fluid/inference/lite/test_tensor_utils.cc | 116 ++++++ .../fluid/inference/tests/api/CMakeLists.txt | 7 +- .../tests/api/lite_mul_model_test.cc | 58 +++ paddle/fluid/operators/CMakeLists.txt | 5 + paddle/fluid/operators/lite/CMakeLists.txt | 2 + paddle/fluid/operators/lite/lite_engine_op.cc | 44 +++ paddle/fluid/operators/lite/lite_engine_op.h | 110 ++++++ .../operators/lite/lite_engine_op_test.cc | 115 ++++++ paddle/fluid/operators/lite/ut_helper.h | 111 ++++++ paddle/fluid/platform/gpu_info.cc | 11 + paddle/fluid/platform/gpu_info.h | 3 + paddle/scripts/paddle_build.sh | 4 +- 39 files changed, 1909 insertions(+), 11 deletions(-) create mode 100644 cmake/external/lite.cmake create mode 100644 paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc create mode 100644 paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h create mode 100644 paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass_tester.cc create mode 100644 paddle/fluid/inference/lite/CMakeLists.txt create mode 100644 paddle/fluid/inference/lite/engine.cc create mode 100644 paddle/fluid/inference/lite/engine.h create mode 100644 paddle/fluid/inference/lite/op_teller.cc create mode 100644 paddle/fluid/inference/lite/op_teller.h create mode 100644 paddle/fluid/inference/lite/tensor_utils.cc create mode 100644 paddle/fluid/inference/lite/tensor_utils.h create mode 100644 paddle/fluid/inference/lite/test_engine.cc create mode 100644 paddle/fluid/inference/lite/test_tensor_utils.cc create mode 100644 paddle/fluid/inference/tests/api/lite_mul_model_test.cc create mode 100644 paddle/fluid/operators/lite/CMakeLists.txt create mode 100644 paddle/fluid/operators/lite/lite_engine_op.cc create mode 100644 paddle/fluid/operators/lite/lite_engine_op.h create mode 100644 paddle/fluid/operators/lite/lite_engine_op_test.cc create mode 100644 paddle/fluid/operators/lite/ut_helper.h diff --git a/CMakeLists.txt b/CMakeLists.txt index f5b9412f9ac..a322e9567c9 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,6 +86,7 @@ 
option(WITH_HIGH_LEVEL_API_TEST "Test fluid python high-level api interface" option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION}) option(WITH_DGC "Use DGC(Deep Gradient Compression) or not" ${WITH_DISTRIBUTE}) option(SANITIZER_TYPE "Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined" OFF) +option(WITH_LITE "Compile Paddle Fluid with Lite Engine" OFF) # PY_VERSION if(NOT PY_VERSION) diff --git a/cmake/external/lite.cmake b/cmake/external/lite.cmake new file mode 100644 index 00000000000..d8eb7b76205 --- /dev/null +++ b/cmake/external/lite.cmake @@ -0,0 +1,87 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if(NOT LINUX OR NOT WITH_MKL) + message("Paddle-lite will not build because the required Linux and MKL do not exist.") + set(WITH_LITE OFF) + return() +endif() + +if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) + include(ExternalProject) + set(LITE_PROJECT extern_lite) + set(LITE_SOURCES_DIR ${THIRD_PARTY_PATH}/lite) + set(LITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lite) + + # No quotes, so cmake can resolve it as a command with arguments. + set(LITE_BUILD_COMMAND $(MAKE) -j) + set(LITE_OPTIONAL_ARGS -DWITH_MKL=ON + -DLITE_WITH_CUDA=${WITH_GPU} + -DWITH_MKLDNN=OFF + -DLITE_WITH_X86=ON + -DLITE_WITH_PROFILE=OFF + -DWITH_LITE=OFF + -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF + -DWITH_PYTHON=OFF + -DWITH_TESTING=ON + -DLITE_BUILD_EXTRA=ON + -DCUDNN_ROOT=${CUDNN_ROOT} + -DLITE_WITH_ARM=OFF) + + ExternalProject_Add( + ${LITE_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY "https://github.com/PaddlePaddle/Paddle-Lite.git" + GIT_TAG 947cda26637d46dc23f4e39d2b52e7d9a1fa6eef + PREFIX ${LITE_SOURCES_DIR} + UPDATE_COMMAND "" + BUILD_COMMAND ${LITE_BUILD_COMMAND} + INSTALL_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${LITE_CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + ${LITE_OPTIONAL_ARGS} + ) + ExternalProject_Get_property(${LITE_PROJECT} BINARY_DIR) + ExternalProject_Get_property(${LITE_PROJECT} SOURCE_DIR) + set(LITE_BINARY_DIR ${BINARY_DIR}) + set(LITE_SOURCE_DIR ${SOURCE_DIR}) + +endif() + +message(STATUS "Paddle-lite BINARY_DIR: ${LITE_BINARY_DIR}") +message(STATUS "Paddle-lite SOURCE_DIR: ${LITE_SOURCE_DIR}") +include_directories(${LITE_SOURCE_DIR}) +include_directories(${LITE_BINARY_DIR}) + +function(external_lite_static_libs alias path) + add_library(${alias} STATIC IMPORTED GLOBAL) + SET_PROPERTY(TARGET ${alias} PROPERTY IMPORTED_LOCATION + ${path}) + if (LITE_PROJECT) + add_dependencies(${alias} ${LITE_PROJECT}) + endif() +endfunction() + 
+external_lite_static_libs(lite_full_static ${LITE_BINARY_DIR}/lite/api/libapi_full_static.a) + +add_definitions(-DPADDLE_WITH_LITE) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 99200ae2db0..884e5d45a6a 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -207,11 +207,6 @@ if(LINUX) ${GPU_COMMON_FLAGS}) endif(LINUX) -if(UNIX AND NOT APPLE) - # except apple from nix*Os family - set(LINUX TRUE) -endif(UNIX AND NOT APPLE) - foreach(flag ${COMMON_FLAGS}) safe_set_cflag(CMAKE_C_FLAGS ${flag}) safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag}) diff --git a/cmake/system.cmake b/cmake/system.cmake index 65db05bebe9..c740136b93d 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -20,6 +20,11 @@ # for instance, protobuf libs path is /lib64 # on CentOS, but /lib on other systems. +if(UNIX AND NOT APPLE) + # except apple from nix*Os family + set(LINUX TRUE) +endif(UNIX AND NOT APPLE) + IF(WIN32) SET(HOST_SYSTEM "win32") ELSE(WIN32) diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake index ed870aebf98..2288ecd09c4 100644 --- a/cmake/third_party.cmake +++ b/cmake/third_party.cmake @@ -284,4 +284,8 @@ if(WITH_DGC) list(APPEND third_party_deps extern_dgc) endif() +if (WITH_LITE) + include(external/lite) +endif (WITH_LITE) + add_custom_target(third_party DEPENDS ${third_party_deps}) diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index c88e5f04286..cb1a8834cdf 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -21,6 +21,10 @@ if (ANAKIN_SUBGRAPH) add_subdirectory(anakin) endif() +if (WITH_LITE) + add_subdirectory(lite) +endif() + get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES) get_property(fluid_third_partys GLOBAL PROPERTY FLUID_THRID_PARTYS) diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index 5b47e9ebffb..a8076dd199d 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -197,6 +197,12 @@ struct Argument { DECL_ARGUMENT_FIELD(anakin_ops_filter, AnakinOpsFilter, std::vector); + DECL_ARGUMENT_FIELD(lite_passes_filter, LitePassesFilter, + std::vector); + DECL_ARGUMENT_FIELD(lite_ops_filter, LiteOpsFilter, std::vector); + DECL_ARGUMENT_FIELD(lite_precision_mode, LitePrecisionMode, + AnalysisConfig::Precision); + // Memory optimized related. 
DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool); diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index a1f8ff47801..385cd760244 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -128,6 +128,17 @@ void IRPassManager::CreatePasses(Argument *argument, pass->Set("program", new framework::ProgramDesc *(&argument->main_program())); } + if (pass_name == "lite_subgraph_pass") { + bool enable_int8 = + argument->lite_precision_mode() == AnalysisConfig::Precision::kInt8; + pass->Set("program", + new framework::ProgramDesc *(&argument->main_program())); + pass->Set("lite_ops_filter", + new std::vector(argument->lite_ops_filter())); + pass->Set("predictor_id", new int(argument->predictor_id())); + pass->Set("enable_int8", new bool(enable_int8)); + pass->Set("use_gpu", new bool(argument->use_gpu())); + } if (pass_name == "anakin_subgraph_pass") { pass->Set("program", new framework::ProgramDesc *(&argument->main_program())); diff --git a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt index 3a76bb27482..4ef3381ac5c 100644 --- a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt @@ -23,3 +23,12 @@ if (ANAKIN_SUBGRAPH) file(APPEND ${pass_file} "USE_PASS(anakin_subgraph_pass);\n") set(INFER_IR_PASSES ${INFER_IR_PASSES} anakin_subgraph_pass CACHE INTERNAL "") endif() + +if (WITH_LITE) + cc_library(lite_subgraph_pass SRCS lite_subgraph_pass.cc DEPS ${analysis_deps} subgraph_util lite_op_teller) + set(analysis_deps ${analysis_deps} subgraph_util lite_subgraph_pass CACHE INTERNAL "") + set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h) + file(APPEND ${pass_file} "USE_PASS(lite_subgraph_pass);\n") + set(INFER_IR_PASSES ${INFER_IR_PASSES} lite_subgraph_pass CACHE INTERNAL "") + cc_test(lite_subgraph_pass_tester SRCS lite_subgraph_pass_tester.cc DEPS lite_subgraph_pass gtest glog) +endif() diff --git a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc new file mode 100644 index 00000000000..91d0aec3f41 --- /dev/null +++ b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc @@ -0,0 +1,336 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/inference/lite/op_teller.h" +#include "paddle/fluid/inference/utils/singleton.h" + +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/ir/subgraph_detector.h" +#include "paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h" +#include "paddle/fluid/string/pretty_log.h" + +#include "paddle/fluid/inference/lite/engine.h" + +namespace paddle { +namespace inference { +namespace analysis { + +using framework::ir::Node; +using framework::ir::Agent; +using framework::ir::SubGraphFuser; +using framework::ir::Graph; + +namespace lite { + +std::string UniqueKey(const std::vector& engine_inputs, + const std::vector& engine_outputs, + const std::string& id) { + std::string engine_hash_key = ""; + for (auto name : engine_inputs) { + engine_hash_key += name; + } + for (auto name : engine_outputs) { + engine_hash_key += name; + } + engine_hash_key += id; + auto engine_key = std::to_string(std::hash()(engine_hash_key)); + return engine_key; +} + +std::vector IOVarsFilter(const std::vector& nodes) { + std::set names; + for (const auto& node : nodes) { + if (node->IsVar() && !node->Var()->Persistable()) { + names.insert(node->Name()); + } + } + return std::vector(names.begin(), names.end()); +} + +void StrToBinaryFile(const std::string& path, const std::string& str) { + std::ofstream file(path.c_str(), std::ios::binary); + file.write(str.c_str(), str.size()); + file.close(); +} + +void ModifyHostSubgraphOps( + framework::ProgramDesc* host_program, framework::BlockDesc* host_sub_block, + const std::vector& subgraph_ops) { + for (auto* op_desc : subgraph_ops) { + auto* sub_block_op = host_sub_block->AppendOp(); + sub_block_op->CopyFrom(*op_desc); + if (op_desc->HasAttr("sub_block")) { + int32_t global_sub_id = host_sub_block->ID(); + auto* op_sub_block = + host_program->MutableBlock(op_desc->GetBlockAttrId("sub_block")); + op_sub_block->Proto()->set_parent_idx(global_sub_id); + } + } +} + +void ModifyHostProgram(framework::ProgramDesc* host_program, + framework::BlockDesc* host_sub_block, + const std::unordered_set& io_var_nodes, + const std::vector& subgraph_ops) { + for (auto* var_node : io_var_nodes) { + auto* sub_block_var = host_sub_block->Var(var_node->Name()); + sub_block_var->Proto()->CopyFrom(*var_node->Var()->Proto()); + } + ModifyHostSubgraphOps(host_program, host_sub_block, subgraph_ops); +} + +void AppendLiteSubBlocks(const std::vector& subgraph_ops, + framework::ProgramDesc* engine_program, + framework::ProgramDesc* host_program, + const int32_t host_sub_id) { + std::unordered_map sub_blocks_map; + std::unordered_set copied_host_ids; + sub_blocks_map[host_sub_id] = framework::kRootBlockIndex; + std::function&)> append_sub_blocks; + append_sub_blocks = [&](const std::vector& ops) { + for (auto* op_desc : ops) { + if (op_desc->HasAttr("sub_block")) { + int32_t host_op_sub_id = op_desc->GetBlockAttrId("sub_block"); + if (copied_host_ids.count(host_op_sub_id)) continue; + size_t engine_block_size = engine_program->Size(); + auto* host_op_sub_block = host_program->MutableBlock(host_op_sub_id); + auto* engine_op_sub_block = + engine_program->AppendBlock(*(op_desc->Block())); + for (auto* var : host_op_sub_block->AllVars()) { + auto* engine_var = engine_op_sub_block->Var(var->Name()); + engine_var->Proto()->CopyFrom(*var->Proto()); + } + for (auto* op : 
host_op_sub_block->AllOps()) { + auto* engine_op = engine_op_sub_block->AppendOp(); + engine_op->Proto()->CopyFrom(*op->Proto()); + } + sub_blocks_map[host_op_sub_id] = engine_block_size; + append_sub_blocks(host_op_sub_block->AllOps()); + } + } + }; + append_sub_blocks(subgraph_ops); + for (size_t i = 0; i < engine_program->Size(); i++) { + for (auto* op_desc : engine_program->Block(i).AllOps()) { + if (op_desc->HasAttr("sub_block")) { + int32_t id = op_desc->GetBlockAttrId("sub_block"); + op_desc->SetAttr("sub_block", sub_blocks_map[id]); + } + } + } +} + +// The modification of pass should be a process of framework::desc +// (initial) -> proto::desc (flush) -> framework::desc (final). +// Ir::Graph is limited to changing the main block, so the sub block +// needs to be processed here. +void ModifyEngineProgram(Node* merged_node, + framework::ProgramDesc* host_program, + framework::ProgramDesc* engine_program, + const int32_t host_sub_block_id, + const std::unordered_set& io_var_nodes, + const std::vector& subgraph_ops) { + // 1. Fill the main block of lite program. + framework::BlockDesc* engine_global_block = + engine_program->MutableBlock(framework::kRootBlockIndex); + PrependFeedOps(engine_global_block, IOVarsFilter(merged_node->inputs)); + for (auto* var_node : io_var_nodes) { + framework::VarDesc* sub_block_var = + engine_global_block->Var(var_node->Name()); + sub_block_var->Proto()->CopyFrom(*var_node->Var()->Proto()); + } + for (auto* op_desc : subgraph_ops) { + auto* sub_block_op = engine_global_block->AppendOp(); + sub_block_op->CopyFrom(*op_desc); + } + PrependFetchOps(engine_global_block, IOVarsFilter(merged_node->outputs)); + + // 2. Append sub blocks in the lite program. + AppendLiteSubBlocks(subgraph_ops, engine_program, host_program, + host_sub_block_id); +} + +void OrganizeProgram(Node* merged_node, framework::ProgramDesc* host_program, + framework::ProgramDesc* engine_program, + std::vector* repetitive_params) { + std::vector& subgraph = *Agent(merged_node).subgraph(); + PADDLE_ENFORCE_EQ(subgraph.empty(), false, + platform::errors::NotFound( + "No subgraph found in lite subgraph pass. Please use " + "the full model call from Analysis Predictor.")); + + const framework::BlockDesc& host_global_block = + host_program->Block(framework::kRootBlockIndex); + framework::BlockDesc* host_sub_block = + host_program->AppendBlock(host_global_block); + + string::PrettyLogDetail("--- detect a sub-graph with %d nodes", + subgraph.size()); + + std::unordered_set io_var_nodes = GetRelatedIOVarNodes(subgraph); + for (const auto* node : io_var_nodes) { + VLOG(3) << "IO Variable Name: " << node->Name(); + } + + std::vector subgraph_ops; + for (auto* op_node : subgraph) { + subgraph_ops.push_back(op_node->Op()); + } + + ModifyHostProgram(host_program, host_sub_block, io_var_nodes, subgraph_ops); + ModifyEngineProgram(merged_node, host_program, engine_program, + host_sub_block->ID(), io_var_nodes, subgraph_ops); + *repetitive_params = ExtractParameters(io_var_nodes, true); + for (const auto& param : *repetitive_params) { + VLOG(3) << "Repetitive param: " << param; + } + host_program->Flush(); + engine_program->Flush(); +} +} // namespace lite + +void LiteSubgraphPass::SetUpEngine( + framework::ProgramDesc* program, + const std::vector& repetitive_params, + const std::string& unique_key, bool dump_model) const { + inference::lite::EngineConfig config; + auto* scope = param_scope(); + + // When the pass is started, only the persistent variables of the + // main block are read. 
Fluid seems to allow persistence variables + // in the sub block, but they are controlled by context, so the + // support is suspended here. + auto serialize_params = [](std::string* str, framework::Scope* scope, + const std::vector& params) { + std::ostringstream os; + platform::CPUDeviceContext ctx; + for (const auto& param : params) { + VLOG(3) << "Serialize param: " << param; + PADDLE_ENFORCE_NOT_NULL( + scope->FindVar(param), + platform::errors::NotFound( + "Block should already have a '%s' variable", param)); + auto* tensor = scope->FindVar(param)->GetMutable(); + framework::SerializeToStream(os, *tensor, ctx); + } + *str = os.str(); + }; + + bool use_gpu = Get("use_gpu"); + bool enable_int8 = Get("enable_int8"); + lite_api::TargetType target_type = use_gpu ? TARGET(kCUDA) : TARGET(kX86); + paddle::lite_api::PrecisionType precision_type = + enable_int8 ? PRECISION(kInt8) : PRECISION(kInt64); + serialize_params(&config.param, scope, repetitive_params); + config.model = program->Proto()->SerializeAsString(); + config.valid_places = { + paddle::lite::Place({target_type, precision_type}), + paddle::lite::Place({target_type, PRECISION(kFloat)}), + paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}), + }; + if (dump_model) { + lite::StrToBinaryFile("./model.bin", config.model); + lite::StrToBinaryFile("./param.bin", config.param); + } + inference::Singleton::Global().Create( + unique_key, config); +} + +void LiteSubgraphPass::BuildOperator( + Node* merged_node, framework::ProgramDesc* global_program, + std::vector* repetitive_params) const { + framework::ProgramDesc engine_program; + + const std::string id = std::to_string(Get("predictor_id")); + const std::vector input_names = + lite::IOVarsFilter(merged_node->inputs); + const std::vector output_names = + lite::IOVarsFilter(merged_node->outputs); + const std::string unique_key = lite::UniqueKey(input_names, output_names, id); + + lite::OrganizeProgram(merged_node, global_program, &engine_program, + repetitive_params); + SetUpEngine(&engine_program, *repetitive_params, unique_key); + + auto* op_desc = merged_node->Op(); + op_desc->SetInput("Xs", input_names); + op_desc->SetOutput("Ys", output_names); + op_desc->SetType("lite_engine"); + op_desc->SetAttr("engine_key", unique_key); + op_desc->SetAttr("enable_int8", Get("enable_int8")); + op_desc->SetAttr("use_gpu", Get("use_gpu")); +} + +void LiteSubgraphPass::ApplyImpl(framework::ir::Graph* graph) const { + framework::ir::FusePassBase::Init("lite_subgraph_pass", graph); + framework::ProgramDesc* global_program = + Get("program"); + + auto& lite_ops_filter = Get>("lite_ops_filter"); + + auto teller = [&lite_ops_filter](const Node* node) { + if (!node->IsOp() || !node->Op()) + return false; + else if (node->Op()->Type() == "feed" || node->Op()->Type() == "fetch") + return false; + else if (std::find(lite_ops_filter.begin(), lite_ops_filter.end(), + node->Op()->Type()) != lite_ops_filter.end()) + return false; + return inference::lite::OpTeller::Global().Tell(node->Op()->Type(), + *node->Op()); + }; + + SubGraphFuser fuser(graph, teller, 0 /* min_subgraph_size */, "lite_engine"); + fuser(); + + std::vector repetitive_params; + for (auto* node : graph->Nodes()) { + if (node->IsOp() && !Agent(node).subgraph()->empty()) { + BuildOperator(node, global_program, &repetitive_params); + std::unordered_set nodes2remove( + Agent(node).subgraph()->begin(), Agent(node).subgraph()->end()); + framework::ir::GraphSafeRemoveNodes(graph, nodes2remove); + } + } + + std::unordered_set nodes2remove; + for 
(auto* node : graph->Nodes()) { + if (node->IsOp() && Agent(node).deleted()) { + nodes2remove.insert(node); + } + } + framework::ir::GraphSafeRemoveNodes(graph, nodes2remove); + graph->Set(framework::ir::kRepetitiveParamAttr, + new std::vector(repetitive_params)); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle + +REGISTER_PASS(lite_subgraph_pass, + paddle::inference::analysis::LiteSubgraphPass); diff --git a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h new file mode 100644 index 00000000000..e79a64f0f72 --- /dev/null +++ b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h @@ -0,0 +1,45 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include +#include +#include +#include "paddle/fluid/framework/ir/pass.h" +#include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h" + +namespace paddle { +namespace inference { +namespace analysis { + +class LiteSubgraphPass : public framework::ir::FusePassBase { + public: + void ApplyImpl(framework::ir::Graph* graph) const override; + + private: + void BuildOperator(framework::ir::Node* merged_node, + framework::ProgramDesc* global_program, + std::vector* repetitive_params) const; + + void SetUpEngine(framework::ProgramDesc* program, + const std::vector& repetitive_params, + const std::string& unique_key, + bool dump_model = false) const; +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass_tester.cc b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass_tester.cc new file mode 100644 index 00000000000..90ad7ec0b44 --- /dev/null +++ b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass_tester.cc @@ -0,0 +1,59 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h" +#include +#include "paddle/fluid/inference/io.h" +#include "paddle/fluid/inference/lite/op_teller.h" + +namespace paddle { +namespace inference { +namespace analysis { +namespace lite { +void StrToBinaryFile(const std::string& path, const std::string& str); +void ModifyHostSubgraphOps(framework::ProgramDesc* host_program, + framework::BlockDesc* host_sub_block, + const std::vector& subgraph_ops); +void AppendLiteSubBlocks(const std::vector& subgraph_ops, + framework::ProgramDesc* engine_program, + framework::ProgramDesc* host_program, + const int32_t host_sub_id); +} + +TEST(LiteSubgraphPass, basic) { + framework::ProgramDesc host_program; + framework::ProgramDesc engine_program; + framework::BlockDesc* host_main_block = host_program.MutableBlock(0); + framework::BlockDesc* host_sub_block = + host_program.AppendBlock(*host_main_block); + framework::OpDesc* host_while_op = host_main_block->AppendOp(); + host_main_block->Var("var_main"); + host_sub_block->Var("var_sub"); + host_while_op->SetType("while"); + host_while_op->SetAttr("sub_block", host_sub_block); + framework::OpDesc* host_sub_block_op = host_sub_block->AppendOp(); + host_sub_block_op->SetType("leaky_relu"); + + CHECK(inference::lite::OpTeller::Global().Tell("while", *host_while_op)) + << "Lite operator teller test failed."; + + lite::AppendLiteSubBlocks({host_while_op}, &engine_program, &host_program, + host_sub_block->ID()); + lite::ModifyHostSubgraphOps(&host_program, host_sub_block, {host_while_op}); + lite::StrToBinaryFile("./", "test"); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc index e16cce54c24..699e9eb01de 100644 --- a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc @@ -26,7 +26,7 @@ namespace analysis { using framework::ir::Node; std::vector ExtractParameters( - const std::unordered_set &nodes) { + const std::unordered_set &nodes, bool sorted) { // We can judge whether a variable is a parameter by // its presistable property, but sometimes the presistable // of the feed op output is true, so we have to identify it. 
@@ -50,9 +50,59 @@ std::vector ExtractParameters( parameters.push_back(node->Name()); } } + if (sorted) { + std::sort(parameters.begin(), parameters.end()); + parameters.erase(std::unique(parameters.begin(), parameters.end()), + parameters.end()); + } return parameters; } +std::unordered_set GetRelatedIOVarNodes( + const std::vector &nodes) { + std::unordered_set io_nodes; + for (const auto &node : nodes) { + if (!node->IsOp()) continue; + for (const auto &in : node->inputs) { + io_nodes.insert(in); + } + for (const auto &out : node->outputs) { + io_nodes.insert(out); + } + } + return io_nodes; +} + +void PrependFeedOps(framework::BlockDesc *global_block, + const std::vector &feed_target_names, + std::string feed_holder_name) { + framework::VarDesc *feed_var = global_block->Var(feed_holder_name); + feed_var->SetType(paddle::framework::proto::VarType::FEED_MINIBATCH); + feed_var->SetPersistable(true); + for (size_t i = 0; i < feed_target_names.size(); i++) { + framework::OpDesc *feed_op = global_block->AppendOp(); + feed_op->SetType("feed"); + feed_op->SetInput("X", {feed_holder_name}); + feed_op->SetOutput("Out", {feed_target_names[i]}); + feed_op->SetAttr("col", static_cast(i)); + } +} + +void PrependFetchOps(framework::BlockDesc *global_block, + const std::vector &fetch_target_names, + std::string fetch_holder_name) { + framework::VarDesc *fetch_var = global_block->Var(fetch_holder_name); + fetch_var->SetType(paddle::framework::proto::VarType::FETCH_LIST); + fetch_var->SetPersistable(true); + for (size_t i = 0; i < fetch_target_names.size(); i++) { + framework::OpDesc *fetch_op = global_block->AppendOp(); + fetch_op->SetType("fetch"); + fetch_op->SetInput("X", {fetch_target_names[i]}); + fetch_op->SetOutput("Out", {fetch_holder_name}); + fetch_op->SetAttr("col", static_cast(i)); + } +} + void RenameAndGetOutputs( const std::vector &subgraph_nodes, framework::BlockDesc *block_desc, diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h index 444e1984cf8..1257562972e 100644 --- a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h @@ -30,10 +30,21 @@ limitations under the License. 
*/ namespace paddle { namespace inference { namespace analysis { -using framework::ir::Node; std::vector ExtractParameters( - const std::unordered_set &nodes); + const std::unordered_set &nodes, + bool sorted = false); + +std::unordered_set GetRelatedIOVarNodes( + const std::vector &nodes); + +void PrependFeedOps(framework::BlockDesc *global_block, + const std::vector &feed_target_names, + std::string feed_holder_name = "feed"); + +void PrependFetchOps(framework::BlockDesc *global_block, + const std::vector &fetch_target_names, + std::string fetch_holder_name = "fetch"); void RenameAndGetOutputs( const std::vector &subgraph_nodes, diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index f6048449bcd..16cb5305f32 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -23,6 +23,7 @@ namespace paddle { extern const std::vector kTRTSubgraphPasses; extern const std::vector kAnakinSubgraphPasses; +extern const std::vector kLiteSubgraphPasses; PassStrategy *AnalysisConfig::pass_builder() const { if (!pass_builder_.get()) { @@ -128,6 +129,11 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { CP_MEMBER(anakin_passes_filter_); CP_MEMBER(anakin_ops_filter_); + CP_MEMBER(use_lite_); + CP_MEMBER(lite_precision_mode_); + CP_MEMBER(lite_passes_filter_); + CP_MEMBER(lite_ops_filter_); + // profile related. CP_MEMBER(with_profile_); @@ -351,6 +357,20 @@ void AnalysisConfig::Update() { } } + if (use_lite_) { +#ifndef PADDLE_WITH_LITE + LOG(WARNING) << "You tried to enable the lite subgraph " + "but did not have the option -DWITH_LITE compiled."; +#endif + pass_builder()->ClearPasses(); + for (const auto &pass : kLiteSubgraphPasses) { + if (std::find(lite_passes_filter_.begin(), lite_passes_filter_.end(), + pass) == lite_passes_filter_.end()) { + pass_builder()->AppendPass(pass); + } + } + } + if (ir_debug_) { pass_builder()->TurnOnDebug(); } @@ -395,6 +415,8 @@ std::string AnalysisConfig::SerializeInfoCache() { ss << cpu_math_library_num_threads_; ss << use_anakin_; ss << anakin_min_subgraph_size_; + + ss << use_lite_; return ss.str(); } @@ -484,6 +506,17 @@ void AnalysisConfig::EnableAnakinEngine( Update(); } +void AnalysisConfig::EnableLiteEngine( + AnalysisConfig::Precision precision_mode, + const std::vector &passes_filter, + const std::vector &ops_filter) { + use_lite_ = true; + lite_precision_mode_ = precision_mode; + lite_passes_filter_ = passes_filter; + lite_ops_filter_ = ops_filter; + Update(); +} + void AnalysisConfig::PartiallyRelease() { prog_file_.clear(); prog_file_.shrink_to_fit(); diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 7d4d44219c8..669bbf9b4ae 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -428,6 +428,13 @@ void AnalysisPredictor::PrepareArgument() { LOG(INFO) << "Anakin subgraph engine is enabled"; } + if (config_.lite_engine_enabled()) { + argument_.SetLitePrecisionMode(config_.lite_precision_mode_); + argument_.SetLitePassesFilter(config_.lite_passes_filter_); + argument_.SetLiteOpsFilter(config_.lite_ops_filter_); + LOG(INFO) << "Lite subgraph engine is enabled"; + } + if (config_.use_mkldnn_) { LOG(INFO) << "MKLDNN is enabled"; argument_.SetMKLDNNEnabledOpTypes(config_.mkldnn_enabled_op_types_); diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h 
b/paddle/fluid/inference/api/paddle_analysis_config.h index 40b24cd092d..a47c9e0d573 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -171,10 +171,19 @@ struct AnalysisConfig { std::vector passes_filter = {}, std::vector ops_filter = {}); + void EnableLiteEngine( + AnalysisConfig::Precision precision_mode = Precision::kFloat32, + const std::vector& passes_filter = {}, + const std::vector& ops_filter = {}); + /** A boolean state indicating whether the Anakin sub-graph engine is used. */ bool anakin_engine_enabled() const { return use_anakin_; } + /** A boolean state indicating whether the Lite sub-graph engine is used. + */ + bool lite_engine_enabled() const { return use_lite_; } + /** \brief Control whether to debug IR graph analysis phase. * * This will generate DOT files for visualizing the computation graph after @@ -350,6 +359,11 @@ struct AnalysisConfig { std::vector anakin_passes_filter_; std::vector anakin_ops_filter_; + bool use_lite_{false}; + std::vector lite_passes_filter_; + std::vector lite_ops_filter_; + Precision lite_precision_mode_; + // mkldnn related. int mkldnn_cache_capacity_{0}; bool use_mkldnn_quantizer_{false}; diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 49f637d96b6..2ad118df2ea 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -105,6 +105,12 @@ const std::vector kAnakinSubgraphPasses({ "fc_gru_fuse_pass", // }); +const std::vector kLiteSubgraphPasses({ +#ifdef PADDLE_WITH_LITE + "lite_subgraph_pass", +#endif +}); + GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { passes_.assign({ // "identity_scale_op_clean_pass", // @@ -123,7 +129,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { "conv_elementwise_add2_act_fuse_pass", // "conv_elementwise_add_fuse_pass", // #endif // - "transpose_flatten_concat_fuse_pass", + "transpose_flatten_concat_fuse_pass", // // following pass should be located in the last, since it will // work on all fused ops. 
"runtime_context_cache_pass" diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h index 69bc5cd774a..d83f1ae6143 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.h +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -163,5 +163,6 @@ class GpuPassStrategy : public PassStrategy { extern const std::vector kTRTSubgraphPasses; extern const std::vector kAnakinSubgraphPasses; +extern const std::vector kLiteSubgraphPasses; } // namespace paddle diff --git a/paddle/fluid/inference/lite/CMakeLists.txt b/paddle/fluid/inference/lite/CMakeLists.txt new file mode 100644 index 00000000000..1d957048148 --- /dev/null +++ b/paddle/fluid/inference/lite/CMakeLists.txt @@ -0,0 +1,5 @@ +cc_library(lite_op_teller SRCS op_teller.cc DEPS lite_full_static framework_proto device_context boost xxhash) +cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto) +cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost) +cc_test(test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf framework_proto glog gtest analysis) +cc_test(test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine lite_tensor_utils) diff --git a/paddle/fluid/inference/lite/engine.cc b/paddle/fluid/inference/lite/engine.cc new file mode 100644 index 00000000000..edc4f5220aa --- /dev/null +++ b/paddle/fluid/inference/lite/engine.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifdef PADDLE_WITH_CUDA +#define LITE_WITH_CUDA 1 +#endif + +#include "paddle/fluid/inference/lite/engine.h" +#include "lite/core/context.h" +#include "lite/core/device_info.h" + +#include "lite/api/paddle_use_kernels.h" +#include "lite/api/paddle_use_ops.h" +#include "lite/api/paddle_use_passes.h" + +namespace paddle { +namespace inference { +namespace lite { + +bool EngineManager::Empty() const { return engines_.size() == 0; } + +bool EngineManager::Has(const std::string& name) const { + if (engines_.count(name) == 0) { + return false; + } + return engines_.at(name).get() != nullptr; +} + +paddle::lite::Predictor* EngineManager::Get(const std::string& name) const { + return engines_.at(name).get(); +} + +paddle::lite::Predictor* EngineManager::Create(const std::string& name, + const EngineConfig& cfg) { + auto* p = new paddle::lite::Predictor(); +#ifdef PADDLE_WITH_CUDA + paddle::lite::Env::Init(); +#endif + p->Build("", cfg.model, cfg.param, cfg.valid_places, cfg.neglected_passes, + cfg.model_type, cfg.model_from_memory); + engines_[name].reset(p); + return p; +} + +void EngineManager::DeleteAll() { + for (auto& item : engines_) { + item.second.reset(nullptr); + } +} + +} // namespace lite +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/lite/engine.h b/paddle/fluid/inference/lite/engine.h new file mode 100644 index 00000000000..f29607490ed --- /dev/null +++ b/paddle/fluid/inference/lite/engine.h @@ -0,0 +1,55 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include + +#include "lite/api/cxx_api.h" + +namespace paddle { +namespace inference { +namespace lite { + +struct EngineConfig { + std::string model; + std::string param; + paddle::lite::Place prefer_place; + std::vector valid_places; + std::vector neglected_passes; + lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf}; + bool model_from_memory{true}; +}; + +class EngineManager { + public: + bool Empty() const; + bool Has(const std::string& name) const; + paddle::lite::Predictor* Get(const std::string& name) const; + paddle::lite::Predictor* Create(const std::string& name, + const EngineConfig& cfg); + void DeleteAll(); + + private: + std::unordered_map> + engines_; +}; + +} // namespace lite +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/lite/op_teller.cc b/paddle/fluid/inference/lite/op_teller.cc new file mode 100644 index 00000000000..fd7a5da7cec --- /dev/null +++ b/paddle/fluid/inference/lite/op_teller.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "paddle/fluid/framework/block_desc.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/inference/lite/op_teller.h" + +#include "lite/core/op_registry.h" + +namespace paddle { +namespace inference { +namespace lite { + +// Just tell by the op_types. +struct SimpleOpTeller : public Teller { + SimpleOpTeller() { + const std::map& op2path = + OpKernelInfoCollector::Global().GetOp2PathDict(); + auto is_non_inst = [](const std::string& op) -> bool { + const std::vector ops = {"feed", "fetch", "while"}; + return std::find(ops.begin(), ops.end(), op) != ops.end(); + }; + for (const auto& op : op2path) { + if (!is_non_inst(op.first)) { + ops_.insert(op.first); + } + } + } + + bool operator()(const std::string& op_type, + const framework::OpDesc& op_desc) override { + return ops_.count(op_type); + } + + private: + std::unordered_set ops_{}; +}; + +struct SingleBlockOpTeller : public Teller { + SingleBlockOpTeller() { ops_.insert("while"); } + + bool operator()(const std::string& op_type, + const framework::OpDesc& op_desc) override { + if (ops_.count(op_type)) { + SimpleOpTeller supported; + const int id = op_desc.GetBlockAttrId("sub_block"); + const framework::BlockDesc& block_desc = + op_desc.Block()->Program()->Block(id); + const std::vector& ops_sub_block = + block_desc.AllOps(); + for (auto* op : ops_sub_block) { + if (!supported(op->Type(), *op) && !this->operator()(op->Type(), *op)) { + return false; + } + } + return true; + } + return false; + } + + private: + std::unordered_set ops_; +}; + +bool OpTeller::Tell(const std::string& op_type, const framework::OpDesc& desc) { + for (auto& teller : tellers_) { + if ((*teller)(op_type, desc)) return true; + } + return false; +} + +OpTeller::OpTeller() { + tellers_.emplace_back(new SimpleOpTeller); + tellers_.emplace_back(new SingleBlockOpTeller); +} + +} // namespace lite +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/lite/op_teller.h b/paddle/fluid/inference/lite/op_teller.h new file mode 100644 index 00000000000..b9391a98a2e --- /dev/null +++ b/paddle/fluid/inference/lite/op_teller.h @@ -0,0 +1,70 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include +#include +#include "paddle/fluid/framework/op_desc.h" + +namespace paddle { +namespace inference { +namespace lite { + +/* + * Single Op teller definition. + * One can override this and define a more complex tell logic, considerring more + * issues such as op_desc. 
+ */ +struct Teller { + virtual bool operator()(const std::string& op_type, + const framework::OpDesc& desc) = 0; + + virtual ~Teller() = default; +}; +/* + * A real example: + * + * struct SomeTeller : public Teller { + * bool operator()(const std::string& op_type, + * const framework::OpDesc& desc) override { + * return op_type == "fc" && desc.Inputs().size() == 2; + * } + *}; + */ + +/* + * class OpTeller helps to tell whether a fluid + * operator can be transformed to a TensorRT layer. + */ +class OpTeller { + public: + static OpTeller& Global() { + static std::unique_ptr x(new OpTeller); + return *x; + } + + bool Tell(const std::string& op_type, const framework::OpDesc& desc); + + private: + OpTeller(); + + private: + std::vector> tellers_; +}; + +} // namespace lite +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/lite/tensor_utils.cc b/paddle/fluid/inference/lite/tensor_utils.cc new file mode 100644 index 00000000000..9f361d563b0 --- /dev/null +++ b/paddle/fluid/inference/lite/tensor_utils.cc @@ -0,0 +1,181 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/lite/tensor_utils.h" +#include +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/inference/lite/engine.h" + +namespace paddle { +namespace inference { +namespace lite { +namespace utils { + +using paddle::lite_api::TargetType; +using paddle::lite_api::PrecisionType; +using paddle::lite_api::DataLayoutType; + +template +void SetLoD(DstLoD* dst, const SrcLoD& src) { + dst->reserve(src.size()); + dst->clear(); + for (auto&& v : src) { + dst->emplace_back(v); + } +} +template void SetLoD( + paddle::lite::LoD* dst, const framework::LoD& src); +template void SetLoD( + framework::LoD* dst, const paddle::lite::LoD& src); + +platform::Place GetNativePlace(const TargetType& type, int id = 0) { + switch (type) { + case TargetType::kHost: + case TargetType::kX86: + return platform::CPUPlace(); + case TargetType::kCUDA: + return platform::CUDAPlace(id); + default: + LOG(FATAL) << "Error target type."; + return platform::Place(); + } +} + +TargetType GetLiteTargetType(const platform::Place& place) { + if (platform::is_cpu_place(place)) { + return TargetType::kHost; + } + return TargetType::kCUDA; +} + +PrecisionType GetLitePrecisionType(framework::proto::VarType::Type type) { + switch (type) { + case framework::proto::VarType_Type_FP32: + return PrecisionType::kFloat; + case framework::proto::VarType_Type_INT8: + return PrecisionType::kInt8; + case framework::proto::VarType_Type_INT32: + return PrecisionType::kInt32; + case framework::proto::VarType_Type_INT64: + return PrecisionType::kInt64; + default: + LOG(FATAL) << "Error precision type."; + return PrecisionType::kUnk; + } +} + +framework::proto::VarType::Type GetNativePrecisionType( + const PrecisionType& type) { + switch (type) { + case PrecisionType::kFloat: + return framework::proto::VarType_Type_FP32; + case 
PrecisionType::kInt8: + return framework::proto::VarType_Type_INT8; + case PrecisionType::kInt32: + return framework::proto::VarType_Type_INT32; + case PrecisionType::kInt64: + return framework::proto::VarType_Type_INT64; + default: + LOG(FATAL) << "Error precision type."; + return static_cast(-1); + } +} + +framework::DataLayout GetNativeLayoutType(const DataLayoutType& type) { + switch (type) { + case DataLayoutType::kNCHW: + return framework::DataLayout::kNCHW; + default: + LOG(FATAL) << "Error layout type."; + return static_cast(-1); + } +} + +void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data, + const platform::Place& src_place, const void* src_data, + const size_t size, const platform::DeviceContext& ctx) { + const platform::CPUPlace cpu_place; + if (platform::is_cpu_place(dst_place) && platform::is_cpu_place(src_place)) { + memory::Copy(cpu_place, dst_data, cpu_place, src_data, size); + } else { +#ifdef PADDLE_WITH_CUDA + if (platform::is_cpu_place(dst_place) && + platform::is_gpu_place(src_place)) { + LOG(FATAL) << "lite::MemoryCopy GPU->CPU is not yet implemented."; + } else if (platform::is_gpu_place(dst_place) && + platform::is_cpu_place(src_place)) { + LOG(FATAL) << "lite::MemoryCopy CPU->GPU is not yet implemented."; + } else if (platform::is_gpu_place(dst_place) && + platform::is_gpu_place(src_place)) { + auto gpu_place = boost::get(src_place); + memory::Copy( + gpu_place, dst_data, gpu_place, src_data, size, + static_cast(ctx).stream()); + } +#else + LOG(FATAL) << "You must define PADDLE_WITH_CUDA for using CUDAPlace."; +#endif + } +} + +void InitDstTensor(paddle::lite::Tensor* dst, const framework::LoDTensor& src) { + // Currently, Lite needs to explicitly specify the target type of + // the input tensor. + constexpr int empty_size = 0; + dst->mutable_data(GetLiteTargetType(src.place()), empty_size); + dst->set_precision(GetLitePrecisionType(src.type())); + SetLoD(dst->mutable_lod(), src.lod()); +} + +void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) { + constexpr framework::proto::VarType::Type dtype = + framework::proto::VarType_Type_FP32; + dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()), + dtype); + SetLoD(dst->mutable_lod(), src.lod()); +} + +template <> +void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src, + const platform::DeviceContext& ctx) { + InitDstTensor(dst, src); + const platform::Place& src_place = src.place(); + const platform::Place& dst_place = GetNativePlace(dst->target()); + const size_t bytes = + static_cast(src.numel()) * framework::SizeOfType(src.type()); + dst->Resize(framework::vectorize(src.dims())); + const void* src_data = src.data(); + void* dst_data = dst->mutable_data(bytes); + MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx); +} + +template <> +void TensorCopyAsync(framework::LoDTensor* dst, const paddle::lite::Tensor& src, + const platform::DeviceContext& ctx) { + InitDstTensor(dst, src); + const platform::Place& src_place = GetNativePlace(src.target()); + const platform::Place& dst_place = dst->place(); + dst->Resize(paddle::framework::make_ddim(src.dims().Vectorize())); + const size_t bytes = + static_cast(src.numel()) * framework::SizeOfType(dst->type()); + const void* src_data = src.raw_data(); + // When Lite is ready, the source type needs to be modified here. 
+ void* dst_data = dst->mutable_data(dst_place, dst->type()); + MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx); +} + +} // namespace utils +} // namespace lite +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/lite/tensor_utils.h b/paddle/fluid/inference/lite/tensor_utils.h new file mode 100644 index 00000000000..95fe8ae903f --- /dev/null +++ b/paddle/fluid/inference/lite/tensor_utils.h @@ -0,0 +1,33 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "lite/api/paddle_place.h" +#include "lite/core/tensor.h" +#include "paddle/fluid/framework/lod_tensor.h" + +namespace paddle { +namespace inference { +namespace lite { +namespace utils { + +template +void TensorCopyAsync(DstTensor* dst, const SrcTensor& src, + const platform::DeviceContext& ctx); + +} // namespace utils +} // namespace lite +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/lite/test_engine.cc b/paddle/fluid/inference/lite/test_engine.cc new file mode 100644 index 00000000000..325c7ab2539 --- /dev/null +++ b/paddle/fluid/inference/lite/test_engine.cc @@ -0,0 +1,132 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "paddle/fluid/inference/lite/engine.h" +#include "paddle/fluid/inference/utils/singleton.h" +#include "paddle/fluid/operators/lite/ut_helper.h" + +#include "paddle/fluid/framework/block_desc.h" +#include "paddle/fluid/framework/op_desc.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/scope.h" + +namespace paddle { +namespace inference { +namespace lite { + +using inference::lite::AddTensorToBlockDesc; +using inference::lite::CreateTensor; +using inference::lite::serialize_params; + +void make_fake_model(std::string* model, std::string* param) { + framework::ProgramDesc program; + LOG(INFO) << "program.block size is " << program.Size(); + auto* block_ = program.Proto()->mutable_blocks(0); + LOG(INFO) << "create block desc"; + framework::BlockDesc block_desc(&program, block_); + auto* feed0 = block_desc.AppendOp(); + feed0->SetType("feed"); + feed0->SetInput("X", {"feed"}); + feed0->SetOutput("Out", {"x"}); + feed0->SetAttr("col", 0); + auto* feed1 = block_desc.AppendOp(); + feed1->SetType("feed"); + feed1->SetInput("X", {"feed"}); + feed1->SetOutput("Out", {"y"}); + feed1->SetAttr("col", 1); + LOG(INFO) << "create elementwise_add op"; + auto* elt_add = block_desc.AppendOp(); + elt_add->SetType("elementwise_add"); + elt_add->SetInput("X", std::vector({"x"})); + elt_add->SetInput("Y", std::vector({"y"})); + elt_add->SetOutput("Out", std::vector({"z"})); + elt_add->SetAttr("axis", -1); + LOG(INFO) << "create fetch op"; + auto* fetch = block_desc.AppendOp(); + fetch->SetType("fetch"); + fetch->SetInput("X", std::vector({"z"})); + fetch->SetOutput("Out", std::vector({"out"})); + fetch->SetAttr("col", 0); + // Set inputs' variable shape in BlockDesc + AddTensorToBlockDesc(block_, "x", std::vector({2, 4}), true); + AddTensorToBlockDesc(block_, "y", std::vector({2, 4}), true); + AddTensorToBlockDesc(block_, "z", std::vector({2, 4}), false); + AddTensorToBlockDesc(block_, "out", std::vector({2, 4}), false); + + *block_->add_ops() = *feed0->Proto(); + *block_->add_ops() = *feed1->Proto(); + *block_->add_ops() = *elt_add->Proto(); + *block_->add_ops() = *fetch->Proto(); + + framework::Scope scope; +#ifdef PADDLE_WITH_CUDA + platform::CUDAPlace place; + platform::CUDADeviceContext ctx(place); +#else + platform::CPUPlace place; + platform::CPUDeviceContext ctx(place); +#endif + // Prepare variables. 
+  std::vector<std::string> repetitive_params{"x", "y"};
+  CreateTensor(&scope, "x", std::vector<int64_t>({2, 4}));
+  CreateTensor(&scope, "y", std::vector<int64_t>({2, 4}));
+  ASSERT_EQ(block_->ops_size(), 4);
+  *model = program.Proto()->SerializeAsString();
+  serialize_params(param, &scope, repetitive_params);
+}
+
+TEST(EngineManager, engine) {
+  ASSERT_EQ(
+      inference::Singleton<inference::lite::EngineManager>::Global().Empty(),
+      true);
+
+  inference::lite::EngineConfig config;
+  make_fake_model(&(config.model), &(config.param));
+  LOG(INFO) << "prepare config";
+
+  const std::string unique_key("engine_0");
+  config.model_from_memory = true;
+  config.valid_places = {
+#ifdef PADDLE_WITH_CUDA
+      paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
+#endif
+      paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
+      paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
+  };
+
+  LOG(INFO) << "Create EngineManager";
+  inference::Singleton<inference::lite::EngineManager>::Global().Create(
+      unique_key, config);
+  LOG(INFO) << "Create EngineManager done";
+  ASSERT_EQ(
+      inference::Singleton<inference::lite::EngineManager>::Global().Empty(),
+      false);
+  ASSERT_EQ(inference::Singleton<inference::lite::EngineManager>::Global().Has(
+                unique_key),
+            true);
+  paddle::lite::Predictor* engine_0 =
+      inference::Singleton<inference::lite::EngineManager>::Global().Get(
+          unique_key);
+  CHECK_NOTNULL(engine_0);
+  inference::Singleton<inference::lite::EngineManager>::Global().DeleteAll();
+  CHECK(inference::Singleton<inference::lite::EngineManager>::Global().Get(
+            unique_key) == nullptr)
+      << "the engine_0 should be nullptr";
+}
+
+} // namespace lite
+} // namespace inference
+} // namespace paddle
diff --git a/paddle/fluid/inference/lite/test_tensor_utils.cc b/paddle/fluid/inference/lite/test_tensor_utils.cc
new file mode 100644
index 00000000000..48ae1bd71d8
--- /dev/null
+++ b/paddle/fluid/inference/lite/test_tensor_utils.cc
@@ -0,0 +1,116 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/inference/lite/tensor_utils.h"
+
+namespace paddle {
+namespace inference {
+namespace lite {
+namespace utils {
+
+using paddle::lite_api::TargetType;
+using paddle::lite_api::PrecisionType;
+using paddle::lite_api::DataLayoutType;
+
+TEST(LiteEngineOp, GetNativePlace) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  platform::Place GetNativePlace(const TargetType& type, int id = 0);
+  EXPECT_TRUE(platform::is_cpu_place(GetNativePlace(TargetType::kHost)));
+  EXPECT_TRUE(platform::is_gpu_place(GetNativePlace(TargetType::kCUDA)));
+  ASSERT_DEATH(GetNativePlace(TargetType::kUnk), "");
+}
+
+TEST(LiteEngineOp, GetLiteTargetType) {
+  TargetType GetLiteTargetType(const platform::Place& place);
+  ASSERT_EQ(GetLiteTargetType(platform::CPUPlace()), TargetType::kHost);
+  ASSERT_EQ(GetLiteTargetType(platform::CUDAPlace(0)), TargetType::kCUDA);
+}
+
+TEST(LiteEngineOp, GetLitePrecisionType) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  PrecisionType GetLitePrecisionType(framework::proto::VarType::Type type);
+  ASSERT_EQ(GetLitePrecisionType(framework::proto::VarType_Type_FP32),
+            PrecisionType::kFloat);
+  ASSERT_EQ(GetLitePrecisionType(framework::proto::VarType_Type_INT8),
+            PrecisionType::kInt8);
+  ASSERT_EQ(GetLitePrecisionType(framework::proto::VarType_Type_INT32),
+            PrecisionType::kInt32);
+  ASSERT_DEATH(
+      GetLitePrecisionType(framework::proto::VarType_Type_SELECTED_ROWS), "");
+}
+
+TEST(LiteEngineOp, GetNativePrecisionType) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  framework::proto::VarType::Type GetNativePrecisionType(
+      const PrecisionType& type);
+  ASSERT_EQ(GetNativePrecisionType(PrecisionType::kFloat),
+            framework::proto::VarType_Type_FP32);
+  ASSERT_EQ(GetNativePrecisionType(PrecisionType::kInt8),
+            framework::proto::VarType_Type_INT8);
+  ASSERT_EQ(GetNativePrecisionType(PrecisionType::kInt32),
+            framework::proto::VarType_Type_INT32);
+  ASSERT_DEATH(GetNativePrecisionType(PrecisionType::kUnk), "");
+}
+
+TEST(LiteEngineOp, GetNativeLayoutType) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  framework::DataLayout GetNativeLayoutType(const DataLayoutType& type);
+  ASSERT_EQ(GetNativeLayoutType(DataLayoutType::kNCHW),
+            framework::DataLayout::kNCHW);
+  ASSERT_DEATH(GetNativeLayoutType(DataLayoutType::kNHWC), "");
+}
+
+void test_tensor_copy(const platform::DeviceContext& ctx) {
+  // Create LoDTensor.
+  std::vector<float> vector({1, 2, 3, 4});
+  framework::LoDTensor lod_tensor;
+  framework::TensorFromVector(vector, &lod_tensor);
+  framework::LoD lod({{0, 2, 4}});
+  lod_tensor.Resize({4, 1});
+  lod_tensor.set_lod(lod);
+  // Create lite::Tensor and copy.
+  paddle::lite::Tensor lite_tensor;
+  TensorCopyAsync(&lite_tensor, lod_tensor, ctx);
+  // Copy to LoDTensor.
+  framework::LoDTensor lod_tensor_n;
+  TensorCopyAsync(&lod_tensor_n, lite_tensor, ctx);
+#ifdef PADDLE_WITH_CUDA
+  if (platform::is_gpu_place(ctx.GetPlace())) {
+    platform::GpuStreamSync(
+        static_cast<const platform::CUDADeviceContext&>(ctx).stream());
+  }
+#endif
+  std::vector<float> result;
+  TensorToVector(lod_tensor_n, &result);
+  ASSERT_EQ(result, vector);
+  ASSERT_EQ(lod_tensor_n.lod(), lod_tensor.lod());
+}
+
+TEST(LiteEngineOp, TensorCopyAsync) {
+  auto* ctx_cpu =
+      platform::DeviceContextPool::Instance().Get(platform::CPUPlace());
+  test_tensor_copy(*ctx_cpu);
+#ifdef PADDLE_WITH_CUDA
+  auto* ctx_gpu =
+      platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0));
+  test_tensor_copy(*ctx_gpu);
+#endif
+}
+
+} // namespace utils
+} // namespace lite
+} // namespace inference
+} // namespace paddle
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index bbcd4f2136b..c3590ef6060 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -352,7 +352,6 @@ if(WITH_GPU AND TENSORRT_FOUND)
   inference_analysis_test(test_analyzer_capi_gpu SRCS analyzer_capi_gpu_tester.cc
     EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
     ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
-
   set(TRT_MODEL_QUANT_RESNET_DIR "${INFERENCE_DEMO_INSTALL_DIR}/quant_small_model")
   if (NOT EXISTS ${TRT_MODEL_QUANT_RESNET_DIR})
     inference_download_and_uncompress(${INFERENCE_DEMO_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "quant_small_model.tar.gz")
@@ -362,6 +361,12 @@ if(WITH_GPU AND TENSORRT_FOUND)
     ARGS --infer_model=${TRT_MODEL_QUANT_RESNET_DIR})
 endif()
 
+set(LITE_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/lite")
+download_data(${LITE_MODEL_INSTALL_DIR} "mul_model_fp32.tgz")
+
+inference_analysis_test(lite_mul_model_test SRCS lite_mul_model_test.cc
+  EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
+  ARGS --infer_model=${LITE_MODEL_INSTALL_DIR})
 inference_analysis_test(test_analyzer_capi SRCS analyzer_capi_tester.cc
   EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
   ARGS --infer_model=${RESNET50_MODEL_DIR}/model)
diff --git a/paddle/fluid/inference/tests/api/lite_mul_model_test.cc b/paddle/fluid/inference/tests/api/lite_mul_model_test.cc
new file mode 100644
index 00000000000..a50fbfd43ea
--- /dev/null
+++ b/paddle/fluid/inference/tests/api/lite_mul_model_test.cc
@@ -0,0 +1,58 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+#include <cmath>
+
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
+
+namespace paddle {
+namespace inference {
+
+TEST(AnalysisPredictor, use_gpu) {
+  std::string model_dir = FLAGS_infer_model + "/" + "mul_model";
+  AnalysisConfig config;
+  config.EnableUseGpu(100, 0);
+  config.SetModel(model_dir);
+  config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32);
+
+  std::vector<PaddleTensor> inputs;
+  auto predictor = CreatePaddlePredictor(config);
+  std::vector<float> input({1});
+
+  PaddleTensor in;
+  in.shape = {1, 1};
+  in.data = PaddleBuf(static_cast<void*>(input.data()), 1 * sizeof(float));
+  in.dtype = PaddleDType::FLOAT32;
+  inputs.emplace_back(in);
+
+  std::vector<PaddleTensor> outputs;
+  ASSERT_TRUE(predictor->Run(inputs, &outputs));
+
+  const std::vector<float> truth_values = {
+      -0.00621776, -0.00620937, 0.00990623,  -0.0039817, -0.00074315,
+      0.61229795,  -0.00491806, -0.00068755, 0.18409646, 0.30090684};
+
+  const size_t expected_size = 1;
+  EXPECT_EQ(outputs.size(), expected_size);
+  float* data_o = static_cast<float*>(outputs[0].data.data());
+  for (size_t j = 0; j < outputs[0].data.length() / sizeof(float); ++j) {
+    EXPECT_LT(std::abs(data_o[j] - truth_values[j]), 10e-6);
+  }
+}
+
+} // namespace inference
+} // namespace paddle
diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 920da2cfa66..947b4d8f0a5 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -38,7 +38,12 @@ if (ANAKIN_SUBGRAPH)
   add_subdirectory(anakin)
 endif()
 
+if (WITH_LITE)
+  add_subdirectory(lite)
+endif()
+
 SET(OP_HEADER_DEPS xxhash executor)
+
 if (WITH_GPU)
   SET(OP_HEADER_DEPS ${OP_HEADER_DEPS} cub)
 endif()
diff --git a/paddle/fluid/operators/lite/CMakeLists.txt b/paddle/fluid/operators/lite/CMakeLists.txt
new file mode 100644
index 00000000000..5bb78925908
--- /dev/null
+++ b/paddle/fluid/operators/lite/CMakeLists.txt
@@ -0,0 +1,2 @@
+op_library(lite_engine_op DEPS lite_engine lite_tensor_utils)
+cc_test(test_lite_engine_op SRCS lite_engine_op_test.cc DEPS lite_engine_op analysis)
diff --git a/paddle/fluid/operators/lite/lite_engine_op.cc b/paddle/fluid/operators/lite/lite_engine_op.cc
new file mode 100644
index 00000000000..7a879c1e216
--- /dev/null
+++ b/paddle/fluid/operators/lite/lite_engine_op.cc
@@ -0,0 +1,44 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/lite/lite_engine_op.h"
+#include <string>
+#include <vector>
+
+namespace paddle {
+
+namespace operators {
+
+class LiteEngineOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("Xs", "A list of inputs.").AsDuplicable();
+    AddOutput("Ys", "A list of outputs.").AsDuplicable();
+    AddAttr<std::string>(
+        "engine_key",
+        "The engine_key here is used to distinguish different Lite Engines");
+    AddComment("Lite engine operator.");
+  }
+};
+
+class LiteInferVarType : public framework::VarTypeInference {
+ public:
+  void operator()(framework::InferVarTypeContext *ctx) const override {}
+};
+
+} // namespace operators
+} // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(lite_engine, ops::LiteEngineOp, ops::LiteEngineOpMaker);
diff --git a/paddle/fluid/operators/lite/lite_engine_op.h b/paddle/fluid/operators/lite/lite_engine_op.h
new file mode 100644
index 00000000000..62bbef66323
--- /dev/null
+++ b/paddle/fluid/operators/lite/lite_engine_op.h
@@ -0,0 +1,110 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <fstream>
+#include <map>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "paddle/fluid/framework/executor.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/inference/analysis/helper.h"
+#include "paddle/fluid/platform/gpu_info.h"
+
+#include "paddle/fluid/inference/lite/engine.h"
+#include "paddle/fluid/inference/lite/tensor_utils.h"
+#include "paddle/fluid/inference/utils/singleton.h"
+
+namespace paddle {
+namespace operators {
+
+class LiteEngineOp : public framework::OperatorBase {
+ private:
+  std::vector<std::string> in_names_;
+  std::vector<std::string> out_names_;
+  paddle::lite::Predictor *engine_;
+  framework::proto::VarType::Type precision_;
+  bool use_gpu_;
+
+ public:
+  LiteEngineOp(const std::string &type,
+               const framework::VariableNameMap &inputs,
+               const framework::VariableNameMap &outputs,
+               const framework::AttributeMap &attrs)
+      : framework::OperatorBase(type, inputs, outputs, attrs) {
+    in_names_ = Inputs("Xs");
+    out_names_ = Outputs("Ys");
+    engine_ =
+        inference::Singleton<inference::lite::EngineManager>::Global().Get(
+            Attr<std::string>("engine_key"));
+    if (Attr<bool>("enable_int8")) {
+      precision_ = framework::proto::VarType_Type_INT8;
+    } else {
+      precision_ = framework::proto::VarType_Type_FP32;
+    }
+    use_gpu_ = Attr<bool>("use_gpu");
+  }
+
+ protected:
+  void RunImpl(const framework::Scope &scope,
+               const platform::Place &dev_place) const override {
+    Execute(scope, dev_place);
+  }
+
+  void Execute(const framework::Scope &scope,
+               const platform::Place &dev_place) const {
+    const platform::DeviceContext *ctx =
+        platform::DeviceContextPool::Instance().Get(dev_place);
+    for (size_t i = 0; i < in_names_.size(); i++) {
+      const framework::LoDTensor &src_t =
+          inference::analysis::GetFromScope<framework::LoDTensor>(
+              scope, in_names_[i]);
+      paddle::lite::Tensor *dst_t = engine_->GetInput(i);
+      VLOG(3) << "fluid -> lite: " << in_names_[i];
+      inference::lite::utils::TensorCopyAsync(dst_t, src_t, *ctx);
+    }
+#ifdef PADDLE_WITH_CUDA
+    if (platform::is_gpu_place(dev_place)) {
+      platform::GpuStreamSync(
+          static_cast<const platform::CUDADeviceContext *>(ctx)->stream());
+    }
+#endif
+    VLOG(3) << "lite engine run";
+    engine_->Run();
+    VLOG(3) << "lite engine run done";
+    for (size_t i = 0; i < out_names_.size(); i++) {
+      const paddle::lite::Tensor &src_t = *(engine_->GetOutput(i));
+      framework::LoDTensor *dst_t =
+          &inference::analysis::GetFromScope<framework::LoDTensor>(
+              scope, out_names_[i]);
+      VLOG(3) << "lite -> fluid: " << out_names_[i];
+      inference::lite::utils::TensorCopyAsync(dst_t, src_t, *ctx);
+    }
+#ifdef PADDLE_WITH_CUDA
+    if (platform::is_gpu_place(dev_place)) {
+      platform::GpuStreamSync(
+          static_cast<const platform::CUDADeviceContext *>(ctx)->stream());
+    }
+#endif
+  }
+};
+
+} // namespace operators
+} // namespace paddle
diff --git a/paddle/fluid/operators/lite/lite_engine_op_test.cc b/paddle/fluid/operators/lite/lite_engine_op_test.cc
new file mode 100644
index 00000000000..3812911e915
--- /dev/null
+++ b/paddle/fluid/operators/lite/lite_engine_op_test.cc
@@ -0,0 +1,115 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#include <gtest/gtest.h>
+
+#include "paddle/fluid/framework/block_desc.h"
+#include "paddle/fluid/framework/op_desc.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/inference/utils/singleton.h"
+#include "paddle/fluid/operators/lite/lite_engine_op.h"
+#include "paddle/fluid/operators/lite/ut_helper.h"
+
+USE_NO_KERNEL_OP(lite_engine)
+
+using paddle::inference::lite::AddTensorToBlockDesc;
+using paddle::inference::lite::CreateTensor;
+using paddle::inference::lite::serialize_params;
+namespace paddle {
+namespace operators {
+TEST(LiteEngineOp, engine_op) {
+  framework::ProgramDesc program;
+  auto* block_ = program.Proto()->mutable_blocks(0);
+  framework::BlockDesc block_desc(&program, block_);
+  auto* feed0 = block_desc.AppendOp();
+  feed0->SetType("feed");
+  feed0->SetInput("X", {"feed"});
+  feed0->SetOutput("Out", {"x"});
+  feed0->SetAttr("col", 0);
+  auto* feed1 = block_desc.AppendOp();
+  feed1->SetType("feed");
+  feed1->SetInput("X", {"feed"});
+  feed1->SetOutput("Out", {"y"});
+  feed1->SetAttr("col", 1);
+  LOG(INFO) << "create elementwise_add op";
+  auto* elt_add = block_desc.AppendOp();
+  elt_add->SetType("elementwise_add");
+  elt_add->SetInput("X", std::vector<std::string>({"x"}));
+  elt_add->SetInput("Y", std::vector<std::string>({"y"}));
+  elt_add->SetOutput("Out", std::vector<std::string>({"z"}));
+  elt_add->SetAttr("axis", -1);
+  LOG(INFO) << "create fetch op";
+  auto* fetch = block_desc.AppendOp();
+  fetch->SetType("fetch");
+  fetch->SetInput("X", std::vector<std::string>({"z"}));
+  fetch->SetOutput("Out", std::vector<std::string>({"out"}));
+  fetch->SetAttr("col", 0);
+  // Set inputs' variable shape in BlockDesc
+  AddTensorToBlockDesc(block_, "x", std::vector<int64_t>({2, 4}), true);
+  AddTensorToBlockDesc(block_, "y", std::vector<int64_t>({2, 4}), true);
+  AddTensorToBlockDesc(block_, "z", std::vector<int64_t>({2, 4}), false);
+  AddTensorToBlockDesc(block_, "out", std::vector<int64_t>({2, 4}), false);
+  *block_->add_ops() = *feed1->Proto();
+  *block_->add_ops() = *feed0->Proto();
+  *block_->add_ops() = *elt_add->Proto();
+  *block_->add_ops() = *fetch->Proto();
+  framework::Scope scope;
+#ifdef PADDLE_WITH_CUDA
+  platform::CUDAPlace place;
+  platform::CUDADeviceContext ctx(place);
+#else
+  platform::CPUPlace place;
+  platform::CPUDeviceContext ctx(place);
+#endif
+  // Prepare variables.
+  CreateTensor(&scope, "x", std::vector<int64_t>({2, 4}), false);
+  CreateTensor(&scope, "y", std::vector<int64_t>({2, 4}), false);
+  CreateTensor(&scope, "out", std::vector<int64_t>({2, 4}), false);
+
+  ASSERT_EQ(block_->ops_size(), 4);
+
+  std::vector<std::string> repetitive_params{"x", "y"};
+  inference::lite::EngineConfig config;
+  config.valid_places = {
+#ifdef PADDLE_WITH_CUDA
+      paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
+#endif
+      paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
+      paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
+  };
+  serialize_params(&(config.param), &scope, repetitive_params);
+  config.model = program.Proto()->SerializeAsString();
+  LOG(INFO) << "create lite_engine desc";
+  framework::OpDesc engine_op_desc(nullptr);
+  engine_op_desc.SetType("lite_engine");
+  engine_op_desc.SetInput("Xs", std::vector<std::string>({"x", "y"}));
+  engine_op_desc.SetOutput("Ys", std::vector<std::string>({"out"}));
+  std::string engine_key = "engine_0";
+  engine_op_desc.SetAttr("engine_key", engine_key);
+  engine_op_desc.SetAttr("enable_int8", false);
+  engine_op_desc.SetAttr("use_gpu", true);
+  engine_op_desc.SetBlockAttr("sub_block", &block_desc);
+  inference::Singleton<inference::lite::EngineManager>::Global().Create(
+      engine_key, config);
+  LOG(INFO) << "create engine op";
+  auto engine_op = framework::OpRegistry::CreateOp(engine_op_desc);
+  LOG(INFO) << "engine_op " << engine_op.get();
+  // Execute them.
+  LOG(INFO) << "engine_op run";
+  engine_op->Run(scope, place);
+  LOG(INFO) << "done";
+}
+} // namespace operators
+} // namespace paddle
diff --git a/paddle/fluid/operators/lite/ut_helper.h b/paddle/fluid/operators/lite/ut_helper.h
new file mode 100644
index 00000000000..b549af81d8d
--- /dev/null
+++ b/paddle/fluid/operators/lite/ut_helper.h
@@ -0,0 +1,111 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+#pragma once
+
+#include <random>
+
+#include <string>
+#include <vector>
+
+#include "paddle/fluid/framework/block_desc.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/inference/analysis/helper.h"
+
+namespace paddle {
+namespace inference {
+namespace lite {
+
+void AddTensorToBlockDesc(framework::proto::BlockDesc* block,
+                          const std::string& name,
+                          const std::vector<int64_t>& shape,
+                          bool persistable = false) {
+  using framework::proto::VarType;
+  auto* var = block->add_vars();
+  framework::VarDesc desc(name);
+  desc.SetType(VarType::LOD_TENSOR);
+  desc.SetDataType(VarType::FP32);
+  desc.SetShape(shape);
+  desc.SetPersistable(persistable);
+  *var = *desc.Proto();
+}
+void serialize_params(std::string* str, framework::Scope* scope,
+                      const std::vector<std::string>& params) {
+  std::ostringstream os;
+#ifdef PADDLE_WITH_CUDA
+  platform::CUDAPlace place;
+  platform::CUDADeviceContext ctx(place);
+#else
+  platform::CPUDeviceContext ctx;
+#endif
+  for (const auto& param : params) {
+    PADDLE_ENFORCE_NOT_NULL(
+        scope->FindVar(param),
+        platform::errors::NotFound("Block should already have a '%s' variable",
+                                   param));
+    auto* tensor = scope->FindVar(param)->GetMutable<framework::LoDTensor>();
+    framework::SerializeToStream(os, *tensor, ctx);
+  }
+  *str = os.str();
+}
+/*
+ * Get a random float value between [low, high]
+ */
+float random(float low, float high) {
+  // static std::random_device rd;
+  static std::mt19937 mt(100);
+  std::uniform_real_distribution<float> dist(low, high);
+  return dist(mt);
+}
+void RandomizeTensor(framework::LoDTensor* tensor,
+                     const platform::Place& place) {
+  auto dims = tensor->dims();
+  size_t num_elements = analysis::AccuDims(dims, dims.size());
+  PADDLE_ENFORCE_GT(num_elements, 0,
+                    platform::errors::InvalidArgument(
+                        "The input tensor dimension of the randomized tensor "
+                        "function should be greater than zero."));
+  platform::CPUPlace cpu_place;
+  framework::LoDTensor temp_tensor;
+  temp_tensor.Resize(dims);
+  auto* temp_data = temp_tensor.mutable_data<float>(cpu_place);
+  for (size_t i = 0; i < num_elements; i++) {
+    *(temp_data + i) = random(0., 1.);
+  }
+  TensorCopySync(temp_tensor, place, tensor);
+}
+
+void CreateTensor(framework::Scope* scope, const std::string& name,
+                  const std::vector<int64_t>& shape, bool in_cuda = true) {
+  auto* var = scope->Var(name);
+  auto* tensor = var->GetMutable<framework::LoDTensor>();
+  auto dims = framework::make_ddim(shape);
+  tensor->Resize(dims);
+  platform::Place place;
+  if (in_cuda) {
+#ifdef PADDLE_WITH_CUDA
+    place = platform::CUDAPlace(0);
+#else
+    LOG(FATAL) << "You must define PADDLE_WITH_CUDA for using CUDAPlace.";
+#endif
+  } else {
+    place = platform::CPUPlace();
+  }
+  RandomizeTensor(tensor, place);
+}
+
+} // namespace lite
+} // namespace inference
+} // namespace paddle
diff --git a/paddle/fluid/platform/gpu_info.cc b/paddle/fluid/platform/gpu_info.cc
index b3f00bf7c02..ad664b88e60 100644
--- a/paddle/fluid/platform/gpu_info.cc
+++ b/paddle/fluid/platform/gpu_info.cc
@@ -349,6 +349,16 @@ void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream) {
                          error_code, CudaErrorWebsite());
 }
 
+void GpuStreamSync(cudaStream_t stream) {
+  auto error_code = cudaStreamSynchronize(stream);
+  PADDLE_ENFORCE_CUDA_SUCCESS(
+      error_code,
+      platform::errors::External(
+          "cudaStreamSynchronize failed in paddle::platform::GpuStreamSync "
+          "error code : %d, %s",
+          error_code, CudaErrorWebsite()));
+}
+
 void RaiseNonOutOfMemoryError(cudaError_t *status) {
   if (*status == cudaErrorMemoryAllocation) {
     *status = cudaSuccess;
@@ -363,5 +373,6 @@ void RaiseNonOutOfMemoryError(cudaError_t *status) {
   PADDLE_ENFORCE_CUDA_SUCCESS(*status);
 }
+
 } // namespace platform
 } // namespace paddle
diff --git a/paddle/fluid/platform/gpu_info.h b/paddle/fluid/platform/gpu_info.h
index 6ed2b344b95..46e5326c8b7 100644
--- a/paddle/fluid/platform/gpu_info.h
+++ b/paddle/fluid/platform/gpu_info.h
@@ -101,6 +101,9 @@ void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src,
 //! Set memory dst with value count size asynchronously
 void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream);
 
+//! Blocks until stream has completed all operations.
+void GpuStreamSync(cudaStream_t stream);
+
 //! Raise error if status is not cudaSuccess or OOM, otherwise reset status.
 void RaiseNonOutOfMemoryError(cudaError_t *status);
 
diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index a4b07eb5ada..87256550f18 100755
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -203,6 +203,7 @@ function cmake_base() {
         -DPY_VERSION=${PY_VERSION:-2.7}
         -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build}
         -DWITH_GRPC=${grpc_flag}
+        -DWITH_LITE=${WITH_LITE:-OFF}
     ========================================
 EOF
     # Disable UNITTEST_USE_VIRTUALENV in docker because
@@ -234,7 +235,8 @@ EOF
         -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} \
         -DPY_VERSION=${PY_VERSION:-2.7} \
         -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build} \
-        -DWITH_GRPC=${grpc_flag}
+        -DWITH_GRPC=${grpc_flag} \
+        -DWITH_LITE=${WITH_LITE:-OFF}
 }
-- 
GitLab
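
Note (editor addition, not part of the patch): the sketch below shows how a user of the C++ inference API would enable the Lite subgraph engine introduced by this change, mirroring the calls exercised in lite_mul_model_test.cc above. The model directory path is a placeholder, and GPU use is optional; treat it as a minimal illustration rather than the canonical example shipped with Paddle.

#include <vector>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  // Configure the analysis predictor and hand matched subgraphs to Paddle-Lite.
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/mul_model");  // placeholder model directory
  config.EnableUseGpu(100, 0);            // initial GPU memory (MB), device id; optional
  config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32);

  auto predictor = paddle::CreatePaddlePredictor(config);

  // Feed a single float through the network, as the new unit test does.
  std::vector<float> input(1, 1.0f);
  paddle::PaddleTensor in;
  in.shape = {1, 1};
  in.data = paddle::PaddleBuf(input.data(), input.size() * sizeof(float));
  in.dtype = paddle::PaddleDType::FLOAT32;

  std::vector<paddle::PaddleTensor> outputs;
  predictor->Run({in}, &outputs);
  return 0;
}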