Unverified commit ad0dfb17, authored by 石晓伟, committed by GitHub

[Feature] Lite subgraph (#22114)

Parent 7d10edc5
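This commit adds a Paddle-Lite engine as a subgraph backend of the analysis predictor. For orientation, here is a minimal sketch of how the new switch is meant to be driven from user code; the model path and the surrounding setup are illustrative assumptions, while EnableLiteEngine, the Precision enum default, and lite_engine_enabled() are the pieces introduced by this change.

#include <memory>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/inference_model");  // hypothetical model directory
  config.EnableUseGpu(100 /* init pool MB */, 0 /* device id */);
  // New in this PR: offload supported subgraphs to the Lite engine.
  config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32,
                          /*passes_filter=*/{}, /*ops_filter=*/{});
  auto predictor = paddle::CreatePaddlePredictor(config);
  return predictor != nullptr ? 0 : 1;
}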
...@@ -86,6 +86,7 @@ option(WITH_HIGH_LEVEL_API_TEST "Test fluid python high-level api interface"
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
option(WITH_DGC "Use DGC(Deep Gradient Compression) or not" ${WITH_DISTRIBUTE})
option(SANITIZER_TYPE "Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined" OFF)
option(WITH_LITE "Compile Paddle Fluid with Lite Engine" OFF)
# PY_VERSION
if(NOT PY_VERSION)
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT LINUX OR NOT WITH_MKL)
message("Paddle-lite will not build because the required Linux and MKL do not exist.")
set(WITH_LITE OFF)
return()
endif()
if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
include(ExternalProject)
set(LITE_PROJECT extern_lite)
set(LITE_SOURCES_DIR ${THIRD_PARTY_PATH}/lite)
set(LITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lite)
# No quotes, so cmake can resolve it as a command with arguments.
set(LITE_BUILD_COMMAND $(MAKE) -j)
set(LITE_OPTIONAL_ARGS -DWITH_MKL=ON
-DLITE_WITH_CUDA=${WITH_GPU}
-DWITH_MKLDNN=OFF
-DLITE_WITH_X86=ON
-DLITE_WITH_PROFILE=OFF
-DWITH_LITE=OFF
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF
-DWITH_PYTHON=OFF
-DWITH_TESTING=ON
-DLITE_BUILD_EXTRA=ON
-DCUDNN_ROOT=${CUDNN_ROOT}
-DLITE_WITH_ARM=OFF)
ExternalProject_Add(
${LITE_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/PaddlePaddle/Paddle-Lite.git"
GIT_TAG 947cda26637d46dc23f4e39d2b52e7d9a1fa6eef
PREFIX ${LITE_SOURCES_DIR}
UPDATE_COMMAND ""
BUILD_COMMAND ${LITE_BUILD_COMMAND}
INSTALL_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${LITE_CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
${LITE_OPTIONAL_ARGS}
)
ExternalProject_Get_property(${LITE_PROJECT} BINARY_DIR)
ExternalProject_Get_property(${LITE_PROJECT} SOURCE_DIR)
set(LITE_BINARY_DIR ${BINARY_DIR})
set(LITE_SOURCE_DIR ${SOURCE_DIR})
endif()
message(STATUS "Paddle-lite BINARY_DIR: ${LITE_BINARY_DIR}")
message(STATUS "Paddle-lite SOURCE_DIR: ${LITE_SOURCE_DIR}")
include_directories(${LITE_SOURCE_DIR})
include_directories(${LITE_BINARY_DIR})
function(external_lite_static_libs alias path)
add_library(${alias} STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET ${alias} PROPERTY IMPORTED_LOCATION
${path})
if (LITE_PROJECT)
add_dependencies(${alias} ${LITE_PROJECT})
endif()
endfunction()
external_lite_static_libs(lite_full_static ${LITE_BINARY_DIR}/lite/api/libapi_full_static.a)
add_definitions(-DPADDLE_WITH_LITE)
...@@ -207,11 +207,6 @@ if(LINUX)
${GPU_COMMON_FLAGS})
endif(LINUX)
if(UNIX AND NOT APPLE)
# except apple from nix*Os family
set(LINUX TRUE)
endif(UNIX AND NOT APPLE)
foreach(flag ${COMMON_FLAGS})
safe_set_cflag(CMAKE_C_FLAGS ${flag})
safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag})
......
...@@ -20,6 +20,11 @@
# for instance, protobuf libs path is <install_dir>/lib64
# on CentOS, but <install_dir>/lib on other systems.
if(UNIX AND NOT APPLE)
# except apple from nix*Os family
set(LINUX TRUE)
endif(UNIX AND NOT APPLE)
IF(WIN32)
SET(HOST_SYSTEM "win32")
ELSE(WIN32)
......
...@@ -284,4 +284,8 @@ if(WITH_DGC)
list(APPEND third_party_deps extern_dgc)
endif()
if (WITH_LITE)
include(external/lite)
endif (WITH_LITE)
add_custom_target(third_party DEPENDS ${third_party_deps})
...@@ -21,6 +21,10 @@ if (ANAKIN_SUBGRAPH)
add_subdirectory(anakin)
endif()
if (WITH_LITE)
add_subdirectory(lite)
endif()
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)
get_property(fluid_third_partys GLOBAL PROPERTY FLUID_THRID_PARTYS)
......
...@@ -197,6 +197,12 @@ struct Argument {
DECL_ARGUMENT_FIELD(anakin_ops_filter, AnakinOpsFilter,
std::vector<std::string>);
DECL_ARGUMENT_FIELD(lite_passes_filter, LitePassesFilter,
std::vector<std::string>);
DECL_ARGUMENT_FIELD(lite_ops_filter, LiteOpsFilter, std::vector<std::string>);
DECL_ARGUMENT_FIELD(lite_precision_mode, LitePrecisionMode,
AnalysisConfig::Precision);
// Memory optimized related.
DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
......
...@@ -128,6 +128,17 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("program",
new framework::ProgramDesc *(&argument->main_program()));
}
if (pass_name == "lite_subgraph_pass") {
bool enable_int8 =
argument->lite_precision_mode() == AnalysisConfig::Precision::kInt8;
pass->Set("program",
new framework::ProgramDesc *(&argument->main_program()));
pass->Set("lite_ops_filter",
new std::vector<std::string>(argument->lite_ops_filter()));
pass->Set("predictor_id", new int(argument->predictor_id()));
pass->Set("enable_int8", new bool(enable_int8));
pass->Set("use_gpu", new bool(argument->use_gpu()));
}
if (pass_name == "anakin_subgraph_pass") {
pass->Set("program",
new framework::ProgramDesc *(&argument->main_program()));
......
...@@ -23,3 +23,12 @@ if (ANAKIN_SUBGRAPH)
file(APPEND ${pass_file} "USE_PASS(anakin_subgraph_pass);\n")
set(INFER_IR_PASSES ${INFER_IR_PASSES} anakin_subgraph_pass CACHE INTERNAL "")
endif()
if (WITH_LITE)
cc_library(lite_subgraph_pass SRCS lite_subgraph_pass.cc DEPS ${analysis_deps} subgraph_util lite_op_teller)
set(analysis_deps ${analysis_deps} subgraph_util lite_subgraph_pass CACHE INTERNAL "")
set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h)
file(APPEND ${pass_file} "USE_PASS(lite_subgraph_pass);\n")
set(INFER_IR_PASSES ${INFER_IR_PASSES} lite_subgraph_pass CACHE INTERNAL "")
cc_test(lite_subgraph_pass_tester SRCS lite_subgraph_pass_tester.cc DEPS lite_subgraph_pass gtest glog)
endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <fstream>
#include <iostream>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/lite/op_teller.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/subgraph_detector.h"
#include "paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/fluid/inference/lite/engine.h"
namespace paddle {
namespace inference {
namespace analysis {
using framework::ir::Node;
using framework::ir::Agent;
using framework::ir::SubGraphFuser;
using framework::ir::Graph;
namespace lite {
std::string UniqueKey(const std::vector<std::string>& engine_inputs,
const std::vector<std::string>& engine_outputs,
const std::string& id) {
std::string engine_hash_key = "";
for (auto name : engine_inputs) {
engine_hash_key += name;
}
for (auto name : engine_outputs) {
engine_hash_key += name;
}
engine_hash_key += id;
auto engine_key = std::to_string(std::hash<std::string>()(engine_hash_key));
return engine_key;
}
std::vector<std::string> IOVarsFilter(const std::vector<Node*>& nodes) {
std::set<std::string> names;
for (const auto& node : nodes) {
if (node->IsVar() && !node->Var()->Persistable()) {
names.insert(node->Name());
}
}
return std::vector<std::string>(names.begin(), names.end());
}
void StrToBinaryFile(const std::string& path, const std::string& str) {
std::ofstream file(path.c_str(), std::ios::binary);
file.write(str.c_str(), str.size());
file.close();
}
void ModifyHostSubgraphOps(
framework::ProgramDesc* host_program, framework::BlockDesc* host_sub_block,
const std::vector<framework::OpDesc*>& subgraph_ops) {
for (auto* op_desc : subgraph_ops) {
auto* sub_block_op = host_sub_block->AppendOp();
sub_block_op->CopyFrom(*op_desc);
if (op_desc->HasAttr("sub_block")) {
int32_t global_sub_id = host_sub_block->ID();
auto* op_sub_block =
host_program->MutableBlock(op_desc->GetBlockAttrId("sub_block"));
op_sub_block->Proto()->set_parent_idx(global_sub_id);
}
}
}
void ModifyHostProgram(framework::ProgramDesc* host_program,
framework::BlockDesc* host_sub_block,
const std::unordered_set<Node*>& io_var_nodes,
const std::vector<framework::OpDesc*>& subgraph_ops) {
for (auto* var_node : io_var_nodes) {
auto* sub_block_var = host_sub_block->Var(var_node->Name());
sub_block_var->Proto()->CopyFrom(*var_node->Var()->Proto());
}
ModifyHostSubgraphOps(host_program, host_sub_block, subgraph_ops);
}
void AppendLiteSubBlocks(const std::vector<framework::OpDesc*>& subgraph_ops,
framework::ProgramDesc* engine_program,
framework::ProgramDesc* host_program,
const int32_t host_sub_id) {
std::unordered_map<int32_t, int32_t> sub_blocks_map;
std::unordered_set<int32_t> copied_host_ids;
sub_blocks_map[host_sub_id] = framework::kRootBlockIndex;
std::function<void(const std::vector<framework::OpDesc*>&)> append_sub_blocks;
append_sub_blocks = [&](const std::vector<framework::OpDesc*>& ops) {
for (auto* op_desc : ops) {
if (op_desc->HasAttr("sub_block")) {
int32_t host_op_sub_id = op_desc->GetBlockAttrId("sub_block");
if (copied_host_ids.count(host_op_sub_id)) continue;
size_t engine_block_size = engine_program->Size();
auto* host_op_sub_block = host_program->MutableBlock(host_op_sub_id);
auto* engine_op_sub_block =
engine_program->AppendBlock(*(op_desc->Block()));
for (auto* var : host_op_sub_block->AllVars()) {
auto* engine_var = engine_op_sub_block->Var(var->Name());
engine_var->Proto()->CopyFrom(*var->Proto());
}
for (auto* op : host_op_sub_block->AllOps()) {
auto* engine_op = engine_op_sub_block->AppendOp();
engine_op->Proto()->CopyFrom(*op->Proto());
}
sub_blocks_map[host_op_sub_id] = engine_block_size;
append_sub_blocks(host_op_sub_block->AllOps());
}
}
};
append_sub_blocks(subgraph_ops);
for (size_t i = 0; i < engine_program->Size(); i++) {
for (auto* op_desc : engine_program->Block(i).AllOps()) {
if (op_desc->HasAttr("sub_block")) {
int32_t id = op_desc->GetBlockAttrId("sub_block");
op_desc->SetAttr("sub_block", sub_blocks_map[id]);
}
}
}
}
// Modifications made by this pass should follow the process
// framework::desc (initial) -> proto::desc (flush) -> framework::desc (final).
// ir::Graph can only change the main block, so the sub blocks
// have to be processed here.
void ModifyEngineProgram(Node* merged_node,
framework::ProgramDesc* host_program,
framework::ProgramDesc* engine_program,
const int32_t host_sub_block_id,
const std::unordered_set<Node*>& io_var_nodes,
const std::vector<framework::OpDesc*>& subgraph_ops) {
// 1. Fill the main block of lite program.
framework::BlockDesc* engine_global_block =
engine_program->MutableBlock(framework::kRootBlockIndex);
PrependFeedOps(engine_global_block, IOVarsFilter(merged_node->inputs));
for (auto* var_node : io_var_nodes) {
framework::VarDesc* sub_block_var =
engine_global_block->Var(var_node->Name());
sub_block_var->Proto()->CopyFrom(*var_node->Var()->Proto());
}
for (auto* op_desc : subgraph_ops) {
auto* sub_block_op = engine_global_block->AppendOp();
sub_block_op->CopyFrom(*op_desc);
}
PrependFetchOps(engine_global_block, IOVarsFilter(merged_node->outputs));
// 2. Append sub blocks in the lite program.
AppendLiteSubBlocks(subgraph_ops, engine_program, host_program,
host_sub_block_id);
}
void OrganizeProgram(Node* merged_node, framework::ProgramDesc* host_program,
framework::ProgramDesc* engine_program,
std::vector<std::string>* repetitive_params) {
std::vector<framework::ir::Node*>& subgraph = *Agent(merged_node).subgraph();
PADDLE_ENFORCE_EQ(subgraph.empty(), false,
platform::errors::NotFound(
"No subgraph found in lite subgraph pass. Please use "
"the full model call from Analysis Predictor."));
const framework::BlockDesc& host_global_block =
host_program->Block(framework::kRootBlockIndex);
framework::BlockDesc* host_sub_block =
host_program->AppendBlock(host_global_block);
string::PrettyLogDetail("--- detect a sub-graph with %d nodes",
subgraph.size());
std::unordered_set<Node*> io_var_nodes = GetRelatedIOVarNodes(subgraph);
for (const auto* node : io_var_nodes) {
VLOG(3) << "IO Variable Name: " << node->Name();
}
std::vector<framework::OpDesc*> subgraph_ops;
for (auto* op_node : subgraph) {
subgraph_ops.push_back(op_node->Op());
}
ModifyHostProgram(host_program, host_sub_block, io_var_nodes, subgraph_ops);
ModifyEngineProgram(merged_node, host_program, engine_program,
host_sub_block->ID(), io_var_nodes, subgraph_ops);
*repetitive_params = ExtractParameters(io_var_nodes, true);
for (const auto& param : *repetitive_params) {
VLOG(3) << "Repetitive param: " << param;
}
host_program->Flush();
engine_program->Flush();
}
} // namespace lite
void LiteSubgraphPass::SetUpEngine(
framework::ProgramDesc* program,
const std::vector<std::string>& repetitive_params,
const std::string& unique_key, bool dump_model) const {
inference::lite::EngineConfig config;
auto* scope = param_scope();
// When the pass is started, only the persistable variables of the
// main block are read. Fluid seems to allow persistable variables
// in sub blocks, but they are controlled by the context, so
// support for them is suspended here.
auto serialize_params = [](std::string* str, framework::Scope* scope,
const std::vector<std::string>& params) {
std::ostringstream os;
platform::CPUDeviceContext ctx;
for (const auto& param : params) {
VLOG(3) << "Serialize param: " << param;
PADDLE_ENFORCE_NOT_NULL(
scope->FindVar(param),
platform::errors::NotFound(
"Block should already have a '%s' variable", param));
auto* tensor = scope->FindVar(param)->GetMutable<framework::LoDTensor>();
framework::SerializeToStream(os, *tensor, ctx);
}
*str = os.str();
};
bool use_gpu = Get<bool>("use_gpu");
bool enable_int8 = Get<bool>("enable_int8");
lite_api::TargetType target_type = use_gpu ? TARGET(kCUDA) : TARGET(kX86);
paddle::lite_api::PrecisionType precision_type =
enable_int8 ? PRECISION(kInt8) : PRECISION(kInt64);
serialize_params(&config.param, scope, repetitive_params);
config.model = program->Proto()->SerializeAsString();
config.valid_places = {
paddle::lite::Place({target_type, precision_type}),
paddle::lite::Place({target_type, PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}),
};
if (dump_model) {
lite::StrToBinaryFile("./model.bin", config.model);
lite::StrToBinaryFile("./param.bin", config.param);
}
inference::Singleton<inference::lite::EngineManager>::Global().Create(
unique_key, config);
}
void LiteSubgraphPass::BuildOperator(
Node* merged_node, framework::ProgramDesc* global_program,
std::vector<std::string>* repetitive_params) const {
framework::ProgramDesc engine_program;
const std::string id = std::to_string(Get<int>("predictor_id"));
const std::vector<std::string> input_names =
lite::IOVarsFilter(merged_node->inputs);
const std::vector<std::string> output_names =
lite::IOVarsFilter(merged_node->outputs);
const std::string unique_key = lite::UniqueKey(input_names, output_names, id);
lite::OrganizeProgram(merged_node, global_program, &engine_program,
repetitive_params);
SetUpEngine(&engine_program, *repetitive_params, unique_key);
auto* op_desc = merged_node->Op();
op_desc->SetInput("Xs", input_names);
op_desc->SetOutput("Ys", output_names);
op_desc->SetType("lite_engine");
op_desc->SetAttr("engine_key", unique_key);
op_desc->SetAttr("enable_int8", Get<bool>("enable_int8"));
op_desc->SetAttr("use_gpu", Get<bool>("use_gpu"));
}
void LiteSubgraphPass::ApplyImpl(framework::ir::Graph* graph) const {
framework::ir::FusePassBase::Init("lite_subgraph_pass", graph);
framework::ProgramDesc* global_program =
Get<framework::ProgramDesc*>("program");
auto& lite_ops_filter = Get<std::vector<std::string>>("lite_ops_filter");
auto teller = [&lite_ops_filter](const Node* node) {
if (!node->IsOp() || !node->Op())
return false;
else if (node->Op()->Type() == "feed" || node->Op()->Type() == "fetch")
return false;
else if (std::find(lite_ops_filter.begin(), lite_ops_filter.end(),
node->Op()->Type()) != lite_ops_filter.end())
return false;
return inference::lite::OpTeller::Global().Tell(node->Op()->Type(),
*node->Op());
};
SubGraphFuser fuser(graph, teller, 0 /* min_subgraph_size */, "lite_engine");
fuser();
std::vector<std::string> repetitive_params;
for (auto* node : graph->Nodes()) {
if (node->IsOp() && !Agent(node).subgraph()->empty()) {
BuildOperator(node, global_program, &repetitive_params);
std::unordered_set<const Node*> nodes2remove(
Agent(node).subgraph()->begin(), Agent(node).subgraph()->end());
framework::ir::GraphSafeRemoveNodes(graph, nodes2remove);
}
}
std::unordered_set<const Node*> nodes2remove;
for (auto* node : graph->Nodes()) {
if (node->IsOp() && Agent(node).deleted()) {
nodes2remove.insert(node);
}
}
framework::ir::GraphSafeRemoveNodes(graph, nodes2remove);
graph->Set(framework::ir::kRepetitiveParamAttr,
new std::vector<std::string>(repetitive_params));
}
} // namespace analysis
} // namespace inference
} // namespace paddle
REGISTER_PASS(lite_subgraph_pass,
paddle::inference::analysis::LiteSubgraphPass);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <paddle/fluid/framework/ir/fuse_pass_base.h>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h"
namespace paddle {
namespace inference {
namespace analysis {
class LiteSubgraphPass : public framework::ir::FusePassBase {
public:
void ApplyImpl(framework::ir::Graph* graph) const override;
private:
void BuildOperator(framework::ir::Node* merged_node,
framework::ProgramDesc* global_program,
std::vector<std::string>* repetitive_params) const;
void SetUpEngine(framework::ProgramDesc* program,
const std::vector<std::string>& repetitive_params,
const std::string& unique_key,
bool dump_model = false) const;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h"
#include <gtest/gtest.h>
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/inference/lite/op_teller.h"
namespace paddle {
namespace inference {
namespace analysis {
namespace lite {
void StrToBinaryFile(const std::string& path, const std::string& str);
void ModifyHostSubgraphOps(framework::ProgramDesc* host_program,
framework::BlockDesc* host_sub_block,
const std::vector<framework::OpDesc*>& subgraph_ops);
void AppendLiteSubBlocks(const std::vector<framework::OpDesc*>& subgraph_ops,
framework::ProgramDesc* engine_program,
framework::ProgramDesc* host_program,
const int32_t host_sub_id);
}  // namespace lite
TEST(LiteSubgraphPass, basic) {
framework::ProgramDesc host_program;
framework::ProgramDesc engine_program;
framework::BlockDesc* host_main_block = host_program.MutableBlock(0);
framework::BlockDesc* host_sub_block =
host_program.AppendBlock(*host_main_block);
framework::OpDesc* host_while_op = host_main_block->AppendOp();
host_main_block->Var("var_main");
host_sub_block->Var("var_sub");
host_while_op->SetType("while");
host_while_op->SetAttr("sub_block", host_sub_block);
framework::OpDesc* host_sub_block_op = host_sub_block->AppendOp();
host_sub_block_op->SetType("leaky_relu");
CHECK(inference::lite::OpTeller::Global().Tell("while", *host_while_op))
<< "Lite operator teller test failed.";
lite::AppendLiteSubBlocks({host_while_op}, &engine_program, &host_program,
host_sub_block->ID());
lite::ModifyHostSubgraphOps(&host_program, host_sub_block, {host_while_op});
lite::StrToBinaryFile("./", "test");
}
} // namespace analysis
} // namespace inference
} // namespace paddle
...@@ -26,7 +26,7 @@ namespace analysis {
using framework::ir::Node;
std::vector<std::string> ExtractParameters(
const std::unordered_set<Node *> &nodes, bool sorted) {
// We can judge whether a variable is a parameter by
// its persistable property, but sometimes the persistable flag
// of the feed op output is true, so we have to identify it.
...@@ -50,9 +50,59 @@ std::vector<std::string> ExtractParameters(
parameters.push_back(node->Name());
}
}
if (sorted) {
std::sort(parameters.begin(), parameters.end());
parameters.erase(std::unique(parameters.begin(), parameters.end()),
parameters.end());
}
return parameters;
}
std::unordered_set<Node *> GetRelatedIOVarNodes(
const std::vector<Node *> &nodes) {
std::unordered_set<Node *> io_nodes;
for (const auto &node : nodes) {
if (!node->IsOp()) continue;
for (const auto &in : node->inputs) {
io_nodes.insert(in);
}
for (const auto &out : node->outputs) {
io_nodes.insert(out);
}
}
return io_nodes;
}
void PrependFeedOps(framework::BlockDesc *global_block,
const std::vector<std::string> &feed_target_names,
std::string feed_holder_name) {
framework::VarDesc *feed_var = global_block->Var(feed_holder_name);
feed_var->SetType(paddle::framework::proto::VarType::FEED_MINIBATCH);
feed_var->SetPersistable(true);
for (size_t i = 0; i < feed_target_names.size(); i++) {
framework::OpDesc *feed_op = global_block->AppendOp();
feed_op->SetType("feed");
feed_op->SetInput("X", {feed_holder_name});
feed_op->SetOutput("Out", {feed_target_names[i]});
feed_op->SetAttr("col", static_cast<int>(i));
}
}
void PrependFetchOps(framework::BlockDesc *global_block,
const std::vector<std::string> &fetch_target_names,
std::string fetch_holder_name) {
framework::VarDesc *fetch_var = global_block->Var(fetch_holder_name);
fetch_var->SetType(paddle::framework::proto::VarType::FETCH_LIST);
fetch_var->SetPersistable(true);
for (size_t i = 0; i < fetch_target_names.size(); i++) {
framework::OpDesc *fetch_op = global_block->AppendOp();
fetch_op->SetType("fetch");
fetch_op->SetInput("X", {fetch_target_names[i]});
fetch_op->SetOutput("Out", {fetch_holder_name});
fetch_op->SetAttr("col", static_cast<int>(i));
}
}
void RenameAndGetOutputs(
const std::vector<framework::ir::Node *> &subgraph_nodes,
framework::BlockDesc *block_desc,
......
...@@ -30,10 +30,21 @@ limitations under the License. */
namespace paddle {
namespace inference {
namespace analysis {
using framework::ir::Node;
std::vector<std::string> ExtractParameters(
const std::unordered_set<framework::ir::Node *> &nodes,
bool sorted = false);
std::unordered_set<framework::ir::Node *> GetRelatedIOVarNodes(
const std::vector<framework::ir::Node *> &nodes);
void PrependFeedOps(framework::BlockDesc *global_block,
const std::vector<std::string> &feed_target_names,
std::string feed_holder_name = "feed");
void PrependFetchOps(framework::BlockDesc *global_block,
const std::vector<std::string> &fetch_target_names,
std::string fetch_holder_name = "fetch");
void RenameAndGetOutputs(
const std::vector<framework::ir::Node *> &subgraph_nodes,
......
...@@ -23,6 +23,7 @@
namespace paddle {
extern const std::vector<std::string> kTRTSubgraphPasses;
extern const std::vector<std::string> kAnakinSubgraphPasses;
extern const std::vector<std::string> kLiteSubgraphPasses;
PassStrategy *AnalysisConfig::pass_builder() const {
if (!pass_builder_.get()) {
...@@ -128,6 +129,11 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(anakin_passes_filter_);
CP_MEMBER(anakin_ops_filter_);
CP_MEMBER(use_lite_);
CP_MEMBER(lite_precision_mode_);
CP_MEMBER(lite_passes_filter_);
CP_MEMBER(lite_ops_filter_);
// profile related.
CP_MEMBER(with_profile_);
...@@ -351,6 +357,20 @@ void AnalysisConfig::Update() {
}
}
if (use_lite_) {
#ifndef PADDLE_WITH_LITE
LOG(WARNING) << "You tried to enable the lite subgraph, "
"but Paddle was not compiled with -DWITH_LITE=ON.";
#endif
pass_builder()->ClearPasses();
for (const auto &pass : kLiteSubgraphPasses) {
if (std::find(lite_passes_filter_.begin(), lite_passes_filter_.end(),
pass) == lite_passes_filter_.end()) {
pass_builder()->AppendPass(pass);
}
}
}
if (ir_debug_) {
pass_builder()->TurnOnDebug();
}
...@@ -395,6 +415,8 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << cpu_math_library_num_threads_;
ss << use_anakin_;
ss << anakin_min_subgraph_size_;
ss << use_lite_;
return ss.str();
}
...@@ -484,6 +506,17 @@ void AnalysisConfig::EnableAnakinEngine(
Update();
}
void AnalysisConfig::EnableLiteEngine(
AnalysisConfig::Precision precision_mode,
const std::vector<std::string> &passes_filter,
const std::vector<std::string> &ops_filter) {
use_lite_ = true;
lite_precision_mode_ = precision_mode;
lite_passes_filter_ = passes_filter;
lite_ops_filter_ = ops_filter;
Update();
}
void AnalysisConfig::PartiallyRelease() {
prog_file_.clear();
prog_file_.shrink_to_fit();
......
...@@ -428,6 +428,13 @@ void AnalysisPredictor::PrepareArgument() {
LOG(INFO) << "Anakin subgraph engine is enabled";
}
if (config_.lite_engine_enabled()) {
argument_.SetLitePrecisionMode(config_.lite_precision_mode_);
argument_.SetLitePassesFilter(config_.lite_passes_filter_);
argument_.SetLiteOpsFilter(config_.lite_ops_filter_);
LOG(INFO) << "Lite subgraph engine is enabled";
}
if (config_.use_mkldnn_) {
LOG(INFO) << "MKLDNN is enabled";
argument_.SetMKLDNNEnabledOpTypes(config_.mkldnn_enabled_op_types_);
......
...@@ -171,10 +171,19 @@ struct AnalysisConfig {
std::vector<std::string> passes_filter = {},
std::vector<std::string> ops_filter = {});
void EnableLiteEngine(
AnalysisConfig::Precision precision_mode = Precision::kFloat32,
const std::vector<std::string>& passes_filter = {},
const std::vector<std::string>& ops_filter = {});
/** A boolean state indicating whether the Anakin sub-graph engine is used.
*/
bool anakin_engine_enabled() const { return use_anakin_; }
/** A boolean state indicating whether the Lite sub-graph engine is used.
*/
bool lite_engine_enabled() const { return use_lite_; }
/** \brief Control whether to debug IR graph analysis phase.
*
* This will generate DOT files for visualizing the computation graph after
...@@ -350,6 +359,11 @@ struct AnalysisConfig {
std::vector<std::string> anakin_passes_filter_;
std::vector<std::string> anakin_ops_filter_;
bool use_lite_{false};
std::vector<std::string> lite_passes_filter_;
std::vector<std::string> lite_ops_filter_;
Precision lite_precision_mode_;
// mkldnn related.
int mkldnn_cache_capacity_{0};
bool use_mkldnn_quantizer_{false};
......
...@@ -105,6 +105,12 @@ const std::vector<std::string> kAnakinSubgraphPasses({
"fc_gru_fuse_pass", //
});
const std::vector<std::string> kLiteSubgraphPasses({
#ifdef PADDLE_WITH_LITE
"lite_subgraph_pass",
#endif
});
GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
passes_.assign({
// "identity_scale_op_clean_pass", //
...@@ -123,7 +129,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
"conv_elementwise_add2_act_fuse_pass", //
"conv_elementwise_add_fuse_pass", //
#endif //
"transpose_flatten_concat_fuse_pass", "transpose_flatten_concat_fuse_pass", //
// following pass should be located in the last, since it will
// work on all fused ops.
"runtime_context_cache_pass"
......
...@@ -163,5 +163,6 @@ class GpuPassStrategy : public PassStrategy {
extern const std::vector<std::string> kTRTSubgraphPasses;
extern const std::vector<std::string> kAnakinSubgraphPasses;
extern const std::vector<std::string> kLiteSubgraphPasses;
}  // namespace paddle
cc_library(lite_op_teller SRCS op_teller.cc DEPS lite_full_static framework_proto device_context boost xxhash)
cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto)
cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost)
cc_test(test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf framework_proto glog gtest analysis)
cc_test(test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine lite_tensor_utils)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef PADDLE_WITH_CUDA
#define LITE_WITH_CUDA 1
#endif
#include "paddle/fluid/inference/lite/engine.h"
#include "lite/core/context.h"
#include "lite/core/device_info.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
namespace paddle {
namespace inference {
namespace lite {
bool EngineManager::Empty() const { return engines_.size() == 0; }
bool EngineManager::Has(const std::string& name) const {
if (engines_.count(name) == 0) {
return false;
}
return engines_.at(name).get() != nullptr;
}
paddle::lite::Predictor* EngineManager::Get(const std::string& name) const {
return engines_.at(name).get();
}
paddle::lite::Predictor* EngineManager::Create(const std::string& name,
const EngineConfig& cfg) {
auto* p = new paddle::lite::Predictor();
#ifdef PADDLE_WITH_CUDA
paddle::lite::Env<TARGET(kCUDA)>::Init();
#endif
p->Build("", cfg.model, cfg.param, cfg.valid_places, cfg.neglected_passes,
cfg.model_type, cfg.model_from_memory);
engines_[name].reset(p);
return p;
}
void EngineManager::DeleteAll() {
for (auto& item : engines_) {
item.second.reset(nullptr);
}
}
} // namespace lite
} // namespace inference
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "lite/api/cxx_api.h"
namespace paddle {
namespace inference {
namespace lite {
struct EngineConfig {
std::string model;
std::string param;
paddle::lite::Place prefer_place;
std::vector<paddle::lite::Place> valid_places;
std::vector<std::string> neglected_passes;
lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf};
bool model_from_memory{true};
};
class EngineManager {
public:
bool Empty() const;
bool Has(const std::string& name) const;
paddle::lite::Predictor* Get(const std::string& name) const;
paddle::lite::Predictor* Create(const std::string& name,
const EngineConfig& cfg);
void DeleteAll();
private:
std::unordered_map<std::string, std::unique_ptr<paddle::lite::Predictor>>
engines_;
};
} // namespace lite
} // namespace inference
} // namespace paddle
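For reference, a minimal sketch of how the EngineManager declared above is driven; it mirrors the flow of test_engine.cc later in this commit. The helper name and the assumption that model_buf/param_buf already hold a serialized ProgramDesc and its persistable tensors are illustrative.

#include <string>
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"

paddle::lite::Predictor* BuildLiteEngine(const std::string& key,
                                         const std::string& model_buf,
                                         const std::string& param_buf) {
  using paddle::inference::Singleton;
  using paddle::inference::lite::EngineConfig;
  using paddle::inference::lite::EngineManager;

  EngineConfig config;
  config.model = model_buf;         // serialized ProgramDesc proto
  config.param = param_buf;         // serialized persistable tensors
  config.model_from_memory = true;
  config.valid_places = {
      paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
      paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
  };
  // Create() builds a lite::Predictor and caches it under `key`;
  // later lookups go through Get(), and DeleteAll() releases every engine.
  Singleton<EngineManager>::Global().Create(key, config);
  return Singleton<EngineManager>::Global().Get(key);
}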
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <map>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/lite/op_teller.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace inference {
namespace lite {
// Just tell by the op_types.
struct SimpleOpTeller : public Teller {
SimpleOpTeller() {
const std::map<std::string, std::string>& op2path =
OpKernelInfoCollector::Global().GetOp2PathDict();
auto is_non_inst = [](const std::string& op) -> bool {
const std::vector<std::string> ops = {"feed", "fetch", "while"};
return std::find(ops.begin(), ops.end(), op) != ops.end();
};
for (const auto& op : op2path) {
if (!is_non_inst(op.first)) {
ops_.insert(op.first);
}
}
}
bool operator()(const std::string& op_type,
const framework::OpDesc& op_desc) override {
return ops_.count(op_type);
}
private:
std::unordered_set<std::string> ops_{};
};
struct SingleBlockOpTeller : public Teller {
SingleBlockOpTeller() { ops_.insert("while"); }
bool operator()(const std::string& op_type,
const framework::OpDesc& op_desc) override {
if (ops_.count(op_type)) {
SimpleOpTeller supported;
const int id = op_desc.GetBlockAttrId("sub_block");
const framework::BlockDesc& block_desc =
op_desc.Block()->Program()->Block(id);
const std::vector<framework::OpDesc*>& ops_sub_block =
block_desc.AllOps();
for (auto* op : ops_sub_block) {
if (!supported(op->Type(), *op) && !this->operator()(op->Type(), *op)) {
return false;
}
}
return true;
}
return false;
}
private:
std::unordered_set<std::string> ops_;
};
bool OpTeller::Tell(const std::string& op_type, const framework::OpDesc& desc) {
for (auto& teller : tellers_) {
if ((*teller)(op_type, desc)) return true;
}
return false;
}
OpTeller::OpTeller() {
tellers_.emplace_back(new SimpleOpTeller);
tellers_.emplace_back(new SingleBlockOpTeller);
}
} // namespace lite
} // namespace inference
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/op_desc.h"
namespace paddle {
namespace inference {
namespace lite {
/*
* Single Op teller definition.
* One can override this and define a more complex tell logic, considering more
* issues such as op_desc.
*/
struct Teller {
virtual bool operator()(const std::string& op_type,
const framework::OpDesc& desc) = 0;
virtual ~Teller() = default;
};
/*
* A real example:
*
* struct SomeTeller : public Teller {
* bool operator()(const std::string& op_type,
* const framework::OpDesc& desc) override {
* return op_type == "fc" && desc.Inputs().size() == 2;
* }
*};
*/
/*
* class OpTeller helps to tell whether a fluid
* operator can be handed over to the Lite engine.
*/
class OpTeller {
public:
static OpTeller& Global() {
static std::unique_ptr<OpTeller> x(new OpTeller);
return *x;
}
bool Tell(const std::string& op_type, const framework::OpDesc& desc);
private:
OpTeller();
private:
std::vector<std::unique_ptr<Teller>> tellers_;
};
} // namespace lite
} // namespace inference
} // namespace paddle
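A small usage sketch of the teller singleton declared above; the same call pattern appears in lite_subgraph_pass.cc and its test. The wrapper function is purely illustrative.

#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/inference/lite/op_teller.h"

// Returns true if any registered teller accepts the op, i.e. the op can be
// placed inside a lite_engine subgraph.
bool CanOffloadToLite(const paddle::framework::OpDesc& op) {
  return paddle::inference::lite::OpTeller::Global().Tell(op.Type(), op);
}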
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/lite/tensor_utils.h"
#include <map>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/inference/lite/engine.h"
namespace paddle {
namespace inference {
namespace lite {
namespace utils {
using paddle::lite_api::TargetType;
using paddle::lite_api::PrecisionType;
using paddle::lite_api::DataLayoutType;
template <typename DstLoD, typename SrcLoD>
void SetLoD(DstLoD* dst, const SrcLoD& src) {
dst->reserve(src.size());
dst->clear();
for (auto&& v : src) {
dst->emplace_back(v);
}
}
template void SetLoD<paddle::lite::LoD, framework::LoD>(
paddle::lite::LoD* dst, const framework::LoD& src);
template void SetLoD<framework::LoD, paddle::lite::LoD>(
framework::LoD* dst, const paddle::lite::LoD& src);
platform::Place GetNativePlace(const TargetType& type, int id = 0) {
switch (type) {
case TargetType::kHost:
case TargetType::kX86:
return platform::CPUPlace();
case TargetType::kCUDA:
return platform::CUDAPlace(id);
default:
LOG(FATAL) << "Error target type.";
return platform::Place();
}
}
TargetType GetLiteTargetType(const platform::Place& place) {
if (platform::is_cpu_place(place)) {
return TargetType::kHost;
}
return TargetType::kCUDA;
}
PrecisionType GetLitePrecisionType(framework::proto::VarType::Type type) {
switch (type) {
case framework::proto::VarType_Type_FP32:
return PrecisionType::kFloat;
case framework::proto::VarType_Type_INT8:
return PrecisionType::kInt8;
case framework::proto::VarType_Type_INT32:
return PrecisionType::kInt32;
case framework::proto::VarType_Type_INT64:
return PrecisionType::kInt64;
default:
LOG(FATAL) << "Error precision type.";
return PrecisionType::kUnk;
}
}
framework::proto::VarType::Type GetNativePrecisionType(
const PrecisionType& type) {
switch (type) {
case PrecisionType::kFloat:
return framework::proto::VarType_Type_FP32;
case PrecisionType::kInt8:
return framework::proto::VarType_Type_INT8;
case PrecisionType::kInt32:
return framework::proto::VarType_Type_INT32;
case PrecisionType::kInt64:
return framework::proto::VarType_Type_INT64;
default:
LOG(FATAL) << "Error precision type.";
return static_cast<framework::proto::VarType::Type>(-1);
}
}
framework::DataLayout GetNativeLayoutType(const DataLayoutType& type) {
switch (type) {
case DataLayoutType::kNCHW:
return framework::DataLayout::kNCHW;
default:
LOG(FATAL) << "Error layout type.";
return static_cast<framework::DataLayout>(-1);
}
}
void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data,
const platform::Place& src_place, const void* src_data,
const size_t size, const platform::DeviceContext& ctx) {
const platform::CPUPlace cpu_place;
if (platform::is_cpu_place(dst_place) && platform::is_cpu_place(src_place)) {
memory::Copy(cpu_place, dst_data, cpu_place, src_data, size);
} else {
#ifdef PADDLE_WITH_CUDA
if (platform::is_cpu_place(dst_place) &&
platform::is_gpu_place(src_place)) {
LOG(FATAL) << "lite::MemoryCopy GPU->CPU is not yet implemented.";
} else if (platform::is_gpu_place(dst_place) &&
platform::is_cpu_place(src_place)) {
LOG(FATAL) << "lite::MemoryCopy CPU->GPU is not yet implemented.";
} else if (platform::is_gpu_place(dst_place) &&
platform::is_gpu_place(src_place)) {
auto gpu_place = boost::get<platform::CUDAPlace>(src_place);
memory::Copy(
gpu_place, dst_data, gpu_place, src_data, size,
static_cast<const platform::CUDADeviceContext&>(ctx).stream());
}
#else
LOG(FATAL) << "You must define PADDLE_WITH_CUDA for using CUDAPlace.";
#endif
}
}
void InitDstTensor(paddle::lite::Tensor* dst, const framework::LoDTensor& src) {
// Currently, Lite needs to explicitly specify the target type of
// the input tensor.
constexpr int empty_size = 0;
dst->mutable_data(GetLiteTargetType(src.place()), empty_size);
dst->set_precision(GetLitePrecisionType(src.type()));
SetLoD(dst->mutable_lod(), src.lod());
}
void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
constexpr framework::proto::VarType::Type dtype =
framework::proto::VarType_Type_FP32;
dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()),
dtype);
SetLoD(dst->mutable_lod(), src.lod());
}
template <>
void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src,
const platform::DeviceContext& ctx) {
InitDstTensor(dst, src);
const platform::Place& src_place = src.place();
const platform::Place& dst_place = GetNativePlace(dst->target());
const size_t bytes =
static_cast<size_t>(src.numel()) * framework::SizeOfType(src.type());
dst->Resize(framework::vectorize(src.dims()));
const void* src_data = src.data<void>();
void* dst_data = dst->mutable_data(bytes);
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
}
template <>
void TensorCopyAsync(framework::LoDTensor* dst, const paddle::lite::Tensor& src,
const platform::DeviceContext& ctx) {
InitDstTensor(dst, src);
const platform::Place& src_place = GetNativePlace(src.target());
const platform::Place& dst_place = dst->place();
dst->Resize(paddle::framework::make_ddim(src.dims().Vectorize()));
const size_t bytes =
static_cast<size_t>(src.numel()) * framework::SizeOfType(dst->type());
const void* src_data = src.raw_data();
// When Lite is ready, the source type needs to be modified here.
void* dst_data = dst->mutable_data(dst_place, dst->type());
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
}
} // namespace utils
} // namespace lite
} // namespace inference
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/api/paddle_place.h"
#include "lite/core/tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
namespace paddle {
namespace inference {
namespace lite {
namespace utils {
template <typename DstTensor, typename SrcTensor>
void TensorCopyAsync(DstTensor* dst, const SrcTensor& src,
const platform::DeviceContext& ctx);
} // namespace utils
} // namespace lite
} // namespace inference
} // namespace paddle
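As a usage reference for the template declared above, a minimal CPU round trip in the spirit of test_tensor_utils.cc from this commit: copy a fluid LoDTensor into a lite::Tensor and back. The function name is illustrative.

#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/lite/tensor_utils.h"
#include "paddle/fluid/platform/device_context.h"

void RoundTripOnCpu() {
  namespace utils = paddle::inference::lite::utils;
  auto& pool = paddle::platform::DeviceContextPool::Instance();
  const auto& ctx = *pool.Get(paddle::platform::CPUPlace());

  paddle::framework::LoDTensor src, back;
  paddle::framework::TensorFromVector(std::vector<float>{1, 2, 3, 4}, &src);
  src.Resize({4, 1});

  paddle::lite::Tensor lite_tensor;
  utils::TensorCopyAsync(&lite_tensor, src, ctx);  // fluid -> lite
  utils::TensorCopyAsync(&back, lite_tensor, ctx);  // lite -> fluid
}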
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/operators/lite/ut_helper.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
namespace paddle {
namespace inference {
namespace lite {
using inference::lite::AddTensorToBlockDesc;
using inference::lite::CreateTensor;
using inference::lite::serialize_params;
void make_fake_model(std::string* model, std::string* param) {
framework::ProgramDesc program;
LOG(INFO) << "program.block size is " << program.Size();
auto* block_ = program.Proto()->mutable_blocks(0);
LOG(INFO) << "create block desc";
framework::BlockDesc block_desc(&program, block_);
auto* feed0 = block_desc.AppendOp();
feed0->SetType("feed");
feed0->SetInput("X", {"feed"});
feed0->SetOutput("Out", {"x"});
feed0->SetAttr("col", 0);
auto* feed1 = block_desc.AppendOp();
feed1->SetType("feed");
feed1->SetInput("X", {"feed"});
feed1->SetOutput("Out", {"y"});
feed1->SetAttr("col", 1);
LOG(INFO) << "create elementwise_add op";
auto* elt_add = block_desc.AppendOp();
elt_add->SetType("elementwise_add");
elt_add->SetInput("X", std::vector<std::string>({"x"}));
elt_add->SetInput("Y", std::vector<std::string>({"y"}));
elt_add->SetOutput("Out", std::vector<std::string>({"z"}));
elt_add->SetAttr("axis", -1);
LOG(INFO) << "create fetch op";
auto* fetch = block_desc.AppendOp();
fetch->SetType("fetch");
fetch->SetInput("X", std::vector<std::string>({"z"}));
fetch->SetOutput("Out", std::vector<std::string>({"out"}));
fetch->SetAttr("col", 0);
// Set inputs' variable shape in BlockDesc
AddTensorToBlockDesc(block_, "x", std::vector<int64_t>({2, 4}), true);
AddTensorToBlockDesc(block_, "y", std::vector<int64_t>({2, 4}), true);
AddTensorToBlockDesc(block_, "z", std::vector<int64_t>({2, 4}), false);
AddTensorToBlockDesc(block_, "out", std::vector<int64_t>({2, 4}), false);
*block_->add_ops() = *feed0->Proto();
*block_->add_ops() = *feed1->Proto();
*block_->add_ops() = *elt_add->Proto();
*block_->add_ops() = *fetch->Proto();
framework::Scope scope;
#ifdef PADDLE_WITH_CUDA
platform::CUDAPlace place;
platform::CUDADeviceContext ctx(place);
#else
platform::CPUPlace place;
platform::CPUDeviceContext ctx(place);
#endif
// Prepare variables.
std::vector<std::string> repetitive_params{"x", "y"};
CreateTensor(&scope, "x", std::vector<int64_t>({2, 4}));
CreateTensor(&scope, "y", std::vector<int64_t>({2, 4}));
ASSERT_EQ(block_->ops_size(), 4);
*model = program.Proto()->SerializeAsString();
serialize_params(param, &scope, repetitive_params);
}
TEST(EngineManager, engine) {
ASSERT_EQ(
inference::Singleton<inference::lite::EngineManager>::Global().Empty(),
true);
inference::lite::EngineConfig config;
make_fake_model(&(config.model), &(config.param));
LOG(INFO) << "prepare config";
const std::string unique_key("engine_0");
config.model_from_memory = true;
config.valid_places = {
#ifdef PADDLE_WITH_CUDA
paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
#endif
paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
};
LOG(INFO) << "Create EngineManager";
inference::Singleton<inference::lite::EngineManager>::Global().Create(
unique_key, config);
LOG(INFO) << "Create EngineManager done";
ASSERT_EQ(
inference::Singleton<inference::lite::EngineManager>::Global().Empty(),
false);
ASSERT_EQ(inference::Singleton<inference::lite::EngineManager>::Global().Has(
unique_key),
true);
paddle::lite::Predictor* engine_0 =
inference::Singleton<inference::lite::EngineManager>::Global().Get(
unique_key);
CHECK_NOTNULL(engine_0);
inference::Singleton<inference::lite::EngineManager>::Global().DeleteAll();
CHECK(inference::Singleton<inference::lite::EngineManager>::Global().Get(
unique_key) == nullptr)
<< "the engine_0 should be nullptr";
}
} // namespace lite
} // namespace inference
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/lite/tensor_utils.h"
namespace paddle {
namespace inference {
namespace lite {
namespace utils {
using paddle::lite_api::TargetType;
using paddle::lite_api::PrecisionType;
using paddle::lite_api::DataLayoutType;
TEST(LiteEngineOp, GetNativePlace) {
::testing::FLAGS_gtest_death_test_style = "threadsafe";
platform::Place GetNativePlace(const TargetType& type, int id = 0);
EXPECT_TRUE(platform::is_cpu_place(GetNativePlace(TargetType::kHost)));
EXPECT_TRUE(platform::is_gpu_place(GetNativePlace(TargetType::kCUDA)));
ASSERT_DEATH(GetNativePlace(TargetType::kUnk), "");
}
TEST(LiteEngineOp, GetLiteTargetType) {
TargetType GetLiteTargetType(const platform::Place& place);
ASSERT_EQ(GetLiteTargetType(platform::CPUPlace()), TargetType::kHost);
ASSERT_EQ(GetLiteTargetType(platform::CUDAPlace(0)), TargetType::kCUDA);
}
TEST(LiteEngineOp, GetLitePrecisionType) {
::testing::FLAGS_gtest_death_test_style = "threadsafe";
PrecisionType GetLitePrecisionType(framework::proto::VarType::Type type);
ASSERT_EQ(GetLitePrecisionType(framework::proto::VarType_Type_FP32),
PrecisionType::kFloat);
ASSERT_EQ(GetLitePrecisionType(framework::proto::VarType_Type_INT8),
PrecisionType::kInt8);
ASSERT_EQ(GetLitePrecisionType(framework::proto::VarType_Type_INT32),
PrecisionType::kInt32);
ASSERT_DEATH(
GetLitePrecisionType(framework::proto::VarType_Type_SELECTED_ROWS), "");
}
TEST(LiteEngineOp, GetNativePrecisionType) {
::testing::FLAGS_gtest_death_test_style = "threadsafe";
framework::proto::VarType::Type GetNativePrecisionType(
const PrecisionType& type);
ASSERT_EQ(GetNativePrecisionType(PrecisionType::kFloat),
framework::proto::VarType_Type_FP32);
ASSERT_EQ(GetNativePrecisionType(PrecisionType::kInt8),
framework::proto::VarType_Type_INT8);
ASSERT_EQ(GetNativePrecisionType(PrecisionType::kInt32),
framework::proto::VarType_Type_INT32);
ASSERT_DEATH(GetNativePrecisionType(PrecisionType::kUnk), "");
}
TEST(LiteEngineOp, GetNativeLayoutType) {
::testing::FLAGS_gtest_death_test_style = "threadsafe";
framework::DataLayout GetNativeLayoutType(const DataLayoutType& type);
ASSERT_EQ(GetNativeLayoutType(DataLayoutType::kNCHW),
framework::DataLayout::kNCHW);
ASSERT_DEATH(GetNativeLayoutType(DataLayoutType::kNHWC), "");
}
void test_tensor_copy(const platform::DeviceContext& ctx) {
// Create LoDTensor.
std::vector<float> vector({1, 2, 3, 4});
framework::LoDTensor lod_tensor;
framework::TensorFromVector(vector, &lod_tensor);
framework::LoD lod({{0, 2, 4}});
lod_tensor.Resize({4, 1});
lod_tensor.set_lod(lod);
// Create lite::Tensor and copy.
paddle::lite::Tensor lite_tensor;
TensorCopyAsync(&lite_tensor, lod_tensor, ctx);
// Copy to LoDTensor.
framework::LoDTensor lod_tensor_n;
TensorCopyAsync(&lod_tensor_n, lite_tensor, ctx);
#ifdef PADDLE_WITH_CUDA
if (platform::is_gpu_place(ctx.GetPlace())) {
platform::GpuStreamSync(
static_cast<const platform::CUDADeviceContext&>(ctx).stream());
}
#endif
std::vector<float> result;
TensorToVector(lod_tensor_n, &result);
ASSERT_EQ(result, vector);
ASSERT_EQ(lod_tensor_n.lod(), lod_tensor.lod());
}
TEST(LiteEngineOp, TensorCopyAsync) {
auto* ctx_cpu =
platform::DeviceContextPool::Instance().Get(platform::CPUPlace());
test_tensor_copy(*ctx_cpu);
#ifdef PADDLE_WITH_CUDA
auto* ctx_gpu =
platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0));
test_tensor_copy(*ctx_gpu);
#endif
}
} // namespace utils
} // namespace lite
} // namespace inference
} // namespace paddle
...@@ -352,7 +352,6 @@ if(WITH_GPU AND TENSORRT_FOUND)
inference_analysis_test(test_analyzer_capi_gpu SRCS analyzer_capi_gpu_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
set(TRT_MODEL_QUANT_RESNET_DIR "${INFERENCE_DEMO_INSTALL_DIR}/quant_small_model")
if (NOT EXISTS ${TRT_MODEL_QUANT_RESNET_DIR})
inference_download_and_uncompress(${INFERENCE_DEMO_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "quant_small_model.tar.gz")
...@@ -362,6 +361,12 @@ if(WITH_GPU AND TENSORRT_FOUND)
ARGS --infer_model=${TRT_MODEL_QUANT_RESNET_DIR})
endif()
set(LITE_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/lite")
download_data(${LITE_MODEL_INSTALL_DIR} "mul_model_fp32.tgz")
inference_analysis_test(lite_mul_model_test SRCS lite_mul_model_test.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
ARGS --infer_model=${LITE_MODEL_INSTALL_DIR})
inference_analysis_test(test_analyzer_capi SRCS analyzer_capi_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
ARGS --infer_model=${RESNET50_MODEL_DIR}/model)
...
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <cmath>
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
namespace inference {
TEST(AnalysisPredictor, use_gpu) {
std::string model_dir = FLAGS_infer_model + "/" + "mul_model";
AnalysisConfig config;
config.EnableUseGpu(100, 0);
config.SetModel(model_dir);
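  // Hand the supported subgraphs over to the Lite engine, running them in FP32.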
config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32);
std::vector<PaddleTensor> inputs;
auto predictor = CreatePaddlePredictor(config);
std::vector<float> input({1});
PaddleTensor in;
in.shape = {1, 1};
in.data = PaddleBuf(static_cast<void*>(input.data()), 1 * sizeof(float));
in.dtype = PaddleDType::FLOAT32;
inputs.emplace_back(in);
std::vector<PaddleTensor> outputs;
ASSERT_TRUE(predictor->Run(inputs, &outputs));
const std::vector<float> truth_values = {
-0.00621776, -0.00620937, 0.00990623, -0.0039817, -0.00074315,
0.61229795, -0.00491806, -0.00068755, 0.18409646, 0.30090684};
const size_t expected_size = 1;
EXPECT_EQ(outputs.size(), expected_size);
float* data_o = static_cast<float*>(outputs[0].data.data());
for (size_t j = 0; j < outputs[0].data.length() / sizeof(float); ++j) {
EXPECT_LT(std::abs(data_o[j] - truth_values[j]), 10e-6);
}
}
} // namespace inference
} // namespace paddle
...@@ -38,7 +38,12 @@ if (ANAKIN_SUBGRAPH)
add_subdirectory(anakin)
endif()
if (WITH_LITE)
add_subdirectory(lite)
endif()
SET(OP_HEADER_DEPS xxhash executor)
if (WITH_GPU)
SET(OP_HEADER_DEPS ${OP_HEADER_DEPS} cub)
endif()
...
op_library(lite_engine_op DEPS lite_engine lite_tensor_utils)
cc_test(test_lite_engine_op SRCS lite_engine_op_test.cc DEPS lite_engine_op analysis)
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/lite/lite_engine_op.h"
#include <string>
#include <vector>
namespace paddle {
namespace operators {
class LiteEngineOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Xs", "A list of inputs.").AsDuplicable();
AddOutput("Ys", "A list of outputs.").AsDuplicable();
AddAttr<std::string>(
"engine_key",
"The engine_key here is used to distinguish different Lite Engines");
AddComment("Lite engine operator.");
}
};
class LiteInferVarType : public framework::VarTypeInference {
public:
void operator()(framework::InferVarTypeContext *ctx) const override {}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(lite_engine, ops::LiteEngineOp, ops::LiteEngineOpMaker);
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/lite/tensor_utils.h"
#include "paddle/fluid/inference/utils/singleton.h"
namespace paddle {
namespace operators {
class LiteEngineOp : public framework::OperatorBase {
private:
std::vector<std::string> in_names_;
std::vector<std::string> out_names_;
paddle::lite::Predictor *engine_;
framework::proto::VarType::Type precision_;
bool use_gpu_;
public:
LiteEngineOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {
in_names_ = Inputs("Xs");
out_names_ = Outputs("Ys");
engine_ =
inference::Singleton<inference::lite::EngineManager>::Global().Get(
Attr<std::string>("engine_key"));
if (Attr<bool>("enable_int8")) {
precision_ = framework::proto::VarType_Type_INT8;
} else {
precision_ = framework::proto::VarType_Type_FP32;
}
use_gpu_ = Attr<bool>("use_gpu");
}
protected:
void RunImpl(const framework::Scope &scope,
const platform::Place &dev_place) const override {
Execute(scope, dev_place);
}
void Execute(const framework::Scope &scope,
const platform::Place &dev_place) const {
const platform::DeviceContext *ctx =
platform::DeviceContextPool::Instance().Get(dev_place);
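    // Stage inputs: copy each LoDTensor from the fluid scope into the Lite engine's input slots.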
for (size_t i = 0; i < in_names_.size(); i++) {
const framework::LoDTensor &src_t =
inference::analysis::GetFromScope<framework::LoDTensor>(scope,
in_names_[i]);
paddle::lite::Tensor *dst_t = engine_->GetInput(i);
VLOG(3) << "fluid -> lite: " << in_names_[i];
inference::lite::utils::TensorCopyAsync(dst_t, src_t, *ctx);
}
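    // With CUDA enabled, wait for the asynchronous copies to finish before the engine consumes the inputs.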
#ifdef PADDLE_WITH_CUDA
if (platform::is_gpu_place(dev_place)) {
platform::GpuStreamSync(
static_cast<const platform::CUDADeviceContext *>(ctx)->stream());
}
#endif
VLOG(3) << "lite engine run";
engine_->Run();
VLOG(3) << "lite engine run done";
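    // Fetch results: copy each Lite output tensor back into the corresponding fluid scope variable.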
for (size_t i = 0; i < out_names_.size(); i++) {
const paddle::lite::Tensor &src_t = *(engine_->GetOutput(i));
framework::LoDTensor *dst_t =
&inference::analysis::GetFromScope<framework::LoDTensor>(
scope, out_names_[i]);
VLOG(3) << "lite -> fluid: " << out_names_[i];
inference::lite::utils::TensorCopyAsync(dst_t, src_t, *ctx);
}
#ifdef PADDLE_WITH_CUDA
if (platform::is_gpu_place(dev_place)) {
platform::GpuStreamSync(
static_cast<const platform::CUDADeviceContext *>(ctx)->stream());
}
#endif
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/operators/lite/lite_engine_op.h"
#include "paddle/fluid/operators/lite/ut_helper.h"
USE_NO_KERNEL_OP(lite_engine)
using paddle::inference::lite::AddTensorToBlockDesc;
using paddle::inference::lite::CreateTensor;
using paddle::inference::lite::serialize_params;
namespace paddle {
namespace operators {
TEST(LiteEngineOp, engine_op) {
framework::ProgramDesc program;
auto* block_ = program.Proto()->mutable_blocks(0);
framework::BlockDesc block_desc(&program, block_);
auto* feed0 = block_desc.AppendOp();
feed0->SetType("feed");
feed0->SetInput("X", {"feed"});
feed0->SetOutput("Out", {"x"});
feed0->SetAttr("col", 0);
auto* feed1 = block_desc.AppendOp();
feed1->SetType("feed");
feed1->SetInput("X", {"feed"});
feed1->SetOutput("Out", {"y"});
feed1->SetAttr("col", 1);
LOG(INFO) << "create elementwise_add op";
auto* elt_add = block_desc.AppendOp();
elt_add->SetType("elementwise_add");
elt_add->SetInput("X", std::vector<std::string>({"x"}));
elt_add->SetInput("Y", std::vector<std::string>({"y"}));
elt_add->SetOutput("Out", std::vector<std::string>({"z"}));
elt_add->SetAttr("axis", -1);
LOG(INFO) << "create fetch op";
auto* fetch = block_desc.AppendOp();
fetch->SetType("fetch");
fetch->SetInput("X", std::vector<std::string>({"z"}));
fetch->SetOutput("Out", std::vector<std::string>({"out"}));
fetch->SetAttr("col", 0);
// Set inputs' variable shape in BlockDesc
AddTensorToBlockDesc(block_, "x", std::vector<int64_t>({2, 4}), true);
AddTensorToBlockDesc(block_, "y", std::vector<int64_t>({2, 4}), true);
AddTensorToBlockDesc(block_, "z", std::vector<int64_t>({2, 4}), false);
AddTensorToBlockDesc(block_, "out", std::vector<int64_t>({2, 4}), false);
*block_->add_ops() = *feed1->Proto();
*block_->add_ops() = *feed0->Proto();
*block_->add_ops() = *elt_add->Proto();
*block_->add_ops() = *fetch->Proto();
framework::Scope scope;
#ifdef PADDLE_WITH_CUDA
platform::CUDAPlace place;
platform::CUDADeviceContext ctx(place);
#else
platform::CPUPlace place;
platform::CPUDeviceContext ctx(place);
#endif
// Prepare variables.
CreateTensor(&scope, "x", std::vector<int64_t>({2, 4}), false);
CreateTensor(&scope, "y", std::vector<int64_t>({2, 4}), false);
CreateTensor(&scope, "out", std::vector<int64_t>({2, 4}), false);
ASSERT_EQ(block_->ops_size(), 4);
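  // "x" and "y" are persistable parameters; they get serialized into the engine config below.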
std::vector<std::string> repetitive_params{"x", "y"};
inference::lite::EngineConfig config;
config.valid_places = {
#ifdef PADDLE_WITH_CUDA
paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
#endif
paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
};
serialize_params(&(config.param), &scope, repetitive_params);
config.model = program.Proto()->SerializeAsString();
LOG(INFO) << "create lite_engine desc";
framework::OpDesc engine_op_desc(nullptr);
engine_op_desc.SetType("lite_engine");
engine_op_desc.SetInput("Xs", std::vector<std::string>({"x", "y"}));
engine_op_desc.SetOutput("Ys", std::vector<std::string>({"out"}));
std::string engine_key = "engine_0";
engine_op_desc.SetAttr("engine_key", engine_key);
engine_op_desc.SetAttr("enable_int8", false);
engine_op_desc.SetAttr("use_gpu", true);
engine_op_desc.SetBlockAttr("sub_block", &block_desc);
inference::Singleton<inference::lite::EngineManager>::Global().Create(
engine_key, config);
LOG(INFO) << "create engine op";
auto engine_op = framework::OpRegistry::CreateOp(engine_op_desc);
LOG(INFO) << "engine_op " << engine_op.get();
// Execute them.
LOG(INFO) << "engine_op run";
engine_op->Run(scope, place);
LOG(INFO) << "done";
}
} // namespace operators
} // namespace paddle
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <gtest/gtest.h>
#include <random>
#include <sstream>
#include <string>
#include <vector>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/analysis/helper.h"
namespace paddle {
namespace inference {
namespace lite {
void AddTensorToBlockDesc(framework::proto::BlockDesc* block,
const std::string& name,
const std::vector<int64_t>& shape,
bool persistable = false) {
using framework::proto::VarType;
auto* var = block->add_vars();
framework::VarDesc desc(name);
desc.SetType(VarType::LOD_TENSOR);
desc.SetDataType(VarType::FP32);
desc.SetShape(shape);
desc.SetPersistable(persistable);
*var = *desc.Proto();
}
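// Serialize the listed persistable parameters from the scope into one string (used as EngineConfig::param).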
void serialize_params(std::string* str, framework::Scope* scope,
const std::vector<std::string>& params) {
std::ostringstream os;
#ifdef PADDLE_WITH_CUDA
platform::CUDAPlace place;
platform::CUDADeviceContext ctx(place);
#else
platform::CPUDeviceContext ctx;
#endif
for (const auto& param : params) {
PADDLE_ENFORCE_NOT_NULL(
scope->FindVar(param),
platform::errors::NotFound("Block should already have a '%s' variable",
param));
auto* tensor = scope->FindVar(param)->GetMutable<framework::LoDTensor>();
framework::SerializeToStream(os, *tensor, ctx);
}
*str = os.str();
}
/*
* Get a random float value between [low, high]
*/
float random(float low, float high) {
// static std::random_device rd;
static std::mt19937 mt(100);
std::uniform_real_distribution<double> dist(low, high);
return dist(mt);
}
void RandomizeTensor(framework::LoDTensor* tensor,
const platform::Place& place) {
auto dims = tensor->dims();
size_t num_elements = analysis::AccuDims(dims, dims.size());
PADDLE_ENFORCE_GT(num_elements, 0,
platform::errors::InvalidArgument(
"The input tensor dimension of the randomized tensor "
"function should be greater than zero."));
platform::CPUPlace cpu_place;
framework::LoDTensor temp_tensor;
temp_tensor.Resize(dims);
auto* temp_data = temp_tensor.mutable_data<float>(cpu_place);
for (size_t i = 0; i < num_elements; i++) {
*(temp_data + i) = random(0., 1.);
}
TensorCopySync(temp_tensor, place, tensor);
}
void CreateTensor(framework::Scope* scope, const std::string& name,
const std::vector<int64_t>& shape, bool in_cuda = true) {
auto* var = scope->Var(name);
auto* tensor = var->GetMutable<framework::LoDTensor>();
auto dims = framework::make_ddim(shape);
tensor->Resize(dims);
platform::Place place;
if (in_cuda) {
#ifdef PADDLE_WITH_CUDA
place = platform::CUDAPlace(0);
#else
LOG(FATAL) << "You must define PADDLE_WITH_CUDA for using CUDAPlace.";
#endif
} else {
place = platform::CPUPlace();
}
RandomizeTensor(tensor, place);
}
} // namespace lite
} // namespace inference
} // namespace paddle
...@@ -349,6 +349,16 @@ void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream) {
error_code, CudaErrorWebsite());
}
void GpuStreamSync(cudaStream_t stream) {
auto error_code = cudaStreamSynchronize(stream);
PADDLE_ENFORCE_CUDA_SUCCESS(
error_code,
platform::errors::External(
"cudaStreamSynchronize failed in paddle::platform::GpuStreamSync "
"error code : %d, %s",
error_code, CudaErrorWebsite()));
}
void RaiseNonOutOfMemoryError(cudaError_t *status) {
if (*status == cudaErrorMemoryAllocation) {
*status = cudaSuccess;
...@@ -363,5 +373,6 @@ void RaiseNonOutOfMemoryError(cudaError_t *status) {
PADDLE_ENFORCE_CUDA_SUCCESS(*status);
}
} // namespace platform
} // namespace paddle
...@@ -101,6 +101,9 @@ void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src,
//! Set memory dst with value count size asynchronously
void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream);
//! Blocks until stream has completed all operations.
void GpuStreamSync(cudaStream_t stream);
//! Raise error if status is not cudaSuccess or OOM, otherwise reset status.
void RaiseNonOutOfMemoryError(cudaError_t *status);
...
...@@ -203,6 +203,7 @@ function cmake_base() {
-DPY_VERSION=${PY_VERSION:-2.7}
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build}
-DWITH_GRPC=${grpc_flag}
-DWITH_LITE=${WITH_LITE:-OFF}
========================================
EOF
# Disable UNITTEST_USE_VIRTUALENV in docker because
...@@ -234,7 +235,8 @@ EOF
-DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} \
-DPY_VERSION=${PY_VERSION:-2.7} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build} \
-DWITH_GRPC=${grpc_flag} \
-DWITH_LITE=${WITH_LITE:-OFF}
}
...
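For local verification, a minimal sketch of a configure-and-build invocation that switches the new engine on; the build directory layout and the other flags shown here are illustrative assumptions rather than part of this patch:
# from an out-of-source build directory of the Paddle repo (illustrative)
cmake .. -DWITH_GPU=ON -DWITH_MKL=ON -DWITH_LITE=ON
make -j$(nproc)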