Unverified commit 0a42986c authored by Wilber, committed by GitHub

Cherry-pick. (#28454)

Parent 78d68d59
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
if(NOT LINUX OR NOT WITH_MKL) if(NOT LINUX)
message("Paddle-lite will not build because the required Linux and MKL do not exist.") message("Paddle-lite will not build because the required Linux do not exist.")
set(WITH_LITE OFF) set(WITH_LITE OFF)
return() return()
endif() endif()
...@@ -22,9 +22,11 @@ if(XPU_SDK_ROOT) ...@@ -22,9 +22,11 @@ if(XPU_SDK_ROOT)
set(LITE_WITH_XPU ON) set(LITE_WITH_XPU ON)
include_directories("${XPU_SDK_ROOT}/XTDK/include") include_directories("${XPU_SDK_ROOT}/XTDK/include")
include_directories("${XPU_SDK_ROOT}/XTCL/include") include_directories("${XPU_SDK_ROOT}/XTCL/include")
add_definitions(-DPADDLE_WITH_XPU) add_definitions(-DLITE_SUBGRAPH_WITH_XPU)
LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/shlib/") LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/shlib/")
LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/runtime/shlib/") LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/runtime/shlib/")
set(XPURT_LIB ${XPU_SDK_ROOT}/XTDK/runtime/shlib/libxpurt.so)
set(XPUAPI_LIB ${XPU_SDK_ROOT}/XTDK/shlib/libxpuapi.so)
endif() endif()
if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
...@@ -42,30 +44,30 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) ...@@ -42,30 +44,30 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
endif() endif()
# No quotes, so cmake can resolve it as a command with arguments. # No quotes, so cmake can resolve it as a command with arguments.
set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j) if(WITH_ARM)
set(LITE_OPTIONAL_ARGS -DWITH_MKL=ON set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j)
-DLITE_WITH_CUDA=${WITH_GPU} message(WARNING "BUILD_COMMAND: ${LITE_BUILD_COMMAND}")
-DWITH_MKLDNN=OFF set(LITE_OPTIONAL_ARGS -DWITH_MKL=OFF
-DLITE_WITH_X86=ON -DLITE_WITH_CUDA=OFF
-DLITE_WITH_PROFILE=OFF -DWITH_MKLDNN=OFF
-DWITH_LITE=OFF -DLITE_WITH_X86=OFF
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON
-DWITH_PYTHON=OFF -DLITE_WITH_PROFILE=OFF
-DWITH_TESTING=OFF -DARM_TARGET_OS=armlinux
-DLITE_BUILD_EXTRA=ON -DWITH_LITE=ON
-DCUDNN_ROOT=${CUDNN_ROOT} -DWITH_PYTHON=OFF
-DLITE_WITH_STATIC_CUDA=OFF -DWITH_TESTING=OFF
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME} -DLITE_BUILD_EXTRA=ON
-DLITE_WITH_XPU=${LITE_WITH_XPU} -DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_ROOT=${XPU_SDK_ROOT} -DXPU_SDK_ROOT=${XPU_SDK_ROOT}
-DLITE_WITH_ARM=OFF) -DLITE_WITH_ARM=ON)
ExternalProject_Add(
ExternalProject_Add(
${LITE_PROJECT} ${LITE_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/PaddlePaddle/Paddle-Lite.git" GIT_REPOSITORY "https://github.com/PaddlePaddle/Paddle-Lite.git"
GIT_TAG ${LITE_GIT_TAG} GIT_TAG ${LITE_GIT_TAG}
PREFIX ${LITE_SOURCES_DIR} PREFIX ${LITE_SOURCES_DIR}
PATCH_COMMAND mkdir -p ${LITE_SOURCES_DIR}/src/extern_lite-build/lite/gen_code && touch ${LITE_SOURCES_DIR}/src/extern_lite-build/lite/gen_code/__generated_code__.cc
UPDATE_COMMAND "" UPDATE_COMMAND ""
BUILD_COMMAND ${LITE_BUILD_COMMAND} BUILD_COMMAND ${LITE_BUILD_COMMAND}
INSTALL_COMMAND "" INSTALL_COMMAND ""
...@@ -81,7 +83,51 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) ...@@ -81,7 +83,51 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
${LITE_OPTIONAL_ARGS} ${LITE_OPTIONAL_ARGS}
) )
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8)
else()
set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib)
set(LITE_OPTIONAL_ARGS -DWITH_MKL=ON
-DLITE_WITH_CUDA=${WITH_GPU}
-DWITH_MKLDNN=OFF
-DLITE_WITH_X86=ON
-DLITE_WITH_PROFILE=OFF
-DWITH_LITE=OFF
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF
-DWITH_PYTHON=OFF
-DWITH_TESTING=OFF
-DLITE_BUILD_EXTRA=ON
-DCUDNN_ROOT=${CUDNN_ROOT}
-DLITE_WITH_STATIC_CUDA=OFF
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME}
-DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_ROOT=${XPU_SDK_ROOT}
-DLITE_WITH_ARM=OFF)
ExternalProject_Add(
${LITE_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/PaddlePaddle/Paddle-Lite.git"
GIT_TAG ${LITE_GIT_TAG}
PREFIX ${LITE_SOURCES_DIR}
UPDATE_COMMAND ""
BUILD_COMMAND ${LITE_BUILD_COMMAND}
INSTALL_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${LITE_CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
${LITE_OPTIONAL_ARGS}
)
endif()
ExternalProject_Get_property(${LITE_PROJECT} BINARY_DIR) ExternalProject_Get_property(${LITE_PROJECT} BINARY_DIR)
ExternalProject_Get_property(${LITE_PROJECT} SOURCE_DIR) ExternalProject_Get_property(${LITE_PROJECT} SOURCE_DIR)
set(LITE_BINARY_DIR ${BINARY_DIR}) set(LITE_BINARY_DIR ${BINARY_DIR})
...@@ -103,8 +149,8 @@ function(external_lite_libs alias path) ...@@ -103,8 +149,8 @@ function(external_lite_libs alias path)
endif() endif()
endfunction() endfunction()
external_lite_libs(lite_full_static ${LITE_BINARY_DIR}/inference_lite_lib/cxx/lib/libpaddle_full_api_shared.so) external_lite_libs(lite_full_static ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)
set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/inference_lite_lib/cxx/lib/libpaddle_full_api_shared.so) set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)
add_definitions(-DPADDLE_WITH_LITE) add_definitions(-DPADDLE_WITH_LITE)
add_definitions(-DLITE_WITH_LOG) add_definitions(-DLITE_WITH_LOG)
...@@ -125,7 +125,7 @@ function(copy_part_of_thrid_party TARGET DST) ...@@ -125,7 +125,7 @@ function(copy_part_of_thrid_party TARGET DST)
if (LITE_BINARY_DIR) if (LITE_BINARY_DIR)
set(dst_dir "${DST}/third_party/install/lite") set(dst_dir "${DST}/third_party/install/lite")
copy(${TARGET} copy(${TARGET}
SRCS ${LITE_BINARY_DIR}/inference_lite_lib/* SRCS ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/*
DSTS ${dst_dir}) DSTS ${dst_dir})
endif() endif()
endfunction() endfunction()
......
...@@ -219,6 +219,10 @@ struct Argument { ...@@ -219,6 +219,10 @@ struct Argument {
DECL_ARGUMENT_FIELD(fusion_statis, FusionStatis, fusion_statis_t); DECL_ARGUMENT_FIELD(fusion_statis, FusionStatis, fusion_statis_t);
// Only used in paddle-lite subgraph.
DECL_ARGUMENT_FIELD(cpu_math_library_num_threads, CpuMathLibraryNumThreads,
int);
private: private:
std::unordered_set<std::string> valid_fields_; std::unordered_set<std::string> valid_fields_;
}; };
......
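Note on the new Argument field above: DECL_ARGUMENT_FIELD(cpu_math_library_num_threads, CpuMathLibraryNumThreads, int) is a macro, and the accessors it generates are what the later hunks rely on (argument->cpu_math_library_num_threads() in ir_pass_manager.cc, argument_.SetCpuMathLibraryNumThreads(...) in analysis_predictor.cc). A minimal sketch of roughly what the expansion provides, assuming the usual getter/setter/valid-field pattern of argument.h; the real macro may differ in details:

#include <string>
#include <unordered_set>

struct ArgumentSketch {
  // getter, as used via argument->cpu_math_library_num_threads()
  int cpu_math_library_num_threads() const { return cpu_math_library_num_threads_; }
  // setter, as used via argument_.SetCpuMathLibraryNumThreads(n)
  void SetCpuMathLibraryNumThreads(int x) {
    cpu_math_library_num_threads_ = x;
    valid_fields_.insert("cpu_math_library_num_threads");  // record that the field was set
  }

 private:
  int cpu_math_library_num_threads_{1};
  std::unordered_set<std::string> valid_fields_;
};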
...@@ -151,6 +151,8 @@ void IRPassManager::CreatePasses(Argument *argument, ...@@ -151,6 +151,8 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("use_xpu", new bool(argument->use_xpu())); pass->Set("use_xpu", new bool(argument->use_xpu()));
pass->Set("xpu_l3_workspace_size", pass->Set("xpu_l3_workspace_size",
new int(argument->xpu_l3_workspace_size())); new int(argument->xpu_l3_workspace_size()));
pass->Set("cpu_math_library_num_threads",
new int(argument->cpu_math_library_num_threads()));
} }
disable_logs_ = argument->disable_logs(); disable_logs_ = argument->disable_logs();
if (pass_name == "fc_fuse_pass") { if (pass_name == "fc_fuse_pass") {
......
...@@ -244,6 +244,7 @@ void LiteSubgraphPass::SetUpEngine( ...@@ -244,6 +244,7 @@ void LiteSubgraphPass::SetUpEngine(
bool enable_int8 = Get<bool>("enable_int8"); bool enable_int8 = Get<bool>("enable_int8");
bool use_xpu = Get<bool>("use_xpu"); bool use_xpu = Get<bool>("use_xpu");
int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size"); int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size");
int cpu_math_library_num_threads = Get<int>("cpu_math_library_num_threads");
lite_api::TargetType target_type; lite_api::TargetType target_type;
if (use_gpu) { if (use_gpu) {
...@@ -251,7 +252,11 @@ void LiteSubgraphPass::SetUpEngine( ...@@ -251,7 +252,11 @@ void LiteSubgraphPass::SetUpEngine(
} else if (use_xpu) { } else if (use_xpu) {
target_type = TARGET(kXPU); target_type = TARGET(kXPU);
} else { } else {
#ifdef PADDLE_WITH_ARM
target_type = TARGET(kARM);
#else
target_type = TARGET(kX86); target_type = TARGET(kX86);
#endif
} }
paddle::lite_api::PrecisionType precision_type = paddle::lite_api::PrecisionType precision_type =
...@@ -263,11 +268,12 @@ void LiteSubgraphPass::SetUpEngine( ...@@ -263,11 +268,12 @@ void LiteSubgraphPass::SetUpEngine(
// Notice: The ordering here determines the device where the // Notice: The ordering here determines the device where the
// input tensor of the Lite engine is located, and then affects // input tensor of the Lite engine is located, and then affects
// whether tensor sharing is feasible. // whether tensor sharing is feasible.
paddle::lite::Place({target_type, precision_type}), paddle::lite_api::Place({target_type, precision_type}),
paddle::lite::Place({target_type, PRECISION(kInt64)}), paddle::lite_api::Place({target_type, PRECISION(kInt64)}),
paddle::lite::Place({target_type, PRECISION(kFloat)}), paddle::lite_api::Place({target_type, PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}), paddle::lite_api::Place({TARGET(kHost), PRECISION(kFloat)}),
}; };
config.cpu_math_library_num_threads = cpu_math_library_num_threads;
config.xpu_l3_workspace_size = xpu_l3_workspace_size; config.xpu_l3_workspace_size = xpu_l3_workspace_size;
if (dump_model) { if (dump_model) {
lite::StrToBinaryFile("./model.bin", config.model); lite::StrToBinaryFile("./model.bin", config.model);
......
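For readers skimming the pass change above: the net effect of the new #ifdef is that the Lite subgraph targets ARM kernels when Paddle itself is built for ARM, and the CPU thread count is now forwarded into the engine config. A standalone sketch of that target-selection control flow with the pass machinery stripped away (the helper name is illustrative, not part of the patch):

#include "lite/api/paddle_place.h"

paddle::lite_api::TargetType ChooseLiteTarget(bool use_gpu, bool use_xpu) {
  using paddle::lite_api::TargetType;
  if (use_gpu) return TargetType::kCUDA;
  if (use_xpu) return TargetType::kXPU;
#ifdef PADDLE_WITH_ARM
  return TargetType::kARM;  // new: ARM builds run the Lite ARM kernels
#else
  return TargetType::kX86;  // unchanged default for x86 builds
#endif
}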
...@@ -356,7 +356,7 @@ void AnalysisConfig::Update() { ...@@ -356,7 +356,7 @@ void AnalysisConfig::Update() {
} }
if (use_xpu_) { if (use_xpu_) {
#ifndef PADDLE_WITH_XPU #ifndef LITE_SUBGRAPH_WITH_XPU
PADDLE_THROW(platform::errors::Unavailable( PADDLE_THROW(platform::errors::Unavailable(
"You tried to use an XPU device, but Paddle was not compiled " "You tried to use an XPU device, but Paddle was not compiled "
"with XPU-runtime.")); "with XPU-runtime."));
......
...@@ -232,8 +232,17 @@ bool AnalysisPredictor::PrepareExecutor() { ...@@ -232,8 +232,17 @@ bool AnalysisPredictor::PrepareExecutor() {
void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) { void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
VLOG(2) << "AnalysisPredictor::Run get_cur_mkldnn_session_id=" std::vector<std::vector<int>> inputs_shape;
<< platform::get_cur_mkldnn_session_id(); for (size_t i = 0; i < inputs.size(); ++i) {
inputs_shape.emplace_back(inputs[i].shape);
}
MkldnnPreSet(inputs_shape);
#endif
}
void AnalysisPredictor::MkldnnPreSet(
const std::vector<std::vector<int>> &inputs_shape) {
#ifdef PADDLE_WITH_MKLDNN
// In cache clearing mode. // In cache clearing mode.
if (config_.mkldnn_cache_capacity_ > 0) { if (config_.mkldnn_cache_capacity_ > 0) {
VLOG(2) << "In mkldnn cache clear mode."; VLOG(2) << "In mkldnn cache clear mode.";
...@@ -243,9 +252,9 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) { ...@@ -243,9 +252,9 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
config_.mkldnn_cache_capacity_); config_.mkldnn_cache_capacity_);
// Set current_input_shape for caching dynamic shape. // Set current_input_shape for caching dynamic shape.
std::stringstream ss; std::stringstream ss;
for (size_t i = 0; i < inputs.size(); ++i) { for (size_t i = 0; i < inputs_shape.size(); ++i) {
for (size_t j = 0; j < inputs[i].shape.size(); ++j) { for (size_t j = 0; j < inputs_shape[i].size(); ++j) {
ss << inputs[i].shape[j] << "-"; ss << inputs_shape[i][j] << "-";
} }
} }
VLOG(2) << "Set input shape=" << ss.str(); VLOG(2) << "Set input shape=" << ss.str();
...@@ -445,6 +454,8 @@ void AnalysisPredictor::PrepareArgument() { ...@@ -445,6 +454,8 @@ void AnalysisPredictor::PrepareArgument() {
} }
if (config_.lite_engine_enabled()) { if (config_.lite_engine_enabled()) {
argument_.SetCpuMathLibraryNumThreads(
config_.cpu_math_library_num_threads());
argument_.SetLitePrecisionMode(config_.lite_precision_mode_); argument_.SetLitePrecisionMode(config_.lite_precision_mode_);
argument_.SetLitePassesFilter(config_.lite_passes_filter_); argument_.SetLitePassesFilter(config_.lite_passes_filter_);
argument_.SetLiteOpsFilter(config_.lite_ops_filter_); argument_.SetLiteOpsFilter(config_.lite_ops_filter_);
...@@ -656,6 +667,18 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor( ...@@ -656,6 +667,18 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
bool AnalysisPredictor::ZeroCopyRun() { bool AnalysisPredictor::ZeroCopyRun() {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) {
std::vector<std::vector<int>> shape_vector;
auto names = GetInputNames();
for (size_t i = 0; i < names.size(); ++i) {
auto in_tensor = GetInputTensor(names[i]);
shape_vector.emplace_back(in_tensor->shape());
}
MkldnnPreSet(shape_vector);
}
#endif
executor_->Run(); executor_->Run();
// Fix TensorArray reuse not cleaned bug. // Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_); tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
...@@ -664,6 +687,9 @@ bool AnalysisPredictor::ZeroCopyRun() { ...@@ -664,6 +687,9 @@ bool AnalysisPredictor::ZeroCopyRun() {
// recover the cpu_math_library_num_threads to 1, in order to avoid thread // recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service. // conflict when integrating it into deployment service.
paddle::platform::SetNumThreads(1); paddle::platform::SetNumThreads(1);
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) MkldnnPostReset();
#endif
#if defined(PADDLE_WITH_MKLML) && defined(_LINUX) #if defined(PADDLE_WITH_MKLML) && defined(_LINUX)
// Frees unused memory allocated by the Intel® MKL Memory Allocator to // Frees unused memory allocated by the Intel® MKL Memory Allocator to
// avoid memory leak. See: // avoid memory leak. See:
......
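The analysis_predictor.cc changes above split MkldnnPreSet into a shape-based overload so that the MKLDNN cache-clearing path also runs from ZeroCopyRun(), not only from Run(). A usage sketch of the public API that exercises this path; the model path, shapes and cache capacity are placeholder values:

#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void RunWithMkldnnCacheClearing() {
  paddle::AnalysisConfig config;
  config.SetModel("./mobilenet/__model__", "./mobilenet/__params__");  // placeholder model
  config.EnableMKLDNN();
  config.SetMkldnnCacheCapacity(10);     // > 0 switches on cache-clearing mode
  config.SwitchUseFeedFetchOps(false);   // required for the zero-copy tensor API

  auto predictor = paddle::CreatePaddlePredictor(config);
  auto input = predictor->GetInputTensor(predictor->GetInputNames()[0]);
  std::vector<float> data(1 * 3 * 224 * 224, 1.0f);
  input->Reshape({1, 3, 224, 224});
  input->copy_from_cpu(data.data());
  // With this patch, ZeroCopyRun() records the input shapes via MkldnnPreSet()
  // and calls MkldnnPostReset() afterwards when MKLDNN is enabled.
  predictor->ZeroCopyRun();
}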
...@@ -311,6 +311,17 @@ class AnalysisPredictor : public PaddlePredictor { ...@@ -311,6 +311,17 @@ class AnalysisPredictor : public PaddlePredictor {
/// \param[in] inputs tensors /// \param[in] inputs tensors
/// ///
void MkldnnPreSet(const std::vector<PaddleTensor> &inputs); void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);
///
/// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
///
/// Used in AnalysisPredictor::Run(), do not support
/// AnalysisPredictor::ZeroCopyRun() now.
///
/// \param[in] inputs tensor shape
///
void MkldnnPreSet(const std::vector<std::vector<int>> &inputs_shape);
/// ///
/// \brief PostReset for Mkldnn multi-thread and dynamic shape input. /// \brief PostReset for Mkldnn multi-thread and dynamic shape input.
/// ///
......
...@@ -4,6 +4,6 @@ endif() ...@@ -4,6 +4,6 @@ endif()
cc_library(lite_op_teller SRCS op_teller.cc DEPS lite_full_static framework_proto device_context boost xxhash) cc_library(lite_op_teller SRCS op_teller.cc DEPS lite_full_static framework_proto device_context boost xxhash)
cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto ${XPU_DEPS}) cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto ${XPU_DEPS})
cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost device_context) cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost device_context ${XPU_DEPS})
cc_test(test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf framework_proto glog gtest analysis) cc_test(test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf framework_proto glog gtest analysis)
cc_test(test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine lite_tensor_utils) cc_test(test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine lite_tensor_utils)
...@@ -16,12 +16,16 @@ ...@@ -16,12 +16,16 @@
#define LITE_WITH_CUDA 1 #define LITE_WITH_CUDA 1
#endif #endif
#ifdef PADDLE_WITH_XPU #ifdef LITE_SUBGRAPH_WITH_XPU
#define LITE_WITH_XPU 1 #define LITE_WITH_XPU 1
#endif #endif
#ifndef PADDLE_WITH_ARM
#define LITE_WITH_X86 1
#endif
#include "paddle/fluid/inference/lite/engine.h" #include "paddle/fluid/inference/lite/engine.h"
#include "lite/api/paddle_use_passes.h" #include <utility>
namespace paddle { namespace paddle {
namespace inference { namespace inference {
...@@ -36,32 +40,40 @@ bool EngineManager::Has(const std::string& name) const { ...@@ -36,32 +40,40 @@ bool EngineManager::Has(const std::string& name) const {
return engines_.at(name).get() != nullptr; return engines_.at(name).get() != nullptr;
} }
paddle::lite::Predictor* EngineManager::Get(const std::string& name) const { paddle::lite_api::PaddlePredictor* EngineManager::Get(
const std::string& name) const {
return engines_.at(name).get(); return engines_.at(name).get();
} }
paddle::lite::Predictor* EngineManager::Create(const std::string& name, paddle::lite_api::PaddlePredictor* EngineManager::Create(
const EngineConfig& cfg) { const std::string& name, const EngineConfig& cfg) {
if (cfg.valid_places.front().target == TARGET(kCUDA)) { // config info for predictor.
#ifdef PADDLE_WITH_CUDA paddle::lite_api::CxxConfig lite_cxx_config;
paddle::lite::Env<TARGET(kCUDA)>::Init(); lite_cxx_config.set_model_buffer(cfg.model.c_str(), cfg.model.size(),
cfg.param.c_str(), cfg.param.size());
lite_cxx_config.set_valid_places(cfg.valid_places);
#ifdef PADDLE_WITH_ARM
lite_cxx_config.set_threads(cfg.cpu_math_library_num_threads);
#else
lite_cxx_config.set_x86_math_library_num_threads(
cfg.cpu_math_library_num_threads);
#endif #endif
} else if (cfg.valid_places.front().target == TARGET(kXPU)) {
#ifdef PADDLE_WITH_XPU #ifdef LITE_SUBGRAPH_WITH_XPU
paddle::lite::TargetWrapper<TARGET(kXPU)>::workspace_l3_size_per_thread = lite_cxx_config.set_xpu_workspace_l3_size_per_thread(
cfg.xpu_l3_workspace_size; cfg.xpu_l3_workspace_size);
#endif #endif
}
auto* p = new paddle::lite::Predictor(); // create predictor
p->Build("", cfg.model, cfg.param, cfg.valid_places, cfg.neglected_passes, std::shared_ptr<paddle::lite_api::PaddlePredictor> p =
cfg.model_type, cfg.model_from_memory); paddle::lite_api::CreatePaddlePredictor(lite_cxx_config);
engines_[name].reset(p); engines_[name] = std::move(p);
return p; return engines_[name].get();
} }
void EngineManager::DeleteAll() { void EngineManager::DeleteAll() {
for (auto& item : engines_) { for (auto& item : engines_) {
item.second.reset(nullptr); item.second.reset();
} }
} }
......
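With the engine.cc rewrite above, callers no longer build a paddle::lite::Predictor directly; they fill an EngineConfig and receive a paddle::lite_api::PaddlePredictor handle from the EngineManager singleton. A minimal caller-side sketch, assuming serialized model/param buffers produced by the subgraph pass; the engine key, thread count and place list are illustrative:

#include <string>
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"

paddle::lite_api::PaddlePredictor* BuildLiteEngine(const std::string& model_buf,
                                                   const std::string& param_buf) {
  paddle::inference::lite::EngineConfig cfg;
  cfg.model = model_buf;  // serialized ProgramDesc, fed to set_model_buffer()
  cfg.param = param_buf;  // serialized weights
  cfg.cpu_math_library_num_threads = 4;  // forwarded to set_threads()/set_x86_math_library_num_threads()
  cfg.valid_places = {
      paddle::lite_api::Place({TARGET(kX86), PRECISION(kFloat)}),
      paddle::lite_api::Place({TARGET(kHost), PRECISION(kFloat)}),
  };
  return paddle::inference::Singleton<paddle::inference::lite::EngineManager>::Global()
      .Create("lite_engine_key_0", cfg);
}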
...@@ -23,12 +23,9 @@ ...@@ -23,12 +23,9 @@
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wall" #pragma GCC diagnostic ignored "-Wall"
#include "lite/api/cxx_api.h" #include "lite/api/cxx_api.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_place.h" #include "lite/api/paddle_place.h"
#include "lite/core/context.h" #include "lite/api/paddle_use_passes.h"
#include "lite/core/device_info.h"
#include "lite/core/memory.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
namespace paddle { namespace paddle {
...@@ -38,25 +35,33 @@ namespace lite { ...@@ -38,25 +35,33 @@ namespace lite {
struct EngineConfig { struct EngineConfig {
std::string model; std::string model;
std::string param; std::string param;
paddle::lite::Place prefer_place; std::vector<paddle::lite_api::Place> valid_places;
std::vector<paddle::lite::Place> valid_places;
std::vector<std::string> neglected_passes; std::vector<std::string> neglected_passes;
lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf}; lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf};
bool model_from_memory{true}; bool model_from_memory{true};
// for xpu
size_t xpu_l3_workspace_size; size_t xpu_l3_workspace_size;
// for x86 or arm
int cpu_math_library_num_threads{1};
// for cuda
bool use_multi_stream{false};
}; };
class EngineManager { class EngineManager {
public: public:
bool Empty() const; bool Empty() const;
bool Has(const std::string& name) const; bool Has(const std::string& name) const;
paddle::lite::Predictor* Get(const std::string& name) const; paddle::lite_api::PaddlePredictor* Get(const std::string& name) const;
paddle::lite::Predictor* Create(const std::string& name, paddle::lite_api::PaddlePredictor* Create(const std::string& name,
const EngineConfig& cfg); const EngineConfig& cfg);
void DeleteAll(); void DeleteAll();
private: private:
std::unordered_map<std::string, std::unique_ptr<paddle::lite::Predictor>> std::unordered_map<std::string,
std::shared_ptr<paddle::lite_api::PaddlePredictor>>
engines_; engines_;
}; };
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/inference/lite/tensor_utils.h" #include "paddle/fluid/inference/lite/tensor_utils.h"
#include <functional>
#include <map> #include <map>
#include <memory> #include <memory>
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
...@@ -45,6 +46,7 @@ platform::Place GetNativePlace(const TargetType& type, int id = 0) { ...@@ -45,6 +46,7 @@ platform::Place GetNativePlace(const TargetType& type, int id = 0) {
switch (type) { switch (type) {
case TargetType::kHost: case TargetType::kHost:
case TargetType::kX86: case TargetType::kX86:
case TargetType::kARM:
return platform::CPUPlace(); return platform::CPUPlace();
case TargetType::kCUDA: case TargetType::kCUDA:
return platform::CUDAPlace(id); return platform::CUDAPlace(id);
...@@ -134,16 +136,55 @@ void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data, ...@@ -134,16 +136,55 @@ void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data,
} }
} }
void InitDstTensor(paddle::lite::Tensor* dst, const framework::LoDTensor& src) { void* GetLiteTensorDataPtr(paddle::lite_api::Tensor* src,
PrecisionType precision_type,
TargetType target_type) {
void* res{nullptr};
switch (precision_type) {
case PrecisionType::kFloat:
res = static_cast<void*>(src->mutable_data<float>(target_type));
break;
case PrecisionType::kInt8:
res = static_cast<void*>(src->mutable_data<int8_t>(target_type));
break;
case PrecisionType::kInt32:
res = static_cast<void*>(src->mutable_data<int32_t>(target_type));
break;
case PrecisionType::kInt64:
res = static_cast<void*>(src->mutable_data<int64_t>(target_type));
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported precision type. Now only supports FP32, INT8, INT32 and "
"INT64."));
break;
}
return res;
}
int64_t GetLiteTensorNumel(const paddle::lite_api::Tensor& tensor) {
auto shape = tensor.shape();
int64_t numel = std::accumulate(shape.begin(), shape.end(), 1,
std::multiplies<int64_t>());
return numel;
}
void InitDstTensor(paddle::lite_api::Tensor* dst,
const framework::LoDTensor& src) {
// Currently, Lite needs to explicitly specify the target type of // Currently, Lite needs to explicitly specify the target type of
// the input tensor. // the input tensor.
constexpr int empty_size = 0; constexpr int empty_size = 0;
dst->mutable_data(GetLiteTargetType(src.place()), empty_size); dst->Resize({empty_size});
dst->set_precision(GetLitePrecisionType(src.type())); GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()),
SetLoD(dst->mutable_lod(), src.lod()); GetLiteTargetType(src.place()));
dst->SetPrecision(GetLitePrecisionType(src.type()));
paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src.lod());
dst->SetLoD(lite_lod);
} }
void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) { void InitDstTensor(framework::LoDTensor* dst,
const paddle::lite_api::Tensor& src) {
constexpr framework::proto::VarType::Type dtype = constexpr framework::proto::VarType::Type dtype =
framework::proto::VarType_Type_FP32; framework::proto::VarType_Type_FP32;
dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()), dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()),
...@@ -152,7 +193,8 @@ void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) { ...@@ -152,7 +193,8 @@ void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
} }
template <> template <>
void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src, void TensorCopyAsync(paddle::lite_api::Tensor* dst,
const framework::LoDTensor& src,
const platform::DeviceContext& ctx) { const platform::DeviceContext& ctx) {
InitDstTensor(dst, src); InitDstTensor(dst, src);
const platform::Place& src_place = src.place(); const platform::Place& src_place = src.place();
...@@ -161,52 +203,56 @@ void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src, ...@@ -161,52 +203,56 @@ void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src,
static_cast<size_t>(src.numel()) * framework::SizeOfType(src.type()); static_cast<size_t>(src.numel()) * framework::SizeOfType(src.type());
dst->Resize(framework::vectorize(src.dims())); dst->Resize(framework::vectorize(src.dims()));
const void* src_data = src.data<void>(); const void* src_data = src.data<void>();
void* dst_data = dst->mutable_data(bytes); void* dst_data{nullptr};
dst_data = GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()),
GetLiteTargetType(src.place()));
VLOG(3) << "[CopyAsync fluid -> lite] Bytes = " << bytes << ", src = " << &src VLOG(3) << "[CopyAsync fluid -> lite] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << src.type(); << ", dst = " << dst << ", src_type = " << src.type();
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx); MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << dst->memory_size(); VLOG(3) << "[Lite memory size] Bytes = " << bytes;
} }
template <> template <>
void TensorCopyAsync(framework::LoDTensor* dst, const paddle::lite::Tensor& src, void TensorCopyAsync(framework::LoDTensor* dst,
const paddle::lite_api::Tensor& src,
const platform::DeviceContext& ctx) { const platform::DeviceContext& ctx) {
dst->Resize(paddle::framework::make_ddim(src.dims().Vectorize())); dst->Resize(paddle::framework::make_ddim(src.shape()));
InitDstTensor(dst, src); InitDstTensor(dst, src);
const platform::Place& src_place = GetNativePlace(src.target()); const platform::Place& src_place = GetNativePlace(src.target());
const platform::Place& dst_place = dst->place(); const platform::Place& dst_place = dst->place();
const size_t bytes = int64_t src_numel = GetLiteTensorNumel(src);
static_cast<size_t>(src.numel()) * framework::SizeOfType(dst->type()); const size_t bytes = src_numel * framework::SizeOfType(dst->type());
const void* src_data = src.raw_data(); const void* src_data = src.data<void>();
// When Lite is ready, the source type needs to be modified here. // When Lite is ready, the source type needs to be modified here.
void* dst_data = dst->mutable_data(dst_place, dst->type()); void* dst_data = dst->mutable_data(dst_place, dst->type());
VLOG(3) << "[CopyAsync lite -> fluid] Bytes = " << bytes << ", src = " << &src VLOG(3) << "[CopyAsync lite -> fluid] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << dst->type(); << ", dst = " << dst << ", src_type = " << dst->type();
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx); MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << src.memory_size(); VLOG(3) << "[Lite memory size] Bytes = " << bytes;
} }
template <> template <>
void TensorDataShare(paddle::lite::Tensor* dst, framework::LoDTensor* src) { void TensorDataShare(paddle::lite_api::Tensor* dst, framework::LoDTensor* src) {
const size_t bytes =
static_cast<size_t>(src->numel()) * framework::SizeOfType(src->type());
auto buf = std::make_shared<paddle::lite::Buffer>(paddle::lite::Buffer(
src->data<void>(), GetLiteTargetType(src->place()), src->memory_size()));
dst->Resize(framework::vectorize(src->dims())); dst->Resize(framework::vectorize(src->dims()));
dst->set_precision(GetLitePrecisionType(src->type())); dst->ShareExternalMemory(src->data<void>(), src->memory_size(),
SetLoD(dst->mutable_lod(), src->lod()); GetLiteTargetType(src->place()));
dst->ResetBuffer(buf, bytes); dst->SetPrecision(GetLitePrecisionType(src->type()));
paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src->lod());
dst->SetLoD(lite_lod);
} }
template <> template <>
void TensorDataShare(framework::LoDTensor* dst, paddle::lite::Tensor* src) { void TensorDataShare(framework::LoDTensor* dst, paddle::lite_api::Tensor* src) {
constexpr framework::proto::VarType::Type dtype = constexpr framework::proto::VarType::Type dtype =
framework::proto::VarType_Type_FP32; framework::proto::VarType_Type_FP32;
void* src_raw_data = src->raw_data(); void* src_raw_data =
GetLiteTensorDataPtr(src, GetLitePrecisionType(dtype), src->target());
size_t memory_size = GetLiteTensorNumel(*src) * sizeof(float);
std::shared_ptr<memory::allocation::Allocation> holder( std::shared_ptr<memory::allocation::Allocation> holder(
new memory::allocation::Allocation(src_raw_data, src->memory_size(), new memory::allocation::Allocation(src_raw_data, memory_size,
GetNativePlace(src->target()))); GetNativePlace(src->target())));
dst->Resize(paddle::framework::make_ddim(src->dims().Vectorize())); dst->Resize(paddle::framework::make_ddim(src->shape()));
SetLoD(dst->mutable_lod(), src->lod()); SetLoD(dst->mutable_lod(), src->lod());
dst->ResetHolderWithType(holder, dtype); dst->ResetHolderWithType(holder, dtype);
} }
......
...@@ -101,10 +101,10 @@ TEST(EngineManager, engine) { ...@@ -101,10 +101,10 @@ TEST(EngineManager, engine) {
config.model_from_memory = true; config.model_from_memory = true;
config.valid_places = { config.valid_places = {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}), paddle::lite_api::Place({TARGET(kCUDA), PRECISION(kFloat)}),
#endif #endif
paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}), paddle::lite_api::Place({TARGET(kX86), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}), paddle::lite_api::Place({TARGET(kHost), PRECISION(kAny)}),
}; };
LOG(INFO) << "Create EngineManager"; LOG(INFO) << "Create EngineManager";
...@@ -117,7 +117,7 @@ TEST(EngineManager, engine) { ...@@ -117,7 +117,7 @@ TEST(EngineManager, engine) {
ASSERT_EQ(inference::Singleton<inference::lite::EngineManager>::Global().Has( ASSERT_EQ(inference::Singleton<inference::lite::EngineManager>::Global().Has(
unique_key), unique_key),
true); true);
paddle::lite::Predictor* engine_0 = paddle::lite_api::PaddlePredictor* engine_0 =
inference::Singleton<inference::lite::EngineManager>::Global().Get( inference::Singleton<inference::lite::EngineManager>::Global().Get(
unique_key); unique_key);
CHECK_NOTNULL(engine_0); CHECK_NOTNULL(engine_0);
......
...@@ -73,6 +73,33 @@ TEST(LiteEngineOp, GetNativeLayoutType) { ...@@ -73,6 +73,33 @@ TEST(LiteEngineOp, GetNativeLayoutType) {
EXPECT_ANY_THROW(GetNativeLayoutType(DataLayoutType::kNHWC)); EXPECT_ANY_THROW(GetNativeLayoutType(DataLayoutType::kNHWC));
} }
template <typename T>
void test_lite_tensor_data_ptr(PrecisionType precision_type) {
void* GetLiteTensorDataPtr(paddle::lite_api::Tensor * src,
PrecisionType precision_type,
TargetType target_type);
const int count = 4;
paddle::lite::Tensor lite_tensor;
lite_tensor.Resize({count});
auto* lite_tensor_data = lite_tensor.mutable_data<T>();
for (size_t i = 0; i < count; ++i) {
lite_tensor_data[i] = i;
}
paddle::lite_api::Tensor lite_api_tensor(&lite_tensor);
T* data = static_cast<T*>(GetLiteTensorDataPtr(
&lite_api_tensor, precision_type, TargetType::kHost));
for (size_t i = 0; i < count; ++i) {
CHECK_EQ(data[i], static_cast<T>(i)) << "the i-th num is not correct.";
}
}
TEST(LiteEngineOp, GetLiteTensorDataPtr) {
test_lite_tensor_data_ptr<int64_t>(PrecisionType::kInt64);
test_lite_tensor_data_ptr<int32_t>(PrecisionType::kInt32);
test_lite_tensor_data_ptr<int8_t>(PrecisionType::kInt8);
EXPECT_ANY_THROW(test_lite_tensor_data_ptr<double>(PrecisionType::kUnk));
}
void test_tensor_copy(const platform::DeviceContext& ctx) { void test_tensor_copy(const platform::DeviceContext& ctx) {
// Create LoDTensor. // Create LoDTensor.
std::vector<float> vector({1, 2, 3, 4}); std::vector<float> vector({1, 2, 3, 4});
...@@ -83,10 +110,11 @@ void test_tensor_copy(const platform::DeviceContext& ctx) { ...@@ -83,10 +110,11 @@ void test_tensor_copy(const platform::DeviceContext& ctx) {
lod_tensor.set_lod(lod); lod_tensor.set_lod(lod);
// Create lite::Tensor and copy. // Create lite::Tensor and copy.
paddle::lite::Tensor lite_tensor; paddle::lite::Tensor lite_tensor;
TensorCopyAsync(&lite_tensor, lod_tensor, ctx); paddle::lite_api::Tensor lite_api_tensor(&lite_tensor);
TensorCopyAsync(&lite_api_tensor, lod_tensor, ctx);
// Copy to LoDTensor. // Copy to LoDTensor.
framework::LoDTensor lod_tensor_n; framework::LoDTensor lod_tensor_n;
TensorCopyAsync(&lod_tensor_n, lite_tensor, ctx); TensorCopyAsync(&lod_tensor_n, lite_api_tensor, ctx);
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (platform::is_gpu_place(ctx.GetPlace())) { if (platform::is_gpu_place(ctx.GetPlace())) {
platform::GpuStreamSync( platform::GpuStreamSync(
...@@ -108,10 +136,11 @@ void test_tensor_share(const platform::DeviceContext& ctx) { ...@@ -108,10 +136,11 @@ void test_tensor_share(const platform::DeviceContext& ctx) {
lod_tensor.set_lod(lod); lod_tensor.set_lod(lod);
// Create lite::Tensor and share. // Create lite::Tensor and share.
paddle::lite::Tensor lite_tensor; paddle::lite::Tensor lite_tensor;
TensorDataShare(&lite_tensor, &lod_tensor); paddle::lite_api::Tensor lite_api_tensor(&lite_tensor);
TensorDataShare(&lite_api_tensor, &lod_tensor);
// Copy to LoDTensor. // Copy to LoDTensor.
framework::LoDTensor lod_tensor_n; framework::LoDTensor lod_tensor_n;
TensorCopyAsync(&lod_tensor_n, lite_tensor, ctx); TensorCopyAsync(&lod_tensor_n, lite_api_tensor, ctx);
std::vector<float> result; std::vector<float> result;
TensorToVector(lod_tensor_n, ctx, &result); TensorToVector(lod_tensor_n, ctx, &result);
ASSERT_EQ(result, vector); ASSERT_EQ(result, vector);
......
...@@ -25,9 +25,13 @@ namespace inference { ...@@ -25,9 +25,13 @@ namespace inference {
TEST(AnalysisPredictor, use_gpu) { TEST(AnalysisPredictor, use_gpu) {
std::string model_dir = FLAGS_infer_model + "/" + "model"; std::string model_dir = FLAGS_infer_model + "/" + "model";
AnalysisConfig config; AnalysisConfig config;
#if defined(PADDLE_WITH_CUDA)
config.EnableUseGpu(100, 0); config.EnableUseGpu(100, 0);
#elif defined(LITE_SUBGRAPH_WITH_XPU)
config.EnableXpu(100);
#endif
config.SetModel(model_dir + "/model", model_dir + "/params"); config.SetModel(model_dir + "/model", model_dir + "/params");
config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32); config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32, true);
std::vector<PaddleTensor> inputs; std::vector<PaddleTensor> inputs;
auto predictor = CreatePaddlePredictor(config); auto predictor = CreatePaddlePredictor(config);
...@@ -39,7 +43,7 @@ TEST(AnalysisPredictor, use_gpu) { ...@@ -39,7 +43,7 @@ TEST(AnalysisPredictor, use_gpu) {
std::vector<float> input(input_num, 1); std::vector<float> input(input_num, 1);
PaddleTensor in; PaddleTensor in;
in.shape = {1, 3, 318, 318}; in.shape = {batch, channel, height, width};
in.data = in.data =
PaddleBuf(static_cast<void*>(input.data()), input_num * sizeof(float)); PaddleBuf(static_cast<void*>(input.data()), input_num * sizeof(float));
in.dtype = PaddleDType::FLOAT32; in.dtype = PaddleDType::FLOAT32;
......
...@@ -39,7 +39,7 @@ class LiteEngineOp : public framework::OperatorBase { ...@@ -39,7 +39,7 @@ class LiteEngineOp : public framework::OperatorBase {
private: private:
std::vector<std::string> in_names_; std::vector<std::string> in_names_;
std::vector<std::string> out_names_; std::vector<std::string> out_names_;
paddle::lite::Predictor *engine_; paddle::lite_api::PaddlePredictor *engine_;
framework::proto::VarType::Type precision_; framework::proto::VarType::Type precision_;
bool use_gpu_; bool use_gpu_;
bool zero_copy_; bool zero_copy_;
...@@ -78,10 +78,10 @@ class LiteEngineOp : public framework::OperatorBase { ...@@ -78,10 +78,10 @@ class LiteEngineOp : public framework::OperatorBase {
framework::LoDTensor src_t = framework::LoDTensor src_t =
inference::analysis::GetFromScope<framework::LoDTensor>(scope, inference::analysis::GetFromScope<framework::LoDTensor>(scope,
in_names_[i]); in_names_[i]);
paddle::lite::Tensor *dst_t = engine_->GetInput(i); paddle::lite_api::Tensor dst_t = *(engine_->GetInput(i));
VLOG(3) << "== fluid -> lite (" << in_names_[i] << " -> " VLOG(3) << "== fluid -> lite (" << in_names_[i] << " -> "
<< engine_->GetInputNames()[i] << ")"; << engine_->GetInputNames()[i] << ")";
inference::lite::utils::TensorCopy(dst_t, &src_t, *ctx, zero_copy_); inference::lite::utils::TensorCopy(&dst_t, &src_t, *ctx, zero_copy_);
} }
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (platform::is_gpu_place(dev_place)) { if (platform::is_gpu_place(dev_place)) {
...@@ -93,7 +93,7 @@ class LiteEngineOp : public framework::OperatorBase { ...@@ -93,7 +93,7 @@ class LiteEngineOp : public framework::OperatorBase {
engine_->Run(); engine_->Run();
VLOG(3) << "lite engine run done"; VLOG(3) << "lite engine run done";
for (size_t i = 0; i < out_names_.size(); i++) { for (size_t i = 0; i < out_names_.size(); i++) {
paddle::lite::Tensor src_t = *(engine_->GetOutput(i)); paddle::lite_api::Tensor src_t = *(engine_->GetOutput(i));
framework::LoDTensor *dst_t = framework::LoDTensor *dst_t =
&inference::analysis::GetFromScope<framework::LoDTensor>( &inference::analysis::GetFromScope<framework::LoDTensor>(
scope, out_names_[i]); scope, out_names_[i]);
......
...@@ -84,10 +84,10 @@ TEST(LiteEngineOp, engine_op) { ...@@ -84,10 +84,10 @@ TEST(LiteEngineOp, engine_op) {
inference::lite::EngineConfig config; inference::lite::EngineConfig config;
config.valid_places = { config.valid_places = {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}), paddle::lite_api::Place({TARGET(kCUDA), PRECISION(kFloat)}),
#endif #endif
paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}), paddle::lite_api::Place({TARGET(kX86), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}), paddle::lite_api::Place({TARGET(kHost), PRECISION(kAny)}),
}; };
serialize_params(&(config.param), &scope, repetitive_params); serialize_params(&(config.param), &scope, repetitive_params);
config.model = program.Proto()->SerializeAsString(); config.model = program.Proto()->SerializeAsString();
......
...@@ -387,6 +387,8 @@ void BindAnalysisConfig(py::module *m) { ...@@ -387,6 +387,8 @@ void BindAnalysisConfig(py::module *m) {
.def("params_file", &AnalysisConfig::params_file) .def("params_file", &AnalysisConfig::params_file)
.def("enable_use_gpu", &AnalysisConfig::EnableUseGpu, .def("enable_use_gpu", &AnalysisConfig::EnableUseGpu,
py::arg("memory_pool_init_size_mb"), py::arg("device_id") = 0) py::arg("memory_pool_init_size_mb"), py::arg("device_id") = 0)
.def("enable_xpu", &AnalysisConfig::EnableXpu,
py::arg("l3_workspace_size"))
.def("disable_gpu", &AnalysisConfig::DisableGpu) .def("disable_gpu", &AnalysisConfig::DisableGpu)
.def("use_gpu", &AnalysisConfig::use_gpu) .def("use_gpu", &AnalysisConfig::use_gpu)
.def("gpu_device_id", &AnalysisConfig::gpu_device_id) .def("gpu_device_id", &AnalysisConfig::gpu_device_id)
...@@ -427,8 +429,8 @@ void BindAnalysisConfig(py::module *m) { ...@@ -427,8 +429,8 @@ void BindAnalysisConfig(py::module *m) {
.def("tensorrt_oss_enabled", &AnalysisConfig::tensorrt_oss_enabled) .def("tensorrt_oss_enabled", &AnalysisConfig::tensorrt_oss_enabled)
.def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled) .def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled)
.def("enable_lite_engine", &AnalysisConfig::EnableLiteEngine, .def("enable_lite_engine", &AnalysisConfig::EnableLiteEngine,
py::arg("zero_copy") = false,
py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32, py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
py::arg("zero_copy") = false,
py::arg("passes_filter") = std::vector<std::string>(), py::arg("passes_filter") = std::vector<std::string>(),
py::arg("ops_filter") = std::vector<std::string>()) py::arg("ops_filter") = std::vector<std::string>())
.def("lite_engine_enabled", &AnalysisConfig::lite_engine_enabled) .def("lite_engine_enabled", &AnalysisConfig::lite_engine_enabled)
......
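The binding changes above expose AnalysisConfig::EnableXpu to Python and reorder enable_lite_engine's keyword arguments so that precision_mode precedes zero_copy, matching the C++ call order used in the tests earlier in this diff. For reference, a sketch of the equivalent C++ calls; the workspace size and flag values are illustrative only:

#include "paddle/fluid/inference/api/paddle_inference_api.h"

void ConfigureLiteEngine(paddle::AnalysisConfig* config) {
  config->EnableXpu(/*l3_workspace_size=*/100);  // what the new enable_xpu binding wraps
  config->EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32,
                           /*zero_copy=*/true);  // precision_mode now precedes zero_copy
}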
...@@ -261,6 +261,10 @@ else: ...@@ -261,6 +261,10 @@ else:
if '${WITH_LITE}' == 'ON': if '${WITH_LITE}' == 'ON':
shutil.copy('${LITE_SHARED_LIB}', libs_path) shutil.copy('${LITE_SHARED_LIB}', libs_path)
package_data['paddle.libs']+=['libpaddle_full_api_shared' + ext_name] package_data['paddle.libs']+=['libpaddle_full_api_shared' + ext_name]
if '${XPU_SDK_ROOT}':
shutil.copy('${XPUAPI_LIB}', libs_path)
shutil.copy('${XPURT_LIB}', libs_path)
package_data['paddle.libs'] += ['libxpuapi.so', 'libxpurt.so']
if '${WITH_PSLIB}' == 'ON': if '${WITH_PSLIB}' == 'ON':
shutil.copy('${PSLIB_LIB}', libs_path) shutil.copy('${PSLIB_LIB}', libs_path)
......