Commit a157f1c7 authored by: J jingqinghe
@@ -16,6 +16,7 @@ else()
set(paddle_known_gpu_archs8 "30 35 50 52 60 61")
set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70")
set(paddle_known_gpu_archs10 "30 35 50 52 60 61 70 75")
+set(paddle_known_gpu_archs11 "52 60 61 70 75 80")
endif()
######################################################################################
@@ -188,6 +189,10 @@ elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x
set(paddle_known_gpu_archs ${paddle_known_gpu_archs10})
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
+elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.x
+set(paddle_known_gpu_archs ${paddle_known_gpu_archs11})
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
endif()
add_definitions("-DPADDLE_CUDA_BINVER=\"${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}\"")
......
@@ -19,7 +19,7 @@ SET(DGC_SOURCES_DIR "${THIRD_PARTY_PATH}/dgc/src/extern_dgc")
SET(DGC_INSTALL_DIR "${THIRD_PARTY_PATH}/install/dgc")
SET(DGC_INCLUDE_DIR "${DGC_INSTALL_DIR}/include" CACHE PATH "dgc include directory." FORCE)
SET(DGC_LIBRARIES "${DGC_INSTALL_DIR}/lib/libdgc.a" CACHE FILEPATH "dgc library." FORCE)
-SET(DGC_URL "http://fleet.bj.bcebos.com/collective_ef2216a.tgz")
+SET(DGC_URL "https://fleet.bj.bcebos.com/dgc/collective_f66ef73.tgz")
INCLUDE_DIRECTORIES(${DGC_INCLUDE_DIR})
cache_third_party(extern_dgc
@@ -30,7 +30,7 @@ ExternalProject_Add(
extern_dgc
${EXTERNAL_PROJECT_LOG_ARGS}
"${DGC_DOWNLOAD_CMD}"
-URL_MD5 "2f67549fd5f1262383d83289abc4f88f"
+URL_MD5 "94e6fa1bc97169d0e1aad44570fe3251"
PREFIX "${DGC_PREFIX_DIR}"
SOURCE_DIR "${DGC_SOURCES_DIR}"
CONFIGURE_COMMAND ""
......
@@ -34,7 +34,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
set(LITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lite)
if(NOT LITE_GIT_TAG)
-set(LITE_GIT_TAG dfdfa6440c83bf0b415f9f5a9ff84842ce0bb0fa)
+set(LITE_GIT_TAG 6d2b2a4028a58715b01887b04eb9bff8432eb184)
endif()
if(NOT CUDA_ARCH_NAME)
......
@@ -19,8 +19,8 @@ SET(MKLDNN_PREFIX_DIR ${THIRD_PARTY_PATH}/mkldnn)
SET(MKLDNN_SOURCE_DIR ${THIRD_PARTY_PATH}/mkldnn/src/extern_mkldnn)
SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn)
SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE)
-SET(MKLDNN_REPOSITORY https://github.com/intel/mkl-dnn.git)
-SET(MKLDNN_TAG 1ea812f4f5aa1bd989372a23ab50d0f0f81ee677)
+SET(MKLDNN_REPOSITORY https://github.com/oneapi-src/oneDNN.git)
+SET(MKLDNN_TAG 64a48f9565aa72f6359917b3406328075a409939)
# Introduce variables:
# * CMAKE_INSTALL_LIBDIR
......
@@ -18,7 +18,7 @@ SET(WARPCTC_PREFIX_DIR ${THIRD_PARTY_PATH}/warpctc)
SET(WARPCTC_SOURCE_DIR ${THIRD_PARTY_PATH}/warpctc/src/extern_warpctc)
SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc)
set(WARPCTC_REPOSITORY https://github.com/baidu-research/warp-ctc.git)
-set(WARPCTC_TAG bc29dcfff07ced1c7a19a4ecee48e5ad583cef8e)
+set(WARPCTC_TAG fc7f226b93758216a03b1be9d24593a12819b984)
SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
CACHE PATH "Warp-ctc Directory" FORCE)
......
@@ -28,7 +28,15 @@ function(CheckCompilerCXX11Flag)
endfunction()
CheckCompilerCXX11Flag()
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+if (WITH_GPU)
+if (${CMAKE_CUDA_COMPILER_VERSION} GREATER_EQUAL 11.0)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
+else()
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+endif()
+else()
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+endif()
# safe_set_flag
#
# Set a compile flag only if compiler is support
......
@@ -386,7 +386,7 @@ function(cc_test_run TARGET_NAME)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
# No unit test should exceed 2 minutes.
if (APPLE OR WIN32)
-set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
+set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150)
else()
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 120)
endif()
@@ -748,7 +748,7 @@ function(py_test TARGET_NAME)
endif()
if (APPLE OR WIN32)
-set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
+set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150)
else()
# No unit test should exceed 2 minutes in Linux.
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 120)
......
@@ -138,12 +138,17 @@ function(op_library TARGET)
# And for detail pybind information, please see generated paddle/pybind/pybind.h.
file(READ ${TARGET}.cc TARGET_CONTENT)
string(REGEX MATCH "REGISTER_OPERATOR\\(.*REGISTER_OPERATOR\\(" multi_register "${TARGET_CONTENT}")
-string(REGEX MATCH "REGISTER_OPERATOR\\([a-z0-9_]*," one_register "${multi_register}")
+# [ \t\r\n]* is used for blank characters
+string(REGEX MATCH "REGISTER_OPERATOR\\([ \t\r\n]*[a-z0-9_]*," one_register "${multi_register}")
if (one_register STREQUAL "")
string(REPLACE "_op" "" TARGET "${TARGET}")
else ()
string(REPLACE "REGISTER_OPERATOR(" "" TARGET "${one_register}")
string(REPLACE "," "" TARGET "${TARGET}")
+# [ \t\r\n]+ is used for blank characters.
+# Here we use '+' instead of '*' since it is a REPLACE operation.
+string(REGEX REPLACE "[ \t\r\n]+" "" TARGET "${TARGET}")
endif()
# pybind USE_NO_KERNEL_OP
......
@@ -243,9 +243,10 @@ IF(WITH_TESTING OR (WITH_DISTRIBUTE AND NOT WITH_GRPC))
ENDIF()
if(WITH_GPU)
-include(external/cub) # download cub
-list(APPEND third_party_deps extern_cub)
+if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0)
+include(external/cub) # download cub
+list(APPEND third_party_deps extern_cub)
+endif()
set(CUDAERROR_URL "http://paddlepaddledeps.bj.bcebos.com/cudaErrorMessage.tar.gz" CACHE STRING "" FORCE)
file_download_and_uncompress(${CUDAERROR_URL} "cudaerror") # download file cudaErrorMessage
endif(WITH_GPU)
......
@@ -49,7 +49,8 @@ std::vector<std::string> PD_GetGradOpDescStrs(
for (size_t i = 0; i < op_num; ++i) {
PADDLE_ENFORCE_EQ(
grad_op_descs[i]->Proto()->SerializePartialToString(&ret[i]), true,
-"Cannot serialize message.");
+paddle::platform::errors::Unavailable(
+"Cannot serialize operator desc message."));
}
}
return ret;
......
@@ -36,7 +36,10 @@ message AMPConfig {
repeated string custom_black_varnames = 9;
}
-message LocalSGDConfig { optional int32 k_steps = 1 [ default = 4 ]; }
+message LocalSGDConfig {
+optional int32 k_steps = 1 [ default = 1 ];
+optional int32 begin_step = 2 [ default = 1 ];
+}
message GradientMergeConfig {
optional int32 k_steps = 1 [ default = 1 ];
@@ -52,6 +55,8 @@ message DGCConfig {
message LarsConfig {
optional float lars_coeff = 1 [ default = 0.001 ];
optional float lars_weight_decay = 2 [ default = 0.0005 ];
+optional float epsilon = 3 [ default = 0.0 ];
+repeated string exclude_from_weight_decay = 4;
}
message LambConfig {
......
@@ -25,7 +25,7 @@ bool NCCLWrapper::is_initialized_ = false;
void NCCLWrapper::InitNCCL() {
#if defined(PADDLE_WITH_NCCL)
-PADDLE_ENFORCE(platform::dynload::ncclCommInitRank(
+PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclCommInitRank(
&(nccl_info_.comm_), nccl_info_.global_ranks_, nccl_info_.nccl_id_,
nccl_info_.my_global_rank_));
#endif
@@ -41,7 +41,8 @@ void NCCLWrapper::SetNCCLId(const NCCLInfo& nccl_info) {
NCCLInfo NCCLWrapper::GetNCCLId() {
#if defined(PADDLE_WITH_NCCL)
-PADDLE_ENFORCE(platform::dynload::ncclGetUniqueId(&(nccl_info_.nccl_id_)));
+PADDLE_ENFORCE_CUDA_SUCCESS(
+platform::dynload::ncclGetUniqueId(&(nccl_info_.nccl_id_)));
#endif
return nccl_info_;
}
@@ -52,8 +53,8 @@ void NCCLWrapper::SetRankInfo(const int local_rank, const int global_rank,
nccl_info_.local_rank_ = local_rank;
nccl_info_.my_global_rank_ = global_rank;
nccl_info_.global_ranks_ = ranks;
-PADDLE_ENFORCE(cudaSetDevice(local_rank));
-PADDLE_ENFORCE(cudaStreamCreate(&(nccl_info_.stream_)));
+PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(local_rank));
+PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamCreate(&(nccl_info_.stream_)));
#endif
return;
}
@@ -65,7 +66,7 @@ void NCCLWrapper::SyncVar(const int root_rank, const Scope& scope,
auto var = scope.FindVar(name);
LoDTensor* tensor = var->GetMutable<LoDTensor>();
int32_t total_size = tensor->numel();
-PADDLE_ENFORCE(platform::dynload::ncclBcast(
+PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclBcast(
reinterpret_cast<void*>(tensor->data<float>()), total_size, ncclFloat,
root_rank, nccl_info_.comm_, nccl_info_.stream_));
cudaStreamSynchronize(nccl_info_.stream_);
......
@@ -102,6 +102,8 @@ if(WITH_MKLDNN)
pass_library(conv_concat_relu_mkldnn_fuse_pass inference DIR mkldnn)
pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
pass_library(scale_matmul_fuse_pass inference DIR mkldnn)
+pass_library(cpu_bfloat16_placement_pass inference DIR mkldnn)
+pass_library(cpu_bfloat16_pass inference DIR mkldnn)
pass_library(fc_mkldnn_pass inference DIR mkldnn)
pass_library(cpu_quantize_placement_pass base DIR mkldnn)
pass_library(cpu_quantize_pass inference DIR mkldnn)
@@ -162,4 +164,6 @@ endif()
cc_test(test_cpu_quantize_squash_pass SRCS mkldnn/cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor)
cc_test(test_reshape_transpose_matmul_mkldnn_fuse_pass SRCS mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc DEPS reshape_transpose_matmul_mkldnn_fuse_pass)
cc_test(test_matmul_transpose_reshape_fuse_pass SRCS mkldnn/matmul_transpose_reshape_fuse_pass_tester.cc DEPS matmul_transpose_reshape_fuse_pass)
+cc_test(test_cpu_bfloat16_placement_pass SRCS mkldnn/cpu_bfloat16_placement_pass_tester.cc DEPS cpu_bfloat16_placement_pass)
+cc_test(test_cpu_bfloat16_pass SRCS mkldnn/cpu_bfloat16_pass_tester.cc DEPS cpu_bfloat16_pass)
endif ()
@@ -1892,6 +1892,82 @@ PDNode *patterns::QuantizePlacement::operator()(
return op;
}
PDNode *patterns::Bfloat16Placement::operator()(
const std::unordered_set<std::string> &bfloat16_enabled_op_types) {
std::unordered_set<std::string> supported_op_types =
std::unordered_set<std::string>();
if (!bfloat16_enabled_op_types.empty()) {
supported_op_types = bfloat16_enabled_op_types;
}
auto *op = pattern->NewNode(op_repr())->assert_is_ops(supported_op_types);
return op;
}
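// Matches a bfloat16 op whose producer and consumer both run in float32; such
// an isolated bfloat16 op is reverted back to float32 by the placement pass.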
PDNode *patterns::OrphanedBfloat16::operator()() {
auto *prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
prev_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"float32";
});
auto *prev_out = pattern->NewNode(prev_out_repr())->AsOutput();
auto *op = pattern->NewNode(op_repr())->assert_is_op();
op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"bfloat16";
});
auto *op_out = pattern->NewNode(op_out_repr())->AsOutput();
auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op();
next_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"float32";
});
prev_op->LinksTo({prev_out});
op->LinksFrom({prev_out}).LinksTo({op_out});
next_op->LinksFrom({op_out});
return next_op;
}
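// Matches a bfloat16 op followed by a non-bfloat16 op, i.e. the last op of a
// bfloat16 subgraph; used to decide where the output must be converted back to
// float32 (via force_fp32_output or a dequantize op).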
PDNode *patterns::LastBfloat16Ops::operator()() {
auto *op = pattern->NewNode(op_repr())->assert_is_op();
op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"bfloat16";
});
auto *op_out = pattern->NewNode(op_out_repr())->AsOutput();
auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op();
next_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") !=
"bfloat16";
});
op->LinksTo({op_out});
next_op->LinksFrom({op_out});
return next_op;
}
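// Matches a non-bfloat16 op feeding a bfloat16 op, i.e. the first op of a
// bfloat16 subgraph; used to decide where a quantize op has to be inserted.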
PDNode *patterns::FirstBfloat16Ops::operator()() {
auto *prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
prev_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") !=
"bfloat16";
});
auto *op_in = pattern->NewNode(op_in_repr())->AsOutput();
auto *op = pattern->NewNode(op_repr())->assert_is_op();
op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"bfloat16";
});
prev_op->LinksTo({op_in});
op->LinksFrom({op_in});
return op;
}
PDNode *patterns::MKLDNNInPlace::operator()() {
const std::unordered_set<std::string> &supported_op_types = {
"abs",
......
@@ -1129,6 +1129,47 @@ struct QuantizePlacement : public PatternBase {
PATTERN_DECL_NODE(op);
};
struct Bfloat16Placement : public PatternBase {
Bfloat16Placement(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "bfloat16_placement") {}
PDNode* operator()(
const std::unordered_set<std::string>& bfloat16_enabled_op_types);
PATTERN_DECL_NODE(op);
};
struct OrphanedBfloat16 : public PatternBase {
OrphanedBfloat16(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "orphaned_bfloat16") {}
PDNode* operator()();
PATTERN_DECL_NODE(prev_op);
PATTERN_DECL_NODE(prev_out);
PATTERN_DECL_NODE(op);
PATTERN_DECL_NODE(op_out);
PATTERN_DECL_NODE(next_op);
};
struct LastBfloat16Ops : public PatternBase {
LastBfloat16Ops(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "last_bfloat16_ops") {}
PDNode* operator()();
PATTERN_DECL_NODE(op);
PATTERN_DECL_NODE(op_out);
PATTERN_DECL_NODE(next_op);
};
struct FirstBfloat16Ops : public PatternBase {
FirstBfloat16Ops(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "first_bfloat16_ops") {}
PDNode* operator()();
PATTERN_DECL_NODE(prev_op);
PATTERN_DECL_NODE(op_in);
PATTERN_DECL_NODE(op);
};
// Pattern used for enforcing inplace computation for in-place computation
// supporting DNNL ops. softmax, batch_norm and layer_norm
struct MKLDNNInPlace : public PatternBase {
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace framework {
namespace ir {
using string::PrettyLogDetail;
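// Removes the edge a -> b from both nodes' adjacency lists.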
void UnlinkNodes(ir::Node* a, ir::Node* b) {
a->outputs.erase(std::remove(a->outputs.begin(), a->outputs.end(), b),
a->outputs.end());
b->inputs.erase(std::remove(b->inputs.begin(), b->inputs.end(), a),
b->inputs.end());
}
void CPUBFloat16Pass::SetInputDataType(ir::Graph* graph) const {
GraphPatternDetector gpd;
patterns::FirstBfloat16Ops bfloat16_ops{gpd.mutable_pattern(),
"first_bfloat16_ops"};
bfloat16_ops();
int quantize_counter = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(op_in, op_in, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_ops);
if (op->Op()->Type() != "conv2d" && prev_op->Op()->Type() != "quantize") {
VarDesc quantize_out_desc(patterns::PDNodeName("quantize", "out"));
auto* quantize_out_node = g->CreateVarNode(&quantize_out_desc);
// create a quantize op node
OpDesc q_desc;
q_desc.SetType("quantize");
q_desc.SetInput("Input", std::vector<std::string>({op_in->Name()}));
q_desc.SetOutput("Output",
std::vector<std::string>({quantize_out_node->Name()}));
q_desc.SetAttr("Scale", 1.f);
q_desc.SetAttr("bfloat16", true);
q_desc.SetAttr("output_format", Has("data_layout")
? Get<std::string>("data_layout")
: "NCHW");
auto quantize_op = g->CreateOpNode(&q_desc); // OpDesc will be copied.
std::string op_input_name;
for (auto name : op->Op()->InputNames()) {
for (auto input_name : op->Op()->Input(name)) {
if (input_name == op_in->Name()) op_input_name = name;
}
}
PADDLE_ENFORCE_NE(
op_input_name.empty(), true,
platform::errors::NotFound(
"Operator before operator should have input as op output"));
op->Op()->SetInput(op_input_name,
std::vector<std::string>({quantize_out_node->Name()}));
UnlinkNodes(op_in, op);
IR_NODE_LINK_TO(op_in, quantize_op);
IR_NODE_LINK_TO(quantize_op, quantize_out_node);
IR_NODE_LINK_TO(quantize_out_node, op);
quantize_counter++;
}
};
gpd(graph, handler);
PrettyLogDetail("--- added %d quantize op before bfloat16 op",
quantize_counter);
}
void CPUBFloat16Pass::SetOutputDataType(ir::Graph* graph) const {
GraphPatternDetector gpd;
patterns::LastBfloat16Ops bfloat16_ops{gpd.mutable_pattern(),
"last_bfloat16_ops"};
bfloat16_ops();
int force_fp32_counter = 0, dequantize_counter = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(op_out, op_out, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, bfloat16_ops);
if ((op->Op()->HasAttr("force_fp32_output") ||
op->Op()->HasProtoAttr("force_fp32_output")) &&
!op->Op()->GetAttrIfExists<bool>("fuse_residual_connection")) {
op->Op()->SetAttr("force_fp32_output", true);
force_fp32_counter++;
} else if (op->Op()->Type() != "prior_box") {
// Create dequantize input variable
VarDesc dequantize_in_desc(patterns::PDNodeName("dequantize", "in"));
auto* dequantize_in_node = g->CreateVarNode(&dequantize_in_desc);
// create a dequantize op node for output.
OpDesc deq_desc;
deq_desc.SetType("dequantize");
deq_desc.SetInput("Input",
std::vector<std::string>({dequantize_in_node->Name()}));
deq_desc.SetOutput("Output", std::vector<std::string>({op_out->Name()}));
deq_desc.SetAttr("Scale", 1.0f);
auto dequantize_op = g->CreateOpNode(&deq_desc);
std::string op_output_name;
for (auto name : op->Op()->OutputNames()) {
for (auto output_name : op->Op()->Output(name)) {
if (output_name == op_out->Name()) op_output_name = name;
}
}
PADDLE_ENFORCE_NE(
op_output_name.empty(), true,
platform::errors::NotFound(
"Operator after operator should have input as op output"));
op->Op()->SetOutput(op_output_name, std::vector<std::string>(
{dequantize_in_node->Name()}));
UnlinkNodes(op, op_out);
IR_NODE_LINK_TO(op, dequantize_in_node);
IR_NODE_LINK_TO(dequantize_in_node, dequantize_op);
IR_NODE_LINK_TO(dequantize_op, op_out);
dequantize_counter++;
}
};
gpd(graph, handler);
PrettyLogDetail("--- added %d dequantize op and used %d force_fp32_output",
dequantize_counter, force_fp32_counter);
}
void CPUBFloat16Pass::ApplyImpl(ir::Graph* graph) const {
SetInputDataType(graph);
SetOutputDataType(graph);
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(cpu_bfloat16_pass, paddle::framework::ir::CPUBFloat16Pass);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include "paddle/fluid/framework/ir/pass.h"
namespace paddle {
namespace framework {
namespace ir {
class CPUBFloat16Pass : public Pass {
protected:
void SetInputDataType(ir::Graph* graph) const;
void SetOutputDataType(ir::Graph* graph) const;
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs, bool use_mkldnn,
const std::string& mkldnn_data_type = "float32",
const bool force_fp32_output = false) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
op->SetAttr("use_mkldnn", use_mkldnn);
op->SetAttr("name", name);
if (type == "conv2d") {
op->SetInput("Input", {inputs[0]});
op->SetOutput("Output", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
op->SetAttr("force_fp32_output", force_fp32_output);
} else if (type == "pool2d" || type == "transpose2" || type == "reshape2" ||
type == "dropout") {
op->SetInput("X", {inputs[0]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
} else if (type == "fc") {
op->SetInput("Input", {inputs[0]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
} else if (type == "concat") {
op->SetInput("X", inputs);
op->SetOutput("Out", outputs);
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
} else if (type == "matmul" || type == "elementwise_add") {
op->SetInput("X", {inputs[0]});
if (inputs.size() > 1) op->SetInput("Y", {inputs[1]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
}
}
void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
const std::initializer_list<std::string> variable_names,
int* original_nodes_num, int* current_nodes_num) {
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
graph->reset(pass->Apply(graph->release()));
*original_nodes_num = (*graph)->Nodes().size();
(*graph).reset(pass->Apply((*graph).release()));
*current_nodes_num = (*graph)->Nodes().size();
}
static const std::initializer_list<std::string> variable_names{
"z", "a", "b", "c", "d", "e", "f", "g", "h", "i"};
ProgramDesc BuildProgramDesc(bool use_mkldnn) {
ProgramDesc prog;
for (auto& v : variable_names) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "dropout", "Dropout1", {"z"}, {"a"}, use_mkldnn, "float32");
SetOp(&prog, "conv2d", "Conv1", {"a"}, {"b"}, use_mkldnn, "bfloat16");
SetOp(&prog, "pool2d", "Pool1", {"b"}, {"c"}, use_mkldnn, "bfloat16");
SetOp(&prog, "conv2d", "Conv1", {"c"}, {"d"}, use_mkldnn, "bfloat16");
SetOp(&prog, "dropout", "Dropout2", {"d"}, {"e"}, use_mkldnn, "float32");
SetOp(&prog, "transpose2", "Transpose1", {"e"}, {"f"}, use_mkldnn,
"bfloat16");
SetOp(&prog, "reshape2", "Reshape1", {"f"}, {"g"}, use_mkldnn, "bfloat16");
SetOp(&prog, "concat", "Concat1", {"g"}, {"h"}, use_mkldnn, "bfloat16");
SetOp(&prog, "dropout", "Dropout3", {"h"}, {"i"}, use_mkldnn, "float32");
return prog;
}
void MainTest(const ProgramDesc& prog, int conv_count, int pool_count,
int transpose_count, int quant_count, int dequant_count,
int added_nodes_count) {
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
int original_nodes_num, current_nodes_num;
PreparePass(&graph, prog, variable_names, &original_nodes_num,
&current_nodes_num);
int quantize_nodes_count = 0;
int dequantize_nodes_count = 0;
int conv2d_nodes_count = 0;
int pool2d_nodes_count = 0;
int transpose2_nodes_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
auto* op = node->Op();
if (op->Type() == "conv2d") {
conv2d_nodes_count++;
} else if (op->Type() == "pool2d") {
pool2d_nodes_count++;
} else if (op->Type() == "transpose2") {
transpose2_nodes_count++;
} else if (op->Type() == "quantize") {
quantize_nodes_count++;
} else if (op->Type() == "dequantize") {
dequantize_nodes_count++;
}
}
}
EXPECT_EQ(conv2d_nodes_count, conv_count);
EXPECT_EQ(pool2d_nodes_count, pool_count);
EXPECT_EQ(transpose2_nodes_count, transpose_count);
EXPECT_EQ(quantize_nodes_count, quant_count);
EXPECT_EQ(dequantize_nodes_count, dequant_count);
EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
}
TEST(CpuQuantizePass, quantize) {
bool use_mkldnn = true;
// 1 quantize + 1 dequantize
int added_nodes = 2;
MainTest(BuildProgramDesc(use_mkldnn), 2, 1, 1, 1, 2, added_nodes);
}
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(cpu_bfloat16_pass);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h"
#include <string>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace framework {
namespace ir {
using string::PrettyLogDetail;
void CPUBfloat16PlacementPass::SetMkldnnDataType(
ir::Graph* graph, int* bfloat16_operators) const {
const auto& op_types_list =
Get<std::unordered_set<std::string>>("bfloat16_enabled_op_types");
// set mkldnn_data_type to bfloat16 to all operators that are in
// bfloat16_enabled_op_types vector or they are included to Bfloat16Placement
// pattern
GraphPatternDetector gpd;
patterns::Bfloat16Placement bfloat16_placement_pattern{gpd.mutable_pattern(),
"bfloat16_placement"};
bfloat16_placement_pattern(op_types_list);
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_placement_pattern);
if ((op->Op()->HasAttr("mkldnn_data_type") ||
op->Op()->HasProtoAttr("mkldnn_data_type")) &&
!platform::HasOpINT8DataType(op->Op())) {
op->Op()->SetAttr("mkldnn_data_type", std::string("bfloat16"));
(*bfloat16_operators)++;
}
};
gpd(graph, handler);
}
void CPUBfloat16PlacementPass::RemoveOrhanedOperators(
ir::Graph* graph, int* bfloat16_operators) const {
// find orphaned bfloat16 operator that is between two float32 operators
// revert mkldnn_data_type attr to float32
GraphPatternDetector gpd;
patterns::OrphanedBfloat16 orphaned_bfloat16_pattern{gpd.mutable_pattern(),
"orphaned_bfloat16"};
orphaned_bfloat16_pattern();
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(op, op, orphaned_bfloat16_pattern);
op->Op()->SetAttr("mkldnn_data_type", std::string("float32"));
bfloat16_operators--;
};
gpd(graph, handler);
}
void CPUBfloat16PlacementPass::ApplyImpl(ir::Graph* graph) const {
int bfloat16_operators = 0;
SetMkldnnDataType(graph, &bfloat16_operators);
RemoveOrhanedOperators(graph, &bfloat16_operators);
PrettyLogDetail("--- marked %d operators to bfloat16 ",
bfloat16_operators);
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(cpu_bfloat16_placement_pass,
paddle::framework::ir::CPUBfloat16PlacementPass)
// a vector of operator type names with bfloat16 support ("conv2d" etc.)
// the second param is the default value for this vector
.DefaultPassAttr("bfloat16_enabled_op_types",
new std::unordered_set<std::string>());
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include "paddle/fluid/framework/ir/pass.h"
namespace paddle {
namespace framework {
namespace ir {
/*
* Specifies which operators should be run on bfloat16.
*/
class CPUBfloat16PlacementPass : public Pass {
protected:
void SetMkldnnDataType(ir::Graph* graph, int* bfloat16_operators) const;
void RemoveOrhanedOperators(ir::Graph* graph, int* bfloat16_operators) const;
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs,
const std::string& mkldnn_data_type = "float32") {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
if (type == "conv2d") {
op->SetAttr("name", name);
op->SetInput("Input", {inputs[0]});
} else if (type == "relu") {
op->SetInput("X", inputs);
} else if (type == "concat") {
op->SetAttr("axis", 1);
op->SetInput("X", {inputs[0], inputs[1]});
} else if (type == "pool2d") {
op->SetInput("X", {inputs[0]});
} else {
FAIL() << "Unexpected operator type.";
}
op->SetOutput("Out", {outputs[0]});
}
// operator mkldnn_data_type
// ---------------------------------------
// (a,b)->concat->c float32
// c->conv->f float32
// f->relu->g float32
// g->pool->h float32
// h->conv->k float32
// k->pool->l float32
ProgramDesc BuildProgramDesc() {
ProgramDesc prog;
for (auto& v :
std::vector<std::string>({"a", "b", "c", "f", "g", "h", "k", "l"})) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "concat", "concat1", {"a", "b"}, {"c"});
SetOp(&prog, "conv2d", "conv1", {"c"}, {"f"});
SetOp(&prog, "relu", "relu1", {"f"}, {"g"});
SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"});
SetOp(&prog, "conv2d", "conv2", {"h"}, {"k"});
SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"});
return prog;
}
void MainTest(std::initializer_list<std::string> bfloat16_enabled_op_types,
unsigned expected_bfloat16_data_type_count) {
auto prog = BuildProgramDesc();
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_placement_pass");
pass->Set("bfloat16_enabled_op_types",
new std::unordered_set<std::string>(bfloat16_enabled_op_types));
graph.reset(pass->Apply(graph.release()));
unsigned bfloat16_data_type_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
if (platform::HasOpBFLOAT16DataType(node->Op())) {
++bfloat16_data_type_count;
}
}
}
EXPECT_EQ(bfloat16_data_type_count, expected_bfloat16_data_type_count);
}
void DefaultAttrTest(unsigned expected_bfloat16_data_type_count) {
auto prog = BuildProgramDesc();
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_placement_pass");
graph.reset(pass->Apply(graph.release()));
unsigned bfloat16_data_type_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
if (platform::HasOpBFLOAT16DataType(node->Op())) {
++bfloat16_data_type_count;
}
}
}
EXPECT_EQ(bfloat16_data_type_count, expected_bfloat16_data_type_count);
}
TEST(Bfloat16PlacementPass, enable_all) {
MainTest({"conv2d", "pool2d", "relu", "concat"}, 6);
}
TEST(Bfloat16PlacementPass, enabled_conv_and_pool) {
// 2 conv2d + 2 pool2 - 1 orphaned conv2d
MainTest({"conv2d", "pool2d"}, 3);
}
TEST(Bfloat16PlacementPass, default_attr_value) { DefaultAttrTest(0); }
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(cpu_bfloat16_placement_pass);
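Taken together, the two testers above also show the intended way to chain the new passes: the placement pass marks ops first, then cpu_bfloat16_pass inserts the conversion ops. Below is a minimal sketch of that flow reusing only the PassRegistry/Graph calls already shown in this commit; the helper name ApplyBfloat16Passes and the enabled op types are illustrative assumptions, not part of the commit.

// Sketch (assumption): run cpu_bfloat16_placement_pass followed by
// cpu_bfloat16_pass on a graph built from a ProgramDesc, mirroring the
// tester flow above.
#include <memory>
#include <string>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/program_desc.h"
namespace paddle {
namespace framework {
namespace ir {
std::unique_ptr<Graph> ApplyBfloat16Passes(const ProgramDesc& prog) {
  std::unique_ptr<Graph> graph(new Graph(prog));
  // Mark the selected ops with mkldnn_data_type = "bfloat16".
  auto placement = PassRegistry::Instance().Get("cpu_bfloat16_placement_pass");
  placement->Set("bfloat16_enabled_op_types",
                 new std::unordered_set<std::string>({"conv2d", "pool2d"}));
  graph.reset(placement->Apply(graph.release()));
  // Insert quantize/dequantize ops on the float32 <-> bfloat16 boundaries.
  auto bf16_pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
  graph.reset(bf16_pass->Apply(graph.release()));
  return graph;
}
}  // namespace ir
}  // namespace framework
}  // namespace paddle

As in the testers, the corresponding USE_PASS declarations are still required at link time so that both pass registrations are pulled into the binary.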
@@ -615,6 +615,16 @@ static int BuildFusionV2(Graph* graph, const std::string& name_scope,
GET_IR_NODE_FROM_SUBGRAPH(transpose2_qkv_out, transpose2_qkv_out,
multihead_pattern);
+// If weights or biases in qkv's fc are shared by multiple multihead_matmul
+// patterns, we do not support this kind of fusion, this pass will not take
+// effect.
+bool is_fc_params_shared =
+mul0_w->outputs.size() > 1 || mul1_w->outputs.size() > 1 ||
+mul2_w->outputs.size() > 1 || eltadd0_b->outputs.size() > 1 ||
+eltadd1_b->outputs.size() > 1 || eltadd2_b->outputs.size() > 1;
+if (is_fc_params_shared) {
+return;
+}
fuse_creater(input0, mul0, mul1, mul2, mul0_out, mul1_out, mul2_out, mul0_w,
mul1_w, mul2_w, eltadd0_b, eltadd1_b, eltadd2_b, eltadd_qk_b,
reshape2_0, reshape2_qkv_out, scale, scale_out);
......
@@ -20,6 +20,7 @@
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h"
+#include "paddle/fluid/framework/op_version_registry.h"
namespace paddle {
namespace framework {
@@ -145,3 +146,11 @@ void TransposeFlattenConcatFusePass::ApplyImpl(ir::Graph *graph) const {
REGISTER_PASS(transpose_flatten_concat_fuse_pass,
paddle::framework::ir::TransposeFlattenConcatFusePass);
+REGISTER_PASS_CAPABILITY(transpose_flatten_concat_fuse_pass)
+.AddCombination(
+paddle::framework::compatible::OpVersionComparatorCombination()
+.EQ("transpose", 0)
+.EQ("transpose2", 0)
+.EQ("flatten", 0)
+.EQ("concat", 0)
+.EQ("fusion_transpose_flatten_concat", 0));
@@ -69,7 +69,8 @@ class OpInfo {
const OpCreator& Creator() const {
PADDLE_ENFORCE_NOT_NULL(creator_,
-"Operator's Creator has not been registered");
+platform::errors::NotFound(
+"Operator's Creator has not been registered."));
return creator_;
}
@@ -79,11 +80,12 @@ class OpInfo {
std::string type = proto_ ? proto_->type() : "unknown";
PADDLE_ENFORCE_NOT_NULL(
grad_op_maker_,
-"Operator %s's GradOpMaker has not been "
-"registered.\nPlease check whether %s_op has "
-"grad_op.\nIf not, please set stop_gradient to True "
-"for its input and output variables using var.stop_gradient=True.",
-type.c_str(), type.c_str());
+platform::errors::NotFound(
+"Operator %s's GradOpMaker has not been "
+"registered.\nPlease check whether (%s) operator has "
+"gradient operator.\nIf not, please set stop_gradient to be True "
+"for its input and output variables using var.stop_gradient=True.",
+type.c_str(), type.c_str()));
return grad_op_maker_;
}
@@ -100,11 +102,12 @@ class OpInfo {
std::string type = proto_ ? proto_->type() : "unknown";
PADDLE_ENFORCE_NOT_NULL(
dygraph_grad_op_maker_,
-"Operator %s's DygraphGradOpMaker has not been "
-"registered.\nPlease check whether %s_op has "
-"grad_op.\nIf not, please set stop_gradient to True "
-"for its input and output variables using var.stop_gradient=True.",
-type.c_str(), type.c_str());
+platform::errors::NotFound(
+"Operator %s's DygraphGradOpMaker has not been "
+"registered.\nPlease check whether (%s) operator has "
+"gradient operator.\nIf not, please set stop_gradient to be True "
+"for its input and output variables using var.stop_gradient=True.",
+type.c_str(), type.c_str()));
return dygraph_grad_op_maker_;
}
@@ -130,14 +133,17 @@ class OpInfoMap {
}
void Insert(const std::string& type, const OpInfo& info) {
-PADDLE_ENFORCE(!Has(type), "Operator %s has been registered", type);
+PADDLE_ENFORCE_NE(Has(type), true,
+platform::errors::AlreadyExists(
+"Operator (%s) has been registered.", type));
map_.insert({type, info});
}
const OpInfo& Get(const std::string& type) const {
auto op_info_ptr = GetNullable(type);
-PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not been registered",
-type);
+PADDLE_ENFORCE_NOT_NULL(
+op_info_ptr,
+platform::errors::NotFound("Operator (%s) is not registered.", type));
return *op_info_ptr;
}
......
@@ -33,10 +33,18 @@ size_t OpKernelType::Hash::operator()(const OpKernelType& key) const {
cur_loc += OpKernelType::kLibBits;
int customized_value = key.customized_type_value_;
-PADDLE_ENFORCE(customized_value < (1 << OpKernelType::kCustomizeBits));
+PADDLE_ENFORCE_LT(customized_value, (1 << OpKernelType::kCustomizeBits),
+platform::errors::Unavailable(
+"Too many custom OpKernel attribute values, expected "
+"maximum value is %d, received value is %d.",
+(1 << OpKernelType::kCustomizeBits), customized_value));
customized_value = customized_value << cur_loc;
cur_loc += OpKernelType::kCustomizeBits;
-PADDLE_ENFORCE(cur_loc < 64);
+PADDLE_ENFORCE_LT(cur_loc, 64,
+platform::errors::Unavailable(
+"Too many OpKernel attribute values, expected maximum "
+"value is 64, received value is %d.",
+cur_loc));
std::hash<int> hasher;
return hasher(place + data_type + data_layout + library_type +
......
@@ -43,7 +43,9 @@ OpProtoAndCheckerMaker::VariableBuilder OpProtoAndCheckerMaker::AddOutput(
void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() {
std::unordered_set<std::string> names;
auto checker = [&](const std::string& name) {
-PADDLE_ENFORCE(!names.count(name), "[%s] is duplicated", name);
+PADDLE_ENFORCE_EQ(
+names.count(name), 0,
+platform::errors::AlreadyExists("Attribute [%s] is duplicated.", name));
names.insert(name);
};
for (auto& attr : proto_->attrs()) {
......
@@ -54,9 +54,10 @@ class Registrar {
template <typename... ARGS>
struct OperatorRegistrar : public Registrar {
explicit OperatorRegistrar(const char* op_type) {
-if (OpInfoMap::Instance().Has(op_type)) {
-PADDLE_THROW("'%s' is registered more than once.", op_type);
-}
+PADDLE_ENFORCE_EQ(
+OpInfoMap::Instance().Has(op_type), false,
+platform::errors::AlreadyExists(
+"Operator '%s' is registered more than once.", op_type));
static_assert(sizeof...(ARGS) != 0,
"OperatorRegistrar should be invoked at least by OpClass");
OpInfo info;
......
@@ -58,7 +58,8 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
AddInput("input", "input of cosine op").AsDuplicable();
AddOutput("output", "output of cosine op").AsIntermediate();
auto my_checker = [](int i) {
-PADDLE_ENFORCE(i % 2 == 0, "'test_attr' must be even!");
+PADDLE_ENFORCE_EQ(i % 2, 0, platform::errors::InvalidArgument(
+"'test_attr' must be even!"));
};
AddAttr<int>("test_attr", "a simple test attribute")
.AddCustomChecker(my_checker);
......
@@ -152,10 +152,10 @@ class OpVersionRegistrar {
return instance;
}
OpVersion& Register(const std::string& op_type) {
-if (op_version_map_.find(op_type) != op_version_map_.end()) {
-PADDLE_THROW("'%s' is registered in operator version more than once.",
-op_type);
-}
+PADDLE_ENFORCE_EQ(
+op_version_map_.find(op_type), op_version_map_.end(),
+platform::errors::AlreadyExists(
+"'%s' is registered in operator version more than once.", op_type));
op_version_map_.insert({op_type, OpVersion()});
return op_version_map_[op_type];
}
......
This diff is collapsed.
@@ -495,9 +495,9 @@ TEST(IndicateVarDataTypeTest, other) {
EXPECT_TRUE(
ex_msg.find(
"The Input Variable(Other) of "
-"indicate_other_data_type_test Op used to "
+"(indicate_other_data_type_test) Operator used to "
"determine kernel data type "
-"is empty or not LoDTensor or SelectedRows or LoDTensorArray") !=
+"is empty or not LoDTensor or SelectedRows or LoDTensorArray.") !=
std::string::npos);
}
ASSERT_TRUE(caught);
......
@@ -20,7 +20,10 @@ namespace framework {
void ReaderBase::ReadNext(std::vector<LoDTensor> *out) {
std::lock_guard<std::mutex> lock(mu_);
-PADDLE_ENFORCE_EQ(status_, ReaderStatus::kRunning);
+PADDLE_ENFORCE_EQ(status_, ReaderStatus::kRunning,
+platform::errors::Unavailable(
+"The current reader has stopped running and cannot "
+"continue to read the next batch of data."));
ReadNextImpl(out);
}
......
@@ -32,17 +32,21 @@ struct RWLock {
~RWLock() { pthread_rwlock_destroy(&lock_); }
inline void RDLock() {
-PADDLE_ENFORCE_EQ(pthread_rwlock_rdlock(&lock_), 0,
-"acquire read lock failed");
+PADDLE_ENFORCE_EQ(
+pthread_rwlock_rdlock(&lock_), 0,
+platform::errors::External("The pthread failed to acquire read lock."));
}
inline void WRLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_wrlock(&lock_), 0,
-"acquire write lock failed");
+platform::errors::External(
+"The pthread failed to acquire write lock."));
}
inline void UNLock() {
-PADDLE_ENFORCE_EQ(pthread_rwlock_unlock(&lock_), 0, "unlock failed");
+PADDLE_ENFORCE_EQ(
+pthread_rwlock_unlock(&lock_), 0,
+platform::errors::External("The pthread failed to unlock."));
}
private:
......
...@@ -33,7 +33,8 @@ void CheckInStreamState(std::istream& istre, size_t length) { ...@@ -33,7 +33,8 @@ void CheckInStreamState(std::istream& istre, size_t length) {
VLOG(5) << "Can't read [" << length << "] from file" VLOG(5) << "Can't read [" << length << "] from file"
<< "file seems breakem"; << "file seems breakem";
PADDLE_THROW("Model load error, file seems breaken"); PADDLE_THROW(platform::errors::Unavailable(
"Model load failed, istream state error."));
} }
} }
...@@ -58,10 +59,11 @@ size_t ReadTensorNumber(std::istream& istre) { ...@@ -58,10 +59,11 @@ size_t ReadTensorNumber(std::istream& istre) {
sizeof(char) * tensor_number_mark.size()); sizeof(char) * tensor_number_mark.size());
std::string str_read_tensor_number_mark(tensor_number_mark_buffer, std::string str_read_tensor_number_mark(tensor_number_mark_buffer,
tensor_number_mark.size()); tensor_number_mark.size());
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(tensor_number_mark, str_read_tensor_number_mark,
tensor_number_mark, str_read_tensor_number_mark, platform::errors::InvalidArgument(
"Tensor number mark not match, expect [%s], but read from file is [%]", "Tensor number mark does not match, expect mark is "
tensor_number_mark, str_read_tensor_number_mark); "[%s], but the mark read from file is [%s].",
tensor_number_mark, str_read_tensor_number_mark));
size_t tensor_number = 0; size_t tensor_number = 0;
istre.read(reinterpret_cast<char*>(&tensor_number), sizeof(tensor_number)); istre.read(reinterpret_cast<char*>(&tensor_number), sizeof(tensor_number));
...@@ -79,10 +81,11 @@ std::string ReadTensorName(std::istream& istre) { ...@@ -79,10 +81,11 @@ std::string ReadTensorName(std::istream& istre) {
std::string str_read_tensor_name_mark(name_mark_buffer, std::string str_read_tensor_name_mark(name_mark_buffer,
tensor_name_mark.size()); tensor_name_mark.size());
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(tensor_name_mark, str_read_tensor_name_mark,
tensor_name_mark, str_read_tensor_name_mark, platform::errors::InvalidArgument(
"Tensor name mark not match, expect [%s], but read from file is [%]", "Tensor name mark does not match, expect mark is [%s], "
tensor_name_mark, str_read_tensor_name_mark); "but the mark read from file is [%s].",
tensor_name_mark, str_read_tensor_name_mark));
size_t tensor_name_length = 0; size_t tensor_name_length = 0;
istre.read(reinterpret_cast<char*>(&tensor_name_length), istre.read(reinterpret_cast<char*>(&tensor_name_length),
...@@ -117,16 +120,18 @@ bool SaveStaticNameListToDisk( ...@@ -117,16 +120,18 @@ bool SaveStaticNameListToDisk(
for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) { for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) {
auto var_ptr = scope.FindVar(vec_tensor_name_list[i]); auto var_ptr = scope.FindVar(vec_tensor_name_list[i]);
PADDLE_ENFORCE_NE( PADDLE_ENFORCE_NOT_NULL(
var_ptr, nullptr, var_ptr, platform::errors::NotFound("Variable (%s) is not found when "
"Variable find error, when save model, can't not find vairable [%s], " "saving model, please make sure "
"Please make sure you have run StartUpProgram", "that exe.run(startup_program) has "
vec_tensor_name_list[i]); "been executed.",
vec_tensor_name_list[i]));
Tensor* tensor = var_ptr->GetMutable<LoDTensor>(); Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true, PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed," platform::errors::PreconditionNotMet(
"Please make sure you have run StartUpProgram", "Paramter [%s] is not initialzed, please make sure "
vec_tensor_name_list[i]); "that exe.run(startup_program) has been executed.",
vec_tensor_name_list[i]));
map_tensor[vec_tensor_name_list[i]] = tensor; map_tensor[vec_tensor_name_list[i]] = tensor;
} }
...@@ -145,9 +150,10 @@ bool SaveDygraphVarBaseListToDisk( ...@@ -145,9 +150,10 @@ bool SaveDygraphVarBaseListToDisk(
Tensor* tensor = var_ptr->GetMutable<LoDTensor>(); Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true, PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed," platform::errors::PreconditionNotMet(
"Please make sure you have run StartUpProgram", "Paramter [%s] is not initialzed, please make sure "
vec_var_base_list[i]->Name()); "that exe.run(startup_program) has been executed.",
vec_var_base_list[i]->Name()));
map_tensor[vec_var_base_list[i]->Name()] = tensor; map_tensor[vec_var_base_list[i]->Name()] = tensor;
} }
...@@ -185,34 +191,41 @@ bool LoadStaticNameListFromDisk( ...@@ -185,34 +191,41 @@ bool LoadStaticNameListFromDisk(
for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) { for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) {
auto it = map_load_tensor.find(vec_tensor_name_list[i]); auto it = map_load_tensor.find(vec_tensor_name_list[i]);
PADDLE_ENFORCE(it != map_load_tensor.end(), PADDLE_ENFORCE_NE(it, map_load_tensor.end(),
"Paramete not found in Model file, " platform::errors::NotFound(
"Can not find [%s] in model file [%s]", "Parameter (%s) not found in model file (%s).",
vec_tensor_name_list[i], file_name); vec_tensor_name_list[i], file_name));
auto var_ptr = scope.FindVar(vec_tensor_name_list[i]); auto var_ptr = scope.FindVar(vec_tensor_name_list[i]);
PADDLE_ENFORCE_NE( PADDLE_ENFORCE_NOT_NULL(
var_ptr, nullptr, var_ptr,
"Parameter not created, when load model, can't not find parameter [%s] " platform::errors::PreconditionNotMet(
"please make sure you have run StartUpProgram", "Parameter (%s) is not created when loading model, "
vec_tensor_name_list[i]); "please make sure that exe.run(startup_program) has been executed.",
vec_tensor_name_list[i]));
Tensor* tensor = var_ptr->GetMutable<LoDTensor>(); Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_NE(tensor, nullptr, PADDLE_ENFORCE_NOT_NULL(
"Paramter [%s] not initialzed " tensor,
"please make sure you have run startUpProgram", platform::errors::PreconditionNotMet(
vec_tensor_name_list[i]); "Paramter [%s] is not initialzed, "
"please make sure that exe.run(startup_program) has been executed.",
vec_tensor_name_list[i]));
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true, PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed " platform::errors::PreconditionNotMet(
"please make sure you have run StartUpProgram", "Paramter [%s] is not initialzed, "
vec_tensor_name_list[i]); "please make sure that exe.run(startup_program) has "
"been executed.v",
vec_tensor_name_list[i]));
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
tensor->dims(), it->second->dims(), tensor->dims(), it->second->dims(),
"Shape not matching: the Program requires a parameter with a shape of " platform::errors::InvalidArgument(
"(%s), " "Shape does not match, the program requires a parameter with a "
"while the loaded parameter (namely [ %s ]) has a shape of (%s).", "shape of "
tensor->dims(), vec_tensor_name_list[i], it->second->dims()); "(%s), while the loaded parameter (namely [ %s ]) has a shape of "
"(%s).",
tensor->dims(), vec_tensor_name_list[i], it->second->dims()));
TensorCopySync(*(it->second.get()), tensor->place(), tensor); TensorCopySync(*(it->second.get()), tensor->place(), tensor);
@@ -239,9 +252,9 @@ bool SaveTensorToDisk(const std::string& file_name,
MkDirRecursively(DirName(file_name).c_str()); MkDirRecursively(DirName(file_name).c_str());
std::ofstream fout(file_name, std::ios::binary); std::ofstream fout(file_name, std::ios::binary);
if (!fout) { PADDLE_ENFORCE_EQ(
PADDLE_THROW("File open error. Can not open file [%s]", file_name); fout.is_open(), true,
} platform::errors::Unavailable("File (%s) open failed.", file_name));
// first 256 byte for reserve for fulture upgrade // first 256 byte for reserve for fulture upgrade
char* kReserveBuffer = new char[model_file_reserve_size]; char* kReserveBuffer = new char[model_file_reserve_size];
@@ -292,9 +305,8 @@ bool SaveTensorToDisk(const std::string& file_name,
TensorCopySync(*tensor, platform::CPUPlace(), &temp); TensorCopySync(*tensor, platform::CPUPlace(), &temp);
data_ptr = temp.data<void>(); data_ptr = temp.data<void>();
#else #else
PADDLE_THROW( PADDLE_THROW(platform::errors::Unavailable(
"Tensor is in CUDA device, but paddle not compile with CUDA, this " "Tensor is in CUDA device, but paddle not compiled with CUDA."));
"should not happen");
#endif #endif
} }
fout.write(static_cast<const char*>(data_ptr), fout.write(static_cast<const char*>(data_ptr),
@@ -302,8 +314,9 @@ bool SaveTensorToDisk(const std::string& file_name,
} }
if (!fout) { if (!fout) {
PADDLE_THROW("Model save failed, data write to model file [%s] error", PADDLE_THROW(platform::errors::Unavailable(
file_name); "Model save failed, error when writing data into model file [%s].",
file_name));
} }
fout.close(); fout.close();
@@ -316,9 +329,9 @@ bool LoadTensorFromDisk(
std::map<std::string, std::shared_ptr<Tensor>>* map_tensor) { std::map<std::string, std::shared_ptr<Tensor>>* map_tensor) {
std::ifstream fin(file_name, std::ios::binary); std::ifstream fin(file_name, std::ios::binary);
if (!fin) { PADDLE_ENFORCE_EQ(
PADDLE_THROW("File open error. Can not open model file [%s]", file_name); fin.is_open(), true,
} platform::errors::Unavailable("File (%s) open failed.", file_name));
ReadReserveBuffer(fin); ReadReserveBuffer(fin);
@@ -331,7 +344,8 @@ bool LoadTensorFromDisk(
uint32_t version; uint32_t version;
fin.read(reinterpret_cast<char*>(&version), sizeof(version)); fin.read(reinterpret_cast<char*>(&version), sizeof(version));
CheckInStreamState(fin, sizeof(version)); CheckInStreamState(fin, sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); PADDLE_ENFORCE_EQ(version, 0U, platform::errors::InvalidArgument(
"Only version 0 tensor is supported."));
proto::VarType::TensorDesc desc; proto::VarType::TensorDesc desc;
{ {
// int32_t size // int32_t size
@@ -344,7 +358,7 @@ bool LoadTensorFromDisk(
CheckInStreamState(fin, sizeof(size)); CheckInStreamState(fin, sizeof(size));
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
desc.ParseFromArray(buf.get(), size), true, desc.ParseFromArray(buf.get(), size), true,
platform::errors::InvalidArgument("Cannot parse tensor desc")); platform::errors::InvalidArgument("Parse tensor desc failed."));
} }
{ // read tensor { // read tensor
......
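Note: every hunk in the file above follows the same migration — a bare-string PADDLE_THROW / PADDLE_ENFORCE is replaced by a comparison macro (PADDLE_ENFORCE_EQ, PADDLE_ENFORCE_NOT_NULL, PADDLE_ENFORCE_NE, ...) whose message is built by a typed platform::errors helper. A minimal stand-alone sketch of that pattern follows; the ENFORCE_EQ macro and the errors namespace here are simplified stand-ins, not the actual Paddle implementation.

#include <cstdio>
#include <stdexcept>
#include <string>

namespace errors {
// Simplified stand-ins for platform::errors::* -- they only prefix the
// formatted message with a category name; real Paddle also records an error code.
template <typename... Args>
std::string InvalidArgument(const char* fmt, Args... args) {
  char buf[512];
  std::snprintf(buf, sizeof(buf), fmt, args...);
  return std::string("InvalidArgumentError: ") + buf;
}
template <typename... Args>
std::string NotFound(const char* fmt, Args... args) {
  char buf[512];
  std::snprintf(buf, sizeof(buf), fmt, args...);
  return std::string("NotFoundError: ") + buf;
}
}  // namespace errors

// Simplified ENFORCE_EQ: throws the already formatted, categorised message
// when the comparison fails, instead of a bare string with printf placeholders.
#define ENFORCE_EQ(lhs, rhs, msg)                         \
  do {                                                    \
    if (!((lhs) == (rhs))) throw std::runtime_error(msg); \
  } while (0)

int main() {
  int version = 1;
  try {
    ENFORCE_EQ(version, 0,
               errors::InvalidArgument(
                   "Only version 0 is supported, but received %d.", version));
  } catch (const std::exception& e) {
    std::printf("%s\n", e.what());  // prints the categorised message
  }
  return 0;
}

The point of the change is that the error category (InvalidArgument, NotFound, PreconditionNotMet, ...) travels with the formatted message instead of being implied by free text.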
@@ -113,7 +113,9 @@ void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows,
// the 1st field, unit32_t version for SelectedRows // the 1st field, unit32_t version for SelectedRows
uint32_t version; uint32_t version;
is.read(reinterpret_cast<char*>(&version), sizeof(version)); is.read(reinterpret_cast<char*>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); PADDLE_ENFORCE_EQ(version, 0U,
platform::errors::InvalidArgument(
"Only version 0 SelectedRows is supported."));
} }
{ {
// the 2st field, rows information // the 2st field, rows information
@@ -155,24 +157,27 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown,
auto iter = id_to_index_.find(key); auto iter = id_to_index_.find(key);
if (iter == id_to_index_.end()) { if (iter == id_to_index_.end()) {
rwlock_->UNLock(); rwlock_->UNLock();
if (!auto_grown) { PADDLE_ENFORCE_EQ(
PADDLE_THROW("key %d not found", key); auto_grown, true,
} platform::errors::NotFound("Input key(%lld) is not found.", key));
rwlock_->WRLock(); rwlock_->WRLock();
auto map_size = id_to_index_.size(); auto map_size = id_to_index_.size();
auto vector_size = rows_.size(); auto vector_size = rows_.size();
if (map_size != vector_size) { if (map_size != vector_size) {
rwlock_->UNLock(); rwlock_->UNLock();
PADDLE_THROW( PADDLE_THROW(platform::errors::InvalidArgument(
"id_to_index_ size %d should have the same size with rows_ %d", "Row map size(%zu) should be equal to rows size(%zu).", map_size,
map_size, vector_size); vector_size));
} }
auto write_iter = id_to_index_.find(key); auto write_iter = id_to_index_.find(key);
if (write_iter == id_to_index_.end()) { if (write_iter == id_to_index_.end()) {
int row_num = rows_.size(); int row_num = rows_.size();
if (row_num == value_->dims()[0]) { if (row_num == value_->dims()[0]) {
rwlock_->UNLock(); rwlock_->UNLock();
PADDLE_THROW("selected rows is full, then length exceed %d", row_num); PADDLE_THROW(platform::errors::InvalidArgument(
"Selected rows is full, then length exceed the length of first "
"dimension (%d).",
row_num));
} }
// key logic to put a key into id_to_index_ // key logic to put a key into id_to_index_
rows_.push_back(key); rows_.push_back(key);
@@ -203,15 +208,20 @@ void SelectedRows::SyncIndex() {
void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value, void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
bool auto_grown, bool is_test) { bool auto_grown, bool is_test) {
PADDLE_ENFORCE(value->IsInitialized(), PADDLE_ENFORCE_EQ(value->IsInitialized(), true,
"The value tensor should be initialized."); platform::errors::InvalidArgument(
"The value tensor is not initialized."));
if (ids.numel() == 0) { if (ids.numel() == 0) {
VLOG(3) << "keys is empty, please check data!"; VLOG(3) << "keys is empty, please check data!";
} else { } else {
int64_t value_width = value_->numel() / value_->dims()[0]; int64_t value_width = value_->numel() / value_->dims()[0];
PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0], PADDLE_ENFORCE_EQ(
"output tensor should have the same shape with table " value_width, value->numel() / value->dims()[0],
"except the dims[0]."); platform::errors::InvalidArgument(
"Output tensor should have the same shape with table "
"except the first dimmension, excepted value width not counting "
"the first dimension is %d, actual value width is %d.",
value_width, value->numel() / value->dims()[0]));
for (int i = 0; i < ids.numel(); ++i) { for (int i = 0; i < ids.numel(); ++i) {
auto id = ids.data<int64_t>()[i]; auto id = ids.data<int64_t>()[i];
int64_t index = AutoGrownIndex(id, auto_grown, is_test); int64_t index = AutoGrownIndex(id, auto_grown, is_test);
......
@@ -82,7 +82,8 @@ class SelectedRows {
int64_t Index(int64_t key) const { int64_t Index(int64_t key) const {
auto it = std::find(rows_.begin(), rows_.end(), key); auto it = std::find(rows_.begin(), rows_.end(), key);
if (it == rows_.end()) { if (it == rows_.end()) {
PADDLE_THROW("id %s not in table", key); PADDLE_THROW(platform::errors::NotFound(
"Input id (%lld) is not in current rows table.", key));
} }
return static_cast<int64_t>(std::distance(rows_.begin(), it)); return static_cast<int64_t>(std::distance(rows_.begin(), it));
} }
......
@@ -25,20 +25,22 @@ namespace framework {
std::vector<DDim> InferShapeContext::GetReaderDims( std::vector<DDim> InferShapeContext::GetReaderDims(
const std::string &name) const { const std::string &name) const {
const std::vector<std::string> &arg_names = Inputs(name); const std::vector<std::string> &arg_names = Inputs(name);
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(arg_names.size(), 1UL,
arg_names.size(), 1UL, platform::errors::InvalidArgument(
"Reader input '%s' should hold one element, but now it holds %d", name, "Reader input '%s' should hold one element, but now it "
arg_names.size()); "holds %d elements.",
name, arg_names.size()));
return this->GetRepeatedDims(arg_names[0]); return this->GetRepeatedDims(arg_names[0]);
} }
void InferShapeContext::SetReaderDims(const std::string &name, void InferShapeContext::SetReaderDims(const std::string &name,
const std::vector<DDim> &dims) { const std::vector<DDim> &dims) {
const std::vector<std::string> &arg_names = Outputs(name); const std::vector<std::string> &arg_names = Outputs(name);
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(arg_names.size(), 1UL,
arg_names.size(), 1UL, platform::errors::InvalidArgument(
"Reader output '%s' should hold one element, but now it holds %d", name, "Reader output '%s' should hold one element, but now "
arg_names.size()); "it holds %d elements.",
name, arg_names.size()));
return this->SetRepeatedDims(arg_names[0], dims); return this->SetRepeatedDims(arg_names[0], dims);
} }
......
@@ -19,13 +19,17 @@ namespace paddle {
namespace framework { namespace framework {
extern size_t SizeOfType(proto::VarType::Type type); extern size_t SizeOfType(proto::VarType::Type type);
void Tensor::check_memory_size() const { void Tensor::check_memory_size() const {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(holder_, platform::errors::PreconditionNotMet(
holder_, "Tensor holds no memory. Call Tensor::mutable_data first."); "Tensor holds no memory. "
"Call Tensor::mutable_data firstly."));
PADDLE_ENFORCE_LE( PADDLE_ENFORCE_LE(
numel() * SizeOfType(type()), memory_size(), numel() * SizeOfType(type()), memory_size(),
"Tensor's dims_ is out of bound. Call Tensor::mutable_data " platform::errors::PreconditionNotMet(
"first to re-allocate memory.\n" "Tensor's dimension is out of bound."
"or maybe the required data-type mismatches the data already stored."); "Tensor's dimension must be equal or less than the size of its "
"memory."
"But received Tensor's dimension is d%, memory's size is %d.",
numel() * SizeOfType(type()), memory_size()));
} }
Tensor::Tensor(const proto::VarType::Type& dtype) : type_(dtype), offset_(0) {} Tensor::Tensor(const proto::VarType::Type& dtype) : type_(dtype), offset_(0) {}
@@ -37,15 +41,21 @@ size_t Tensor::memory_size() const {
void* Tensor::mutable_data(const platform::Place& place, void* Tensor::mutable_data(const platform::Place& place,
proto::VarType::Type type, size_t requested_size) { proto::VarType::Type type, size_t requested_size) {
type_ = type; type_ = type;
PADDLE_ENFORCE_GE(numel(), 0, PADDLE_ENFORCE_GE(
"When calling this method, the Tensor's numel must be " numel(), 0,
"equal or larger than zero. " platform::errors::PreconditionNotMet(
"Please check Tensor::dims, or Tensor::Resize has been " "The Tensor's element number must be equal or greater than zero. "
"called first. The Tensor's shape is [", "The Tensor's shape is [",
dims(), "] now"); dims(), "] now"));
size_t size = numel() * SizeOfType(type); size_t size = numel() * SizeOfType(type);
if (requested_size) { if (requested_size) {
PADDLE_ENFORCE_GE(requested_size, size); PADDLE_ENFORCE_GE(
requested_size, size,
platform::errors::InvalidArgument(
"The requested memory size is less than the memory size of Tensor. "
"But received requested memory size is d%, "
"memory size of Tensor is %d.",
requested_size, size));
size = requested_size; size = requested_size;
} }
/* some versions of boost::variant don't have operator!= */ /* some versions of boost::variant don't have operator!= */
@@ -62,8 +72,8 @@ void* Tensor::mutable_data(const platform::Place& place,
void* Tensor::mutable_data(const platform::Place& place, void* Tensor::mutable_data(const platform::Place& place,
size_t requested_size) { size_t requested_size) {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(this->holder_, platform::errors::PreconditionNotMet(
this->holder_, "Cannot invoke mutable data if current hold nothing."); "The tensor is not initialized."));
return mutable_data(place, type_, requested_size); return mutable_data(place, type_, requested_size);
} }
@@ -75,12 +85,20 @@ Tensor& Tensor::ShareDataWith(const Tensor& src) {
Tensor Tensor::Slice(int64_t begin_idx, int64_t end_idx) const { Tensor Tensor::Slice(int64_t begin_idx, int64_t end_idx) const {
check_memory_size(); check_memory_size();
PADDLE_ENFORCE_GE(begin_idx, 0, PADDLE_ENFORCE_GE(
"The start row index must be greater than 0."); begin_idx, 0,
PADDLE_ENFORCE_LE(end_idx, dims_[0], "The end row index is out of bound."); platform::errors::OutOfRange("The start row index must be greater than 0."
"But received the start index is d%.",
begin_idx));
PADDLE_ENFORCE_LE(
end_idx, dims_[0],
platform::errors::OutOfRange("The end row index is out of bound."));
PADDLE_ENFORCE_LT( PADDLE_ENFORCE_LT(
begin_idx, end_idx, begin_idx, end_idx,
"The start row index must be lesser than the end row index."); platform::errors::InvalidArgument(
"The start row index must be less than the end row index."
"But received the start index = %d, the end index = %d.",
begin_idx, end_idx));
if (dims_[0] == 1) { if (dims_[0] == 1) {
return *this; return *this;
......
@@ -131,13 +131,17 @@ class Tensor {
const platform::Place& place() const { const platform::Place& place() const {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
holder_, "Tensor not initialized yet when Tensor::place() is called."); holder_,
platform::errors::PreconditionNotMet(
"Tensor not initialized yet when Tensor::place() is called."));
return holder_->place(); return holder_->place();
} }
proto::VarType::Type type() const { proto::VarType::Type type() const {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
holder_, "Tensor not initialized yet when Tensor::type() is called."); holder_,
platform::errors::PreconditionNotMet(
"Tensor not initialized yet when Tensor::type() is called."));
return type_; return type_;
} }
......
@@ -43,9 +43,13 @@ inline T* Tensor::data() {
check_memory_size(); check_memory_size();
bool valid = bool valid =
std::is_same<T, void>::value || type_ == DataTypeTrait<T>::DataType(); std::is_same<T, void>::value || type_ == DataTypeTrait<T>::DataType();
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
valid, "Tensor holds the wrong type, it holds %s, but desires to be %s", valid, true,
DataTypeToString(type_), DataTypeToString(DataTypeTrait<T>::DataType())); platform::errors::InvalidArgument(
"Tensor holds the wrong type, it holds %s, but desires to be %s",
DataTypeToString(type_),
DataTypeToString(DataTypeTrait<T>::DataType())));
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) + return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_); offset_);
} }
@@ -69,9 +73,12 @@ inline T* Tensor::mutable_data(const platform::Place& place,
inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) { inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {
int rank = src.dims().size(); int rank = src.dims().size();
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(
rank, 2, rank, 2, platform::errors::InvalidArgument(
"'ReshapeToMatrix()' is only used for flatten high rank " "'ReshapeToMatrix()' is only used for flatten high rank "
"tensors to matrixs. Can not be used in reshaping vectors."); "tensors to matrixs. The dimensions of Tensor must be "
"greater or equal than 2. "
"But received dimensions of Tensor is %d",
rank));
if (rank == 2) { if (rank == 2) {
return src; return src;
} }
......
@@ -41,7 +41,7 @@ TEST(Tensor, DataAssert) {
std::string ex_msg = err.what(); std::string ex_msg = err.what();
EXPECT_TRUE(ex_msg.find("holder_ should not be null") != std::string::npos); EXPECT_TRUE(ex_msg.find("holder_ should not be null") != std::string::npos);
EXPECT_TRUE(ex_msg.find("Tensor holds no memory. Call " EXPECT_TRUE(ex_msg.find("Tensor holds no memory. Call "
"Tensor::mutable_data first.") != "Tensor::mutable_data firstly.") !=
std::string::npos); std::string::npos);
} }
ASSERT_TRUE(caught); ASSERT_TRUE(caught);
@@ -157,7 +157,7 @@ TEST(Tensor, ShareDataWith) {
EXPECT_TRUE(ex_msg.find("holder_ should not be null") != EXPECT_TRUE(ex_msg.find("holder_ should not be null") !=
std::string::npos); std::string::npos);
EXPECT_TRUE(ex_msg.find("Tensor holds no memory. Call " EXPECT_TRUE(ex_msg.find("Tensor holds no memory. Call "
"Tensor::mutable_data first.") != "Tensor::mutable_data firstly.") !=
std::string::npos); std::string::npos);
} }
ASSERT_TRUE(caught); ASSERT_TRUE(caught);
......
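Note: the two test hunks above only swap the expected substring ("first." becomes "firstly."); the checking idiom itself is unchanged. A self-contained sketch of that idiom, using a plain std::runtime_error instead of Paddle's enforce exception type (an assumption made here to keep the sketch runnable), looks like:

#include <cassert>
#include <stdexcept>
#include <string>

// Stand-in for a Tensor method guarded by an enforce check.
void check_memory_size(const void* holder) {
  if (holder == nullptr) {
    throw std::runtime_error(
        "Tensor holds no memory. Call Tensor::mutable_data firstly.");
  }
}

int main() {
  bool caught = false;
  try {
    check_memory_size(nullptr);
  } catch (const std::exception& err) {
    caught = true;
    std::string ex_msg = err.what();
    // Same assertion style as the updated tests: match on a message fragment,
    // not on the full text, so minor rewording does not break the test.
    assert(ex_msg.find("Tensor holds no memory") != std::string::npos);
  }
  assert(caught);
  return 0;
}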
@@ -94,9 +94,17 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place); auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place);
auto dst_cpu_place = BOOST_GET_CONST(platform::CPUPlace, dst_place); auto dst_cpu_place = BOOST_GET_CONST(platform::CPUPlace, dst_place);
auto ctx_place = ctx.GetPlace(); auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true); PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx_place), true,
platform::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
ctx_place));
auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place); auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place); PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place,
platform::errors::Unavailable(
"Source place and context place do not match, source "
"place is %s, context place is %s.",
src_gpu_place, ctx_gpu_place));
auto stream = auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream(); reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
@@ -106,9 +114,17 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto src_cpu_place = BOOST_GET_CONST(platform::CPUPlace, src_place); auto src_cpu_place = BOOST_GET_CONST(platform::CPUPlace, src_place);
auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place); auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place);
auto ctx_place = ctx.GetPlace(); auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true); PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx_place), true,
platform::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
ctx_place));
auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place); auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place);
PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place); PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place,
platform::errors::Unavailable(
"Destination place and context place do not match, "
"destination place is %s, context place is %s.",
dst_gpu_place, ctx_gpu_place));
auto stream = auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream(); reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream); memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream);
@@ -164,7 +180,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place); auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place);
auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place); auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place);
auto ctx_place = ctx.GetPlace(); auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true); PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx_place), true,
platform::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
ctx_place));
auto stream = auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream(); reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_same_place(src_place, dst_place)) { if (platform::is_same_place(src_place, dst_place)) {
@@ -180,12 +200,14 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
stream); stream);
} else { } else {
PADDLE_THROW("ctx is not belong to dst_gpu_place or src_gpu_place."); PADDLE_THROW(platform::errors::Unavailable(
"Context place dose not match the source and destination place."));
} }
} }
} }
else { // NOLINT else { // NOLINT
PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place); PADDLE_THROW(platform::errors::Unimplemented(
"Copying from %s to %s is not supported.", src_place, dst_place));
} }
#endif #endif
} }
@@ -298,7 +320,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
nullptr); nullptr);
} }
else { // NOLINT else { // NOLINT
PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place); PADDLE_THROW(platform::errors::Unimplemented(
"Copy from %s to %s is not supported.", src_place, dst_place));
} }
#endif #endif
} }
@@ -832,7 +855,9 @@ void TensorFromStream(std::istream& is, Tensor* tensor,
void* GetDstPtrByDLDataType(DLDataType type, framework::Tensor* dst, void* GetDstPtrByDLDataType(DLDataType type, framework::Tensor* dst,
const platform::Place& dst_place) { const platform::Place& dst_place) {
// vector types not currently supported // vector types not currently supported
PADDLE_ENFORCE_LE(type.lanes, 1, "vector types not currently supported"); PADDLE_ENFORCE_LE(type.lanes, 1,
platform::errors::Unimplemented(
"Vector type is not supported currently."));
switch (type.bits) { switch (type.bits) {
case 8: case 8:
@@ -840,32 +865,37 @@ void* GetDstPtrByDLDataType(DLDataType type, framework::Tensor* dst,
return static_cast<void*>(dst->mutable_data<int8_t>(dst_place)); return static_cast<void*>(dst->mutable_data<int8_t>(dst_place));
if (type.code == kDLUInt) if (type.code == kDLUInt)
return static_cast<void*>(dst->mutable_data<uint8_t>(dst_place)); return static_cast<void*>(dst->mutable_data<uint8_t>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.", PADDLE_THROW(platform::errors::Unimplemented(
type.code, type.bits); "DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
case 16: case 16:
if (type.code == kDLInt) if (type.code == kDLInt)
return static_cast<void*>(dst->mutable_data<int16_t>(dst_place)); return static_cast<void*>(dst->mutable_data<int16_t>(dst_place));
if (type.code == kDLFloat) if (type.code == kDLFloat)
return static_cast<void*>( return static_cast<void*>(
dst->mutable_data<paddle::platform::float16>(dst_place)); dst->mutable_data<paddle::platform::float16>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.", PADDLE_THROW(platform::errors::Unimplemented(
type.code, type.bits); "DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
case 32: case 32:
if (type.code == kDLInt) if (type.code == kDLInt)
return static_cast<void*>(dst->mutable_data<int32_t>(dst_place)); return static_cast<void*>(dst->mutable_data<int32_t>(dst_place));
if (type.code == kDLFloat) if (type.code == kDLFloat)
return static_cast<void*>(dst->mutable_data<float>(dst_place)); return static_cast<void*>(dst->mutable_data<float>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.", PADDLE_THROW(platform::errors::Unimplemented(
type.code, type.bits); "DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
case 64: case 64:
if (type.code == kDLInt) if (type.code == kDLInt)
return static_cast<void*>(dst->mutable_data<int64_t>(dst_place)); return static_cast<void*>(dst->mutable_data<int64_t>(dst_place));
if (type.code == kDLFloat) if (type.code == kDLFloat)
return static_cast<void*>(dst->mutable_data<double>(dst_place)); return static_cast<void*>(dst->mutable_data<double>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.", PADDLE_THROW(platform::errors::Unimplemented(
type.code, type.bits); "DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
default: default:
PADDLE_THROW("Unsupport type.bits %d", type.bits); PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported DLDataType.bits %d.", type.bits));
} }
} }
......
@@ -183,7 +183,11 @@ void TensorToVector(const Tensor& src, std::vector<T>* dst) {
dst->resize(src.numel()); dst->resize(src.numel());
auto dst_ptr = static_cast<void*>(dst->data()); auto dst_ptr = static_cast<void*>(dst->data());
PADDLE_ENFORCE_EQ(platform::is_cpu_place(src.place()), true); PADDLE_ENFORCE_EQ(
platform::is_cpu_place(src.place()), true,
platform::errors::InvalidArgument(
"The input tensor should be CPU device, but actually it is in %s.",
src.place()));
memory::Copy(dst_place, dst_ptr, memory::Copy(dst_place, dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src.place()), src_ptr, size); BOOST_GET_CONST(platform::CPUPlace, src.place()), src_ptr, size);
......
@@ -42,7 +42,8 @@ void ThreadPool::Init() {
num_threads = FLAGS_dist_threadpool_size; num_threads = FLAGS_dist_threadpool_size;
VLOG(1) << "set dist_threadpool_size to " << num_threads; VLOG(1) << "set dist_threadpool_size to " << num_threads;
} }
PADDLE_ENFORCE_GT(num_threads, 0); PADDLE_ENFORCE_GT(num_threads, 0, platform::errors::InvalidArgument(
"The number of threads is 0."));
threadpool_.reset(new ThreadPool(num_threads)); threadpool_.reset(new ThreadPool(num_threads));
} }
} }
@@ -83,7 +84,8 @@ void ThreadPool::TaskLoop() {
} }
if (tasks_.empty()) { if (tasks_.empty()) {
PADDLE_THROW("This thread has no task to Run"); PADDLE_THROW(platform::errors::Unavailable(
"Current thread has no task to Run."));
} }
// pop a task from the task queue // pop a task from the task queue
......
@@ -91,7 +91,8 @@ class ThreadPool {
{ {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
if (!running_) { if (!running_) {
PADDLE_THROW("enqueue on stopped ThreadPool"); PADDLE_THROW(platform::errors::Unavailable(
"Task is enqueued into stopped ThreadPool."));
} }
tasks_.push(std::move(task)); tasks_.push(std::move(task));
} }
......
@@ -43,8 +43,9 @@ void VarDesc::SetTensorDescNum(size_t num) {
} break; } break;
default: default:
PADDLE_THROW( PADDLE_THROW(
"Setting 'sub_tensor_number' is not supported by the type of var %s.", platform::errors::Unavailable("Setting 'sub_tensor_number' is not "
this->Name()); "supported by the %s type variable.",
this->Name()));
} }
} }
@@ -55,8 +56,9 @@ size_t VarDesc::GetTensorDescNum() const {
break; break;
default: default:
PADDLE_THROW( PADDLE_THROW(
"Getting 'sub_tensor_number' is not supported by the type of var %s.", platform::errors::Unavailable("Getting 'sub_tensor_number' is not "
this->Name()); "supported by the %s type variable.",
this->Name()));
} }
} }
@@ -133,9 +135,9 @@ void VarDesc::SetLoDLevel(int32_t lod_level) {
desc_.mutable_type()->mutable_tensor_array()->set_lod_level(lod_level); desc_.mutable_type()->mutable_tensor_array()->set_lod_level(lod_level);
break; break;
default: default:
PADDLE_THROW( PADDLE_THROW(platform::errors::Unavailable(
"Setting 'lod_level' is not supported by the type of var %s.", "Setting 'lod_level' is not supported by the %s type variable.",
this->Name()); this->Name()));
} }
} }
@@ -157,9 +159,9 @@ void VarDesc::SetLoDLevels(const std::vector<int32_t> &multiple_lod_level) {
} }
} break; } break;
default: default:
PADDLE_THROW( PADDLE_THROW(platform::errors::Unavailable(
"Setting 'lod_levels' is not supported by the type of var %s.", "Setting 'lod_levels' is not supported by the %s type variable",
this->Name()); this->Name()));
} }
} }
@@ -170,9 +172,9 @@ int32_t VarDesc::GetLoDLevel() const {
case proto::VarType::LOD_TENSOR_ARRAY: case proto::VarType::LOD_TENSOR_ARRAY:
return desc_.type().tensor_array().lod_level(); return desc_.type().tensor_array().lod_level();
default: default:
PADDLE_THROW( PADDLE_THROW(platform::errors::Unavailable(
"Getting 'lod_level' is not supported by the type of var %s.", "Getting 'lod_level' is not supported by the %s type variable.",
this->Name()); this->Name()));
} }
} }
@@ -187,15 +189,19 @@ std::vector<int32_t> VarDesc::GetLoDLevels() const {
return res; return res;
break; break;
default: default:
PADDLE_THROW( PADDLE_THROW(platform::errors::Unavailable(
"Getting 'lod_levels' is not supported by the type of var %s.", "Getting 'lod_levels' is not supported by the %s type variable.",
this->Name()); this->Name()));
} }
} }
const proto::VarType::TensorDesc &VarDesc::tensor_desc() const { const proto::VarType::TensorDesc &VarDesc::tensor_desc() const {
PADDLE_ENFORCE(desc_.has_type(), "The var's type hasn't been set."); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE(desc_.type().has_type(), "The var type hasn't been set."); desc_.has_type(), true,
platform::errors::NotFound("The variable's type was not be set."));
PADDLE_ENFORCE_EQ(
desc_.type().has_type(), true,
platform::errors::NotFound("The variable's type was not be set."));
switch (desc_.type().type()) { switch (desc_.type().type()) {
case proto::VarType::SELECTED_ROWS: case proto::VarType::SELECTED_ROWS:
return desc_.type().selected_rows(); return desc_.type().selected_rows();
@@ -204,14 +210,16 @@ const proto::VarType::TensorDesc &VarDesc::tensor_desc() const {
case proto::VarType::LOD_TENSOR_ARRAY: case proto::VarType::LOD_TENSOR_ARRAY:
return desc_.type().tensor_array().tensor(); return desc_.type().tensor_array().tensor();
default: default:
PADDLE_THROW( PADDLE_THROW(platform::errors::Unavailable(
"Getting 'tensor_desc' is not supported by the type of var %s.", "Getting 'tensor_desc' is not supported by the %s type variable.",
this->Name()); this->Name()));
} }
} }
std::vector<proto::VarType::TensorDesc> VarDesc::tensor_descs() const { std::vector<proto::VarType::TensorDesc> VarDesc::tensor_descs() const {
PADDLE_ENFORCE(desc_.has_type(), "The var type hasn't been set."); PADDLE_ENFORCE_EQ(
desc_.has_type(), true,
platform::errors::NotFound("The variable's type was not be set."));
std::vector<proto::VarType::TensorDesc> res; std::vector<proto::VarType::TensorDesc> res;
res.reserve(GetTensorDescNum()); res.reserve(GetTensorDescNum());
switch (desc_.type().type()) { switch (desc_.type().type()) {
@@ -221,16 +229,19 @@ std::vector<proto::VarType::TensorDesc> VarDesc::tensor_descs() const {
} }
return res; return res;
default: default:
PADDLE_THROW( PADDLE_THROW(platform::errors::Unavailable(
"Getting 'tensor_descs' is not supported by the type of var " "Getting 'tensor_descs' is not supported by the %s type variable.",
"%s.", this->Name()));
this->Name());
} }
} }
proto::VarType::TensorDesc *VarDesc::mutable_tensor_desc() { proto::VarType::TensorDesc *VarDesc::mutable_tensor_desc() {
PADDLE_ENFORCE(desc_.has_type(), "The var type hasn't been set."); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE(desc_.type().has_type(), "The var type hasn't been set."); desc_.has_type(), true,
platform::errors::NotFound("The variable's type was not be set."));
PADDLE_ENFORCE_EQ(
desc_.type().has_type(), true,
platform::errors::NotFound("The variable's type was not be set."));
switch (desc_.type().type()) { switch (desc_.type().type()) {
case proto::VarType::SELECTED_ROWS: case proto::VarType::SELECTED_ROWS:
return desc_.mutable_type()->mutable_selected_rows(); return desc_.mutable_type()->mutable_selected_rows();
@@ -240,15 +251,19 @@ proto::VarType::TensorDesc *VarDesc::mutable_tensor_desc() {
return desc_.mutable_type()->mutable_tensor_array()->mutable_tensor(); return desc_.mutable_type()->mutable_tensor_array()->mutable_tensor();
default: default:
PADDLE_THROW( PADDLE_THROW(
"Getting 'mutable_tensor_desc' is not supported by the type of var " platform::errors::Unavailable("Getting 'mutable_tensor_desc' is not "
"%s.", "supported by the %s type variable.",
this->Name()); this->Name()));
} }
} }
std::vector<proto::VarType::TensorDesc *> VarDesc::mutable_tensor_descs() { std::vector<proto::VarType::TensorDesc *> VarDesc::mutable_tensor_descs() {
PADDLE_ENFORCE(desc_.has_type(), "The var type hasn't been set."); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE(desc_.type().has_type(), "The var type hasn't been set."); desc_.has_type(), true,
platform::errors::NotFound("The variable's type was not be set."));
PADDLE_ENFORCE_EQ(
desc_.type().has_type(), true,
platform::errors::NotFound("The variable's type was not be set."));
std::vector<proto::VarType::TensorDesc *> res; std::vector<proto::VarType::TensorDesc *> res;
res.reserve(GetTensorDescNum()); res.reserve(GetTensorDescNum());
switch (desc_.type().type()) { switch (desc_.type().type()) {
@@ -259,10 +274,9 @@ std::vector<proto::VarType::TensorDesc *> VarDesc::mutable_tensor_descs() {
} }
return res; return res;
default: default:
PADDLE_THROW( PADDLE_THROW(platform::errors::Unavailable(
"Getting 'tensor_descs' is not supported by the type of var " "Getting 'tensor_descs' is not supported by the %s type variable.",
"%s.", this->Name()));
this->Name());
} }
} }
......
@@ -40,7 +40,8 @@ inline proto::VarType::Type ToVarType(int type) {
case proto::VarType::READER: case proto::VarType::READER:
return static_cast<proto::VarType::Type>(type); return static_cast<proto::VarType::Type>(type);
default: default:
PADDLE_THROW("ToVarType:Unsupported type %d", type); PADDLE_THROW(platform::errors::Unavailable(
"ToVarType method Unsupported type %d.", type));
} }
} }
@@ -66,7 +67,8 @@ inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
visitor(var.Get<FetchList>()); visitor(var.Get<FetchList>());
return; return;
default: default:
PADDLE_THROW("Not supported visit type, %s", ToTypeName(var.Type())); PADDLE_THROW(platform::errors::Unavailable("Not supported visit type %s.",
ToTypeName(var.Type())));
} }
} }
......
@@ -46,12 +46,14 @@ struct VarIdToTypeIndexMapInitializerImpl {
static_assert(!std::is_same<Type, void>::value, "Type cannot be void"); static_assert(!std::is_same<Type, void>::value, "Type cannot be void");
constexpr int kId = VarTypeTrait<Type>::kId; constexpr int kId = VarTypeTrait<Type>::kId;
auto type = std::type_index(typeid(Type)); auto type = std::type_index(typeid(Type));
PADDLE_ENFORCE(id_to_type->count(kId) == 0, PADDLE_ENFORCE_EQ(
"Registered duplicate type id %d for type %s", kId, id_to_type->count(kId), 0,
type.name()); platform::errors::AlreadyExists(
PADDLE_ENFORCE(type_to_id->count(type) == 0, "Registered duplicate type id %d for type %s.", kId, type.name()));
"Registered duplicate type_index %s for id %d", type.name(), PADDLE_ENFORCE_EQ(
kId); type_to_id->count(type), 0,
platform::errors::AlreadyExists(
"Registered duplicate type index %s for id %d.", type.name(), kId));
id_to_type->emplace(kId, type); id_to_type->emplace(kId, type);
type_to_id->emplace(type, kId); type_to_id->emplace(type, kId);
VarIdToTypeIndexMapInitializerImpl<kStart + 1, kEnd, VarIdToTypeIndexMapInitializerImpl<kStart + 1, kEnd,
@@ -79,15 +81,17 @@ struct VarIdToTypeIndexMapHolder {
public: public:
static const std::type_index &ToTypeIndex(int var_id) { static const std::type_index &ToTypeIndex(int var_id) {
auto it = Instance().id_to_type_map_.find(var_id); auto it = Instance().id_to_type_map_.find(var_id);
PADDLE_ENFORCE(it != Instance().id_to_type_map_.end(), PADDLE_ENFORCE_NE(it, Instance().id_to_type_map_.end(),
"VarId %d is not registered.", var_id); platform::errors::NotFound(
"Variable Id %d is not registered.", var_id));
return it->second; return it->second;
} }
static int ToTypeId(const std::type_index &type) { static int ToTypeId(const std::type_index &type) {
auto it = Instance().type_to_id_map_.find(type); auto it = Instance().type_to_id_map_.find(type);
PADDLE_ENFORCE(it != Instance().type_to_id_map_.end(), PADDLE_ENFORCE_NE(it, Instance().type_to_id_map_.end(),
"VarType %s is not registered.", type.name()); platform::errors::NotFound(
"Variable Type %s is not registered.", type.name()));
return it->second; return it->second;
} }
......
@@ -50,11 +50,11 @@ void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
} else if (var_type == proto::VarType::RAW) { } else if (var_type == proto::VarType::RAW) {
// GetMutable will be called in operator // GetMutable will be called in operator
} else { } else {
PADDLE_THROW( PADDLE_THROW(platform::errors::Unavailable(
"Variable type %d is not in " "Variable type %d is not in "
"[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, " "[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
"LOD_RANK_TABLE, PLACE_LIST, READER, RAW]", "LOD_RANK_TABLE, PLACE_LIST, READER, RAW].",
var_type); var_type));
} }
} }
@@ -76,7 +76,8 @@ void CopyVariable(const Variable &src_var, Variable *dst_var) {
auto *dst_t = tmp_grad_slr->mutable_value(); auto *dst_t = tmp_grad_slr->mutable_value();
framework::TensorCopy(src_t, cpu_place, dst_t); framework::TensorCopy(src_t, cpu_place, dst_t);
} else { } else {
PADDLE_THROW("unknown var type to copy"); PADDLE_THROW(
platform::errors::Unavailable("Unknown variable type to copy."));
} }
} }
......
@@ -27,8 +27,9 @@ Analyzer::Analyzer() {}
void Analyzer::Run(Argument *argument) { RunAnalysis(argument); } void Analyzer::Run(Argument *argument) { RunAnalysis(argument); }
void Analyzer::RunAnalysis(Argument *argument) { void Analyzer::RunAnalysis(Argument *argument) {
PADDLE_ENFORCE(argument->analysis_passes_valid(), PADDLE_ENFORCE_EQ(argument->analysis_passes_valid(), true,
"analsis_passes is not valid in the argument."); platform::errors::InvalidArgument(
"analsis_passes is not valid in the argument."));
const bool disable_logs = argument->disable_logs(); const bool disable_logs = argument->disable_logs();
for (auto &pass : argument->analysis_passes()) { for (auto &pass : argument->analysis_passes()) {
if (!disable_logs) { if (!disable_logs) {
@@ -38,7 +39,8 @@ void Analyzer::RunAnalysis(Argument *argument) {
continue; continue;
auto *ptr = PassRegistry::Global().Retreive(pass); auto *ptr = PassRegistry::Global().Retreive(pass);
PADDLE_ENFORCE_NOT_NULL(ptr, "no analysis pass called %s", pass); PADDLE_ENFORCE_NOT_NULL(ptr, platform::errors::PreconditionNotMet(
"no analysis pass called %s", pass));
ptr->Run(argument); ptr->Run(argument);
} }
} }
......
@@ -75,9 +75,14 @@ void TestWord2vecPrediction(const std::string& model_path) {
std::vector<PaddleTensor> outputs; std::vector<PaddleTensor> outputs;
CHECK(predictor->Run(slots, &outputs)); CHECK(predictor->Run(slots, &outputs));
PADDLE_ENFORCE_EQ(outputs.size(), 1UL); PADDLE_ENFORCE_EQ(outputs.size(), 1UL,
platform::errors::PreconditionNotMet(
"Output size should be 1, but got %d", outputs.size()));
// Check the output buffer size and result of each tid. // Check the output buffer size and result of each tid.
PADDLE_ENFORCE_EQ(outputs.front().data.length(), 33168UL); PADDLE_ENFORCE_EQ(outputs.front().data.length(), 33168UL,
platform::errors::PreconditionNotMet(
"Output's data length should be 33168 but got %d",
outputs.front().data.length()));
float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815, float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
0.000932706}; 0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float); const size_t num_elements = outputs.front().data.length() / sizeof(float);
......
@@ -76,53 +76,62 @@ struct Argument {
} }
} }
#define DECL_ARGUMENT_FIELD(field__, Field, type__) \ #define DECL_ARGUMENT_FIELD(field__, Field, type__) \
public: \ public: \
type__& field__() { \ type__& field__() { \
PADDLE_ENFORCE(Has(#field__), "There is no such field"); \ PADDLE_ENFORCE_EQ( \
return field__##_; \ Has(#field__), true, \
} \ platform::errors::PreconditionNotMet("There is no such field")); \
void Set##Field(const type__& x) { \ return field__##_; \
field__##_ = x; \ } \
valid_fields_.insert(#field__); \ void Set##Field(const type__& x) { \
} \ field__##_ = x; \
DECL_ARGUMENT_FIELD_VALID(field__); \ valid_fields_.insert(#field__); \
type__* field__##_ptr() { return &field__##_; } \ } \
\ DECL_ARGUMENT_FIELD_VALID(field__); \
private: \ type__* field__##_ptr() { return &field__##_; } \
\
private: \
type__ field__##_; type__ field__##_;
#define DECL_ARGUMENT_FIELD_VALID(field__) \ #define DECL_ARGUMENT_FIELD_VALID(field__) \
bool field__##_valid() { return Has(#field__); } bool field__##_valid() { return Has(#field__); }
#define DECL_ARGUMENT_UNIQUE_FIELD(field__, Field, type__) \ #define DECL_ARGUMENT_UNIQUE_FIELD(field__, Field, type__) \
public: \ public: \
type__& field__() { \ type__& field__() { \
PADDLE_ENFORCE_NOT_NULL(field__##_); \ PADDLE_ENFORCE_NOT_NULL(field__##_, platform::errors::PreconditionNotMet( \
PADDLE_ENFORCE(Has(#field__)); \ "filed should not be null.")); \
return *static_cast<type__*>(field__##_.get()); \ PADDLE_ENFORCE_EQ( \
} \ Has(#field__), true, \
void Set##Field(type__* x) { \ platform::errors::PreconditionNotMet("There is no such field")); \
field__##_ = \ return *static_cast<type__*>(field__##_.get()); \
unique_ptr_t(x, [](void* x) { delete static_cast<type__*>(x); }); \ } \
valid_fields_.insert(#field__); \ void Set##Field(type__* x) { \
} \ field__##_ = \
void Set##Field##NotOwned(type__* x) { \ unique_ptr_t(x, [](void* x) { delete static_cast<type__*>(x); }); \
valid_fields_.insert(#field__); \ valid_fields_.insert(#field__); \
field__##_ = unique_ptr_t(x, [](void* x) {}); \ } \
} \ void Set##Field##NotOwned(type__* x) { \
DECL_ARGUMENT_FIELD_VALID(field__); \ valid_fields_.insert(#field__); \
type__* field__##_ptr() { \ field__##_ = unique_ptr_t(x, [](void* x) {}); \
PADDLE_ENFORCE(Has(#field__)); \ } \
return static_cast<type__*>(field__##_.get()); \ DECL_ARGUMENT_FIELD_VALID(field__); \
} \ type__* field__##_ptr() { \
type__* Release##Field() { \ PADDLE_ENFORCE_EQ( \
PADDLE_ENFORCE(Has(#field__)); \ Has(#field__), true, \
valid_fields_.erase(#field__); \ platform::errors::PreconditionNotMet("There is no such field")); \
return static_cast<type__*>(field__##_.release()); \ return static_cast<type__*>(field__##_.get()); \
} \ } \
\ type__* Release##Field() { \
private: \ PADDLE_ENFORCE_EQ( \
Has(#field__), true, \
platform::errors::PreconditionNotMet("There is no such field")); \
valid_fields_.erase(#field__); \
return static_cast<type__*>(field__##_.release()); \
} \
\
private: \
unique_ptr_t field__##_; unique_ptr_t field__##_;
DECL_ARGUMENT_FIELD(predictor_id, PredictorID, int); DECL_ARGUMENT_FIELD(predictor_id, PredictorID, int);
@@ -218,13 +227,19 @@ struct Argument {
DECL_ARGUMENT_FIELD(fusion_statis, FusionStatis, fusion_statis_t); DECL_ARGUMENT_FIELD(fusion_statis, FusionStatis, fusion_statis_t);
// Only used in paddle-lite subgraph.
DECL_ARGUMENT_FIELD(cpu_math_library_num_threads, CpuMathLibraryNumThreads,
int);
private: private:
std::unordered_set<std::string> valid_fields_; std::unordered_set<std::string> valid_fields_;
}; };
#define ARGUMENT_CHECK_FIELD(argument__, fieldname__) \ #define ARGUMENT_CHECK_FIELD(argument__, fieldname__) \
PADDLE_ENFORCE(argument__->Has(#fieldname__), \ PADDLE_ENFORCE_EQ( \
"the argument field [%s] should be set", #fieldname__); argument__->Has(#fieldname__), true, \
platform::errors::PreconditionNotMet( \
"the argument field [%s] should be set", #fieldname__));
} // namespace analysis } // namespace analysis
} // namespace inference } // namespace inference
......
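Note: the DECL_ARGUMENT_FIELD / DECL_ARGUMENT_UNIQUE_FIELD macros above generate a getter, a setter and a presence flag per field, and after this change the getter enforces that the field has actually been set before it is read. A compact stand-alone sketch of that field-with-presence-check pattern follows; DECL_FIELD and the Argument struct below are illustrative reductions, not the Paddle macros themselves.

#include <set>
#include <stdexcept>
#include <string>

// Declares a typed field plus a checked getter and a setter that records
// the field as "valid"; reading an unset field throws.
#define DECL_FIELD(field__, Field, type__)                            \
 public:                                                              \
  type__& field__() {                                                 \
    if (!Has(#field__))                                               \
      throw std::runtime_error("There is no such field: " #field__); \
    return field__##_;                                                \
  }                                                                   \
  void Set##Field(const type__& x) {                                  \
    field__##_ = x;                                                   \
    valid_fields_.insert(#field__);                                   \
  }                                                                   \
                                                                      \
 private:                                                             \
  type__ field__##_;

struct Argument {
  bool Has(const std::string& name) const {
    return valid_fields_.count(name) > 0;
  }

  DECL_FIELD(predictor_id, PredictorID, int);

 private:
  std::set<std::string> valid_fields_;
};

int main() {
  Argument arg;
  arg.SetPredictorID(7);
  return arg.predictor_id() == 7 ? 0 : 1;  // reading before Set would throw
}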
@@ -73,12 +73,15 @@ struct DataTypeNamer {
template <typename T> template <typename T>
const std::string &repr() const { const std::string &repr() const {
auto x = std::type_index(typeid(T)); auto x = std::type_index(typeid(T));
PADDLE_ENFORCE(dic_.count(x), "unknown type for representation"); PADDLE_ENFORCE_GT(dic_.count(x), 0, platform::errors::PreconditionNotMet(
"unknown type for representation"));
return dic_.at(x); return dic_.at(x);
} }
const std::string &repr(const std::type_index &type) const { // NOLINT const std::string &repr(const std::type_index &type) const { // NOLINT
PADDLE_ENFORCE(dic_.count(type), "unknown type for representation"); PADDLE_ENFORCE_GT(dic_.count(type), 0,
platform::errors::PreconditionNotMet(
"unknown type for representation"));
return dic_.at(type); return dic_.at(type);
} }
@@ -116,7 +119,9 @@ template <typename T>
class OrderedRegistry { class OrderedRegistry {
public: public:
T *Register(const std::string &name, T *x) { T *Register(const std::string &name, T *x) {
PADDLE_ENFORCE(!dic_.count(name), "duplicate key [%s]", name); PADDLE_ENFORCE_EQ(dic_.count(name), 0,
platform::errors::PreconditionNotMet(
"There exists duplicate key [%s]", name));
dic_[name] = elements_.size(); dic_[name] = elements_.size();
elements_.emplace_back(std::unique_ptr<T>(x)); elements_.emplace_back(std::unique_ptr<T>(x));
return elements_.back().get(); return elements_.back().get();
@@ -136,14 +141,20 @@ class OrderedRegistry {
template <typename T> template <typename T>
T &GetFromScope(const framework::Scope &scope, const std::string &name) { T &GetFromScope(const framework::Scope &scope, const std::string &name) {
framework::Variable *var = scope.FindVar(name); framework::Variable *var = scope.FindVar(name);
PADDLE_ENFORCE(var != nullptr); PADDLE_ENFORCE_NOT_NULL(
var, platform::errors::PreconditionNotMet(
"The var which name is %s should not be nullptr.", name));
return *var->GetMutable<T>(); return *var->GetMutable<T>();
} }
static framework::proto::ProgramDesc LoadProgramDesc( static framework::proto::ProgramDesc LoadProgramDesc(
const std::string &model_path) { const std::string &model_path) {
std::ifstream fin(model_path, std::ios::in | std::ios::binary); std::ifstream fin(model_path, std::ios::in | std::ios::binary);
PADDLE_ENFORCE(fin.is_open(), "Cannot open file %s", model_path); PADDLE_ENFORCE_EQ(
fin.is_open(), true,
platform::errors::NotFound(
"Cannot open file %s, please confirm whether the file exists",
model_path));
fin.seekg(0, std::ios::end); fin.seekg(0, std::ios::end);
std::string buffer(fin.tellg(), ' '); std::string buffer(fin.tellg(), ' ');
fin.seekg(0, std::ios::beg); fin.seekg(0, std::ios::beg);
@@ -188,10 +199,12 @@ static std::string GetDirRoot(const std::string &path) {
static std::string GetOrCreateModelOptCacheDir(const std::string &model_root) { static std::string GetOrCreateModelOptCacheDir(const std::string &model_root) {
std::string opt_cache_dir = model_root + "/_opt_cache/"; std::string opt_cache_dir = model_root + "/_opt_cache/";
if (!PathExists(opt_cache_dir)) { if (!PathExists(opt_cache_dir)) {
PADDLE_ENFORCE(MKDIR(opt_cache_dir.c_str()) != -1, PADDLE_ENFORCE_NE(
"Can not create optimize cache directory: %s, Make sure you " MKDIR(opt_cache_dir.c_str()), -1,
"have permission to write", platform::errors::PreconditionNotMet(
opt_cache_dir); "Can not create optimize cache directory: %s, Make sure you "
"have permission to write",
opt_cache_dir));
} }
return opt_cache_dir; return opt_cache_dir;
} }
......
@@ -38,7 +38,9 @@ IRPassManager::IRPassManager(Argument *argument) {
graph_ = std::unique_ptr<Graph>(new Graph(argument->main_program())); graph_ = std::unique_ptr<Graph>(new Graph(argument->main_program()));
if (argument->Has("scope")) { if (argument->Has("scope")) {
auto *scope_ptr = argument->scope_ptr(); auto *scope_ptr = argument->scope_ptr();
PADDLE_ENFORCE(scope_ptr); PADDLE_ENFORCE_NOT_NULL(scope_ptr,
platform::errors::PreconditionNotMet(
"The scope ptr should not be nullptr."));
graph_->SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr); graph_->SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
} }
@@ -101,13 +103,17 @@ void IRPassManager::CreatePasses(Argument *argument,
std::string optim_cache_dir = argument->optim_cache_dir(); std::string optim_cache_dir = argument->optim_cache_dir();
bool int8_valid = bool int8_valid =
!(model_from_memory && optim_cache_dir.empty() && enable_int8); !(model_from_memory && optim_cache_dir.empty() && enable_int8);
PADDLE_ENFORCE(int8_valid, PADDLE_ENFORCE_EQ(
"When you are in TRT INT8 mode, and load model from " int8_valid, true,
"memory, you should set optim_cache_dir using " platform::errors::PreconditionNotMet(
"config.SetOptimCacheDir()"); "When you are in TRT INT8 mode, and load model from "
PADDLE_ENFORCE(!(model_from_memory && use_static_engine), "memory, you should set optim_cache_dir using "
"When you are using Paddle-TRT, and also using load model " "config.SetOptimCacheDir()"));
"from memory, you should set the use_static to false."); PADDLE_ENFORCE_EQ(
!(model_from_memory && use_static_engine), true,
platform::errors::PreconditionNotMet(
"When you are using Paddle-TRT, and also using load model "
"from memory, you should set the use_static to false."));
if (!optim_cache_dir.empty()) { if (!optim_cache_dir.empty()) {
pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir)); pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
@@ -150,6 +156,8 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("use_xpu", new bool(argument->use_xpu())); pass->Set("use_xpu", new bool(argument->use_xpu()));
pass->Set("xpu_l3_workspace_size", pass->Set("xpu_l3_workspace_size",
new int(argument->xpu_l3_workspace_size())); new int(argument->xpu_l3_workspace_size()));
pass->Set("cpu_math_library_num_threads",
new int(argument->cpu_math_library_num_threads()));
} }
disable_logs_ = argument->disable_logs(); disable_logs_ = argument->disable_logs();
if (pass_name == "fc_fuse_pass") { if (pass_name == "fc_fuse_pass") {
......
@@ -244,6 +244,7 @@ void LiteSubgraphPass::SetUpEngine(
bool enable_int8 = Get<bool>("enable_int8"); bool enable_int8 = Get<bool>("enable_int8");
bool use_xpu = Get<bool>("use_xpu"); bool use_xpu = Get<bool>("use_xpu");
int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size"); int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size");
int cpu_math_library_num_threads = Get<int>("cpu_math_library_num_threads");
lite_api::TargetType target_type; lite_api::TargetType target_type;
if (use_gpu) { if (use_gpu) {
@@ -263,11 +264,12 @@ void LiteSubgraphPass::SetUpEngine(
// Notice: The ordering here determines the device where the // Notice: The ordering here determines the device where the
// input tensor of the Lite engine is located, and then affects // input tensor of the Lite engine is located, and then affects
// whether tensor sharing is feasible. // whether tensor sharing is feasible.
paddle::lite::Place({target_type, precision_type}), paddle::lite_api::Place({target_type, precision_type}),
paddle::lite::Place({target_type, PRECISION(kInt64)}), paddle::lite_api::Place({target_type, PRECISION(kInt64)}),
paddle::lite::Place({target_type, PRECISION(kFloat)}), paddle::lite_api::Place({target_type, PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}), paddle::lite_api::Place({TARGET(kHost), PRECISION(kFloat)}),
}; };
config.cpu_math_library_num_threads = cpu_math_library_num_threads;
config.xpu_l3_workspace_size = xpu_l3_workspace_size; config.xpu_l3_workspace_size = xpu_l3_workspace_size;
if (dump_model) { if (dump_model) {
lite::StrToBinaryFile("./model.bin", config.model); lite::StrToBinaryFile("./model.bin", config.model);
......
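These hunks thread the CPU math-library thread count from the analysis argument into the Lite subgraph engine config. On the user side the value comes from AnalysisConfig; a minimal sketch of how a caller would set it, assuming the standard AnalysisConfig setters (the model path and thread count below are placeholders):

#include <memory>
#include "paddle_inference_api.h"  // paddle::AnalysisConfig, CreatePaddlePredictor

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./mobilenet_v1");       // hypothetical model directory
  config.SetCpuMathLibraryNumThreads(4);   // forwarded to the Lite engine by this commit
  config.EnableLiteEngine();               // run supported subgraphs with Paddle-Lite
  auto predictor = paddle::CreatePaddlePredictor(config);
  return predictor != nullptr ? 0 : 1;
}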
@@ -123,7 +123,9 @@ void RenameAndGetOutputs(
   auto add_block_var = [&](const std::string &graph_arg,
                            const std::string &block_arg) {
     auto arg_var_node = graph_var_map.find(graph_arg);
-    PADDLE_ENFORCE(arg_var_node != graph_var_map.end());
+    PADDLE_ENFORCE_NE(arg_var_node, graph_var_map.end(),
+                      platform::errors::InvalidArgument(
+                          "Can not find %s in graph_var_map", graph_arg));
     auto *var_t = block_desc->Var(block_arg);
     var_t->SetShape(arg_var_node->second->Var()->GetShape());
     var_t->SetDataType(arg_var_node->second->Var()->GetDataType());
@@ -133,7 +135,10 @@ void RenameAndGetOutputs(
     framework::proto::OpDesc *op = block_desc->Op(index)->Proto();
     framework::OpDesc op_desc(*op, nullptr);
     auto correspond_node = subgraph_nodes[index];
-    PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type());
+    PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type(),
+                      platform::errors::PreconditionNotMet(
+                          "We should get %s, but get %s", op->type(),
+                          correspond_node->Name()));
     std::unordered_map<std::string, size_t> var2id;
     std::unordered_map<std::string, framework::ir::Node *> in_vars;
......
@@ -97,7 +97,9 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
     std::vector<std::string> *repetitive_params) const {
   auto *op_desc = node->Op();
   auto &subgraph = *framework::ir::Agent(node).subgraph();
-  PADDLE_ENFORCE(!subgraph.empty());
+  PADDLE_ENFORCE_EQ(subgraph.empty(), false,
+                    platform::errors::PreconditionNotMet(
+                        "The subgraph should not be empty."));
   framework::ProgramDesc *program_desc =
       Get<framework::ProgramDesc *>("program");
@@ -194,12 +196,17 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   // to Tensor.
   std::vector<std::string> output_mapping;
   for (auto name : output_names) {
-    PADDLE_ENFORCE(output_name_map.count(name) != 0);
+    PADDLE_ENFORCE_NE(output_name_map.count(name), 0,
+                      platform::errors::PreconditionNotMet(
+                          "The output_name_map should have %s", name));
     output_mapping.push_back(output_name_map[name]);
   }
-  PADDLE_ENFORCE(!output_mapping.empty());
-  PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(),
-                 "the block has no var-desc");
+  PADDLE_ENFORCE_EQ(output_mapping.empty(), false,
+                    platform::errors::PreconditionNotMet(
+                        "The output_mapping should not be empty."));
+  PADDLE_ENFORCE_EQ(
+      !block_desc.Proto()->vars().empty(), true,
+      platform::errors::PreconditionNotMet("the block has no var-desc"));
   // Set attrs
   op_desc->SetType("tensorrt_engine");
......
@@ -13,6 +13,8 @@
 // limitations under the License.
 #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
+#include <memory>
+#include <utility>
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/inference/analysis/ir_pass_manager.h"
@@ -31,7 +33,10 @@ void IrAnalysisPass::RunImpl(Argument* argument) {
   // Apply passes.
   IRPassManager the_ir_manager(argument);
   graph = the_ir_manager.Apply(std::move(graph));
-  PADDLE_ENFORCE_GT(graph->Nodes().size(), 0);
+  PADDLE_ENFORCE_GT(
+      graph->Nodes().size(), 0,
+      platform::errors::PreconditionNotMet(
+          "The graph nodes size should be greater than 0, but got 0"));
   argument->SetMainGraph(graph.release());
   CollectFusionStatis(argument);
 }
......
@@ -31,7 +31,9 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
   if (!argument->scope_valid()) {
     argument->SetScope(new framework::Scope);
   }
-  PADDLE_ENFORCE(argument->use_gpu_valid());
+  PADDLE_ENFORCE_EQ(argument->use_gpu_valid(), true,
+                    platform::errors::PreconditionNotMet(
+                        "The use_gpu field should be valid"));
   // The load program should run on the same device with the inference program,
   // so that the parameters will on the same device, or they will keep copying
@@ -51,14 +53,17 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
         argument->model_from_memory_valid() && argument->model_from_memory());
     argument->SetMainProgram(program.release());
   } else {
-    PADDLE_THROW(
-        "either model_dir or (program path and parameter path) should be set.");
+    PADDLE_THROW(platform::errors::PreconditionNotMet(
+        "either model_dir or (program path and parameter path) should be "
+        "set."));
   }
   auto graph = std::unique_ptr<Graph>(new Graph(argument->main_program()));
   argument->SetMainGraph(graph.release());
   auto *scope_ptr = argument->scope_ptr();
-  PADDLE_ENFORCE(scope_ptr);
+  PADDLE_ENFORCE_NOT_NULL(scope_ptr,
+                          platform::errors::PreconditionNotMet(
+                              "The scope ptr should not be nullptr."));
   argument->main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
 }
......
@@ -31,7 +31,8 @@ void IrInferCleanGraphPass::RunImpl(Argument* argument) {
   std::unordered_set<const framework::ir::Node*> invalid_nodes;
   int valid_op = 0;
   for (auto* node : graph.Nodes()) {
-    PADDLE_ENFORCE_NOT_NULL(node);
+    PADDLE_ENFORCE_NOT_NULL(node, platform::errors::PreconditionNotMet(
+                                      "The node should not be nullptr."));
     if (is_valid_node(node)) {
       invalid_nodes.insert(node);
     } else if (node->IsOp()) {
......
@@ -23,8 +23,12 @@ namespace inference {
 namespace analysis {
 void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
-  PADDLE_ENFORCE(argument->scope_valid());
-  PADDLE_ENFORCE(argument->use_gpu_valid());
+  PADDLE_ENFORCE_EQ(
+      argument->scope_valid(), true,
+      platform::errors::PreconditionNotMet("The scope field should be valid"));
+  PADDLE_ENFORCE_EQ(argument->use_gpu_valid(), true,
+                    platform::errors::PreconditionNotMet(
+                        "The use_gpu field should be valid"));
   platform::Place place;
@@ -40,7 +44,9 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
     LOG(INFO) << "Sync params from CPU to GPU";
-    PADDLE_ENFORCE(argument->gpu_device_id_valid());
+    PADDLE_ENFORCE_EQ(argument->gpu_device_id_valid(), true,
+                      platform::errors::PreconditionNotMet(
+                          "The gpu_device_id field should be valid"));
     place = platform::CUDAPlace(argument->gpu_device_id());
     auto *scope = argument->scope_ptr();
@@ -56,7 +62,8 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
         continue;
       }
       auto *var = scope->FindLocalVar(var_name);
-      PADDLE_ENFORCE(var != nullptr);
+      PADDLE_ENFORCE_NOT_NULL(var, platform::errors::PreconditionNotMet(
+                                       "The var should not be nullptr"));
       if (var->IsType<framework::LoDTensor>() ||
           var->IsType<framework::Tensor>()) {
         auto *t = var->GetMutable<framework::LoDTensor>();
......
@@ -224,7 +224,9 @@ void UpdateOpDescsByReuse(
       // modify the graph
       for (auto input_node : node->inputs) {
-        PADDLE_ENFORCE(input_node->IsVar());
+        PADDLE_ENFORCE_EQ(input_node->IsVar(), true,
+                          platform::errors::PreconditionNotMet(
+                              "The input node should be a variable."));
         std::string input_node_name = input_node->Name();
         if (reuse_table.count(input_node_name) &&
             reuse_table.at(input_node_name) != input_node_name) {
@@ -246,7 +248,9 @@ void UpdateOpDescsByReuse(
       // modify the graph
       for (auto out_node : node->outputs) {
-        PADDLE_ENFORCE(out_node->IsVar());
+        PADDLE_ENFORCE_EQ(out_node->IsVar(), true,
+                          platform::errors::PreconditionNotMet(
+                              "The output node should be a variable."));
         std::string out_node_name = out_node->Name();
         if (reuse_table.count(out_node_name) &&
             reuse_table.at(out_node_name) != out_node_name) {
......
@@ -53,12 +53,10 @@ if(WITH_TESTING)
     inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS paddle_fluid_shared
       ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${PYTHON_TESTS_DIR}/book)
     set_tests_properties(test_api_impl PROPERTIES DEPENDS test_image_classification)
-    set_tests_properties(test_api_impl PROPERTIES LABELS "RUN_TYPE=DIST")
   elseif(WIN32)
     inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS ${inference_deps}
       ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${PYTHON_TESTS_DIR}/book)
     set_tests_properties(test_api_impl PROPERTIES DEPENDS test_image_classification)
-    set_tests_properties(test_api_impl PROPERTIES LABELS "RUN_TYPE=DIST")
   endif()
 endif()
......
@@ -230,7 +230,8 @@ void AnalysisConfig::EnableMkldnnBfloat16() {
 MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
   PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_,
-                          "MkldnnQuantizer was not enabled yet.");
+                          platform::errors::PreconditionNotMet(
+                              "MkldnnQuantizer was not enabled yet."));
   return mkldnn_quantizer_config_.get();
 }
......
@@ -169,7 +169,8 @@ bool AnalysisPredictor::PrepareScope(
   if (parent_scope) {
     PADDLE_ENFORCE_NOT_NULL(
         parent_scope,
-        "Both program and parent_scope should be set in Clone mode.");
+        platform::errors::PreconditionNotMet(
+            "Both program and parent_scope should be set in Clone mode."));
     scope_ = parent_scope;
     status_is_cloned_ = true;
   } else {
@@ -235,7 +236,9 @@ bool AnalysisPredictor::PrepareExecutor() {
   executor_->Prepare(sub_scope_, *inference_program_, 0,
                      config_.use_feed_fetch_ops_);
-  PADDLE_ENFORCE_NOT_NULL(sub_scope_);
+  PADDLE_ENFORCE_NOT_NULL(sub_scope_,
+                          platform::errors::PreconditionNotMet(
+                              "The sub_scope should not be nullptr."));
   return true;
 }
@@ -297,7 +300,8 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
   timer.tic();
   // set feed variable
   framework::Scope *scope = sub_scope_ ? sub_scope_ : scope_.get();
-  PADDLE_ENFORCE_NOT_NULL(scope, "The scope should not be nullptr.");
+  PADDLE_ENFORCE_NOT_NULL(scope, platform::errors::PreconditionNotMet(
+                                     "The scope should not be nullptr."));
   if (!SetFeed(inputs, scope)) {
     LOG(ERROR) << "fail to set feed";
     return false;
@@ -399,7 +403,11 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
   outputs->resize(fetches_.size());
   for (size_t i = 0; i < fetches_.size(); ++i) {
     int idx = BOOST_GET_CONST(int, fetches_[i]->GetAttr("col"));
-    PADDLE_ENFORCE((size_t)idx == i);
+    PADDLE_ENFORCE_EQ(
+        static_cast<size_t>(idx), i,
+        platform::errors::InvalidArgument(
+            "Fetch op's col attr(%d) should be equal to the index(%d)", idx,
+            i));
     framework::FetchType &fetch_var =
         framework::GetFetchVariable(*scope, "fetch", idx);
     auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var);
@@ -435,10 +443,12 @@ void AnalysisPredictor::PrepareArgument() {
   if (!config_.model_dir().empty()) {
     argument_.SetModelDir(config_.model_dir());
   } else {
-    PADDLE_ENFORCE(
-        !config_.params_file().empty(),
-        "Either model_dir or (param_file, prog_file) should be set.");
-    PADDLE_ENFORCE(!config_.prog_file().empty());
+    PADDLE_ENFORCE_EQ(config_.params_file().empty(), false,
+                      platform::errors::PreconditionNotMet(
+                          "Either model_dir or param_file should be set."));
+    PADDLE_ENFORCE_EQ(config_.prog_file().empty(), false,
+                      platform::errors::PreconditionNotMet(
+                          "Either model_dir or prog_file should be set."));
     std::string dir = inference::analysis::GetDirRoot(config_.prog_file());
     argument_.SetModelProgramPath(config_.prog_file());
@@ -461,6 +471,8 @@ void AnalysisPredictor::PrepareArgument() {
   }
   if (config_.lite_engine_enabled()) {
+    argument_.SetCpuMathLibraryNumThreads(
+        config_.cpu_math_library_num_threads());
     argument_.SetLitePrecisionMode(config_.lite_precision_mode_);
     argument_.SetLitePassesFilter(config_.lite_passes_filter_);
     argument_.SetLiteOpsFilter(config_.lite_ops_filter_);
@@ -501,7 +513,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   PrepareArgument();
   Analyzer().Run(&argument_);
-  PADDLE_ENFORCE(argument_.scope_valid());
+  PADDLE_ENFORCE_EQ(
+      argument_.scope_valid(), true,
+      platform::errors::InvalidArgument("The argument scope should be valid."));
   VLOG(5) << "to prepare executor";
   ARGUMENT_CHECK_FIELD((&argument_), ir_analyzed_program);
   inference_program_.reset(
@@ -523,8 +537,10 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     FLAGS_minloglevel = 2;  // GLOG_ERROR
   }
   VLOG(3) << "create AnalysisConfig";
-  PADDLE_ENFORCE(config.is_valid(),
-                 "Note: Each config can only be used for one predictor.");
+  PADDLE_ENFORCE_EQ(
+      config.is_valid(), true,
+      platform::errors::InvalidArgument(
+          "Note: Each config can only be used for one predictor."));
   if (config.use_gpu()) {
     static std::once_flag gflags_initialized;
@@ -621,7 +637,9 @@ bool AnalysisPredictor::MkldnnQuantize() {
 }
 void AnalysisPredictor::PrepareFeedFetch() {
-  PADDLE_ENFORCE_NOT_NULL(sub_scope_);
+  PADDLE_ENFORCE_NOT_NULL(sub_scope_,
+                          platform::errors::InvalidArgument(
+                              "The sub_scope should not be nullptr."));
   CreateFeedFetchVar(sub_scope_);
   for (auto *op : inference_program_->Block(0).AllOps()) {
     if (op->Type() == "feed") {
@@ -644,7 +662,8 @@ void AnalysisPredictor::PrepareFeedFetch() {
 }
 void AnalysisPredictor::CreateFeedFetchVar(framework::Scope *scope) {
-  PADDLE_ENFORCE_NOT_NULL(scope);
+  PADDLE_ENFORCE_NOT_NULL(scope, platform::errors::InvalidArgument(
+                                     "The scope should not be nullptr."));
   auto *var = scope->Var("feed");
   var->GetMutable<framework::FeedList>();
   var = scope->Var("fetch");
@@ -665,7 +684,8 @@ AnalysisPredictor::GetInputTensorShape() {
   std::vector<std::string> names = GetInputNames();
   for (std::string name : names) {
     auto *var = inference_program_->Block(0).FindVar(name);
-    PADDLE_ENFORCE_NOT_NULL(var, "input %s does not exist.", name);
+    PADDLE_ENFORCE_NOT_NULL(var, platform::errors::PreconditionNotMet(
+                                     "Input %s does not exist.", name));
     input_shapes[name] = var->GetShape();
   }
   return input_shapes;
@@ -681,7 +701,11 @@ std::vector<std::string> AnalysisPredictor::GetOutputNames() {
 std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
     const std::string &name) {
-  PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name);
+  PADDLE_ENFORCE_NOT_NULL(
+      executor_->scope()->FindVar(name),
+      platform::errors::PreconditionNotMet(
+          "The variable named %s is not found in the scope of the executor.",
+          name));
   std::unique_ptr<ZeroCopyTensor> res(
       new ZeroCopyTensor(static_cast<void *>(executor_->scope())));
   res->input_or_output_ = true;
@@ -698,7 +722,11 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
 std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
     const std::string &name) {
-  PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name);
+  PADDLE_ENFORCE_NOT_NULL(
+      executor_->scope()->FindVar(name),
+      platform::errors::PreconditionNotMet(
+          "The variable named %s is not found in the scope of the executor.",
+          name));
   std::unique_ptr<ZeroCopyTensor> res(
       new ZeroCopyTensor(static_cast<void *>(executor_->scope())));
   res->input_or_output_ = false;
@@ -759,8 +787,11 @@ bool AnalysisPredictor::LoadProgramDesc() {
   std::string pb_content;
   // Read binary
   std::ifstream fin(filename, std::ios::in | std::ios::binary);
-  PADDLE_ENFORCE(static_cast<bool>(fin.is_open()), "Cannot open file %s",
-                 filename);
+  PADDLE_ENFORCE_EQ(
+      static_cast<bool>(fin.is_open()), true,
+      platform::errors::NotFound(
+          "Cannot open file %s, please confirm whether the file is normal.",
+          filename));
   fin.seekg(0, std::ios::end);
   pb_content.resize(fin.tellg());
   fin.seekg(0, std::ios::beg);
@@ -777,7 +808,8 @@ bool AnalysisPredictor::LoadProgramDesc() {
 bool AnalysisPredictor::LoadParameters() {
   PADDLE_ENFORCE_NOT_NULL(inference_program_.get(),
-                          "The inference program should be loaded first.");
+                          platform::errors::PreconditionNotMet(
+                              "The inference program should be loaded first."));
   const auto &global_block = inference_program_->MutableBlock(0);
@@ -853,8 +885,9 @@ void AnalysisPredictor::ClearIntermediateTensor() {
 #if PADDLE_WITH_TENSORRT
 bool AnalysisPredictor::SaveTrtCalibToDisk() {
-  PADDLE_ENFORCE(config_.tensorrt_engine_enabled(),
-                 "This func can be invoked only in trt mode");
+  PADDLE_ENFORCE_EQ(config_.tensorrt_engine_enabled(), true,
+                    platform::errors::PreconditionNotMet(
+                        "This func can be invoked only in trt mode"));
   auto &block = inference_program_->Block(0);
   for (auto &op_desc : block.AllOps()) {
     if (op_desc->Type() == "tensorrt_engine") {
......
@@ -62,9 +62,9 @@ PaddleBuf &PaddleBuf::operator=(const PaddleBuf &other) {
     if (other.length() && other.data())
       memcpy(data_, other.data(), other.length());
     else if (other.length())
-      PADDLE_THROW(
-          "Invalid argument, null pointer data with length %u is passed",
-          other.length());
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "Invalid argument, null pointer data with length %u is passed",
+          other.length()));
     length_ = other.length();
     memory_owned_ = true;
@@ -92,7 +92,8 @@ void PaddleBuf::Resize(size_t length) {
     length_ = length;
     memory_owned_ = true;
   } else {
-    PADDLE_THROW("The memory is allocated externally, can not Resized");
+    PADDLE_THROW(platform::errors::PreconditionNotMet(
+        "The memory is allocated externally, can not Resized"));
   }
 }
@@ -105,7 +106,11 @@ void PaddleBuf::Reset(void *data, size_t length) {
 void PaddleBuf::Free() {
   if (memory_owned_ && data_) {
-    PADDLE_ENFORCE_GT(length_, 0UL);
+    PADDLE_ENFORCE_GT(
+        length_, 0UL,
+        platform::errors::PreconditionNotMet(
+            "The memory used in PaddleBuf %d should be greater than 0",
+            length_));
     delete[] static_cast<char *>(data_);
     data_ = nullptr;
     length_ = 0;
......
@@ -87,7 +87,9 @@ bool NativePaddlePredictor::Init(
   if (parent_scope) {
     scope_ = parent_scope;
     sub_scope_ = &(parent_scope->NewScope());
-    PADDLE_ENFORCE_NOT_NULL(sub_scope_, "create sub scope fail");
+    PADDLE_ENFORCE_NOT_NULL(sub_scope_,
+                            platform::errors::PreconditionNotMet(
+                                "The sub_scope should not be nullptr."));
   } else {
     paddle::framework::InitDevices(false);
     scope_.reset(new paddle::framework::Scope());
@@ -182,7 +184,10 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
   std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
   // Hot fix the bug that result diff in multi-thread.
   // TODO(Superjomn) re-implement a real clone here.
-  PADDLE_ENFORCE_NOT_NULL(dynamic_cast<NativePaddlePredictor *>(cls.get()));
+  PADDLE_ENFORCE_NOT_NULL(
+      dynamic_cast<NativePaddlePredictor *>(cls.get()),
+      platform::errors::PreconditionNotMet(
+          "Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
   if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) {
     LOG(ERROR) << "fail to call Init";
     return nullptr;
@@ -224,8 +229,13 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
       return false;
     }
-    PADDLE_ENFORCE_NOT_NULL(input_ptr);
-    PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data());
+    PADDLE_ENFORCE_NOT_NULL(input_ptr,
+                            platform::errors::InvalidArgument(
+                                "The input_ptr should not be nullptr."));
+    PADDLE_ENFORCE_NOT_NULL(
+        inputs[i].data.data(),
+        platform::errors::InvalidArgument(
+            "The data of input tensor should not be null."));
     if (platform::is_cpu_place(place_)) {
       // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
       std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
@@ -241,7 +251,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                    platform::CPUPlace(), inputs[i].data.data(),
                    inputs[i].data.length(), dev_ctx->stream());
 #else
-      PADDLE_THROW("Not compile with CUDA, should not reach here.");
+      PADDLE_THROW(platform::errors::Unavailable(
+          "Not compile with CUDA, should not reach here."));
 #endif
     }
@@ -287,7 +298,11 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
   outputs->resize(fetchs_.size());
   for (size_t i = 0; i < fetchs_.size(); ++i) {
     int idx = BOOST_GET_CONST(int, fetchs_[i]->GetAttr("col"));
-    PADDLE_ENFORCE((size_t)idx == i);
+    PADDLE_ENFORCE_EQ(
+        static_cast<size_t>(idx), i,
+        platform::errors::InvalidArgument(
+            "Fetch op's col attr(%d) should be equal to the index(%d)", idx,
+            i));
     framework::FetchType &fetch_var =
         framework::GetFetchVariable(*scope, "fetch", idx);
     auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var);
@@ -318,10 +333,15 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
   VLOG(3) << "create NativePaddlePredictor";
   if (config.use_gpu) {
     // 1. GPU memory
-    PADDLE_ENFORCE_GE(
-        config.fraction_of_gpu_memory, 0.f,
-        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
-    PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
+    PADDLE_ENFORCE_GE(config.fraction_of_gpu_memory, 0.f,
+                      platform::errors::InvalidArgument(
+                          "fraction_of_gpu_memory in the config should be set "
+                          "to range (0., 1.]"));
+    PADDLE_ENFORCE_GE(config.device, 0,
+                      platform::errors::PreconditionNotMet(
+                          "Invalid device id %d, the device id should be "
+                          "greater than or equal to 0.",
+                          config.device));
     std::vector<std::string> flags;
     if (config.fraction_of_gpu_memory >= 0.0f ||
         config.fraction_of_gpu_memory <= 0.95f) {
@@ -336,7 +356,9 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
   std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
   PADDLE_ENFORCE_NOT_NULL(
-      dynamic_cast<NativePaddlePredictor *>(predictor.get()));
+      dynamic_cast<NativePaddlePredictor *>(predictor.get()),
+      platform::errors::PreconditionNotMet(
+          "Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
   if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
     return nullptr;
   }
......
@@ -21,15 +21,21 @@
 namespace paddle {
 void ZeroCopyTensor::Reshape(const std::vector<int> &shape) {
-  PADDLE_ENFORCE(!name_.empty(),
-                 "Need to SetName first, so that the corresponding tensor can "
-                 "be retrieved.");
-  PADDLE_ENFORCE(input_or_output_,
-                 "Can't reshape the output tensor, it is readonly");
-  PADDLE_ENFORCE(scope_);
+  PADDLE_ENFORCE_EQ(
+      name_.empty(), false,
+      platform::errors::PreconditionNotMet(
+          "Need to SetName first, so that the corresponding tensor can "
+          "be retrieved."));
+  PADDLE_ENFORCE_EQ(input_or_output_, true,
+                    platform::errors::PermissionDenied(
+                        "Can't reshape the output tensor, it is readonly"));
+  PADDLE_ENFORCE_NOT_NULL(scope_, platform::errors::PreconditionNotMet(
+                                      "The scope should not be nullptr."));
   auto *scope = static_cast<framework::Scope *>(scope_);
   auto *var = scope->FindVar(name_);
-  PADDLE_ENFORCE(var, "No tensor called [%s] in the runtime scope", name_);
+  PADDLE_ENFORCE_NOT_NULL(
+      var, platform::errors::PreconditionNotMet(
+               "No tensor called [%s] in the runtime scope", name_));
   auto *tensor = var->GetMutable<framework::LoDTensor>();
   tensor->Resize(framework::make_ddim(shape));
 }
@@ -45,8 +51,10 @@ T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
   EAGER_GET_TENSOR;
   PADDLE_ENFORCE_GT(
       tensor->numel(), 0,
-      "You should call ZeroCopyTensor::Reshape(const std::vector<int> &shape)"
-      "function before retrieving mutable_data from input tensor.");
+      platform::errors::PreconditionNotMet(
+          "You should call ZeroCopyTensor::Reshape(const std::vector<int> "
+          "&shape)"
+          "function before retrieving mutable_data from input tensor."));
   switch (static_cast<int>(place)) {
     case static_cast<int>(PaddlePlace::kCPU): {
       return tensor->mutable_data<T>(platform::CPUPlace());
@@ -55,7 +63,8 @@ T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
       return tensor->mutable_data<T>(platform::CUDAPlace(device_));
     }
     default:
-      PADDLE_THROW("Unsupported place: %d", static_cast<int>(place));
+      PADDLE_THROW(platform::errors::Unavailable("Unsupported place: %d",
+                                                 static_cast<int>(place)));
       break;
   }
   return nullptr;
@@ -96,10 +105,11 @@ PaddleDType ZeroCopyTensor::type() const {
 template <typename T>
 void ZeroCopyTensor::copy_from_cpu(const T *data) {
   EAGER_GET_TENSOR;
-  PADDLE_ENFORCE_GE(
-      tensor->numel(), 0,
-      "You should call ZeroCopyTensor::Reshape(const std::vector<int> &shape)"
-      "function before copying data from cpu.");
+  PADDLE_ENFORCE_GE(tensor->numel(), 0,
+                    platform::errors::PreconditionNotMet(
+                        "You should call ZeroCopyTensor::Reshape(const "
+                        "std::vector<int> &shape)"
+                        "function before copying data from cpu."));
   size_t ele_size = tensor->numel() * sizeof(T);
   if (place_ == PaddlePlace::kCPU) {
@@ -116,7 +126,8 @@ void ZeroCopyTensor::copy_from_cpu(const T *data) {
     memory::Copy(gpu_place, static_cast<void *>(t_data), platform::CPUPlace(),
                  data, ele_size, dev_ctx->stream());
 #else
-    PADDLE_THROW("Not compiled with CUDA, should not reach here.");
+    PADDLE_THROW(platform::errors::Unavailable(
+        "Not compiled with CUDA, should not reach here."));
 #endif
   }
 }
@@ -141,7 +152,8 @@ void ZeroCopyTensor::copy_to_cpu(T *data) {
     cudaStreamSynchronize(dev_ctx->stream());
 #else
-    PADDLE_THROW("Not compile with CUDA, should not reach here.");
+    PADDLE_THROW(platform::errors::Unavailable(
+        "Not compile with CUDA, should not reach here."));
 #endif
   }
 }
@@ -176,20 +188,27 @@ template PD_INFER_DECL uint8_t *ZeroCopyTensor::mutable_data<uint8_t>(
     PaddlePlace place);
 void *ZeroCopyTensor::FindTensor() const {
-  PADDLE_ENFORCE(!name_.empty(),
-                 "Need to SetName first, so that the corresponding tensor can "
-                 "be retrieved.");
-  PADDLE_ENFORCE(scope_);
+  PADDLE_ENFORCE_EQ(
+      name_.empty(), false,
+      platform::errors::PreconditionNotMet(
+          "Need to SetName first, so that the corresponding tensor can "
+          "be retrieved."));
+  PADDLE_ENFORCE_NOT_NULL(scope_, platform::errors::PreconditionNotMet(
+                                      "The scope should not be nullptr."));
   auto *scope = static_cast<framework::Scope *>(scope_);
   auto *var = scope->FindVar(name_);
-  PADDLE_ENFORCE(var, "No tensor called [%s] in the runtime scope", name_);
+  PADDLE_ENFORCE_NOT_NULL(
+      var, platform::errors::PreconditionNotMet(
+               "No tensor called [%s] in the runtime scope", name_));
   auto *tensor = var->GetMutable<framework::LoDTensor>();
   return tensor;
 }
 std::vector<int> ZeroCopyTensor::shape() const {
   EAGER_GET_TENSOR;
-  PADDLE_ENFORCE(tensor_, "not found tensor called %s in the scope", name_);
+  PADDLE_ENFORCE_NOT_NULL(
+      tensor_, platform::errors::PreconditionNotMet(
+                   "Not found tensor called %s in the scope", name_));
   return framework::vectorize<int>(tensor->dims());
 }
......
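The ZeroCopyTensor methods touched above (Reshape, copy_from_cpu, copy_to_cpu) are the feed/fetch path when feed/fetch ops are disabled. A minimal usage sketch against the existing paddle::AnalysisConfig API, assuming the usual zero-copy setup; the model paths and shapes below are placeholders:

#include <vector>
#include "paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./model/__model__", "./model/__params__");  // hypothetical paths
  config.SwitchUseFeedFetchOps(false);  // required for the ZeroCopy interface
  auto predictor = paddle::CreatePaddlePredictor(config);

  // Feed a placeholder 1x3x224x224 input through the first input tensor.
  auto input_names = predictor->GetInputNames();
  auto input_t = predictor->GetInputTensor(input_names[0]);
  std::vector<float> input(1 * 3 * 224 * 224, 0.f);
  input_t->Reshape({1, 3, 224, 224});
  input_t->copy_from_cpu(input.data());

  predictor->ZeroCopyRun();

  // Fetch the first output back to host memory.
  auto output_names = predictor->GetOutputNames();
  auto output_t = predictor->GetOutputTensor(output_names[0]);
  std::vector<int> shape = output_t->shape();
  int numel = 1;
  for (int d : shape) numel *= d;
  std::vector<float> output(numel);
  output_t->copy_to_cpu(output.data());
  return 0;
}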
@@ -112,16 +112,19 @@ static T convert(const std::string &item,
     std::string message =
         "invalid_argument exception when try to convert : " + item;
     LOG(ERROR) << message;
-    PADDLE_THROW(message);
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "invalid_argument exception when try to convert %s.", item));
   } catch (std::out_of_range &e) {
     std::string message =
         "out_of_range exception when try to convert : " + item;
     LOG(ERROR) << message;
-    PADDLE_THROW(message);
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "out_of_range exception when try to convert %s.", item));
   } catch (...) {
     std::string message = "unexpected exception when try to convert " + item;
     LOG(ERROR) << message;
-    PADDLE_THROW(message);
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "unexpected exception when try to convert %s.", item));
   }
   return res;
 }
@@ -353,7 +356,8 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
                       double batch_latency, int epoch = 1,
                       const framework::proto::VarType::Type data_type =
                           framework::proto::VarType::FP32) {
-  PADDLE_ENFORCE_GT(batch_size, 0, "Non-positive batch size.");
+  PADDLE_ENFORCE_GT(batch_size, 0, platform::errors::InvalidArgument(
+                                       "Non-positive batch size."));
   double sample_latency = batch_latency / batch_size;
   LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid
             << " ======";
......
@@ -62,9 +62,12 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
         if (scales_.find(var_name) != scales_.end()) continue;
         auto* var = predictor_.sub_scope_->FindVar(var_name);
-        PADDLE_ENFORCE(var, "%s is not in the scope", var_name);
-        PADDLE_ENFORCE(var->IsType<LoDTensor>(),
-                       "Only support lod tensor now.");
+        PADDLE_ENFORCE_NOT_NULL(var,
+                                platform::errors::PreconditionNotMet(
+                                    "%s is not in the scope", var_name));
+        PADDLE_ENFORCE_EQ(var->IsType<LoDTensor>(), true,
+                          platform::errors::PreconditionNotMet(
+                              "Only support lod tensor now."));
         LoDTensor* var_tensor = var->GetMutable<LoDTensor>();
         // force unsigned type if already know it
@@ -82,9 +85,11 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
         } else if (op->Type() == "transpose2" ||
                    op->Type() == "reshape2" || op->Type() == "pool2d") {
           auto input_var_name = op->Input("X")[0];
-          PADDLE_ENFORCE(scales_.find(input_var_name) != scales_.end(),
-                         "Input scales must be calculated before the "
-                         "output scales to infer if output is unsigned.");
+          PADDLE_ENFORCE_NE(
+              scales_.find(input_var_name), scales_.end(),
+              platform::errors::PreconditionNotMet(
+                  "Input scales must be calculated before the "
+                  "output scales to infer if output is unsigned."));
           if (scales_.find(input_var_name) != scales_.end()) {
             scales_[var_name] = scales_[input_var_name];
           }
@@ -94,10 +99,11 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
           is_unsigned = true;
           double min_scale = std::numeric_limits<double>::max();
           for (auto input_var_name : op->Input("X")) {
-            PADDLE_ENFORCE(
-                scales_.find(input_var_name) != scales_.end(),
-                "Input scales must be calculated before the "
-                "output scales to infer if output is unsigned.");
+            PADDLE_ENFORCE_NE(
+                scales_.find(input_var_name), scales_.end(),
+                platform::errors::PreconditionNotMet(
+                    "Input scales must be calculated before the "
+                    "output scales to infer if output is unsigned."));
             is_unsigned = is_unsigned && scales_[input_var_name].first;
             min_scale = std::min(
                 min_scale,
@@ -132,11 +138,12 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale(
   auto rule = qconfig_->scale_algo(op_type_name, conn_name);
   if (rule == ScaleAlgo::NONE) return;
-  PADDLE_ENFORCE(
-      var_tensor.numel() > 0,
-      "MkldnnQuantizer: LoDTensor of variable %s for quantization of op "
-      "%s of connection %s should not be empty.",
-      var_name, op_type_name, conn_name);
+  PADDLE_ENFORCE_GT(
+      var_tensor.numel(), 0,
+      platform::errors::InvalidArgument(
+          "MkldnnQuantizer: LoDTensor of variable %s for quantization of op "
+          "%s of connection %s should not be empty.",
+          var_name, op_type_name, conn_name));
   switch (rule) {
     case ScaleAlgo::MAX:
@@ -205,10 +212,11 @@ AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor(
   float min_val = eigen_tensor.minCoeff();
   bool is_positive = min_val >= 0.0f;
   if (is_unsigned)
-    PADDLE_ENFORCE(
-        is_positive,
-        "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
-        min_val);
+    PADDLE_ENFORCE_EQ(
+        is_positive, true,
+        platform::errors::InvalidArgument(
+            "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
+            min_val));
   int num_quantized_bins = 255;
@@ -316,10 +324,11 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
   float max_abs = eigen_tensor.abs().maxCoeff();
   float min_val = eigen_tensor.minCoeff();
   if (is_unsigned)
-    PADDLE_ENFORCE(
-        min_val >= 0.0f,
-        "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
-        min_val);
+    PADDLE_ENFORCE_GE(
+        min_val, 0.0f,
+        platform::errors::InvalidArgument(
+            "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
+            min_val));
   LoDTensor scale_tensor = CreateScaleTensor();
   scale_tensor.data<double>()[0] = 1.0 / max_abs;
@@ -330,16 +339,19 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
 std::pair<bool, LoDTensor>
 AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor(
     const LoDTensor& var_tensor, bool is_unsigned, bool is_transposed) const {
-  PADDLE_ENFORCE(var_tensor.dims().size() > 0, "Tensor dimension is empty.");
+  PADDLE_ENFORCE_GT(
+      var_tensor.dims().size(), 0,
+      platform::errors::InvalidArgument("Tensor dimension is empty."));
   ConstEigenVectorArrayMap eigen_tensor{var_tensor.data<float>(),
                                         var_tensor.numel(), 1};
   float min_val = eigen_tensor.minCoeff();
   if (is_unsigned)
-    PADDLE_ENFORCE(
-        min_val >= 0.0f,
-        "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
-        min_val);
+    PADDLE_ENFORCE_GE(
+        min_val, 0.0f,
+        platform::errors::InvalidArgument(
+            "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
+            min_val));
   auto dims = var_tensor.dims();
   constexpr int num_col_dims = 1;
@@ -367,17 +379,19 @@ AnalysisPredictor::MkldnnQuantizer::Histogram(
     const framework::LoDTensor& var_tensor, float min_val, float max_val,
     size_t num_bins) const {
-  PADDLE_ENFORCE_GT(num_bins, 0,
-                    "MkldnnQuantizer: To calculate Histogram, num_bins (" +
-                        std::to_string(num_bins) + ") must be positive.");
-  PADDLE_ENFORCE_GT(
-      var_tensor.numel(), 0,
-      "MkldnnQuantizer: To calculate Histogram, the tensor must not be empty.");
-  PADDLE_ENFORCE(max_val >= min_val,
-                 "MkldnnQuantizer: To calculate Histogram, max_val (" +
-                     std::to_string(max_val) +
-                     ") must be greater or equal"
-                     "to min_val (" +
-                     std::to_string(min_val) + ").");
+  PADDLE_ENFORCE_GT(num_bins, 0,
+                    platform::errors::InvalidArgument(
+                        "MkldnnQuantizer: To calculate Histogram, num_bins (" +
+                        std::to_string(num_bins) + ") must be positive."));
+  PADDLE_ENFORCE_GT(var_tensor.numel(), 0,
+                    platform::errors::InvalidArgument(
+                        "MkldnnQuantizer: To calculate Histogram, the tensor "
+                        "must not be empty."));
+  PADDLE_ENFORCE_GE(max_val, min_val,
+                    platform::errors::InvalidArgument(
+                        "MkldnnQuantizer: To calculate Histogram, max_val (" +
+                        std::to_string(max_val) + ") must be greater or equal"
+                        "to min_val (" +
+                        std::to_string(min_val) + ")."));
   ConstEigenVectorArrayMap eigen_tensor{var_tensor.data<float>(),
                                         var_tensor.numel(), 1};
   auto bin_width = std::abs(max_val - min_val) / num_bins;
@@ -407,7 +421,8 @@ void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const {
   auto graph = std::unique_ptr<Graph>(new Graph(arg.main_program()));
   arg.SetMainGraph(graph.release());
   auto* scope_ptr = arg.scope_ptr();
-  PADDLE_ENFORCE(scope_ptr);
+  PADDLE_ENFORCE_NOT_NULL(scope_ptr, platform::errors::PreconditionNotMet(
+                                         "The scope should not be nullptr."));
   arg.main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
   auto* builder = predictor_.config_.pass_builder();
@@ -441,7 +456,9 @@ bool AnalysisPredictor::MkldnnQuantizer::RunQuantizePasses() const {
   PrepareArgument();
   auto& arg = predictor_.argument_;
   Analyzer().Run(&arg);
-  PADDLE_ENFORCE(arg.scope_valid());
+  PADDLE_ENFORCE_EQ(
+      arg.scope_valid(), true,
+      platform::errors::PreconditionNotMet("The scope should be valid."));
   VLOG(5) << "to prepare executor";
   ARGUMENT_CHECK_FIELD((&arg), ir_analyzed_program);
   predictor_.inference_program_.reset(
@@ -456,7 +473,8 @@ bool AnalysisPredictor::MkldnnQuantizer::RunWarmup() const {
   VLOG(3) << "Predictor: run a quantization warmup iteration";
   auto warmup_data = qconfig_->warmup_data();
   PADDLE_ENFORCE_NOT_NULL(warmup_data,
-                          "Warmup data cannot be NULL in the config.");
+                          platform::errors::PreconditionNotMet(
+                              "Warmup data cannot be NULL in the config."));
   PrettyLogH1("--- Running warmup iteration for quantization");
   // Run the inference program
@@ -469,7 +487,10 @@ bool AnalysisPredictor::MkldnnQuantizer::RunWarmup() const {
 float AnalysisPredictor::MkldnnQuantizer::SafeEntropy(
     std::vector<int> reference_distr_P, int P_sum,
     std::vector<int> candidate_distr_Q, int Q_sum) const {
-  PADDLE_ENFORCE_EQ(reference_distr_P.size(), candidate_distr_Q.size());
+  PADDLE_ENFORCE_EQ(reference_distr_P.size(), candidate_distr_Q.size(),
+                    platform::errors::InvalidArgument(
+                        "The P size %d should be equal to Q size %d",
+                        reference_distr_P.size(), candidate_distr_Q.size()));
   float tmp_sum1 = 0;
   float tmp_sum2 = 0;
   for (size_t idx = 0; idx < reference_distr_P.size(); idx++) {
@@ -479,10 +500,11 @@ float AnalysisPredictor::MkldnnQuantizer::SafeEntropy(
         tmp_sum1 += 0;
         tmp_sum2 += 0;
       } else {
-        PADDLE_ENFORCE(q_idx != 0, "MkldnnQuantizer: Fatal error!, idx = " +
-                                       std::to_string(idx) +
-                                       " qindex = 0! p_idx = " +
-                                       std::to_string(p_idx));
+        PADDLE_ENFORCE_NE(
+            q_idx, 0,
+            platform::errors::PreconditionNotMet(
+                "MkldnnQuantizer: Fatal error!, idx = " + std::to_string(idx) +
+                " qindex = 0! p_idx = " + std::to_string(p_idx)));
       }
       tmp_sum1 += p_idx * (log(Q_sum * p_idx));
       tmp_sum2 += p_idx * (log(P_sum * q_idx));
......
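The quantizer hunks above only enrich the error messages; the scale math is unchanged. For reference, GetMaxScalingFactor reduces a tensor to a single scale of 1 / max|x|. The following is a small, self-contained C++ sketch of that reduction, an illustration of the idea only and not the Paddle/Eigen implementation:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Max-abs quantization scale: values are later quantized as q = round(x * scale).
double MaxAbsScale(const std::vector<float>& data) {
  float max_abs = 0.f;
  for (float v : data) max_abs = std::max(max_abs, std::fabs(v));
  return max_abs > 0.f ? 1.0 / max_abs : 1.0;  // guard against an all-zero tensor
}

int main() {
  std::vector<float> tensor = {0.5f, -2.0f, 1.25f, 0.75f};
  std::printf("scale = %f\n", MaxAbsScale(tensor));  // prints 0.500000 (= 1 / 2.0)
  return 0;
}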
...@@ -31,12 +31,30 @@ limitations under the License. */ ...@@ -31,12 +31,30 @@ limitations under the License. */
#include "paddle_analysis_config.h" // NOLINT #include "paddle_analysis_config.h" // NOLINT
#include "paddle_api.h" // NOLINT #include "paddle_api.h" // NOLINT
///
/// \file paddle_inference_api.h
///
/// \brief Paddle Inference API
///
/// \author paddle-infer@baidu.com
/// \date 2020-09-01
/// \since 2.0.0-beta
///
namespace paddle_infer { namespace paddle_infer {
using DataType = paddle::PaddleDType; using DataType = paddle::PaddleDType;
using PlaceType = paddle::PaddlePlace; using PlaceType = paddle::PaddlePlace;
using PrecisionType = paddle::AnalysisConfig::Precision; using PrecisionType = paddle::AnalysisConfig::Precision;
using Config = paddle::AnalysisConfig; using Config = paddle::AnalysisConfig;
///
/// \class Tensor
///
/// \brief Represents an n-dimensional array of values.
/// The Tensor is used to store the input or output of the network.
/// It is obtained through Predictor::GetinputHandle()
/// and Predictor::GetOutputHandle() interface.
///
class PD_INFER_DECL Tensor { class PD_INFER_DECL Tensor {
public: public:
// Can only be created by predictor->GetInputHandle(cosnt std::string& name) // Can only be created by predictor->GetInputHandle(cosnt std::string& name)
...@@ -44,60 +62,186 @@ class PD_INFER_DECL Tensor { ...@@ -44,60 +62,186 @@ class PD_INFER_DECL Tensor {
Tensor() = delete; Tensor() = delete;
explicit Tensor(std::unique_ptr<paddle::ZeroCopyTensor>&& tensor) explicit Tensor(std::unique_ptr<paddle::ZeroCopyTensor>&& tensor)
: tensor_(std::move(tensor)) {} : tensor_(std::move(tensor)) {}
///
/// \brief Reset the shape of the tensor.
/// Generally it's only used for the input tensor.
/// Reshape must be called before calling mutable_data() or CopyFromCpu()
/// \param shape The shape to set.
///
void Reshape(const std::vector<int>& shape); void Reshape(const std::vector<int>& shape);
///
/// \brief Copy the host memory to tensor data.
/// It's usually used to set the input tensor data.
/// \param data The pointer of the data, from which the tensor will copy.
///
template <typename T> template <typename T>
void CopyFromCpu(const T* data); void CopyFromCpu(const T* data);
// should add the place ///
/// \brief Get the memory pointer in CPU or GPU with specific data type.
/// Please Reshape the tensor first before call this.
/// It's usually used to get input data pointer.
/// \param place The place of the tensor.
/// \return The tensor data buffer pointer.
///
template <typename T> template <typename T>
T* mutable_data(PlaceType place); T* mutable_data(PlaceType place);
///
/// \brief Copy the tensor data to the host memory.
/// It's usually used to get the output tensor data.
/// \param[out] data The tensor will copy the data to the address.
///
template <typename T> template <typename T>
void CopyToCpu(T* data); void CopyToCpu(T* data);
///
/// \brief Get the memory pointer directly.
/// It's usually used to get the output data pointer.
/// \param[out] place To get the device type of the tensor.
/// \param[out] size To get the data size of the tensor.
/// \return The tensor data buffer pointer.
///
template <typename T> template <typename T>
T* data(PlaceType* place, int* size) const; T* data(PlaceType* place, int* size) const;
///
/// \brief Set lod info of the tensor.
/// More about LOD can be seen here:
/// https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/lod_tensor.html#lodtensor
/// \param x the lod info.
///
void SetLoD(const std::vector<std::vector<size_t>>& x); void SetLoD(const std::vector<std::vector<size_t>>& x);
/// \brief Return the lod info of the tensor.
std::vector<std::vector<size_t>> lod() const; std::vector<std::vector<size_t>> lod() const;
/// \brief Return the data type of the tensor.
/// It's usually used to get the output tensor data type.
/// \return The data type of the tensor.
DataType type() const; DataType type() const;
/// \brief Return the shape of the Tensor.
std::vector<int> shape() const; std::vector<int> shape() const;
/// \brief Return the name of the tensor.
const std::string& name() const; const std::string& name() const;
private: private:
std::unique_ptr<paddle::ZeroCopyTensor> tensor_; std::unique_ptr<paddle::ZeroCopyTensor> tensor_;
}; };
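// Editor's note -- a minimal usage sketch, not part of this commit: feeding and
// fetching with the Tensor API declared above. The handles are assumed to come
// from Predictor::GetInputHandle()/GetOutputHandle() as documented; the shape
// [1, 3, 224, 224] and float dtype are illustrative. Requires <vector>,
// <numeric> and <functional>.
inline void FeedAndFetchSketch(Tensor* input_t, Tensor* output_t) {
  std::vector<float> input(1 * 3 * 224 * 224, 0.f);
  input_t->Reshape({1, 3, 224, 224});  // must precede mutable_data()/CopyFromCpu()
  input_t->CopyFromCpu(input.data());  // host -> tensor
  // ... predictor->Run() happens between feeding and fetching ...
  auto out_shape = output_t->shape();
  int out_num = std::accumulate(out_shape.begin(), out_shape.end(), 1,
                                std::multiplies<int>());
  std::vector<float> output(out_num);
  output_t->CopyToCpu(output.data());  // tensor -> host
}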
///
/// \class Predictor
///
/// \brief Predictor is the interface for model prediction.
///
/// The predictor has the following typical uses:
///
/// Get predictor
/// \code{cpp}
/// auto predictor = CreatePredictor(config);
/// \endcode
///
/// Get input or output names
/// \code{cpp}
/// auto input_names = predictor->GetInputNames();
/// auto output_names = predictor->GetOutputNames();
/// \endcode
///
/// Get input or output handle
/// \code{cpp}
/// auto input_t = predictor->GetInputHandle(input_names[0]);
/// auto output_t = predictor->GetOutputHandle(output_names[0]);
/// \endcode
///
/// Run predictor
/// \code{cpp}
/// predictor->Run();
/// \endcode
///
class PD_INFER_DECL Predictor { class PD_INFER_DECL Predictor {
public: public:
Predictor() = default; Predictor() = delete;
~Predictor() {} ~Predictor() {}
// Use for clone // Use for clone
explicit Predictor(std::unique_ptr<paddle::PaddlePredictor>&& pred) explicit Predictor(std::unique_ptr<paddle::PaddlePredictor>&& pred)
: predictor_(std::move(pred)) {} : predictor_(std::move(pred)) {}
///
/// \brief Construct a new Predictor object
///
/// \param[in] config The Config used to construct the predictor.
///
explicit Predictor(const Config& config); explicit Predictor(const Config& config);
///
/// \brief Get the input names
///
/// \return input names
///
std::vector<std::string> GetInputNames(); std::vector<std::string> GetInputNames();
///
/// \brief Get the Input Tensor object
///
/// \param[in] name input name
/// \return input tensor
///
std::unique_ptr<Tensor> GetInputHandle(const std::string& name); std::unique_ptr<Tensor> GetInputHandle(const std::string& name);
///
/// \brief Run the prediction engine
///
/// \return Whether the function executed successfully
///
bool Run(); bool Run();
///
/// \brief Get the output names
///
/// \return output names
///
std::vector<std::string> GetOutputNames(); std::vector<std::string> GetOutputNames();
///
/// \brief Get the Output Tensor object
///
/// \param[in] name output name
/// \return output tensor
///
std::unique_ptr<Tensor> GetOutputHandle(const std::string& name); std::unique_ptr<Tensor> GetOutputHandle(const std::string& name);
///
/// \brief Clone to get a new predictor. Thread safe.
///
/// \return get a new predictor
///
std::unique_ptr<Predictor> Clone(); std::unique_ptr<Predictor> Clone();
/// \brief Clear the intermediate tensors of the predictor
void ClearIntermediateTensor(); void ClearIntermediateTensor();
private: private:
std::unique_ptr<paddle::PaddlePredictor> predictor_; std::unique_ptr<paddle::PaddlePredictor> predictor_;
}; };
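// Editor's note -- hedged sketch, not part of this commit: Clone() is the
// documented thread-safe way to share one loaded model, each worker owning its
// own clone. The thread count of 4 and the lambda body are illustrative;
// requires <thread> and <vector>.
inline void CloneAcrossThreads(Predictor* predictor) {
  std::vector<std::thread> workers;
  for (int i = 0; i < 4; ++i) {
    workers.emplace_back([clone = predictor->Clone()] {
      auto names = clone->GetInputNames();  // each thread drives its own clone
      (void)names;  // feed inputs on `clone`, call clone->Run(), read outputs
    });
  }
  for (auto& t : workers) t.join();
}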
///
/// \brief A factory to help create predictors.
///
/// Usage:
///
/// \code{.cpp}
/// Config config;
/// ... // change the configs.
/// auto predictor = CreatePredictor(config);
/// \endcode
///
PD_INFER_DECL std::shared_ptr<Predictor> CreatePredictor( PD_INFER_DECL std::shared_ptr<Predictor> CreatePredictor(
const Config& config); // NOLINT const Config& config); // NOLINT
PD_INFER_DECL int GetNumBytesOfDataType(DataType dtype); PD_INFER_DECL int GetNumBytesOfDataType(DataType dtype);
PD_INFER_DECL std::string GetVersion(); PD_INFER_DECL std::string GetVersion();
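// Editor's note -- hedged sketch, not part of this commit: the Config half of
// the factory usage shown in the comment above. SetModel, EnableUseGpu and
// SwitchIrOptim are the AnalysisConfig members invoked by the C API later in
// this commit; the model path, 100 MB pool and device id 0 are placeholders.
inline std::shared_ptr<Predictor> BuildPredictorSketch() {
  Config config;
  config.SetModel("./model_dir");  // placeholder path
  config.EnableUseGpu(100, 0);     // 100 MB initial pool on device 0 (optional)
  config.SwitchIrOptim(true);
  return CreatePredictor(config);
}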
...@@ -128,13 +272,24 @@ T* Tensor::data(PlaceType* place, int* size) const { ...@@ -128,13 +272,24 @@ T* Tensor::data(PlaceType* place, int* size) const {
namespace paddle_infer { namespace paddle_infer {
namespace services { namespace services {
///
/// \class PredictorPool
///
/// \brief PredictorPool is a simple encapsulation of Predictor, suitable for
/// use in multi-threaded situations. According to the thread id, the
/// corresponding Predictor is taken out from PredictorPool to complete the
/// prediction.
///
class PD_INFER_DECL PredictorPool { class PD_INFER_DECL PredictorPool {
public: public:
PredictorPool() = delete; PredictorPool() = delete;
PredictorPool(const PredictorPool&) = delete; PredictorPool(const PredictorPool&) = delete;
PredictorPool& operator=(const PredictorPool&) = delete; PredictorPool& operator=(const PredictorPool&) = delete;
/// \brief Construct the predictor pool with \param size predictor instances.
explicit PredictorPool(const Config& config, size_t size = 1); explicit PredictorPool(const Config& config, size_t size = 1);
/// \brief Get the \param idx-th predictor.
Predictor* Retrive(size_t idx); Predictor* Retrive(size_t idx);
private: private:
......
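// Editor's note -- hedged sketch, not part of this commit: one pool slot per
// worker thread, keeping the Retrive() spelling of the API above. The pool
// size of 4 and the worker body are illustrative; requires <thread> and
// <vector>.
inline void PoolSketch(const paddle_infer::Config& config) {
  paddle_infer::services::PredictorPool pool(config, 4);  // 4 predictors
  std::vector<std::thread> workers;
  for (size_t i = 0; i < 4; ++i) {
    workers.emplace_back([&pool, i] {
      paddle_infer::Predictor* p = pool.Retrive(i);  // slot i for worker i
      (void)p;  // feed inputs on p, call p->Run(), read outputs
    });
  }
  for (auto& t : workers) t.join();
}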
...@@ -231,6 +231,10 @@ void CpuPassStrategy::EnableMkldnnQuantizer() { ...@@ -231,6 +231,10 @@ void CpuPassStrategy::EnableMkldnnQuantizer() {
void CpuPassStrategy::EnableMkldnnBfloat16() { void CpuPassStrategy::EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
if (!use_mkldnn_bfloat16_) {
passes_.push_back("cpu_bfloat16_placement_pass");
passes_.push_back("cpu_bfloat16_pass");
}
use_mkldnn_bfloat16_ = true; use_mkldnn_bfloat16_ = true;
#else #else
use_mkldnn_bfloat16_ = false; use_mkldnn_bfloat16_ = false;
......
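// Editor's note -- hedged sketch, not part of this commit: the user-facing
// switch behind the pass-strategy change above. The new guard means repeated
// calls append cpu_bfloat16_placement_pass / cpu_bfloat16_pass only once;
// calling EnableMKLDNN() first is assumed, as for the other MKL-DNN options,
// and the model path is a placeholder.
void EnableBf16Sketch() {
  paddle::AnalysisConfig cfg;
  cfg.SetModel("./model_dir");   // placeholder model path
  cfg.EnableMKLDNN();            // assumed prerequisite
  cfg.EnableMkldnnBfloat16();    // appends the two bfloat16 passes
  cfg.EnableMkldnnBfloat16();    // guarded above: no duplicate passes
}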
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <vector> #include <vector>
#include "paddle/fluid/inference/capi/c_api_internal.h" #include "paddle/fluid/inference/capi/c_api_internal.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h" #include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/platform/enforce.h"
using paddle::ConvertToACPrecision; using paddle::ConvertToACPrecision;
using paddle::ConvertToPaddleDType; using paddle::ConvertToPaddleDType;
...@@ -34,27 +35,37 @@ void PD_DeletePaddleBuf(PD_PaddleBuf* buf) { ...@@ -34,27 +35,37 @@ void PD_DeletePaddleBuf(PD_PaddleBuf* buf) {
} }
void PD_PaddleBufResize(PD_PaddleBuf* buf, size_t length) { void PD_PaddleBufResize(PD_PaddleBuf* buf, size_t length) {
PADDLE_ENFORCE_NOT_NULL(buf); PADDLE_ENFORCE_NOT_NULL(buf,
paddle::platform::errors::InvalidArgument(
"The pointer of Buffer shouldn't be nullptr"));
buf->buf.Resize(length); buf->buf.Resize(length);
} }
void PD_PaddleBufReset(PD_PaddleBuf* buf, void* data, size_t length) { void PD_PaddleBufReset(PD_PaddleBuf* buf, void* data, size_t length) {
PADDLE_ENFORCE_NOT_NULL(buf); PADDLE_ENFORCE_NOT_NULL(buf,
paddle::platform::errors::InvalidArgument(
"The pointer of Buffer shouldn't be nullptr"));
buf->buf.Reset(data, length); buf->buf.Reset(data, length);
} }
bool PD_PaddleBufEmpty(PD_PaddleBuf* buf) { bool PD_PaddleBufEmpty(PD_PaddleBuf* buf) {
PADDLE_ENFORCE_NOT_NULL(buf); PADDLE_ENFORCE_NOT_NULL(buf,
paddle::platform::errors::InvalidArgument(
"The pointer of Buffer shouldn't be nullptr"));
return buf->buf.empty(); return buf->buf.empty();
} }
void* PD_PaddleBufData(PD_PaddleBuf* buf) { void* PD_PaddleBufData(PD_PaddleBuf* buf) {
PADDLE_ENFORCE_NOT_NULL(buf); PADDLE_ENFORCE_NOT_NULL(buf,
paddle::platform::errors::InvalidArgument(
"The pointer of Buffer shouldn't be nullptr"));
return buf->buf.data(); return buf->buf.data();
} }
size_t PD_PaddleBufLength(PD_PaddleBuf* buf) { size_t PD_PaddleBufLength(PD_PaddleBuf* buf) {
PADDLE_ENFORCE_NOT_NULL(buf); PADDLE_ENFORCE_NOT_NULL(buf,
paddle::platform::errors::InvalidArgument(
"The pointer of Buffer shouldn't be nullptr"));
return buf->buf.length(); return buf->buf.length();
} }
......
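// Editor's note -- hedged sketch, not part of this commit: the buffer C API
// checked above. PD_NewPaddleBuf() is assumed to pair with PD_DeletePaddleBuf
// (it is used later in this commit by PD_GetPaddleTensorData); a nullptr buf
// now raises InvalidArgument. Requires <cstring> for std::memset.
void BufSketch() {
  PD_PaddleBuf* buf = PD_NewPaddleBuf();  // assumed constructor
  PD_PaddleBufResize(buf, 1024);          // allocate 1024 bytes
  if (!PD_PaddleBufEmpty(buf)) {
    std::memset(PD_PaddleBufData(buf), 0, PD_PaddleBufLength(buf));
  }
  PD_DeletePaddleBuf(buf);
}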
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
#include "paddle/fluid/inference/api/paddle_analysis_config.h" #include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/api/paddle_api.h" #include "paddle/fluid/inference/api/paddle_api.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h" #include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/platform/enforce.h"
using PD_PaddleDType = paddle::PaddleDType; using PD_PaddleDType = paddle::PaddleDType;
using PD_ACPrecision = paddle::AnalysisConfig::Precision; using PD_ACPrecision = paddle::AnalysisConfig::Precision;
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <vector> #include <vector>
#include "paddle/fluid/inference/capi/c_api_internal.h" #include "paddle/fluid/inference/capi/c_api_internal.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h" #include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/platform/enforce.h"
using paddle::ConvertToACPrecision; using paddle::ConvertToACPrecision;
using paddle::ConvertToPaddleDType; using paddle::ConvertToPaddleDType;
...@@ -40,7 +41,10 @@ void PD_DeleteAnalysisConfig(PD_AnalysisConfig* config) { ...@@ -40,7 +41,10 @@ void PD_DeleteAnalysisConfig(PD_AnalysisConfig* config) {
void PD_SetModel(PD_AnalysisConfig* config, const char* model_dir, void PD_SetModel(PD_AnalysisConfig* config, const char* model_dir,
const char* params_path) { const char* params_path) {
LOG(INFO) << model_dir; LOG(INFO) << model_dir;
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
LOG(INFO) << std::string(model_dir); LOG(INFO) << std::string(model_dir);
if (!params_path) { if (!params_path) {
config->config.SetModel(std::string(model_dir)); config->config.SetModel(std::string(model_dir));
...@@ -50,104 +54,164 @@ void PD_SetModel(PD_AnalysisConfig* config, const char* model_dir, ...@@ -50,104 +54,164 @@ void PD_SetModel(PD_AnalysisConfig* config, const char* model_dir,
} }
void PD_SetProgFile(PD_AnalysisConfig* config, const char* x) { void PD_SetProgFile(PD_AnalysisConfig* config, const char* x) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SetProgFile(std::string(x)); config->config.SetProgFile(std::string(x));
} }
void PD_SetParamsFile(PD_AnalysisConfig* config, const char* x) { void PD_SetParamsFile(PD_AnalysisConfig* config, const char* x) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SetParamsFile(std::string(x)); config->config.SetParamsFile(std::string(x));
} }
void PD_SetOptimCacheDir(PD_AnalysisConfig* config, const char* opt_cache_dir) { void PD_SetOptimCacheDir(PD_AnalysisConfig* config, const char* opt_cache_dir) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SetOptimCacheDir(std::string(opt_cache_dir)); config->config.SetOptimCacheDir(std::string(opt_cache_dir));
} }
const char* PD_ModelDir(const PD_AnalysisConfig* config) { const char* PD_ModelDir(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.model_dir().c_str(); return config->config.model_dir().c_str();
} }
const char* PD_ProgFile(const PD_AnalysisConfig* config) { const char* PD_ProgFile(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.prog_file().c_str(); return config->config.prog_file().c_str();
} }
const char* PD_ParamsFile(const PD_AnalysisConfig* config) { const char* PD_ParamsFile(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.params_file().c_str(); return config->config.params_file().c_str();
} }
void PD_EnableUseGpu(PD_AnalysisConfig* config, int memory_pool_init_size_mb, void PD_EnableUseGpu(PD_AnalysisConfig* config, int memory_pool_init_size_mb,
int device_id) { int device_id) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableUseGpu(static_cast<uint64_t>(memory_pool_init_size_mb), config->config.EnableUseGpu(static_cast<uint64_t>(memory_pool_init_size_mb),
device_id); device_id);
} }
void PD_DisableGpu(PD_AnalysisConfig* config) { void PD_DisableGpu(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.DisableGpu(); config->config.DisableGpu();
} }
bool PD_UseGpu(const PD_AnalysisConfig* config) { bool PD_UseGpu(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.use_gpu(); return config->config.use_gpu();
} }
int PD_GpuDeviceId(const PD_AnalysisConfig* config) { int PD_GpuDeviceId(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.gpu_device_id(); return config->config.gpu_device_id();
} }
int PD_MemoryPoolInitSizeMb(const PD_AnalysisConfig* config) { int PD_MemoryPoolInitSizeMb(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.memory_pool_init_size_mb(); return config->config.memory_pool_init_size_mb();
} }
float PD_FractionOfGpuMemoryForPool(const PD_AnalysisConfig* config) { float PD_FractionOfGpuMemoryForPool(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.fraction_of_gpu_memory_for_pool(); return config->config.fraction_of_gpu_memory_for_pool();
} }
void PD_EnableCUDNN(PD_AnalysisConfig* config) { void PD_EnableCUDNN(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableCUDNN(); config->config.EnableCUDNN();
} }
bool PD_CudnnEnabled(const PD_AnalysisConfig* config) { bool PD_CudnnEnabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.cudnn_enabled(); return config->config.cudnn_enabled();
} }
void PD_SwitchIrOptim(PD_AnalysisConfig* config, bool x) { void PD_SwitchIrOptim(PD_AnalysisConfig* config, bool x) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SwitchIrOptim(x); config->config.SwitchIrOptim(x);
} }
bool PD_IrOptim(const PD_AnalysisConfig* config) { bool PD_IrOptim(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.ir_optim(); return config->config.ir_optim();
} }
void PD_SwitchUseFeedFetchOps(PD_AnalysisConfig* config, bool x) { void PD_SwitchUseFeedFetchOps(PD_AnalysisConfig* config, bool x) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SwitchUseFeedFetchOps(x); config->config.SwitchUseFeedFetchOps(x);
} }
bool PD_UseFeedFetchOpsEnabled(const PD_AnalysisConfig* config) { bool PD_UseFeedFetchOpsEnabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.use_feed_fetch_ops_enabled(); return config->config.use_feed_fetch_ops_enabled();
} }
void PD_SwitchSpecifyInputNames(PD_AnalysisConfig* config, bool x) { void PD_SwitchSpecifyInputNames(PD_AnalysisConfig* config, bool x) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SwitchSpecifyInputNames(x); config->config.SwitchSpecifyInputNames(x);
} }
bool PD_SpecifyInputName(const PD_AnalysisConfig* config) { bool PD_SpecifyInputName(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.specify_input_name(); return config->config.specify_input_name();
} }
...@@ -155,110 +219,168 @@ void PD_EnableTensorRtEngine(PD_AnalysisConfig* config, int workspace_size, ...@@ -155,110 +219,168 @@ void PD_EnableTensorRtEngine(PD_AnalysisConfig* config, int workspace_size,
int max_batch_size, int min_subgraph_size, int max_batch_size, int min_subgraph_size,
Precision precision, bool use_static, Precision precision, bool use_static,
bool use_calib_mode) { bool use_calib_mode) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableTensorRtEngine( config->config.EnableTensorRtEngine(
workspace_size, max_batch_size, min_subgraph_size, workspace_size, max_batch_size, min_subgraph_size,
paddle::ConvertToACPrecision(precision), use_static, use_calib_mode); paddle::ConvertToACPrecision(precision), use_static, use_calib_mode);
} }
bool PD_TensorrtEngineEnabled(const PD_AnalysisConfig* config) { bool PD_TensorrtEngineEnabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.tensorrt_engine_enabled(); return config->config.tensorrt_engine_enabled();
} }
void PD_SwitchIrDebug(PD_AnalysisConfig* config, bool x) { void PD_SwitchIrDebug(PD_AnalysisConfig* config, bool x) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SwitchIrDebug(x); config->config.SwitchIrDebug(x);
} }
void PD_EnableMKLDNN(PD_AnalysisConfig* config) { void PD_EnableMKLDNN(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableMKLDNN(); config->config.EnableMKLDNN();
} }
void PD_SetMkldnnCacheCapacity(PD_AnalysisConfig* config, int capacity) { void PD_SetMkldnnCacheCapacity(PD_AnalysisConfig* config, int capacity) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SetMkldnnCacheCapacity(capacity); config->config.SetMkldnnCacheCapacity(capacity);
} }
bool PD_MkldnnEnabled(const PD_AnalysisConfig* config) { bool PD_MkldnnEnabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.mkldnn_enabled(); return config->config.mkldnn_enabled();
} }
void PD_SetCpuMathLibraryNumThreads(PD_AnalysisConfig* config, void PD_SetCpuMathLibraryNumThreads(PD_AnalysisConfig* config,
int cpu_math_library_num_threads) { int cpu_math_library_num_threads) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SetCpuMathLibraryNumThreads(cpu_math_library_num_threads); config->config.SetCpuMathLibraryNumThreads(cpu_math_library_num_threads);
} }
int PD_CpuMathLibraryNumThreads(const PD_AnalysisConfig* config) { int PD_CpuMathLibraryNumThreads(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.cpu_math_library_num_threads(); return config->config.cpu_math_library_num_threads();
} }
void PD_EnableMkldnnQuantizer(PD_AnalysisConfig* config) { void PD_EnableMkldnnQuantizer(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableMkldnnQuantizer(); config->config.EnableMkldnnQuantizer();
} }
bool PD_MkldnnQuantizerEnabled(const PD_AnalysisConfig* config) { bool PD_MkldnnQuantizerEnabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.mkldnn_quantizer_enabled(); return config->config.mkldnn_quantizer_enabled();
} }
void PD_EnableMkldnnBfloat16(PD_AnalysisConfig* config) { void PD_EnableMkldnnBfloat16(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::NotFound( PADDLE_ENFORCE_NOT_NULL(
"PD_AnalysisConfig should not be null")); config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableMkldnnBfloat16(); config->config.EnableMkldnnBfloat16();
} }
bool PD_MkldnnBfloat16Enabled(const PD_AnalysisConfig* config) { bool PD_MkldnnBfloat16Enabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::NotFound( PADDLE_ENFORCE_NOT_NULL(
"PD_AnalysisConfig should not be null")); config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.mkldnn_bfloat16_enabled(); return config->config.mkldnn_bfloat16_enabled();
} }
void PD_SetModelBuffer(PD_AnalysisConfig* config, const char* prog_buffer, void PD_SetModelBuffer(PD_AnalysisConfig* config, const char* prog_buffer,
size_t prog_buffer_size, const char* params_buffer, size_t prog_buffer_size, const char* params_buffer,
size_t params_buffer_size) { size_t params_buffer_size) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SetModelBuffer(prog_buffer, prog_buffer_size, params_buffer, config->config.SetModelBuffer(prog_buffer, prog_buffer_size, params_buffer,
params_buffer_size); params_buffer_size);
} }
bool PD_ModelFromMemory(const PD_AnalysisConfig* config) { bool PD_ModelFromMemory(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.model_from_memory(); return config->config.model_from_memory();
} }
void PD_EnableMemoryOptim(PD_AnalysisConfig* config) { void PD_EnableMemoryOptim(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableMemoryOptim(); config->config.EnableMemoryOptim();
} }
bool PD_MemoryOptimEnabled(const PD_AnalysisConfig* config) { bool PD_MemoryOptimEnabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.enable_memory_optim(); return config->config.enable_memory_optim();
} }
void PD_EnableProfile(PD_AnalysisConfig* config) { void PD_EnableProfile(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableProfile(); config->config.EnableProfile();
} }
bool PD_ProfileEnabled(const PD_AnalysisConfig* config) { bool PD_ProfileEnabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.profile_enabled(); return config->config.profile_enabled();
} }
void PD_SetInValid(PD_AnalysisConfig* config) { void PD_SetInValid(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SetInValid(); config->config.SetInValid();
} }
bool PD_IsValid(const PD_AnalysisConfig* config) { bool PD_IsValid(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.is_valid(); return config->config.is_valid();
} }
......
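// Editor's note -- hedged sketch, not part of this commit: a minimal C-API
// configuration using the functions hardened above. PD_NewAnalysisConfig() is
// assumed to pair with PD_DeleteAnalysisConfig(); the model path is a
// placeholder, and a nullptr config now raises InvalidArgument.
void ConfigSketch() {
  PD_AnalysisConfig* config = PD_NewAnalysisConfig();  // assumed constructor
  PD_SetModel(config, "./model_dir", nullptr);  // combined params file omitted
  PD_DisableGpu(config);
  PD_SwitchIrOptim(config, true);
  PD_EnableMKLDNN(config);
  PD_DeleteAnalysisConfig(config);
}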
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "paddle/fluid/inference/api/paddle_api.h" #include "paddle/fluid/inference/api/paddle_api.h"
#include "paddle/fluid/inference/capi/c_api_internal.h" #include "paddle/fluid/inference/capi/c_api_internal.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h" #include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/platform/enforce.h"
using paddle::ConvertToACPrecision; using paddle::ConvertToACPrecision;
using paddle::ConvertToPaddleDType; using paddle::ConvertToPaddleDType;
...@@ -81,7 +82,10 @@ extern "C" { ...@@ -81,7 +82,10 @@ extern "C" {
bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs, bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
int in_size, PD_Tensor** output_data, int* out_size, int in_size, PD_Tensor** output_data, int* out_size,
int batch_size) { int batch_size) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
VLOG(3) << "Predoctor: PD_PredictorRun. "; VLOG(3) << "Predoctor: PD_PredictorRun. ";
static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>> static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>>
predictors; predictors;
...@@ -111,7 +115,10 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs, ...@@ -111,7 +115,10 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
PD_ZeroCopyData* inputs, int in_size, PD_ZeroCopyData* inputs, int in_size,
PD_ZeroCopyData** output, int* out_size) { PD_ZeroCopyData** output, int* out_size) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>> static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>>
predictors; predictors;
if (!predictors.count(config->config.model_dir())) { if (!predictors.count(config->config.model_dir())) {
...@@ -144,7 +151,8 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, ...@@ -144,7 +151,8 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
input_t->copy_from_cpu(static_cast<uint8_t*>(inputs[i].data)); input_t->copy_from_cpu(static_cast<uint8_t*>(inputs[i].data));
break; break;
default: default:
CHECK(false) << "Unsupport data type."; PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"Unsupported data type."));
break; break;
} }
} }
...@@ -227,7 +235,8 @@ void PD_SetZeroCopyInput(PD_Predictor* predictor, ...@@ -227,7 +235,8 @@ void PD_SetZeroCopyInput(PD_Predictor* predictor,
input->copy_from_cpu(static_cast<uint8_t*>(tensor->data.data)); input->copy_from_cpu(static_cast<uint8_t*>(tensor->data.data));
break; break;
default: default:
CHECK(false) << "Unsupport data type."; PADDLE_THROW(
paddle::platform::errors::InvalidArgument("Unsupported data type."));
break; break;
} }
...@@ -294,7 +303,8 @@ void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) { ...@@ -294,7 +303,8 @@ void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) {
output->copy_to_cpu(reinterpret_cast<uint8_t*>(tensor->data.data)); output->copy_to_cpu(reinterpret_cast<uint8_t*>(tensor->data.data));
break; break;
default: default:
CHECK(false) << "Unsupport data type."; PADDLE_THROW(
paddle::platform::errors::InvalidArgument("Unsupported data type."));
break; break;
} }
} }
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <vector> #include <vector>
#include "paddle/fluid/inference/capi/c_api_internal.h" #include "paddle/fluid/inference/capi/c_api_internal.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h" #include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/platform/enforce.h"
using paddle::ConvertToACPrecision; using paddle::ConvertToACPrecision;
using paddle::ConvertToPaddleDType; using paddle::ConvertToPaddleDType;
...@@ -37,44 +38,60 @@ void PD_DeletePaddleTensor(PD_Tensor* tensor) { ...@@ -37,44 +38,60 @@ void PD_DeletePaddleTensor(PD_Tensor* tensor) {
} }
void PD_SetPaddleTensorName(PD_Tensor* tensor, char* name) { void PD_SetPaddleTensorName(PD_Tensor* tensor, char* name) {
PADDLE_ENFORCE_NOT_NULL(tensor); PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
tensor->tensor.name = std::string(name); tensor->tensor.name = std::string(name);
} }
void PD_SetPaddleTensorDType(PD_Tensor* tensor, PD_DataType dtype) { void PD_SetPaddleTensorDType(PD_Tensor* tensor, PD_DataType dtype) {
PADDLE_ENFORCE_NOT_NULL(tensor); PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
tensor->tensor.dtype = paddle::ConvertToPaddleDType(dtype); tensor->tensor.dtype = paddle::ConvertToPaddleDType(dtype);
} }
void PD_SetPaddleTensorData(PD_Tensor* tensor, PD_PaddleBuf* buf) { void PD_SetPaddleTensorData(PD_Tensor* tensor, PD_PaddleBuf* buf) {
PADDLE_ENFORCE_NOT_NULL(tensor); PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
tensor->tensor.data = buf->buf; tensor->tensor.data = buf->buf;
} }
void PD_SetPaddleTensorShape(PD_Tensor* tensor, int* shape, int size) { void PD_SetPaddleTensorShape(PD_Tensor* tensor, int* shape, int size) {
PADDLE_ENFORCE_NOT_NULL(tensor); PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
tensor->tensor.shape.assign(shape, shape + size); tensor->tensor.shape.assign(shape, shape + size);
} }
const char* PD_GetPaddleTensorName(const PD_Tensor* tensor) { const char* PD_GetPaddleTensorName(const PD_Tensor* tensor) {
PADDLE_ENFORCE_NOT_NULL(tensor); PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
return tensor->tensor.name.c_str(); return tensor->tensor.name.c_str();
} }
PD_DataType PD_GetPaddleTensorDType(const PD_Tensor* tensor) { PD_DataType PD_GetPaddleTensorDType(const PD_Tensor* tensor) {
PADDLE_ENFORCE_NOT_NULL(tensor); PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
return ConvertToPDDataType(tensor->tensor.dtype); return ConvertToPDDataType(tensor->tensor.dtype);
} }
PD_PaddleBuf* PD_GetPaddleTensorData(const PD_Tensor* tensor) { PD_PaddleBuf* PD_GetPaddleTensorData(const PD_Tensor* tensor) {
PADDLE_ENFORCE_NOT_NULL(tensor); PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
PD_PaddleBuf* ret = PD_NewPaddleBuf(); PD_PaddleBuf* ret = PD_NewPaddleBuf();
ret->buf = tensor->tensor.data; ret->buf = tensor->tensor.data;
return ret; return ret;
} }
const int* PD_GetPaddleTensorShape(const PD_Tensor* tensor, int* size) { const int* PD_GetPaddleTensorShape(const PD_Tensor* tensor, int* size) {
PADDLE_ENFORCE_NOT_NULL(tensor); PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
const std::vector<int>& shape = tensor->tensor.shape; const std::vector<int>& shape = tensor->tensor.shape;
*size = shape.size(); *size = shape.size();
return shape.data(); return shape.data();
......
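// Editor's note -- hedged sketch, not part of this commit: a round trip
// through the tensor C API above. PD_NewPaddleTensor() is assumed to pair with
// PD_DeletePaddleTensor(); the name and shape are illustrative.
void TensorMetaSketch() {
  PD_Tensor* t = PD_NewPaddleTensor();  // assumed constructor
  int shape[2] = {1, 8};
  PD_SetPaddleTensorName(t, const_cast<char*>("input0"));
  PD_SetPaddleTensorShape(t, shape, 2);
  int size = 0;
  const int* got = PD_GetPaddleTensorShape(t, &size);  // size == 2, got[1] == 8
  (void)got;
  PD_DeletePaddleTensor(t);
}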
...@@ -20,8 +20,12 @@ ...@@ -20,8 +20,12 @@
#define LITE_WITH_XPU 1 #define LITE_WITH_XPU 1
#endif #endif
#ifndef PADDLE_WITH_ARM
#define LITE_WITH_X86 1
#endif
#include "paddle/fluid/inference/lite/engine.h" #include "paddle/fluid/inference/lite/engine.h"
#include "lite/api/paddle_use_passes.h" #include <utility>
namespace paddle { namespace paddle {
namespace inference { namespace inference {
...@@ -36,32 +40,40 @@ bool EngineManager::Has(const std::string& name) const { ...@@ -36,32 +40,40 @@ bool EngineManager::Has(const std::string& name) const {
return engines_.at(name).get() != nullptr; return engines_.at(name).get() != nullptr;
} }
paddle::lite::Predictor* EngineManager::Get(const std::string& name) const { paddle::lite_api::PaddlePredictor* EngineManager::Get(
const std::string& name) const {
return engines_.at(name).get(); return engines_.at(name).get();
} }
paddle::lite::Predictor* EngineManager::Create(const std::string& name, paddle::lite_api::PaddlePredictor* EngineManager::Create(
const EngineConfig& cfg) { const std::string& name, const EngineConfig& cfg) {
if (cfg.valid_places.front().target == TARGET(kCUDA)) { // config info for predictor.
#ifdef PADDLE_WITH_CUDA paddle::lite_api::CxxConfig lite_cxx_config;
paddle::lite::Env<TARGET(kCUDA)>::Init(); lite_cxx_config.set_model_buffer(cfg.model.c_str(), cfg.model.size(),
cfg.param.c_str(), cfg.param.size());
lite_cxx_config.set_valid_places(cfg.valid_places);
#ifdef PADDLE_WITH_ARM
lite_cxx_config.set_threads(cfg.cpu_math_library_num_threads);
#else
lite_cxx_config.set_x86_math_library_num_threads(
cfg.cpu_math_library_num_threads);
#endif #endif
} else if (cfg.valid_places.front().target == TARGET(kXPU)) {
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
paddle::lite::TargetWrapper<TARGET(kXPU)>::workspace_l3_size_per_thread = lite_cxx_config.set_xpu_workspace_l3_size_per_thread(
cfg.xpu_l3_workspace_size; cfg.xpu_l3_workspace_size);
#endif #endif
}
auto* p = new paddle::lite::Predictor(); // create predictor
p->Build("", cfg.model, cfg.param, cfg.valid_places, cfg.neglected_passes, std::shared_ptr<paddle::lite_api::PaddlePredictor> p =
cfg.model_type, cfg.model_from_memory); paddle::lite_api::CreatePaddlePredictor(lite_cxx_config);
engines_[name].reset(p); engines_[name] = std::move(p);
return p; return engines_[name].get();
} }
void EngineManager::DeleteAll() { void EngineManager::DeleteAll() {
for (auto& item : engines_) { for (auto& item : engines_) {
item.second.reset(nullptr); item.second.reset();
} }
} }
......
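// Editor's note -- hedged sketch, not part of this commit: how a caller is
// expected to drive the rewritten manager, mirroring the unit test later in
// this commit. The key string is arbitrary, and the model/param buffers are
// assumed to already hold the serialized program and weights in memory.
void EngineSketch(const std::string& program_buf, const std::string& param_buf) {
  paddle::inference::lite::EngineConfig cfg;
  cfg.model = program_buf;  // serialized program
  cfg.param = param_buf;    // serialized weights
  cfg.valid_places = {
      paddle::lite_api::Place({TARGET(kX86), PRECISION(kFloat)}),
      paddle::lite_api::Place({TARGET(kHost), PRECISION(kAny)}),
  };
  paddle::lite_api::PaddlePredictor* engine =
      paddle::inference::Singleton<paddle::inference::lite::EngineManager>::
          Global()
              .Create("engine_key", cfg);
  CHECK_NOTNULL(engine);
}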
...@@ -23,12 +23,9 @@ ...@@ -23,12 +23,9 @@
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wall" #pragma GCC diagnostic ignored "-Wall"
#include "lite/api/cxx_api.h" #include "lite/api/cxx_api.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_place.h" #include "lite/api/paddle_place.h"
#include "lite/core/context.h" #include "lite/api/paddle_use_passes.h"
#include "lite/core/device_info.h"
#include "lite/core/memory.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
namespace paddle { namespace paddle {
...@@ -38,25 +35,33 @@ namespace lite { ...@@ -38,25 +35,33 @@ namespace lite {
struct EngineConfig { struct EngineConfig {
std::string model; std::string model;
std::string param; std::string param;
paddle::lite::Place prefer_place; std::vector<paddle::lite_api::Place> valid_places;
std::vector<paddle::lite::Place> valid_places;
std::vector<std::string> neglected_passes; std::vector<std::string> neglected_passes;
lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf}; lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf};
bool model_from_memory{true}; bool model_from_memory{true};
// for xpu
size_t xpu_l3_workspace_size; size_t xpu_l3_workspace_size;
// for x86 or arm
int cpu_math_library_num_threads{1};
// for cuda
bool use_multi_stream{false};
}; };
class EngineManager { class EngineManager {
public: public:
bool Empty() const; bool Empty() const;
bool Has(const std::string& name) const; bool Has(const std::string& name) const;
paddle::lite::Predictor* Get(const std::string& name) const; paddle::lite_api::PaddlePredictor* Get(const std::string& name) const;
paddle::lite::Predictor* Create(const std::string& name, paddle::lite_api::PaddlePredictor* Create(const std::string& name,
const EngineConfig& cfg); const EngineConfig& cfg);
void DeleteAll(); void DeleteAll();
private: private:
std::unordered_map<std::string, std::unique_ptr<paddle::lite::Predictor>> std::unordered_map<std::string,
std::shared_ptr<paddle::lite_api::PaddlePredictor>>
engines_; engines_;
}; };
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/inference/lite/tensor_utils.h" #include "paddle/fluid/inference/lite/tensor_utils.h"
#include <functional>
#include <map> #include <map>
#include <memory> #include <memory>
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
...@@ -144,16 +145,55 @@ void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data, ...@@ -144,16 +145,55 @@ void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data,
} }
} }
void InitDstTensor(paddle::lite::Tensor* dst, const framework::LoDTensor& src) { void* GetLiteTensorDataPtr(paddle::lite_api::Tensor* src,
PrecisionType precision_type,
TargetType target_type) {
void* res{nullptr};
switch (precision_type) {
case PrecisionType::kFloat:
res = static_cast<void*>(src->mutable_data<float>(target_type));
break;
case PrecisionType::kInt8:
res = static_cast<void*>(src->mutable_data<int8_t>(target_type));
break;
case PrecisionType::kInt32:
res = static_cast<void*>(src->mutable_data<int32_t>(target_type));
break;
case PrecisionType::kInt64:
res = static_cast<void*>(src->mutable_data<int64_t>(target_type));
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported precision type. Now only supports FP32, INT8, INT32 and "
"INT64."));
break;
}
return res;
}
int64_t GetLiteTensorNumel(const paddle::lite_api::Tensor& tensor) {
auto shape = tensor.shape();
int64_t numel = std::accumulate(shape.begin(), shape.end(), 1,
std::multiplies<int64_t>());
return numel;
}
void InitDstTensor(paddle::lite_api::Tensor* dst,
const framework::LoDTensor& src) {
// Currently, Lite needs to explicitly specify the target type of // Currently, Lite needs to explicitly specify the target type of
// the input tensor. // the input tensor.
constexpr int empty_size = 0; constexpr int empty_size = 0;
dst->mutable_data(GetLiteTargetType(src.place()), empty_size); dst->Resize({empty_size});
dst->set_precision(GetLitePrecisionType(src.type())); GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()),
SetLoD(dst->mutable_lod(), src.lod()); GetLiteTargetType(src.place()));
dst->SetPrecision(GetLitePrecisionType(src.type()));
paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src.lod());
dst->SetLoD(lite_lod);
} }
void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) { void InitDstTensor(framework::LoDTensor* dst,
const paddle::lite_api::Tensor& src) {
constexpr framework::proto::VarType::Type dtype = constexpr framework::proto::VarType::Type dtype =
framework::proto::VarType_Type_FP32; framework::proto::VarType_Type_FP32;
dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()), dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()),
...@@ -162,7 +202,8 @@ void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) { ...@@ -162,7 +202,8 @@ void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
} }
template <> template <>
void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src, void TensorCopyAsync(paddle::lite_api::Tensor* dst,
const framework::LoDTensor& src,
const platform::DeviceContext& ctx) { const platform::DeviceContext& ctx) {
InitDstTensor(dst, src); InitDstTensor(dst, src);
const platform::Place& src_place = src.place(); const platform::Place& src_place = src.place();
...@@ -171,52 +212,56 @@ void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src, ...@@ -171,52 +212,56 @@ void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src,
static_cast<size_t>(src.numel()) * framework::SizeOfType(src.type()); static_cast<size_t>(src.numel()) * framework::SizeOfType(src.type());
dst->Resize(framework::vectorize(src.dims())); dst->Resize(framework::vectorize(src.dims()));
const void* src_data = src.data<void>(); const void* src_data = src.data<void>();
void* dst_data = dst->mutable_data(bytes); void* dst_data{nullptr};
dst_data = GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()),
GetLiteTargetType(src.place()));
VLOG(3) << "[CopyAsync fluid -> lite] Bytes = " << bytes << ", src = " << &src VLOG(3) << "[CopyAsync fluid -> lite] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << src.type(); << ", dst = " << dst << ", src_type = " << src.type();
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx); MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << dst->memory_size(); VLOG(3) << "[Lite memory size] Bytes = " << bytes;
} }
template <> template <>
void TensorCopyAsync(framework::LoDTensor* dst, const paddle::lite::Tensor& src, void TensorCopyAsync(framework::LoDTensor* dst,
const paddle::lite_api::Tensor& src,
const platform::DeviceContext& ctx) { const platform::DeviceContext& ctx) {
dst->Resize(paddle::framework::make_ddim(src.dims().Vectorize())); dst->Resize(paddle::framework::make_ddim(src.shape()));
InitDstTensor(dst, src); InitDstTensor(dst, src);
const platform::Place& src_place = GetNativePlace(src.target()); const platform::Place& src_place = GetNativePlace(src.target());
const platform::Place& dst_place = dst->place(); const platform::Place& dst_place = dst->place();
const size_t bytes = int64_t src_numel = GetLiteTensorNumel(src);
static_cast<size_t>(src.numel()) * framework::SizeOfType(dst->type()); const size_t bytes = src_numel * framework::SizeOfType(dst->type());
const void* src_data = src.raw_data(); const void* src_data = src.data<void>();
// When Lite is ready, the source type needs to be modified here. // When Lite is ready, the source type needs to be modified here.
void* dst_data = dst->mutable_data(dst_place, dst->type()); void* dst_data = dst->mutable_data(dst_place, dst->type());
VLOG(3) << "[CopyAsync lite -> fluid] Bytes = " << bytes << ", src = " << &src VLOG(3) << "[CopyAsync lite -> fluid] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << dst->type(); << ", dst = " << dst << ", src_type = " << dst->type();
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx); MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << src.memory_size(); VLOG(3) << "[Lite memory size] Bytes = " << bytes;
} }
template <> template <>
void TensorDataShare(paddle::lite::Tensor* dst, framework::LoDTensor* src) { void TensorDataShare(paddle::lite_api::Tensor* dst, framework::LoDTensor* src) {
const size_t bytes =
static_cast<size_t>(src->numel()) * framework::SizeOfType(src->type());
auto buf = std::make_shared<paddle::lite::Buffer>(paddle::lite::Buffer(
src->data<void>(), GetLiteTargetType(src->place()), src->memory_size()));
dst->Resize(framework::vectorize(src->dims())); dst->Resize(framework::vectorize(src->dims()));
dst->set_precision(GetLitePrecisionType(src->type())); dst->ShareExternalMemory(src->data<void>(), src->memory_size(),
SetLoD(dst->mutable_lod(), src->lod()); GetLiteTargetType(src->place()));
dst->ResetBuffer(buf, bytes); dst->SetPrecision(GetLitePrecisionType(src->type()));
paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src->lod());
dst->SetLoD(lite_lod);
} }
template <> template <>
void TensorDataShare(framework::LoDTensor* dst, paddle::lite::Tensor* src) { void TensorDataShare(framework::LoDTensor* dst, paddle::lite_api::Tensor* src) {
constexpr framework::proto::VarType::Type dtype = constexpr framework::proto::VarType::Type dtype =
framework::proto::VarType_Type_FP32; framework::proto::VarType_Type_FP32;
void* src_raw_data = src->raw_data(); void* src_raw_data =
GetLiteTensorDataPtr(src, GetLitePrecisionType(dtype), src->target());
size_t memory_size = GetLiteTensorNumel(*src) * sizeof(float);
std::shared_ptr<memory::allocation::Allocation> holder( std::shared_ptr<memory::allocation::Allocation> holder(
new memory::allocation::Allocation(src_raw_data, src->memory_size(), new memory::allocation::Allocation(src_raw_data, memory_size,
GetNativePlace(src->target()))); GetNativePlace(src->target())));
dst->Resize(paddle::framework::make_ddim(src->dims().Vectorize())); dst->Resize(paddle::framework::make_ddim(src->shape()));
SetLoD(dst->mutable_lod(), src->lod()); SetLoD(dst->mutable_lod(), src->lod());
dst->ResetHolderWithType(holder, dtype); dst->ResetHolderWithType(holder, dtype);
} }
......
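// Editor's note -- hedged sketch (hypothetical helper, not in this commit):
// byte accounting with the two utilities added above, assuming the same
// namespace scope. For a float tensor of shape {2, 3, 4}, GetLiteTensorNumel()
// returns 2 * 3 * 4 = 24, so the copy paths above move 24 * sizeof(float) =
// 96 bytes.
size_t FloatLiteTensorBytes(const paddle::lite_api::Tensor& t) {
  return static_cast<size_t>(GetLiteTensorNumel(t)) * sizeof(float);
}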
...@@ -102,10 +102,10 @@ TEST(EngineManager, engine) { ...@@ -102,10 +102,10 @@ TEST(EngineManager, engine) {
config.model_from_memory = true; config.model_from_memory = true;
config.valid_places = { config.valid_places = {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}), paddle::lite_api::Place({TARGET(kCUDA), PRECISION(kFloat)}),
#endif #endif
paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}), paddle::lite_api::Place({TARGET(kX86), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}), paddle::lite_api::Place({TARGET(kHost), PRECISION(kAny)}),
}; };
LOG(INFO) << "Create EngineManager"; LOG(INFO) << "Create EngineManager";
...@@ -118,7 +118,7 @@ TEST(EngineManager, engine) { ...@@ -118,7 +118,7 @@ TEST(EngineManager, engine) {
ASSERT_EQ(inference::Singleton<inference::lite::EngineManager>::Global().Has( ASSERT_EQ(inference::Singleton<inference::lite::EngineManager>::Global().Has(
unique_key), unique_key),
true); true);
paddle::lite::Predictor* engine_0 = paddle::lite_api::PaddlePredictor* engine_0 =
inference::Singleton<inference::lite::EngineManager>::Global().Get( inference::Singleton<inference::lite::EngineManager>::Global().Get(
unique_key); unique_key);
CHECK_NOTNULL(engine_0); CHECK_NOTNULL(engine_0);
......
...@@ -73,6 +73,33 @@ TEST(LiteEngineOp, GetNativeLayoutType) { ...@@ -73,6 +73,33 @@ TEST(LiteEngineOp, GetNativeLayoutType) {
EXPECT_ANY_THROW(GetNativeLayoutType(DataLayoutType::kNHWC)); EXPECT_ANY_THROW(GetNativeLayoutType(DataLayoutType::kNHWC));
} }
template <typename T>
void test_lite_tensor_data_ptr(PrecisionType precision_type) {
void* GetLiteTensorDataPtr(paddle::lite_api::Tensor * src,
PrecisionType precision_type,
TargetType target_type);
const int count = 4;
paddle::lite::Tensor lite_tensor;
lite_tensor.Resize({count});
auto* lite_tensor_data = lite_tensor.mutable_data<T>();
for (size_t i = 0; i < count; ++i) {
lite_tensor_data[i] = i;
}
paddle::lite_api::Tensor lite_api_tensor(&lite_tensor);
T* data = static_cast<T*>(GetLiteTensorDataPtr(
&lite_api_tensor, precision_type, TargetType::kHost));
for (size_t i = 0; i < count; ++i) {
CHECK_EQ(data[i], static_cast<T>(i)) << "the i-th num is not correct.";
}
}
TEST(LiteEngineOp, GetLiteTensorDataPtr) {
test_lite_tensor_data_ptr<int64_t>(PrecisionType::kInt64);
test_lite_tensor_data_ptr<int32_t>(PrecisionType::kInt32);
test_lite_tensor_data_ptr<int8_t>(PrecisionType::kInt8);
EXPECT_ANY_THROW(test_lite_tensor_data_ptr<double>(PrecisionType::kUnk));
}
void test_tensor_copy(const platform::DeviceContext& ctx) { void test_tensor_copy(const platform::DeviceContext& ctx) {
// Create LoDTensor. // Create LoDTensor.
std::vector<float> vector({1, 2, 3, 4}); std::vector<float> vector({1, 2, 3, 4});
...@@ -83,10 +110,11 @@ void test_tensor_copy(const platform::DeviceContext& ctx) { ...@@ -83,10 +110,11 @@ void test_tensor_copy(const platform::DeviceContext& ctx) {
lod_tensor.set_lod(lod); lod_tensor.set_lod(lod);
// Create lite::Tensor and copy. // Create lite::Tensor and copy.
paddle::lite::Tensor lite_tensor; paddle::lite::Tensor lite_tensor;
TensorCopyAsync(&lite_tensor, lod_tensor, ctx); paddle::lite_api::Tensor lite_api_tensor(&lite_tensor);
TensorCopyAsync(&lite_api_tensor, lod_tensor, ctx);
// Copy to LoDTensor. // Copy to LoDTensor.
framework::LoDTensor lod_tensor_n; framework::LoDTensor lod_tensor_n;
TensorCopyAsync(&lod_tensor_n, lite_tensor, ctx); TensorCopyAsync(&lod_tensor_n, lite_api_tensor, ctx);
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (platform::is_gpu_place(ctx.GetPlace())) { if (platform::is_gpu_place(ctx.GetPlace())) {
platform::GpuStreamSync( platform::GpuStreamSync(
...@@ -108,10 +136,11 @@ void test_tensor_share(const platform::DeviceContext& ctx) { ...@@ -108,10 +136,11 @@ void test_tensor_share(const platform::DeviceContext& ctx) {
lod_tensor.set_lod(lod); lod_tensor.set_lod(lod);
// Create lite::Tensor and share. // Create lite::Tensor and share.
paddle::lite::Tensor lite_tensor; paddle::lite::Tensor lite_tensor;
TensorDataShare(&lite_tensor, &lod_tensor); paddle::lite_api::Tensor lite_api_tensor(&lite_tensor);
TensorDataShare(&lite_api_tensor, &lod_tensor);
// Copy to LoDTensor. // Copy to LoDTensor.
framework::LoDTensor lod_tensor_n; framework::LoDTensor lod_tensor_n;
TensorCopyAsync(&lod_tensor_n, lite_tensor, ctx); TensorCopyAsync(&lod_tensor_n, lite_api_tensor, ctx);
std::vector<float> result; std::vector<float> result;
TensorToVector(lod_tensor_n, ctx, &result); TensorToVector(lod_tensor_n, ctx, &result);
ASSERT_EQ(result, vector); ASSERT_EQ(result, vector);
......
...@@ -34,8 +34,11 @@ class ConcatOpConverter : public OpConverter { ...@@ -34,8 +34,11 @@ class ConcatOpConverter : public OpConverter {
itensors.push_back(engine_->GetITensor(input_name)); itensors.push_back(engine_->GetITensor(input_name));
} }
int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis")); int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
PADDLE_ENFORCE(axis > 0, PADDLE_ENFORCE_GT(axis, 0, platform::errors::InvalidArgument(
"The axis attr of Concat op should be large than 0 for trt"); "The axis attr of Concat"
" op should be larger than 0 for trt. "
"But received %d.",
axis));
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(), auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(),
itensors.size()); itensors.size());
......
...@@ -100,7 +100,9 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op, ...@@ -100,7 +100,9 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op,
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0}; TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
auto* layer = fadd_layer(const_cast<nvinfer1::ITensor*>(X), n_output, n_input, auto* layer = fadd_layer(const_cast<nvinfer1::ITensor*>(X), n_output, n_input,
nv_ksize, weight, bias); nv_ksize, weight, bias);
PADDLE_ENFORCE(layer != nullptr); PADDLE_ENFORCE_NOT_NULL(layer,
platform::errors::Fatal("TensorRT create conv2d"
" layer error."));
layer->setStride(nv_strides); layer->setStride(nv_strides);
layer->setPadding(nv_paddings); layer->setPadding(nv_paddings);
layer->setNbGroups(groups); layer->setNbGroups(groups);
......
...@@ -43,13 +43,30 @@ class ElementwiseWeightOpConverter : public OpConverter { ...@@ -43,13 +43,30 @@ class ElementwiseWeightOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer"; VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer";
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight op_desc.Input("X").size(), 1,
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); platform::errors::InvalidArgument(
"The input op's Input(\"X\").size() "
"should equal to 1, but received Input(\"X\").size() = %u.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(
op_desc.Input("Y").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\").size() "
"should equal to 1, but received Input(\"Y\").size() = %u.",
op_desc.Input("Y").size())); // Y is a weight
PADDLE_ENFORCE_EQ(
op_desc.Output("Out").size(), 1,
platform::errors::InvalidArgument(
"The input op's Output(\"Out\").size() "
"should equal to 1, but reveceid Output(\"Out\").size() = %u.",
op_desc.Output("Out").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front()); auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Y_v = scope.FindVar(op_desc.Input("Y").front()); auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
PADDLE_ENFORCE_NOT_NULL(Y_v); PADDLE_ENFORCE_NOT_NULL(
Y_v, platform::errors::NotFound("Variable %s not found in scope.",
op_desc.Input("Y").front().c_str()));
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>(); auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
float* weight_data = nullptr; float* weight_data = nullptr;
weight_data = weight_data =
...@@ -176,9 +193,24 @@ class ElementwiseTensorOpConverter : public OpConverter { ...@@ -176,9 +193,24 @@ class ElementwiseTensorOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
nvinfer1::ILayer* layer = nullptr; nvinfer1::ILayer* layer = nullptr;
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight op_desc.Input("X").size(), 1,
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); platform::errors::InvalidArgument(
"The input op's Input(\"X\").size() "
"should equal to 1, but received Input(\"X\").size() = %u.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(
op_desc.Input("Y").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\").size() "
"should equal to 1, but received Input(\"Y\").size() = %u.",
op_desc.Input("Y").size())); // Y is a weight
PADDLE_ENFORCE_EQ(
op_desc.Output("Out").size(), 1,
platform::errors::InvalidArgument(
"The input op's Output(\"Out\").size() "
"should equal to 1, but received Output(\"Out\").size() = %u.",
op_desc.Output("Out").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front()); auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Y = engine_->GetITensor(op_desc.Input("Y").front()); auto* Y = engine_->GetITensor(op_desc.Input("Y").front());
......
...@@ -29,38 +29,67 @@ class DefaultIOConverter : public EngineIOConverter { ...@@ -29,38 +29,67 @@ class DefaultIOConverter : public EngineIOConverter {
// NOTE out is GPU memory. // NOTE out is GPU memory.
virtual void operator()(const LoDTensor& in, void* out, virtual void operator()(const LoDTensor& in, void* out,
size_t max_size) override { size_t max_size) override {
PADDLE_ENFORCE(out != nullptr); PADDLE_ENFORCE_NOT_NULL(out,
PADDLE_ENFORCE(stream_ != nullptr); platform::errors::InvalidArgument(
"The input param 'out' must not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(stream_,
platform::errors::PreconditionNotMet(
"You should set up stream_ by SetStream() "
"before you call the operator()."));
const auto& place = in.place(); const auto& place = in.place();
size_t size = in.memory_size(); size_t size = in.memory_size();
PADDLE_ENFORCE_LE(size, max_size); PADDLE_ENFORCE_LE(
size, max_size,
platform::errors::InvalidArgument(
"The input Tensor in's memory_size shoule be less than or equal to "
"the input max_size. But in's memory_size = %u, max_size = %u.",
size, max_size));
if (is_cpu_place(place)) { if (is_cpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size, PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemcpyAsync(
cudaMemcpyHostToDevice, *stream_)); out, in.data<float>(), size, cudaMemcpyHostToDevice, *stream_));
} else if (is_gpu_place(place)) { } else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size, PADDLE_ENFORCE_EQ(
cudaMemcpyDeviceToDevice, *stream_)); 0, cudaMemcpyAsync(out, in.data<float>(), size,
cudaMemcpyDeviceToDevice, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error."));
} else { } else {
PADDLE_THROW("Unknown device for converter"); PADDLE_THROW(platform::errors::NotFound("Unknown device for converter"));
} }
cudaStreamSynchronize(*stream_); cudaStreamSynchronize(*stream_);
} }
// NOTE in is GPU memory. // NOTE in is GPU memory.
virtual void operator()(const void* in, LoDTensor* out, virtual void operator()(const void* in, LoDTensor* out,
size_t max_size) override { size_t max_size) override {
PADDLE_ENFORCE(in != nullptr); PADDLE_ENFORCE_NOT_NULL(in,
PADDLE_ENFORCE(stream_ != nullptr); platform::errors::InvalidArgument(
"The input param 'in' must not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(stream_,
platform::errors::PreconditionNotMet(
"You should set up stream_ by SetStream() "
"before you call the operator()."));
const auto& place = out->place(); const auto& place = out->place();
size_t size = out->memory_size(); size_t size = out->memory_size();
PADDLE_ENFORCE_LE(size, max_size); PADDLE_ENFORCE_LE(
size, max_size,
platform::errors::InvalidArgument(
"The input Tensor out's memory_size shoule be less than or equal "
"to the input max_size. "
"But out's memory_size = %u, max_size = %u.",
size, max_size));
if (is_cpu_place(place)) { if (is_cpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size, PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToHost, *stream_)); cudaMemcpyDeviceToHost, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToHost) error."));
} else if (is_gpu_place(place)) { } else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size, PADDLE_ENFORCE_EQ(
cudaMemcpyDeviceToDevice, *stream_)); 0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToDevice, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error."));
} else { } else {
PADDLE_THROW("Unknown device for converter"); PADDLE_THROW(platform::errors::NotFound("Unknown device for converter"));
} }
cudaStreamSynchronize(*stream_); cudaStreamSynchronize(*stream_);
} }
......
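The converter above copies tensor data onto the GPU (or between GPU buffers) with cudaMemcpyAsync on a user-supplied stream, checks the return status, and then synchronizes the stream before returning. The sketch below is a minimal, self-contained version of that host-to-device path using only the CUDA runtime API; the buffer size and variable names are made up for illustration, and it assumes a CUDA-capable machine and toolkit.

// Minimal sketch (not Paddle code): host-to-device copy on a stream with an
// explicit error check and a final synchronize, mirroring the CPU-place branch
// of the converter above.
#include <cuda_runtime.h>
#include <cstdio>
#include <vector>

int main() {
  const size_t n = 256;
  std::vector<float> host(n, 1.0f);

  float* device = nullptr;
  cudaStream_t stream;
  if (cudaMalloc(&device, n * sizeof(float)) != cudaSuccess ||
      cudaStreamCreate(&stream) != cudaSuccess) {
    std::fprintf(stderr, "CUDA setup failed\n");
    return 1;
  }

  // Async copy analogous to cudaMemcpyAsync(out, in.data<float>(), size, ...).
  cudaError_t err = cudaMemcpyAsync(device, host.data(), n * sizeof(float),
                                    cudaMemcpyHostToDevice, stream);
  if (err != cudaSuccess) {
    std::fprintf(stderr, "cudaMemcpyAsync failed: %s\n",
                 cudaGetErrorString(err));
    return 1;
  }
  cudaStreamSynchronize(stream);  // the converter also synchronizes its stream

  cudaStreamDestroy(stream);
  cudaFree(device);
  return 0;
}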
...@@ -44,10 +44,14 @@ class EngineIOConverter {
  static void ConvertInput(const std::string& op_type, const LoDTensor& in,
                           void* out, size_t max_size, cudaStream_t* stream) {
    PADDLE_ENFORCE_NOT_NULL(stream,
                            platform::errors::InvalidArgument(
                                "The input stream must not be nullptr."));
    auto* converter = Registry<EngineIOConverter>::Global().Lookup(
        op_type, "default" /* default_type */);
    PADDLE_ENFORCE_NOT_NULL(
        converter, platform::errors::Unimplemented(
                       "The %s in is not supported yet.", op_type.c_str()));
    converter->SetStream(stream);
    (*converter)(in, out, max_size);
  }
...@@ -55,10 +59,14 @@ class EngineIOConverter {
  static void ConvertOutput(const std::string& op_type, const void* in,
                            LoDTensor* out, size_t max_size,
                            cudaStream_t* stream) {
    PADDLE_ENFORCE_NOT_NULL(stream,
                            platform::errors::InvalidArgument(
                                "The input stream must not be nullptr."));
    auto* converter = Registry<EngineIOConverter>::Global().Lookup(
        op_type, "default" /* default_type */);
    PADDLE_ENFORCE_NOT_NULL(
        converter, platform::errors::Unimplemented(
                       "The %s in is not supported yet.", op_type.c_str()));
    converter->SetStream(stream);
    (*converter)(in, out, max_size);
  }
......
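ConvertInput and ConvertOutput first look the converter up in a global registry and only then dereference it, which is why the commit gives the non-null enforcement an Unimplemented message that names the op type. The sketch below imitates that flow with a simplified stand-in registry (a plain std::map and a local Converter struct, both hypothetical); it is not the Paddle Registry template, only an illustration of why the lookup result must be null-checked.

// Minimal sketch (not Paddle code): a lookup that may return nullptr, which is
// why ConvertInput/ConvertOutput above enforce a non-null converter before use.
#include <cstdio>
#include <map>
#include <string>

struct Converter {
  const char* name;
};

// Simplified stand-in for Registry<EngineIOConverter>::Global().Lookup().
Converter* Lookup(const std::string& op_type) {
  static std::map<std::string, Converter> registry = {
      {"default", Converter{"default"}}};
  auto it = registry.find(op_type);
  return it == registry.end() ? nullptr : &it->second;
}

int main() {
  Converter* c = Lookup("elementwise_add");
  if (c == nullptr) {
    // Analogous to PADDLE_ENFORCE_NOT_NULL(converter, errors::Unimplemented(...)).
    std::printf("The elementwise_add converter is not registered.\n");
    c = Lookup("default");  // mirrors the "default" /* default_type */ argument above
  }
  std::printf("using converter: %s\n", c->name);
  return 0;
}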
...@@ -31,6 +31,7 @@ struct SimpleOpTypeSetTeller : public Teller {
    teller_set.insert("fused_embedding_eltwise_layernorm");
    teller_set.insert("multihead_matmul");
    teller_set.insert("skip_layernorm");
    teller_set.insert("slice");
#endif
  }
......
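The op_teller change is a one-liner: "slice" is added to the set of op types the teller reports as convertible to TensorRT. Functionally, a teller of this kind is a set-membership query, which the self-contained sketch below illustrates with a hypothetical TellerSupports function; the real SimpleOpTypeSetTeller holds a much larger set.

// Minimal sketch (not Paddle code): a teller is essentially a set-membership
// test over op types; the hunk above just adds "slice" to that set.
#include <cstdio>
#include <string>
#include <unordered_set>

bool TellerSupports(const std::string& op_type) {
  static const std::unordered_set<std::string> teller_set = {
      "fused_embedding_eltwise_layernorm", "multihead_matmul",
      "skip_layernorm", "slice"};
  return teller_set.count(op_type) > 0;
}

int main() {
  std::printf("slice supported: %s\n", TellerSupports("slice") ? "yes" : "no");
  std::printf("lstm supported: %s\n", TellerSupports("lstm") ? "yes" : "no");
  return 0;
}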
(The remaining 80 file diffs in this commit are collapsed and not shown.)