Commit a157f1c7 authored by: J jingqinghe
......@@ -16,6 +16,7 @@ else()
set(paddle_known_gpu_archs8 "30 35 50 52 60 61")
set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70")
set(paddle_known_gpu_archs10 "30 35 50 52 60 61 70 75")
set(paddle_known_gpu_archs11 "52 60 61 70 75 80")
endif()
######################################################################################
......@@ -188,6 +189,10 @@ elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x
set(paddle_known_gpu_archs ${paddle_known_gpu_archs10})
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.x
set(paddle_known_gpu_archs ${paddle_known_gpu_archs11})
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
endif()
add_definitions("-DPADDLE_CUDA_BINVER=\"${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}\"")
......
......@@ -19,7 +19,7 @@ SET(DGC_SOURCES_DIR "${THIRD_PARTY_PATH}/dgc/src/extern_dgc")
SET(DGC_INSTALL_DIR "${THIRD_PARTY_PATH}/install/dgc")
SET(DGC_INCLUDE_DIR "${DGC_INSTALL_DIR}/include" CACHE PATH "dgc include directory." FORCE)
SET(DGC_LIBRARIES "${DGC_INSTALL_DIR}/lib/libdgc.a" CACHE FILEPATH "dgc library." FORCE)
SET(DGC_URL "http://fleet.bj.bcebos.com/collective_ef2216a.tgz")
SET(DGC_URL "https://fleet.bj.bcebos.com/dgc/collective_f66ef73.tgz")
INCLUDE_DIRECTORIES(${DGC_INCLUDE_DIR})
cache_third_party(extern_dgc
......@@ -30,7 +30,7 @@ ExternalProject_Add(
extern_dgc
${EXTERNAL_PROJECT_LOG_ARGS}
"${DGC_DOWNLOAD_CMD}"
URL_MD5 "2f67549fd5f1262383d83289abc4f88f"
URL_MD5 "94e6fa1bc97169d0e1aad44570fe3251"
PREFIX "${DGC_PREFIX_DIR}"
SOURCE_DIR "${DGC_SOURCES_DIR}"
CONFIGURE_COMMAND ""
......
......@@ -34,7 +34,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
set(LITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lite)
if(NOT LITE_GIT_TAG)
set(LITE_GIT_TAG dfdfa6440c83bf0b415f9f5a9ff84842ce0bb0fa)
set(LITE_GIT_TAG 6d2b2a4028a58715b01887b04eb9bff8432eb184)
endif()
if(NOT CUDA_ARCH_NAME)
......
......@@ -19,8 +19,8 @@ SET(MKLDNN_PREFIX_DIR ${THIRD_PARTY_PATH}/mkldnn)
SET(MKLDNN_SOURCE_DIR ${THIRD_PARTY_PATH}/mkldnn/src/extern_mkldnn)
SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn)
SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE)
SET(MKLDNN_REPOSITORY https://github.com/intel/mkl-dnn.git)
SET(MKLDNN_TAG 1ea812f4f5aa1bd989372a23ab50d0f0f81ee677)
SET(MKLDNN_REPOSITORY https://github.com/oneapi-src/oneDNN.git)
SET(MKLDNN_TAG 64a48f9565aa72f6359917b3406328075a409939)
# Introduce variables:
# * CMAKE_INSTALL_LIBDIR
......
......@@ -18,7 +18,7 @@ SET(WARPCTC_PREFIX_DIR ${THIRD_PARTY_PATH}/warpctc)
SET(WARPCTC_SOURCE_DIR ${THIRD_PARTY_PATH}/warpctc/src/extern_warpctc)
SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc)
set(WARPCTC_REPOSITORY https://github.com/baidu-research/warp-ctc.git)
set(WARPCTC_TAG bc29dcfff07ced1c7a19a4ecee48e5ad583cef8e)
set(WARPCTC_TAG fc7f226b93758216a03b1be9d24593a12819b984)
SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
CACHE PATH "Warp-ctc Directory" FORCE)
......
......@@ -28,7 +28,15 @@ function(CheckCompilerCXX11Flag)
endfunction()
CheckCompilerCXX11Flag()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
if (WITH_GPU)
if (${CMAKE_CUDA_COMPILER_VERSION} GREATER_EQUAL 11.0)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
endif()
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
endif()
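Editor's note: the branch above moves the host standard to C++14 when building with CUDA 11, whose bundled Thrust/CUB require C++14. A standalone probe (generic C++, not part of this diff) to confirm which standard a translation unit actually gets:
#include <iostream>

int main() {
  // Prints 201402 (or higher) when -std=c++14 is in effect, 201103 for C++11.
  // Note: MSVC reports the real value only with /Zc:__cplusplus.
  std::cout << "__cplusplus = " << __cplusplus << "\n";
  return 0;
}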
# safe_set_flag
#
# Set a compile flag only if compiler is support
......
......@@ -386,7 +386,7 @@ function(cc_test_run TARGET_NAME)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
# No unit test should exceed 2 minutes.
if (APPLE OR WIN32)
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150)
else()
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 120)
endif()
......@@ -748,7 +748,7 @@ function(py_test TARGET_NAME)
endif()
if (APPLE OR WIN32)
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150)
else()
# No unit test should exceed 2 minutes in Linux.
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 120)
......
......@@ -138,12 +138,17 @@ function(op_library TARGET)
# And for detail pybind information, please see generated paddle/pybind/pybind.h.
file(READ ${TARGET}.cc TARGET_CONTENT)
string(REGEX MATCH "REGISTER_OPERATOR\\(.*REGISTER_OPERATOR\\(" multi_register "${TARGET_CONTENT}")
string(REGEX MATCH "REGISTER_OPERATOR\\([a-z0-9_]*," one_register "${multi_register}")
# [ \t\r\n]* matches any run of blank characters
string(REGEX MATCH "REGISTER_OPERATOR\\([ \t\r\n]*[a-z0-9_]*," one_register "${multi_register}")
if (one_register STREQUAL "")
string(REPLACE "_op" "" TARGET "${TARGET}")
else ()
string(REPLACE "REGISTER_OPERATOR(" "" TARGET "${one_register}")
string(REPLACE "," "" TARGET "${TARGET}")
# [ \t\r\n]+ matches any run of blank characters.
# '+' is used instead of '*' here because a REPLACE operation must not match an empty string.
string(REGEX REPLACE "[ \t\r\n]+" "" TARGET "${TARGET}")
endif()
# pybind USE_NO_KERNEL_OP
......
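Editor's note: the `[ \t\r\n]*` change above makes the op-name extraction tolerate a line break after `REGISTER_OPERATOR(`. A self-contained sketch of the same idea (using std::regex rather than CMake's regex engine, with a hypothetical operator name):
#include <iostream>
#include <regex>
#include <string>

int main() {
  // A registration split across lines, as clang-format may produce it.
  std::string content = "REGISTER_OPERATOR(\n    my_op, ...";
  std::regex old_pattern("REGISTER_OPERATOR\\([a-z0-9_]*,");
  std::regex new_pattern("REGISTER_OPERATOR\\([ \t\r\n]*[a-z0-9_]*,");
  std::cout << std::regex_search(content, old_pattern) << "\n";  // 0: misses it
  std::cout << std::regex_search(content, new_pattern) << "\n";  // 1: matches
  return 0;
}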
......@@ -243,9 +243,10 @@ IF(WITH_TESTING OR (WITH_DISTRIBUTE AND NOT WITH_GRPC))
ENDIF()
if(WITH_GPU)
if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0)
include(external/cub) # download cub
list(APPEND third_party_deps extern_cub)
endif()
set(CUDAERROR_URL "http://paddlepaddledeps.bj.bcebos.com/cudaErrorMessage.tar.gz" CACHE STRING "" FORCE)
file_download_and_uncompress(${CUDAERROR_URL} "cudaerror") # download file cudaErrorMessage
endif(WITH_GPU)
......
......@@ -49,7 +49,8 @@ std::vector<std::string> PD_GetGradOpDescStrs(
for (size_t i = 0; i < op_num; ++i) {
PADDLE_ENFORCE_EQ(
grad_op_descs[i]->Proto()->SerializePartialToString(&ret[i]), true,
"Cannot serialize message.");
paddle::platform::errors::Unavailable(
"Cannot serialize operator desc message."));
}
}
return ret;
......
......@@ -36,7 +36,10 @@ message AMPConfig {
repeated string custom_black_varnames = 9;
}
message LocalSGDConfig { optional int32 k_steps = 1 [ default = 4 ]; }
message LocalSGDConfig {
optional int32 k_steps = 1 [ default = 1 ];
optional int32 begin_step = 2 [ default = 1 ];
}
message GradientMergeConfig {
optional int32 k_steps = 1 [ default = 1 ];
......@@ -52,6 +55,8 @@ message DGCConfig {
message LarsConfig {
optional float lars_coeff = 1 [ default = 0.001 ];
optional float lars_weight_decay = 2 [ default = 0.0005 ];
optional float epsilon = 3 [ default = 0.0 ];
repeated string exclude_from_weight_decay = 4;
}
message LambConfig {
......
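Editor's note: with standard protoc-generated C++ for the messages above, the new fields behave like any optional scalar. A sketch (the generated header name and namespace are assumptions for illustration):
#include <iostream>
#include "distributed_strategy.pb.h"  // hypothetical generated header name

int main() {
  paddle::fleet::LocalSGDConfig cfg;      // namespace assumed from the package
  std::cout << cfg.k_steps() << "\n";     // 1: the new default (was 4)
  cfg.set_begin_step(5);                  // newly added field
  std::cout << cfg.begin_step() << "\n";  // 5
  return 0;
}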
......@@ -25,7 +25,7 @@ bool NCCLWrapper::is_initialized_ = false;
void NCCLWrapper::InitNCCL() {
#if defined(PADDLE_WITH_NCCL)
PADDLE_ENFORCE(platform::dynload::ncclCommInitRank(
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclCommInitRank(
&(nccl_info_.comm_), nccl_info_.global_ranks_, nccl_info_.nccl_id_,
nccl_info_.my_global_rank_));
#endif
......@@ -41,7 +41,8 @@ void NCCLWrapper::SetNCCLId(const NCCLInfo& nccl_info) {
NCCLInfo NCCLWrapper::GetNCCLId() {
#if defined(PADDLE_WITH_NCCL)
PADDLE_ENFORCE(platform::dynload::ncclGetUniqueId(&(nccl_info_.nccl_id_)));
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::ncclGetUniqueId(&(nccl_info_.nccl_id_)));
#endif
return nccl_info_;
}
......@@ -52,8 +53,8 @@ void NCCLWrapper::SetRankInfo(const int local_rank, const int global_rank,
nccl_info_.local_rank_ = local_rank;
nccl_info_.my_global_rank_ = global_rank;
nccl_info_.global_ranks_ = ranks;
PADDLE_ENFORCE(cudaSetDevice(local_rank));
PADDLE_ENFORCE(cudaStreamCreate(&(nccl_info_.stream_)));
PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(local_rank));
PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamCreate(&(nccl_info_.stream_)));
#endif
return;
}
......@@ -65,7 +66,7 @@ void NCCLWrapper::SyncVar(const int root_rank, const Scope& scope,
auto var = scope.FindVar(name);
LoDTensor* tensor = var->GetMutable<LoDTensor>();
int32_t total_size = tensor->numel();
PADDLE_ENFORCE(platform::dynload::ncclBcast(
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclBcast(
reinterpret_cast<void*>(tensor->data<float>()), total_size, ncclFloat,
root_rank, nccl_info_.comm_, nccl_info_.stream_));
cudaStreamSynchronize(nccl_info_.stream_);
......
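Editor's note: the replacements above swap the generic PADDLE_ENFORCE for a status-code check on the returned CUDA/NCCL error. A minimal sketch of what such a check boils down to (an assumption for illustration, not Paddle's actual macro definition):
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

#define CHECK_CUDA_SUCCESS(call)                                      \
  do {                                                                \
    cudaError_t status__ = (call);                                    \
    if (status__ != cudaSuccess) {                                    \
      std::fprintf(stderr, "CUDA error %d: %s\n",                     \
                   static_cast<int>(status__),                        \
                   cudaGetErrorString(status__));                     \
      std::exit(EXIT_FAILURE);                                        \
    }                                                                 \
  } while (0)

int main() {
  cudaStream_t stream;
  CHECK_CUDA_SUCCESS(cudaSetDevice(0));
  CHECK_CUDA_SUCCESS(cudaStreamCreate(&stream));
  CHECK_CUDA_SUCCESS(cudaStreamDestroy(stream));
  return 0;
}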
......@@ -102,6 +102,8 @@ if(WITH_MKLDNN)
pass_library(conv_concat_relu_mkldnn_fuse_pass inference DIR mkldnn)
pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
pass_library(scale_matmul_fuse_pass inference DIR mkldnn)
pass_library(cpu_bfloat16_placement_pass inference DIR mkldnn)
pass_library(cpu_bfloat16_pass inference DIR mkldnn)
pass_library(fc_mkldnn_pass inference DIR mkldnn)
pass_library(cpu_quantize_placement_pass base DIR mkldnn)
pass_library(cpu_quantize_pass inference DIR mkldnn)
......@@ -162,4 +164,6 @@ endif()
cc_test(test_cpu_quantize_squash_pass SRCS mkldnn/cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor)
cc_test(test_reshape_transpose_matmul_mkldnn_fuse_pass SRCS mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc DEPS reshape_transpose_matmul_mkldnn_fuse_pass)
cc_test(test_matmul_transpose_reshape_fuse_pass SRCS mkldnn/matmul_transpose_reshape_fuse_pass_tester.cc DEPS matmul_transpose_reshape_fuse_pass)
cc_test(test_cpu_bfloat16_placement_pass SRCS mkldnn/cpu_bfloat16_placement_pass_tester.cc DEPS cpu_bfloat16_placement_pass)
cc_test(test_cpu_bfloat16_pass SRCS mkldnn/cpu_bfloat16_pass_tester.cc DEPS cpu_bfloat16_pass)
endif ()
......@@ -1892,6 +1892,82 @@ PDNode *patterns::QuantizePlacement::operator()(
return op;
}
PDNode *patterns::Bfloat16Placement::operator()(
const std::unordered_set<std::string> &bfloat16_enabled_op_types) {
std::unordered_set<std::string> supported_op_types;
if (!bfloat16_enabled_op_types.empty()) {
supported_op_types = bfloat16_enabled_op_types;
}
auto *op = pattern->NewNode(op_repr())->assert_is_ops(supported_op_types);
return op;
}
PDNode *patterns::OrphanedBfloat16::operator()() {
auto *prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
prev_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"float32";
});
auto *prev_out = pattern->NewNode(prev_out_repr())->AsOutput();
auto *op = pattern->NewNode(op_repr())->assert_is_op();
op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"bfloat16";
});
auto *op_out = pattern->NewNode(op_out_repr())->AsOutput();
auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op();
next_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"float32";
});
prev_op->LinksTo({prev_out});
op->LinksFrom({prev_out}).LinksTo({op_out});
next_op->LinksFrom({op_out});
return next_op;
}
PDNode *patterns::LastBfloat16Ops::operator()() {
auto *op = pattern->NewNode(op_repr())->assert_is_op();
op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"bfloat16";
});
auto *op_out = pattern->NewNode(op_out_repr())->AsOutput();
auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op();
next_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") !=
"bfloat16";
});
op->LinksTo({op_out});
next_op->LinksFrom({op_out});
return next_op;
}
PDNode *patterns::FirstBfloat16Ops::operator()() {
auto *prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
prev_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") !=
"bfloat16";
});
auto *op_in = pattern->NewNode(op_in_repr())->AsOutput();
auto *op = pattern->NewNode(op_repr())->assert_is_op();
op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"bfloat16";
});
prev_op->LinksTo({op_in});
op->LinksFrom({op_in});
return op;
}
PDNode *patterns::MKLDNNInPlace::operator()() {
const std::unordered_set<std::string> &supported_op_types = {
"abs",
......
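Editor's note: the three patterns above differ only in where the float32/bfloat16 boundary sits around an op. A toy predicate for the "orphaned" case (a standalone sketch, not the detector API):
#include <iostream>
#include <string>

// Stand-in for GetAttrIfExists<std::string>("mkldnn_data_type") on a node.
bool IsOrphanedBfloat16(const std::string& prev, const std::string& op,
                        const std::string& next) {
  // An op is orphaned when both of its neighbours run at float32.
  return prev == "float32" && op == "bfloat16" && next == "float32";
}

int main() {
  std::cout << IsOrphanedBfloat16("float32", "bfloat16", "float32") << "\n";   // 1
  std::cout << IsOrphanedBfloat16("bfloat16", "bfloat16", "float32") << "\n";  // 0
  return 0;
}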
......@@ -1129,6 +1129,47 @@ struct QuantizePlacement : public PatternBase {
PATTERN_DECL_NODE(op);
};
struct Bfloat16Placement : public PatternBase {
Bfloat16Placement(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "bfloat16_placement") {}
PDNode* operator()(
const std::unordered_set<std::string>& bfloat16_enabled_op_types);
PATTERN_DECL_NODE(op);
};
struct OrphanedBfloat16 : public PatternBase {
OrphanedBfloat16(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "orphaned_bfloat16") {}
PDNode* operator()();
PATTERN_DECL_NODE(prev_op);
PATTERN_DECL_NODE(prev_out);
PATTERN_DECL_NODE(op);
PATTERN_DECL_NODE(op_out);
PATTERN_DECL_NODE(next_op);
};
struct LastBfloat16Ops : public PatternBase {
LastBfloat16Ops(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "last_bfloat16_ops") {}
PDNode* operator()();
PATTERN_DECL_NODE(op);
PATTERN_DECL_NODE(op_out);
PATTERN_DECL_NODE(next_op);
};
struct FirstBfloat16Ops : public PatternBase {
FirstBfloat16Ops(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "first_bfloat16_ops") {}
PDNode* operator()();
PATTERN_DECL_NODE(prev_op);
PATTERN_DECL_NODE(op_in);
PATTERN_DECL_NODE(op);
};
// Pattern used to enforce in-place computation for the DNNL ops that
// support it: softmax, batch_norm and layer_norm
struct MKLDNNInPlace : public PatternBase {
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace framework {
namespace ir {
using string::PrettyLogDetail;
void UnlinkNodes(ir::Node* a, ir::Node* b) {
a->outputs.erase(std::remove(a->outputs.begin(), a->outputs.end(), b),
a->outputs.end());
b->inputs.erase(std::remove(b->inputs.begin(), b->inputs.end(), a),
b->inputs.end());
}
void CPUBFloat16Pass::SetInputDataType(ir::Graph* graph) const {
GraphPatternDetector gpd;
patterns::FirstBfloat16Ops bfloat16_ops{gpd.mutable_pattern(),
"first_bfloat16_ops"};
bfloat16_ops();
int quantize_counter = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(op_in, op_in, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_ops);
if (op->Op()->Type() != "conv2d" && prev_op->Op()->Type() != "quantize") {
VarDesc quantize_out_desc(patterns::PDNodeName("quantize", "out"));
auto* quantize_out_node = g->CreateVarNode(&quantize_out_desc);
// create a quantize op node
OpDesc q_desc;
q_desc.SetType("quantize");
q_desc.SetInput("Input", std::vector<std::string>({op_in->Name()}));
q_desc.SetOutput("Output",
std::vector<std::string>({quantize_out_node->Name()}));
q_desc.SetAttr("Scale", 1.f);
q_desc.SetAttr("bfloat16", true);
q_desc.SetAttr("output_format", Has("data_layout")
? Get<std::string>("data_layout")
: "NCHW");
auto quantize_op = g->CreateOpNode(&q_desc); // OpDesc will be copied.
std::string op_input_name;
for (auto name : op->Op()->InputNames()) {
for (auto input_name : op->Op()->Input(name)) {
if (input_name == op_in->Name()) op_input_name = name;
}
}
PADDLE_ENFORCE_NE(
op_input_name.empty(), true,
platform::errors::NotFound(
"Operator before operator should have input as op output"));
op->Op()->SetInput(op_input_name,
std::vector<std::string>({quantize_out_node->Name()}));
UnlinkNodes(op_in, op);
IR_NODE_LINK_TO(op_in, quantize_op);
IR_NODE_LINK_TO(quantize_op, quantize_out_node);
IR_NODE_LINK_TO(quantize_out_node, op);
quantize_counter++;
}
};
gpd(graph, handler);
PrettyLogDetail("--- added %d quantize op before bfloat16 op",
quantize_counter);
}
void CPUBFloat16Pass::SetOutputDataType(ir::Graph* graph) const {
GraphPatternDetector gpd;
patterns::LastBfloat16Ops bfloat16_ops{gpd.mutable_pattern(),
"last_bfloat16_ops"};
bfloat16_ops();
int force_fp32_counter = 0, dequantize_counter = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(op_out, op_out, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, bfloat16_ops);
if ((op->Op()->HasAttr("force_fp32_output") ||
op->Op()->HasProtoAttr("force_fp32_output")) &&
!op->Op()->GetAttrIfExists<bool>("fuse_residual_connection")) {
op->Op()->SetAttr("force_fp32_output", true);
force_fp32_counter++;
} else if (op->Op()->Type() != "prior_box") {
// Create dequantize input variable
VarDesc dequantize_in_desc(patterns::PDNodeName("dequantize", "in"));
auto* dequantize_in_node = g->CreateVarNode(&dequantize_in_desc);
// create a dequantize op node for output.
OpDesc deq_desc;
deq_desc.SetType("dequantize");
deq_desc.SetInput("Input",
std::vector<std::string>({dequantize_in_node->Name()}));
deq_desc.SetOutput("Output", std::vector<std::string>({op_out->Name()}));
deq_desc.SetAttr("Scale", 1.0f);
auto dequantize_op = g->CreateOpNode(&deq_desc);
std::string op_output_name;
for (auto name : op->Op()->OutputNames()) {
for (auto output_name : op->Op()->Output(name)) {
if (output_name == op_out->Name()) op_output_name = name;
}
}
PADDLE_ENFORCE_NE(
op_output_name.empty(), true,
platform::errors::NotFound(
"Operator after operator should have input as op output"));
op->Op()->SetOutput(op_output_name, std::vector<std::string>(
{dequantize_in_node->Name()}));
UnlinkNodes(op, op_out);
IR_NODE_LINK_TO(op, dequantize_in_node);
IR_NODE_LINK_TO(dequantize_in_node, dequantize_op);
IR_NODE_LINK_TO(dequantize_op, op_out);
dequantize_counter++;
}
};
gpd(graph, handler);
PrettyLogDetail("--- added %d dequantize op and used %d force_fp32_output",
dequantize_counter, force_fp32_counter);
}
void CPUBFloat16Pass::ApplyImpl(ir::Graph* graph) const {
SetInputDataType(graph);
SetOutputDataType(graph);
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(cpu_bfloat16_pass, paddle::framework::ir::CPUBFloat16Pass);
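Editor's note: UnlinkNodes above relies on the standard erase-remove idiom on both adjacency lists. The same pattern in a standalone sketch:
#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  // Detach every edge to node id 2: std::remove shifts the surviving
  // elements forward, erase trims the leftover tail.
  std::vector<int> outputs = {2, 3, 2, 4};
  outputs.erase(std::remove(outputs.begin(), outputs.end(), 2), outputs.end());
  assert((outputs == std::vector<int>{3, 4}));
  return 0;
}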
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include "paddle/fluid/framework/ir/pass.h"
namespace paddle {
namespace framework {
namespace ir {
class CPUBFloat16Pass : public Pass {
protected:
void SetInputDataType(ir::Graph* graph) const;
void SetOutputDataType(ir::Graph* graph) const;
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs, bool use_mkldnn,
const std::string& mkldnn_data_type = "float32",
const bool force_fp32_output = false) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
op->SetAttr("use_mkldnn", use_mkldnn);
op->SetAttr("name", name);
if (type == "conv2d") {
op->SetInput("Input", {inputs[0]});
op->SetOutput("Output", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
op->SetAttr("force_fp32_output", force_fp32_output);
} else if (type == "pool2d" || type == "transpose2" || type == "reshape2" ||
type == "dropout") {
op->SetInput("X", {inputs[0]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
} else if (type == "fc") {
op->SetInput("Input", {inputs[0]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
} else if (type == "concat") {
op->SetInput("X", inputs);
op->SetOutput("Out", outputs);
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
} else if (type == "matmul" || type == "elementwise_add") {
op->SetInput("X", {inputs[0]});
if (inputs.size() > 1) op->SetInput("Y", {inputs[1]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
}
}
void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
const std::initializer_list<std::string> variable_names,
int* original_nodes_num, int* current_nodes_num) {
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
*original_nodes_num = (*graph)->Nodes().size();
(*graph).reset(pass->Apply((*graph).release()));
*current_nodes_num = (*graph)->Nodes().size();
}
static const std::initializer_list<std::string> variable_names{
"z", "a", "b", "c", "d", "e", "f", "g", "h", "i"};
ProgramDesc BuildProgramDesc(bool use_mkldnn) {
ProgramDesc prog;
for (auto& v : variable_names) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "dropout", "Dropout1", {"z"}, {"a"}, use_mkldnn, "float32");
SetOp(&prog, "conv2d", "Conv1", {"a"}, {"b"}, use_mkldnn, "bfloat16");
SetOp(&prog, "pool2d", "Pool1", {"b"}, {"c"}, use_mkldnn, "bfloat16");
SetOp(&prog, "conv2d", "Conv1", {"c"}, {"d"}, use_mkldnn, "bfloat16");
SetOp(&prog, "dropout", "Dropout2", {"d"}, {"e"}, use_mkldnn, "float32");
SetOp(&prog, "transpose2", "Transpose1", {"e"}, {"f"}, use_mkldnn,
"bfloat16");
SetOp(&prog, "reshape2", "Reshape1", {"f"}, {"g"}, use_mkldnn, "bfloat16");
SetOp(&prog, "concat", "Concat1", {"g"}, {"h"}, use_mkldnn, "bfloat16");
SetOp(&prog, "dropout", "Dropout3", {"h"}, {"i"}, use_mkldnn, "float32");
return prog;
}
void MainTest(const ProgramDesc& prog, int conv_count, int pool_count,
int transpose_count, int quant_count, int dequant_count,
int added_nodes_count) {
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
int original_nodes_num, current_nodes_num;
PreparePass(&graph, prog, variable_names, &original_nodes_num,
&current_nodes_num);
int quantize_nodes_count = 0;
int dequantize_nodes_count = 0;
int conv2d_nodes_count = 0;
int pool2d_nodes_count = 0;
int transpose2_nodes_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
auto* op = node->Op();
if (op->Type() == "conv2d") {
conv2d_nodes_count++;
} else if (op->Type() == "pool2d") {
pool2d_nodes_count++;
} else if (op->Type() == "transpose2") {
transpose2_nodes_count++;
} else if (op->Type() == "quantize") {
quantize_nodes_count++;
} else if (op->Type() == "dequantize") {
dequantize_nodes_count++;
}
}
}
EXPECT_EQ(conv2d_nodes_count, conv_count);
EXPECT_EQ(pool2d_nodes_count, pool_count);
EXPECT_EQ(transpose2_nodes_count, transpose_count);
EXPECT_EQ(quantize_nodes_count, quant_count);
EXPECT_EQ(dequantize_nodes_count, dequant_count);
EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
}
TEST(CpuQuantizePass, quantize) {
bool use_mkldnn = true;
// 1 quantize + 1 dequantize
int added_nodes = 2;
MainTest(BuildProgramDesc(use_mkldnn), 2, 1, 1, 1, 2, added_nodes);
}
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(cpu_bfloat16_pass);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h"
#include <string>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace framework {
namespace ir {
using string::PrettyLogDetail;
void CPUBfloat16PlacementPass::SetMkldnnDataType(
ir::Graph* graph, int* bfloat16_operators) const {
const auto& op_types_list =
Get<std::unordered_set<std::string>>("bfloat16_enabled_op_types");
// Set mkldnn_data_type to bfloat16 for every operator that is listed in
// the bfloat16_enabled_op_types set or matched by the Bfloat16Placement
// pattern.
GraphPatternDetector gpd;
patterns::Bfloat16Placement bfloat16_placement_pattern{gpd.mutable_pattern(),
"bfloat16_placement"};
bfloat16_placement_pattern(op_types_list);
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_placement_pattern);
if ((op->Op()->HasAttr("mkldnn_data_type") ||
op->Op()->HasProtoAttr("mkldnn_data_type")) &&
!platform::HasOpINT8DataType(op->Op())) {
op->Op()->SetAttr("mkldnn_data_type", std::string("bfloat16"));
(*bfloat16_operators)++;
}
};
gpd(graph, handler);
}
void CPUBfloat16PlacementPass::RemoveOrhanedOperators(
ir::Graph* graph, int* bfloat16_operators) const {
// Find each orphaned bfloat16 operator, i.e. one sitting between two
// float32 operators, and revert its mkldnn_data_type attribute to float32.
GraphPatternDetector gpd;
patterns::OrphanedBfloat16 orphaned_bfloat16_pattern{gpd.mutable_pattern(),
"orphaned_bfloat16"};
orphaned_bfloat16_pattern();
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(op, op, orphaned_bfloat16_pattern);
op->Op()->SetAttr("mkldnn_data_type", std::string("float32"));
(*bfloat16_operators)--;
};
gpd(graph, handler);
}
void CPUBfloat16PlacementPass::ApplyImpl(ir::Graph* graph) const {
int bfloat16_operators = 0;
SetMkldnnDataType(graph, &bfloat16_operators);
RemoveOrhanedOperators(graph, &bfloat16_operators);
PrettyLogDetail("--- marked %d operators to bfloat16 ",
bfloat16_operators);
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(cpu_bfloat16_placement_pass,
paddle::framework::ir::CPUBfloat16PlacementPass)
// a vector of operator type names with bfloat16 support ("conv2d" etc.)
// the second param is the default value for this vector
.DefaultPassAttr("bfloat16_enabled_op_types",
new std::unordered_set<std::string>());
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include "paddle/fluid/framework/ir/pass.h"
namespace paddle {
namespace framework {
namespace ir {
/*
* Specifies which operators should be run on bfloat16.
*/
class CPUBfloat16PlacementPass : public Pass {
protected:
void SetMkldnnDataType(ir::Graph* graph, int* bfloat16_operators) const;
void RemoveOrhanedOperators(ir::Graph* graph, int* bfloat16_operators) const;
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs,
const std::string& mkldnn_data_type = "float32") {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
if (type == "conv2d") {
op->SetAttr("name", name);
op->SetInput("Input", {inputs[0]});
} else if (type == "relu") {
op->SetInput("X", inputs);
} else if (type == "concat") {
op->SetAttr("axis", 1);
op->SetInput("X", {inputs[0], inputs[1]});
} else if (type == "pool2d") {
op->SetInput("X", {inputs[0]});
} else {
FAIL() << "Unexpected operator type.";
}
op->SetOutput("Out", {outputs[0]});
}
// operator mkldnn_data_type
// ---------------------------------------
// (a,b)->concat->c float32
// c->conv->f float32
// f->relu->g float32
// g->pool->h float32
// h->conv->k float32
// k->pool->l float32
ProgramDesc BuildProgramDesc() {
ProgramDesc prog;
for (auto& v :
std::vector<std::string>({"a", "b", "c", "f", "g", "h", "k", "l"})) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "concat", "concat1", {"a", "b"}, {"c"});
SetOp(&prog, "conv2d", "conv1", {"c"}, {"f"});
SetOp(&prog, "relu", "relu1", {"f"}, {"g"});
SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"});
SetOp(&prog, "conv2d", "conv2", {"h"}, {"k"});
SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"});
return prog;
}
void MainTest(std::initializer_list<std::string> bfloat16_enabled_op_types,
unsigned expected_bfloat16_data_type_count) {
auto prog = BuildProgramDesc();
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_placement_pass");
pass->Set("bfloat16_enabled_op_types",
new std::unordered_set<std::string>(bfloat16_enabled_op_types));
graph.reset(pass->Apply(graph.release()));
unsigned bfloat16_data_type_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
if (platform::HasOpBFLOAT16DataType(node->Op())) {
++bfloat16_data_type_count;
}
}
}
EXPECT_EQ(bfloat16_data_type_count, expected_bfloat16_data_type_count);
}
void DefaultAttrTest(unsigned expected_bfloat16_data_type_count) {
auto prog = BuildProgramDesc();
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_placement_pass");
graph.reset(pass->Apply(graph.release()));
unsigned bfloat16_data_type_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
if (platform::HasOpBFLOAT16DataType(node->Op())) {
++bfloat16_data_type_count;
}
}
}
EXPECT_EQ(bfloat16_data_type_count, expected_bfloat16_data_type_count);
}
TEST(Bfloat16PlacementPass, enable_all) {
MainTest({"conv2d", "pool2d", "relu", "concat"}, 6);
}
TEST(Bfloat16PlacementPass, enabled_conv_and_pool) {
// 2 conv2d + 2 pool2d - 1 orphaned conv2d
MainTest({"conv2d", "pool2d"}, 3);
}
TEST(Bfloat16PlacementPass, default_attr_value) { DefaultAttrTest(0); }
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(cpu_bfloat16_placement_pass);
......@@ -615,6 +615,16 @@ static int BuildFusionV2(Graph* graph, const std::string& name_scope,
GET_IR_NODE_FROM_SUBGRAPH(transpose2_qkv_out, transpose2_qkv_out,
multihead_pattern);
// If the weights or biases of the qkv fc layers are shared by multiple
// multihead_matmul patterns, this kind of fusion is not supported and the
// pass will not take effect.
bool is_fc_params_shared =
mul0_w->outputs.size() > 1 || mul1_w->outputs.size() > 1 ||
mul2_w->outputs.size() > 1 || eltadd0_b->outputs.size() > 1 ||
eltadd1_b->outputs.size() > 1 || eltadd2_b->outputs.size() > 1;
if (is_fc_params_shared) {
return;
}
fuse_creater(input0, mul0, mul1, mul2, mul0_out, mul1_out, mul2_out, mul0_w,
mul1_w, mul2_w, eltadd0_b, eltadd1_b, eltadd2_b, eltadd_qk_b,
reshape2_0, reshape2_qkv_out, scale, scale_out);
......
......@@ -20,6 +20,7 @@
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
namespace paddle {
namespace framework {
......@@ -145,3 +146,11 @@ void TransposeFlattenConcatFusePass::ApplyImpl(ir::Graph *graph) const {
REGISTER_PASS(transpose_flatten_concat_fuse_pass,
paddle::framework::ir::TransposeFlattenConcatFusePass);
REGISTER_PASS_CAPABILITY(transpose_flatten_concat_fuse_pass)
.AddCombination(
paddle::framework::compatible::OpVersionComparatorCombination()
.EQ("transpose", 0)
.EQ("transpose2", 0)
.EQ("flatten", 0)
.EQ("concat", 0)
.EQ("fusion_transpose_flatten_concat", 0));
......@@ -69,7 +69,8 @@ class OpInfo {
const OpCreator& Creator() const {
PADDLE_ENFORCE_NOT_NULL(creator_,
"Operator's Creator has not been registered");
platform::errors::NotFound(
"Operator's Creator has not been registered."));
return creator_;
}
......@@ -79,11 +80,12 @@ class OpInfo {
std::string type = proto_ ? proto_->type() : "unknown";
PADDLE_ENFORCE_NOT_NULL(
grad_op_maker_,
platform::errors::NotFound(
"Operator %s's GradOpMaker has not been "
"registered.\nPlease check whether %s_op has "
"grad_op.\nIf not, please set stop_gradient to True "
"registered.\nPlease check whether (%s) operator has "
"gradient operator.\nIf not, please set stop_gradient to be True "
"for its input and output variables using var.stop_gradient=True.",
type.c_str(), type.c_str());
type.c_str(), type.c_str()));
return grad_op_maker_;
}
......@@ -100,11 +102,12 @@ class OpInfo {
std::string type = proto_ ? proto_->type() : "unknown";
PADDLE_ENFORCE_NOT_NULL(
dygraph_grad_op_maker_,
platform::errors::NotFound(
"Operator %s's DygraphGradOpMaker has not been "
"registered.\nPlease check whether %s_op has "
"grad_op.\nIf not, please set stop_gradient to True "
"registered.\nPlease check whether (%s) operator has "
"gradient operator.\nIf not, please set stop_gradient to be True "
"for its input and output variables using var.stop_gradient=True.",
type.c_str(), type.c_str());
type.c_str(), type.c_str()));
return dygraph_grad_op_maker_;
}
......@@ -130,14 +133,17 @@ class OpInfoMap {
}
void Insert(const std::string& type, const OpInfo& info) {
PADDLE_ENFORCE(!Has(type), "Operator %s has been registered", type);
PADDLE_ENFORCE_NE(Has(type), true,
platform::errors::AlreadyExists(
"Operator (%s) has been registered.", type));
map_.insert({type, info});
}
const OpInfo& Get(const std::string& type) const {
auto op_info_ptr = GetNullable(type);
PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not been registered",
type);
PADDLE_ENFORCE_NOT_NULL(
op_info_ptr,
platform::errors::NotFound("Operator (%s) is not registered.", type));
return *op_info_ptr;
}
......
......@@ -33,10 +33,18 @@ size_t OpKernelType::Hash::operator()(const OpKernelType& key) const {
cur_loc += OpKernelType::kLibBits;
int customized_value = key.customized_type_value_;
PADDLE_ENFORCE(customized_value < (1 << OpKernelType::kCustomizeBits));
PADDLE_ENFORCE_LT(customized_value, (1 << OpKernelType::kCustomizeBits),
platform::errors::Unavailable(
"Too many custom OpKernel attribute values, expected "
"maximum value is %d, received value is %d.",
(1 << OpKernelType::kCustomizeBits), customized_value));
customized_value = customized_value << cur_loc;
cur_loc += OpKernelType::kCustomizeBits;
PADDLE_ENFORCE(cur_loc < 64);
PADDLE_ENFORCE_LT(cur_loc, 64,
platform::errors::Unavailable(
"Too many OpKernel attribute values, expected maximum "
"value is 64, received value is %d.",
cur_loc));
std::hash<int> hasher;
return hasher(place + data_type + data_layout + library_type +
......
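Editor's note: the hash above packs several small values into one int by shifting each into its own bit range, then hashes the packed word; the enforces guard that every field fits its range. A standalone sketch with made-up field widths:
#include <functional>
#include <iostream>

int main() {
  // Two fields: 4 bits for 'place', 8 bits for the customized value.
  constexpr int kPlaceBits = 4, kCustomizeBits = 8;
  int place = 3, customized = 17;
  // This guard mirrors the PADDLE_ENFORCE_LT checks above.
  if (customized >= (1 << kCustomizeBits)) return 1;  // field overflow
  int packed = place | (customized << kPlaceBits);
  std::cout << std::hash<int>{}(packed) << "\n";
  return 0;
}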
......@@ -43,7 +43,9 @@ OpProtoAndCheckerMaker::VariableBuilder OpProtoAndCheckerMaker::AddOutput(
void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() {
std::unordered_set<std::string> names;
auto checker = [&](const std::string& name) {
PADDLE_ENFORCE(!names.count(name), "[%s] is duplicated", name);
PADDLE_ENFORCE_EQ(
names.count(name), 0,
platform::errors::AlreadyExists("Attribute [%s] is duplicated.", name));
names.insert(name);
};
for (auto& attr : proto_->attrs()) {
......
......@@ -54,9 +54,10 @@ class Registrar {
template <typename... ARGS>
struct OperatorRegistrar : public Registrar {
explicit OperatorRegistrar(const char* op_type) {
if (OpInfoMap::Instance().Has(op_type)) {
PADDLE_THROW("'%s' is registered more than once.", op_type);
}
PADDLE_ENFORCE_EQ(
OpInfoMap::Instance().Has(op_type), false,
platform::errors::AlreadyExists(
"Operator '%s' is registered more than once.", op_type));
static_assert(sizeof...(ARGS) != 0,
"OperatorRegistrar should be invoked at least by OpClass");
OpInfo info;
......
......@@ -58,7 +58,8 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
AddInput("input", "input of cosine op").AsDuplicable();
AddOutput("output", "output of cosine op").AsIntermediate();
auto my_checker = [](int i) {
PADDLE_ENFORCE(i % 2 == 0, "'test_attr' must be even!");
PADDLE_ENFORCE_EQ(i % 2, 0, platform::errors::InvalidArgument(
"'test_attr' must be even!"));
};
AddAttr<int>("test_attr", "a simple test attribute")
.AddCustomChecker(my_checker);
......
......@@ -152,10 +152,10 @@ class OpVersionRegistrar {
return instance;
}
OpVersion& Register(const std::string& op_type) {
if (op_version_map_.find(op_type) != op_version_map_.end()) {
PADDLE_THROW("'%s' is registered in operator version more than once.",
op_type);
}
PADDLE_ENFORCE_EQ(
op_version_map_.find(op_type), op_version_map_.end(),
platform::errors::AlreadyExists(
"'%s' is registered in operator version more than once.", op_type));
op_version_map_.insert({op_type, OpVersion()});
return op_version_map_[op_type];
}
......
This file's diff has been collapsed.
......@@ -495,9 +495,9 @@ TEST(IndicateVarDataTypeTest, other) {
EXPECT_TRUE(
ex_msg.find(
"The Input Variable(Other) of "
"indicate_other_data_type_test Op used to "
"(indicate_other_data_type_test) Operator used to "
"determine kernel data type "
"is empty or not LoDTensor or SelectedRows or LoDTensorArray") !=
"is empty or not LoDTensor or SelectedRows or LoDTensorArray.") !=
std::string::npos);
}
ASSERT_TRUE(caught);
......
......@@ -20,7 +20,10 @@ namespace framework {
void ReaderBase::ReadNext(std::vector<LoDTensor> *out) {
std::lock_guard<std::mutex> lock(mu_);
PADDLE_ENFORCE_EQ(status_, ReaderStatus::kRunning);
PADDLE_ENFORCE_EQ(status_, ReaderStatus::kRunning,
platform::errors::Unavailable(
"The current reader has stopped running and cannot "
"continue to read the next batch of data."));
ReadNextImpl(out);
}
......
......@@ -32,17 +32,21 @@ struct RWLock {
~RWLock() { pthread_rwlock_destroy(&lock_); }
inline void RDLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_rdlock(&lock_), 0,
"acquire read lock failed");
PADDLE_ENFORCE_EQ(
pthread_rwlock_rdlock(&lock_), 0,
platform::errors::External("The pthread failed to acquire read lock."));
}
inline void WRLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_wrlock(&lock_), 0,
"acquire write lock failed");
platform::errors::External(
"The pthread failed to acquire write lock."));
}
inline void UNLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_unlock(&lock_), 0, "unlock failed");
PADDLE_ENFORCE_EQ(
pthread_rwlock_unlock(&lock_), 0,
platform::errors::External("The pthread failed to unlock."));
}
private:
......
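Editor's note: the POSIX calls wrapped above report failure through their return value, which is why the enforces compare against 0. A minimal standalone use of the same API:
#include <pthread.h>
#include <cstdio>

int main() {
  pthread_rwlock_t lock;
  pthread_rwlock_init(&lock, nullptr);
  // Each call returns 0 on success; any other value is an error code.
  if (pthread_rwlock_rdlock(&lock) != 0) std::perror("rdlock failed");
  pthread_rwlock_unlock(&lock);
  if (pthread_rwlock_wrlock(&lock) != 0) std::perror("wrlock failed");
  pthread_rwlock_unlock(&lock);
  pthread_rwlock_destroy(&lock);
  return 0;
}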
......@@ -33,7 +33,8 @@ void CheckInStreamState(std::istream& istre, size_t length) {
VLOG(5) << "Can't read [" << length << "] from file"
<< "file seems breakem";
PADDLE_THROW("Model load error, file seems breaken");
PADDLE_THROW(platform::errors::Unavailable(
"Model load failed, istream state error."));
}
}
......@@ -58,10 +59,11 @@ size_t ReadTensorNumber(std::istream& istre) {
sizeof(char) * tensor_number_mark.size());
std::string str_read_tensor_number_mark(tensor_number_mark_buffer,
tensor_number_mark.size());
PADDLE_ENFORCE_EQ(
tensor_number_mark, str_read_tensor_number_mark,
"Tensor number mark not match, expect [%s], but read from file is [%]",
tensor_number_mark, str_read_tensor_number_mark);
PADDLE_ENFORCE_EQ(tensor_number_mark, str_read_tensor_number_mark,
platform::errors::InvalidArgument(
"Tensor number mark does not match, expect mark is "
"[%s], but the mark read from file is [%s].",
tensor_number_mark, str_read_tensor_number_mark));
size_t tensor_number = 0;
istre.read(reinterpret_cast<char*>(&tensor_number), sizeof(tensor_number));
......@@ -79,10 +81,11 @@ std::string ReadTensorName(std::istream& istre) {
std::string str_read_tensor_name_mark(name_mark_buffer,
tensor_name_mark.size());
PADDLE_ENFORCE_EQ(
tensor_name_mark, str_read_tensor_name_mark,
"Tensor name mark not match, expect [%s], but read from file is [%]",
tensor_name_mark, str_read_tensor_name_mark);
PADDLE_ENFORCE_EQ(tensor_name_mark, str_read_tensor_name_mark,
platform::errors::InvalidArgument(
"Tensor name mark does not match, expect mark is [%s], "
"but the mark read from file is [%s].",
tensor_name_mark, str_read_tensor_name_mark));
size_t tensor_name_length = 0;
istre.read(reinterpret_cast<char*>(&tensor_name_length),
......@@ -117,16 +120,18 @@ bool SaveStaticNameListToDisk(
for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) {
auto var_ptr = scope.FindVar(vec_tensor_name_list[i]);
PADDLE_ENFORCE_NE(
var_ptr, nullptr,
"Variable find error, when save model, can't not find vairable [%s], "
"Please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
PADDLE_ENFORCE_NOT_NULL(
var_ptr, platform::errors::NotFound("Variable (%s) is not found when "
"saving model, please make sure "
"that exe.run(startup_program) has "
"been executed.",
vec_tensor_name_list[i]));
Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed,"
"Please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
platform::errors::PreconditionNotMet(
"Paramter [%s] is not initialzed, please make sure "
"that exe.run(startup_program) has been executed.",
vec_tensor_name_list[i]));
map_tensor[vec_tensor_name_list[i]] = tensor;
}
......@@ -145,9 +150,10 @@ bool SaveDygraphVarBaseListToDisk(
Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed,"
"Please make sure you have run StartUpProgram",
vec_var_base_list[i]->Name());
platform::errors::PreconditionNotMet(
"Paramter [%s] is not initialzed, please make sure "
"that exe.run(startup_program) has been executed.",
vec_var_base_list[i]->Name()));
map_tensor[vec_var_base_list[i]->Name()] = tensor;
}
......@@ -185,34 +191,41 @@ bool LoadStaticNameListFromDisk(
for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) {
auto it = map_load_tensor.find(vec_tensor_name_list[i]);
PADDLE_ENFORCE(it != map_load_tensor.end(),
"Paramete not found in Model file, "
"Can not find [%s] in model file [%s]",
vec_tensor_name_list[i], file_name);
PADDLE_ENFORCE_NE(it, map_load_tensor.end(),
platform::errors::NotFound(
"Parameter (%s) not found in model file (%s).",
vec_tensor_name_list[i], file_name));
auto var_ptr = scope.FindVar(vec_tensor_name_list[i]);
PADDLE_ENFORCE_NE(
var_ptr, nullptr,
"Parameter not created, when load model, can't not find parameter [%s] "
"please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
PADDLE_ENFORCE_NOT_NULL(
var_ptr,
platform::errors::PreconditionNotMet(
"Parameter (%s) is not created when loading model, "
"please make sure that exe.run(startup_program) has been executed.",
vec_tensor_name_list[i]));
Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_NE(tensor, nullptr,
"Paramter [%s] not initialzed "
"please make sure you have run startUpProgram",
vec_tensor_name_list[i]);
PADDLE_ENFORCE_NOT_NULL(
tensor,
platform::errors::PreconditionNotMet(
"Paramter [%s] is not initialzed, "
"please make sure that exe.run(startup_program) has been executed.",
vec_tensor_name_list[i]));
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed "
"please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
platform::errors::PreconditionNotMet(
"Paramter [%s] is not initialzed, "
"please make sure that exe.run(startup_program) has "
"been executed.v",
vec_tensor_name_list[i]));
PADDLE_ENFORCE_EQ(
tensor->dims(), it->second->dims(),
"Shape not matching: the Program requires a parameter with a shape of "
"(%s), "
"while the loaded parameter (namely [ %s ]) has a shape of (%s).",
tensor->dims(), vec_tensor_name_list[i], it->second->dims());
platform::errors::InvalidArgument(
"Shape does not match, the program requires a parameter with a "
"shape of "
"(%s), while the loaded parameter (namely [ %s ]) has a shape of "
"(%s).",
tensor->dims(), vec_tensor_name_list[i], it->second->dims()));
TensorCopySync(*(it->second.get()), tensor->place(), tensor);
......@@ -239,9 +252,9 @@ bool SaveTensorToDisk(const std::string& file_name,
MkDirRecursively(DirName(file_name).c_str());
std::ofstream fout(file_name, std::ios::binary);
if (!fout) {
PADDLE_THROW("File open error. Can not open file [%s]", file_name);
}
PADDLE_ENFORCE_EQ(
fout.is_open(), true,
platform::errors::Unavailable("File (%s) open failed.", file_name));
// first 256 byte for reserve for fulture upgrade
char* kReserveBuffer = new char[model_file_reserve_size];
......@@ -292,9 +305,8 @@ bool SaveTensorToDisk(const std::string& file_name,
TensorCopySync(*tensor, platform::CPUPlace(), &temp);
data_ptr = temp.data<void>();
#else
PADDLE_THROW(
"Tensor is in CUDA device, but paddle not compile with CUDA, this "
"should not happen");
PADDLE_THROW(platform::errors::Unavailable(
"Tensor is in CUDA device, but paddle not compiled with CUDA."));
#endif
}
fout.write(static_cast<const char*>(data_ptr),
......@@ -302,8 +314,9 @@ bool SaveTensorToDisk(const std::string& file_name,
}
if (!fout) {
PADDLE_THROW("Model save failed, data write to model file [%s] error",
file_name);
PADDLE_THROW(platform::errors::Unavailable(
"Model save failed, error when writing data into model file [%s].",
file_name));
}
fout.close();
......@@ -316,9 +329,9 @@ bool LoadTensorFromDisk(
std::map<std::string, std::shared_ptr<Tensor>>* map_tensor) {
std::ifstream fin(file_name, std::ios::binary);
if (!fin) {
PADDLE_THROW("File open error. Can not open model file [%s]", file_name);
}
PADDLE_ENFORCE_EQ(
fin.is_open(), true,
platform::errors::Unavailable("File (%s) open failed.", file_name));
ReadReserveBuffer(fin);
......@@ -331,7 +344,8 @@ bool LoadTensorFromDisk(
uint32_t version;
fin.read(reinterpret_cast<char*>(&version), sizeof(version));
CheckInStreamState(fin, sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
PADDLE_ENFORCE_EQ(version, 0U, platform::errors::InvalidArgument(
"Only version 0 tensor is supported."));
proto::VarType::TensorDesc desc;
{
// int32_t size
......@@ -344,7 +358,7 @@ bool LoadTensorFromDisk(
CheckInStreamState(fin, sizeof(size));
PADDLE_ENFORCE_EQ(
desc.ParseFromArray(buf.get(), size), true,
platform::errors::InvalidArgument("Cannot parse tensor desc"));
platform::errors::InvalidArgument("Parse tensor desc failed."));
}
{ // read tensor
......
......@@ -113,7 +113,9 @@ void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows,
// the 1st field, unit32_t version for SelectedRows
uint32_t version;
is.read(reinterpret_cast<char*>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
PADDLE_ENFORCE_EQ(version, 0U,
platform::errors::InvalidArgument(
"Only version 0 SelectedRows is supported."));
}
{
// the 2st field, rows information
......@@ -155,24 +157,27 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown,
auto iter = id_to_index_.find(key);
if (iter == id_to_index_.end()) {
rwlock_->UNLock();
if (!auto_grown) {
PADDLE_THROW("key %d not found", key);
}
PADDLE_ENFORCE_EQ(
auto_grown, true,
platform::errors::NotFound("Input key(%lld) is not found.", key));
rwlock_->WRLock();
auto map_size = id_to_index_.size();
auto vector_size = rows_.size();
if (map_size != vector_size) {
rwlock_->UNLock();
PADDLE_THROW(
"id_to_index_ size %d should have the same size with rows_ %d",
map_size, vector_size);
PADDLE_THROW(platform::errors::InvalidArgument(
"Row map size(%zu) should be equal to rows size(%zu).", map_size,
vector_size));
}
auto write_iter = id_to_index_.find(key);
if (write_iter == id_to_index_.end()) {
int row_num = rows_.size();
if (row_num == value_->dims()[0]) {
rwlock_->UNLock();
PADDLE_THROW("selected rows is full, then length exceed %d", row_num);
PADDLE_THROW(platform::errors::InvalidArgument(
"Selected rows is full, then length exceed the length of first "
"dimension (%d).",
row_num));
}
// key logic to put a key into id_to_index_
rows_.push_back(key);
......@@ -203,15 +208,20 @@ void SelectedRows::SyncIndex() {
void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
bool auto_grown, bool is_test) {
PADDLE_ENFORCE(value->IsInitialized(),
"The value tensor should be initialized.");
PADDLE_ENFORCE_EQ(value->IsInitialized(), true,
platform::errors::InvalidArgument(
"The value tensor is not initialized."));
if (ids.numel() == 0) {
VLOG(3) << "keys is empty, please check data!";
} else {
int64_t value_width = value_->numel() / value_->dims()[0];
PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0],
"output tensor should have the same shape with table "
"except the dims[0].");
PADDLE_ENFORCE_EQ(
value_width, value->numel() / value->dims()[0],
platform::errors::InvalidArgument(
"Output tensor should have the same shape with table "
"except the first dimmension, excepted value width not counting "
"the first dimension is %d, actual value width is %d.",
value_width, value->numel() / value->dims()[0]));
for (int i = 0; i < ids.numel(); ++i) {
auto id = ids.data<int64_t>()[i];
int64_t index = AutoGrownIndex(id, auto_grown, is_test);
......
......@@ -82,7 +82,8 @@ class SelectedRows {
int64_t Index(int64_t key) const {
auto it = std::find(rows_.begin(), rows_.end(), key);
if (it == rows_.end()) {
PADDLE_THROW("id %s not in table", key);
PADDLE_THROW(platform::errors::NotFound(
"Input id (%lld) is not in current rows table.", key));
}
return static_cast<int64_t>(std::distance(rows_.begin(), it));
}
......
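Editor's note: Index above is a plain find-then-distance lookup over the rows vector; the NotFound branch fires when the id is absent. The same logic standalone:
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <vector>

int main() {
  std::vector<int64_t> rows = {7, 3, 11};
  int64_t key = 11;
  auto it = std::find(rows.begin(), rows.end(), key);
  if (it == rows.end()) {
    std::cerr << "Input id (" << key << ") is not in current rows table.\n";
    return 1;  // the PADDLE_THROW branch above
  }
  std::cout << std::distance(rows.begin(), it) << "\n";  // prints 2
  return 0;
}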
......@@ -25,20 +25,22 @@ namespace framework {
std::vector<DDim> InferShapeContext::GetReaderDims(
const std::string &name) const {
const std::vector<std::string> &arg_names = Inputs(name);
PADDLE_ENFORCE_EQ(
arg_names.size(), 1UL,
"Reader input '%s' should hold one element, but now it holds %d", name,
arg_names.size());
PADDLE_ENFORCE_EQ(arg_names.size(), 1UL,
platform::errors::InvalidArgument(
"Reader input '%s' should hold one element, but now it "
"holds %d elements.",
name, arg_names.size()));
return this->GetRepeatedDims(arg_names[0]);
}
void InferShapeContext::SetReaderDims(const std::string &name,
const std::vector<DDim> &dims) {
const std::vector<std::string> &arg_names = Outputs(name);
PADDLE_ENFORCE_EQ(
arg_names.size(), 1UL,
"Reader output '%s' should hold one element, but now it holds %d", name,
arg_names.size());
PADDLE_ENFORCE_EQ(arg_names.size(), 1UL,
platform::errors::InvalidArgument(
"Reader output '%s' should hold one element, but now "
"it holds %d elements.",
name, arg_names.size()));
return this->SetRepeatedDims(arg_names[0], dims);
}
......
......@@ -19,13 +19,17 @@ namespace paddle {
namespace framework {
extern size_t SizeOfType(proto::VarType::Type type);
void Tensor::check_memory_size() const {
PADDLE_ENFORCE_NOT_NULL(
holder_, "Tensor holds no memory. Call Tensor::mutable_data first.");
PADDLE_ENFORCE_NOT_NULL(holder_, platform::errors::PreconditionNotMet(
"Tensor holds no memory. "
"Call Tensor::mutable_data firstly."));
PADDLE_ENFORCE_LE(
numel() * SizeOfType(type()), memory_size(),
"Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory.\n"
"or maybe the required data-type mismatches the data already stored.");
platform::errors::PreconditionNotMet(
"Tensor's dimension is out of bound."
"Tensor's dimension must be equal or less than the size of its "
"memory."
"But received Tensor's dimension is d%, memory's size is %d.",
numel() * SizeOfType(type()), memory_size()));
}
Tensor::Tensor(const proto::VarType::Type& dtype) : type_(dtype), offset_(0) {}
......@@ -37,15 +41,21 @@ size_t Tensor::memory_size() const {
void* Tensor::mutable_data(const platform::Place& place,
proto::VarType::Type type, size_t requested_size) {
type_ = type;
PADDLE_ENFORCE_GE(numel(), 0,
"When calling this method, the Tensor's numel must be "
"equal or larger than zero. "
"Please check Tensor::dims, or Tensor::Resize has been "
"called first. The Tensor's shape is [",
dims(), "] now");
PADDLE_ENFORCE_GE(
numel(), 0,
platform::errors::PreconditionNotMet(
"The Tensor's element number must be equal or greater than zero. "
"The Tensor's shape is [",
dims(), "] now"));
size_t size = numel() * SizeOfType(type);
if (requested_size) {
PADDLE_ENFORCE_GE(requested_size, size);
PADDLE_ENFORCE_GE(
requested_size, size,
platform::errors::InvalidArgument(
"The requested memory size is less than the memory size of Tensor. "
"But received requested memory size is d%, "
"memory size of Tensor is %d.",
requested_size, size));
size = requested_size;
}
/* some versions of boost::variant don't have operator!= */
......@@ -62,8 +72,8 @@ void* Tensor::mutable_data(const platform::Place& place,
void* Tensor::mutable_data(const platform::Place& place,
size_t requested_size) {
PADDLE_ENFORCE_NOT_NULL(
this->holder_, "Cannot invoke mutable data if current hold nothing.");
PADDLE_ENFORCE_NOT_NULL(this->holder_, platform::errors::PreconditionNotMet(
"The tensor is not initialized."));
return mutable_data(place, type_, requested_size);
}
......@@ -75,12 +85,20 @@ Tensor& Tensor::ShareDataWith(const Tensor& src) {
Tensor Tensor::Slice(int64_t begin_idx, int64_t end_idx) const {
check_memory_size();
PADDLE_ENFORCE_GE(begin_idx, 0,
"The start row index must be greater than 0.");
PADDLE_ENFORCE_LE(end_idx, dims_[0], "The end row index is out of bound.");
PADDLE_ENFORCE_GE(
begin_idx, 0,
platform::errors::OutOfRange("The start row index must be greater than 0."
"But received the start index is d%.",
begin_idx));
PADDLE_ENFORCE_LE(
end_idx, dims_[0],
platform::errors::OutOfRange("The end row index is out of bound."));
PADDLE_ENFORCE_LT(
begin_idx, end_idx,
"The start row index must be lesser than the end row index.");
platform::errors::InvalidArgument(
"The start row index must be less than the end row index."
"But received the start index = %d, the end index = %d.",
begin_idx, end_idx));
if (dims_[0] == 1) {
return *this;
......
......@@ -131,13 +131,17 @@ class Tensor {
const platform::Place& place() const {
PADDLE_ENFORCE_NOT_NULL(
holder_, "Tensor not initialized yet when Tensor::place() is called.");
holder_,
platform::errors::PreconditionNotMet(
"Tensor not initialized yet when Tensor::place() is called."));
return holder_->place();
}
proto::VarType::Type type() const {
PADDLE_ENFORCE_NOT_NULL(
holder_, "Tensor not initialized yet when Tensor::type() is called.");
holder_,
platform::errors::PreconditionNotMet(
"Tensor not initialized yet when Tensor::type() is called."));
return type_;
}
......
......@@ -43,9 +43,13 @@ inline T* Tensor::data() {
check_memory_size();
bool valid =
std::is_same<T, void>::value || type_ == DataTypeTrait<T>::DataType();
PADDLE_ENFORCE(
valid, "Tensor holds the wrong type, it holds %s, but desires to be %s",
DataTypeToString(type_), DataTypeToString(DataTypeTrait<T>::DataType()));
PADDLE_ENFORCE_EQ(
valid, true,
platform::errors::InvalidArgument(
"Tensor holds the wrong type, it holds %s, but desires to be %s",
DataTypeToString(type_),
DataTypeToString(DataTypeTrait<T>::DataType())));
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_);
}
......@@ -69,9 +73,12 @@ inline T* Tensor::mutable_data(const platform::Place& place,
inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {
int rank = src.dims().size();
PADDLE_ENFORCE_GE(
rank, 2,
rank, 2, platform::errors::InvalidArgument(
"'ReshapeToMatrix()' is only used for flatten high rank "
"tensors to matrixs. Can not be used in reshaping vectors.");
"tensors to matrixs. The dimensions of Tensor must be "
"greater or equal than 2. "
"But received dimensions of Tensor is %d",
rank));
if (rank == 2) {
return src;
}
......
......@@ -41,7 +41,7 @@ TEST(Tensor, DataAssert) {
std::string ex_msg = err.what();
EXPECT_TRUE(ex_msg.find("holder_ should not be null") != std::string::npos);
EXPECT_TRUE(ex_msg.find("Tensor holds no memory. Call "
"Tensor::mutable_data first.") !=
"Tensor::mutable_data firstly.") !=
std::string::npos);
}
ASSERT_TRUE(caught);
......@@ -157,7 +157,7 @@ TEST(Tensor, ShareDataWith) {
EXPECT_TRUE(ex_msg.find("holder_ should not be null") !=
std::string::npos);
EXPECT_TRUE(ex_msg.find("Tensor holds no memory. Call "
"Tensor::mutable_data first.") !=
"Tensor::mutable_data firstly.") !=
std::string::npos);
}
ASSERT_TRUE(caught);
......
......@@ -94,9 +94,17 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place);
auto dst_cpu_place = BOOST_GET_CONST(platform::CPUPlace, dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true);
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx_place), true,
platform::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
ctx_place));
auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place,
platform::errors::Unavailable(
"Source place and context place do not match, source "
"place is %s, context place is %s.",
src_gpu_place, ctx_gpu_place));
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
......@@ -106,9 +114,17 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto src_cpu_place = BOOST_GET_CONST(platform::CPUPlace, src_place);
auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true);
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx_place), true,
platform::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
ctx_place));
auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place);
PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place);
PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place,
platform::errors::Unavailable(
"Destination place and context place do not match, "
"destination place is %s, context place is %s.",
dst_gpu_place, ctx_gpu_place));
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream);
......@@ -164,7 +180,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place);
auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true);
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx_place), true,
platform::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
ctx_place));
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_same_place(src_place, dst_place)) {
......@@ -180,12 +200,14 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
stream);
} else {
PADDLE_THROW("ctx is not belong to dst_gpu_place or src_gpu_place.");
PADDLE_THROW(platform::errors::Unavailable(
"Context place dose not match the source and destination place."));
}
}
}
else { // NOLINT
PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place);
PADDLE_THROW(platform::errors::Unimplemented(
"Copying from %s to %s is not supported.", src_place, dst_place));
}
#endif
}
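All three GPU branches above enforce the same contract: the device context handed to TensorCopy must be a CUDA context whose place matches the GPU side of the copy. A minimal sketch of a conforming call (assumes a CUDA build; device 0 is arbitrary):

framework::Tensor src, dst;
platform::CUDAPlace gpu(0);
platform::CUDADeviceContext ctx(gpu);
src.Resize(framework::make_ddim({4, 4}));
src.mutable_data<float>(gpu);
// GPU -> CPU copy; a CPU context or a context on another device would now
// trigger the PreconditionNotMet / Unavailable errors above.
framework::TensorCopy(src, platform::CPUPlace(), ctx, &dst);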
......@@ -298,7 +320,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
nullptr);
}
else { // NOLINT
PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place);
PADDLE_THROW(platform::errors::Unimplemented(
"Copy from %s to %s is not supported.", src_place, dst_place));
}
#endif
}
......@@ -832,7 +855,9 @@ void TensorFromStream(std::istream& is, Tensor* tensor,
void* GetDstPtrByDLDataType(DLDataType type, framework::Tensor* dst,
const platform::Place& dst_place) {
// vector types not currently supported
PADDLE_ENFORCE_LE(type.lanes, 1, "vector types not currently supported");
PADDLE_ENFORCE_LE(type.lanes, 1,
platform::errors::Unimplemented(
"Vector type is not supported currently."));
switch (type.bits) {
case 8:
......@@ -840,32 +865,37 @@ void* GetDstPtrByDLDataType(DLDataType type, framework::Tensor* dst,
return static_cast<void*>(dst->mutable_data<int8_t>(dst_place));
if (type.code == kDLUInt)
return static_cast<void*>(dst->mutable_data<uint8_t>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.",
type.code, type.bits);
PADDLE_THROW(platform::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
case 16:
if (type.code == kDLInt)
return static_cast<void*>(dst->mutable_data<int16_t>(dst_place));
if (type.code == kDLFloat)
return static_cast<void*>(
dst->mutable_data<paddle::platform::float16>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.",
type.code, type.bits);
PADDLE_THROW(platform::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
case 32:
if (type.code == kDLInt)
return static_cast<void*>(dst->mutable_data<int32_t>(dst_place));
if (type.code == kDLFloat)
return static_cast<void*>(dst->mutable_data<float>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.",
type.code, type.bits);
PADDLE_THROW(platform::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
case 64:
if (type.code == kDLInt)
return static_cast<void*>(dst->mutable_data<int64_t>(dst_place));
if (type.code == kDLFloat)
return static_cast<void*>(dst->mutable_data<double>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.",
type.code, type.bits);
PADDLE_THROW(platform::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
default:
PADDLE_THROW("Unsupport type.bits %d", type.bits);
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported DLDataType.bits %d.", type.bits));
}
}
......
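For reference, a hedged usage sketch of the switch above; the tensor and place are placeholders. The accepted (bits, code) pairs map to: (8, kDLInt) -> int8_t, (8, kDLUInt) -> uint8_t, (16, kDLInt) -> int16_t, (16, kDLFloat) -> float16, (32, kDLInt) -> int32_t, (32, kDLFloat) -> float, (64, kDLInt) -> int64_t, (64, kDLFloat) -> double.

framework::Tensor dst;
dst.Resize(framework::make_ddim({2, 3}));
DLDataType f32{kDLFloat, /*bits=*/32, /*lanes=*/1};  // lanes > 1 -> Unimplemented
void* ptr = GetDstPtrByDLDataType(f32, &dst, platform::CPUPlace());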
......@@ -183,7 +183,11 @@ void TensorToVector(const Tensor& src, std::vector<T>* dst) {
dst->resize(src.numel());
auto dst_ptr = static_cast<void*>(dst->data());
PADDLE_ENFORCE_EQ(platform::is_cpu_place(src.place()), true);
PADDLE_ENFORCE_EQ(
platform::is_cpu_place(src.place()), true,
platform::errors::InvalidArgument(
"The input tensor should be CPU device, but actually it is in %s.",
src.place()));
memory::Copy(dst_place, dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src.place()), src_ptr, size);
......
......@@ -42,7 +42,8 @@ void ThreadPool::Init() {
num_threads = FLAGS_dist_threadpool_size;
VLOG(1) << "set dist_threadpool_size to " << num_threads;
}
PADDLE_ENFORCE_GT(num_threads, 0);
PADDLE_ENFORCE_GT(num_threads, 0, platform::errors::InvalidArgument(
"The number of threads is 0."));
threadpool_.reset(new ThreadPool(num_threads));
}
}
......@@ -83,7 +84,8 @@ void ThreadPool::TaskLoop() {
}
if (tasks_.empty()) {
PADDLE_THROW("This thread has no task to Run");
PADDLE_THROW(platform::errors::Unavailable(
"Current thread has no task to Run."));
}
// pop a task from the task queue
......
......@@ -91,7 +91,8 @@ class ThreadPool {
{
std::unique_lock<std::mutex> lock(mutex_);
if (!running_) {
PADDLE_THROW("enqueue on stopped ThreadPool");
PADDLE_THROW(platform::errors::Unavailable(
"Task is enqueued into stopped ThreadPool."));
}
tasks_.push(std::move(task));
}
......
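These typed throws still surface as the same exception type at call sites, so existing code that catches the enforce exception and inspects what() keeps working; a sketch (assuming a framework::ThreadPool* pool and the Run API shown in this header):

try {
  pool->Run([] { /* enqueued work */ });
} catch (platform::EnforceNotMet& err) {
  // err.what() now carries "Task is enqueued into stopped ThreadPool."
  LOG(ERROR) << err.what();
}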
......@@ -43,8 +43,9 @@ void VarDesc::SetTensorDescNum(size_t num) {
} break;
default:
PADDLE_THROW(
"Setting 'sub_tensor_number' is not supported by the type of var %s.",
this->Name());
platform::errors::Unavailable("Setting 'sub_tensor_number' is not "
"supported by the %s type variable.",
this->Name()));
}
}
......@@ -55,8 +56,9 @@ size_t VarDesc::GetTensorDescNum() const {
break;
default:
PADDLE_THROW(
"Getting 'sub_tensor_number' is not supported by the type of var %s.",
this->Name());
platform::errors::Unavailable("Getting 'sub_tensor_number' is not "
"supported by the %s type variable.",
this->Name()));
}
}
......@@ -133,9 +135,9 @@ void VarDesc::SetLoDLevel(int32_t lod_level) {
desc_.mutable_type()->mutable_tensor_array()->set_lod_level(lod_level);
break;
default:
PADDLE_THROW(
"Setting 'lod_level' is not supported by the type of var %s.",
this->Name());
PADDLE_THROW(platform::errors::Unavailable(
"Setting 'lod_level' is not supported by the %s type variable.",
this->Name()));
}
}
......@@ -157,9 +159,9 @@ void VarDesc::SetLoDLevels(const std::vector<int32_t> &multiple_lod_level) {
}
} break;
default:
PADDLE_THROW(
"Setting 'lod_levels' is not supported by the type of var %s.",
this->Name());
PADDLE_THROW(platform::errors::Unavailable(
"Setting 'lod_levels' is not supported by the %s type variable",
this->Name()));
}
}
......@@ -170,9 +172,9 @@ int32_t VarDesc::GetLoDLevel() const {
case proto::VarType::LOD_TENSOR_ARRAY:
return desc_.type().tensor_array().lod_level();
default:
PADDLE_THROW(
"Getting 'lod_level' is not supported by the type of var %s.",
this->Name());
PADDLE_THROW(platform::errors::Unavailable(
"Getting 'lod_level' is not supported by the %s type variable.",
this->Name()));
}
}
......@@ -187,15 +189,19 @@ std::vector<int32_t> VarDesc::GetLoDLevels() const {
return res;
break;
default:
PADDLE_THROW(
"Getting 'lod_levels' is not supported by the type of var %s.",
this->Name());
PADDLE_THROW(platform::errors::Unavailable(
"Getting 'lod_levels' is not supported by the %s type variable.",
this->Name()));
}
}
const proto::VarType::TensorDesc &VarDesc::tensor_desc() const {
PADDLE_ENFORCE(desc_.has_type(), "The var's type hasn't been set.");
PADDLE_ENFORCE(desc_.type().has_type(), "The var type hasn't been set.");
PADDLE_ENFORCE_EQ(
desc_.has_type(), true,
platform::errors::NotFound("The variable's type was not be set."));
PADDLE_ENFORCE_EQ(
desc_.type().has_type(), true,
platform::errors::NotFound("The variable's type was not be set."));
switch (desc_.type().type()) {
case proto::VarType::SELECTED_ROWS:
return desc_.type().selected_rows();
......@@ -204,14 +210,16 @@ const proto::VarType::TensorDesc &VarDesc::tensor_desc() const {
case proto::VarType::LOD_TENSOR_ARRAY:
return desc_.type().tensor_array().tensor();
default:
PADDLE_THROW(
"Getting 'tensor_desc' is not supported by the type of var %s.",
this->Name());
PADDLE_THROW(platform::errors::Unavailable(
"Getting 'tensor_desc' is not supported by the %s type variable.",
this->Name()));
}
}
std::vector<proto::VarType::TensorDesc> VarDesc::tensor_descs() const {
PADDLE_ENFORCE(desc_.has_type(), "The var type hasn't been set.");
PADDLE_ENFORCE_EQ(
desc_.has_type(), true,
platform::errors::NotFound("The variable's type was not be set."));
std::vector<proto::VarType::TensorDesc> res;
res.reserve(GetTensorDescNum());
switch (desc_.type().type()) {
......@@ -221,16 +229,19 @@ std::vector<proto::VarType::TensorDesc> VarDesc::tensor_descs() const {
}
return res;
default:
PADDLE_THROW(
"Getting 'tensor_descs' is not supported by the type of var "
"%s.",
this->Name());
PADDLE_THROW(platform::errors::Unavailable(
"Getting 'tensor_descs' is not supported by the %s type variable.",
this->Name()));
}
}
proto::VarType::TensorDesc *VarDesc::mutable_tensor_desc() {
PADDLE_ENFORCE(desc_.has_type(), "The var type hasn't been set.");
PADDLE_ENFORCE(desc_.type().has_type(), "The var type hasn't been set.");
PADDLE_ENFORCE_EQ(
desc_.has_type(), true,
platform::errors::NotFound("The variable's type was not be set."));
PADDLE_ENFORCE_EQ(
desc_.type().has_type(), true,
platform::errors::NotFound("The variable's type was not be set."));
switch (desc_.type().type()) {
case proto::VarType::SELECTED_ROWS:
return desc_.mutable_type()->mutable_selected_rows();
......@@ -240,15 +251,19 @@ proto::VarType::TensorDesc *VarDesc::mutable_tensor_desc() {
return desc_.mutable_type()->mutable_tensor_array()->mutable_tensor();
default:
PADDLE_THROW(
"Getting 'mutable_tensor_desc' is not supported by the type of var "
"%s.",
this->Name());
platform::errors::Unavailable("Getting 'mutable_tensor_desc' is not "
"supported by the %s type variable.",
this->Name()));
}
}
std::vector<proto::VarType::TensorDesc *> VarDesc::mutable_tensor_descs() {
PADDLE_ENFORCE(desc_.has_type(), "The var type hasn't been set.");
PADDLE_ENFORCE(desc_.type().has_type(), "The var type hasn't been set.");
PADDLE_ENFORCE_EQ(
desc_.has_type(), true,
platform::errors::NotFound("The variable's type was not be set."));
PADDLE_ENFORCE_EQ(
desc_.type().has_type(), true,
platform::errors::NotFound("The variable's type was not be set."));
std::vector<proto::VarType::TensorDesc *> res;
res.reserve(GetTensorDescNum());
switch (desc_.type().type()) {
......@@ -259,10 +274,9 @@ std::vector<proto::VarType::TensorDesc *> VarDesc::mutable_tensor_descs() {
}
return res;
default:
PADDLE_THROW(
"Getting 'tensor_descs' is not supported by the type of var "
"%s.",
this->Name());
PADDLE_THROW(platform::errors::Unavailable(
"Getting 'tensor_descs' is not supported by the %s type variable.",
this->Name()));
}
}
......
......@@ -40,7 +40,8 @@ inline proto::VarType::Type ToVarType(int type) {
case proto::VarType::READER:
return static_cast<proto::VarType::Type>(type);
default:
PADDLE_THROW("ToVarType:Unsupported type %d", type);
PADDLE_THROW(platform::errors::Unavailable(
"ToVarType method Unsupported type %d.", type));
}
}
......@@ -66,7 +67,8 @@ inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
visitor(var.Get<FetchList>());
return;
default:
PADDLE_THROW("Not supported visit type, %s", ToTypeName(var.Type()));
PADDLE_THROW(platform::errors::Unavailable("Not supported visit type %s.",
ToTypeName(var.Type())));
}
}
......
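A minimal visitor that compiles against VisitVarType as updated above (hypothetical example; the generic overload absorbs the supported types this caller does not care about):

struct NumelVisitor {
  void operator()(const framework::LoDTensor& t) { VLOG(3) << t.numel(); }
  template <typename T>
  void operator()(const T&) { /* other supported variable types: no-op */ }
};
// framework::VisitVarType(var, NumelVisitor());
// A variable type outside the switch now throws the Unavailable error above.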
......@@ -46,12 +46,14 @@ struct VarIdToTypeIndexMapInitializerImpl {
static_assert(!std::is_same<Type, void>::value, "Type cannot be void");
constexpr int kId = VarTypeTrait<Type>::kId;
auto type = std::type_index(typeid(Type));
PADDLE_ENFORCE(id_to_type->count(kId) == 0,
"Registered duplicate type id %d for type %s", kId,
type.name());
PADDLE_ENFORCE(type_to_id->count(type) == 0,
"Registered duplicate type_index %s for id %d", type.name(),
kId);
PADDLE_ENFORCE_EQ(
id_to_type->count(kId), 0,
platform::errors::AlreadyExists(
"Registered duplicate type id %d for type %s.", kId, type.name()));
PADDLE_ENFORCE_EQ(
type_to_id->count(type), 0,
platform::errors::AlreadyExists(
"Registered duplicate type index %s for id %d.", type.name(), kId));
id_to_type->emplace(kId, type);
type_to_id->emplace(type, kId);
VarIdToTypeIndexMapInitializerImpl<kStart + 1, kEnd,
......@@ -79,15 +81,17 @@ struct VarIdToTypeIndexMapHolder {
public:
static const std::type_index &ToTypeIndex(int var_id) {
auto it = Instance().id_to_type_map_.find(var_id);
PADDLE_ENFORCE(it != Instance().id_to_type_map_.end(),
"VarId %d is not registered.", var_id);
PADDLE_ENFORCE_NE(it, Instance().id_to_type_map_.end(),
platform::errors::NotFound(
"Variable Id %d is not registered.", var_id));
return it->second;
}
static int ToTypeId(const std::type_index &type) {
auto it = Instance().type_to_id_map_.find(type);
PADDLE_ENFORCE(it != Instance().type_to_id_map_.end(),
"VarType %s is not registered.", type.name());
PADDLE_ENFORCE_NE(it, Instance().type_to_id_map_.end(),
platform::errors::NotFound(
"Variable Type %s is not registered.", type.name()));
return it->second;
}
......
......@@ -50,11 +50,11 @@ void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
} else if (var_type == proto::VarType::RAW) {
// GetMutable will be called in operator
} else {
PADDLE_THROW(
PADDLE_THROW(platform::errors::Unavailable(
"Variable type %d is not in "
"[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
"LOD_RANK_TABLE, PLACE_LIST, READER, RAW]",
var_type);
"LOD_RANK_TABLE, PLACE_LIST, READER, RAW].",
var_type));
}
}
......@@ -76,7 +76,8 @@ void CopyVariable(const Variable &src_var, Variable *dst_var) {
auto *dst_t = tmp_grad_slr->mutable_value();
framework::TensorCopy(src_t, cpu_place, dst_t);
} else {
PADDLE_THROW("unknown var type to copy");
PADDLE_THROW(
platform::errors::Unavailable("Unknown variable type to copy."));
}
}
......
......@@ -27,8 +27,9 @@ Analyzer::Analyzer() {}
void Analyzer::Run(Argument *argument) { RunAnalysis(argument); }
void Analyzer::RunAnalysis(Argument *argument) {
PADDLE_ENFORCE(argument->analysis_passes_valid(),
"analsis_passes is not valid in the argument.");
PADDLE_ENFORCE_EQ(argument->analysis_passes_valid(), true,
platform::errors::InvalidArgument(
"analsis_passes is not valid in the argument."));
const bool disable_logs = argument->disable_logs();
for (auto &pass : argument->analysis_passes()) {
if (!disable_logs) {
......@@ -38,7 +39,8 @@ void Analyzer::RunAnalysis(Argument *argument) {
continue;
auto *ptr = PassRegistry::Global().Retreive(pass);
PADDLE_ENFORCE_NOT_NULL(ptr, "no analysis pass called %s", pass);
PADDLE_ENFORCE_NOT_NULL(ptr, platform::errors::PreconditionNotMet(
"no analysis pass called %s", pass));
ptr->Run(argument);
}
}
......
......@@ -75,9 +75,14 @@ void TestWord2vecPrediction(const std::string& model_path) {
std::vector<PaddleTensor> outputs;
CHECK(predictor->Run(slots, &outputs));
PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
PADDLE_ENFORCE_EQ(outputs.size(), 1UL,
platform::errors::PreconditionNotMet(
"Output size should be 1, but got %d", outputs.size()));
// Check the output buffer size and result of each tid.
PADDLE_ENFORCE_EQ(outputs.front().data.length(), 33168UL);
PADDLE_ENFORCE_EQ(outputs.front().data.length(), 33168UL,
platform::errors::PreconditionNotMet(
"Output's data length should be 33168 but got %d",
outputs.front().data.length()));
float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float);
......
......@@ -79,7 +79,9 @@ struct Argument {
#define DECL_ARGUMENT_FIELD(field__, Field, type__) \
public: \
type__& field__() { \
PADDLE_ENFORCE(Has(#field__), "There is no such field"); \
PADDLE_ENFORCE_EQ( \
Has(#field__), true, \
platform::errors::PreconditionNotMet("There is no such field")); \
return field__##_; \
} \
void Set##Field(const type__& x) { \
......@@ -98,8 +100,11 @@ struct Argument {
#define DECL_ARGUMENT_UNIQUE_FIELD(field__, Field, type__) \
public: \
type__& field__() { \
PADDLE_ENFORCE_NOT_NULL(field__##_); \
PADDLE_ENFORCE(Has(#field__)); \
PADDLE_ENFORCE_NOT_NULL(field__##_, platform::errors::PreconditionNotMet( \
"filed should not be null.")); \
PADDLE_ENFORCE_EQ( \
Has(#field__), true, \
platform::errors::PreconditionNotMet("There is no such field")); \
return *static_cast<type__*>(field__##_.get()); \
} \
void Set##Field(type__* x) { \
......@@ -113,11 +118,15 @@ struct Argument {
} \
DECL_ARGUMENT_FIELD_VALID(field__); \
type__* field__##_ptr() { \
PADDLE_ENFORCE(Has(#field__)); \
PADDLE_ENFORCE_EQ( \
Has(#field__), true, \
platform::errors::PreconditionNotMet("There is no such field")); \
return static_cast<type__*>(field__##_.get()); \
} \
type__* Release##Field() { \
PADDLE_ENFORCE(Has(#field__)); \
PADDLE_ENFORCE_EQ( \
Has(#field__), true, \
platform::errors::PreconditionNotMet("There is no such field")); \
valid_fields_.erase(#field__); \
return static_cast<type__*>(field__##_.release()); \
} \
......@@ -218,13 +227,19 @@ struct Argument {
DECL_ARGUMENT_FIELD(fusion_statis, FusionStatis, fusion_statis_t);
// Only used in paddle-lite subgraph.
DECL_ARGUMENT_FIELD(cpu_math_library_num_threads, CpuMathLibraryNumThreads,
int);
private:
std::unordered_set<std::string> valid_fields_;
};
#define ARGUMENT_CHECK_FIELD(argument__, fieldname__) \
PADDLE_ENFORCE(argument__->Has(#fieldname__), \
"the argument field [%s] should be set", #fieldname__);
PADDLE_ENFORCE_EQ( \
argument__->Has(#fieldname__), true, \
platform::errors::PreconditionNotMet( \
"the argument field [%s] should be set", #fieldname__));
} // namespace analysis
} // namespace inference
......
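Hand-expanding one of these macros makes the new enforcement concrete; for DECL_ARGUMENT_FIELD(model_dir, ModelDir, std::string) the accessor now reads roughly as below (a hand-written sketch, not generated output):

std::string& model_dir() {
  PADDLE_ENFORCE_EQ(
      Has("model_dir"), true,
      platform::errors::PreconditionNotMet("There is no such field"));
  return model_dir_;
}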
......@@ -73,12 +73,15 @@ struct DataTypeNamer {
template <typename T>
const std::string &repr() const {
auto x = std::type_index(typeid(T));
PADDLE_ENFORCE(dic_.count(x), "unknown type for representation");
PADDLE_ENFORCE_GT(dic_.count(x), 0, platform::errors::PreconditionNotMet(
"unknown type for representation"));
return dic_.at(x);
}
const std::string &repr(const std::type_index &type) const { // NOLINT
PADDLE_ENFORCE(dic_.count(type), "unknown type for representation");
PADDLE_ENFORCE_GT(dic_.count(type), 0,
platform::errors::PreconditionNotMet(
"unknown type for representation"));
return dic_.at(type);
}
......@@ -116,7 +119,9 @@ template <typename T>
class OrderedRegistry {
public:
T *Register(const std::string &name, T *x) {
PADDLE_ENFORCE(!dic_.count(name), "duplicate key [%s]", name);
PADDLE_ENFORCE_EQ(dic_.count(name), 0,
platform::errors::PreconditionNotMet(
"There exists duplicate key [%s]", name));
dic_[name] = elements_.size();
elements_.emplace_back(std::unique_ptr<T>(x));
return elements_.back().get();
......@@ -136,14 +141,20 @@ class OrderedRegistry {
template <typename T>
T &GetFromScope(const framework::Scope &scope, const std::string &name) {
framework::Variable *var = scope.FindVar(name);
PADDLE_ENFORCE(var != nullptr);
PADDLE_ENFORCE_NOT_NULL(
var, platform::errors::PreconditionNotMet(
"The var which name is %s should not be nullptr.", name));
return *var->GetMutable<T>();
}
static framework::proto::ProgramDesc LoadProgramDesc(
const std::string &model_path) {
std::ifstream fin(model_path, std::ios::in | std::ios::binary);
PADDLE_ENFORCE(fin.is_open(), "Cannot open file %s", model_path);
PADDLE_ENFORCE_EQ(
fin.is_open(), true,
platform::errors::NotFound(
"Cannot open file %s, please confirm whether the file exists",
model_path));
fin.seekg(0, std::ios::end);
std::string buffer(fin.tellg(), ' ');
fin.seekg(0, std::ios::beg);
......@@ -188,10 +199,12 @@ static std::string GetDirRoot(const std::string &path) {
static std::string GetOrCreateModelOptCacheDir(const std::string &model_root) {
std::string opt_cache_dir = model_root + "/_opt_cache/";
if (!PathExists(opt_cache_dir)) {
PADDLE_ENFORCE(MKDIR(opt_cache_dir.c_str()) != -1,
PADDLE_ENFORCE_NE(
MKDIR(opt_cache_dir.c_str()), -1,
platform::errors::PreconditionNotMet(
"Can not create optimize cache directory: %s, Make sure you "
"have permission to write",
opt_cache_dir);
opt_cache_dir));
}
return opt_cache_dir;
}
......
......@@ -38,7 +38,9 @@ IRPassManager::IRPassManager(Argument *argument) {
graph_ = std::unique_ptr<Graph>(new Graph(argument->main_program()));
if (argument->Has("scope")) {
auto *scope_ptr = argument->scope_ptr();
PADDLE_ENFORCE(scope_ptr);
PADDLE_ENFORCE_NOT_NULL(scope_ptr,
platform::errors::PreconditionNotMet(
"The scope ptr should not be nullptr."));
graph_->SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
}
......@@ -101,13 +103,17 @@ void IRPassManager::CreatePasses(Argument *argument,
std::string optim_cache_dir = argument->optim_cache_dir();
bool int8_valid =
!(model_from_memory && optim_cache_dir.empty() && enable_int8);
PADDLE_ENFORCE(int8_valid,
PADDLE_ENFORCE_EQ(
int8_valid, true,
platform::errors::PreconditionNotMet(
"When you are in TRT INT8 mode, and load model from "
"memory, you should set optim_cache_dir using "
"config.SetOptimCacheDir()");
PADDLE_ENFORCE(!(model_from_memory && use_static_engine),
"config.SetOptimCacheDir()"));
PADDLE_ENFORCE_EQ(
!(model_from_memory && use_static_engine), true,
platform::errors::PreconditionNotMet(
"When you are using Paddle-TRT, and also using load model "
"from memory, you should set the use_static to false.");
"from memory, you should set the use_static to false."));
if (!optim_cache_dir.empty()) {
pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
......@@ -150,6 +156,8 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("use_xpu", new bool(argument->use_xpu()));
pass->Set("xpu_l3_workspace_size",
new int(argument->xpu_l3_workspace_size()));
pass->Set("cpu_math_library_num_threads",
new int(argument->cpu_math_library_num_threads()));
}
disable_logs_ = argument->disable_logs();
if (pass_name == "fc_fuse_pass") {
......
......@@ -244,6 +244,7 @@ void LiteSubgraphPass::SetUpEngine(
bool enable_int8 = Get<bool>("enable_int8");
bool use_xpu = Get<bool>("use_xpu");
int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size");
int cpu_math_library_num_threads = Get<int>("cpu_math_library_num_threads");
lite_api::TargetType target_type;
if (use_gpu) {
......@@ -263,11 +264,12 @@ void LiteSubgraphPass::SetUpEngine(
// Notice: The ordering here determines the device where the
// input tensor of the Lite engine is located, and then affects
// whether tensor sharing is feasible.
paddle::lite::Place({target_type, precision_type}),
paddle::lite::Place({target_type, PRECISION(kInt64)}),
paddle::lite::Place({target_type, PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}),
paddle::lite_api::Place({target_type, precision_type}),
paddle::lite_api::Place({target_type, PRECISION(kInt64)}),
paddle::lite_api::Place({target_type, PRECISION(kFloat)}),
paddle::lite_api::Place({TARGET(kHost), PRECISION(kFloat)}),
};
config.cpu_math_library_num_threads = cpu_math_library_num_threads;
config.xpu_l3_workspace_size = xpu_l3_workspace_size;
if (dump_model) {
lite::StrToBinaryFile("./model.bin", config.model);
......
......@@ -123,7 +123,9 @@ void RenameAndGetOutputs(
auto add_block_var = [&](const std::string &graph_arg,
const std::string &block_arg) {
auto arg_var_node = graph_var_map.find(graph_arg);
PADDLE_ENFORCE(arg_var_node != graph_var_map.end());
PADDLE_ENFORCE_NE(arg_var_node, graph_var_map.end(),
platform::errors::InvalidArgument(
"Can not find %s in graph_var_map", graph_arg));
auto *var_t = block_desc->Var(block_arg);
var_t->SetShape(arg_var_node->second->Var()->GetShape());
var_t->SetDataType(arg_var_node->second->Var()->GetDataType());
......@@ -133,7 +135,10 @@ void RenameAndGetOutputs(
framework::proto::OpDesc *op = block_desc->Op(index)->Proto();
framework::OpDesc op_desc(*op, nullptr);
auto correspond_node = subgraph_nodes[index];
PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type());
PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type(),
platform::errors::PreconditionNotMet(
"We should get %s, but get %s", op->type(),
correspond_node->Name()));
std::unordered_map<std::string, size_t> var2id;
std::unordered_map<std::string, framework::ir::Node *> in_vars;
......
......@@ -97,7 +97,9 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
std::vector<std::string> *repetitive_params) const {
auto *op_desc = node->Op();
auto &subgraph = *framework::ir::Agent(node).subgraph();
PADDLE_ENFORCE(!subgraph.empty());
PADDLE_ENFORCE_EQ(subgraph.empty(), false,
platform::errors::PreconditionNotMet(
"The subgraph should not be empty."));
framework::ProgramDesc *program_desc =
Get<framework::ProgramDesc *>("program");
......@@ -194,12 +196,17 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
// to Tensor.
std::vector<std::string> output_mapping;
for (auto name : output_names) {
PADDLE_ENFORCE(output_name_map.count(name) != 0);
PADDLE_ENFORCE_NE(output_name_map.count(name), 0,
platform::errors::PreconditionNotMet(
"The output_name_map should have %s", name));
output_mapping.push_back(output_name_map[name]);
}
PADDLE_ENFORCE(!output_mapping.empty());
PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(),
"the block has no var-desc");
PADDLE_ENFORCE_EQ(output_mapping.empty(), false,
platform::errors::PreconditionNotMet(
"The output_mapping should not be empty."));
PADDLE_ENFORCE_EQ(
!block_desc.Proto()->vars().empty(), true,
platform::errors::PreconditionNotMet("the block has no var-desc"));
// Set attrs
op_desc->SetType("tensorrt_engine");
......
......@@ -13,6 +13,8 @@
// limitations under the License.
#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
#include <memory>
#include <utility>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/analysis/ir_pass_manager.h"
......@@ -31,7 +33,10 @@ void IrAnalysisPass::RunImpl(Argument* argument) {
// Apply passes.
IRPassManager the_ir_manager(argument);
graph = the_ir_manager.Apply(std::move(graph));
PADDLE_ENFORCE_GT(graph->Nodes().size(), 0);
PADDLE_ENFORCE_GT(
graph->Nodes().size(), 0,
platform::errors::PreconditionNotMet(
"The graph nodes size should be greater than 0, but got 0"));
argument->SetMainGraph(graph.release());
CollectFusionStatis(argument);
}
......
......@@ -31,7 +31,9 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
if (!argument->scope_valid()) {
argument->SetScope(new framework::Scope);
}
PADDLE_ENFORCE(argument->use_gpu_valid());
PADDLE_ENFORCE_EQ(argument->use_gpu_valid(), true,
platform::errors::PreconditionNotMet(
"The use_gpu field should be valid"));
// The load program should run on the same device with the inference program,
// so that the parameters will on the same device, or they will keep copying
......@@ -51,14 +53,17 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
argument->model_from_memory_valid() && argument->model_from_memory());
argument->SetMainProgram(program.release());
} else {
PADDLE_THROW(
"either model_dir or (program path and parameter path) should be set.");
PADDLE_THROW(platform::errors::PreconditionNotMet(
"either model_dir or (program path and parameter path) should be "
"set."));
}
auto graph = std::unique_ptr<Graph>(new Graph(argument->main_program()));
argument->SetMainGraph(graph.release());
auto *scope_ptr = argument->scope_ptr();
PADDLE_ENFORCE(scope_ptr);
PADDLE_ENFORCE_NOT_NULL(scope_ptr,
platform::errors::PreconditionNotMet(
"The scope ptr should not be nullptr."));
argument->main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
}
......
......@@ -31,7 +31,8 @@ void IrInferCleanGraphPass::RunImpl(Argument* argument) {
std::unordered_set<const framework::ir::Node*> invalid_nodes;
int valid_op = 0;
for (auto* node : graph.Nodes()) {
PADDLE_ENFORCE_NOT_NULL(node);
PADDLE_ENFORCE_NOT_NULL(node, platform::errors::PreconditionNotMet(
"The node should not be nullptr."));
if (is_valid_node(node)) {
invalid_nodes.insert(node);
} else if (node->IsOp()) {
......
......@@ -23,8 +23,12 @@ namespace inference {
namespace analysis {
void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
PADDLE_ENFORCE(argument->scope_valid());
PADDLE_ENFORCE(argument->use_gpu_valid());
PADDLE_ENFORCE_EQ(
argument->scope_valid(), true,
platform::errors::PreconditionNotMet("The scope field should be valid"));
PADDLE_ENFORCE_EQ(argument->use_gpu_valid(), true,
platform::errors::PreconditionNotMet(
"The use_gpu field should be valid"));
platform::Place place;
......@@ -40,7 +44,9 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
LOG(INFO) << "Sync params from CPU to GPU";
PADDLE_ENFORCE(argument->gpu_device_id_valid());
PADDLE_ENFORCE_EQ(argument->gpu_device_id_valid(), true,
platform::errors::PreconditionNotMet(
"The gpu_device_id field should be valid"));
place = platform::CUDAPlace(argument->gpu_device_id());
auto *scope = argument->scope_ptr();
......@@ -56,7 +62,8 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
continue;
}
auto *var = scope->FindLocalVar(var_name);
PADDLE_ENFORCE(var != nullptr);
PADDLE_ENFORCE_NOT_NULL(var, platform::errors::PreconditionNotMet(
"The var should not be nullptr"));
if (var->IsType<framework::LoDTensor>() ||
var->IsType<framework::Tensor>()) {
auto *t = var->GetMutable<framework::LoDTensor>();
......
......@@ -224,7 +224,9 @@ void UpdateOpDescsByReuse(
// modify the graph
for (auto input_node : node->inputs) {
PADDLE_ENFORCE(input_node->IsVar());
PADDLE_ENFORCE_EQ(input_node->IsVar(), true,
platform::errors::PreconditionNotMet(
"The input node should be a variable."));
std::string input_node_name = input_node->Name();
if (reuse_table.count(input_node_name) &&
reuse_table.at(input_node_name) != input_node_name) {
......@@ -246,7 +248,9 @@ void UpdateOpDescsByReuse(
// modify the graph
for (auto out_node : node->outputs) {
PADDLE_ENFORCE(out_node->IsVar());
PADDLE_ENFORCE_EQ(out_node->IsVar(), true,
platform::errors::PreconditionNotMet(
"The output node should be a variable."));
std::string out_node_name = out_node->Name();
if (reuse_table.count(out_node_name) &&
reuse_table.at(out_node_name) != out_node_name) {
......
......@@ -53,12 +53,10 @@ if(WITH_TESTING)
inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS paddle_fluid_shared
ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${PYTHON_TESTS_DIR}/book)
set_tests_properties(test_api_impl PROPERTIES DEPENDS test_image_classification)
set_tests_properties(test_api_impl PROPERTIES LABELS "RUN_TYPE=DIST")
elseif(WIN32)
inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS ${inference_deps}
ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${PYTHON_TESTS_DIR}/book)
set_tests_properties(test_api_impl PROPERTIES DEPENDS test_image_classification)
set_tests_properties(test_api_impl PROPERTIES LABELS "RUN_TYPE=DIST")
endif()
endif()
......
......@@ -230,7 +230,8 @@ void AnalysisConfig::EnableMkldnnBfloat16() {
MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_,
"MkldnnQuantizer was not enabled yet.");
platform::errors::PreconditionNotMet(
"MkldnnQuantizer was not enabled yet."));
return mkldnn_quantizer_config_.get();
}
......
......@@ -169,7 +169,8 @@ bool AnalysisPredictor::PrepareScope(
if (parent_scope) {
PADDLE_ENFORCE_NOT_NULL(
parent_scope,
"Both program and parent_scope should be set in Clone mode.");
platform::errors::PreconditionNotMet(
"Both program and parent_scope should be set in Clone mode."));
scope_ = parent_scope;
status_is_cloned_ = true;
} else {
......@@ -235,7 +236,9 @@ bool AnalysisPredictor::PrepareExecutor() {
executor_->Prepare(sub_scope_, *inference_program_, 0,
config_.use_feed_fetch_ops_);
PADDLE_ENFORCE_NOT_NULL(sub_scope_);
PADDLE_ENFORCE_NOT_NULL(sub_scope_,
platform::errors::PreconditionNotMet(
"The sub_scope should not be nullptr."));
return true;
}
......@@ -297,7 +300,8 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
timer.tic();
// set feed variable
framework::Scope *scope = sub_scope_ ? sub_scope_ : scope_.get();
PADDLE_ENFORCE_NOT_NULL(scope, "The scope should not be nullptr.");
PADDLE_ENFORCE_NOT_NULL(scope, platform::errors::PreconditionNotMet(
"The scope should not be nullptr."));
if (!SetFeed(inputs, scope)) {
LOG(ERROR) << "fail to set feed";
return false;
......@@ -399,7 +403,11 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
outputs->resize(fetches_.size());
for (size_t i = 0; i < fetches_.size(); ++i) {
int idx = BOOST_GET_CONST(int, fetches_[i]->GetAttr("col"));
PADDLE_ENFORCE((size_t)idx == i);
PADDLE_ENFORCE_EQ(
static_cast<size_t>(idx), i,
platform::errors::InvalidArgument(
"Fetch op's col attr(%d) should be equal to the index(%d)", idx,
i));
framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx);
auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var);
......@@ -435,10 +443,12 @@ void AnalysisPredictor::PrepareArgument() {
if (!config_.model_dir().empty()) {
argument_.SetModelDir(config_.model_dir());
} else {
PADDLE_ENFORCE(
!config_.params_file().empty(),
"Either model_dir or (param_file, prog_file) should be set.");
PADDLE_ENFORCE(!config_.prog_file().empty());
PADDLE_ENFORCE_EQ(config_.params_file().empty(), false,
platform::errors::PreconditionNotMet(
"Either model_dir or param_file should be set."));
PADDLE_ENFORCE_EQ(config_.prog_file().empty(), false,
platform::errors::PreconditionNotMet(
"Either model_dir or prog_file should be set."));
std::string dir = inference::analysis::GetDirRoot(config_.prog_file());
argument_.SetModelProgramPath(config_.prog_file());
......@@ -461,6 +471,8 @@ void AnalysisPredictor::PrepareArgument() {
}
if (config_.lite_engine_enabled()) {
argument_.SetCpuMathLibraryNumThreads(
config_.cpu_math_library_num_threads());
argument_.SetLitePrecisionMode(config_.lite_precision_mode_);
argument_.SetLitePassesFilter(config_.lite_passes_filter_);
argument_.SetLiteOpsFilter(config_.lite_ops_filter_);
......@@ -501,7 +513,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
PrepareArgument();
Analyzer().Run(&argument_);
PADDLE_ENFORCE(argument_.scope_valid());
PADDLE_ENFORCE_EQ(
argument_.scope_valid(), true,
platform::errors::InvalidArgument("The argument scope should be valid."));
VLOG(5) << "to prepare executor";
ARGUMENT_CHECK_FIELD((&argument_), ir_analyzed_program);
inference_program_.reset(
......@@ -523,8 +537,10 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
FLAGS_minloglevel = 2; // GLOG_ERROR
}
VLOG(3) << "create AnalysisConfig";
PADDLE_ENFORCE(config.is_valid(),
"Note: Each config can only be used for one predictor.");
PADDLE_ENFORCE_EQ(
config.is_valid(), true,
platform::errors::InvalidArgument(
"Note: Each config can only be used for one predictor."));
if (config.use_gpu()) {
static std::once_flag gflags_initialized;
......@@ -621,7 +637,9 @@ bool AnalysisPredictor::MkldnnQuantize() {
}
void AnalysisPredictor::PrepareFeedFetch() {
PADDLE_ENFORCE_NOT_NULL(sub_scope_);
PADDLE_ENFORCE_NOT_NULL(sub_scope_,
platform::errors::InvalidArgument(
"The sub_scope should not be nullptr."));
CreateFeedFetchVar(sub_scope_);
for (auto *op : inference_program_->Block(0).AllOps()) {
if (op->Type() == "feed") {
......@@ -644,7 +662,8 @@ void AnalysisPredictor::PrepareFeedFetch() {
}
void AnalysisPredictor::CreateFeedFetchVar(framework::Scope *scope) {
PADDLE_ENFORCE_NOT_NULL(scope);
PADDLE_ENFORCE_NOT_NULL(scope, platform::errors::InvalidArgument(
"The scope should not be nullptr."));
auto *var = scope->Var("feed");
var->GetMutable<framework::FeedList>();
var = scope->Var("fetch");
......@@ -665,7 +684,8 @@ AnalysisPredictor::GetInputTensorShape() {
std::vector<std::string> names = GetInputNames();
for (std::string name : names) {
auto *var = inference_program_->Block(0).FindVar(name);
PADDLE_ENFORCE_NOT_NULL(var, "input %s does not exist.", name);
PADDLE_ENFORCE_NOT_NULL(var, platform::errors::PreconditionNotMet(
"Input %s does not exist.", name));
input_shapes[name] = var->GetShape();
}
return input_shapes;
......@@ -681,7 +701,11 @@ std::vector<std::string> AnalysisPredictor::GetOutputNames() {
std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
const std::string &name) {
PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name);
PADDLE_ENFORCE_NOT_NULL(
executor_->scope()->FindVar(name),
platform::errors::PreconditionNotMet(
"The variable named %s is not found in the scope of the exector.",
name));
std::unique_ptr<ZeroCopyTensor> res(
new ZeroCopyTensor(static_cast<void *>(executor_->scope())));
res->input_or_output_ = true;
......@@ -698,7 +722,11 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
const std::string &name) {
PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name);
PADDLE_ENFORCE_NOT_NULL(
executor_->scope()->FindVar(name),
platform::errors::PreconditionNotMet(
"he variable named %s is not found in the scope of the exector.",
name));
std::unique_ptr<ZeroCopyTensor> res(
new ZeroCopyTensor(static_cast<void *>(executor_->scope())));
res->input_or_output_ = false;
......@@ -759,8 +787,11 @@ bool AnalysisPredictor::LoadProgramDesc() {
std::string pb_content;
// Read binary
std::ifstream fin(filename, std::ios::in | std::ios::binary);
PADDLE_ENFORCE(static_cast<bool>(fin.is_open()), "Cannot open file %s",
filename);
PADDLE_ENFORCE_EQ(
static_cast<bool>(fin.is_open()), true,
platform::errors::NotFound(
"Cannot open file %s, please confirm whether the file is normal.",
filename));
fin.seekg(0, std::ios::end);
pb_content.resize(fin.tellg());
fin.seekg(0, std::ios::beg);
......@@ -777,7 +808,8 @@ bool AnalysisPredictor::LoadProgramDesc() {
bool AnalysisPredictor::LoadParameters() {
PADDLE_ENFORCE_NOT_NULL(inference_program_.get(),
"The inference program should be loaded first.");
platform::errors::PreconditionNotMet(
"The inference program should be loaded first."));
const auto &global_block = inference_program_->MutableBlock(0);
......@@ -853,8 +885,9 @@ void AnalysisPredictor::ClearIntermediateTensor() {
#if PADDLE_WITH_TENSORRT
bool AnalysisPredictor::SaveTrtCalibToDisk() {
PADDLE_ENFORCE(config_.tensorrt_engine_enabled(),
"This func can be invoked only in trt mode");
PADDLE_ENFORCE_EQ(config_.tensorrt_engine_enabled(), true,
platform::errors::PreconditionNotMet(
"This func can be invoked only in trt mode"));
auto &block = inference_program_->Block(0);
for (auto &op_desc : block.AllOps()) {
if (op_desc->Type() == "tensorrt_engine") {
......
......@@ -62,9 +62,9 @@ PaddleBuf &PaddleBuf::operator=(const PaddleBuf &other) {
if (other.length() && other.data())
memcpy(data_, other.data(), other.length());
else if (other.length())
PADDLE_THROW(
PADDLE_THROW(platform::errors::InvalidArgument(
"Invalid argument, null pointer data with length %u is passed",
other.length());
other.length()));
length_ = other.length();
memory_owned_ = true;
......@@ -92,7 +92,8 @@ void PaddleBuf::Resize(size_t length) {
length_ = length;
memory_owned_ = true;
} else {
PADDLE_THROW("The memory is allocated externally, can not Resized");
PADDLE_THROW(platform::errors::PreconditionNotMet(
"The memory is allocated externally, can not Resized"));
}
}
......@@ -105,7 +106,11 @@ void PaddleBuf::Reset(void *data, size_t length) {
void PaddleBuf::Free() {
if (memory_owned_ && data_) {
PADDLE_ENFORCE_GT(length_, 0UL);
PADDLE_ENFORCE_GT(
length_, 0UL,
platform::errors::PreconditionNotMet(
"The memory used in PaddleBuf %d should be greater than 0",
length_));
delete[] static_cast<char *>(data_);
data_ = nullptr;
length_ = 0;
......
......@@ -87,7 +87,9 @@ bool NativePaddlePredictor::Init(
if (parent_scope) {
scope_ = parent_scope;
sub_scope_ = &(parent_scope->NewScope());
PADDLE_ENFORCE_NOT_NULL(sub_scope_, "create sub scope fail");
PADDLE_ENFORCE_NOT_NULL(sub_scope_,
platform::errors::PreconditionNotMet(
"The sub_scope should not be nullptr."));
} else {
paddle::framework::InitDevices(false);
scope_.reset(new paddle::framework::Scope());
......@@ -182,7 +184,10 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
// Hot fix the bug that result diff in multi-thread.
// TODO(Superjomn) re-implement a real clone here.
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<NativePaddlePredictor *>(cls.get()));
PADDLE_ENFORCE_NOT_NULL(
dynamic_cast<NativePaddlePredictor *>(cls.get()),
platform::errors::PreconditionNotMet(
"Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) {
LOG(ERROR) << "fail to call Init";
return nullptr;
......@@ -224,8 +229,13 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
return false;
}
PADDLE_ENFORCE_NOT_NULL(input_ptr);
PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data());
PADDLE_ENFORCE_NOT_NULL(input_ptr,
platform::errors::InvalidArgument(
"The input_ptr should not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(
inputs[i].data.data(),
platform::errors::InvalidArgument(
"The data of input tensor should not be null."));
if (platform::is_cpu_place(place_)) {
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
......@@ -241,7 +251,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
platform::CPUPlace(), inputs[i].data.data(),
inputs[i].data.length(), dev_ctx->stream());
#else
PADDLE_THROW("Not compile with CUDA, should not reach here.");
PADDLE_THROW(platform::errors::Unavailable(
"Not compile with CUDA, should not reach here."));
#endif
}
......@@ -287,7 +298,11 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
outputs->resize(fetchs_.size());
for (size_t i = 0; i < fetchs_.size(); ++i) {
int idx = BOOST_GET_CONST(int, fetchs_[i]->GetAttr("col"));
PADDLE_ENFORCE((size_t)idx == i);
PADDLE_ENFORCE_EQ(
static_cast<size_t>(idx), i,
platform::errors::InvalidArgument(
"Fetch op's col attr(%d) should be equal to the index(%d)", idx,
i));
framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx);
auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var);
......@@ -318,10 +333,15 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
VLOG(3) << "create NativePaddlePredictor";
if (config.use_gpu) {
// 1. GPU memory
PADDLE_ENFORCE_GE(
config.fraction_of_gpu_memory, 0.f,
"fraction_of_gpu_memory in the config should be set to range (0., 1.]");
PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
PADDLE_ENFORCE_GE(config.fraction_of_gpu_memory, 0.f,
platform::errors::InvalidArgument(
"fraction_of_gpu_memory in the config should be set "
"to range (0., 1.]"));
PADDLE_ENFORCE_GE(config.device, 0,
platform::errors::PreconditionNotMet(
"Invalid device id %d, the device id should be "
"greater than or equal to 0.",
config.device));
std::vector<std::string> flags;
if (config.fraction_of_gpu_memory >= 0.0f ||
config.fraction_of_gpu_memory <= 0.95f) {
......@@ -336,7 +356,9 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
PADDLE_ENFORCE_NOT_NULL(
dynamic_cast<NativePaddlePredictor *>(predictor.get()));
dynamic_cast<NativePaddlePredictor *>(predictor.get()),
platform::errors::PreconditionNotMet(
"Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
return nullptr;
}
......
......@@ -21,15 +21,21 @@
namespace paddle {
void ZeroCopyTensor::Reshape(const std::vector<int> &shape) {
PADDLE_ENFORCE(!name_.empty(),
PADDLE_ENFORCE_EQ(
name_.empty(), false,
platform::errors::PreconditionNotMet(
"Need to SetName first, so that the corresponding tensor can "
"be retrieved.");
PADDLE_ENFORCE(input_or_output_,
"Can't reshape the output tensor, it is readonly");
PADDLE_ENFORCE(scope_);
"be retrieved."));
PADDLE_ENFORCE_EQ(input_or_output_, true,
platform::errors::PermissionDenied(
"Can't reshape the output tensor, it is readonly"));
PADDLE_ENFORCE_NOT_NULL(scope_, platform::errors::PreconditionNotMet(
"The scope should not be nullptr."));
auto *scope = static_cast<framework::Scope *>(scope_);
auto *var = scope->FindVar(name_);
PADDLE_ENFORCE(var, "No tensor called [%s] in the runtime scope", name_);
PADDLE_ENFORCE_NOT_NULL(
var, platform::errors::PreconditionNotMet(
"No tensor called [%s] in the runtime scope", name_));
auto *tensor = var->GetMutable<framework::LoDTensor>();
tensor->Resize(framework::make_ddim(shape));
}
......@@ -45,8 +51,10 @@ T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
EAGER_GET_TENSOR;
PADDLE_ENFORCE_GT(
tensor->numel(), 0,
"You should call ZeroCopyTensor::Reshape(const std::vector<int> &shape)"
"function before retrieving mutable_data from input tensor.");
platform::errors::PreconditionNotMet(
"You should call ZeroCopyTensor::Reshape(const std::vector<int> "
"&shape)"
"function before retrieving mutable_data from input tensor."));
switch (static_cast<int>(place)) {
case static_cast<int>(PaddlePlace::kCPU): {
return tensor->mutable_data<T>(platform::CPUPlace());
......@@ -55,7 +63,8 @@ T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
return tensor->mutable_data<T>(platform::CUDAPlace(device_));
}
default:
PADDLE_THROW("Unsupported place: %d", static_cast<int>(place));
PADDLE_THROW(platform::errors::Unavailable("Unsupported place: %d",
static_cast<int>(place)));
break;
}
return nullptr;
......@@ -96,10 +105,11 @@ PaddleDType ZeroCopyTensor::type() const {
template <typename T>
void ZeroCopyTensor::copy_from_cpu(const T *data) {
EAGER_GET_TENSOR;
PADDLE_ENFORCE_GE(
tensor->numel(), 0,
"You should call ZeroCopyTensor::Reshape(const std::vector<int> &shape)"
"function before copying data from cpu.");
PADDLE_ENFORCE_GE(tensor->numel(), 0,
platform::errors::PreconditionNotMet(
"You should call ZeroCopyTensor::Reshape(const "
"std::vector<int> &shape)"
"function before copying data from cpu."));
size_t ele_size = tensor->numel() * sizeof(T);
if (place_ == PaddlePlace::kCPU) {
......@@ -116,7 +126,8 @@ void ZeroCopyTensor::copy_from_cpu(const T *data) {
memory::Copy(gpu_place, static_cast<void *>(t_data), platform::CPUPlace(),
data, ele_size, dev_ctx->stream());
#else
PADDLE_THROW("Not compiled with CUDA, should not reach here.");
PADDLE_THROW(platform::errors::Unavailable(
"Not compiled with CUDA, should not reach here."));
#endif
}
}
......@@ -141,7 +152,8 @@ void ZeroCopyTensor::copy_to_cpu(T *data) {
cudaStreamSynchronize(dev_ctx->stream());
#else
PADDLE_THROW("Not compile with CUDA, should not reach here.");
PADDLE_THROW(platform::errors::Unavailable(
"Not compile with CUDA, should not reach here."));
#endif
}
}
......@@ -176,20 +188,27 @@ template PD_INFER_DECL uint8_t *ZeroCopyTensor::mutable_data<uint8_t>(
PaddlePlace place);
void *ZeroCopyTensor::FindTensor() const {
PADDLE_ENFORCE(!name_.empty(),
PADDLE_ENFORCE_EQ(
name_.empty(), false,
platform::errors::PreconditionNotMet(
"Need to SetName first, so that the corresponding tensor can "
"be retrieved.");
PADDLE_ENFORCE(scope_);
"be retrieved."));
PADDLE_ENFORCE_NOT_NULL(scope_, platform::errors::PreconditionNotMet(
"The scope should not be nullptr."));
auto *scope = static_cast<framework::Scope *>(scope_);
auto *var = scope->FindVar(name_);
PADDLE_ENFORCE(var, "No tensor called [%s] in the runtime scope", name_);
PADDLE_ENFORCE_NOT_NULL(
var, platform::errors::PreconditionNotMet(
"No tensor called [%s] in the runtime scope", name_));
auto *tensor = var->GetMutable<framework::LoDTensor>();
return tensor;
}
std::vector<int> ZeroCopyTensor::shape() const {
EAGER_GET_TENSOR;
PADDLE_ENFORCE(tensor_, "not found tensor called %s in the scope", name_);
PADDLE_ENFORCE_NOT_NULL(
tensor_, platform::errors::PreconditionNotMet(
"Not found tensor called %s in the scope", name_));
return framework::vectorize<int>(tensor->dims());
}
......
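Taken together, the preconditions enforced above order the ZeroCopyTensor calls as in this hedged sketch (the predictor and the tensor name "x" are hypothetical):

auto input = predictor->GetInputTensor("x");   // binds name_ and scope_
input->Reshape({1, 3});                        // only valid for input tensors
std::vector<float> buf = {1.f, 2.f, 3.f};
input->copy_from_cpu(buf.data());              // requires Reshape first (numel > 0)
// On a GPU place in a non-CUDA build, copy_from_cpu/copy_to_cpu now throw
// the typed Unavailable errors above.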
......@@ -112,16 +112,19 @@ static T convert(const std::string &item,
std::string message =
"invalid_argument exception when try to convert : " + item;
LOG(ERROR) << message;
PADDLE_THROW(message);
PADDLE_THROW(platform::errors::InvalidArgument(
"invalid_argument exception when try to convert %s.", item));
} catch (std::out_of_range &e) {
std::string message =
"out_of_range exception when try to convert : " + item;
LOG(ERROR) << message;
PADDLE_THROW(message);
PADDLE_THROW(platform::errors::InvalidArgument(
"out_of_range exception when try to convert %s.", item));
} catch (...) {
std::string message = "unexpected exception when try to convert " + item;
LOG(ERROR) << message;
PADDLE_THROW(message);
PADDLE_THROW(platform::errors::InvalidArgument(
"unexpected exception when try to convert %s.", item));
}
return res;
}
......@@ -353,7 +356,8 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
double batch_latency, int epoch = 1,
const framework::proto::VarType::Type data_type =
framework::proto::VarType::FP32) {
PADDLE_ENFORCE_GT(batch_size, 0, "Non-positive batch size.");
PADDLE_ENFORCE_GT(batch_size, 0, platform::errors::InvalidArgument(
"Non-positive batch size."));
double sample_latency = batch_latency / batch_size;
LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid
<< " ======";
......
......@@ -62,9 +62,12 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
if (scales_.find(var_name) != scales_.end()) continue;
auto* var = predictor_.sub_scope_->FindVar(var_name);
PADDLE_ENFORCE(var, "%s is not in the scope", var_name);
PADDLE_ENFORCE(var->IsType<LoDTensor>(),
"Only support lod tensor now.");
PADDLE_ENFORCE_NOT_NULL(var,
platform::errors::PreconditionNotMet(
"%s is not in the scope", var_name));
PADDLE_ENFORCE_EQ(var->IsType<LoDTensor>(), true,
platform::errors::PreconditionNotMet(
"Only support lod tensor now."));
LoDTensor* var_tensor = var->GetMutable<LoDTensor>();
// force unsigned type if already know it
......@@ -82,9 +85,11 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
} else if (op->Type() == "transpose2" ||
op->Type() == "reshape2" || op->Type() == "pool2d") {
auto input_var_name = op->Input("X")[0];
PADDLE_ENFORCE(scales_.find(input_var_name) != scales_.end(),
PADDLE_ENFORCE_NE(
scales_.find(input_var_name), scales_.end(),
platform::errors::PreconditionNotMet(
"Input scales must be calculated before the "
"output scales to infer if output is unsigned.");
"output scales to infer if output is unsigned."));
if (scales_.find(input_var_name) != scales_.end()) {
scales_[var_name] = scales_[input_var_name];
}
......@@ -94,10 +99,11 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
is_unsigned = true;
double min_scale = std::numeric_limits<double>::max();
for (auto input_var_name : op->Input("X")) {
PADDLE_ENFORCE(
scales_.find(input_var_name) != scales_.end(),
PADDLE_ENFORCE_NE(
scales_.find(input_var_name), scales_.end(),
platform::errors::PreconditionNotMet(
"Input scales must be calculated before the "
"output scales to infer if output is unsigned.");
"output scales to infer if output is unsigned."));
is_unsigned = is_unsigned && scales_[input_var_name].first;
min_scale = std::min(
min_scale,
......@@ -132,11 +138,12 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale(
auto rule = qconfig_->scale_algo(op_type_name, conn_name);
if (rule == ScaleAlgo::NONE) return;
PADDLE_ENFORCE(
var_tensor.numel() > 0,
PADDLE_ENFORCE_GT(
var_tensor.numel(), 0,
platform::errors::InvalidArgument(
"MkldnnQuantizer: LoDTensor of variable %s for quantization of op "
"%s of connection %s should not be empty.",
var_name, op_type_name, conn_name);
var_name, op_type_name, conn_name));
switch (rule) {
case ScaleAlgo::MAX:
......@@ -205,10 +212,11 @@ AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor(
float min_val = eigen_tensor.minCoeff();
bool is_positive = min_val >= 0.0f;
if (is_unsigned)
PADDLE_ENFORCE(
is_positive,
PADDLE_ENFORCE_EQ(
is_positive, true,
platform::errors::InvalidArgument(
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val);
min_val));
int num_quantized_bins = 255;
......@@ -316,10 +324,11 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
float max_abs = eigen_tensor.abs().maxCoeff();
float min_val = eigen_tensor.minCoeff();
if (is_unsigned)
PADDLE_ENFORCE(
min_val >= 0.0f,
PADDLE_ENFORCE_GE(
min_val, 0.0f,
platform::errors::InvalidArgument(
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val);
min_val));
LoDTensor scale_tensor = CreateScaleTensor();
scale_tensor.data<double>()[0] = 1.0 / max_abs;
......@@ -330,16 +339,19 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
std::pair<bool, LoDTensor>
AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor(
const LoDTensor& var_tensor, bool is_unsigned, bool is_transposed) const {
PADDLE_ENFORCE(var_tensor.dims().size() > 0, "Tensor dimension is empty.");
PADDLE_ENFORCE_GT(
var_tensor.dims().size(), 0,
platform::errors::InvalidArgument("Tensor dimension is empty."));
ConstEigenVectorArrayMap eigen_tensor{var_tensor.data<float>(),
var_tensor.numel(), 1};
float min_val = eigen_tensor.minCoeff();
if (is_unsigned)
PADDLE_ENFORCE(
min_val >= 0.0f,
PADDLE_ENFORCE_GE(
min_val, 0.0f,
platform::errors::InvalidArgument(
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val);
min_val));
auto dims = var_tensor.dims();
constexpr int num_col_dims = 1;
......@@ -367,17 +379,19 @@ AnalysisPredictor::MkldnnQuantizer::Histogram(
const framework::LoDTensor& var_tensor, float min_val, float max_val,
size_t num_bins) const {
PADDLE_ENFORCE_GT(num_bins, 0,
platform::errors::InvalidArgument(
"MkldnnQuantizer: To calculate Histogram, num_bins (" +
std::to_string(num_bins) + ") must be positive.");
PADDLE_ENFORCE_GT(
var_tensor.numel(), 0,
"MkldnnQuantizer: To calculate Histogram, the tensor must not be empty.");
PADDLE_ENFORCE(max_val >= min_val,
std::to_string(num_bins) + ") must be positive."));
PADDLE_ENFORCE_GT(var_tensor.numel(), 0,
platform::errors::InvalidArgument(
"MkldnnQuantizer: To calculate Histogram, the tensor "
"must not be empty."));
PADDLE_ENFORCE_GE(max_val, min_val,
platform::errors::InvalidArgument(
"MkldnnQuantizer: To calculate Histogram, max_val (" +
std::to_string(max_val) +
") must be greater or equal"
std::to_string(max_val) + ") must be greater or equal"
"to min_val (" +
std::to_string(min_val) + ").");
std::to_string(min_val) + ")."));
ConstEigenVectorArrayMap eigen_tensor{var_tensor.data<float>(),
var_tensor.numel(), 1};
auto bin_width = std::abs(max_val - min_val) / num_bins;
......@@ -407,7 +421,8 @@ void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const {
auto graph = std::unique_ptr<Graph>(new Graph(arg.main_program()));
arg.SetMainGraph(graph.release());
auto* scope_ptr = arg.scope_ptr();
PADDLE_ENFORCE(scope_ptr);
PADDLE_ENFORCE_NOT_NULL(scope_ptr, platform::errors::PreconditionNotMet(
"The scope should not be nullptr."));
arg.main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
auto* builder = predictor_.config_.pass_builder();
......@@ -441,7 +456,9 @@ bool AnalysisPredictor::MkldnnQuantizer::RunQuantizePasses() const {
PrepareArgument();
auto& arg = predictor_.argument_;
Analyzer().Run(&arg);
PADDLE_ENFORCE(arg.scope_valid());
PADDLE_ENFORCE_EQ(
arg.scope_valid(), true,
platform::errors::PreconditionNotMet("The scope should be valid."));
VLOG(5) << "to prepare executor";
ARGUMENT_CHECK_FIELD((&arg), ir_analyzed_program);
predictor_.inference_program_.reset(
......@@ -456,7 +473,8 @@ bool AnalysisPredictor::MkldnnQuantizer::RunWarmup() const {
VLOG(3) << "Predictor: run a quantization warmup iteration";
auto warmup_data = qconfig_->warmup_data();
PADDLE_ENFORCE_NOT_NULL(warmup_data,
"Warmup data cannot be NULL in the config.");
platform::errors::PreconditionNotMet(
"Warmup data cannot be NULL in the config."));
PrettyLogH1("--- Running warmup iteration for quantization");
// Run the inference program
......@@ -469,7 +487,10 @@ bool AnalysisPredictor::MkldnnQuantizer::RunWarmup() const {
float AnalysisPredictor::MkldnnQuantizer::SafeEntropy(
std::vector<int> reference_distr_P, int P_sum,
std::vector<int> candidate_distr_Q, int Q_sum) const {
PADDLE_ENFORCE_EQ(reference_distr_P.size(), candidate_distr_Q.size());
PADDLE_ENFORCE_EQ(reference_distr_P.size(), candidate_distr_Q.size(),
platform::errors::InvalidArgument(
"The P size %d should be equal to Q size %d",
reference_distr_P.size(), candidate_distr_Q.size()));
float tmp_sum1 = 0;
float tmp_sum2 = 0;
for (size_t idx = 0; idx < reference_distr_P.size(); idx++) {
......@@ -479,10 +500,11 @@ float AnalysisPredictor::MkldnnQuantizer::SafeEntropy(
tmp_sum1 += 0;
tmp_sum2 += 0;
} else {
PADDLE_ENFORCE(q_idx != 0, "MkldnnQuantizer: Fatal error!, idx = " +
std::to_string(idx) +
" qindex = 0! p_idx = " +
std::to_string(p_idx));
PADDLE_ENFORCE_NE(
q_idx, 0,
platform::errors::PreconditionNotMet(
"MkldnnQuantizer: Fatal error!, idx = " + std::to_string(idx) +
" qindex = 0! p_idx = " + std::to_string(p_idx)));
}
tmp_sum1 += p_idx * (log(Q_sum * p_idx));
tmp_sum2 += p_idx * (log(P_sum * q_idx));
......
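For reference, the two accumulators above combine into a scaled KL divergence between the normalized histograms. A sketch of the math, assuming the truncated tail of the function returns (tmp_sum1 - tmp_sum2) / P_sum, as is usual in KL-based calibration:

\frac{\mathrm{tmp\_sum1} - \mathrm{tmp\_sum2}}{P_{\mathrm{sum}}}
  = \sum_{i:\,p_i \neq 0} \frac{p_i}{P_{\mathrm{sum}}}
    \log\frac{p_i / P_{\mathrm{sum}}}{q_i / Q_{\mathrm{sum}}}
  = D_{\mathrm{KL}}\big(\hat{P} \,\|\, \hat{Q}\big)

The q_idx != 0 guard above is exactly what keeps each logarithm finite.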
......@@ -31,12 +31,30 @@ limitations under the License. */
#include "paddle_analysis_config.h" // NOLINT
#include "paddle_api.h" // NOLINT
///
/// \file paddle_inference_api.h
///
/// \brief Paddle Inference API
///
/// \author paddle-infer@baidu.com
/// \date 2020-09-01
/// \since 2.0.0-beta
///
namespace paddle_infer {
using DataType = paddle::PaddleDType;
using PlaceType = paddle::PaddlePlace;
using PrecisionType = paddle::AnalysisConfig::Precision;
using Config = paddle::AnalysisConfig;
///
/// \class Tensor
///
/// \brief Represents an n-dimensional array of values.
/// The Tensor is used to store the input or output of the network.
/// It is obtained through the Predictor::GetInputHandle()
/// and Predictor::GetOutputHandle() interfaces.
///
class PD_INFER_DECL Tensor {
public:
  // Can only be created by predictor->GetInputHandle(const std::string& name)
......@@ -44,60 +62,186 @@ class PD_INFER_DECL Tensor {
Tensor() = delete;
explicit Tensor(std::unique_ptr<paddle::ZeroCopyTensor>&& tensor)
: tensor_(std::move(tensor)) {}
///
/// \brief Reset the shape of the tensor.
/// Generally it's only used for the input tensor.
/// Reshape must be called before calling mutable_data() or CopyFromCpu()
/// \param shape The shape to set.
///
void Reshape(const std::vector<int>& shape);
///
/// \brief Copy the host memory to tensor data.
/// It's usually used to set the input tensor data.
/// \param data The pointer of the data, from which the tensor will copy.
///
template <typename T>
void CopyFromCpu(const T* data);
// should add the place
///
/// \brief Get the memory pointer in CPU or GPU with specific data type.
  /// Please Reshape the tensor first before calling this.
/// It's usually used to get input data pointer.
/// \param place The place of the tensor.
/// \return The tensor data buffer pointer.
///
template <typename T>
T* mutable_data(PlaceType place);
///
/// \brief Copy the tensor data to the host memory.
/// It's usually used to get the output tensor data.
/// \param[out] data The tensor will copy the data to the address.
///
template <typename T>
void CopyToCpu(T* data);
///
/// \brief Get the memory pointer directly.
/// It's usually used to get the output data pointer.
/// \param[out] place To get the device type of the tensor.
/// \param[out] size To get the data size of the tensor.
/// \return The tensor data buffer pointer.
///
template <typename T>
T* data(PlaceType* place, int* size) const;
///
/// \brief Set lod info of the tensor.
/// More about LOD can be seen here:
/// https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/lod_tensor.html#lodtensor
/// \param x the lod info.
///
void SetLoD(const std::vector<std::vector<size_t>>& x);
/// \brief Return the lod info of the tensor.
std::vector<std::vector<size_t>> lod() const;
/// \brief Return the data type of the tensor.
/// It's usually used to get the output tensor data type.
/// \return The data type of the tensor.
DataType type() const;
/// \brief Return the shape of the Tensor.
std::vector<int> shape() const;
/// \brief Return the name of the tensor.
const std::string& name() const;
private:
std::unique_ptr<paddle::ZeroCopyTensor> tensor_;
};
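A minimal usage sketch of the Tensor interface above, assuming a predictor created with CreatePredictor (declared below) and a hypothetical 1x3x224x224 float input:

// Requires <vector>, <numeric>, <functional>; shapes are illustrative.
auto input_names = predictor->GetInputNames();
auto input_t = predictor->GetInputHandle(input_names[0]);
std::vector<float> input(1 * 3 * 224 * 224, 1.0f);
input_t->Reshape({1, 3, 224, 224});  // Reshape must precede CopyFromCpu()
input_t->CopyFromCpu(input.data());
predictor->Run();
auto output_names = predictor->GetOutputNames();
auto output_t = predictor->GetOutputHandle(output_names[0]);
std::vector<int> out_shape = output_t->shape();
int out_num = std::accumulate(out_shape.begin(), out_shape.end(), 1,
                              std::multiplies<int>());  // element count
std::vector<float> out_data(out_num);
output_t->CopyToCpu(out_data.data());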
///
/// \class Predictor
///
/// \brief Predictor is the interface for model prediction.
///
/// The predictor has the following typical uses:
///
/// Get predictor
/// \code{cpp}
/// auto predictor = CreatePredictor(config);
/// \endcode
///
/// Get input or output names
/// \code{cpp}
/// auto input_names = predictor->GetInputNames();
/// auto output_names = predictor->GetOutputNames();
/// \endcode
///
/// Get input or output handle
/// \code{cpp}
/// auto input_t = predictor->GetInputHandle(input_names[0]);
/// auto output_t = predictor->GetOutputHandle(output_names[0]);
/// \endcode
///
/// Run predictor
/// \code{cpp}
/// predictor->Run();
/// \endcode
///
class PD_INFER_DECL Predictor {
public:
Predictor() = default;
Predictor() = delete;
~Predictor() {}
// Use for clone
explicit Predictor(std::unique_ptr<paddle::PaddlePredictor>&& pred)
: predictor_(std::move(pred)) {}
///
/// \brief Construct a new Predictor object
///
  /// \param[in] config config used to build the predictor
///
explicit Predictor(const Config& config);
///
/// \brief Get the input names
///
/// \return input names
///
std::vector<std::string> GetInputNames();
///
/// \brief Get the Input Tensor object
///
/// \param[in] name input name
/// \return input tensor
///
std::unique_ptr<Tensor> GetInputHandle(const std::string& name);
///
/// \brief Run the prediction engine
///
/// \return Whether the function executed successfully
///
bool Run();
///
/// \brief Get the output names
///
/// \return output names
///
std::vector<std::string> GetOutputNames();
///
/// \brief Get the Output Tensor object
///
  /// \param[in] name output name
/// \return output tensor
///
std::unique_ptr<Tensor> GetOutputHandle(const std::string& name);
///
  /// \brief Clone to get a new predictor. Thread safe.
///
/// \return get a new predictor
///
std::unique_ptr<Predictor> Clone();
/// \brief Clear the intermediate tensors of the predictor
void ClearIntermediateTensor();
private:
std::unique_ptr<paddle::PaddlePredictor> predictor_;
};
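Since Clone() is documented as thread safe, a common pattern (a sketch, not part of this diff) is to give each worker thread its own cloned predictor:

// Requires <thread> and <vector>; the worker count is illustrative.
auto predictor = CreatePredictor(config);
std::vector<std::thread> workers;
for (int i = 0; i < 4; ++i) {
  workers.emplace_back([predictor] {
    auto local = predictor->Clone();  // per-thread predictor instance
    // ... feed inputs via local->GetInputHandle(...), then:
    local->Run();
  });
}
for (auto& t : workers) t.join();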
///
/// \brief A factory to help create predictors.
///
/// Usage:
///
/// \code{.cpp}
/// Config config;
/// ... // change the configs.
/// auto predictor = CreatePredictor(config);
/// \endcode
///
PD_INFER_DECL std::shared_ptr<Predictor> CreatePredictor(
const Config& config); // NOLINT
PD_INFER_DECL int GetNumBytesOfDataType(DataType dtype);
PD_INFER_DECL std::string GetVersion();
......@@ -128,13 +272,24 @@ T* Tensor::data(PlaceType* place, int* size) const {
namespace paddle_infer {
namespace services {
///
/// \class PredictorPool
///
/// \brief PredictorPool is a simple encapsulation of Predictor, suitable for
/// use in multi-threaded situations. According to the thread id, the
/// corresponding Predictor is taken out from PredictorPool to complete the
/// prediction.
///
class PD_INFER_DECL PredictorPool {
public:
PredictorPool() = delete;
PredictorPool(const PredictorPool&) = delete;
PredictorPool& operator=(const PredictorPool&) = delete;
/// \brief Construct the predictor pool with \param size predictor instances.
explicit PredictorPool(const Config& config, size_t size = 1);
  /// \brief Get the \param idx-th predictor.
Predictor* Retrive(size_t idx);
private:
......
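A hedged usage sketch for PredictorPool (pool size, worker index, and model path are illustrative):

paddle_infer::Config config;
config.SetModel("model_dir");  // hypothetical model directory
paddle_infer::services::PredictorPool pool(config, /*size=*/4);
size_t i = 0;  // worker index, typically one per thread
paddle_infer::Predictor* pred = pool.Retrive(i);  // fetch the i-th predictor
pred->Run();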
......@@ -231,6 +231,10 @@ void CpuPassStrategy::EnableMkldnnQuantizer() {
void CpuPassStrategy::EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN
if (!use_mkldnn_bfloat16_) {
passes_.push_back("cpu_bfloat16_placement_pass");
passes_.push_back("cpu_bfloat16_pass");
}
use_mkldnn_bfloat16_ = true;
#else
use_mkldnn_bfloat16_ = false;
......
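On the user side these two passes are switched on through the analysis config. A sketch, assuming a CPU build with oneDNN support and a hypothetical model path:

paddle::AnalysisConfig config;
config.SetModel("model_dir");   // hypothetical model directory
config.EnableMKLDNN();          // run oneDNN kernels on CPU
config.EnableMkldnnBfloat16();  // adds cpu_bfloat16_placement_pass + cpu_bfloat16_pass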
......@@ -16,6 +16,7 @@
#include <vector>
#include "paddle/fluid/inference/capi/c_api_internal.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/platform/enforce.h"
using paddle::ConvertToACPrecision;
using paddle::ConvertToPaddleDType;
......@@ -34,27 +35,37 @@ void PD_DeletePaddleBuf(PD_PaddleBuf* buf) {
}
void PD_PaddleBufResize(PD_PaddleBuf* buf, size_t length) {
PADDLE_ENFORCE_NOT_NULL(buf);
PADDLE_ENFORCE_NOT_NULL(buf,
paddle::platform::errors::InvalidArgument(
"The pointer of Buffer shouldn't be nullptr"));
buf->buf.Resize(length);
}
void PD_PaddleBufReset(PD_PaddleBuf* buf, void* data, size_t length) {
PADDLE_ENFORCE_NOT_NULL(buf);
PADDLE_ENFORCE_NOT_NULL(buf,
paddle::platform::errors::InvalidArgument(
"The pointer of Buffer shouldn't be nullptr"));
buf->buf.Reset(data, length);
}
bool PD_PaddleBufEmpty(PD_PaddleBuf* buf) {
PADDLE_ENFORCE_NOT_NULL(buf);
PADDLE_ENFORCE_NOT_NULL(buf,
paddle::platform::errors::InvalidArgument(
"The pointer of Buffer shouldn't be nullptr"));
return buf->buf.empty();
}
void* PD_PaddleBufData(PD_PaddleBuf* buf) {
PADDLE_ENFORCE_NOT_NULL(buf);
PADDLE_ENFORCE_NOT_NULL(buf,
paddle::platform::errors::InvalidArgument(
"The pointer of Buffer shouldn't be nullptr"));
return buf->buf.data();
}
size_t PD_PaddleBufLength(PD_PaddleBuf* buf) {
PADDLE_ENFORCE_NOT_NULL(buf);
PADDLE_ENFORCE_NOT_NULL(buf,
paddle::platform::errors::InvalidArgument(
"The pointer of Buffer shouldn't be nullptr"));
return buf->buf.length();
}
......
......@@ -18,7 +18,6 @@
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/api/paddle_api.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/platform/enforce.h"
using PD_PaddleDType = paddle::PaddleDType;
using PD_ACPrecision = paddle::AnalysisConfig::Precision;
......
......@@ -20,6 +20,7 @@
#include <vector>
#include "paddle/fluid/inference/capi/c_api_internal.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/platform/enforce.h"
using paddle::ConvertToACPrecision;
using paddle::ConvertToPaddleDType;
......@@ -40,7 +41,10 @@ void PD_DeleteAnalysisConfig(PD_AnalysisConfig* config) {
void PD_SetModel(PD_AnalysisConfig* config, const char* model_dir,
const char* params_path) {
LOG(INFO) << model_dir;
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
LOG(INFO) << std::string(model_dir);
if (!params_path) {
config->config.SetModel(std::string(model_dir));
......@@ -50,104 +54,164 @@ void PD_SetModel(PD_AnalysisConfig* config, const char* model_dir,
}
void PD_SetProgFile(PD_AnalysisConfig* config, const char* x) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SetProgFile(std::string(x));
}
void PD_SetParamsFile(PD_AnalysisConfig* config, const char* x) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SetParamsFile(std::string(x));
}
void PD_SetOptimCacheDir(PD_AnalysisConfig* config, const char* opt_cache_dir) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SetOptimCacheDir(std::string(opt_cache_dir));
}
const char* PD_ModelDir(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.model_dir().c_str();
}
const char* PD_ProgFile(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.prog_file().c_str();
}
const char* PD_ParamsFile(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.params_file().c_str();
}
void PD_EnableUseGpu(PD_AnalysisConfig* config, int memory_pool_init_size_mb,
int device_id) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableUseGpu(static_cast<uint64_t>(memory_pool_init_size_mb),
device_id);
}
void PD_DisableGpu(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.DisableGpu();
}
bool PD_UseGpu(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.use_gpu();
}
int PD_GpuDeviceId(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.gpu_device_id();
}
int PD_MemoryPoolInitSizeMb(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.memory_pool_init_size_mb();
}
float PD_FractionOfGpuMemoryForPool(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.fraction_of_gpu_memory_for_pool();
}
void PD_EnableCUDNN(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableCUDNN();
}
bool PD_CudnnEnabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.cudnn_enabled();
}
void PD_SwitchIrOptim(PD_AnalysisConfig* config, bool x) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SwitchIrOptim(x);
}
bool PD_IrOptim(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.ir_optim();
}
void PD_SwitchUseFeedFetchOps(PD_AnalysisConfig* config, bool x) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SwitchUseFeedFetchOps(x);
}
bool PD_UseFeedFetchOpsEnabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.use_feed_fetch_ops_enabled();
}
void PD_SwitchSpecifyInputNames(PD_AnalysisConfig* config, bool x) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SwitchSpecifyInputNames(x);
}
bool PD_SpecifyInputName(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.specify_input_name();
}
......@@ -155,110 +219,168 @@ void PD_EnableTensorRtEngine(PD_AnalysisConfig* config, int workspace_size,
int max_batch_size, int min_subgraph_size,
Precision precision, bool use_static,
bool use_calib_mode) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableTensorRtEngine(
workspace_size, max_batch_size, min_subgraph_size,
paddle::ConvertToACPrecision(precision), use_static, use_calib_mode);
}
bool PD_TensorrtEngineEnabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.tensorrt_engine_enabled();
}
void PD_SwitchIrDebug(PD_AnalysisConfig* config, bool x) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SwitchIrDebug(x);
}
void PD_EnableMKLDNN(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableMKLDNN();
}
void PD_SetMkldnnCacheCapacity(PD_AnalysisConfig* config, int capacity) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SetMkldnnCacheCapacity(capacity);
}
bool PD_MkldnnEnabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.mkldnn_enabled();
}
void PD_SetCpuMathLibraryNumThreads(PD_AnalysisConfig* config,
int cpu_math_library_num_threads) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SetCpuMathLibraryNumThreads(cpu_math_library_num_threads);
}
int PD_CpuMathLibraryNumThreads(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.cpu_math_library_num_threads();
}
void PD_EnableMkldnnQuantizer(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableMkldnnQuantizer();
}
bool PD_MkldnnQuantizerEnabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.mkldnn_quantizer_enabled();
}
void PD_EnableMkldnnBfloat16(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::NotFound(
"PD_AnalysisConfig should not be null"));
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableMkldnnBfloat16();
}
bool PD_MkldnnBfloat16Enabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::NotFound(
"PD_AnalysisConfig should not be null"));
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.mkldnn_bfloat16_enabled();
}
void PD_SetModelBuffer(PD_AnalysisConfig* config, const char* prog_buffer,
size_t prog_buffer_size, const char* params_buffer,
size_t params_buffer_size) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SetModelBuffer(prog_buffer, prog_buffer_size, params_buffer,
params_buffer_size);
}
bool PD_ModelFromMemory(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.model_from_memory();
}
void PD_EnableMemoryOptim(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableMemoryOptim();
}
bool PD_MemoryOptimEnabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.enable_memory_optim();
}
void PD_EnableProfile(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.EnableProfile();
}
bool PD_ProfileEnabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.profile_enabled();
}
void PD_SetInValid(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
config->config.SetInValid();
}
bool PD_IsValid(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
return config->config.is_valid();
}
......
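A short sketch of the now-guarded C API; the constructor/destructor pair is assumed from the same paddle_c_api.h header, and the model path is illustrative:

PD_AnalysisConfig* config = PD_NewAnalysisConfig();  // assumed constructor from paddle_c_api.h
PD_SetModel(config, "model_dir", NULL);  // NULL params_path -> SetModel(model_dir)
PD_DisableGpu(config);
PD_SwitchIrOptim(config, true);
if (!PD_IsValid(config)) {
  // Every call above throws on a NULL config; this instead checks the contents.
}
PD_DeleteAnalysisConfig(config);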
......@@ -22,6 +22,7 @@
#include "paddle/fluid/inference/api/paddle_api.h"
#include "paddle/fluid/inference/capi/c_api_internal.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/platform/enforce.h"
using paddle::ConvertToACPrecision;
using paddle::ConvertToPaddleDType;
......@@ -81,7 +82,10 @@ extern "C" {
bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
int in_size, PD_Tensor** output_data, int* out_size,
int batch_size) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
VLOG(3) << "Predoctor: PD_PredictorRun. ";
static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>>
predictors;
......@@ -111,7 +115,10 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
PD_ZeroCopyData* inputs, int in_size,
PD_ZeroCopyData** output, int* out_size) {
PADDLE_ENFORCE_NOT_NULL(config);
PADDLE_ENFORCE_NOT_NULL(
config,
paddle::platform::errors::InvalidArgument(
"The pointer of analysis configuration shouldn't be nullptr"));
static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>>
predictors;
if (!predictors.count(config->config.model_dir())) {
......@@ -144,7 +151,8 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
input_t->copy_from_cpu(static_cast<uint8_t*>(inputs[i].data));
break;
default:
CHECK(false) << "Unsupport data type.";
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"Unsupported data type."));
break;
}
}
......@@ -227,7 +235,8 @@ void PD_SetZeroCopyInput(PD_Predictor* predictor,
input->copy_from_cpu(static_cast<uint8_t*>(tensor->data.data));
break;
default:
CHECK(false) << "Unsupport data type.";
PADDLE_THROW(
paddle::platform::errors::InvalidArgument("Unsupported data type."));
break;
}
......@@ -294,7 +303,8 @@ void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) {
output->copy_to_cpu(reinterpret_cast<uint8_t*>(tensor->data.data));
break;
default:
CHECK(false) << "Unsupport data type.";
PADDLE_THROW(
paddle::platform::errors::InvalidArgument("Unsupported data type."));
break;
}
}
......
......@@ -19,6 +19,7 @@
#include <vector>
#include "paddle/fluid/inference/capi/c_api_internal.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/platform/enforce.h"
using paddle::ConvertToACPrecision;
using paddle::ConvertToPaddleDType;
......@@ -37,44 +38,60 @@ void PD_DeletePaddleTensor(PD_Tensor* tensor) {
}
void PD_SetPaddleTensorName(PD_Tensor* tensor, char* name) {
PADDLE_ENFORCE_NOT_NULL(tensor);
PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
tensor->tensor.name = std::string(name);
}
void PD_SetPaddleTensorDType(PD_Tensor* tensor, PD_DataType dtype) {
PADDLE_ENFORCE_NOT_NULL(tensor);
PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
tensor->tensor.dtype = paddle::ConvertToPaddleDType(dtype);
}
void PD_SetPaddleTensorData(PD_Tensor* tensor, PD_PaddleBuf* buf) {
PADDLE_ENFORCE_NOT_NULL(tensor);
PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
tensor->tensor.data = buf->buf;
}
void PD_SetPaddleTensorShape(PD_Tensor* tensor, int* shape, int size) {
PADDLE_ENFORCE_NOT_NULL(tensor);
PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
tensor->tensor.shape.assign(shape, shape + size);
}
const char* PD_GetPaddleTensorName(const PD_Tensor* tensor) {
PADDLE_ENFORCE_NOT_NULL(tensor);
PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
return tensor->tensor.name.c_str();
}
PD_DataType PD_GetPaddleTensorDType(const PD_Tensor* tensor) {
PADDLE_ENFORCE_NOT_NULL(tensor);
PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
return ConvertToPDDataType(tensor->tensor.dtype);
}
PD_PaddleBuf* PD_GetPaddleTensorData(const PD_Tensor* tensor) {
PADDLE_ENFORCE_NOT_NULL(tensor);
PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
PD_PaddleBuf* ret = PD_NewPaddleBuf();
ret->buf = tensor->tensor.data;
return ret;
}
const int* PD_GetPaddleTensorShape(const PD_Tensor* tensor, int* size) {
PADDLE_ENFORCE_NOT_NULL(tensor);
PADDLE_ENFORCE_NOT_NULL(tensor,
paddle::platform::errors::InvalidArgument(
"The pointer of tensor shouldn't be nullptr"));
const std::vector<int>& shape = tensor->tensor.shape;
*size = shape.size();
return shape.data();
......
......@@ -20,8 +20,12 @@
#define LITE_WITH_XPU 1
#endif
#ifndef PADDLE_WITH_ARM
#define LITE_WITH_X86 1
#endif
#include "paddle/fluid/inference/lite/engine.h"
#include "lite/api/paddle_use_passes.h"
#include <utility>
namespace paddle {
namespace inference {
......@@ -36,32 +40,40 @@ bool EngineManager::Has(const std::string& name) const {
return engines_.at(name).get() != nullptr;
}
paddle::lite::Predictor* EngineManager::Get(const std::string& name) const {
paddle::lite_api::PaddlePredictor* EngineManager::Get(
const std::string& name) const {
return engines_.at(name).get();
}
paddle::lite::Predictor* EngineManager::Create(const std::string& name,
const EngineConfig& cfg) {
if (cfg.valid_places.front().target == TARGET(kCUDA)) {
#ifdef PADDLE_WITH_CUDA
paddle::lite::Env<TARGET(kCUDA)>::Init();
paddle::lite_api::PaddlePredictor* EngineManager::Create(
const std::string& name, const EngineConfig& cfg) {
// config info for predictor.
paddle::lite_api::CxxConfig lite_cxx_config;
lite_cxx_config.set_model_buffer(cfg.model.c_str(), cfg.model.size(),
cfg.param.c_str(), cfg.param.size());
lite_cxx_config.set_valid_places(cfg.valid_places);
#ifdef PADDLE_WITH_ARM
  lite_cxx_config.set_threads(cfg.cpu_math_library_num_threads);
#else
lite_cxx_config.set_x86_math_library_num_threads(
cfg.cpu_math_library_num_threads);
#endif
} else if (cfg.valid_places.front().target == TARGET(kXPU)) {
#ifdef PADDLE_WITH_XPU
paddle::lite::TargetWrapper<TARGET(kXPU)>::workspace_l3_size_per_thread =
cfg.xpu_l3_workspace_size;
lite_cxx_config.set_xpu_workspace_l3_size_per_thread(
cfg.xpu_l3_workspace_size);
#endif
}
auto* p = new paddle::lite::Predictor();
p->Build("", cfg.model, cfg.param, cfg.valid_places, cfg.neglected_passes,
cfg.model_type, cfg.model_from_memory);
engines_[name].reset(p);
return p;
// create predictor
std::shared_ptr<paddle::lite_api::PaddlePredictor> p =
paddle::lite_api::CreatePaddlePredictor(lite_cxx_config);
engines_[name] = std::move(p);
return engines_[name].get();
}
void EngineManager::DeleteAll() {
for (auto& item : engines_) {
item.second.reset(nullptr);
item.second.reset();
}
}
......
......@@ -23,12 +23,9 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wall"
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_place.h"
#include "lite/core/context.h"
#include "lite/core/device_info.h"
#include "lite/core/memory.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/api/paddle_use_passes.h"
#pragma GCC diagnostic pop
namespace paddle {
......@@ -38,25 +35,33 @@ namespace lite {
struct EngineConfig {
std::string model;
std::string param;
paddle::lite::Place prefer_place;
std::vector<paddle::lite::Place> valid_places;
std::vector<paddle::lite_api::Place> valid_places;
std::vector<std::string> neglected_passes;
lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf};
bool model_from_memory{true};
// for xpu
size_t xpu_l3_workspace_size;
// for x86 or arm
int cpu_math_library_num_threads{1};
// for cuda
bool use_multi_stream{false};
};
class EngineManager {
public:
bool Empty() const;
bool Has(const std::string& name) const;
paddle::lite::Predictor* Get(const std::string& name) const;
paddle::lite::Predictor* Create(const std::string& name,
paddle::lite_api::PaddlePredictor* Get(const std::string& name) const;
paddle::lite_api::PaddlePredictor* Create(const std::string& name,
const EngineConfig& cfg);
void DeleteAll();
private:
std::unordered_map<std::string, std::unique_ptr<paddle::lite::Predictor>>
std::unordered_map<std::string,
std::shared_ptr<paddle::lite_api::PaddlePredictor>>
engines_;
};
......
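A sketch of filling the rewritten EngineConfig and creating an engine through the manager, mirroring the unit test below (buffers and key are illustrative):

std::string model_buffer = "";  // serialized program, assumed already loaded
std::string param_buffer = "";  // serialized parameters, assumed already loaded
paddle::inference::lite::EngineConfig cfg;
cfg.model = model_buffer;
cfg.param = param_buffer;
cfg.valid_places = {
    paddle::lite_api::Place({TARGET(kX86), PRECISION(kFloat)}),
    paddle::lite_api::Place({TARGET(kHost), PRECISION(kAny)}),
};
cfg.cpu_math_library_num_threads = 1;
auto* engine =
    paddle::inference::Singleton<paddle::inference::lite::EngineManager>::Global()
        .Create("engine_key", cfg);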
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/inference/lite/tensor_utils.h"
#include <functional>
#include <map>
#include <memory>
#include "paddle/fluid/framework/data_type.h"
......@@ -144,16 +145,55 @@ void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data,
}
}
void InitDstTensor(paddle::lite::Tensor* dst, const framework::LoDTensor& src) {
void* GetLiteTensorDataPtr(paddle::lite_api::Tensor* src,
PrecisionType precision_type,
TargetType target_type) {
void* res{nullptr};
switch (precision_type) {
case PrecisionType::kFloat:
res = static_cast<void*>(src->mutable_data<float>(target_type));
break;
case PrecisionType::kInt8:
res = static_cast<void*>(src->mutable_data<int8_t>(target_type));
break;
case PrecisionType::kInt32:
res = static_cast<void*>(src->mutable_data<int32_t>(target_type));
break;
case PrecisionType::kInt64:
res = static_cast<void*>(src->mutable_data<int64_t>(target_type));
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported precision type. Now only supports FP32, INT8, INT32 and "
"INT64."));
break;
}
return res;
}
int64_t GetLiteTensorNumel(const paddle::lite_api::Tensor& tensor) {
auto shape = tensor.shape();
int64_t numel = std::accumulate(shape.begin(), shape.end(), 1,
std::multiplies<int64_t>());
return numel;
}
void InitDstTensor(paddle::lite_api::Tensor* dst,
const framework::LoDTensor& src) {
// Currently, Lite needs to explicitly specify the target type of
// the input tensor.
constexpr int empty_size = 0;
dst->mutable_data(GetLiteTargetType(src.place()), empty_size);
dst->set_precision(GetLitePrecisionType(src.type()));
SetLoD(dst->mutable_lod(), src.lod());
dst->Resize({empty_size});
GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()),
GetLiteTargetType(src.place()));
dst->SetPrecision(GetLitePrecisionType(src.type()));
paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src.lod());
dst->SetLoD(lite_lod);
}
void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
void InitDstTensor(framework::LoDTensor* dst,
const paddle::lite_api::Tensor& src) {
constexpr framework::proto::VarType::Type dtype =
framework::proto::VarType_Type_FP32;
dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()),
......@@ -162,7 +202,8 @@ void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
}
template <>
void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src,
void TensorCopyAsync(paddle::lite_api::Tensor* dst,
const framework::LoDTensor& src,
const platform::DeviceContext& ctx) {
InitDstTensor(dst, src);
const platform::Place& src_place = src.place();
......@@ -171,52 +212,56 @@ void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src,
static_cast<size_t>(src.numel()) * framework::SizeOfType(src.type());
dst->Resize(framework::vectorize(src.dims()));
const void* src_data = src.data<void>();
void* dst_data = dst->mutable_data(bytes);
void* dst_data{nullptr};
dst_data = GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()),
GetLiteTargetType(src.place()));
VLOG(3) << "[CopyAsync fluid -> lite] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << src.type();
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << dst->memory_size();
VLOG(3) << "[Lite memory size] Bytes = " << bytes;
}
template <>
void TensorCopyAsync(framework::LoDTensor* dst, const paddle::lite::Tensor& src,
void TensorCopyAsync(framework::LoDTensor* dst,
const paddle::lite_api::Tensor& src,
const platform::DeviceContext& ctx) {
dst->Resize(paddle::framework::make_ddim(src.dims().Vectorize()));
dst->Resize(paddle::framework::make_ddim(src.shape()));
InitDstTensor(dst, src);
const platform::Place& src_place = GetNativePlace(src.target());
const platform::Place& dst_place = dst->place();
const size_t bytes =
static_cast<size_t>(src.numel()) * framework::SizeOfType(dst->type());
const void* src_data = src.raw_data();
int64_t src_numel = GetLiteTensorNumel(src);
const size_t bytes = src_numel * framework::SizeOfType(dst->type());
const void* src_data = src.data<void>();
// When Lite is ready, the source type needs to be modified here.
void* dst_data = dst->mutable_data(dst_place, dst->type());
VLOG(3) << "[CopyAsync lite -> fluid] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << dst->type();
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << src.memory_size();
VLOG(3) << "[Lite memory size] Bytes = " << bytes;
}
template <>
void TensorDataShare(paddle::lite::Tensor* dst, framework::LoDTensor* src) {
const size_t bytes =
static_cast<size_t>(src->numel()) * framework::SizeOfType(src->type());
auto buf = std::make_shared<paddle::lite::Buffer>(paddle::lite::Buffer(
src->data<void>(), GetLiteTargetType(src->place()), src->memory_size()));
void TensorDataShare(paddle::lite_api::Tensor* dst, framework::LoDTensor* src) {
dst->Resize(framework::vectorize(src->dims()));
dst->set_precision(GetLitePrecisionType(src->type()));
SetLoD(dst->mutable_lod(), src->lod());
dst->ResetBuffer(buf, bytes);
dst->ShareExternalMemory(src->data<void>(), src->memory_size(),
GetLiteTargetType(src->place()));
dst->SetPrecision(GetLitePrecisionType(src->type()));
paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src->lod());
dst->SetLoD(lite_lod);
}
template <>
void TensorDataShare(framework::LoDTensor* dst, paddle::lite::Tensor* src) {
void TensorDataShare(framework::LoDTensor* dst, paddle::lite_api::Tensor* src) {
constexpr framework::proto::VarType::Type dtype =
framework::proto::VarType_Type_FP32;
void* src_raw_data = src->raw_data();
void* src_raw_data =
GetLiteTensorDataPtr(src, GetLitePrecisionType(dtype), src->target());
size_t memory_size = GetLiteTensorNumel(*src) * sizeof(float);
std::shared_ptr<memory::allocation::Allocation> holder(
new memory::allocation::Allocation(src_raw_data, src->memory_size(),
new memory::allocation::Allocation(src_raw_data, memory_size,
GetNativePlace(src->target())));
dst->Resize(paddle::framework::make_ddim(src->dims().Vectorize()));
dst->Resize(paddle::framework::make_ddim(src->shape()));
SetLoD(dst->mutable_lod(), src->lod());
dst->ResetHolderWithType(holder, dtype);
}
......
......@@ -102,10 +102,10 @@ TEST(EngineManager, engine) {
config.model_from_memory = true;
config.valid_places = {
#ifdef PADDLE_WITH_CUDA
paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
paddle::lite_api::Place({TARGET(kCUDA), PRECISION(kFloat)}),
#endif
paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
paddle::lite_api::Place({TARGET(kX86), PRECISION(kFloat)}),
paddle::lite_api::Place({TARGET(kHost), PRECISION(kAny)}),
};
LOG(INFO) << "Create EngineManager";
......@@ -118,7 +118,7 @@ TEST(EngineManager, engine) {
ASSERT_EQ(inference::Singleton<inference::lite::EngineManager>::Global().Has(
unique_key),
true);
paddle::lite::Predictor* engine_0 =
paddle::lite_api::PaddlePredictor* engine_0 =
inference::Singleton<inference::lite::EngineManager>::Global().Get(
unique_key);
CHECK_NOTNULL(engine_0);
......
......@@ -73,6 +73,33 @@ TEST(LiteEngineOp, GetNativeLayoutType) {
EXPECT_ANY_THROW(GetNativeLayoutType(DataLayoutType::kNHWC));
}
template <typename T>
void test_lite_tensor_data_ptr(PrecisionType precision_type) {
void* GetLiteTensorDataPtr(paddle::lite_api::Tensor * src,
PrecisionType precision_type,
TargetType target_type);
const int count = 4;
paddle::lite::Tensor lite_tensor;
lite_tensor.Resize({count});
auto* lite_tensor_data = lite_tensor.mutable_data<T>();
for (size_t i = 0; i < count; ++i) {
lite_tensor_data[i] = i;
}
paddle::lite_api::Tensor lite_api_tensor(&lite_tensor);
T* data = static_cast<T*>(GetLiteTensorDataPtr(
&lite_api_tensor, precision_type, TargetType::kHost));
for (size_t i = 0; i < count; ++i) {
CHECK_EQ(data[i], static_cast<T>(i)) << "the i-th num is not correct.";
}
}
TEST(LiteEngineOp, GetLiteTensorDataPtr) {
test_lite_tensor_data_ptr<int64_t>(PrecisionType::kInt64);
test_lite_tensor_data_ptr<int32_t>(PrecisionType::kInt32);
test_lite_tensor_data_ptr<int8_t>(PrecisionType::kInt8);
EXPECT_ANY_THROW(test_lite_tensor_data_ptr<double>(PrecisionType::kUnk));
}
void test_tensor_copy(const platform::DeviceContext& ctx) {
// Create LoDTensor.
std::vector<float> vector({1, 2, 3, 4});
......@@ -83,10 +110,11 @@ void test_tensor_copy(const platform::DeviceContext& ctx) {
lod_tensor.set_lod(lod);
// Create lite::Tensor and copy.
paddle::lite::Tensor lite_tensor;
TensorCopyAsync(&lite_tensor, lod_tensor, ctx);
paddle::lite_api::Tensor lite_api_tensor(&lite_tensor);
TensorCopyAsync(&lite_api_tensor, lod_tensor, ctx);
// Copy to LoDTensor.
framework::LoDTensor lod_tensor_n;
TensorCopyAsync(&lod_tensor_n, lite_tensor, ctx);
TensorCopyAsync(&lod_tensor_n, lite_api_tensor, ctx);
#ifdef PADDLE_WITH_CUDA
if (platform::is_gpu_place(ctx.GetPlace())) {
platform::GpuStreamSync(
......@@ -108,10 +136,11 @@ void test_tensor_share(const platform::DeviceContext& ctx) {
lod_tensor.set_lod(lod);
// Create lite::Tensor and share.
paddle::lite::Tensor lite_tensor;
TensorDataShare(&lite_tensor, &lod_tensor);
paddle::lite_api::Tensor lite_api_tensor(&lite_tensor);
TensorDataShare(&lite_api_tensor, &lod_tensor);
// Copy to LoDTensor.
framework::LoDTensor lod_tensor_n;
TensorCopyAsync(&lod_tensor_n, lite_tensor, ctx);
TensorCopyAsync(&lod_tensor_n, lite_api_tensor, ctx);
std::vector<float> result;
TensorToVector(lod_tensor_n, ctx, &result);
ASSERT_EQ(result, vector);
......
......@@ -34,8 +34,11 @@ class ConcatOpConverter : public OpConverter {
itensors.push_back(engine_->GetITensor(input_name));
}
int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
PADDLE_ENFORCE(axis > 0,
"The axis attr of Concat op should be large than 0 for trt");
PADDLE_ENFORCE_GT(axis, 0, platform::errors::InvalidArgument(
"The axis attr of Concat"
" op should be larger than 0 for trt. "
"But received %d.",
axis));
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(),
itensors.size());
......
......@@ -100,7 +100,9 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op,
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
auto* layer = fadd_layer(const_cast<nvinfer1::ITensor*>(X), n_output, n_input,
nv_ksize, weight, bias);
PADDLE_ENFORCE(layer != nullptr);
PADDLE_ENFORCE_NOT_NULL(layer,
platform::errors::Fatal("TensorRT create conv2d"
" layer error."));
layer->setStride(nv_strides);
layer->setPadding(nv_paddings);
layer->setNbGroups(groups);
......
......@@ -43,13 +43,30 @@ class ElementwiseWeightOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer";
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
PADDLE_ENFORCE_EQ(
op_desc.Input("X").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"X\").size() "
"should equal to 1, but received Input(\"X\").size() = %u.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(
op_desc.Input("Y").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\").size() "
"should equal to 1, but received Input(\"Y\").size() = %u.",
op_desc.Input("Y").size())); // Y is a weight
PADDLE_ENFORCE_EQ(
op_desc.Output("Out").size(), 1,
platform::errors::InvalidArgument(
"The input op's Output(\"Out\").size() "
"should equal to 1, but reveceid Output(\"Out\").size() = %u.",
op_desc.Output("Out").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
PADDLE_ENFORCE_NOT_NULL(Y_v);
PADDLE_ENFORCE_NOT_NULL(
Y_v, platform::errors::NotFound("Variable %s not found in scope.",
op_desc.Input("Y").front().c_str()));
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
float* weight_data = nullptr;
weight_data =
......@@ -176,9 +193,24 @@ class ElementwiseTensorOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
nvinfer1::ILayer* layer = nullptr;
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
PADDLE_ENFORCE_EQ(
op_desc.Input("X").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"X\").size() "
"should equal to 1, but received Input(\"X\").size() = %u.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(
op_desc.Input("Y").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\").size() "
"should equal to 1, but received Input(\"Y\").size() = %u.",
op_desc.Input("Y").size())); // Y is a weight
PADDLE_ENFORCE_EQ(
op_desc.Output("Out").size(), 1,
platform::errors::InvalidArgument(
"The input op's Output(\"Out\").size() "
"should equal to 1, but received Output(\"Out\").size() = %u.",
op_desc.Output("Out").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Y = engine_->GetITensor(op_desc.Input("Y").front());
......
......@@ -29,38 +29,67 @@ class DefaultIOConverter : public EngineIOConverter {
// NOTE out is GPU memory.
virtual void operator()(const LoDTensor& in, void* out,
size_t max_size) override {
PADDLE_ENFORCE(out != nullptr);
PADDLE_ENFORCE(stream_ != nullptr);
PADDLE_ENFORCE_NOT_NULL(out,
platform::errors::InvalidArgument(
"The input param 'out' must not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(stream_,
platform::errors::PreconditionNotMet(
"You should set up stream_ by SetStream() "
"before you call the operator()."));
const auto& place = in.place();
size_t size = in.memory_size();
PADDLE_ENFORCE_LE(size, max_size);
PADDLE_ENFORCE_LE(
size, max_size,
platform::errors::InvalidArgument(
"The input Tensor in's memory_size shoule be less than or equal to "
"the input max_size. But in's memory_size = %u, max_size = %u.",
size, max_size));
if (is_cpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size,
cudaMemcpyHostToDevice, *stream_));
PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemcpyAsync(
out, in.data<float>(), size, cudaMemcpyHostToDevice, *stream_));
} else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size,
cudaMemcpyDeviceToDevice, *stream_));
PADDLE_ENFORCE_EQ(
0, cudaMemcpyAsync(out, in.data<float>(), size,
cudaMemcpyDeviceToDevice, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error."));
} else {
PADDLE_THROW("Unknown device for converter");
PADDLE_THROW(platform::errors::NotFound("Unknown device for converter"));
}
cudaStreamSynchronize(*stream_);
}
// NOTE in is GPU memory.
virtual void operator()(const void* in, LoDTensor* out,
size_t max_size) override {
PADDLE_ENFORCE(in != nullptr);
PADDLE_ENFORCE(stream_ != nullptr);
PADDLE_ENFORCE_NOT_NULL(in,
platform::errors::InvalidArgument(
"The input param 'in' must not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(stream_,
platform::errors::PreconditionNotMet(
"You should set up stream_ by SetStream() "
"before you call the operator()."));
const auto& place = out->place();
size_t size = out->memory_size();
PADDLE_ENFORCE_LE(size, max_size);
PADDLE_ENFORCE_LE(
size, max_size,
platform::errors::InvalidArgument(
"The input Tensor out's memory_size shoule be less than or equal "
"to the input max_size. "
"But out's memory_size = %u, max_size = %u.",
size, max_size));
if (is_cpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToHost, *stream_));
cudaMemcpyDeviceToHost, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToHost) error."));
} else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToDevice, *stream_));
PADDLE_ENFORCE_EQ(
0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToDevice, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error."));
} else {
PADDLE_THROW("Unknown device for converter");
PADDLE_THROW(platform::errors::NotFound("Unknown device for converter"));
}
cudaStreamSynchronize(*stream_);
}
......
......@@ -44,10 +44,14 @@ class EngineIOConverter {
static void ConvertInput(const std::string& op_type, const LoDTensor& in,
void* out, size_t max_size, cudaStream_t* stream) {
PADDLE_ENFORCE(stream != nullptr);
PADDLE_ENFORCE_NOT_NULL(stream,
platform::errors::InvalidArgument(
"The input stream must not be nullptr."));
auto* converter = Registry<EngineIOConverter>::Global().Lookup(
op_type, "default" /* default_type */);
PADDLE_ENFORCE_NOT_NULL(converter);
PADDLE_ENFORCE_NOT_NULL(
converter, platform::errors::Unimplemented(
"The %s in is not supported yet.", op_type.c_str()));
converter->SetStream(stream);
(*converter)(in, out, max_size);
}
......@@ -55,10 +59,14 @@ class EngineIOConverter {
static void ConvertOutput(const std::string& op_type, const void* in,
LoDTensor* out, size_t max_size,
cudaStream_t* stream) {
PADDLE_ENFORCE(stream != nullptr);
PADDLE_ENFORCE_NOT_NULL(stream,
platform::errors::InvalidArgument(
"The input stream must not be nullptr."));
auto* converter = Registry<EngineIOConverter>::Global().Lookup(
op_type, "default" /* default_type */);
PADDLE_ENFORCE_NOT_NULL(converter);
PADDLE_ENFORCE_NOT_NULL(
converter, platform::errors::Unimplemented(
"The %s in not supported yet.", op_type.c_str()));
converter->SetStream(stream);
(*converter)(in, out, max_size);
}
......
......@@ -53,7 +53,12 @@ class OpConverter {
OpConverter* it{nullptr};
if (op_desc.Type() == "mul") {
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL,
platform::errors::InvalidArgument(
"The input op mul's Input(\"Y\")."
"size() should equal to 1, but reveceid "
"Input(\"Y\").size() = %u.",
op_desc.Input("Y").size()));
std::string Y = op_desc.Input("Y")[0];
if (parameters.count(Y)) {
it = Registry<OpConverter>::Global().Lookup("fc");
......@@ -66,38 +71,51 @@ class OpConverter {
// static std::unordered_set<std::string> add_weight_op_set {"add", "mul",
// "sub", "div"};
static std::unordered_set<std::string> add_weight_op_set{"add", "mul"};
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\")."
"size() should equal to 1, but reveceid "
"Input(\"Y\").size() = %u.",
op_desc.Input("Y").size()));
int op_type_len = op_desc.Type().size();
std::string op_type = op_desc.Type().substr(op_type_len - 3, op_type_len);
std::string Y = op_desc.Input("Y")[0];
if (parameters.count(Y)) {
PADDLE_ENFORCE(add_weight_op_set.count(op_type) > 0,
"Unsupported elementwise type" + op_type);
PADDLE_ENFORCE_GT(
add_weight_op_set.count(op_type), 0,
platform::errors::Unimplemented("Unsupported elementwise type %s",
op_type.c_str()));
it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
"_weight");
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]",
op_desc.Type());
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented(
"no OpConverter for optype [%s]", op_desc.Type()));
} else {
PADDLE_ENFORCE(add_tensor_op_set.count(op_type) > 0,
"Unsupported elementwise type" + op_type);
PADDLE_ENFORCE_GT(
add_tensor_op_set.count(op_type), 0,
platform::errors::Unimplemented("Unsupported elementwise type %s",
op_type.c_str()));
it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
"_tensor");
}
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]",
op_desc.Type());
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
}
if (op_desc.Type() == "depthwise_conv2d") {
it = Registry<OpConverter>::Global().Lookup("conv2d");
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]",
op_desc.Type());
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
}
if (!it) {
it = Registry<OpConverter>::Global().Lookup(op_desc.Type());
}
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]",
op_desc.Type());
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
it->SetEngine(engine);
(*it)(op, scope, test_mode);
......@@ -149,9 +167,13 @@ class OpConverter {
for (auto& input : inputs) {
if (parameters.count(input)) continue;
auto* var = block_desc->FindVar(input);
PADDLE_ENFORCE(var, "no variable called %s", input);
PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR,
"TensorRT engine only takes LoDTensor as input");
PADDLE_ENFORCE_NOT_NULL(
var, platform::errors::NotFound("no variable called %s in block.",
input.c_str()));
PADDLE_ENFORCE_EQ(
var->GetType(), FluidDT::VarType_Type_LOD_TENSOR,
platform::errors::InvalidArgument("TensorRT engine only takes "
"LoDTensor as input"));
auto var_shape = var->GetShape();
if (engine->with_dynamic_shape()) {
#if IS_TRT_VERSION_GE(6000)
......
......@@ -31,6 +31,7 @@ struct SimpleOpTypeSetTeller : public Teller {
teller_set.insert("fused_embedding_eltwise_layernorm");
teller_set.insert("multihead_matmul");
teller_set.insert("skip_layernorm");
teller_set.insert("slice");
#endif
}
......
(The remaining file diffs in this commit are collapsed.)