Commit c6e0ee6d authored by: J jingqinghe
......@@ -386,7 +386,7 @@ function(cc_test_run TARGET_NAME)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
# No unit test should exceed 2 minutes.
if (APPLE OR WIN32)
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150)
else()
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 120)
endif()
......@@ -748,7 +748,7 @@ function(py_test TARGET_NAME)
endif()
if (APPLE OR WIN32)
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150)
else()
# No unit test should exceed 2 minutes in Linux.
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 120)
......
......@@ -138,12 +138,17 @@ function(op_library TARGET)
# And for detail pybind information, please see generated paddle/pybind/pybind.h.
file(READ ${TARGET}.cc TARGET_CONTENT)
string(REGEX MATCH "REGISTER_OPERATOR\\(.*REGISTER_OPERATOR\\(" multi_register "${TARGET_CONTENT}")
string(REGEX MATCH "REGISTER_OPERATOR\\([a-z0-9_]*," one_register "${multi_register}")
# [ \t\r\n]* is used for blank characters
string(REGEX MATCH "REGISTER_OPERATOR\\([ \t\r\n]*[a-z0-9_]*," one_register "${multi_register}")
if (one_register STREQUAL "")
string(REPLACE "_op" "" TARGET "${TARGET}")
else ()
string(REPLACE "REGISTER_OPERATOR(" "" TARGET "${one_register}")
string(REPLACE "," "" TARGET "${TARGET}")
# [ \t\r\n]+ is used for blank characters.
# Here we use '+' instead of '*' since it is a REPLACE operation.
string(REGEX REPLACE "[ \t\r\n]+" "" TARGET "${TARGET}")
endif()
# pybind USE_NO_KERNEL_OP
......
......@@ -102,6 +102,8 @@ if(WITH_MKLDNN)
pass_library(conv_concat_relu_mkldnn_fuse_pass inference DIR mkldnn)
pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
pass_library(scale_matmul_fuse_pass inference DIR mkldnn)
pass_library(cpu_bfloat16_placement_pass inference DIR mkldnn)
pass_library(cpu_bfloat16_pass inference DIR mkldnn)
pass_library(fc_mkldnn_pass inference DIR mkldnn)
pass_library(cpu_quantize_placement_pass base DIR mkldnn)
pass_library(cpu_quantize_pass inference DIR mkldnn)
......@@ -162,4 +164,6 @@ endif()
cc_test(test_cpu_quantize_squash_pass SRCS mkldnn/cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor)
cc_test(test_reshape_transpose_matmul_mkldnn_fuse_pass SRCS mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc DEPS reshape_transpose_matmul_mkldnn_fuse_pass)
cc_test(test_matmul_transpose_reshape_fuse_pass SRCS mkldnn/matmul_transpose_reshape_fuse_pass_tester.cc DEPS matmul_transpose_reshape_fuse_pass)
cc_test(test_cpu_bfloat16_placement_pass SRCS mkldnn/cpu_bfloat16_placement_pass_tester.cc DEPS cpu_bfloat16_placement_pass)
cc_test(test_cpu_bfloat16_pass SRCS mkldnn/cpu_bfloat16_pass_tester.cc DEPS cpu_bfloat16_pass)
endif ()
......@@ -1892,6 +1892,82 @@ PDNode *patterns::QuantizePlacement::operator()(
return op;
}
PDNode *patterns::Bfloat16Placement::operator()(
const std::unordered_set<std::string> &bfloat16_enabled_op_types) {
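// When bfloat16_enabled_op_types is empty (the default pass attribute),
// supported_op_types stays empty and the pattern matches no operators.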
std::unordered_set<std::string> supported_op_types =
std::unordered_set<std::string>();
if (!bfloat16_enabled_op_types.empty()) {
supported_op_types = bfloat16_enabled_op_types;
}
auto *op = pattern->NewNode(op_repr())->assert_is_ops(supported_op_types);
return op;
}
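// Matches a bfloat16 operator whose producer and consumer both run in
// float32: fp32 op -> prev_out -> bf16 op -> op_out -> fp32 op.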
PDNode *patterns::OrphanedBfloat16::operator()() {
auto *prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
prev_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"float32";
});
auto *prev_out = pattern->NewNode(prev_out_repr())->AsOutput();
auto *op = pattern->NewNode(op_repr())->assert_is_op();
op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"bfloat16";
});
auto *op_out = pattern->NewNode(op_out_repr())->AsOutput();
auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op();
next_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"float32";
});
prev_op->LinksTo({prev_out});
op->LinksFrom({prev_out}).LinksTo({op_out});
next_op->LinksFrom({op_out});
return next_op;
}
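// Matches a bfloat16 operator whose output is consumed by a non-bfloat16
// operator, i.e. the last operator of a bfloat16 subgraph.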
PDNode *patterns::LastBfloat16Ops::operator()() {
auto *op = pattern->NewNode(op_repr())->assert_is_op();
op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"bfloat16";
});
auto *op_out = pattern->NewNode(op_out_repr())->AsOutput();
auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op();
next_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") !=
"bfloat16";
});
op->LinksTo({op_out});
next_op->LinksFrom({op_out});
return next_op;
}
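// Matches a bfloat16 operator whose input comes from a non-bfloat16
// operator, i.e. the first operator of a bfloat16 subgraph.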
PDNode *patterns::FirstBfloat16Ops::operator()() {
auto *prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
prev_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") !=
"bfloat16";
});
auto *op_in = pattern->NewNode(op_in_repr())->AsOutput();
auto *op = pattern->NewNode(op_repr())->assert_is_op();
op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"bfloat16";
});
prev_op->LinksTo({op_in});
op->LinksFrom({op_in});
return op;
}
PDNode *patterns::MKLDNNInPlace::operator()() {
const std::unordered_set<std::string> &supported_op_types = {
"abs",
......
......@@ -1129,6 +1129,47 @@ struct QuantizePlacement : public PatternBase {
PATTERN_DECL_NODE(op);
};
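// Matches operators listed in the bfloat16_enabled_op_types set, which are
// candidates for bfloat16 placement.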
struct Bfloat16Placement : public PatternBase {
Bfloat16Placement(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "bfloat16_placement") {}
PDNode* operator()(
const std::unordered_set<std::string>& bfloat16_enabled_op_types);
PATTERN_DECL_NODE(op);
};
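// Matches a bfloat16 operator placed between two float32 operators.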
struct OrphanedBfloat16 : public PatternBase {
OrphanedBfloat16(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "orphaned_bfloat16") {}
PDNode* operator()();
PATTERN_DECL_NODE(prev_op);
PATTERN_DECL_NODE(prev_out);
PATTERN_DECL_NODE(op);
PATTERN_DECL_NODE(op_out);
PATTERN_DECL_NODE(next_op);
};
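// Matches the last operator of a bfloat16 subgraph.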
struct LastBfloat16Ops : public PatternBase {
LastBfloat16Ops(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "last_bfloat16_ops") {}
PDNode* operator()();
PATTERN_DECL_NODE(op);
PATTERN_DECL_NODE(op_out);
PATTERN_DECL_NODE(next_op);
};
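// Matches the first operator of a bfloat16 subgraph.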
struct FirstBfloat16Ops : public PatternBase {
FirstBfloat16Ops(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "first_bfloat16_ops") {}
PDNode* operator()();
PATTERN_DECL_NODE(prev_op);
PATTERN_DECL_NODE(op_in);
PATTERN_DECL_NODE(op);
};
// Pattern used for enforcing in-place computation for DNNL ops that
// support it: softmax, batch_norm and layer_norm.
struct MKLDNNInPlace : public PatternBase {
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace framework {
namespace ir {
using string::PrettyLogDetail;
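// Removes the edge a->b by erasing b from a's outputs and a from b's inputs.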
void UnlinkNodes(ir::Node* a, ir::Node* b) {
a->outputs.erase(std::remove(a->outputs.begin(), a->outputs.end(), b),
a->outputs.end());
b->inputs.erase(std::remove(b->inputs.begin(), b->inputs.end(), a),
b->inputs.end());
}
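// Inserts a quantize op in front of each bfloat16 operator that consumes a
// non-bfloat16 operator's output; conv2d (which converts its input itself)
// and inputs already produced by a quantize op are skipped.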
void CPUBFloat16Pass::SetInputDataType(ir::Graph* graph) const {
GraphPatternDetector gpd;
patterns::FirstBfloat16Ops bfloat16_ops{gpd.mutable_pattern(),
"first_bfloat16_ops"};
bfloat16_ops();
int quantize_counter = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(op_in, op_in, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_ops);
if (op->Op()->Type() != "conv2d" && prev_op->Op()->Type() != "quantize") {
VarDesc quantize_out_desc(patterns::PDNodeName("quantize", "out"));
auto* quantize_out_node = g->CreateVarNode(&quantize_out_desc);
// create a quantize op node
OpDesc q_desc;
q_desc.SetType("quantize");
q_desc.SetInput("Input", std::vector<std::string>({op_in->Name()}));
q_desc.SetOutput("Output",
std::vector<std::string>({quantize_out_node->Name()}));
q_desc.SetAttr("Scale", 1.f);
q_desc.SetAttr("bfloat16", true);
q_desc.SetAttr("output_format", Has("data_layout")
? Get<std::string>("data_layout")
: "NCHW");
auto quantize_op = g->CreateOpNode(&q_desc); // OpDesc will be copied.
std::string op_input_name;
for (auto name : op->Op()->InputNames()) {
for (auto input_name : op->Op()->Input(name)) {
if (input_name == op_in->Name()) op_input_name = name;
}
}
PADDLE_ENFORCE_NE(
op_input_name.empty(), true,
platform::errors::NotFound(
"Operator should take the previous operator's output as its input."));
op->Op()->SetInput(op_input_name,
std::vector<std::string>({quantize_out_node->Name()}));
UnlinkNodes(op_in, op);
IR_NODE_LINK_TO(op_in, quantize_op);
IR_NODE_LINK_TO(quantize_op, quantize_out_node);
IR_NODE_LINK_TO(quantize_out_node, op);
quantize_counter++;
}
};
gpd(graph, handler);
PrettyLogDetail("--- added %d quantize op before bfloat16 op",
quantize_counter);
}
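// Converts each trailing bfloat16 operator's output back to float32, either
// by setting force_fp32_output where the op supports it or by appending a
// dequantize op; prior_box is left untouched.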
void CPUBFloat16Pass::SetOutputDataType(ir::Graph* graph) const {
GraphPatternDetector gpd;
patterns::LastBfloat16Ops bfloat16_ops{gpd.mutable_pattern(),
"last_bfloat16_ops"};
bfloat16_ops();
int force_fp32_counter = 0, dequantize_counter = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(op_out, op_out, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, bfloat16_ops);
if ((op->Op()->HasAttr("force_fp32_output") ||
op->Op()->HasProtoAttr("force_fp32_output")) &&
!op->Op()->GetAttrIfExists<bool>("fuse_residual_connection")) {
op->Op()->SetAttr("force_fp32_output", true);
force_fp32_counter++;
} else if (op->Op()->Type() != "prior_box") {
// Create dequantize input variable
VarDesc dequantize_in_desc(patterns::PDNodeName("dequantize", "in"));
auto* dequantize_in_node = g->CreateVarNode(&dequantize_in_desc);
// create a dequantize op node for output.
OpDesc deq_desc;
deq_desc.SetType("dequantize");
deq_desc.SetInput("Input",
std::vector<std::string>({dequantize_in_node->Name()}));
deq_desc.SetOutput("Output", std::vector<std::string>({op_out->Name()}));
deq_desc.SetAttr("Scale", 1.0f);
auto dequantize_op = g->CreateOpNode(&deq_desc);
std::string op_output_name;
for (auto name : op->Op()->OutputNames()) {
for (auto output_name : op->Op()->Output(name)) {
if (output_name == op_out->Name()) op_output_name = name;
}
}
PADDLE_ENFORCE_NE(
op_output_name.empty(), true,
platform::errors::NotFound(
"Operator should produce the output consumed by the next operator."));
op->Op()->SetOutput(op_output_name, std::vector<std::string>(
{dequantize_in_node->Name()}));
UnlinkNodes(op, op_out);
IR_NODE_LINK_TO(op, dequantize_in_node);
IR_NODE_LINK_TO(dequantize_in_node, dequantize_op);
IR_NODE_LINK_TO(dequantize_op, op_out);
dequantize_counter++;
}
};
gpd(graph, handler);
PrettyLogDetail("--- added %d dequantize op and used %d force_fp32_output",
dequantize_counter, force_fp32_counter);
}
void CPUBFloat16Pass::ApplyImpl(ir::Graph* graph) const {
SetInputDataType(graph);
SetOutputDataType(graph);
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(cpu_bfloat16_pass, paddle::framework::ir::CPUBFloat16Pass);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include "paddle/fluid/framework/ir/pass.h"
namespace paddle {
namespace framework {
namespace ir {
class CPUBFloat16Pass : public Pass {
protected:
void SetInputDataType(ir::Graph* graph) const;
void SetOutputDataType(ir::Graph* graph) const;
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs, bool use_mkldnn,
const std::string& mkldnn_data_type = "float32",
const bool force_fp32_output = false) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
op->SetAttr("use_mkldnn", use_mkldnn);
op->SetAttr("name", name);
if (type == "conv2d") {
op->SetInput("Input", {inputs[0]});
op->SetOutput("Output", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
op->SetAttr("force_fp32_output", force_fp32_output);
} else if (type == "pool2d" || type == "transpose2" || type == "reshape2" ||
type == "dropout") {
op->SetInput("X", {inputs[0]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
} else if (type == "fc") {
op->SetInput("Input", {inputs[0]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
} else if (type == "concat") {
op->SetInput("X", inputs);
op->SetOutput("Out", outputs);
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
} else if (type == "matmul" || type == "elementwise_add") {
op->SetInput("X", {inputs[0]});
if (inputs.size() > 1) op->SetInput("Y", {inputs[1]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
}
}
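// Applies cpu_bfloat16_pass to the graph and records the node counts before
// and after the pass runs.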
void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
const std::initializer_list<std::string> variable_names,
int* original_nodes_num, int* current_nodes_num) {
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
*original_nodes_num = (*graph)->Nodes().size();
(*graph).reset(pass->Apply((*graph).release()));
*current_nodes_num = (*graph)->Nodes().size();
}
static const std::initializer_list<std::string> variable_names{
"z", "a", "b", "c", "d", "e", "f", "g", "h", "i"};
ProgramDesc BuildProgramDesc(bool use_mkldnn) {
ProgramDesc prog;
for (auto& v : variable_names) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "dropout", "Dropout1", {"z"}, {"a"}, use_mkldnn, "float32");
SetOp(&prog, "conv2d", "Conv1", {"a"}, {"b"}, use_mkldnn, "bfloat16");
SetOp(&prog, "pool2d", "Pool1", {"b"}, {"c"}, use_mkldnn, "bfloat16");
SetOp(&prog, "conv2d", "Conv1", {"c"}, {"d"}, use_mkldnn, "bfloat16");
SetOp(&prog, "dropout", "Dropout2", {"d"}, {"e"}, use_mkldnn, "float32");
SetOp(&prog, "transpose2", "Transpose1", {"e"}, {"f"}, use_mkldnn,
"bfloat16");
SetOp(&prog, "reshape2", "Reshape1", {"f"}, {"g"}, use_mkldnn, "bfloat16");
SetOp(&prog, "concat", "Concat1", {"g"}, {"h"}, use_mkldnn, "bfloat16");
SetOp(&prog, "dropout", "Dropout3", {"h"}, {"i"}, use_mkldnn, "float32");
return prog;
}
void MainTest(const ProgramDesc& prog, int conv_count, int pool_count,
int transpose_count, int quant_count, int dequant_count,
int added_nodes_count) {
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
int original_nodes_num, current_nodes_num;
PreparePass(&graph, prog, variable_names, &original_nodes_num,
&current_nodes_num);
int quantize_nodes_count = 0;
int dequantize_nodes_count = 0;
int conv2d_nodes_count = 0;
int pool2d_nodes_count = 0;
int transpose2_nodes_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
auto* op = node->Op();
if (op->Type() == "conv2d") {
conv2d_nodes_count++;
} else if (op->Type() == "pool2d") {
pool2d_nodes_count++;
} else if (op->Type() == "transpose2") {
transpose2_nodes_count++;
} else if (op->Type() == "quantize") {
quantize_nodes_count++;
} else if (op->Type() == "dequantize") {
dequantize_nodes_count++;
}
}
}
EXPECT_EQ(conv2d_nodes_count, conv_count);
EXPECT_EQ(pool2d_nodes_count, pool_count);
EXPECT_EQ(transpose2_nodes_count, transpose_count);
EXPECT_EQ(quantize_nodes_count, quant_count);
EXPECT_EQ(dequantize_nodes_count, dequant_count);
EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
}
TEST(CpuBfloat16Pass, quantize) {
bool use_mkldnn = true;
// 1 quantize + 1 dequantize
int added_nodes = 2;
MainTest(BuildProgramDesc(use_mkldnn), 2, 1, 1, 1, 2, added_nodes);
}
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(cpu_bfloat16_pass);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h"
#include <string>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace framework {
namespace ir {
using string::PrettyLogDetail;
void CPUBfloat16PlacementPass::SetMkldnnDataType(
ir::Graph* graph, int* bfloat16_operators) const {
const auto& op_types_list =
Get<std::unordered_set<std::string>>("bfloat16_enabled_op_types");
// Set mkldnn_data_type to bfloat16 for all operators that are listed in
// the bfloat16_enabled_op_types set, i.e. those matched by the
// Bfloat16Placement pattern
GraphPatternDetector gpd;
patterns::Bfloat16Placement bfloat16_placement_pattern{gpd.mutable_pattern(),
"bfloat16_placement"};
bfloat16_placement_pattern(op_types_list);
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_placement_pattern);
if ((op->Op()->HasAttr("mkldnn_data_type") ||
op->Op()->HasProtoAttr("mkldnn_data_type")) &&
!platform::HasOpINT8DataType(op->Op())) {
op->Op()->SetAttr("mkldnn_data_type", std::string("bfloat16"));
(*bfloat16_operators)++;
}
};
gpd(graph, handler);
}
void CPUBfloat16PlacementPass::RemoveOrphanedOperators(
ir::Graph* graph, int* bfloat16_operators) const {
// Find orphaned bfloat16 operators, i.e. those between two float32
// operators, and revert their mkldnn_data_type attribute to float32.
GraphPatternDetector gpd;
patterns::OrphanedBfloat16 orphaned_bfloat16_pattern{gpd.mutable_pattern(),
"orphaned_bfloat16"};
orphaned_bfloat16_pattern();
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(op, op, orphaned_bfloat16_pattern);
op->Op()->SetAttr("mkldnn_data_type", std::string("float32"));
(*bfloat16_operators)--;
};
gpd(graph, handler);
}
void CPUBfloat16PlacementPass::ApplyImpl(ir::Graph* graph) const {
int bfloat16_operators = 0;
SetMkldnnDataType(graph, &bfloat16_operators);
RemoveOrphanedOperators(graph, &bfloat16_operators);
PrettyLogDetail("--- marked %d operators to bfloat16 ",
bfloat16_operators);
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(cpu_bfloat16_placement_pass,
paddle::framework::ir::CPUBfloat16PlacementPass)
// a set of operator type names with bfloat16 support ("conv2d" etc.)
// the second param is the default value for this set
.DefaultPassAttr("bfloat16_enabled_op_types",
new std::unordered_set<std::string>());
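// A minimal usage sketch (mirroring the tester below; the enabled op types
// here are illustrative):
//
//   auto pass = framework::ir::PassRegistry::Instance().Get(
//       "cpu_bfloat16_placement_pass");
//   pass->Set("bfloat16_enabled_op_types",
//             new std::unordered_set<std::string>({"conv2d", "pool2d"}));
//   graph.reset(pass->Apply(graph.release()));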
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include "paddle/fluid/framework/ir/pass.h"
namespace paddle {
namespace framework {
namespace ir {
/*
* Specifies which operators should be run on bfloat16.
*/
class CPUBfloat16PlacementPass : public Pass {
protected:
void SetMkldnnDataType(ir::Graph* graph, int* bfloat16_operators) const;
void RemoveOrphanedOperators(ir::Graph* graph, int* bfloat16_operators) const;
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs,
const std::string& mkldnn_data_type = "float32") {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
if (type == "conv2d") {
op->SetAttr("name", name);
op->SetInput("Input", {inputs[0]});
} else if (type == "relu") {
op->SetInput("X", inputs);
} else if (type == "concat") {
op->SetAttr("axis", 1);
op->SetInput("X", {inputs[0], inputs[1]});
} else if (type == "pool2d") {
op->SetInput("X", {inputs[0]});
} else {
FAIL() << "Unexpected operator type.";
}
op->SetOutput("Out", {outputs[0]});
}
// operator mkldnn_data_type
// ---------------------------------------
// (a,b)->concat->c float32
// c->conv->f float32
// f->relu->g float32
// g->pool->h float32
// h->conv->k float32
// k->pool->l float32
ProgramDesc BuildProgramDesc() {
ProgramDesc prog;
for (auto& v :
std::vector<std::string>({"a", "b", "c", "f", "g", "h", "k", "l"})) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "concat", "concat1", {"a", "b"}, {"c"});
SetOp(&prog, "conv2d", "conv1", {"c"}, {"f"});
SetOp(&prog, "relu", "relu1", {"f"}, {"g"});
SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"});
SetOp(&prog, "conv2d", "conv2", {"h"}, {"k"});
SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"});
return prog;
}
void MainTest(std::initializer_list<std::string> bfloat16_enabled_op_types,
unsigned expected_bfloat16_data_type_count) {
auto prog = BuildProgramDesc();
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_placement_pass");
pass->Set("bfloat16_enabled_op_types",
new std::unordered_set<std::string>(bfloat16_enabled_op_types));
graph.reset(pass->Apply(graph.release()));
unsigned bfloat16_data_type_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
if (platform::HasOpBFLOAT16DataType(node->Op())) {
++bfloat16_data_type_count;
}
}
}
EXPECT_EQ(bfloat16_data_type_count, expected_bfloat16_data_type_count);
}
void DefaultAttrTest(unsigned expected_bfloat16_data_type_count) {
auto prog = BuildProgramDesc();
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_placement_pass");
graph.reset(pass->Apply(graph.release()));
unsigned bfloat16_data_type_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
if (platform::HasOpBFLOAT16DataType(node->Op())) {
++bfloat16_data_type_count;
}
}
}
EXPECT_EQ(bfloat16_data_type_count, expected_bfloat16_data_type_count);
}
TEST(Bfloat16PlacementPass, enable_all) {
MainTest({"conv2d", "pool2d", "relu", "concat"}, 6);
}
TEST(Bfloat16PlacementPass, enabled_conv_and_pool) {
// 2 conv2d + 2 pool2d - 1 orphaned conv2d
MainTest({"conv2d", "pool2d"}, 3);
}
TEST(Bfloat16PlacementPass, default_attr_value) { DefaultAttrTest(0); }
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(cpu_bfloat16_placement_pass);
......@@ -20,6 +20,7 @@
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
namespace paddle {
namespace framework {
......@@ -145,3 +146,11 @@ void TransposeFlattenConcatFusePass::ApplyImpl(ir::Graph *graph) const {
REGISTER_PASS(transpose_flatten_concat_fuse_pass,
paddle::framework::ir::TransposeFlattenConcatFusePass);
REGISTER_PASS_CAPABILITY(transpose_flatten_concat_fuse_pass)
.AddCombination(
paddle::framework::compatible::OpVersionComparatorCombination()
.EQ("transpose", 0)
.EQ("transpose2", 0)
.EQ("flatten", 0)
.EQ("concat", 0)
.EQ("fusion_transpose_flatten_concat", 0));
......@@ -69,7 +69,8 @@ class OpInfo {
const OpCreator& Creator() const {
PADDLE_ENFORCE_NOT_NULL(creator_,
"Operator's Creator has not been registered");
platform::errors::NotFound(
"Operator's Creator has not been registered."));
return creator_;
}
......@@ -79,11 +80,12 @@ class OpInfo {
std::string type = proto_ ? proto_->type() : "unknown";
PADDLE_ENFORCE_NOT_NULL(
grad_op_maker_,
platform::errors::NotFound(
"Operator %s's GradOpMaker has not been "
"registered.\nPlease check whether %s_op has "
"grad_op.\nIf not, please set stop_gradient to True "
"registered.\nPlease check whether (%s) operator has "
"gradient operator.\nIf not, please set stop_gradient to be True "
"for its input and output variables using var.stop_gradient=True.",
type.c_str(), type.c_str());
type.c_str(), type.c_str()));
return grad_op_maker_;
}
......@@ -100,11 +102,12 @@ class OpInfo {
std::string type = proto_ ? proto_->type() : "unknown";
PADDLE_ENFORCE_NOT_NULL(
dygraph_grad_op_maker_,
platform::errors::NotFound(
"Operator %s's DygraphGradOpMaker has not been "
"registered.\nPlease check whether %s_op has "
"grad_op.\nIf not, please set stop_gradient to True "
"registered.\nPlease check whether (%s) operator has "
"gradient operator.\nIf not, please set stop_gradient to be True "
"for its input and output variables using var.stop_gradient=True.",
type.c_str(), type.c_str());
type.c_str(), type.c_str()));
return dygraph_grad_op_maker_;
}
......@@ -130,14 +133,17 @@ class OpInfoMap {
}
void Insert(const std::string& type, const OpInfo& info) {
PADDLE_ENFORCE(!Has(type), "Operator %s has been registered", type);
PADDLE_ENFORCE_NE(Has(type), true,
platform::errors::AlreadyExists(
"Operator (%s) has been registered.", type));
map_.insert({type, info});
}
const OpInfo& Get(const std::string& type) const {
auto op_info_ptr = GetNullable(type);
PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not been registered",
type);
PADDLE_ENFORCE_NOT_NULL(
op_info_ptr,
platform::errors::NotFound("Operator (%s) is not registered.", type));
return *op_info_ptr;
}
......
......@@ -33,10 +33,18 @@ size_t OpKernelType::Hash::operator()(const OpKernelType& key) const {
cur_loc += OpKernelType::kLibBits;
int customized_value = key.customized_type_value_;
PADDLE_ENFORCE(customized_value < (1 << OpKernelType::kCustomizeBits));
PADDLE_ENFORCE_LT(customized_value, (1 << OpKernelType::kCustomizeBits),
platform::errors::Unavailable(
"Too many custom OpKernel attribute values, expected "
"maximum value is %d, received value is %d.",
(1 << OpKernelType::kCustomizeBits), customized_value));
customized_value = customized_value << cur_loc;
cur_loc += OpKernelType::kCustomizeBits;
PADDLE_ENFORCE(cur_loc < 64);
PADDLE_ENFORCE_LT(cur_loc, 64,
platform::errors::Unavailable(
"Too many OpKernel attribute values, expected maximum "
"value is 64, received value is %d.",
cur_loc));
std::hash<int> hasher;
return hasher(place + data_type + data_layout + library_type +
......
......@@ -43,7 +43,9 @@ OpProtoAndCheckerMaker::VariableBuilder OpProtoAndCheckerMaker::AddOutput(
void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() {
std::unordered_set<std::string> names;
auto checker = [&](const std::string& name) {
PADDLE_ENFORCE(!names.count(name), "[%s] is duplicated", name);
PADDLE_ENFORCE_EQ(
names.count(name), 0,
platform::errors::AlreadyExists("Attribute [%s] is duplicated.", name));
names.insert(name);
};
for (auto& attr : proto_->attrs()) {
......
......@@ -54,9 +54,10 @@ class Registrar {
template <typename... ARGS>
struct OperatorRegistrar : public Registrar {
explicit OperatorRegistrar(const char* op_type) {
if (OpInfoMap::Instance().Has(op_type)) {
PADDLE_THROW("'%s' is registered more than once.", op_type);
}
PADDLE_ENFORCE_EQ(
OpInfoMap::Instance().Has(op_type), false,
platform::errors::AlreadyExists(
"Operator '%s' is registered more than once.", op_type));
static_assert(sizeof...(ARGS) != 0,
"OperatorRegistrar should be invoked at least by OpClass");
OpInfo info;
......
......@@ -58,7 +58,8 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
AddInput("input", "input of cosine op").AsDuplicable();
AddOutput("output", "output of cosine op").AsIntermediate();
auto my_checker = [](int i) {
PADDLE_ENFORCE(i % 2 == 0, "'test_attr' must be even!");
PADDLE_ENFORCE_EQ(i % 2, 0, platform::errors::InvalidArgument(
"'test_attr' must be even!"));
};
AddAttr<int>("test_attr", "a simple test attribute")
.AddCustomChecker(my_checker);
......
......@@ -152,10 +152,10 @@ class OpVersionRegistrar {
return instance;
}
OpVersion& Register(const std::string& op_type) {
if (op_version_map_.find(op_type) != op_version_map_.end()) {
PADDLE_THROW("'%s' is registered in operator version more than once.",
op_type);
}
PADDLE_ENFORCE_EQ(
op_version_map_.find(op_type), op_version_map_.end(),
platform::errors::AlreadyExists(
"'%s' is registered in operator version more than once.", op_type));
op_version_map_.insert({op_type, OpVersion()});
return op_version_map_[op_type];
}
......
This diff is collapsed.
......@@ -495,9 +495,9 @@ TEST(IndicateVarDataTypeTest, other) {
EXPECT_TRUE(
ex_msg.find(
"The Input Variable(Other) of "
"indicate_other_data_type_test Op used to "
"(indicate_other_data_type_test) Operator used to "
"determine kernel data type "
"is empty or not LoDTensor or SelectedRows or LoDTensorArray") !=
"is empty or not LoDTensor or SelectedRows or LoDTensorArray.") !=
std::string::npos);
}
ASSERT_TRUE(caught);
......
......@@ -20,7 +20,10 @@ namespace framework {
void ReaderBase::ReadNext(std::vector<LoDTensor> *out) {
std::lock_guard<std::mutex> lock(mu_);
PADDLE_ENFORCE_EQ(status_, ReaderStatus::kRunning);
PADDLE_ENFORCE_EQ(status_, ReaderStatus::kRunning,
platform::errors::Unavailable(
"The current reader has stopped running and cannot "
"continue to read the next batch of data."));
ReadNextImpl(out);
}
......
......@@ -32,17 +32,21 @@ struct RWLock {
~RWLock() { pthread_rwlock_destroy(&lock_); }
inline void RDLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_rdlock(&lock_), 0,
"acquire read lock failed");
PADDLE_ENFORCE_EQ(
pthread_rwlock_rdlock(&lock_), 0,
platform::errors::External("The pthread failed to acquire read lock."));
}
inline void WRLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_wrlock(&lock_), 0,
"acquire write lock failed");
platform::errors::External(
"The pthread failed to acquire write lock."));
}
inline void UNLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_unlock(&lock_), 0, "unlock failed");
PADDLE_ENFORCE_EQ(
pthread_rwlock_unlock(&lock_), 0,
platform::errors::External("The pthread failed to unlock."));
}
private:
......
......@@ -33,7 +33,8 @@ void CheckInStreamState(std::istream& istre, size_t length) {
VLOG(5) << "Can't read [" << length << "] from file"
<< "file seems breakem";
PADDLE_THROW("Model load error, file seems breaken");
PADDLE_THROW(platform::errors::Unavailable(
"Model load failed, istream state error."));
}
}
......@@ -58,10 +59,11 @@ size_t ReadTensorNumber(std::istream& istre) {
sizeof(char) * tensor_number_mark.size());
std::string str_read_tensor_number_mark(tensor_number_mark_buffer,
tensor_number_mark.size());
PADDLE_ENFORCE_EQ(
tensor_number_mark, str_read_tensor_number_mark,
"Tensor number mark not match, expect [%s], but read from file is [%]",
tensor_number_mark, str_read_tensor_number_mark);
PADDLE_ENFORCE_EQ(tensor_number_mark, str_read_tensor_number_mark,
platform::errors::InvalidArgument(
"Tensor number mark does not match, expect mark is "
"[%s], but the mark read from file is [%s].",
tensor_number_mark, str_read_tensor_number_mark));
size_t tensor_number = 0;
istre.read(reinterpret_cast<char*>(&tensor_number), sizeof(tensor_number));
......@@ -79,10 +81,11 @@ std::string ReadTensorName(std::istream& istre) {
std::string str_read_tensor_name_mark(name_mark_buffer,
tensor_name_mark.size());
PADDLE_ENFORCE_EQ(
tensor_name_mark, str_read_tensor_name_mark,
"Tensor name mark not match, expect [%s], but read from file is [%]",
tensor_name_mark, str_read_tensor_name_mark);
PADDLE_ENFORCE_EQ(tensor_name_mark, str_read_tensor_name_mark,
platform::errors::InvalidArgument(
"Tensor name mark does not match, expect mark is [%s], "
"but the mark read from file is [%s].",
tensor_name_mark, str_read_tensor_name_mark));
size_t tensor_name_length = 0;
istre.read(reinterpret_cast<char*>(&tensor_name_length),
......@@ -117,16 +120,18 @@ bool SaveStaticNameListToDisk(
for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) {
auto var_ptr = scope.FindVar(vec_tensor_name_list[i]);
PADDLE_ENFORCE_NE(
var_ptr, nullptr,
"Variable find error, when save model, can't not find vairable [%s], "
"Please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
PADDLE_ENFORCE_NOT_NULL(
var_ptr, platform::errors::NotFound("Variable (%s) is not found when "
"saving model, please make sure "
"that exe.run(startup_program) has "
"been executed.",
vec_tensor_name_list[i]));
Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed,"
"Please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
platform::errors::PreconditionNotMet(
"Paramter [%s] is not initialzed, please make sure "
"that exe.run(startup_program) has been executed.",
vec_tensor_name_list[i]));
map_tensor[vec_tensor_name_list[i]] = tensor;
}
......@@ -145,9 +150,10 @@ bool SaveDygraphVarBaseListToDisk(
Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed,"
"Please make sure you have run StartUpProgram",
vec_var_base_list[i]->Name());
platform::errors::PreconditionNotMet(
"Paramter [%s] is not initialzed, please make sure "
"that exe.run(startup_program) has been executed.",
vec_var_base_list[i]->Name()));
map_tensor[vec_var_base_list[i]->Name()] = tensor;
}
......@@ -185,34 +191,41 @@ bool LoadStaticNameListFromDisk(
for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) {
auto it = map_load_tensor.find(vec_tensor_name_list[i]);
PADDLE_ENFORCE(it != map_load_tensor.end(),
"Paramete not found in Model file, "
"Can not find [%s] in model file [%s]",
vec_tensor_name_list[i], file_name);
PADDLE_ENFORCE_NE(it, map_load_tensor.end(),
platform::errors::NotFound(
"Parameter (%s) not found in model file (%s).",
vec_tensor_name_list[i], file_name));
auto var_ptr = scope.FindVar(vec_tensor_name_list[i]);
PADDLE_ENFORCE_NE(
var_ptr, nullptr,
"Parameter not created, when load model, can't not find parameter [%s] "
"please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
PADDLE_ENFORCE_NOT_NULL(
var_ptr,
platform::errors::PreconditionNotMet(
"Parameter (%s) is not created when loading model, "
"please make sure that exe.run(startup_program) has been executed.",
vec_tensor_name_list[i]));
Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_NE(tensor, nullptr,
"Paramter [%s] not initialzed "
"please make sure you have run startUpProgram",
vec_tensor_name_list[i]);
PADDLE_ENFORCE_NOT_NULL(
tensor,
platform::errors::PreconditionNotMet(
"Paramter [%s] is not initialzed, "
"please make sure that exe.run(startup_program) has been executed.",
vec_tensor_name_list[i]));
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed "
"please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
platform::errors::PreconditionNotMet(
"Paramter [%s] is not initialzed, "
"please make sure that exe.run(startup_program) has "
"been executed.v",
vec_tensor_name_list[i]));
PADDLE_ENFORCE_EQ(
tensor->dims(), it->second->dims(),
"Shape not matching: the Program requires a parameter with a shape of "
"(%s), "
"while the loaded parameter (namely [ %s ]) has a shape of (%s).",
tensor->dims(), vec_tensor_name_list[i], it->second->dims());
platform::errors::InvalidArgument(
"Shape does not match, the program requires a parameter with a "
"shape of "
"(%s), while the loaded parameter (namely [ %s ]) has a shape of "
"(%s).",
tensor->dims(), vec_tensor_name_list[i], it->second->dims()));
TensorCopySync(*(it->second.get()), tensor->place(), tensor);
......@@ -239,9 +252,9 @@ bool SaveTensorToDisk(const std::string& file_name,
MkDirRecursively(DirName(file_name).c_str());
std::ofstream fout(file_name, std::ios::binary);
if (!fout) {
PADDLE_THROW("File open error. Can not open file [%s]", file_name);
}
PADDLE_ENFORCE_EQ(
fout.is_open(), true,
platform::errors::Unavailable("File (%s) open failed.", file_name));
// first 256 byte for reserve for fulture upgrade
char* kReserveBuffer = new char[model_file_reserve_size];
......@@ -292,9 +305,8 @@ bool SaveTensorToDisk(const std::string& file_name,
TensorCopySync(*tensor, platform::CPUPlace(), &temp);
data_ptr = temp.data<void>();
#else
PADDLE_THROW(
"Tensor is in CUDA device, but paddle not compile with CUDA, this "
"should not happen");
PADDLE_THROW(platform::errors::Unavailable(
"Tensor is in CUDA device, but paddle not compiled with CUDA."));
#endif
}
fout.write(static_cast<const char*>(data_ptr),
......@@ -302,8 +314,9 @@ bool SaveTensorToDisk(const std::string& file_name,
}
if (!fout) {
PADDLE_THROW("Model save failed, data write to model file [%s] error",
file_name);
PADDLE_THROW(platform::errors::Unavailable(
"Model save failed, error when writing data into model file [%s].",
file_name));
}
fout.close();
......@@ -316,9 +329,9 @@ bool LoadTensorFromDisk(
std::map<std::string, std::shared_ptr<Tensor>>* map_tensor) {
std::ifstream fin(file_name, std::ios::binary);
if (!fin) {
PADDLE_THROW("File open error. Can not open model file [%s]", file_name);
}
PADDLE_ENFORCE_EQ(
fin.is_open(), true,
platform::errors::Unavailable("File (%s) open failed.", file_name));
ReadReserveBuffer(fin);
......@@ -331,7 +344,8 @@ bool LoadTensorFromDisk(
uint32_t version;
fin.read(reinterpret_cast<char*>(&version), sizeof(version));
CheckInStreamState(fin, sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
PADDLE_ENFORCE_EQ(version, 0U, platform::errors::InvalidArgument(
"Only version 0 tensor is supported."));
proto::VarType::TensorDesc desc;
{
// int32_t size
......@@ -344,7 +358,7 @@ bool LoadTensorFromDisk(
CheckInStreamState(fin, sizeof(size));
PADDLE_ENFORCE_EQ(
desc.ParseFromArray(buf.get(), size), true,
platform::errors::InvalidArgument("Cannot parse tensor desc"));
platform::errors::InvalidArgument("Parse tensor desc failed."));
}
{ // read tensor
......
......@@ -113,7 +113,9 @@ void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows,
// the 1st field, unit32_t version for SelectedRows
uint32_t version;
is.read(reinterpret_cast<char*>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
PADDLE_ENFORCE_EQ(version, 0U,
platform::errors::InvalidArgument(
"Only version 0 SelectedRows is supported."));
}
{
// the 2st field, rows information
......@@ -155,24 +157,27 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown,
auto iter = id_to_index_.find(key);
if (iter == id_to_index_.end()) {
rwlock_->UNLock();
if (!auto_grown) {
PADDLE_THROW("key %d not found", key);
}
PADDLE_ENFORCE_EQ(
auto_grown, true,
platform::errors::NotFound("Input key(%lld) is not found.", key));
rwlock_->WRLock();
auto map_size = id_to_index_.size();
auto vector_size = rows_.size();
if (map_size != vector_size) {
rwlock_->UNLock();
PADDLE_THROW(
"id_to_index_ size %d should have the same size with rows_ %d",
map_size, vector_size);
PADDLE_THROW(platform::errors::InvalidArgument(
"Row map size(%zu) should be equal to rows size(%zu).", map_size,
vector_size));
}
auto write_iter = id_to_index_.find(key);
if (write_iter == id_to_index_.end()) {
int row_num = rows_.size();
if (row_num == value_->dims()[0]) {
rwlock_->UNLock();
PADDLE_THROW("selected rows is full, then length exceed %d", row_num);
PADDLE_THROW(platform::errors::InvalidArgument(
"Selected rows is full, then length exceed the length of first "
"dimension (%d).",
row_num));
}
// key logic to put a key into id_to_index_
rows_.push_back(key);
......@@ -203,15 +208,20 @@ void SelectedRows::SyncIndex() {
void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
bool auto_grown, bool is_test) {
PADDLE_ENFORCE(value->IsInitialized(),
"The value tensor should be initialized.");
PADDLE_ENFORCE_EQ(value->IsInitialized(), true,
platform::errors::InvalidArgument(
"The value tensor is not initialized."));
if (ids.numel() == 0) {
VLOG(3) << "keys is empty, please check data!";
} else {
int64_t value_width = value_->numel() / value_->dims()[0];
PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0],
"output tensor should have the same shape with table "
"except the dims[0].");
PADDLE_ENFORCE_EQ(
value_width, value->numel() / value->dims()[0],
platform::errors::InvalidArgument(
"Output tensor should have the same shape with table "
"except the first dimmension, excepted value width not counting "
"the first dimension is %d, actual value width is %d.",
value_width, value->numel() / value->dims()[0]));
for (int i = 0; i < ids.numel(); ++i) {
auto id = ids.data<int64_t>()[i];
int64_t index = AutoGrownIndex(id, auto_grown, is_test);
......
......@@ -82,7 +82,8 @@ class SelectedRows {
int64_t Index(int64_t key) const {
auto it = std::find(rows_.begin(), rows_.end(), key);
if (it == rows_.end()) {
PADDLE_THROW("id %s not in table", key);
PADDLE_THROW(platform::errors::NotFound(
"Input id (%lld) is not in current rows table.", key));
}
return static_cast<int64_t>(std::distance(rows_.begin(), it));
}
......
......@@ -25,20 +25,22 @@ namespace framework {
std::vector<DDim> InferShapeContext::GetReaderDims(
const std::string &name) const {
const std::vector<std::string> &arg_names = Inputs(name);
PADDLE_ENFORCE_EQ(
arg_names.size(), 1UL,
"Reader input '%s' should hold one element, but now it holds %d", name,
arg_names.size());
PADDLE_ENFORCE_EQ(arg_names.size(), 1UL,
platform::errors::InvalidArgument(
"Reader input '%s' should hold one element, but now it "
"holds %d elements.",
name, arg_names.size()));
return this->GetRepeatedDims(arg_names[0]);
}
void InferShapeContext::SetReaderDims(const std::string &name,
const std::vector<DDim> &dims) {
const std::vector<std::string> &arg_names = Outputs(name);
PADDLE_ENFORCE_EQ(
arg_names.size(), 1UL,
"Reader output '%s' should hold one element, but now it holds %d", name,
arg_names.size());
PADDLE_ENFORCE_EQ(arg_names.size(), 1UL,
platform::errors::InvalidArgument(
"Reader output '%s' should hold one element, but now "
"it holds %d elements.",
name, arg_names.size()));
return this->SetRepeatedDims(arg_names[0], dims);
}
......
......@@ -94,9 +94,17 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place);
auto dst_cpu_place = BOOST_GET_CONST(platform::CPUPlace, dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true);
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx_place), true,
platform::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
ctx_place));
auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place,
platform::errors::Unavailable(
"Source place and context place do not match, source "
"place is %s, context place is %s.",
src_gpu_place, ctx_gpu_place));
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
......@@ -106,9 +114,17 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto src_cpu_place = BOOST_GET_CONST(platform::CPUPlace, src_place);
auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true);
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx_place), true,
platform::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
ctx_place));
auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place);
PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place);
PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place,
platform::errors::Unavailable(
"Destination place and context place do not match, "
"destination place is %s, context place is %s.",
dst_gpu_place, ctx_gpu_place));
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream);
......@@ -164,7 +180,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place);
auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true);
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx_place), true,
platform::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
ctx_place));
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_same_place(src_place, dst_place)) {
......@@ -180,12 +200,14 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
stream);
} else {
PADDLE_THROW("ctx is not belong to dst_gpu_place or src_gpu_place.");
PADDLE_THROW(platform::errors::Unavailable(
"Context place dose not match the source and destination place."));
}
}
}
else { // NOLINT
PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place);
PADDLE_THROW(platform::errors::Unimplemented(
"Copying from %s to %s is not supported.", src_place, dst_place));
}
#endif
}
......@@ -298,7 +320,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
nullptr);
}
else { // NOLINT
PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place);
PADDLE_THROW(platform::errors::Unimplemented(
"Copy from %s to %s is not supported.", src_place, dst_place));
}
#endif
}
......@@ -832,7 +855,9 @@ void TensorFromStream(std::istream& is, Tensor* tensor,
void* GetDstPtrByDLDataType(DLDataType type, framework::Tensor* dst,
const platform::Place& dst_place) {
// vector types not currently supported
PADDLE_ENFORCE_LE(type.lanes, 1, "vector types not currently supported");
PADDLE_ENFORCE_LE(type.lanes, 1,
platform::errors::Unimplemented(
"Vector type is not supported currently."));
switch (type.bits) {
case 8:
......@@ -840,32 +865,37 @@ void* GetDstPtrByDLDataType(DLDataType type, framework::Tensor* dst,
return static_cast<void*>(dst->mutable_data<int8_t>(dst_place));
if (type.code == kDLUInt)
return static_cast<void*>(dst->mutable_data<uint8_t>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.",
type.code, type.bits);
PADDLE_THROW(platform::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
case 16:
if (type.code == kDLInt)
return static_cast<void*>(dst->mutable_data<int16_t>(dst_place));
if (type.code == kDLFloat)
return static_cast<void*>(
dst->mutable_data<paddle::platform::float16>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.",
type.code, type.bits);
PADDLE_THROW(platform::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
case 32:
if (type.code == kDLInt)
return static_cast<void*>(dst->mutable_data<int32_t>(dst_place));
if (type.code == kDLFloat)
return static_cast<void*>(dst->mutable_data<float>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.",
type.code, type.bits);
PADDLE_THROW(platform::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
case 64:
if (type.code == kDLInt)
return static_cast<void*>(dst->mutable_data<int64_t>(dst_place));
if (type.code == kDLFloat)
return static_cast<void*>(dst->mutable_data<double>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.",
type.code, type.bits);
PADDLE_THROW(platform::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
default:
PADDLE_THROW("Unsupport type.bits %d", type.bits);
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported DLDataType.bits %d.", type.bits));
}
}
......
......@@ -183,7 +183,11 @@ void TensorToVector(const Tensor& src, std::vector<T>* dst) {
dst->resize(src.numel());
auto dst_ptr = static_cast<void*>(dst->data());
PADDLE_ENFORCE_EQ(platform::is_cpu_place(src.place()), true);
PADDLE_ENFORCE_EQ(
platform::is_cpu_place(src.place()), true,
platform::errors::InvalidArgument(
"The input tensor should be CPU device, but actually it is in %s.",
src.place()));
memory::Copy(dst_place, dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src.place()), src_ptr, size);
......
......@@ -27,8 +27,9 @@ Analyzer::Analyzer() {}
void Analyzer::Run(Argument *argument) { RunAnalysis(argument); }
void Analyzer::RunAnalysis(Argument *argument) {
PADDLE_ENFORCE(argument->analysis_passes_valid(),
"analsis_passes is not valid in the argument.");
PADDLE_ENFORCE_EQ(argument->analysis_passes_valid(), true,
platform::errors::InvalidArgument(
"analsis_passes is not valid in the argument."));
const bool disable_logs = argument->disable_logs();
for (auto &pass : argument->analysis_passes()) {
if (!disable_logs) {
......@@ -38,7 +39,8 @@ void Analyzer::RunAnalysis(Argument *argument) {
continue;
auto *ptr = PassRegistry::Global().Retreive(pass);
PADDLE_ENFORCE_NOT_NULL(ptr, "no analysis pass called %s", pass);
PADDLE_ENFORCE_NOT_NULL(ptr, platform::errors::PreconditionNotMet(
"no analysis pass called %s", pass));
ptr->Run(argument);
}
}
......
......@@ -75,9 +75,14 @@ void TestWord2vecPrediction(const std::string& model_path) {
std::vector<PaddleTensor> outputs;
CHECK(predictor->Run(slots, &outputs));
PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
PADDLE_ENFORCE_EQ(outputs.size(), 1UL,
platform::errors::PreconditionNotMet(
"Output size should be 1, but got %d", outputs.size()));
// Check the output buffer size and result of each tid.
PADDLE_ENFORCE_EQ(outputs.front().data.length(), 33168UL);
PADDLE_ENFORCE_EQ(outputs.front().data.length(), 33168UL,
platform::errors::PreconditionNotMet(
"Output's data length should be 33168 but got %d",
outputs.front().data.length()));
float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float);
......
......@@ -79,7 +79,9 @@ struct Argument {
#define DECL_ARGUMENT_FIELD(field__, Field, type__) \
public: \
type__& field__() { \
PADDLE_ENFORCE(Has(#field__), "There is no such field"); \
PADDLE_ENFORCE_EQ( \
Has(#field__), true, \
platform::errors::PreconditionNotMet("There is no such field")); \
return field__##_; \
} \
void Set##Field(const type__& x) { \
......@@ -98,8 +100,11 @@ struct Argument {
#define DECL_ARGUMENT_UNIQUE_FIELD(field__, Field, type__) \
public: \
type__& field__() { \
PADDLE_ENFORCE_NOT_NULL(field__##_); \
PADDLE_ENFORCE(Has(#field__)); \
PADDLE_ENFORCE_NOT_NULL(field__##_, platform::errors::PreconditionNotMet( \
"filed should not be null.")); \
PADDLE_ENFORCE_EQ( \
Has(#field__), true, \
platform::errors::PreconditionNotMet("There is no such field")); \
return *static_cast<type__*>(field__##_.get()); \
} \
void Set##Field(type__* x) { \
......@@ -113,11 +118,15 @@ struct Argument {
} \
DECL_ARGUMENT_FIELD_VALID(field__); \
type__* field__##_ptr() { \
PADDLE_ENFORCE(Has(#field__)); \
PADDLE_ENFORCE_EQ( \
Has(#field__), true, \
platform::errors::PreconditionNotMet("There is no such field")); \
return static_cast<type__*>(field__##_.get()); \
} \
type__* Release##Field() { \
PADDLE_ENFORCE(Has(#field__)); \
PADDLE_ENFORCE_EQ( \
Has(#field__), true, \
platform::errors::PreconditionNotMet("There is no such field")); \
valid_fields_.erase(#field__); \
return static_cast<type__*>(field__##_.release()); \
} \
......@@ -227,8 +236,10 @@ struct Argument {
};
#define ARGUMENT_CHECK_FIELD(argument__, fieldname__) \
PADDLE_ENFORCE(argument__->Has(#fieldname__), \
"the argument field [%s] should be set", #fieldname__);
PADDLE_ENFORCE_EQ( \
argument__->Has(#fieldname__), true, \
platform::errors::PreconditionNotMet( \
"the argument field [%s] should be set", #fieldname__));
} // namespace analysis
} // namespace inference
......
......@@ -73,12 +73,15 @@ struct DataTypeNamer {
template <typename T>
const std::string &repr() const {
auto x = std::type_index(typeid(T));
PADDLE_ENFORCE(dic_.count(x), "unknown type for representation");
PADDLE_ENFORCE_GT(dic_.count(x), 0, platform::errors::PreconditionNotMet(
"unknown type for representation"));
return dic_.at(x);
}
const std::string &repr(const std::type_index &type) const { // NOLINT
PADDLE_ENFORCE(dic_.count(type), "unknown type for representation");
PADDLE_ENFORCE_GT(dic_.count(type), 0,
platform::errors::PreconditionNotMet(
"unknown type for representation"));
return dic_.at(type);
}
......@@ -116,7 +119,9 @@ template <typename T>
class OrderedRegistry {
public:
T *Register(const std::string &name, T *x) {
PADDLE_ENFORCE(!dic_.count(name), "duplicate key [%s]", name);
PADDLE_ENFORCE_EQ(dic_.count(name), 0,
platform::errors::PreconditionNotMet(
"There exists duplicate key [%s]", name));
dic_[name] = elements_.size();
elements_.emplace_back(std::unique_ptr<T>(x));
return elements_.back().get();
......@@ -136,14 +141,20 @@ class OrderedRegistry {
template <typename T>
T &GetFromScope(const framework::Scope &scope, const std::string &name) {
framework::Variable *var = scope.FindVar(name);
PADDLE_ENFORCE(var != nullptr);
PADDLE_ENFORCE_NOT_NULL(
var, platform::errors::PreconditionNotMet(
"The var which name is %s should not be nullptr.", name));
return *var->GetMutable<T>();
}
static framework::proto::ProgramDesc LoadProgramDesc(
const std::string &model_path) {
std::ifstream fin(model_path, std::ios::in | std::ios::binary);
PADDLE_ENFORCE(fin.is_open(), "Cannot open file %s", model_path);
PADDLE_ENFORCE_EQ(
fin.is_open(), true,
platform::errors::NotFound(
"Cannot open file %s, please confirm whether the file exists",
model_path));
fin.seekg(0, std::ios::end);
std::string buffer(fin.tellg(), ' ');
fin.seekg(0, std::ios::beg);
......@@ -188,10 +199,12 @@ static std::string GetDirRoot(const std::string &path) {
static std::string GetOrCreateModelOptCacheDir(const std::string &model_root) {
std::string opt_cache_dir = model_root + "/_opt_cache/";
if (!PathExists(opt_cache_dir)) {
PADDLE_ENFORCE(MKDIR(opt_cache_dir.c_str()) != -1,
PADDLE_ENFORCE_NE(
MKDIR(opt_cache_dir.c_str()), -1,
platform::errors::PreconditionNotMet(
"Can not create optimize cache directory: %s, Make sure you "
"have permission to write",
opt_cache_dir);
opt_cache_dir));
}
return opt_cache_dir;
}
......
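A usage sketch for the two helpers above (variable name and model path hypothetical); both now raise typed errors instead of bare enforces:

framework::Scope scope;
scope.Var("fc_0.w_0")->GetMutable<framework::LoDTensor>();
// Throws PreconditionNotMet if the variable is missing from the scope.
auto& weight = GetFromScope<framework::LoDTensor>(scope, "fc_0.w_0");
// Throws NotFound if the file cannot be opened.
auto program_proto = LoadProgramDesc("/path/to/model/__model__");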
......@@ -38,7 +38,9 @@ IRPassManager::IRPassManager(Argument *argument) {
graph_ = std::unique_ptr<Graph>(new Graph(argument->main_program()));
if (argument->Has("scope")) {
auto *scope_ptr = argument->scope_ptr();
PADDLE_ENFORCE(scope_ptr);
PADDLE_ENFORCE_NOT_NULL(scope_ptr,
platform::errors::PreconditionNotMet(
"The scope ptr should not be nullptr."));
graph_->SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
}
......@@ -101,13 +103,17 @@ void IRPassManager::CreatePasses(Argument *argument,
std::string optim_cache_dir = argument->optim_cache_dir();
bool int8_valid =
!(model_from_memory && optim_cache_dir.empty() && enable_int8);
PADDLE_ENFORCE(int8_valid,
PADDLE_ENFORCE_EQ(
int8_valid, true,
platform::errors::PreconditionNotMet(
"When you are in TRT INT8 mode, and load model from "
"memory, you should set optim_cache_dir using "
"config.SetOptimCacheDir()");
PADDLE_ENFORCE(!(model_from_memory && use_static_engine),
"config.SetOptimCacheDir()"));
PADDLE_ENFORCE_EQ(
!(model_from_memory && use_static_engine), true,
platform::errors::PreconditionNotMet(
"When you are using Paddle-TRT, and also using load model "
"from memory, you should set the use_static to false.");
"from memory, you should set the use_static to false."));
if (!optim_cache_dir.empty()) {
pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
......
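The two preconditions above encode one rule each; unpacked as a sketch (static_ok is a hypothetical name introduced only for illustration):

// Rule 1: TRT INT8 with a model loaded from memory requires an optimization
// cache dir, so invalidity is exactly:
//   model_from_memory && enable_int8 && optim_cache_dir.empty()
bool int8_valid =
    !(model_from_memory && optim_cache_dir.empty() && enable_int8);
// Rule 2: model-from-memory and a static TRT engine are mutually exclusive.
bool static_ok = !(model_from_memory && use_static_engine);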
......@@ -123,7 +123,9 @@ void RenameAndGetOutputs(
auto add_block_var = [&](const std::string &graph_arg,
const std::string &block_arg) {
auto arg_var_node = graph_var_map.find(graph_arg);
PADDLE_ENFORCE(arg_var_node != graph_var_map.end());
PADDLE_ENFORCE_NE(arg_var_node, graph_var_map.end(),
platform::errors::InvalidArgument(
"Can not find %s in graph_var_map", graph_arg));
auto *var_t = block_desc->Var(block_arg);
var_t->SetShape(arg_var_node->second->Var()->GetShape());
var_t->SetDataType(arg_var_node->second->Var()->GetDataType());
......@@ -133,7 +135,10 @@ void RenameAndGetOutputs(
framework::proto::OpDesc *op = block_desc->Op(index)->Proto();
framework::OpDesc op_desc(*op, nullptr);
auto correspond_node = subgraph_nodes[index];
PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type());
PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type(),
platform::errors::PreconditionNotMet(
"We should get %s, but get %s", op->type(),
correspond_node->Name()));
std::unordered_map<std::string, size_t> var2id;
std::unordered_map<std::string, framework::ir::Node *> in_vars;
......
......@@ -97,7 +97,9 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
std::vector<std::string> *repetitive_params) const {
auto *op_desc = node->Op();
auto &subgraph = *framework::ir::Agent(node).subgraph();
PADDLE_ENFORCE(!subgraph.empty());
PADDLE_ENFORCE_EQ(subgraph.empty(), false,
platform::errors::PreconditionNotMet(
"The subgraph should not be empty."));
framework::ProgramDesc *program_desc =
Get<framework::ProgramDesc *>("program");
......@@ -194,12 +196,17 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
// to Tensor.
std::vector<std::string> output_mapping;
for (auto name : output_names) {
PADDLE_ENFORCE(output_name_map.count(name) != 0);
PADDLE_ENFORCE_NE(output_name_map.count(name), 0,
platform::errors::PreconditionNotMet(
"The output_name_map should have %s", name));
output_mapping.push_back(output_name_map[name]);
}
PADDLE_ENFORCE(!output_mapping.empty());
PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(),
"the block has no var-desc");
PADDLE_ENFORCE_EQ(output_mapping.empty(), false,
platform::errors::PreconditionNotMet(
"The output_mapping should not be empty."));
PADDLE_ENFORCE_EQ(
!block_desc.Proto()->vars().empty(), true,
platform::errors::PreconditionNotMet("the block has no var-desc"));
// Set attrs
op_desc->SetType("tensorrt_engine");
......
......@@ -13,6 +13,8 @@
// limitations under the License.
#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
#include <memory>
#include <utility>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/analysis/ir_pass_manager.h"
......@@ -31,7 +33,10 @@ void IrAnalysisPass::RunImpl(Argument* argument) {
// Apply passes.
IRPassManager the_ir_manager(argument);
graph = the_ir_manager.Apply(std::move(graph));
PADDLE_ENFORCE_GT(graph->Nodes().size(), 0);
PADDLE_ENFORCE_GT(
graph->Nodes().size(), 0,
platform::errors::PreconditionNotMet(
"The graph nodes size should be greater than 0, but got 0"));
argument->SetMainGraph(graph.release());
CollectFusionStatis(argument);
}
......
......@@ -31,7 +31,9 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
if (!argument->scope_valid()) {
argument->SetScope(new framework::Scope);
}
PADDLE_ENFORCE(argument->use_gpu_valid());
PADDLE_ENFORCE_EQ(argument->use_gpu_valid(), true,
platform::errors::PreconditionNotMet(
"The use_gpu field should be valid"));
// The load program should run on the same device with the inference program,
// so that the parameters will on the same device, or they will keep copying
......@@ -51,14 +53,17 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
argument->model_from_memory_valid() && argument->model_from_memory());
argument->SetMainProgram(program.release());
} else {
PADDLE_THROW(
"either model_dir or (program path and parameter path) should be set.");
PADDLE_THROW(platform::errors::PreconditionNotMet(
"either model_dir or (program path and parameter path) should be "
"set."));
}
auto graph = std::unique_ptr<Graph>(new Graph(argument->main_program()));
argument->SetMainGraph(graph.release());
auto *scope_ptr = argument->scope_ptr();
PADDLE_ENFORCE(scope_ptr);
PADDLE_ENFORCE_NOT_NULL(scope_ptr,
platform::errors::PreconditionNotMet(
"The scope ptr should not be nullptr."));
argument->main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
}
......
......@@ -31,7 +31,8 @@ void IrInferCleanGraphPass::RunImpl(Argument* argument) {
std::unordered_set<const framework::ir::Node*> invalid_nodes;
int valid_op = 0;
for (auto* node : graph.Nodes()) {
PADDLE_ENFORCE_NOT_NULL(node);
PADDLE_ENFORCE_NOT_NULL(node, platform::errors::PreconditionNotMet(
"The node should not be nullptr."));
if (is_valid_node(node)) {
invalid_nodes.insert(node);
} else if (node->IsOp()) {
......
......@@ -23,8 +23,12 @@ namespace inference {
namespace analysis {
void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
PADDLE_ENFORCE(argument->scope_valid());
PADDLE_ENFORCE(argument->use_gpu_valid());
PADDLE_ENFORCE_EQ(
argument->scope_valid(), true,
platform::errors::PreconditionNotMet("The scope field should be valid"));
PADDLE_ENFORCE_EQ(argument->use_gpu_valid(), true,
platform::errors::PreconditionNotMet(
"The use_gpu field should be valid"));
platform::Place place;
......@@ -40,7 +44,9 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
LOG(INFO) << "Sync params from CPU to GPU";
PADDLE_ENFORCE(argument->gpu_device_id_valid());
PADDLE_ENFORCE_EQ(argument->gpu_device_id_valid(), true,
platform::errors::PreconditionNotMet(
"The gpu_device_id field should be valid"));
place = platform::CUDAPlace(argument->gpu_device_id());
auto *scope = argument->scope_ptr();
......@@ -56,7 +62,8 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
continue;
}
auto *var = scope->FindLocalVar(var_name);
PADDLE_ENFORCE(var != nullptr);
PADDLE_ENFORCE_NOT_NULL(var, platform::errors::PreconditionNotMet(
"The var should not be nullptr"));
if (var->IsType<framework::LoDTensor>() ||
var->IsType<framework::Tensor>()) {
auto *t = var->GetMutable<framework::LoDTensor>();
......
......@@ -224,7 +224,9 @@ void UpdateOpDescsByReuse(
// modify the graph
for (auto input_node : node->inputs) {
PADDLE_ENFORCE(input_node->IsVar());
PADDLE_ENFORCE_EQ(input_node->IsVar(), true,
platform::errors::PreconditionNotMet(
"The input node should be a variable."));
std::string input_node_name = input_node->Name();
if (reuse_table.count(input_node_name) &&
reuse_table.at(input_node_name) != input_node_name) {
......@@ -246,7 +248,9 @@ void UpdateOpDescsByReuse(
// modify the graph
for (auto out_node : node->outputs) {
PADDLE_ENFORCE(out_node->IsVar());
PADDLE_ENFORCE_EQ(out_node->IsVar(), true,
platform::errors::PreconditionNotMet(
"The output node should be a variable."));
std::string out_node_name = out_node->Name();
if (reuse_table.count(out_node_name) &&
reuse_table.at(out_node_name) != out_node_name) {
......
......@@ -230,7 +230,8 @@ void AnalysisConfig::EnableMkldnnBfloat16() {
MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_,
"MkldnnQuantizer was not enabled yet.");
platform::errors::PreconditionNotMet(
"MkldnnQuantizer was not enabled yet."));
return mkldnn_quantizer_config_.get();
}
......
......@@ -169,7 +169,8 @@ bool AnalysisPredictor::PrepareScope(
if (parent_scope) {
PADDLE_ENFORCE_NOT_NULL(
parent_scope,
"Both program and parent_scope should be set in Clone mode.");
platform::errors::PreconditionNotMet(
"Both program and parent_scope should be set in Clone mode."));
scope_ = parent_scope;
status_is_cloned_ = true;
} else {
......@@ -235,7 +236,9 @@ bool AnalysisPredictor::PrepareExecutor() {
executor_->Prepare(sub_scope_, *inference_program_, 0,
config_.use_feed_fetch_ops_);
PADDLE_ENFORCE_NOT_NULL(sub_scope_);
PADDLE_ENFORCE_NOT_NULL(sub_scope_,
platform::errors::PreconditionNotMet(
"The sub_scope should not be nullptr."));
return true;
}
......@@ -297,7 +300,8 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
timer.tic();
// set feed variable
framework::Scope *scope = sub_scope_ ? sub_scope_ : scope_.get();
PADDLE_ENFORCE_NOT_NULL(scope, "The scope should not be nullptr.");
PADDLE_ENFORCE_NOT_NULL(scope, platform::errors::PreconditionNotMet(
"The scope should not be nullptr."));
if (!SetFeed(inputs, scope)) {
LOG(ERROR) << "fail to set feed";
return false;
......@@ -399,7 +403,11 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
outputs->resize(fetches_.size());
for (size_t i = 0; i < fetches_.size(); ++i) {
int idx = BOOST_GET_CONST(int, fetches_[i]->GetAttr("col"));
PADDLE_ENFORCE((size_t)idx == i);
PADDLE_ENFORCE_EQ(
static_cast<size_t>(idx), i,
platform::errors::InvalidArgument(
"Fetch op's col attr(%d) should be equal to the index(%d)", idx,
i));
framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx);
auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var);
......@@ -435,10 +443,12 @@ void AnalysisPredictor::PrepareArgument() {
if (!config_.model_dir().empty()) {
argument_.SetModelDir(config_.model_dir());
} else {
PADDLE_ENFORCE(
!config_.params_file().empty(),
"Either model_dir or (param_file, prog_file) should be set.");
PADDLE_ENFORCE(!config_.prog_file().empty());
PADDLE_ENFORCE_EQ(config_.params_file().empty(), false,
platform::errors::PreconditionNotMet(
"Either model_dir or param_file should be set."));
PADDLE_ENFORCE_EQ(config_.prog_file().empty(), false,
platform::errors::PreconditionNotMet(
"Either model_dir or prog_file should be set."));
std::string dir = inference::analysis::GetDirRoot(config_.prog_file());
argument_.SetModelProgramPath(config_.prog_file());
......@@ -503,7 +513,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
PrepareArgument();
Analyzer().Run(&argument_);
PADDLE_ENFORCE(argument_.scope_valid());
PADDLE_ENFORCE_EQ(
argument_.scope_valid(), true,
platform::errors::InvalidArgument("The argument scope should be valid."));
VLOG(5) << "to prepare executor";
ARGUMENT_CHECK_FIELD((&argument_), ir_analyzed_program);
inference_program_.reset(
......@@ -525,8 +537,10 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
FLAGS_minloglevel = 2; // GLOG_ERROR
}
VLOG(3) << "create AnalysisConfig";
PADDLE_ENFORCE(config.is_valid(),
"Note: Each config can only be used for one predictor.");
PADDLE_ENFORCE_EQ(
config.is_valid(), true,
platform::errors::InvalidArgument(
"Note: Each config can only be used for one predictor."));
if (config.use_gpu()) {
static std::once_flag gflags_initialized;
......@@ -623,7 +637,9 @@ bool AnalysisPredictor::MkldnnQuantize() {
}
void AnalysisPredictor::PrepareFeedFetch() {
PADDLE_ENFORCE_NOT_NULL(sub_scope_);
PADDLE_ENFORCE_NOT_NULL(sub_scope_,
platform::errors::InvalidArgument(
"The sub_scope should not be nullptr."));
CreateFeedFetchVar(sub_scope_);
for (auto *op : inference_program_->Block(0).AllOps()) {
if (op->Type() == "feed") {
......@@ -646,7 +662,8 @@ void AnalysisPredictor::PrepareFeedFetch() {
}
void AnalysisPredictor::CreateFeedFetchVar(framework::Scope *scope) {
PADDLE_ENFORCE_NOT_NULL(scope);
PADDLE_ENFORCE_NOT_NULL(scope, platform::errors::InvalidArgument(
"The scope should not be nullptr."));
auto *var = scope->Var("feed");
var->GetMutable<framework::FeedList>();
var = scope->Var("fetch");
......@@ -667,7 +684,8 @@ AnalysisPredictor::GetInputTensorShape() {
std::vector<std::string> names = GetInputNames();
for (std::string name : names) {
auto *var = inference_program_->Block(0).FindVar(name);
PADDLE_ENFORCE_NOT_NULL(var, "input %s does not exist.", name);
PADDLE_ENFORCE_NOT_NULL(var, platform::errors::PreconditionNotMet(
"Input %s does not exist.", name));
input_shapes[name] = var->GetShape();
}
return input_shapes;
......@@ -683,7 +701,11 @@ std::vector<std::string> AnalysisPredictor::GetOutputNames() {
std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
const std::string &name) {
PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name);
PADDLE_ENFORCE_NOT_NULL(
executor_->scope()->FindVar(name),
platform::errors::PreconditionNotMet(
"The variable named %s is not found in the scope of the exector.",
name));
std::unique_ptr<ZeroCopyTensor> res(
new ZeroCopyTensor(static_cast<void *>(executor_->scope())));
res->input_or_output_ = true;
......@@ -700,7 +722,11 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
const std::string &name) {
PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name);
PADDLE_ENFORCE_NOT_NULL(
executor_->scope()->FindVar(name),
platform::errors::PreconditionNotMet(
"he variable named %s is not found in the scope of the exector.",
name));
std::unique_ptr<ZeroCopyTensor> res(
new ZeroCopyTensor(static_cast<void *>(executor_->scope())));
res->input_or_output_ = false;
......@@ -761,8 +787,11 @@ bool AnalysisPredictor::LoadProgramDesc() {
std::string pb_content;
// Read binary
std::ifstream fin(filename, std::ios::in | std::ios::binary);
PADDLE_ENFORCE(static_cast<bool>(fin.is_open()), "Cannot open file %s",
filename);
PADDLE_ENFORCE_EQ(
static_cast<bool>(fin.is_open()), true,
platform::errors::NotFound(
"Cannot open file %s, please confirm whether the file is normal.",
filename));
fin.seekg(0, std::ios::end);
pb_content.resize(fin.tellg());
fin.seekg(0, std::ios::beg);
......@@ -779,7 +808,8 @@ bool AnalysisPredictor::LoadProgramDesc() {
bool AnalysisPredictor::LoadParameters() {
PADDLE_ENFORCE_NOT_NULL(inference_program_.get(),
"The inference program should be loaded first.");
platform::errors::PreconditionNotMet(
"The inference program should be loaded first."));
const auto &global_block = inference_program_->MutableBlock(0);
......@@ -855,8 +885,9 @@ void AnalysisPredictor::ClearIntermediateTensor() {
#if PADDLE_WITH_TENSORRT
bool AnalysisPredictor::SaveTrtCalibToDisk() {
PADDLE_ENFORCE(config_.tensorrt_engine_enabled(),
"This func can be invoked only in trt mode");
PADDLE_ENFORCE_EQ(config_.tensorrt_engine_enabled(), true,
platform::errors::PreconditionNotMet(
"This func can be invoked only in trt mode"));
auto &block = inference_program_->Block(0);
for (auto &op_desc : block.AllOps()) {
if (op_desc->Type() == "tensorrt_engine") {
......
......@@ -62,9 +62,9 @@ PaddleBuf &PaddleBuf::operator=(const PaddleBuf &other) {
if (other.length() && other.data())
memcpy(data_, other.data(), other.length());
else if (other.length())
PADDLE_THROW(
PADDLE_THROW(platform::errors::InvalidArgument(
"Invalid argument, null pointer data with length %u is passed",
other.length());
other.length()));
length_ = other.length();
memory_owned_ = true;
......@@ -92,7 +92,8 @@ void PaddleBuf::Resize(size_t length) {
length_ = length;
memory_owned_ = true;
} else {
PADDLE_THROW("The memory is allocated externally, can not Resized");
PADDLE_THROW(platform::errors::PreconditionNotMet(
"The memory is allocated externally, can not Resized"));
}
}
......@@ -105,7 +106,11 @@ void PaddleBuf::Reset(void *data, size_t length) {
void PaddleBuf::Free() {
if (memory_owned_ && data_) {
PADDLE_ENFORCE_GT(length_, 0UL);
PADDLE_ENFORCE_GT(
length_, 0UL,
platform::errors::PreconditionNotMet(
"The memory used in PaddleBuf %d should be greater than 0",
length_));
delete[] static_cast<char *>(data_);
data_ = nullptr;
length_ = 0;
......
......@@ -87,7 +87,9 @@ bool NativePaddlePredictor::Init(
if (parent_scope) {
scope_ = parent_scope;
sub_scope_ = &(parent_scope->NewScope());
PADDLE_ENFORCE_NOT_NULL(sub_scope_, "create sub scope fail");
PADDLE_ENFORCE_NOT_NULL(sub_scope_,
platform::errors::PreconditionNotMet(
"The sub_scope should not be nullptr."));
} else {
paddle::framework::InitDevices(false);
scope_.reset(new paddle::framework::Scope());
......@@ -182,7 +184,10 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
// Hot fix the bug that result diff in multi-thread.
// TODO(Superjomn) re-implement a real clone here.
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<NativePaddlePredictor *>(cls.get()));
PADDLE_ENFORCE_NOT_NULL(
dynamic_cast<NativePaddlePredictor *>(cls.get()),
platform::errors::PreconditionNotMet(
"Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) {
LOG(ERROR) << "fail to call Init";
return nullptr;
......@@ -224,8 +229,13 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
return false;
}
PADDLE_ENFORCE_NOT_NULL(input_ptr);
PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data());
PADDLE_ENFORCE_NOT_NULL(input_ptr,
platform::errors::InvalidArgument(
"The input_ptr should not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(
inputs[i].data.data(),
platform::errors::InvalidArgument(
"The data of input tensor should not be null."));
if (platform::is_cpu_place(place_)) {
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
......@@ -241,7 +251,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
platform::CPUPlace(), inputs[i].data.data(),
inputs[i].data.length(), dev_ctx->stream());
#else
PADDLE_THROW("Not compile with CUDA, should not reach here.");
PADDLE_THROW(platform::errors::Unavailable(
"Not compile with CUDA, should not reach here."));
#endif
}
......@@ -287,7 +298,11 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
outputs->resize(fetchs_.size());
for (size_t i = 0; i < fetchs_.size(); ++i) {
int idx = BOOST_GET_CONST(int, fetchs_[i]->GetAttr("col"));
PADDLE_ENFORCE((size_t)idx == i);
PADDLE_ENFORCE_EQ(
static_cast<size_t>(idx), i,
platform::errors::InvalidArgument(
"Fetch op's col attr(%d) should be equal to the index(%d)", idx,
i));
framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx);
auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var);
......@@ -318,10 +333,15 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
VLOG(3) << "create NativePaddlePredictor";
if (config.use_gpu) {
// 1. GPU memory
PADDLE_ENFORCE_GE(
config.fraction_of_gpu_memory, 0.f,
"fraction_of_gpu_memory in the config should be set to range (0., 1.]");
PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
PADDLE_ENFORCE_GE(config.fraction_of_gpu_memory, 0.f,
platform::errors::InvalidArgument(
"fraction_of_gpu_memory in the config should be set "
"to range (0., 1.]"));
PADDLE_ENFORCE_GE(config.device, 0,
platform::errors::PreconditionNotMet(
"Invalid device id %d, the device id should be "
"greater than or equal to 0.",
config.device));
std::vector<std::string> flags;
if (config.fraction_of_gpu_memory >= 0.0f ||
config.fraction_of_gpu_memory <= 0.95f) {
......@@ -336,7 +356,9 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
PADDLE_ENFORCE_NOT_NULL(
dynamic_cast<NativePaddlePredictor *>(predictor.get()));
dynamic_cast<NativePaddlePredictor *>(predictor.get()),
platform::errors::PreconditionNotMet(
"Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
return nullptr;
}
......
......@@ -112,16 +112,19 @@ static T convert(const std::string &item,
std::string message =
"invalid_argument exception when try to convert : " + item;
LOG(ERROR) << message;
PADDLE_THROW(message);
PADDLE_THROW(platform::errors::InvalidArgument(
"invalid_argument exception when try to convert %s.", item));
} catch (std::out_of_range &e) {
std::string message =
"out_of_range exception when try to convert : " + item;
LOG(ERROR) << message;
PADDLE_THROW(message);
PADDLE_THROW(platform::errors::InvalidArgument(
"out_of_range exception when try to convert %s.", item));
} catch (...) {
std::string message = "unexpected exception when try to convert " + item;
LOG(ERROR) << message;
PADDLE_THROW(message);
PADDLE_THROW(platform::errors::InvalidArgument(
"unexpected exception when try to convert %s.", item));
}
return res;
}
......@@ -353,7 +356,8 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
double batch_latency, int epoch = 1,
const framework::proto::VarType::Type data_type =
framework::proto::VarType::FP32) {
PADDLE_ENFORCE_GT(batch_size, 0, "Non-positive batch size.");
PADDLE_ENFORCE_GT(batch_size, 0, platform::errors::InvalidArgument(
"Non-positive batch size."));
double sample_latency = batch_latency / batch_size;
LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid
<< " ======";
......
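The per-sample figure derived above is plain division; a worked example with hypothetical numbers:

// batch_size = 4, batch_latency = 20.0 ms
// sample_latency = batch_latency / batch_size = 20.0 / 4 = 5.0 ms per sample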
......@@ -62,9 +62,12 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
if (scales_.find(var_name) != scales_.end()) continue;
auto* var = predictor_.sub_scope_->FindVar(var_name);
PADDLE_ENFORCE(var, "%s is not in the scope", var_name);
PADDLE_ENFORCE(var->IsType<LoDTensor>(),
"Only support lod tensor now.");
PADDLE_ENFORCE_NOT_NULL(var,
platform::errors::PreconditionNotMet(
"%s is not in the scope", var_name));
PADDLE_ENFORCE_EQ(var->IsType<LoDTensor>(), true,
platform::errors::PreconditionNotMet(
"Only support lod tensor now."));
LoDTensor* var_tensor = var->GetMutable<LoDTensor>();
// force unsigned type if already know it
......@@ -82,9 +85,11 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
} else if (op->Type() == "transpose2" ||
op->Type() == "reshape2" || op->Type() == "pool2d") {
auto input_var_name = op->Input("X")[0];
PADDLE_ENFORCE(scales_.find(input_var_name) != scales_.end(),
PADDLE_ENFORCE_NE(
scales_.find(input_var_name), scales_.end(),
platform::errors::PreconditionNotMet(
"Input scales must be calculated before the "
"output scales to infer if output is unsigned.");
"output scales to infer if output is unsigned."));
if (scales_.find(input_var_name) != scales_.end()) {
scales_[var_name] = scales_[input_var_name];
}
......@@ -94,10 +99,11 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
is_unsigned = true;
double min_scale = std::numeric_limits<double>::max();
for (auto input_var_name : op->Input("X")) {
PADDLE_ENFORCE(
scales_.find(input_var_name) != scales_.end(),
PADDLE_ENFORCE_NE(
scales_.find(input_var_name), scales_.end(),
platform::errors::PreconditionNotMet(
"Input scales must be calculated before the "
"output scales to infer if output is unsigned.");
"output scales to infer if output is unsigned."));
is_unsigned = is_unsigned && scales_[input_var_name].first;
min_scale = std::min(
min_scale,
......@@ -132,11 +138,12 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale(
auto rule = qconfig_->scale_algo(op_type_name, conn_name);
if (rule == ScaleAlgo::NONE) return;
PADDLE_ENFORCE(
var_tensor.numel() > 0,
PADDLE_ENFORCE_GT(
var_tensor.numel(), 0,
platform::errors::InvalidArgument(
"MkldnnQuantizer: LoDTensor of variable %s for quantization of op "
"%s of connection %s should not be empty.",
var_name, op_type_name, conn_name);
var_name, op_type_name, conn_name));
switch (rule) {
case ScaleAlgo::MAX:
......@@ -205,10 +212,11 @@ AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor(
float min_val = eigen_tensor.minCoeff();
bool is_positive = min_val >= 0.0f;
if (is_unsigned)
PADDLE_ENFORCE(
is_positive,
PADDLE_ENFORCE_EQ(
is_positive, true,
platform::errors::InvalidArgument(
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val);
min_val));
int num_quantized_bins = 255;
......@@ -316,10 +324,11 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
float max_abs = eigen_tensor.abs().maxCoeff();
float min_val = eigen_tensor.minCoeff();
if (is_unsigned)
PADDLE_ENFORCE(
min_val >= 0.0f,
PADDLE_ENFORCE_GE(
min_val, 0.0f,
platform::errors::InvalidArgument(
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val);
min_val));
LoDTensor scale_tensor = CreateScaleTensor();
scale_tensor.data<double>()[0] = 1.0 / max_abs;
......@@ -330,16 +339,19 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
std::pair<bool, LoDTensor>
AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor(
const LoDTensor& var_tensor, bool is_unsigned, bool is_transposed) const {
PADDLE_ENFORCE(var_tensor.dims().size() > 0, "Tensor dimension is empty.");
PADDLE_ENFORCE_GT(
var_tensor.dims().size(), 0,
platform::errors::InvalidArgument("Tensor dimension is empty."));
ConstEigenVectorArrayMap eigen_tensor{var_tensor.data<float>(),
var_tensor.numel(), 1};
float min_val = eigen_tensor.minCoeff();
if (is_unsigned)
PADDLE_ENFORCE(
min_val >= 0.0f,
PADDLE_ENFORCE_GE(
min_val, 0.0f,
platform::errors::InvalidArgument(
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val);
min_val));
auto dims = var_tensor.dims();
constexpr int num_col_dims = 1;
......@@ -367,17 +379,19 @@ AnalysisPredictor::MkldnnQuantizer::Histogram(
const framework::LoDTensor& var_tensor, float min_val, float max_val,
size_t num_bins) const {
  PADDLE_ENFORCE_GT(num_bins, 0,
                    "MkldnnQuantizer: To calculate Histogram, num_bins (" +
                        std::to_string(num_bins) + ") must be positive.");
  PADDLE_ENFORCE_GT(
      var_tensor.numel(), 0,
      "MkldnnQuantizer: To calculate Histogram, the tensor must not be empty.");
  PADDLE_ENFORCE(max_val >= min_val,
                 "MkldnnQuantizer: To calculate Histogram, max_val (" +
                     std::to_string(max_val) +
                     ") must be greater or equal"
                     "to min_val (" +
                     std::to_string(min_val) + ").");
  PADDLE_ENFORCE_GT(num_bins, 0,
                    platform::errors::InvalidArgument(
                        "MkldnnQuantizer: To calculate Histogram, num_bins (" +
                        std::to_string(num_bins) + ") must be positive."));
  PADDLE_ENFORCE_GT(var_tensor.numel(), 0,
                    platform::errors::InvalidArgument(
                        "MkldnnQuantizer: To calculate Histogram, the tensor "
                        "must not be empty."));
  PADDLE_ENFORCE_GE(max_val, min_val,
                    platform::errors::InvalidArgument(
                        "MkldnnQuantizer: To calculate Histogram, max_val (" +
                        std::to_string(max_val) + ") must be greater or equal "
                        "to min_val (" +
                        std::to_string(min_val) + ")."));
ConstEigenVectorArrayMap eigen_tensor{var_tensor.data<float>(),
var_tensor.numel(), 1};
auto bin_width = std::abs(max_val - min_val) / num_bins;
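With the checks in place, the bin width is fixed by the range and the bin count; a worked example (values hypothetical):

// min_val = -2.0f, max_val = 6.0f, num_bins = 16
// bin_width = |6.0 - (-2.0)| / 16 = 0.5
// A value v is counted in bin (v - min_val) / bin_width (sketch; exact
// boundary handling follows the counting loop omitted from this hunk).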
......@@ -407,7 +421,8 @@ void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const {
auto graph = std::unique_ptr<Graph>(new Graph(arg.main_program()));
arg.SetMainGraph(graph.release());
auto* scope_ptr = arg.scope_ptr();
PADDLE_ENFORCE(scope_ptr);
PADDLE_ENFORCE_NOT_NULL(scope_ptr, platform::errors::PreconditionNotMet(
"The scope should not be nullptr."));
arg.main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
auto* builder = predictor_.config_.pass_builder();
......@@ -441,7 +456,9 @@ bool AnalysisPredictor::MkldnnQuantizer::RunQuantizePasses() const {
PrepareArgument();
auto& arg = predictor_.argument_;
Analyzer().Run(&arg);
PADDLE_ENFORCE(arg.scope_valid());
PADDLE_ENFORCE_EQ(
arg.scope_valid(), true,
platform::errors::PreconditionNotMet("The scope should be valid."));
VLOG(5) << "to prepare executor";
ARGUMENT_CHECK_FIELD((&arg), ir_analyzed_program);
predictor_.inference_program_.reset(
......@@ -456,7 +473,8 @@ bool AnalysisPredictor::MkldnnQuantizer::RunWarmup() const {
VLOG(3) << "Predictor: run a quantization warmup iteration";
auto warmup_data = qconfig_->warmup_data();
PADDLE_ENFORCE_NOT_NULL(warmup_data,
"Warmup data cannot be NULL in the config.");
platform::errors::PreconditionNotMet(
"Warmup data cannot be NULL in the config."));
PrettyLogH1("--- Running warmup iteration for quantization");
// Run the inference program
......@@ -469,7 +487,10 @@ bool AnalysisPredictor::MkldnnQuantizer::RunWarmup() const {
float AnalysisPredictor::MkldnnQuantizer::SafeEntropy(
std::vector<int> reference_distr_P, int P_sum,
std::vector<int> candidate_distr_Q, int Q_sum) const {
PADDLE_ENFORCE_EQ(reference_distr_P.size(), candidate_distr_Q.size());
PADDLE_ENFORCE_EQ(reference_distr_P.size(), candidate_distr_Q.size(),
platform::errors::InvalidArgument(
"The P size %d should be equal to Q size %d",
reference_distr_P.size(), candidate_distr_Q.size()));
float tmp_sum1 = 0;
float tmp_sum2 = 0;
for (size_t idx = 0; idx < reference_distr_P.size(); idx++) {
......@@ -479,10 +500,11 @@ float AnalysisPredictor::MkldnnQuantizer::SafeEntropy(
tmp_sum1 += 0;
tmp_sum2 += 0;
} else {
PADDLE_ENFORCE(q_idx != 0, "MkldnnQuantizer: Fatal error!, idx = " +
std::to_string(idx) +
" qindex = 0! p_idx = " +
std::to_string(p_idx));
PADDLE_ENFORCE_NE(
q_idx, 0,
platform::errors::PreconditionNotMet(
"MkldnnQuantizer: Fatal error!, idx = " + std::to_string(idx) +
" qindex = 0! p_idx = " + std::to_string(p_idx)));
}
tmp_sum1 += p_idx * (log(Q_sum * p_idx));
tmp_sum2 += p_idx * (log(P_sum * q_idx));
......
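What the two running sums compute, assuming the function ends by returning (tmp_sum1 - tmp_sum2) / P_sum as in the full source:

// With p_i = P_i / P_sum and q_i = Q_i / Q_sum:
//   (tmp_sum1 - tmp_sum2) / P_sum
//     = sum_i (P_i / P_sum) * log((Q_sum * P_i) / (P_sum * Q_i))
//     = sum_i p_i * log(p_i / q_i)  =  KL(p || q)
// This is why q_idx == 0 with p_idx != 0 is rejected above: it would make
// the divergence infinite.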
......@@ -231,6 +231,10 @@ void CpuPassStrategy::EnableMkldnnQuantizer() {
void CpuPassStrategy::EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN
if (!use_mkldnn_bfloat16_) {
passes_.push_back("cpu_bfloat16_placement_pass");
passes_.push_back("cpu_bfloat16_pass");
}
use_mkldnn_bfloat16_ = true;
#else
use_mkldnn_bfloat16_ = false;
......
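A usage sketch for the new pipeline entries (API calls as declared on AnalysisConfig; the model path is hypothetical, and MKLDNN is assumed to be enabled first):

AnalysisConfig config;
config.SetModel("/path/to/model");
config.EnableMKLDNN();
// Appends cpu_bfloat16_placement_pass and cpu_bfloat16_pass exactly once.
config.EnableMkldnnBfloat16();
auto predictor = CreatePaddlePredictor(config);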
......@@ -34,8 +34,11 @@ class ConcatOpConverter : public OpConverter {
itensors.push_back(engine_->GetITensor(input_name));
}
int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
PADDLE_ENFORCE(axis > 0,
"The axis attr of Concat op should be large than 0 for trt");
PADDLE_ENFORCE_GT(axis, 0, platform::errors::InvalidArgument(
"The axis attr of Concat"
" op should be larger than 0 for trt. "
"But received %d.",
axis));
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(),
itensors.size());
......
......@@ -100,7 +100,9 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op,
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
auto* layer = fadd_layer(const_cast<nvinfer1::ITensor*>(X), n_output, n_input,
nv_ksize, weight, bias);
PADDLE_ENFORCE(layer != nullptr);
PADDLE_ENFORCE_NOT_NULL(layer,
platform::errors::Fatal("TensorRT create conv2d"
" layer error."));
layer->setStride(nv_strides);
layer->setPadding(nv_paddings);
layer->setNbGroups(groups);
......
......@@ -43,13 +43,30 @@ class ElementwiseWeightOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer";
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
PADDLE_ENFORCE_EQ(
op_desc.Input("X").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"X\").size() "
"should equal to 1, but received Input(\"X\").size() = %u.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(
op_desc.Input("Y").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\").size() "
"should equal to 1, but received Input(\"Y\").size() = %u.",
op_desc.Input("Y").size())); // Y is a weight
PADDLE_ENFORCE_EQ(
op_desc.Output("Out").size(), 1,
platform::errors::InvalidArgument(
"The input op's Output(\"Out\").size() "
"should equal to 1, but reveceid Output(\"Out\").size() = %u.",
op_desc.Output("Out").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
PADDLE_ENFORCE_NOT_NULL(Y_v);
PADDLE_ENFORCE_NOT_NULL(
Y_v, platform::errors::NotFound("Variable %s not found in scope.",
op_desc.Input("Y").front().c_str()));
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
float* weight_data = nullptr;
weight_data =
......@@ -176,9 +193,24 @@ class ElementwiseTensorOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
nvinfer1::ILayer* layer = nullptr;
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
PADDLE_ENFORCE_EQ(
op_desc.Input("X").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"X\").size() "
"should equal to 1, but received Input(\"X\").size() = %u.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(
op_desc.Input("Y").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\").size() "
"should equal to 1, but received Input(\"Y\").size() = %u.",
op_desc.Input("Y").size())); // Y is a weight
PADDLE_ENFORCE_EQ(
op_desc.Output("Out").size(), 1,
platform::errors::InvalidArgument(
"The input op's Output(\"Out\").size() "
"should equal to 1, but received Output(\"Out\").size() = %u.",
op_desc.Output("Out").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Y = engine_->GetITensor(op_desc.Input("Y").front());
......
......@@ -29,38 +29,67 @@ class DefaultIOConverter : public EngineIOConverter {
// NOTE out is GPU memory.
virtual void operator()(const LoDTensor& in, void* out,
size_t max_size) override {
PADDLE_ENFORCE(out != nullptr);
PADDLE_ENFORCE(stream_ != nullptr);
PADDLE_ENFORCE_NOT_NULL(out,
platform::errors::InvalidArgument(
"The input param 'out' must not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(stream_,
platform::errors::PreconditionNotMet(
"You should set up stream_ by SetStream() "
"before you call the operator()."));
const auto& place = in.place();
size_t size = in.memory_size();
PADDLE_ENFORCE_LE(size, max_size);
PADDLE_ENFORCE_LE(
size, max_size,
platform::errors::InvalidArgument(
"The input Tensor in's memory_size shoule be less than or equal to "
"the input max_size. But in's memory_size = %u, max_size = %u.",
size, max_size));
if (is_cpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size,
cudaMemcpyHostToDevice, *stream_));
PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemcpyAsync(
out, in.data<float>(), size, cudaMemcpyHostToDevice, *stream_));
} else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size,
cudaMemcpyDeviceToDevice, *stream_));
PADDLE_ENFORCE_EQ(
0, cudaMemcpyAsync(out, in.data<float>(), size,
cudaMemcpyDeviceToDevice, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error."));
} else {
PADDLE_THROW("Unknown device for converter");
PADDLE_THROW(platform::errors::NotFound("Unknown device for converter"));
}
cudaStreamSynchronize(*stream_);
}
// NOTE in is GPU memory.
virtual void operator()(const void* in, LoDTensor* out,
size_t max_size) override {
PADDLE_ENFORCE(in != nullptr);
PADDLE_ENFORCE(stream_ != nullptr);
PADDLE_ENFORCE_NOT_NULL(in,
platform::errors::InvalidArgument(
"The input param 'in' must not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(stream_,
platform::errors::PreconditionNotMet(
"You should set up stream_ by SetStream() "
"before you call the operator()."));
const auto& place = out->place();
size_t size = out->memory_size();
PADDLE_ENFORCE_LE(size, max_size);
PADDLE_ENFORCE_LE(
size, max_size,
platform::errors::InvalidArgument(
"The input Tensor out's memory_size shoule be less than or equal "
"to the input max_size. "
"But out's memory_size = %u, max_size = %u.",
size, max_size));
if (is_cpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToHost, *stream_));
cudaMemcpyDeviceToHost, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToHost) error."));
} else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToDevice, *stream_));
PADDLE_ENFORCE_EQ(
0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToDevice, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error."));
} else {
PADDLE_THROW("Unknown device for converter");
PADDLE_THROW(platform::errors::NotFound("Unknown device for converter"));
}
cudaStreamSynchronize(*stream_);
}
......
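Two checking idioms coexist in the hunk above; side by side as a sketch (buffers and stream hypothetical):

// Checks the returned cudaError_t against cudaSuccess and reports the CUDA
// error string on failure.
PADDLE_ENFORCE_CUDA_SUCCESS(
    cudaMemcpyAsync(dst, src, size, cudaMemcpyHostToDevice, stream));
// Compares the return value against 0 and attaches a typed external error.
PADDLE_ENFORCE_EQ(
    0, cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToDevice, stream),
    platform::errors::External(
        "cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error."));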
......@@ -44,10 +44,14 @@ class EngineIOConverter {
static void ConvertInput(const std::string& op_type, const LoDTensor& in,
void* out, size_t max_size, cudaStream_t* stream) {
PADDLE_ENFORCE(stream != nullptr);
PADDLE_ENFORCE_NOT_NULL(stream,
platform::errors::InvalidArgument(
"The input stream must not be nullptr."));
auto* converter = Registry<EngineIOConverter>::Global().Lookup(
op_type, "default" /* default_type */);
PADDLE_ENFORCE_NOT_NULL(converter);
PADDLE_ENFORCE_NOT_NULL(
converter, platform::errors::Unimplemented(
"The %s in is not supported yet.", op_type.c_str()));
converter->SetStream(stream);
(*converter)(in, out, max_size);
}
......@@ -55,10 +59,14 @@ class EngineIOConverter {
static void ConvertOutput(const std::string& op_type, const void* in,
LoDTensor* out, size_t max_size,
cudaStream_t* stream) {
PADDLE_ENFORCE(stream != nullptr);
PADDLE_ENFORCE_NOT_NULL(stream,
platform::errors::InvalidArgument(
"The input stream must not be nullptr."));
auto* converter = Registry<EngineIOConverter>::Global().Lookup(
op_type, "default" /* default_type */);
PADDLE_ENFORCE_NOT_NULL(converter);
PADDLE_ENFORCE_NOT_NULL(
converter, platform::errors::Unimplemented(
"The %s in not supported yet.", op_type.c_str()));
converter->SetStream(stream);
(*converter)(in, out, max_size);
}
......
......@@ -53,7 +53,12 @@ class OpConverter {
OpConverter* it{nullptr};
if (op_desc.Type() == "mul") {
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL,
platform::errors::InvalidArgument(
"The input op mul's Input(\"Y\")."
"size() should equal to 1, but reveceid "
"Input(\"Y\").size() = %u.",
op_desc.Input("Y").size()));
std::string Y = op_desc.Input("Y")[0];
if (parameters.count(Y)) {
it = Registry<OpConverter>::Global().Lookup("fc");
......@@ -66,38 +71,51 @@ class OpConverter {
// static std::unordered_set<std::string> add_weight_op_set {"add", "mul",
// "sub", "div"};
static std::unordered_set<std::string> add_weight_op_set{"add", "mul"};
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\")."
"size() should equal to 1, but reveceid "
"Input(\"Y\").size() = %u.",
op_desc.Input("Y").size()));
int op_type_len = op_desc.Type().size();
std::string op_type = op_desc.Type().substr(op_type_len - 3, op_type_len);
std::string Y = op_desc.Input("Y")[0];
if (parameters.count(Y)) {
PADDLE_ENFORCE(add_weight_op_set.count(op_type) > 0,
"Unsupported elementwise type" + op_type);
PADDLE_ENFORCE_GT(
add_weight_op_set.count(op_type), 0,
platform::errors::Unimplemented("Unsupported elementwise type %s",
op_type.c_str()));
it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
"_weight");
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]",
op_desc.Type());
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented(
"no OpConverter for optype [%s]", op_desc.Type()));
} else {
PADDLE_ENFORCE(add_tensor_op_set.count(op_type) > 0,
"Unsupported elementwise type" + op_type);
PADDLE_ENFORCE_GT(
add_tensor_op_set.count(op_type), 0,
platform::errors::Unimplemented("Unsupported elementwise type %s",
op_type.c_str()));
it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
"_tensor");
}
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]",
op_desc.Type());
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
}
if (op_desc.Type() == "depthwise_conv2d") {
it = Registry<OpConverter>::Global().Lookup("conv2d");
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]",
op_desc.Type());
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
}
if (!it) {
it = Registry<OpConverter>::Global().Lookup(op_desc.Type());
}
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]",
op_desc.Type());
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
it->SetEngine(engine);
(*it)(op, scope, test_mode);
......@@ -149,9 +167,13 @@ class OpConverter {
for (auto& input : inputs) {
if (parameters.count(input)) continue;
auto* var = block_desc->FindVar(input);
PADDLE_ENFORCE(var, "no variable called %s", input);
PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR,
"TensorRT engine only takes LoDTensor as input");
PADDLE_ENFORCE_NOT_NULL(
var, platform::errors::NotFound("no variable called %s in block.",
input.c_str()));
PADDLE_ENFORCE_EQ(
var->GetType(), FluidDT::VarType_Type_LOD_TENSOR,
platform::errors::InvalidArgument("TensorRT engine only takes "
"LoDTensor as input"));
auto var_shape = var->GetShape();
if (engine->with_dynamic_shape()) {
#if IS_TRT_VERSION_GE(6000)
......
......@@ -39,9 +39,22 @@ class PadOpConverter : public OpConverter {
nvinfer1::Dims input_shape = input->getDimensions();
int nbDims = input_shape.nbDims;
int pad_size = static_cast<int>(paddings.size());
PADDLE_ENFORCE_GE(nbDims, 2);
PADDLE_ENFORCE_EQ((nbDims + 1) * 2, pad_size);
PADDLE_ENFORCE(pad_value == 0.0, "The pad layer of TRT only support zero.");
PADDLE_ENFORCE_GE(
nbDims, 2,
platform::errors::InvalidArgument(
"Input X[0]'s dimension should greater than or equal to 2. "
"But received %d.",
nbDims));
PADDLE_ENFORCE_EQ(
(nbDims + 1) * 2, pad_size,
platform::errors::InvalidArgument("Input X[0]'s dimension(nbDims for "
"short) should meet the condition:"
"(nbDims + 1) * 2 == pad_size. But "
"received nbDims:%d, pad_size:%d.",
nbDims, pad_size));
PADDLE_ENFORCE_EQ(pad_value, 0.0,
platform::errors::InvalidArgument(
"The pad layer of TRT only support zero."));
nvinfer1::DimsHW pre_pad(paddings[pad_size - 4], paddings[pad_size - 2]);
nvinfer1::DimsHW post_pad(paddings[pad_size - 3], paddings[pad_size - 1]);
......@@ -50,7 +63,9 @@ class PadOpConverter : public OpConverter {
*const_cast<nvinfer1::ITensor*>(input),
pre_pad, post_pad);
PADDLE_ENFORCE(layer != nullptr);
PADDLE_ENFORCE_NOT_NULL(layer,
platform::errors::External(
"add padding layer to tensorrt engine error"));
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "pad", {output_name}, test_mode);
}
......
......@@ -23,9 +23,8 @@ class SliceOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
    // This OP is implemented by trt dynamic shape plugin.
    // Dynamic shape plugin requires TRT version greater than 6.0.
#if IS_TRT_VERSION_GE(6000)
    // This OP is implemented by trt dynamic shape plugin.
    // Dynamic shape plugin requires TRT version greater than 6.0.
VLOG(4) << "convert slice op to tensorrt layer";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
......@@ -38,27 +37,65 @@ class SliceOpConverter : public OpConverter {
std::vector<int> ends =
BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("ends"));
PADDLE_ENFORCE_EQ(
starts.size(), axes.size(),
platform::errors::InvalidArgument(
"The size of starts must be equal to the size of axes."));
PADDLE_ENFORCE_EQ(
ends.size(), axes.size(),
platform::errors::InvalidArgument(
"The size of ends must be equal to the size of axes."));
auto input_dims = input->getDimensions();
if (!engine_->with_dynamic_shape()) {
// notice that input shape is [CHW] without batch axis when input has
// static shape
for (size_t i = input_dims.nbDims; i > 0; i--) {
input_dims.d[i] = input_dims.d[i - 1];
}
input_dims.d[0] = 1; // fake batchsize, not useful here
for (size_t i = 0; i < axes.size(); i++) {
// split on batch is not supported in TensorRT
PADDLE_ENFORCE_NE(axes[i], 0, platform::errors::InvalidArgument(
"Invalid slice axis. Slice on batch "
"axis is not supported in TensorRT"));
if (starts[i] < 0) {
starts[i] = std::max(starts[i] + input_dims.d[axes[i]], 0);
}
if (ends[i] < 0) {
ends[i] = std::max(ends[i] + input_dims.d[axes[i]], 0);
}
ends[i] = std::min(ends[i], input_dims.d[axes[i]]);
PADDLE_ENFORCE_GT(
ends[i], starts[i],
platform::errors::InvalidArgument(
"Attr(ends) should be greater than attr(starts) in "
"slice op. But received ends = %d, starts = %d.",
ends[i], starts[i]));
}
}
nvinfer1::ILayer* layer = nullptr;
if (engine_->with_dynamic_shape()) {
#if IS_TRT_VERSION_GE(6000)
bool ban_fp16 = engine_->disable_trt_plugin_fp16();
plugin::SlicePluginDynamic* plugin =
new plugin::SlicePluginDynamic(starts, ends, ends, ban_fp16);
new plugin::SlicePluginDynamic(starts, ends, axes, ban_fp16);
layer = engine_->AddPluginV2(&input, 1, plugin);
} else {
PADDLE_THROW(platform::errors::Fatal(
"You are running the Ernie(Bert) model in static"
"shape mode, which is not supported for the time being.\n"
"You can use the config.SetTRTDynamicShapeInfo(...) interface"
" to set the shape information to run the dynamic shape mode."));
}
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "skip_layernorm", {output_name}, test_mode);
#else
PADDLE_THROW(platform::errors::Fatal(
"You are running the TRT Dynamic Shape mode, need to confirm that "
"your TRT version is no less than 6.0"));
#endif
} else {
bool ban_fp16 = engine_->disable_trt_plugin_fp16();
plugin::SlicePlugin* plugin =
new plugin::SlicePlugin(starts, ends, axes, ban_fp16);
layer = engine_->AddPlugin(&input, 1, plugin);
}
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "slice", {output_name}, test_mode);
}
};
......
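A worked example of the start/end normalization in the static-shape branch above (shapes hypothetical):

// input_dims.d = {1, 4, 8, 8} after the fake batch axis is prepended;
// axes = {2}, starts = {-3}, ends = {100}:
//   starts[0] = max(-3 + input_dims.d[2], 0) = max(-3 + 8, 0) = 5
//   ends[0]   = min(100, input_dims.d[2])    = min(100, 8)    = 8
// so the slice keeps elements [5, 8) along axis 2, and the
// ends > starts check passes (8 > 5).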
......@@ -28,11 +28,20 @@ class SwishOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
int input_num = op_desc.Input("X").size();
PADDLE_ENFORCE(input_num == 1);
PADDLE_ENFORCE_EQ(input_num, 1,
platform::errors::InvalidArgument(
"The input X's size must equal to 1 in TRT swish op."
" But received X's size %d.",
input_num));
auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
// Get output
size_t output_num = op_desc.Output("Out").size();
PADDLE_ENFORCE(output_num == 1);
PADDLE_ENFORCE_EQ(
output_num, 1UL,
platform::errors::InvalidArgument(
"The ouput Out's size must equal to 1 in TRT swish op. "
"But received Out's size %u.",
output_num));
// Get attrs
float beta = BOOST_GET_CONST(float, op_desc.GetAttr("beta"));
......
......@@ -49,7 +49,10 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
const platform::DeviceContext& ctx) {
auto dims = tensor->dims();
size_t num_elements = analysis::AccuDims(dims, dims.size());
PADDLE_ENFORCE_GT(num_elements, 0);
PADDLE_ENFORCE_GT(
num_elements, 0UL,
platform::errors::PermissionDenied("RandomizeTensor only can be used for "
"tensor which dims is not zero."));
platform::CPUPlace cpu_place;
framework::LoDTensor temp_tensor;
......@@ -79,7 +82,8 @@ class TRTConvertValidation {
scope_(scope),
if_add_batch_(if_add_batch),
max_batch_size_(max_batch_size) {
PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0,
platform::errors::External("cudaStreamCreate error."));
engine_.reset(new TensorRTEngine(max_batch_size, workspace_size));
engine_->InitNetwork();
}
......@@ -154,7 +158,12 @@ class TRTConvertValidation {
void Execute(int batch_size,
std::unordered_set<std::string> neglected_output = {}) {
// Execute Fluid Op
PADDLE_ENFORCE_LE(batch_size, max_batch_size_);
PADDLE_ENFORCE_LE(batch_size, max_batch_size_,
platform::errors::InvalidArgument(
"Runtime batch_size should be less than or equal to "
"max_batch_size_. "
"But received batch_size:%d, max_batch_size_:%d",
batch_size, max_batch_size_));
platform::CUDADeviceContext ctx(place_);
op_->Run(scope_, place_);
cudaStreamSynchronize(stream_);
......
......@@ -31,6 +31,7 @@ struct SimpleOpTypeSetTeller : public Teller {
teller_set.insert("fused_embedding_eltwise_layernorm");
teller_set.insert("multihead_matmul");
teller_set.insert("skip_layernorm");
teller_set.insert("slice");
#endif
}
......
......@@ -26,8 +26,10 @@ namespace inference {
namespace tensorrt {
namespace plugin {
// Dynamic Plugin below.
#if IS_TRT_VERSION_GE(6000)
SlicePlugin *CreateSlicePluginDeserialize(const void *buffer, size_t length) {
return new SlicePlugin(buffer, length);
}
REGISTER_TRT_PLUGIN("slice_plugin", CreateSlicePluginDeserialize);
template <typename T>
__global__ void SliceKernel(int num, int dims, const T *input,
......@@ -56,11 +58,196 @@ __global__ void SliceKernel(int num, int dims, const T *input,
}
}
SlicePlugin::SlicePlugin(std::vector<int> starts, std::vector<int> ends,
std::vector<int> axes, bool ban_fp16)
: starts_(starts), ends_(ends), axes_(axes), ban_fp16_(ban_fp16) {
cudaEventCreate(&copy_event_);
cudaStreamCreate(&copy_stream_);
}
SlicePlugin::SlicePlugin(void const *serial_data, size_t serial_length) {
deserializeBase(serial_data, serial_length);
DeserializeValue(&serial_data, &serial_length, &starts_);
DeserializeValue(&serial_data, &serial_length, &ends_);
DeserializeValue(&serial_data, &serial_length, &axes_);
DeserializeValue(&serial_data, &serial_length, &ban_fp16_);
cudaEventCreate(&copy_event_);
cudaStreamCreate(&copy_stream_);
}
SlicePlugin::~SlicePlugin() {
cudaStreamDestroy(copy_stream_);
cudaEventDestroy(copy_event_);
cudaFree(offset_temp_data_);
}
SlicePlugin *SlicePlugin::clone() const {
return new SlicePlugin(starts_, ends_, axes_, ban_fp16_);
}
bool SlicePlugin::supportsFormat(nvinfer1::DataType type,
nvinfer1::PluginFormat format) const {
#ifdef SUPPORTS_CUDA_FP16
return ((type == nvinfer1::DataType::kFLOAT ||
type == nvinfer1::DataType::kHALF) &&
(format == nvinfer1::PluginFormat::kNCHW));
#else
return ((type == nvinfer1::DataType::kFLOAT) &&
(format == nvinfer1::PluginFormat::kNCHW));
#endif
}
nvinfer1::Dims SlicePlugin::getOutputDimensions(int index,
const nvinfer1::Dims *inputs,
int nb_input_dims) {
auto in_dims = inputs[0];
nvinfer1::Dims out_dims = in_dims;
for (size_t i = 0; i < axes_.size(); i++) {
int start = starts_[i];
int end = ends_[i];
out_dims.d[axes_[i] - 1] = end - start;
}
return out_dims;
}
int SlicePlugin::enqueue(int batch_size, const void *const *inputs,
void **outputs, void *workspace, cudaStream_t stream) {
auto input_dims = getInputDims(0);
// notice input dims is [C, H, W], add input batch dim here
auto out_dims = getOutputDimensions(0, &input_dims, 1);
input_dims.nbDims += 1;
out_dims.nbDims += 1;
for (auto i = input_dims.nbDims; i > 0; --i) {
input_dims.d[i] = input_dims.d[i - 1];
out_dims.d[i] = out_dims.d[i - 1];
}
input_dims.d[0] = batch_size;
out_dims.d[0] = batch_size;
auto num_dims = input_dims.nbDims;
size_t out_num = ProductDim(out_dims);
std::vector<int> seg_offsets;
std::vector<int> offsets;
std::vector<int> extends;
offsets.resize(num_dims);
extends.resize(num_dims);
seg_offsets.resize(num_dims);
seg_offsets[num_dims - 1] = 1;
for (int i = num_dims - 2; i >= 0; i--) {
seg_offsets[i] = input_dims.d[i + 1] * seg_offsets[i + 1];
}
for (size_t i = 0; i < num_dims; ++i) {
offsets[i] = 0;
extends[i] = out_dims.d[i];
}
for (size_t i = 0; i < axes_.size(); ++i) {
offsets[axes_[i]] = starts_[i];
}
std::vector<int> offset_info;
for (size_t i = 0; i < num_dims; ++i) {
offset_info.push_back(offsets[i]);
offset_info.push_back(extends[i]);
offset_info.push_back(seg_offsets[i]);
}
if (offset_temp_data_ == nullptr) {
cudaMalloc(&offset_temp_data_, 3 * num_dims * sizeof(int));
}
cudaMemcpyAsync(offset_temp_data_, offset_info.data(),
sizeof(int) * 3 * num_dims, cudaMemcpyHostToDevice,
copy_stream_);
cudaEventRecord(copy_event_, copy_stream_);
cudaStreamWaitEvent(stream, copy_event_, 0);
int threads = 256;
int blocks = (out_num + threads - 1) / threads;
auto input_type = getDataType();
if (input_type == nvinfer1::DataType::kFLOAT) {
const float *input1 = static_cast<const float *>(inputs[0]);
float *output = static_cast<float *>(outputs[0]);
SliceKernel<float><<<blocks, threads, 3 * num_dims * sizeof(int), stream>>>(
out_num, num_dims, input1, offset_temp_data_, output);
} else if (input_type == nvinfer1::DataType::kHALF) {
#ifdef SUPPORTS_CUDA_FP16
const half *input1 = static_cast<const half *>(inputs[0]);
half *output = static_cast<half *>(outputs[0]);
SliceKernel<half><<<blocks, threads, 3 * num_dims * sizeof(int), stream>>>(
out_num, num_dims, input1, offset_temp_data_, output);
#else
PADDLE_THROW(platform::errors::Fatal(
"The cuda archs you specific should greater than 600."));
#endif
} else {
PADDLE_THROW(platform::errors::Fatal(
"The Slice TRT Plugin's input type should be float or half."));
}
return cudaGetLastError() != cudaSuccess;
}
size_t SlicePlugin::getSerializationSize() {
return getBaseSerializationSize() + SerializedSize(getPluginType()) +
SerializedSize(starts_) + SerializedSize(ends_) +
SerializedSize(axes_) + SerializedSize(ban_fp16_);
}
void SlicePlugin::serialize(void *buffer) {
SerializeValue(&buffer, getPluginType());
serializeBase(buffer);
SerializeValue(&buffer, starts_);
SerializeValue(&buffer, ends_);
SerializeValue(&buffer, axes_);
SerializeValue(&buffer, ban_fp16_);
}
// Dynamic Plugin below.
#if IS_TRT_VERSION_GE(6000)
SlicePluginDynamic::SlicePluginDynamic(std::vector<int> starts,
std::vector<int> ends,
std::vector<int> axes, bool ban_fp16)
: starts_(starts), ends_(ends), axes_(axes), ban_fp16_(ban_fp16) {
cudaEventCreate(&copy_event_);
cudaStreamCreate(&copy_stream_);
}
SlicePluginDynamic::SlicePluginDynamic(void const *serialData,
size_t serialLength) {
DeserializeValue(&serialData, &serialLength, &starts_);
DeserializeValue(&serialData, &serialLength, &ends_);
DeserializeValue(&serialData, &serialLength, &axes_);
DeserializeValue(&serialData, &serialLength, &ban_fp16_);
cudaEventCreate(&copy_event_);
cudaStreamCreate(&copy_stream_);
}
void SlicePluginDynamic::destroy() {
cudaStreamDestroy(copy_stream_);
cudaEventDestroy(copy_event_);
cudaFree(offset_temp_data_);
delete this;
}
int SlicePluginDynamic::initialize() { return 0; }
size_t SlicePluginDynamic::getSerializationSize() const { return 0; }
void SlicePluginDynamic::serialize(void *buffer) const {}
size_t SlicePluginDynamic::getSerializationSize() const {
  size_t size = SerializedSize(starts_) + SerializedSize(ends_) +
                SerializedSize(axes_) + SerializedSize(ban_fp16_);
  return size;
}
void SlicePluginDynamic::serialize(void *buffer) const {
SerializeValue(&buffer, starts_);
SerializeValue(&buffer, ends_);
SerializeValue(&buffer, axes_);
SerializeValue(&buffer, ban_fp16_);
}
nvinfer1::DimsExprs SlicePluginDynamic::getOutputDimensions(
int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs,
......@@ -136,9 +323,9 @@ int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc,
std::vector<int> offsets;
std::vector<int> extends;
offsets.reserve(num_dims);
extends.reserve(num_dims);
seg_offsets.reserve(num_dims);
offsets.resize(num_dims);
extends.resize(num_dims);
seg_offsets.resize(num_dims);
seg_offsets[num_dims - 1] = 1;
for (int i = num_dims - 2; i >= 0; i--) {
......@@ -160,16 +347,16 @@ int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc,
offset_info.push_back(seg_offsets[i]);
}
  framework::Tensor offset_temp_tensor;
  int device_id;
  cudaGetDevice(&device_id);
  offset_temp_tensor.Resize({3 * num_dims});
  auto *offset_temp_data =
      offset_temp_tensor.mutable_data<int>(platform::CUDAPlace(device_id));
  cudaMemcpyAsync(offset_temp_data, offset_info.data(),
                  sizeof(int) * 3 * num_dims, cudaMemcpyHostToDevice, stream);
  if (offset_temp_data_ == nullptr) {
    cudaMalloc(&offset_temp_data_, 3 * num_dims * sizeof(int));
  }
  cudaMemcpyAsync(offset_temp_data_, offset_info.data(),
                  sizeof(int) * 3 * num_dims, cudaMemcpyHostToDevice,
                  copy_stream_);
  cudaEventRecord(copy_event_, copy_stream_);
  cudaStreamWaitEvent(stream, copy_event_, 0);
int threads = 256;
int blocks = (out_num + threads - 1) / threads;
......@@ -178,13 +365,13 @@ int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc,
const float *input1 = static_cast<const float *>(inputs[0]);
float *output = static_cast<float *>(outputs[0]);
SliceKernel<float><<<blocks, threads, 3 * num_dims * sizeof(int), stream>>>(
out_num, num_dims, input1, offset_temp_data, output);
out_num, num_dims, input1, offset_temp_data_, output);
} else if (input_type == nvinfer1::DataType::kHALF) {
#ifdef SUPPORTS_CUDA_FP16
const half *input1 = static_cast<const half *>(inputs[0]);
half *output = static_cast<half *>(outputs[0]);
SliceKernel<half><<<blocks, threads, 3 * num_dims * sizeof(int), stream>>>(
out_num, num_dims, input1, offset_temp_data, output);
out_num, num_dims, input1, offset_temp_data_, output);
#else
PADDLE_THROW(platform::errors::Fatal(
"The cuda archs you specific should greater than 600."));
......
......@@ -26,17 +26,56 @@ namespace inference {
namespace tensorrt {
namespace plugin {
class SlicePlugin : public PluginTensorRT {
public:
explicit SlicePlugin(std::vector<int> starts, std::vector<int> ends,
std::vector<int> axes, bool ban_fp16);
// It was used for tensorrt deserialization.
// It should not be called by users.
SlicePlugin(void const* serial_data, size_t serial_length);
~SlicePlugin();
SlicePlugin* clone() const override;
const char* getPluginType() const override { return "slice_plugin"; }
int getNbOutputs() const override { return 1; }
int initialize() override { return 0; }
bool supportsFormat(nvinfer1::DataType type,
nvinfer1::PluginFormat format) const override;
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs,
int nb_input_dims) override;
int enqueue(int batch_size, const void* const* inputs, void** outputs,
void* workspace, cudaStream_t stream) override;
protected:
size_t getSerializationSize() override;
  // TRT will call this func to serialize the plugin configuration.
// It should not be called by users.
void serialize(void* buffer) override;
private:
std::vector<int> starts_;
std::vector<int> ends_;
std::vector<int> axes_;
bool ban_fp16_{false};
int* offset_temp_data_{nullptr};
cudaEvent_t copy_event_;
cudaStream_t copy_stream_;
};
#if IS_TRT_VERSION_GE(6000)
class SlicePluginDynamic : public DynamicPluginTensorRT {
public:
  explicit SlicePluginDynamic(std::vector<int> starts, std::vector<int> ends,
                              std::vector<int> axes, bool ban_fp16)
      : starts_(starts), ends_(ends), axes_(axes), ban_fp16_(ban_fp16) {}
  SlicePluginDynamic(void const* serialData, size_t serialLength) {}
  explicit SlicePluginDynamic(std::vector<int> starts, std::vector<int> ends,
                              std::vector<int> axes, bool ban_fp16);
  SlicePluginDynamic(void const* serialData, size_t serialLength);
  nvinfer1::IPluginV2DynamicExt* clone() const override {
    return new SlicePluginDynamic(starts_, ends_, axes_, ban_fp16_);
  }
const char* getPluginType() const override { return "slice_plugin"; }
int getNbOutputs() const override { return 1; }
int initialize() override;
......@@ -72,15 +111,54 @@ class SlicePluginDynamic : public DynamicPluginTensorRT {
const nvinfer1::DataType* inputTypes,
int nbInputs) const override;
void destroy() override { delete this; }
void destroy() override;
private:
std::vector<int> starts_;
std::vector<int> ends_;
std::vector<int> axes_;
bool ban_fp16_{false};
int* offset_temp_data_{nullptr};
cudaEvent_t copy_event_;
cudaStream_t copy_stream_;
};
class SlicePluginV2Creator : public nvinfer1::IPluginCreator {
public:
SlicePluginV2Creator() {}
const char* getPluginName() const override { return "slice_plugin"; }
const char* getPluginVersion() const override { return "1"; }
const nvinfer1::PluginFieldCollection* getFieldNames() override {
return &field_collection_;
}
  nvinfer1::IPluginV2* createPlugin(
      const char* name, const nvinfer1::PluginFieldCollection* fc) override {
    // Creation from a field collection is not supported; this creator only
    // restores plugins from serialized engines via deserializePlugin below.
    return nullptr;
  }
nvinfer1::IPluginV2* deserializePlugin(const char* name,
const void* serialData,
size_t serialLength) override {
auto plugin = new SlicePluginDynamic(serialData, serialLength);
return plugin;
}
void setPluginNamespace(const char* libNamespace) override {
namespace_ = libNamespace;
}
const char* getPluginNamespace() const override { return namespace_.c_str(); }
private:
std::string namespace_;
nvinfer1::PluginFieldCollection field_collection_;
};
REGISTER_TRT_PLUGIN_V2(SlicePluginV2Creator);
#endif
} // namespace plugin
......
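With REGISTER_TRT_PLUGIN_V2 in place, TensorRT can rebuild the dynamic plugin when a serialized engine is loaded: the runtime looks the creator up by the name/version pair returned above and hands it the serialized bytes. A rough sketch of that lookup, assuming the standard TensorRT registry entry points and hypothetical serial_data/serial_len variables:

auto *creator = getPluginRegistry()->getPluginCreator("slice_plugin", "1");
if (creator != nullptr) {
  // serial_data / serial_len come from the serialized engine blob.
  nvinfer1::IPluginV2 *plugin =
      creator->deserializePlugin("slice_plugin", serial_data, serial_len);
}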
......@@ -480,10 +480,9 @@ if(WITH_GPU AND TENSORRT_FOUND)
inference_download_and_uncompress(${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test "ernie_model_4_unserialized.tgz")
endif()
# disable test_trt_dynamic_shape_ernie_ser_deser temporary
#inference_analysis_test(test_trt_dynamic_shape_ernie_ser_deser SRCS trt_dynamic_shape_ernie_deserialize_test.cc
# EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
# ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_unserialized)
inference_analysis_test(test_trt_dynamic_shape_ernie_ser_deser SRCS trt_dynamic_shape_ernie_deserialize_test.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_unserialized)
endif()
......
......@@ -245,8 +245,14 @@ TEST(Analyzer_bert, transfer_scope_cache) {
// Since paddle::framework::global_transfer_scope_cache() and
// paddle::framework::global_transfer_data_cache() are thread_local,
// their pointer should be different among different thread id.
PADDLE_ENFORCE(global_transfer_scope_cache.size(), threads_num);
PADDLE_ENFORCE(global_transfer_data_cache.size(), threads_num);
PADDLE_ENFORCE_EQ(
global_transfer_scope_cache.size(), threads_num,
paddle::platform::errors::Fatal(
"The size of scope cache is not equal to thread number."));
PADDLE_ENFORCE_EQ(
global_transfer_data_cache.size(), threads_num,
paddle::platform::errors::Fatal(
"The size of data cache is not equal to thread number."));
}
} // namespace inference
......
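Most of the remaining hunks in this commit apply one recipe: replace bare PADDLE_ENFORCE / CHECK calls with the typed comparison macros plus a platform::errors payload, so a failure reports what was compared and why. A generic sketch of the pattern, with illustrative names:

PADDLE_ENFORCE_EQ(
    vec.size(), expected,
    paddle::platform::errors::InvalidArgument(
        "The size of vec should be %d, but received %d.", expected,
        vec.size()));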
......@@ -69,11 +69,13 @@ void PD_run() {
PD_DeletePaddleTensor(input);
int size;
const int* out_shape = PD_GetPaddleTensorShape(out_data, &size);
CHECK(size == 2) << "The Output shape's size is NOT match.";
PADDLE_ENFORCE_EQ(size, 2, paddle::platform::errors::InvalidArgument(
"The Output shape's size is NOT match."));
std::vector<int> ref_outshape_size({9, 6});
for (int i = 0; i < 2; ++i) {
CHECK(out_shape[i] == ref_outshape_size[i])
<< "The Output's shape is NOT match.";
    PADDLE_ENFORCE_EQ(out_shape[i], ref_outshape_size[i],
                      paddle::platform::errors::InvalidArgument(
                          "The Output's shape does NOT match."));
}
PD_DeletePaddleBuf(buf);
}
......
......@@ -36,9 +36,9 @@ void zero_copy_run() {
PD_SwitchIrDebug(config, true);
PD_SetModel(config, prog_file.c_str(), params_file.c_str());
bool use_feed_fetch = PD_UseFeedFetchOpsEnabled(config);
CHECK(!use_feed_fetch) << "NO";
EXPECT_FALSE(use_feed_fetch);
bool specify_input_names = PD_SpecifyInputName(config);
CHECK(specify_input_names) << "NO";
EXPECT_TRUE(specify_input_names);
const int batch_size = 1;
const int channels = 3;
......@@ -85,13 +85,13 @@ TEST(PD_AnalysisConfig, profile_mkldnn) {
PD_SwitchIrDebug(config, true);
PD_EnableMKLDNN(config);
bool mkldnn_enable = PD_MkldnnEnabled(config);
CHECK(mkldnn_enable) << "NO";
EXPECT_TRUE(mkldnn_enable);
PD_EnableMkldnnQuantizer(config);
bool quantizer_enable = PD_MkldnnQuantizerEnabled(config);
CHECK(quantizer_enable) << "NO";
EXPECT_TRUE(quantizer_enable);
PD_EnableMkldnnBfloat16(config);
bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config);
CHECK(bfloat16_enable) << "NO";
EXPECT_TRUE(bfloat16_enable);
PD_SetMkldnnCacheCapacity(config, 0);
PD_SetModel(config, prog_file.c_str(), params_file.c_str());
PD_DeleteAnalysisConfig(config);
......
......@@ -126,7 +126,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
std::string turn_mask_pre = "turn_mask_";
auto one_batch = data->NextBatch();
PADDLE_ENFORCE(!one_batch.response.empty());
PADDLE_ENFORCE(
!one_batch.response.empty(),
paddle::platform::errors::Fatal("The response of one batch is empty."));
int size = one_batch.response[0].size();
CHECK_EQ(size, kMaxTurnLen);
// turn tensor assignment
......@@ -214,11 +216,17 @@ void profile(bool use_mkldnn = false) {
input_slots_all, &outputs, FLAGS_num_threads);
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of outputs should be greater than 0."));
auto output = outputs.back();
PADDLE_ENFORCE_GT(output.size(), 0);
PADDLE_ENFORCE_GT(output.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0);
PADDLE_ENFORCE_GT(size, 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
float *result = static_cast<float *>(output[0].data.data());
for (size_t i = 0; i < size; i++) {
EXPECT_NEAR(result[i], result_data[i], 1e-3);
......
......@@ -146,8 +146,9 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
auto iterations = test_data.size();
PADDLE_ENFORCE_LE(
static_cast<size_t>(num_images), iterations * test_data_batch_size,
paddle::platform::errors::Fatal(
"The requested quantization warmup data size " +
std::to_string(num_images) + " is bigger than all test data size.");
std::to_string(num_images) + " is bigger than all test data size."));
PaddleTensor images;
images.name = "image";
......@@ -237,8 +238,9 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
}
PADDLE_ENFORCE_EQ(
static_cast<size_t>(num_objects), static_cast<size_t>(objects_accum),
"The requested num of objects " + std::to_string(num_objects) +
" is the same as objects_accum.");
paddle::platform::errors::Fatal("The requested num of objects " +
std::to_string(num_objects) +
" is the same as objects_accum."));
auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(4);
(*warmup_data)[0] = std::move(images);
......
......@@ -98,7 +98,9 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
input_tensor.name = "word";
input_tensor.dtype = PaddleDType::INT64;
TensorAssignData<int64_t>(&input_tensor, {one_batch.data}, one_batch.lod);
PADDLE_ENFORCE_EQ(batch_size, static_cast<int>(one_batch.lod.size() - 1));
PADDLE_ENFORCE_EQ(
batch_size, static_cast<int>(one_batch.lod.size() - 1),
paddle::platform::errors::Fatal("The lod size of one batch is invaild."));
input_slots->assign({input_tensor});
}
......@@ -137,12 +139,17 @@ TEST(Analyzer_LAC, profile) {
24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25, 25, 25, 25, 25,
44, 24, 25, 25, 25, 36, 42, 43, 44, 14, 15, 44, 14, 15, 44, 14,
15, 44, 38, 39, 14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23};
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL);
PADDLE_ENFORCE_EQ(output.size(), 1UL,
paddle::platform::errors::Fatal(
"The size of output should be equal to 1."));
size_t size = GetSize(output[0]);
size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t);
PADDLE_ENFORCE_GE(size, batch1_size);
PADDLE_ENFORCE_GE(size, batch1_size, paddle::platform::errors::Fatal(
"The size of batch is invaild."));
int64_t *pdata = static_cast<int64_t *>(output[0].data.data());
for (size_t i = 0; i < batch1_size; ++i) {
EXPECT_EQ(pdata[i], lac_ref_data[i]);
......
......@@ -117,11 +117,17 @@ void profile(bool memory_load = false) {
// the first inference result
const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26,
48, 39, 38, 16, 25};
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL);
PADDLE_ENFORCE_EQ(output.size(), 1UL,
paddle::platform::errors::Fatal(
"The size of output should be equal to 1."));
size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0);
PADDLE_ENFORCE_GT(size, 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
int64_t *result = static_cast<int64_t *>(output[0].data.data());
for (size_t i = 0; i < std::min<size_t>(11, size); i++) {
EXPECT_EQ(result[i], chinese_ner_result_data[i]);
......
......@@ -136,11 +136,17 @@ TEST(Analyzer_Pyramid_DNN, profile) {
input_slots_all, &outputs, FLAGS_num_threads);
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data && !FLAGS_zero_copy) {
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL);
PADDLE_ENFORCE_EQ(output.size(), 1UL,
paddle::platform::errors::Fatal(
"The size of output should be equal to 1."));
size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0);
PADDLE_ENFORCE_GT(size, 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
float *result = static_cast<float *>(output[0].data.data());
// output is probability, which is in (0, 1).
for (size_t i = 0; i < size; i++) {
......
......@@ -135,11 +135,17 @@ TEST(Analyzer_rnn2, profile) {
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
// the first inference result
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto output = outputs.back();
PADDLE_ENFORCE_GT(output.size(), 0);
PADDLE_ENFORCE_GT(output.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0);
PADDLE_ENFORCE_GT(size, 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
float *result = static_cast<float *>(output[0].data.data());
for (size_t i = 0; i < size; i++) {
EXPECT_NEAR(result[i], result_data[i], 1e-3);
......
......@@ -47,7 +47,8 @@ struct DataRecord {
num_lines++;
std::vector<std::string> data;
split(line, '\t', &data);
PADDLE_ENFORCE(data.size() >= 4);
      PADDLE_ENFORCE_GE(data.size(), 4, paddle::platform::errors::Fatal(
                                            "The size of data is invalid."));
// load title1 data
std::vector<int64_t> title1_data;
split_to_int64(data[0], ' ', &title1_data);
......@@ -120,11 +121,17 @@ TEST(Analyzer_seq_conv1, profile) {
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
// the first inference result
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL);
PADDLE_ENFORCE_EQ(output.size(), 1UL,
paddle::platform::errors::Fatal(
"The size of output should be equal to 0."));
size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0);
PADDLE_ENFORCE_GT(size, 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
float *result = static_cast<float *>(output[0].data.data());
// output is probability, which is in (0, 1).
for (size_t i = 0; i < size; i++) {
......
......@@ -56,20 +56,26 @@ struct DataRecord {
std::vector<float> slot_data;
split_to_float(data[1], ' ', &slot_data);
std::string name = data[0];
PADDLE_ENFORCE_EQ(slot_data.size() % 11, 0UL,
"line %d, %s should be divisible", num_lines, name);
PADDLE_ENFORCE_EQ(
slot_data.size() % 11, 0UL,
paddle::platform::errors::Fatal("line %d, %s should be divisible",
num_lines, name));
datasets[name].emplace_back(std::move(slot_data));
}
num_samples = num_lines / num_slots;
PADDLE_ENFORCE_EQ(num_samples * num_slots, static_cast<size_t>(num_lines),
"num samples should be divisible");
PADDLE_ENFORCE_GT(num_samples, 0UL);
PADDLE_ENFORCE_EQ(
num_samples * num_slots, static_cast<size_t>(num_lines),
paddle::platform::errors::Fatal("num samples should be divisible"));
PADDLE_ENFORCE_GT(num_samples, 0UL,
paddle::platform::errors::Fatal(
"The num of samples should be greater than 0."));
}
void Prepare(int bs) {
for (auto it = datasets.begin(); it != datasets.end(); ++it) {
PADDLE_ENFORCE_EQ(it->second.size(), num_samples,
"size of each slot should be equal");
PADDLE_ENFORCE_EQ(
it->second.size(), num_samples,
paddle::platform::errors::Fatal("size of each slot should be equal"));
}
size_t num_batches = num_samples / bs;
EXPECT_GT(num_batches, 0UL);
......@@ -90,8 +96,10 @@ struct DataRecord {
std::copy(datas[id].begin(), datas[id].end(),
std::back_inserter(slot.data[k]));
size_t len = datas[id].size() / 11;
PADDLE_ENFORCE_EQ(len * 11, datas[id].size(),
"%s %d size should be divisible", slot.name, id);
PADDLE_ENFORCE_EQ(
len * 11, datas[id].size(),
paddle::platform::errors::Fatal("%s %d size should be divisible",
slot.name, id));
lod[k + 1] = lod[k] + len;
}
slot.shape.assign({static_cast<int>(lod[bs]), 11});
......
......@@ -22,7 +22,9 @@ struct DataReader {
: file(new std::ifstream(path)) {}
bool NextBatch(std::vector<PaddleTensor> *input, int batch_size) {
PADDLE_ENFORCE_EQ(batch_size, 1);
PADDLE_ENFORCE_EQ(batch_size, 1,
paddle::platform::errors::Fatal(
"The size of batch should be equal to 1."));
std::string line;
PaddleTensor tensor;
tensor.dtype = PaddleDType::INT64;
......@@ -81,7 +83,9 @@ TEST(Analyzer_Text_Classification, profile) {
if (FLAGS_num_threads == 1) {
// Get output
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
LOG(INFO) << "get outputs " << outputs.back().size();
for (auto &output : outputs.back()) {
LOG(INFO) << "output.shape: " << to_string(output.shape);
......
......@@ -59,7 +59,9 @@ void SetConfig(AnalysisConfig *cfg) {
}
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data.");
PADDLE_ENFORCE_EQ(
FLAGS_test_all_data, 0,
paddle::platform::errors::Fatal("Only have single batch of data."));
std::string line;
std::ifstream file(FLAGS_infer_data);
std::getline(file, line);
......@@ -99,7 +101,9 @@ void profile(bool use_mkldnn = false) {
auto refer = ProcessALine(line);
file.close();
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto &output = outputs.back().front();
size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
CHECK_EQ(numel, refer.data.size());
......
......@@ -12,15 +12,33 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <dirent.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <unistd.h>
#include "paddle/fluid/inference/tests/api/trt_test_helper.h"
namespace paddle {
namespace inference {
int DeleteCache(std::string path) {
DIR* dir = opendir(path.c_str());
if (dir == NULL) return 0;
struct dirent* ptr;
while ((ptr = readdir(dir)) != NULL) {
if (std::strcmp(ptr->d_name, ".") == 0 ||
std::strcmp(ptr->d_name, "..") == 0) {
continue;
    } else if (ptr->d_type == 8) {  // DT_REG: a regular file
      std::string file_rm = path + "/" + ptr->d_name;
      remove(file_rm.c_str());
    }
  }
  closedir(dir);
  return 0;
}
void run(const AnalysisConfig& config, std::vector<float>* out_data) {
auto predictor = CreatePaddlePredictor(config);
auto input_names = predictor->GetInputNames();
......@@ -86,6 +104,11 @@ void run(const AnalysisConfig& config, std::vector<float>* out_data) {
void trt_ernie(bool with_fp16, std::vector<float> result) {
AnalysisConfig config;
std::string model_dir = FLAGS_infer_model;
// Delete serialization cache to perform serialization first rather than
// deserialization.
std::string opt_cache_dir = FLAGS_infer_model + "/_opt_cache";
DeleteCache(opt_cache_dir);
SetConfig(&config, model_dir, true /* use_gpu */);
config.SwitchUseFeedFetchOps(false);
......
......@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h"
......@@ -162,7 +163,8 @@ void TestInference(const std::string& dirname,
// int device_id = place.GetDeviceId();
paddle::platform::SetDeviceId(0);
#else
PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
PADDLE_THROW(paddle::platform::errors::Unavailable(
"'CUDAPlace' is not supported in CPU only device."));
#endif
}
......
......@@ -16,6 +16,7 @@
#include <random>
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
......@@ -41,12 +42,14 @@ TEST(BestFitAllocator, concurrent_cuda) {
LockedAllocator concurrent_allocator(
std::unique_ptr<Allocator>(new BestFitAllocator(cuda_allocation.get())));
platform::CUDAPlace gpu(0);
platform::CUDADeviceContext dev_ctx(gpu);
auto th_main = [&](std::random_device::result_type seed) {
std::default_random_engine engine(seed);
std::uniform_int_distribution<size_t> dist(1U, 1024U);
platform::CUDAPlace gpu(0);
platform::CUDADeviceContext dev_ctx(gpu);
std::array<size_t, 1024> buf;
for (size_t i = 0; i < 128; ++i) {
size_t allocate_size = dist(engine);
......
......@@ -110,10 +110,12 @@ struct VisitDataArgMinMaxFunctor {
CALL_ARG_MINMAX_FUNCTOR(6);
break;
default:
PADDLE_THROW(
PADDLE_ENFORCE_LE(
x_dims.size(), 6,
platform::errors::InvalidArgument(
"%s operator doesn't supports tensors whose ranks are greater "
"than 6.",
(EnumArgMinMaxValue == kArgMin ? "argmin" : "argmax"));
(EnumArgMinMaxValue == kArgMin ? "argmin" : "argmax")));
break;
#undef CALL_ARG_MINMAX_FUNCTOR
}
......@@ -164,7 +166,8 @@ class ArgMinMaxOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_LT(
axis, x_dims.size(),
platform::errors::InvalidArgument(
"'axis'(%d) must be less than Rank(X)(%d).", axis, x_dims.size()));
"'axis'(%d) must be less than Rank(X)(%d) of Input(X).", axis,
x_dims.size()));
const int& dtype = ctx->Attrs().Get<int>("dtype");
PADDLE_ENFORCE_EQ(
......@@ -192,10 +195,11 @@ class ArgMinMaxOp : public framework::OperatorWithKernel {
}
PADDLE_ENFORCE_LE(
all_element_num, INT_MAX,
platform::errors::InvalidArgument(
"The element num of the argmin/argmax input at axis is "
"%d, is larger than int32 maximum value:%d, you must "
"set the dtype of argmin/argmax to 'int64'.",
all_element_num, INT_MAX);
all_element_num, INT_MAX));
}
}
std::vector<int64_t> vec;
......
......@@ -52,7 +52,10 @@ class AssignFunctor {
template <typename T>
void operator()(const T &v) const {
PADDLE_THROW("Not support type for assign op %s", typeid(T).name());
PADDLE_ENFORCE_EQ(
true, false,
platform::errors::PermissionDenied(
"Not support type for assign op with type %s", typeid(T).name()));
}
private:
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/cudnn_helper.h"
#include "paddle/fluid/platform/dynload/cudnn.h"
namespace paddle {
namespace operators {
class ScopedRNNBase {
public:
ScopedRNNBase(int seq_length, int batch_size, int input_size, int hidden_size,
int num_layers, float dropout_prob, int seed, int weight_numel,
bool initialized, bool is_bidirec)
: seq_length_(seq_length),
batch_size_(batch_size),
input_size_(input_size),
hidden_size_(hidden_size),
num_layers_(num_layers),
dropout_prob_(dropout_prob),
seed_(seed),
weight_numel_(weight_numel),
initialized_(initialized),
is_bidirec_(is_bidirec) {}
template <typename T>
void Create(const cudnnHandle_t& handle, const platform::Place& place,
const std::vector<int>& sequence_length, size_t* workspace_size,
size_t* reserve_size, framework::Tensor* dropout_state) {
int numDirections = is_bidirec_ ? 2 : 1;
cudnnDataType_t cudnn_type = platform::CudnnDataType<T>::type;
// ------------------- cudnn x, y descriptors ---------------------
std::vector<int> dims_x = {batch_size_, input_size_, 1};
std::vector<int> strides_x = {input_size_, 1, 1};
std::vector<int> dims_y = {batch_size_, hidden_size_ * numDirections, 1};
std::vector<int> strides_y = {hidden_size_ * numDirections, 1, 1};
for (int i = 0; i < seq_length_; ++i) {
x_descs_.emplace_back(x_desc_.descriptor<T>(dims_x, strides_x));
y_descs_.emplace_back(y_desc_.descriptor<T>(dims_y, strides_y));
}
if (!sequence_length.empty()) {
x_seq_desc_.descriptor<T>(seq_length_, batch_size_, input_size_, true,
sequence_length);
y_seq_desc_.descriptor<T>(seq_length_, batch_size_,
hidden_size_ * numDirections, true,
sequence_length);
}
// ------------------- cudnn hx, hy, cx, cy descriptors----------
std::vector<int> dims_hx = {num_layers_ * numDirections, batch_size_,
hidden_size_};
std::vector<int> strides_hx = {hidden_size_ * batch_size_, hidden_size_, 1};
init_h_desc_.descriptor<T>(dims_hx, strides_hx);
init_c_desc_.descriptor<T>(dims_hx, strides_hx);
last_h_desc_.descriptor<T>(dims_hx, strides_hx);
last_c_desc_.descriptor<T>(dims_hx, strides_hx);
// ------------------- cudnn dropout descriptors ---------------------
size_t state_size;
if (!initialized_) {
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cudnnDropoutGetStatesSize(handle, &state_size));
dropout_state->mutable_data<uint8_t>({static_cast<int64_t>(state_size)},
place);
}
dropout_desc_.descriptor(handle, place, initialized_, dropout_prob_,
dropout_state, seed_, state_size);
// ------------------- cudnn rnn descriptors ---------------------
#if CUDNN_VERSION >= 6000
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor_v6(
handle, rnn_desc_.desc(), hidden_size_, num_layers_,
dropout_desc_.desc(), CUDNN_LINEAR_INPUT,
is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
CUDNN_RNN_ALGO_STANDARD, cudnn_type));
#else
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor(
rnn_desc_.desc(), hidden_size_, num_layers_, dropout_desc_.desc(),
CUDNN_LINEAR_INPUT,
is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
cudnn_type));
#endif
if (!sequence_length.empty()) {
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNPaddingMode(
rnn_desc_.desc(), CUDNN_RNN_PADDED_IO_ENABLED));
}
// ------------------- cudnn weights_size ---------------------
size_t weights_size_;
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNParamsSize(
handle, rnn_desc_.desc(), x_descs_[0], &weights_size_, cudnn_type));
PADDLE_ENFORCE_EQ(
weights_size_, sizeof(T) * weight_numel_,
platform::errors::InvalidArgument(
"The cudnn lstm and setting weight size should be same."));
// ------------------- cudnn weight descriptors ---------------------
platform::DataLayout layout = platform::DataLayout::kNCHW;
int dim_tmp = weights_size_ / sizeof(T);
std::vector<int> dim_w = {dim_tmp, 1, 1};
weight_desc_.descriptor<T>(layout, dim_w);
// ------------------- cudnn workspace, reserve size ---------------------
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNWorkspaceSize(
handle, rnn_desc_.desc(), seq_length_, x_descs_.data(),
workspace_size));
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cudnnGetRNNTrainingReserveSize(
handle, rnn_desc_.desc(), seq_length_, x_descs_.data(),
reserve_size));
}
cudnnTensorDescriptor_t* x_descs() { return x_descs_.data(); }
cudnnTensorDescriptor_t* y_descs() { return y_descs_.data(); }
cudnnRNNDataDescriptor_t x_seq_desc() { return x_seq_desc_.desc(); }
cudnnRNNDataDescriptor_t y_seq_desc() { return y_seq_desc_.desc(); }
cudnnTensorDescriptor_t init_h_desc() { return init_h_desc_.desc(); }
cudnnTensorDescriptor_t init_c_desc() { return init_c_desc_.desc(); }
cudnnTensorDescriptor_t last_h_desc() { return last_h_desc_.desc(); }
cudnnTensorDescriptor_t last_c_desc() { return last_c_desc_.desc(); }
cudnnRNNDescriptor_t rnn_desc() { return rnn_desc_.desc(); }
cudnnDropoutDescriptor_t dropout_desc() { return dropout_desc_.desc(); }
cudnnFilterDescriptor_t weight_desc() { return weight_desc_.desc(); }
private:
int seq_length_;
int batch_size_;
int input_size_;
int hidden_size_;
int num_layers_;
float dropout_prob_;
int seed_;
int weight_numel_;
bool initialized_;
bool is_bidirec_;
std::vector<cudnnTensorDescriptor_t> x_descs_;
std::vector<cudnnTensorDescriptor_t> y_descs_;
platform::ScopedTensorDescriptor x_desc_;
platform::ScopedTensorDescriptor y_desc_;
platform::ScopedRNNTensorDescriptor x_seq_desc_;
platform::ScopedRNNTensorDescriptor y_seq_desc_;
platform::ScopedTensorDescriptor init_h_desc_;
platform::ScopedTensorDescriptor init_c_desc_;
platform::ScopedTensorDescriptor last_h_desc_;
platform::ScopedTensorDescriptor last_c_desc_;
platform::ScopedDropoutDescriptor dropout_desc_;
platform::ScopedFilterDescriptor weight_desc_;
platform::ScopedRNNDescriptor rnn_desc_;
};
} // namespace operators
} // namespace paddle
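The kernels later in this commit drive ScopedRNNBase in two steps: construct it with the shapes and hyper-parameters, then call Create<T> once to build every descriptor and query the workspace/reserve sizes. A condensed sketch of that sequence, mirroring the kernel code below with placeholder values (handle, place, and dropout_state are assumed to exist):

size_t workspace_size = 0;
size_t reserve_size = 0;
ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size, num_layers,
                  dropout_prob, seed, weight_numel, /*initialized=*/false,
                  is_bidirec);
rnn.Create<float>(handle, place, /*sequence_length=*/{}, &workspace_size,
                  &reserve_size, &dropout_state);
// workspace_size / reserve_size are now sized for the cudnnRNN* calls.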
......@@ -51,6 +51,16 @@ class CudnnLSTMOp : public framework::OperatorWithKernel {
"received InitH's rank is %d.",
init_h_dims.size()));
if (ctx->HasInput("SequenceLength")) {
auto seq_dims = ctx->GetInputDim("SequenceLength");
PADDLE_ENFORCE_EQ(
in_dims[1], seq_dims[0],
platform::errors::InvalidArgument(
"The size of SequenceLength has to equal the batch_size. But "
"received batch_size is %d and the size of SequenceLength is %d.",
in_dims[1], seq_dims[0]));
}
PADDLE_ENFORCE_EQ(
in_dims[1], init_h_dims[1],
platform::errors::InvalidArgument(
......@@ -113,6 +123,12 @@ class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor) the learnable hidden-hidden weights."
" The shape is (N), where N is total weight size of the LSTM. "
" cudnn concatenate all the weight to one Tensor");
AddInput("SequenceLength",
"(Tensor) When the input data is padding, "
"set this parameter. This parameter represents "
"the variable sequence lengths in a batch. "
"The size of the vector has to equal the batch_size.")
.AsDispensable();
AddOutput("Reserve",
"(Tensor, a temporary output Tensor to store the reserve_data "
"of cudnn kernel.")
......@@ -155,13 +171,6 @@ class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault(1);
AddAttr<bool>("is_test", "True if in test phase.").SetDefault(false);
AddAttr<int>("seed", "seed to used if fix_seed is True").SetDefault(0);
AddAttr<std::vector<int>>("sequence_length",
"(vector<int>) When the input data is padding, "
"set this parameter. This parameter represents "
"the variable sequence"
"lengths in a batch. The size of the vector has "
"to equal the batch_size.")
.SetDefault({});
AddComment(R"DOC(
CUDNN LSTM implementation
......@@ -243,6 +252,9 @@ class CudnnLSTMGradOpMaker : public framework::SingleGradOpMaker<T> {
op->SetInput("InitH", this->Input("InitH"));
op->SetInput("InitC", this->Input("InitC"));
op->SetInput("W", this->Input("W"));
if (this->HasInput("SequenceLength")) {
op->SetInput("SequenceLength", this->Input("SequenceLength"));
}
op->SetInput("Reserve", this->Output("Reserve"));
op->SetInput("StateOut", this->Output("StateOut"));
op->SetInput("Out", this->Output("Out"));
......
......@@ -13,8 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#include "paddle/fluid/operators/cudnn_lstm_cache.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/cudnn_desc.h"
#include "paddle/fluid/platform/cudnn_helper.h"
......@@ -24,6 +25,43 @@ namespace operators {
using LoDTensor = framework::LoDTensor;
using Tensor = framework::Tensor;
template <typename T>
void LSTMInferece(const bool &has_seq_length, const cudnnHandle_t &handle,
const int &seq_length, ScopedRNNBase *rnn, const T *x_data,
const T *init_h_data, const T *init_c_data, const T *w_data,
T *out_data, T *last_h_data, T *last_c_data,
framework::Tensor *workspace_data,
const size_t &workspace_size) {
if (!has_seq_length) {
// for inference
// This interface is used when the input/output is unpadded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardInference(
handle, rnn->rnn_desc(), seq_length, rnn->x_descs(), x_data,
rnn->init_h_desc(), init_h_data, rnn->init_c_desc(), init_c_data,
rnn->weight_desc(), w_data, rnn->y_descs(), out_data,
rnn->last_h_desc(), last_h_data, rnn->last_c_desc(), last_c_data,
workspace_data->data<uint8_t>(), workspace_size));
} else {
#if CUDNN_VERSION >= 7201
// for inference
// This interface is used when the input/output is padded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardInferenceEx(
handle, rnn->rnn_desc(), rnn->x_seq_desc(), x_data, rnn->init_h_desc(),
init_h_data, rnn->init_c_desc(), init_c_data, rnn->weight_desc(),
w_data, rnn->y_seq_desc(), out_data, rnn->last_h_desc(), last_h_data,
rnn->last_c_desc(), last_c_data, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, workspace_data->data<uint8_t>(),
workspace_size));
#else
    // CUDNN version has to be >= 7.2.1
PADDLE_THROW(platform::errors::Unavailable(
"The padded input is supported by "
"cudnnRNNForwardInferenceEx, but it only works when "
"the version of cudnn is larger than 7.2.1"));
#endif
}
}
template <typename T>
class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
public:
......@@ -56,7 +94,13 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
int num_layers = ctx.Attr<int>("num_layers");
bool is_test = ctx.Attr<bool>("is_test");
int seed = ctx.Attr<int>("seed");
auto sequence_length = ctx.Attr<std::vector<int>>("sequence_length");
bool has_seq_length = ctx.HasInput("SequenceLength");
std::vector<int> SequenceLength;
if (has_seq_length) {
auto *sequence_length = ctx.Input<Tensor>("SequenceLength");
SequenceLength = operators::GetDataFromTensor<int>(sequence_length);
}
auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
auto handle = dev_ctx.cudnn_handle();
......@@ -70,58 +114,32 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
size_t workspace_size;
size_t reserve_size;
platform::ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size,
ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size,
num_layers, dropout_prob, seed, weight_numel,
state_initialized, is_bidirec);
rnn.Create<T>(handle, ctx.GetPlace(), sequence_length, &workspace_size,
rnn.Create<T>(handle, ctx.GetPlace(), SequenceLength, &workspace_size,
&reserve_size, state_out);
framework::Tensor workspace_data_;
workspace_data_.Resize({static_cast<int64_t>(workspace_size)});
workspace_data_.mutable_data<uint8_t>(ctx.GetPlace());
workspace_data_.mutable_data<uint8_t>(
{static_cast<int64_t>(workspace_size)}, ctx.GetPlace());
auto *reserve_data = reserve->mutable_data<uint8_t>(
{static_cast<int64_t>(reserve_size)}, ctx.GetPlace());
if (is_test) {
if (sequence_length.empty()) {
// for inference
// This interface is used when the input/output is unpadded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardInference(
handle, rnn.rnn_desc(), seq_length, rnn.x_desc(), x_data,
rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data,
rnn.w_desc(), w_data, rnn.y_desc(), out_data, rnn.hy_desc(),
last_h_data, rnn.cy_desc(), last_c_data,
workspace_data_.data<uint8_t>(), workspace_size));
LSTMInferece<T>(has_seq_length, handle, seq_length, &rnn, x_data,
init_h_data, init_c_data, w_data, out_data, last_h_data,
last_c_data, &workspace_data_, workspace_size);
} else {
#if CUDNN_VERSION >= 7201
// for inference
// This interface is used when the input/output is padded.
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cudnnRNNForwardInferenceEx(
handle, rnn.rnn_desc(), rnn.x_seq_desc(), x_data, rnn.hx_desc(),
init_h_data, rnn.cx_desc(), init_c_data, rnn.w_desc(), w_data,
rnn.y_seq_desc(), out_data, rnn.hy_desc(), last_h_data,
rnn.cy_desc(), last_c_data, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr,
workspace_data_.data<uint8_t>(), workspace_size));
#else
PADDLE_ENFORCE_NOT_NULL(
nullptr, platform::errors::Unavailable(
"The padded input is supported by "
"cudnnRNNForwardInferenceEx, but it only works when "
"the version of cudnn is larger than 7.2.1"));
#endif
}
} else {
if (sequence_length.empty()) {
if (!has_seq_length) {
// for train
// This interface is used when the input/output is unpadded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardTraining(
handle, rnn.rnn_desc(), seq_length, rnn.x_desc(), x_data,
rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data,
rnn.w_desc(), w_data, rnn.y_desc(), out_data, rnn.hy_desc(),
last_h_data, rnn.cy_desc(), last_c_data,
handle, rnn.rnn_desc(), seq_length, rnn.x_descs(), x_data,
rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data,
rnn.weight_desc(), w_data, rnn.y_descs(), out_data,
rnn.last_h_desc(), last_h_data, rnn.last_c_desc(), last_c_data,
workspace_data_.data<uint8_t>(), workspace_size, reserve_data,
reserve_size));
} else {
......@@ -130,16 +148,15 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
// This interface is used when the input/output is padded.
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cudnnRNNForwardTrainingEx(
handle, rnn.rnn_desc(), rnn.x_seq_desc(), x_data, rnn.hx_desc(),
init_h_data, rnn.cx_desc(), init_c_data, rnn.w_desc(), w_data,
rnn.y_seq_desc(), out_data, rnn.hy_desc(), last_h_data,
rnn.cy_desc(), last_c_data, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr,
workspace_data_.data<uint8_t>(), workspace_size, reserve_data,
reserve_size));
handle, rnn.rnn_desc(), rnn.x_seq_desc(), x_data,
rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data,
rnn.weight_desc(), w_data, rnn.y_seq_desc(), out_data,
rnn.last_h_desc(), last_h_data, rnn.last_c_desc(), last_c_data,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, workspace_data_.data<uint8_t>(), workspace_size,
reserve_data, reserve_size));
#else
PADDLE_ENFORCE_NOT_NULL(
nullptr, platform::errors::Unavailable(
PADDLE_THROW(platform::errors::Unavailable(
"The padded input is supported by "
"cudnnRNNForwardTrainingEx, but it only works when "
"the version of cudnn is larger than 7.2.1"));
......@@ -203,7 +220,13 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
int hidden_size = ctx.Attr<int>("hidden_size");
int num_layers = ctx.Attr<int>("num_layers");
int seed = ctx.Attr<int>("seed");
auto sequence_length = ctx.Attr<std::vector<int>>("sequence_length");
bool has_seq_length = ctx.HasInput("SequenceLength");
std::vector<int> SequenceLength;
if (has_seq_length) {
auto *sequence_length = ctx.Input<Tensor>("SequenceLength");
SequenceLength = operators::GetDataFromTensor<int>(sequence_length);
}
int seq_length = input_dims[0];
int batch_size = input->dims()[1];
......@@ -213,33 +236,33 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
size_t workspace_size;
size_t reserve_size;
platform::ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size,
num_layers, dropout_prob, seed, weight_numel,
true, is_bidirec);
ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size,
num_layers, dropout_prob, seed, weight_numel, true,
is_bidirec);
rnn.Create<T>(handle, ctx.GetPlace(), sequence_length, &workspace_size,
rnn.Create<T>(handle, ctx.GetPlace(), SequenceLength, &workspace_size,
&reserve_size, const_cast<Tensor *>(state_out));
framework::Tensor workspace_data_;
workspace_data_.Resize({static_cast<int64_t>(workspace_size)});
workspace_data_.mutable_data<uint8_t>(ctx.GetPlace());
workspace_data_.mutable_data<uint8_t>(
{static_cast<int64_t>(workspace_size)}, ctx.GetPlace());
const uint8_t *reserve_data = reserve->data<uint8_t>();
if (sequence_length.empty()) {
if (!has_seq_length) {
// This interface is used when the input/output is unpadded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardData(
handle, rnn.rnn_desc(), seq_length, rnn.y_desc(), out_data,
rnn.y_desc(), out_grad_data, rnn.hy_desc(), last_h_grad_data,
rnn.cy_desc(), last_c_grad_data, rnn.w_desc(), weight_data,
rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data, rnn.x_desc(),
in_grad_data, rnn.hx_desc(), init_h_grad_data, rnn.cx_desc(),
init_c_grad_data, workspace_data_.data<uint8_t>(), workspace_size,
const_cast<uint8_t *>(reserve_data), reserve_size));
handle, rnn.rnn_desc(), seq_length, rnn.y_descs(), out_data,
rnn.y_descs(), out_grad_data, rnn.last_h_desc(), last_h_grad_data,
rnn.last_c_desc(), last_c_grad_data, rnn.weight_desc(), weight_data,
rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data,
rnn.x_descs(), in_grad_data, rnn.init_h_desc(), init_h_grad_data,
rnn.init_c_desc(), init_c_grad_data, workspace_data_.data<uint8_t>(),
workspace_size, const_cast<uint8_t *>(reserve_data), reserve_size));
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardWeights(
handle, rnn.rnn_desc(), seq_length, rnn.x_desc(), input->data<T>(),
rnn.hx_desc(), init_h->data<T>(), rnn.y_desc(), out->data<T>(),
workspace_data_.data<uint8_t>(), workspace_size, rnn.w_desc(),
handle, rnn.rnn_desc(), seq_length, rnn.x_descs(), input->data<T>(),
rnn.init_h_desc(), init_h->data<T>(), rnn.y_descs(), out->data<T>(),
workspace_data_.data<uint8_t>(), workspace_size, rnn.weight_desc(),
weight_grad->data<T>(), const_cast<uint8_t *>(reserve_data),
reserve_size));
} else {
......@@ -248,24 +271,22 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
// This interface is used when the input/output is padded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardDataEx(
handle, rnn.rnn_desc(), rnn.y_seq_desc(), out_data, rnn.y_seq_desc(),
out_grad_data, nullptr, nullptr, rnn.hy_desc(), last_h_grad_data,
rnn.cy_desc(), last_c_grad_data, rnn.w_desc(), weight_data,
rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data,
rnn.x_seq_desc(), in_grad_data, rnn.hx_desc(), init_h_grad_data,
rnn.cx_desc(), init_c_grad_data, nullptr, nullptr,
out_grad_data, nullptr, nullptr, rnn.last_h_desc(), last_h_grad_data,
rnn.last_c_desc(), last_c_grad_data, rnn.weight_desc(), weight_data,
rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data,
rnn.x_seq_desc(), in_grad_data, rnn.init_h_desc(), init_h_grad_data,
rnn.init_c_desc(), init_c_grad_data, nullptr, nullptr,
workspace_data_.data<uint8_t>(), workspace_size,
const_cast<uint8_t *>(reserve_data), reserve_size));
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardWeightsEx(
handle, rnn.rnn_desc(), rnn.x_seq_desc(), input->data<T>(),
rnn.hx_desc(), init_h->data<T>(), rnn.y_seq_desc(), out->data<T>(),
workspace_data_.data<uint8_t>(), workspace_size, rnn.w_desc(),
weight_grad->data<T>(), const_cast<uint8_t *>(reserve_data),
reserve_size));
rnn.init_h_desc(), init_h->data<T>(), rnn.y_seq_desc(),
out->data<T>(), workspace_data_.data<uint8_t>(), workspace_size,
rnn.weight_desc(), weight_grad->data<T>(),
const_cast<uint8_t *>(reserve_data), reserve_size));
#else
PADDLE_ENFORCE_NOT_NULL(
nullptr,
platform::errors::Unavailable(
PADDLE_THROW(platform::errors::Unavailable(
"The padded input of rnn is supported by cudnnRNNBackwardDataEx, "
"cudnnRNNBackwardWeightsEx, but it only works when the version "
"of cudnn is larger than 7.2.1"));
......
......@@ -76,7 +76,8 @@ class AllReduceOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream));
}
#else
PADDLE_THROW("PaddlePaddle should compile with GPU.");
PADDLE_THROW(platform::errors::PreconditionNotMet(
"PaddlePaddle should compile with GPU."));
#endif
}
};
......
......@@ -58,7 +58,8 @@ template <typename T>
class BroadcastOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_THROW("Broadcast op can run on gpu place only for now.");
PADDLE_THROW(platform::errors::PreconditionNotMet(
"Broadcast op can run on gpu place only for now."));
}
};
......
......@@ -68,10 +68,11 @@ class NCCLBroadcastOpKernel : public framework::OpKernel<T> {
<< " From " << root_dev_id << " to " << dev_id;
if (ctx.Attr<bool>("sync_mode")) {
PADDLE_ENFORCE(cudaStreamSynchronize(stream));
PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream));
}
#else
PADDLE_THROW("PaddlePaddle should compile with GPU.");
PADDLE_THROW(platform::errors::PreconditionNotMet(
"PaddlePaddle should compile with GPU."));
#endif
}
};
......
......@@ -33,9 +33,12 @@ namespace operators {
static void Memcpy(void *dst, const void *src, size_t n, bool copy_to_gpu) {
if (copy_to_gpu) {
#ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE(cudaMemcpy(dst, src, n, cudaMemcpyHostToDevice));
PADDLE_ENFORCE_CUDA_SUCCESS(
cudaMemcpy(dst, src, n, cudaMemcpyHostToDevice));
#else
PADDLE_THROW("Not compiled with cuda");
PADDLE_THROW(
platform::errors::InvalidArgument("Check your paddle version, current "
"version is not compiled with cuda"));
#endif
} else {
std::memcpy(dst, src, n);
......@@ -88,11 +91,22 @@ bool TestMain(const platform::Place &place, const framework::DDim &dims,
framework::LoDTensor cpu_out;
auto &out_tensor = scope.FindVar(out_name)->Get<framework::LoDTensor>();
PADDLE_ENFORCE(scope.kids().empty());
PADDLE_ENFORCE_EQ(scope.kids().empty(), true,
platform::errors::InvalidArgument(
"The scope can not have the child scopes,"
"please check your code."));
if (inplace) {
PADDLE_ENFORCE_EQ(&out_tensor, x);
PADDLE_ENFORCE_EQ(
&out_tensor, x,
platform::errors::InvalidArgument(
"The output tensor should be same as input x in inplace mode,"
" but now is not same."));
} else {
PADDLE_ENFORCE_EQ(&out_tensor, z);
PADDLE_ENFORCE_EQ(
&out_tensor, z,
platform::errors::InvalidArgument(
"The output tensor should be same as output z in normal mode,"
" but now is not same."));
}
if (is_gpu_place) {
......
......@@ -92,7 +92,9 @@ class TestElementwiseOpGradGrad {
auto dst_place = BOOST_GET_CONST(platform::CUDAPlace, place_);
memory::Copy(dst_place, dst, src_place, src, bytes, nullptr);
#else
PADDLE_THROW("Not compiled with cuda");
PADDLE_THROW(platform::errors::InvalidArgument(
"Check your paddle version, current version is not compiled with "
"cuda"));
#endif
}
}
......@@ -107,7 +109,10 @@ class TestElementwiseOpGradGrad {
op->Run(scope_, place_);
platform::DeviceContextPool::Instance().Get(place_)->Wait();
framework::LoDTensor cpu_out;
PADDLE_ENFORCE_EQ(scope_.kids().empty(), true, "scope has child scopes");
PADDLE_ENFORCE_EQ(scope_.kids().empty(), true,
platform::errors::InvalidArgument(
"The scope can not have the child scopes,"
"please check your code."));
// get outputs from scope and compare them with expected_outs
bool all_equal = true;
......
......@@ -37,8 +37,21 @@ class GatherOp : public framework::OperatorWithKernel {
"Output(Out) of GatherOp should not be null."));
auto index_dims = ctx->GetInputDim("Index");
PADDLE_ENFORCE(index_dims.size() == 1 ||
(index_dims.size() == 2 && index_dims[1] == 1));
if (index_dims.size() == 2) {
PADDLE_ENFORCE_EQ(
index_dims[1], 1,
platform::errors::InvalidArgument(
"The last dim of index should be 1 when it is 2D, but we get %d",
index_dims[1]));
} else {
PADDLE_ENFORCE_EQ(
index_dims.size(), 1,
platform::errors::InvalidArgument(
"The index should be 1D, when it is not 2D, but we get %d",
index_dims.size()));
}
int batch_size = ctx->GetInputDim("Index")[0];
framework::DDim output_dims(ctx->GetInputDim("X"));
output_dims[0] = batch_size;
......
......@@ -43,7 +43,11 @@ class OverflowOp : public framework::OperatorWithKernel {
} else if (x_var->IsType<framework::SelectedRows>()) {
dtype = x_var->Get<framework::SelectedRows>().value().type();
} else {
PADDLE_THROW("Cannot find the input data type by all input data");
PADDLE_ENFORCE_EQ(
true, false,
platform::errors::InvalidArgument(
"The input type mismatch, the type of Input(X) must be Tensor or "
"SelectedRows, please check your input."));
}
return framework::OpKernelType(framework::proto::VarType::Type(dtype),
ctx.GetPlace());
......
......@@ -57,7 +57,11 @@ class OverflowKernel : public framework::OpKernel<T> {
auto& in = ctx.Input<framework::SelectedRows>("X")->value();
functor(in, out);
} else {
PADDLE_THROW("Unsupported input type.");
PADDLE_ENFORCE_EQ(
true, false,
platform::errors::InvalidArgument(
"The input type mismatch, the type of Input(X) must be Tensor or "
"SelectedRows, please check your input."));
}
}
};
......
......@@ -22,8 +22,6 @@ class LinspaceOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Start"),
"Input(Start) of LinspaceOp should not be null.");
OP_INOUT_CHECK(ctx->HasInput("Start"), "Input", "Start", "linspace");
OP_INOUT_CHECK(ctx->HasInput("Stop"), "Input", "Stop", "linspace");
OP_INOUT_CHECK(ctx->HasInput("Num"), "Input", "Num", "linspace");
......
......@@ -63,7 +63,10 @@ class CUDALinspaceKernel : public framework::OpKernel<T> {
framework::TensorCopy(*num_t, platform::CPUPlace(), &n);
int32_t num = n.data<int32_t>()[0];
PADDLE_ENFORCE(num > 0, "The num of linspace op should be larger than 0.");
PADDLE_ENFORCE_GT(num, 0, platform::errors::InvalidArgument(
"The num of linspace op should be larger "
"than 0, but received num is %d",
num));
out->Resize(framework::make_ddim({num}));
T* out_data = out->mutable_data<T>(context.GetPlace());
......
......@@ -46,7 +46,10 @@ class CPULinspaceKernel : public framework::OpKernel<T> {
T start = start_t.data<T>()[0];
T stop = stop_t.data<T>()[0];
PADDLE_ENFORCE(num > 0, "The num of linspace op should be larger than 0.");
PADDLE_ENFORCE_GT(num, 0, platform::errors::InvalidArgument(
"The num of linspace op should be larger "
"than 0, but received num is %d",
num));
out->Resize(framework::make_ddim({num}));
......
......@@ -48,6 +48,7 @@ class QuantOpKernel : public framework::OpKernel<T> {
const T* input_data = input->data<T>();
bool is_negative = ctx.Attr<bool>("is_negative_input");
bool bfloat16 = ctx.Attr<bool>("bfloat16");
std::string key =
platform::CreateKey(platform::ThreadIDasStr(), src_tz, scale_data,
is_negative, ctx.OutputName("Output"));
......@@ -74,7 +75,10 @@ class QuantOpKernel : public framework::OpKernel<T> {
src_md, engine, to_void_cast<T>(input_data));
std::shared_ptr<mkldnn::memory::desc> dst_md;
if (is_negative) {
if (bfloat16) {
platform::SetDstMemoryQuantized<paddle::platform::bfloat16>(
ctx, output, dst_tz, engine, dst_md, dst_memory, out_format);
} else if (is_negative) {
platform::SetDstMemoryQuantized<int8_t>(ctx, output, dst_tz, engine,
dst_md, dst_memory, out_format);
} else {
......@@ -96,7 +100,11 @@ class QuantOpKernel : public framework::OpKernel<T> {
dst_memory = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(key_dst_mem));
auto place = ctx.GetPlace();
if (is_negative) {
if (bfloat16) {
dst_memory->set_data_handle(
output->mutable_data<paddle::platform::bfloat16>(place));
} else if (is_negative) {
dst_memory->set_data_handle(output->mutable_data<int8_t>(place));
} else {
dst_memory->set_data_handle(output->mutable_data<uint8_t>(place));
......
......@@ -40,6 +40,8 @@ void QuantOpMaker::Make() {
AddAttr<std::string>("output_format",
"Convert format to NHWC or NCHW during quantization.")
.SetDefault("NHWC");
AddAttr<bool>("bfloat16", "(bool, default false) Convert to bfloat16")
.SetDefault(false);
AddComment(R"DOC(This op will quantize data from FP32 to INT8)DOC");
}
......
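The new bfloat16 attribute defaults to false, so existing INT8 paths are untouched; a pass that wants bfloat16 output from a quantize op would set the flag on the op's proto. A hypothetical OpDesc-based sketch, not taken from this commit:

framework::OpDesc quant_op;
quant_op.SetType("quantize");
quant_op.SetAttr("bfloat16", true);
quant_op.SetAttr("is_negative_input", false);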
......@@ -60,7 +60,10 @@ class ScaleKernel : public framework::OpKernel<T> {
out->mutable_data<T>(in->place());
PADDLE_ENFORCE_EQ(in->dims(), out->dims(),
"in and out should have the same dim");
paddle::platform::errors::InvalidArgument(
"the input and output should have the same dim"
"but input dim is %s, output dim is %s",
in->dims(), out->dims()));
auto eigen_out = framework::EigenVector<T>::Flatten(*out);
auto eigen_in = framework::EigenVector<T>::Flatten(*in);
......
......@@ -186,10 +186,17 @@ class SumOp : public framework::OperatorWithKernel {
}
}
}
PADDLE_THROW("Cannot find the input data type by all input data");
  PADDLE_THROW(platform::errors::InvalidArgument(
      "Expected each tensor in Input(x) of the sum op to be initialized, "
      "but some tensor in Input(x) is not initialized, please check your "
      "code."));
}
PADDLE_THROW("Unexpected branch. Input type is %s",
framework::ToTypeName(x_vars[0]->Type()));
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected type of Input(X) must be Tensor, SelectedRows or "
"LodTensorArray. But got "
"unsupport type: %s.",
framework::ToTypeName(x_vars[0]->Type())));
}
};
......
......@@ -169,8 +169,18 @@ void SumToLoDTensor(const framework::ExecutionContext &context) {
auto row_numel = sr_value.numel() / sr_rows.size();
auto out_dims = out->dims();
PADDLE_ENFORCE_EQ(sr.height(), out_dims[0]);
PADDLE_ENFORCE_EQ(row_numel, out->numel() / sr.height());
PADDLE_ENFORCE_EQ(sr.height(), out_dims[0],
platform::errors::InvalidArgument(
"The table height of input must be same as output, "
"but received input height is %d"
", output height is %d",
sr.height(), out_dims[0]));
PADDLE_ENFORCE_EQ(row_numel, out->numel() / sr.height(),
platform::errors::InvalidArgument(
"The table width of input must be same as output, "
"but received input width is %d"
", output width is %d",
row_numel, out->numel() / sr.height()));
auto *sr_data = sr_value.data<T>();
auto *sr_out_data = out->data<T>();
......@@ -231,8 +241,11 @@ class SumKernel<platform::CUDADeviceContext, T>
} else if (out_var->IsType<framework::LoDTensorArray>()) {
LodTensorArrayCompute<platform::CUDADeviceContext, T>(context);
} else {
PADDLE_THROW("Unexpected branch, output variable type is %s",
framework::ToTypeName(out_var->Type()));
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected type of Ouput(out) must be Tensor, SelectedRows or "
"LodTensorArray. But got "
"unsupport type: %s.",
framework::ToTypeName(out_var->Type())));
}
}
};
......
......@@ -182,7 +182,11 @@ class SumKernel : public framework::OpKernel<T> {
auto &in_t = in_vars[i]->Get<framework::SelectedRows>();
functor(context.template device_context<DeviceContext>(), in_t, out);
} else {
PADDLE_THROW("Variable type must be LoDTensor/SelectedRows.");
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected type of Input(X) of %d-th must be Tensor, "
"SelectedRows. But got "
"unsupport type: %s.",
framework::ToTypeName(in_vars[i]->Type())));
}
}
} else if (out_var->IsType<framework::SelectedRows>()) {
......@@ -190,8 +194,11 @@ class SumKernel : public framework::OpKernel<T> {
} else if (out_var->IsType<framework::LoDTensorArray>()) {
LodTensorArrayCompute<DeviceContext, T>(context);
} else {
PADDLE_THROW("Unexpected branch, output variable type is %s",
framework::ToTypeName(out_var->Type()));
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected type of Output(out) must be Tensor, SelectedRows, "
"LoDTensorArray. But got "
"unsupport type: %s.",
framework::ToTypeName(out_var->Type())));
}
}
};
......
......@@ -54,9 +54,11 @@ class CPUUniformRandomKernel : public framework::OpKernel<T> {
tensor = out_var->GetMutable<framework::LoDTensor>();
if (!new_shape.empty()) tensor->Resize(framework::make_ddim(new_shape));
} else {
PADDLE_THROW(
"uniform_random_op's output only"
"supports SelectedRows and LoDTensor");
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected type of Output(out) in uniform_random_op must be Tensor, "
"SelectedRows. But got "
"unsupport type: %s.",
framework::ToTypeName(out_var->Type())));
}
T *data = tensor->mutable_data<T>(ctx.GetPlace());
......
(Diffs for 14 more files are collapsed and not shown.)