Commit c6e0ee6d authored by J jingqinghe
@@ -386,7 +386,7 @@ function(cc_test_run TARGET_NAME)
    set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
    # No unit test should exceed 2 minutes.
    if (APPLE OR WIN32)
-      set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
+      set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150)
    else()
      set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 120)
    endif()
@@ -748,7 +748,7 @@ function(py_test TARGET_NAME)
  endif()
  if (APPLE OR WIN32)
-    set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
+    set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150)
  else()
    # No unit test should exceed 2 minutes in Linux.
    set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 120)
...
@@ -138,12 +138,17 @@ function(op_library TARGET)
    # And for detail pybind information, please see generated paddle/pybind/pybind.h.
    file(READ ${TARGET}.cc TARGET_CONTENT)
    string(REGEX MATCH "REGISTER_OPERATOR\\(.*REGISTER_OPERATOR\\(" multi_register "${TARGET_CONTENT}")
-    string(REGEX MATCH "REGISTER_OPERATOR\\([a-z0-9_]*," one_register "${multi_register}")
+    # [ \t\r\n]* is used for blank characters
+    string(REGEX MATCH "REGISTER_OPERATOR\\([ \t\r\n]*[a-z0-9_]*," one_register "${multi_register}")
    if (one_register STREQUAL "")
      string(REPLACE "_op" "" TARGET "${TARGET}")
    else ()
      string(REPLACE "REGISTER_OPERATOR(" "" TARGET "${one_register}")
      string(REPLACE "," "" TARGET "${TARGET}")
+      # [ \t\r\n]+ is used for blank characters.
+      # Here we use '+' instead of '*' since it is a REPLACE operation.
+      string(REGEX REPLACE "[ \t\r\n]+" "" TARGET "${TARGET}")
    endif()
    # pybind USE_NO_KERNEL_OP
...
@@ -102,6 +102,8 @@ if(WITH_MKLDNN)
  pass_library(conv_concat_relu_mkldnn_fuse_pass inference DIR mkldnn)
  pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
  pass_library(scale_matmul_fuse_pass inference DIR mkldnn)
+  pass_library(cpu_bfloat16_placement_pass inference DIR mkldnn)
+  pass_library(cpu_bfloat16_pass inference DIR mkldnn)
  pass_library(fc_mkldnn_pass inference DIR mkldnn)
  pass_library(cpu_quantize_placement_pass base DIR mkldnn)
  pass_library(cpu_quantize_pass inference DIR mkldnn)
@@ -162,4 +164,6 @@ endif()
  cc_test(test_cpu_quantize_squash_pass SRCS mkldnn/cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor)
  cc_test(test_reshape_transpose_matmul_mkldnn_fuse_pass SRCS mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc DEPS reshape_transpose_matmul_mkldnn_fuse_pass)
  cc_test(test_matmul_transpose_reshape_fuse_pass SRCS mkldnn/matmul_transpose_reshape_fuse_pass_tester.cc DEPS matmul_transpose_reshape_fuse_pass)
+  cc_test(test_cpu_bfloat16_placement_pass SRCS mkldnn/cpu_bfloat16_placement_pass_tester.cc DEPS cpu_bfloat16_placement_pass)
+  cc_test(test_cpu_bfloat16_pass SRCS mkldnn/cpu_bfloat16_pass_tester.cc DEPS cpu_bfloat16_pass)
endif ()
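The two passes are intended to run in sequence: cpu_bfloat16_placement_pass marks eligible operators with mkldnn_data_type = "bfloat16", and cpu_bfloat16_pass then inserts quantize/dequantize ops at the float32/bfloat16 boundaries. A minimal sketch of applying them by hand, modeled on the testers added below (the op-type list is an illustrative assumption, not a shipped default):

#include <memory>
#include <string>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"

// Link in the pass registrations, as the testers do.
USE_PASS(cpu_bfloat16_placement_pass);
USE_PASS(cpu_bfloat16_pass);

std::unique_ptr<paddle::framework::ir::Graph> ApplyBfloat16Passes(
    std::unique_ptr<paddle::framework::ir::Graph> graph) {
  using paddle::framework::ir::PassRegistry;
  // Step 1: mark operators that should run in bfloat16.
  auto placement = PassRegistry::Instance().Get("cpu_bfloat16_placement_pass");
  placement->Set("bfloat16_enabled_op_types",
                 new std::unordered_set<std::string>({"conv2d", "pool2d"}));
  graph.reset(placement->Apply(graph.release()));
  // Step 2: wrap the resulting bfloat16 regions with quantize/dequantize ops.
  auto bf16_pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
  graph.reset(bf16_pass->Apply(graph.release()));
  return graph;
}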
@@ -1892,6 +1892,82 @@ PDNode *patterns::QuantizePlacement::operator()(
  return op;
}
PDNode *patterns::Bfloat16Placement::operator()(
const std::unordered_set<std::string> &bfloat16_enabled_op_types) {
  std::unordered_set<std::string> supported_op_types;
if (!bfloat16_enabled_op_types.empty()) {
supported_op_types = bfloat16_enabled_op_types;
}
auto *op = pattern->NewNode(op_repr())->assert_is_ops(supported_op_types);
return op;
}
PDNode *patterns::OrphanedBfloat16::operator()() {
auto *prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
prev_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"float32";
});
auto *prev_out = pattern->NewNode(prev_out_repr())->AsOutput();
auto *op = pattern->NewNode(op_repr())->assert_is_op();
op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"bfloat16";
});
auto *op_out = pattern->NewNode(op_out_repr())->AsOutput();
auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op();
next_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"float32";
});
prev_op->LinksTo({prev_out});
op->LinksFrom({prev_out}).LinksTo({op_out});
next_op->LinksFrom({op_out});
return next_op;
}
PDNode *patterns::LastBfloat16Ops::operator()() {
auto *op = pattern->NewNode(op_repr())->assert_is_op();
op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"bfloat16";
});
auto *op_out = pattern->NewNode(op_out_repr())->AsOutput();
auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op();
next_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") !=
"bfloat16";
});
op->LinksTo({op_out});
next_op->LinksFrom({op_out});
return next_op;
}
PDNode *patterns::FirstBfloat16Ops::operator()() {
auto *prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
prev_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") !=
"bfloat16";
});
auto *op_in = pattern->NewNode(op_in_repr())->AsOutput();
auto *op = pattern->NewNode(op_repr())->assert_is_op();
op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"bfloat16";
});
prev_op->LinksTo({op_in});
op->LinksFrom({op_in});
return op;
}
PDNode *patterns::MKLDNNInPlace::operator()() {
  const std::unordered_set<std::string> &supported_op_types = {
      "abs",
...
@@ -1129,6 +1129,47 @@ struct QuantizePlacement : public PatternBase {
  PATTERN_DECL_NODE(op);
};
struct Bfloat16Placement : public PatternBase {
Bfloat16Placement(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "bfloat16_placement") {}
PDNode* operator()(
const std::unordered_set<std::string>& bfloat16_enabled_op_types);
PATTERN_DECL_NODE(op);
};
struct OrphanedBfloat16 : public PatternBase {
OrphanedBfloat16(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "orphaned_bfloat16") {}
PDNode* operator()();
PATTERN_DECL_NODE(prev_op);
PATTERN_DECL_NODE(prev_out);
PATTERN_DECL_NODE(op);
PATTERN_DECL_NODE(op_out);
PATTERN_DECL_NODE(next_op);
};
struct LastBfloat16Ops : public PatternBase {
LastBfloat16Ops(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "last_bfloat16_ops") {}
PDNode* operator()();
PATTERN_DECL_NODE(op);
PATTERN_DECL_NODE(op_out);
PATTERN_DECL_NODE(next_op);
};
struct FirstBfloat16Ops : public PatternBase {
FirstBfloat16Ops(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "first_bfloat16_ops") {}
PDNode* operator()();
PATTERN_DECL_NODE(prev_op);
PATTERN_DECL_NODE(op_in);
PATTERN_DECL_NODE(op);
};
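Taken together, the new patterns mark out the boundaries of bfloat16 regions: Bfloat16Placement matches any operator from the enabled set, OrphanedBfloat16 finds a bfloat16 op stranded between two float32 ops, and LastBfloat16Ops/FirstBfloat16Ops find the final and first op of a bfloat16 region. The two passes added below are their consumers.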
// Pattern used for enforcing inplace computation for in-place computation
// supporting DNNL ops. softmax, batch_norm and layer_norm
struct MKLDNNInPlace : public PatternBase {
...
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace framework {
namespace ir {
using string::PrettyLogDetail;
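// Removes the direct a -> b edge from both adjacency lists; the handlers
// below re-link the two nodes through a newly created quantize or
// dequantize node.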
void UnlinkNodes(ir::Node* a, ir::Node* b) {
a->outputs.erase(std::remove(a->outputs.begin(), a->outputs.end(), b),
a->outputs.end());
b->inputs.erase(std::remove(b->inputs.begin(), b->inputs.end(), a),
b->inputs.end());
}
void CPUBFloat16Pass::SetInputDataType(ir::Graph* graph) const {
GraphPatternDetector gpd;
patterns::FirstBfloat16Ops bfloat16_ops{gpd.mutable_pattern(),
"first_bfloat16_ops"};
bfloat16_ops();
int quantize_counter = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(op_in, op_in, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_ops);
if (op->Op()->Type() != "conv2d" && prev_op->Op()->Type() != "quantize") {
VarDesc quantize_out_desc(patterns::PDNodeName("quantize", "out"));
auto* quantize_out_node = g->CreateVarNode(&quantize_out_desc);
// create a quantize op node
OpDesc q_desc;
q_desc.SetType("quantize");
q_desc.SetInput("Input", std::vector<std::string>({op_in->Name()}));
q_desc.SetOutput("Output",
std::vector<std::string>({quantize_out_node->Name()}));
q_desc.SetAttr("Scale", 1.f);
q_desc.SetAttr("bfloat16", true);
q_desc.SetAttr("output_format", Has("data_layout")
? Get<std::string>("data_layout")
: "NCHW");
auto quantize_op = g->CreateOpNode(&q_desc); // OpDesc will be copied.
std::string op_input_name;
for (auto name : op->Op()->InputNames()) {
for (auto input_name : op->Op()->Input(name)) {
if (input_name == op_in->Name()) op_input_name = name;
}
}
PADDLE_ENFORCE_NE(
op_input_name.empty(), true,
        platform::errors::NotFound(
            "The matched operator should consume the preceding operator's "
            "output variable."));
op->Op()->SetInput(op_input_name,
std::vector<std::string>({quantize_out_node->Name()}));
UnlinkNodes(op_in, op);
IR_NODE_LINK_TO(op_in, quantize_op);
IR_NODE_LINK_TO(quantize_op, quantize_out_node);
IR_NODE_LINK_TO(quantize_out_node, op);
quantize_counter++;
}
};
gpd(graph, handler);
PrettyLogDetail("--- added %d quantize op before bfloat16 op",
quantize_counter);
}
void CPUBFloat16Pass::SetOutputDataType(ir::Graph* graph) const {
GraphPatternDetector gpd;
patterns::LastBfloat16Ops bfloat16_ops{gpd.mutable_pattern(),
"last_bfloat16_ops"};
bfloat16_ops();
int force_fp32_counter = 0, dequantize_counter = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(op_out, op_out, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, bfloat16_ops);
if ((op->Op()->HasAttr("force_fp32_output") ||
op->Op()->HasProtoAttr("force_fp32_output")) &&
!op->Op()->GetAttrIfExists<bool>("fuse_residual_connection")) {
op->Op()->SetAttr("force_fp32_output", true);
force_fp32_counter++;
} else if (op->Op()->Type() != "prior_box") {
// Create dequantize input variable
VarDesc dequantize_in_desc(patterns::PDNodeName("dequantize", "in"));
auto* dequantize_in_node = g->CreateVarNode(&dequantize_in_desc);
// create a dequantize op node for output.
OpDesc deq_desc;
deq_desc.SetType("dequantize");
deq_desc.SetInput("Input",
std::vector<std::string>({dequantize_in_node->Name()}));
deq_desc.SetOutput("Output", std::vector<std::string>({op_out->Name()}));
deq_desc.SetAttr("Scale", 1.0f);
auto dequantize_op = g->CreateOpNode(&deq_desc);
std::string op_output_name;
for (auto name : op->Op()->OutputNames()) {
for (auto output_name : op->Op()->Output(name)) {
if (output_name == op_out->Name()) op_output_name = name;
}
}
PADDLE_ENFORCE_NE(
op_output_name.empty(), true,
          platform::errors::NotFound(
              "The matched operator should produce the output variable "
              "consumed by the succeeding operator."));
op->Op()->SetOutput(op_output_name, std::vector<std::string>(
{dequantize_in_node->Name()}));
UnlinkNodes(op, op_out);
IR_NODE_LINK_TO(op, dequantize_in_node);
IR_NODE_LINK_TO(dequantize_in_node, dequantize_op);
IR_NODE_LINK_TO(dequantize_op, op_out);
dequantize_counter++;
}
};
gpd(graph, handler);
PrettyLogDetail("--- added %d dequantize op and used %d force_fp32_output",
dequantize_counter, force_fp32_counter);
}
void CPUBFloat16Pass::ApplyImpl(ir::Graph* graph) const {
SetInputDataType(graph);
SetOutputDataType(graph);
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(cpu_bfloat16_pass, paddle::framework::ir::CPUBFloat16Pass);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include "paddle/fluid/framework/ir/pass.h"
namespace paddle {
namespace framework {
namespace ir {
class CPUBFloat16Pass : public Pass {
protected:
void SetInputDataType(ir::Graph* graph) const;
void SetOutputDataType(ir::Graph* graph) const;
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs, bool use_mkldnn,
const std::string& mkldnn_data_type = "float32",
const bool force_fp32_output = false) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
op->SetAttr("use_mkldnn", use_mkldnn);
op->SetAttr("name", name);
if (type == "conv2d") {
op->SetInput("Input", {inputs[0]});
op->SetOutput("Output", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
op->SetAttr("force_fp32_output", force_fp32_output);
} else if (type == "pool2d" || type == "transpose2" || type == "reshape2" ||
type == "dropout") {
op->SetInput("X", {inputs[0]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
} else if (type == "fc") {
op->SetInput("Input", {inputs[0]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
} else if (type == "concat") {
op->SetInput("X", inputs);
op->SetOutput("Out", outputs);
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
} else if (type == "matmul" || type == "elementwise_add") {
op->SetInput("X", {inputs[0]});
if (inputs.size() > 1) op->SetInput("Y", {inputs[1]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
}
}
void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
const std::initializer_list<std::string> variable_names,
int* original_nodes_num, int* current_nodes_num) {
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
graph->reset(pass->Apply(graph->release()));
*original_nodes_num = (*graph)->Nodes().size();
(*graph).reset(pass->Apply((*graph).release()));
*current_nodes_num = (*graph)->Nodes().size();
}
static const std::initializer_list<std::string> variable_names{
"z", "a", "b", "c", "d", "e", "f", "g", "h", "i"};
ProgramDesc BuildProgramDesc(bool use_mkldnn) {
ProgramDesc prog;
for (auto& v : variable_names) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "dropout", "Dropout1", {"z"}, {"a"}, use_mkldnn, "float32");
SetOp(&prog, "conv2d", "Conv1", {"a"}, {"b"}, use_mkldnn, "bfloat16");
SetOp(&prog, "pool2d", "Pool1", {"b"}, {"c"}, use_mkldnn, "bfloat16");
SetOp(&prog, "conv2d", "Conv1", {"c"}, {"d"}, use_mkldnn, "bfloat16");
SetOp(&prog, "dropout", "Dropout2", {"d"}, {"e"}, use_mkldnn, "float32");
SetOp(&prog, "transpose2", "Transpose1", {"e"}, {"f"}, use_mkldnn,
"bfloat16");
SetOp(&prog, "reshape2", "Reshape1", {"f"}, {"g"}, use_mkldnn, "bfloat16");
SetOp(&prog, "concat", "Concat1", {"g"}, {"h"}, use_mkldnn, "bfloat16");
SetOp(&prog, "dropout", "Dropout3", {"h"}, {"i"}, use_mkldnn, "float32");
return prog;
}
void MainTest(const ProgramDesc& prog, int conv_count, int pool_count,
int transpose_count, int quant_count, int dequant_count,
int added_nodes_count) {
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
int original_nodes_num, current_nodes_num;
PreparePass(&graph, prog, variable_names, &original_nodes_num,
&current_nodes_num);
int quantize_nodes_count = 0;
int dequantize_nodes_count = 0;
int conv2d_nodes_count = 0;
int pool2d_nodes_count = 0;
int transpose2_nodes_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
auto* op = node->Op();
if (op->Type() == "conv2d") {
conv2d_nodes_count++;
} else if (op->Type() == "pool2d") {
pool2d_nodes_count++;
} else if (op->Type() == "transpose2") {
transpose2_nodes_count++;
} else if (op->Type() == "quantize") {
quantize_nodes_count++;
} else if (op->Type() == "dequantize") {
dequantize_nodes_count++;
}
}
}
EXPECT_EQ(conv2d_nodes_count, conv_count);
EXPECT_EQ(pool2d_nodes_count, pool_count);
EXPECT_EQ(transpose2_nodes_count, transpose_count);
EXPECT_EQ(quantize_nodes_count, quant_count);
EXPECT_EQ(dequantize_nodes_count, dequant_count);
EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
}
TEST(CpuBFloat16Pass, quantize) {
bool use_mkldnn = true;
// 1 quantize + 1 dequantize
int added_nodes = 2;
MainTest(BuildProgramDesc(use_mkldnn), 2, 1, 1, 1, 2, added_nodes);
}
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(cpu_bfloat16_pass);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h"
#include <string>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace framework {
namespace ir {
using string::PrettyLogDetail;
void CPUBfloat16PlacementPass::SetMkldnnDataType(
ir::Graph* graph, int* bfloat16_operators) const {
const auto& op_types_list =
Get<std::unordered_set<std::string>>("bfloat16_enabled_op_types");
  // Set the mkldnn_data_type attribute to bfloat16 for all operators that are
  // in the bfloat16_enabled_op_types set and are matched by the
  // Bfloat16Placement pattern.
GraphPatternDetector gpd;
patterns::Bfloat16Placement bfloat16_placement_pattern{gpd.mutable_pattern(),
"bfloat16_placement"};
bfloat16_placement_pattern(op_types_list);
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_placement_pattern);
if ((op->Op()->HasAttr("mkldnn_data_type") ||
op->Op()->HasProtoAttr("mkldnn_data_type")) &&
!platform::HasOpINT8DataType(op->Op())) {
op->Op()->SetAttr("mkldnn_data_type", std::string("bfloat16"));
(*bfloat16_operators)++;
}
};
gpd(graph, handler);
}
void CPUBfloat16PlacementPass::RemoveOrphanedOperators(
ir::Graph* graph, int* bfloat16_operators) const {
  // Find orphaned bfloat16 operators, i.e. those sitting between two float32
  // operators, and revert their mkldnn_data_type attribute to float32.
GraphPatternDetector gpd;
patterns::OrphanedBfloat16 orphaned_bfloat16_pattern{gpd.mutable_pattern(),
"orphaned_bfloat16"};
orphaned_bfloat16_pattern();
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(op, op, orphaned_bfloat16_pattern);
op->Op()->SetAttr("mkldnn_data_type", std::string("float32"));
    (*bfloat16_operators)--;
};
gpd(graph, handler);
}
void CPUBfloat16PlacementPass::ApplyImpl(ir::Graph* graph) const {
int bfloat16_operators = 0;
SetMkldnnDataType(graph, &bfloat16_operators);
  RemoveOrphanedOperators(graph, &bfloat16_operators);
PrettyLogDetail("--- marked %d operators to bfloat16 ",
bfloat16_operators);
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(cpu_bfloat16_placement_pass,
paddle::framework::ir::CPUBfloat16PlacementPass)
    // a set of operator type names with bfloat16 support ("conv2d" etc.);
    // the second parameter is the default value for this set
.DefaultPassAttr("bfloat16_enabled_op_types",
new std::unordered_set<std::string>());
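For reference, overriding that default when the pass is fetched looks like this in the tester below (sketch; the op set is arbitrary):

auto pass = paddle::framework::ir::PassRegistry::Instance().Get(
    "cpu_bfloat16_placement_pass");
// The pass takes ownership of the raw pointer passed to Set().
pass->Set("bfloat16_enabled_op_types",
          new std::unordered_set<std::string>({"conv2d", "pool2d"}));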
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include "paddle/fluid/framework/ir/pass.h"
namespace paddle {
namespace framework {
namespace ir {
/*
* Specifies which operators should be run on bfloat16.
*/
class CPUBfloat16PlacementPass : public Pass {
protected:
void SetMkldnnDataType(ir::Graph* graph, int* bfloat16_operators) const;
  void RemoveOrphanedOperators(ir::Graph* graph, int* bfloat16_operators) const;
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs,
const std::string& mkldnn_data_type = "float32") {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
if (type == "conv2d") {
op->SetAttr("name", name);
op->SetInput("Input", {inputs[0]});
} else if (type == "relu") {
op->SetInput("X", inputs);
} else if (type == "concat") {
op->SetAttr("axis", 1);
op->SetInput("X", {inputs[0], inputs[1]});
} else if (type == "pool2d") {
op->SetInput("X", {inputs[0]});
} else {
FAIL() << "Unexpected operator type.";
}
op->SetOutput("Out", {outputs[0]});
}
// operator mkldnn_data_type
// ---------------------------------------
// (a,b)->concat->c float32
// c->conv->f float32
// f->relu->g float32
// g->pool->h float32
// h->conv->k float32
// k->pool->l float32
ProgramDesc BuildProgramDesc() {
ProgramDesc prog;
for (auto& v :
std::vector<std::string>({"a", "b", "c", "f", "g", "h", "k", "l"})) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "concat", "concat1", {"a", "b"}, {"c"});
SetOp(&prog, "conv2d", "conv1", {"c"}, {"f"});
SetOp(&prog, "relu", "relu1", {"f"}, {"g"});
SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"});
SetOp(&prog, "conv2d", "conv2", {"h"}, {"k"});
SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"});
return prog;
}
void MainTest(std::initializer_list<std::string> bfloat16_enabled_op_types,
unsigned expected_bfloat16_data_type_count) {
auto prog = BuildProgramDesc();
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_placement_pass");
pass->Set("bfloat16_enabled_op_types",
new std::unordered_set<std::string>(bfloat16_enabled_op_types));
graph.reset(pass->Apply(graph.release()));
unsigned bfloat16_data_type_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
if (platform::HasOpBFLOAT16DataType(node->Op())) {
++bfloat16_data_type_count;
}
}
}
EXPECT_EQ(bfloat16_data_type_count, expected_bfloat16_data_type_count);
}
void DefaultAttrTest(unsigned expected_bfloat16_data_type_count) {
auto prog = BuildProgramDesc();
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_placement_pass");
graph.reset(pass->Apply(graph.release()));
unsigned bfloat16_data_type_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
if (platform::HasOpBFLOAT16DataType(node->Op())) {
++bfloat16_data_type_count;
}
}
}
EXPECT_EQ(bfloat16_data_type_count, expected_bfloat16_data_type_count);
}
TEST(Bfloat16PlacementPass, enable_all) {
MainTest({"conv2d", "pool2d", "relu", "concat"}, 6);
}
TEST(Bfloat16PlacementPass, enabled_conv_and_pool) {
  // 2 conv2d + 2 pool2d - 1 orphaned conv2d
MainTest({"conv2d", "pool2d"}, 3);
}
TEST(Bfloat16PlacementPass, default_attr_value) { DefaultAttrTest(0); }
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(cpu_bfloat16_placement_pass);
@@ -20,6 +20,7 @@
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
 #include "paddle/fluid/framework/ir/node.h"
 #include "paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h"
+#include "paddle/fluid/framework/op_version_registry.h"
 namespace paddle {
 namespace framework {
@@ -145,3 +146,11 @@ void TransposeFlattenConcatFusePass::ApplyImpl(ir::Graph *graph) const {
 REGISTER_PASS(transpose_flatten_concat_fuse_pass,
               paddle::framework::ir::TransposeFlattenConcatFusePass);
+REGISTER_PASS_CAPABILITY(transpose_flatten_concat_fuse_pass)
+    .AddCombination(
+        paddle::framework::compatible::OpVersionComparatorCombination()
+            .EQ("transpose", 0)
+            .EQ("transpose2", 0)
+            .EQ("flatten", 0)
+            .EQ("concat", 0)
+            .EQ("fusion_transpose_flatten_concat", 0));
@@ -69,7 +69,8 @@ class OpInfo {
   const OpCreator& Creator() const {
     PADDLE_ENFORCE_NOT_NULL(creator_,
-                            "Operator's Creator has not been registered");
+                            platform::errors::NotFound(
+                                "Operator's Creator has not been registered."));
     return creator_;
   }
@@ -79,11 +80,12 @@ class OpInfo {
     std::string type = proto_ ? proto_->type() : "unknown";
     PADDLE_ENFORCE_NOT_NULL(
         grad_op_maker_,
-        "Operator %s's GradOpMaker has not been "
-        "registered.\nPlease check whether %s_op has "
-        "grad_op.\nIf not, please set stop_gradient to True "
-        "for its input and output variables using var.stop_gradient=True.",
-        type.c_str(), type.c_str());
+        platform::errors::NotFound(
+            "Operator %s's GradOpMaker has not been "
+            "registered.\nPlease check whether (%s) operator has "
+            "gradient operator.\nIf not, please set stop_gradient to be True "
+            "for its input and output variables using var.stop_gradient=True.",
+            type.c_str(), type.c_str()));
     return grad_op_maker_;
   }
@@ -100,11 +102,12 @@ class OpInfo {
     std::string type = proto_ ? proto_->type() : "unknown";
     PADDLE_ENFORCE_NOT_NULL(
         dygraph_grad_op_maker_,
-        "Operator %s's DygraphGradOpMaker has not been "
-        "registered.\nPlease check whether %s_op has "
-        "grad_op.\nIf not, please set stop_gradient to True "
-        "for its input and output variables using var.stop_gradient=True.",
-        type.c_str(), type.c_str());
+        platform::errors::NotFound(
+            "Operator %s's DygraphGradOpMaker has not been "
+            "registered.\nPlease check whether (%s) operator has "
+            "gradient operator.\nIf not, please set stop_gradient to be True "
+            "for its input and output variables using var.stop_gradient=True.",
+            type.c_str(), type.c_str()));
     return dygraph_grad_op_maker_;
   }
@@ -130,14 +133,17 @@ class OpInfoMap {
   }
   void Insert(const std::string& type, const OpInfo& info) {
-    PADDLE_ENFORCE(!Has(type), "Operator %s has been registered", type);
+    PADDLE_ENFORCE_NE(Has(type), true,
+                      platform::errors::AlreadyExists(
+                          "Operator (%s) has been registered.", type));
     map_.insert({type, info});
   }
   const OpInfo& Get(const std::string& type) const {
     auto op_info_ptr = GetNullable(type);
-    PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not been registered",
-                            type);
+    PADDLE_ENFORCE_NOT_NULL(
+        op_info_ptr,
+        platform::errors::NotFound("Operator (%s) is not registered.", type));
     return *op_info_ptr;
   }
...
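The remaining hunks in this commit apply one mechanical migration: untyped PADDLE_ENFORCE/PADDLE_THROW calls carrying bare printf-style messages become comparison-specific macros with a typed platform::errors category. The before/after shape, taken from the OperatorBase::Inputs hunk further down:

// Before: generic assertion, message-only error.
PADDLE_ENFORCE(it != inputs_.end(),
               "Operator %s does not have the input %s.", type_, name);

// After: a comparison macro (EQ/NE/LT/LE/NOT_NULL) plus an error category
// such as NotFound, InvalidArgument, AlreadyExists, Unavailable or
// PreconditionNotMet.
PADDLE_ENFORCE_NE(it, inputs_.end(),
                  platform::errors::NotFound(
                      "Operator %s does not have the input %s.", type_, name));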
@@ -33,10 +33,18 @@ size_t OpKernelType::Hash::operator()(const OpKernelType& key) const {
   cur_loc += OpKernelType::kLibBits;
   int customized_value = key.customized_type_value_;
-  PADDLE_ENFORCE(customized_value < (1 << OpKernelType::kCustomizeBits));
+  PADDLE_ENFORCE_LT(customized_value, (1 << OpKernelType::kCustomizeBits),
+                    platform::errors::Unavailable(
+                        "Too many custom OpKernel attribute values, expected "
+                        "maximum value is %d, received value is %d.",
+                        (1 << OpKernelType::kCustomizeBits), customized_value));
   customized_value = customized_value << cur_loc;
   cur_loc += OpKernelType::kCustomizeBits;
-  PADDLE_ENFORCE(cur_loc < 64);
+  PADDLE_ENFORCE_LT(cur_loc, 64,
+                    platform::errors::Unavailable(
+                        "Too many OpKernel attribute values, expected maximum "
+                        "value is 64, received value is %d.",
+                        cur_loc));
   std::hash<int> hasher;
   return hasher(place + data_type + data_layout + library_type +
...
@@ -43,7 +43,9 @@ OpProtoAndCheckerMaker::VariableBuilder OpProtoAndCheckerMaker::AddOutput(
 void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() {
   std::unordered_set<std::string> names;
   auto checker = [&](const std::string& name) {
-    PADDLE_ENFORCE(!names.count(name), "[%s] is duplicated", name);
+    PADDLE_ENFORCE_EQ(
+        names.count(name), 0,
+        platform::errors::AlreadyExists("Attribute [%s] is duplicated.", name));
     names.insert(name);
   };
   for (auto& attr : proto_->attrs()) {
...
@@ -54,9 +54,10 @@ class Registrar {
 template <typename... ARGS>
 struct OperatorRegistrar : public Registrar {
   explicit OperatorRegistrar(const char* op_type) {
-    if (OpInfoMap::Instance().Has(op_type)) {
-      PADDLE_THROW("'%s' is registered more than once.", op_type);
-    }
+    PADDLE_ENFORCE_EQ(
+        OpInfoMap::Instance().Has(op_type), false,
+        platform::errors::AlreadyExists(
+            "Operator '%s' is registered more than once.", op_type));
     static_assert(sizeof...(ARGS) != 0,
                   "OperatorRegistrar should be invoked at least by OpClass");
     OpInfo info;
...
@@ -58,7 +58,8 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
     AddInput("input", "input of cosine op").AsDuplicable();
     AddOutput("output", "output of cosine op").AsIntermediate();
     auto my_checker = [](int i) {
-      PADDLE_ENFORCE(i % 2 == 0, "'test_attr' must be even!");
+      PADDLE_ENFORCE_EQ(i % 2, 0, platform::errors::InvalidArgument(
+                                      "'test_attr' must be even!"));
     };
     AddAttr<int>("test_attr", "a simple test attribute")
         .AddCustomChecker(my_checker);
...
@@ -152,10 +152,10 @@ class OpVersionRegistrar {
     return instance;
   }
   OpVersion& Register(const std::string& op_type) {
-    if (op_version_map_.find(op_type) != op_version_map_.end()) {
-      PADDLE_THROW("'%s' is registered in operator version more than once.",
-                   op_type);
-    }
+    PADDLE_ENFORCE_EQ(
+        op_version_map_.find(op_type), op_version_map_.end(),
+        platform::errors::AlreadyExists(
+            "'%s' is registered in operator version more than once.", op_type));
     op_version_map_.insert({op_type, OpVersion()});
     return op_version_map_[op_type];
   }
...
@@ -164,15 +164,20 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
   VLOG(4) << place << " " << DebugStringEx(&scope);
   if (platform::is_gpu_place(place)) {
 #ifndef PADDLE_WITH_CUDA
-    PADDLE_THROW("Cannot run operator on place %s", place);
+    PADDLE_THROW(platform::errors::Unavailable(
+        "Cannot run operator on place %s, please recompile paddle or "
+        "reinstall Paddle with CUDA support.",
+        place));
 #else
     auto dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
     platform::SetDeviceId(dev_id);
 #endif
   } else if (platform::is_xpu_place(place)) {
 #ifndef PADDLE_WITH_XPU
-    PADDLE_THROW(platform::errors::Unimplemented(
-        "Cannot run operator on place %s", place));
+    PADDLE_THROW(platform::errors::Unavailable(
+        "Cannot run operator on place %s, please recompile paddle or "
+        "reinstall Paddle with XPU support.",
+        place));
 #else
     auto dev_id = BOOST_GET_CONST(platform::XPUPlace, place).device;
     platform::SetXPUDeviceId(dev_id);
@@ -214,7 +219,7 @@ std::string OperatorBase::Input(const std::string& name) const {
   auto& ins = Inputs(name);
   PADDLE_ENFORCE_LE(
       ins.size(), 1UL,
-      platform::errors::AlreadyExists(
+      platform::errors::InvalidArgument(
           "Operator %s's input %s should contain only one variable.", type_,
           name));
   return ins.empty() ? kEmptyVarName : ins[0];
@@ -223,8 +228,10 @@ std::string OperatorBase::Input(const std::string& name) const {
 const std::vector<std::string>& OperatorBase::Inputs(
     const std::string& name) const {
   auto it = inputs_.find(name);
-  PADDLE_ENFORCE(it != inputs_.end(), "Operator %s does not have the input %s.",
-                 type_, name);
+  PADDLE_ENFORCE_NE(
+      it, inputs_.end(),
+      platform::errors::NotFound("Operator %s does not have the input %s.",
+                                 type_, name));
   return it->second;
 }
@@ -238,17 +245,21 @@ bool OperatorBase::HasOutputs(const std::string& name) const {
 std::string OperatorBase::Output(const std::string& name) const {
   auto& outs = Outputs(name);
-  PADDLE_ENFORCE_LE(outs.size(), 1UL,
-                    "Operator %s's output %s should contain only one variable.",
-                    type_, name);
+  PADDLE_ENFORCE_LE(
+      outs.size(), 1UL,
+      platform::errors::InvalidArgument(
+          "Operator %s's output %s should contain only one variable.", type_,
+          name));
   return outs.empty() ? kEmptyVarName : outs[0];
 }
 const std::vector<std::string>& OperatorBase::Outputs(
     const std::string& name) const {
   auto it = outputs_.find(name);
-  PADDLE_ENFORCE(it != outputs_.end(),
-                 "Operator %s does not have an output called %s.", type_, name);
+  PADDLE_ENFORCE_NE(
+      it, outputs_.end(),
+      platform::errors::NotFound(
+          "Operator %s does not have an output called %s.", type_, name));
   return it->second;
 }
@@ -391,16 +402,19 @@ void OperatorBase::CheckAllInputOutputSet() const {
   for (auto& in : info_->Proto().inputs()) {
     if (!in.dispensable()) {
-      PADDLE_ENFORCE(inputs_.find(in.name()) != inputs_.end(),
-                     "Operator %s's input, %s, is not set", Type(), in.name());
+      PADDLE_ENFORCE_NE(
+          inputs_.find(in.name()), inputs_.end(),
+          platform::errors::NotFound("Operator %s's input (%s) is not set.",
+                                     Type(), in.name()));
     }
   }
   for (auto& out : info_->Proto().outputs()) {
     if (!out.dispensable()) {
-      PADDLE_ENFORCE(outputs_.find(out.name()) != outputs_.end(),
-                     "Operator %s's output, %s, is not set", Type(),
-                     out.name());
+      PADDLE_ENFORCE_NE(
+          outputs_.find(out.name()), outputs_.end(),
+          platform::errors::NotFound("Operator %s's output (%s) is not set.",
+                                     Type(), out.name()));
     }
   }
 }
@@ -428,8 +442,9 @@ const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var) {
   } else if (var.IsType<SelectedRows>()) {
     return &(var.Get<SelectedRows>().value());
   } else {
-    PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
-                 ToTypeName(var.Type()));
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "Variable type is %s, expect LoDTensor or SelectedRows.",
+        ToTypeName(var.Type())));
   }
 }
@@ -439,8 +454,9 @@ Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var) {
   } else if (var->IsType<SelectedRows>()) {
     return var->GetMutable<SelectedRows>()->mutable_value();
   } else {
-    PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
-                 ToTypeName(var->Type()));
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "Variable type is %s, expect LoDTensor or SelectedRows.",
+        ToTypeName(var->Type())));
   }
 }
@@ -462,7 +478,7 @@ const Variable* ExecutionContext::InputVar(const std::string& name) const {
   PADDLE_ENFORCE_LE(
       it->second.size(), 1UL,
-      platform::errors::AlreadyExists(
+      platform::errors::InvalidArgument(
           "Operator %s's input %s should contain only one variable.",
           op_.Type(), name));
   return it->second.empty() ? nullptr : it->second[0];
@@ -472,9 +488,11 @@ Variable* ExecutionContext::OutputVar(const std::string& name) const {
   auto it = ctx_.outputs.find(name);
   if (it == ctx_.outputs.end()) return nullptr;
-  PADDLE_ENFORCE_LE(it->second.size(), 1UL,
-                    "Operator %s's output %s should contain only one variable.",
-                    op_.Type(), name);
+  PADDLE_ENFORCE_LE(
+      it->second.size(), 1UL,
+      platform::errors::InvalidArgument(
+          "Operator %s's output %s should contain only one variable.",
+          op_.Type(), name));
   return it->second.empty() ? nullptr : it->second[0];
 }
@@ -497,10 +515,11 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
   std::transform(vars.begin(), vars.end(), std::back_inserter(res),
                  [&](const Variable* var) -> const Tensor* {
                    if (var == nullptr) return nullptr;
-                   PADDLE_ENFORCE(
-                       var->IsType<LoDTensor>(),
-                       "should be LoDTensor, but the received type is %s",
-                       ToTypeName(var->Type()));
+                   PADDLE_ENFORCE_EQ(var->IsType<LoDTensor>(), true,
+                                     platform::errors::InvalidArgument(
+                                         "Input variable should be LoDTensor, "
+                                         "but the received type is %s.",
+                                         ToTypeName(var->Type())));
                    return &(var->Get<LoDTensor>());
                  });
   return res;
@@ -558,8 +577,10 @@ class RuntimeInferShapeContext : public InferShapeContext {
     }
     const auto& in = it->second;
     if (in.size() == 0) return false;
-    PADDLE_ENFORCE_EQ(in.size(), 1UL,
-                      "Input %s should not have more than one inputs", name);
+    PADDLE_ENFORCE_EQ(
+        in.size(), 1UL,
+        platform::errors::InvalidArgument(
+            "Input %s should not contain more than one inputs.", name));
     return in[0] != nullptr;
   }
@@ -574,8 +595,10 @@ class RuntimeInferShapeContext : public InferShapeContext {
     if (out.size() == 0) {
       return false;
     }
-    PADDLE_ENFORCE_EQ(out.size(), 1UL,
-                      "Output %s should not have more than one outputs", name);
+    PADDLE_ENFORCE_EQ(
+        out.size(), 1UL,
+        platform::errors::InvalidArgument(
+            "Output %s should not contain more than one outputs.", name));
     return out[0] != nullptr;
   }
@@ -644,16 +667,31 @@ class RuntimeInferShapeContext : public InferShapeContext {
                 size_t j = 0) override {
     auto in_it = ctx_.inputs.find(in);
     auto out_it = ctx_.outputs.find(out);
-    PADDLE_ENFORCE(in_it != ctx_.inputs.end() && in_it->second.size() > i,
-                   "Inputs %s should have %llu argument", in, i);
-    PADDLE_ENFORCE(out_it != ctx_.outputs.end() && out_it->second.size() > j,
-                   "Outputs %s should have %llu argument", out, j);
+    PADDLE_ENFORCE_NE(
+        in_it, ctx_.inputs.end(),
+        platform::errors::NotFound("Input %s does not exist.", in));
+    PADDLE_ENFORCE_NE(
+        out_it, ctx_.outputs.end(),
+        platform::errors::NotFound("Output %s does not exist.", out));
+    PADDLE_ENFORCE_LT(i, in_it->second.size(),
+                      platform::errors::InvalidArgument(
+                          "The index of input dimension is out of range, "
+                          "expected index less than %zu, but received %zu.",
+                          in_it->second.size(), i));
+    PADDLE_ENFORCE_LT(j, out_it->second.size(),
+                      platform::errors::InvalidArgument(
+                          "The index of output dimension is out of range, "
+                          "expected index less than %zu, but received %zu.",
+                          out_it->second.size(), j));
     Variable* in_var = in_it->second[i];
     Variable* out_var = out_it->second[j];
-    PADDLE_ENFORCE(in_var->Type() == out_var->Type(),
-                   "The type of %s and %s is not the same.", in, out);
+    PADDLE_ENFORCE_EQ(
+        in_var->Type(), out_var->Type(),
+        platform::errors::InvalidArgument(
+            "The type of input (%s) and output (%s) are inconsistent.", in,
+            out));
     if (in_var->IsType<framework::SelectedRows>()) {
       auto& in_sele_rows = in_var->Get<framework::SelectedRows>();
@@ -666,9 +704,9 @@ class RuntimeInferShapeContext : public InferShapeContext {
       auto* out_lod_tensor = out_var->GetMutable<framework::LoDTensor>();
       out_lod_tensor->Resize(in_lod_tensor.dims());
     } else {
-      PADDLE_THROW(
-          "Currently, the input type of ShareDim only can be LoDTensor "
-          "or SelectedRows.");
+      PADDLE_THROW(platform::errors::Unimplemented(
+          "Currently, the input type of ShareDim only can be LoDTensor "
+          "or SelectedRows."));
     }
   }
@@ -721,16 +759,30 @@ class RuntimeInferShapeContext : public InferShapeContext {
                 size_t j = 0) const override {
     auto in_it = ctx_.inputs.find(in);
     auto out_it = ctx_.outputs.find(out);
-    PADDLE_ENFORCE(in_it != ctx_.inputs.end() && in_it->second.size() > i,
-                   "Inputs %s should have %llu argument", in, i);
-    PADDLE_ENFORCE(out_it != ctx_.outputs.end() && out_it->second.size() > j,
-                   "Outputs %s should have %llu argument", out, j);
+    PADDLE_ENFORCE_NE(
+        in_it, ctx_.inputs.end(),
+        platform::errors::NotFound("Input %s does not exist.", in));
+    PADDLE_ENFORCE_NE(
+        out_it, ctx_.outputs.end(),
+        platform::errors::NotFound("Output %s does not exist.", out));
+    PADDLE_ENFORCE_LT(i, in_it->second.size(),
+                      platform::errors::InvalidArgument(
+                          "The index of input dimension is out of range, "
+                          "expected index less than %zu, but received %zu.",
+                          in_it->second.size(), i));
+    PADDLE_ENFORCE_LT(j, out_it->second.size(),
+                      platform::errors::InvalidArgument(
+                          "The index of output dimension is out of range, "
+                          "expected index less than %zu, but received %zu.",
+                          out_it->second.size(), j));
     Variable* in_var = in_it->second.at(i);
     if (!in_var->IsType<LoDTensor>()) return;
     Variable* out_var = out_it->second.at(j);
-    PADDLE_ENFORCE(out_var->IsType<LoDTensor>(),
-                   "The %d-th output of Output(%s) must be LoDTensor.", j, out);
+    PADDLE_ENFORCE_EQ(
+        out_var->IsType<LoDTensor>(), true,
+        platform::errors::InvalidArgument(
+            "The %zu-th output of Output(%s) must be LoDTensor.", j, out));
     auto& in_tensor = in_var->Get<LoDTensor>();
     auto* out_tensor = out_var->GetMutable<LoDTensor>();
     out_tensor->set_lod(in_tensor.lod());
@@ -757,18 +809,18 @@ class RuntimeInferShapeContext : public InferShapeContext {
   }
   int32_t GetLoDLevel(const std::string& in, size_t i = 0) const override {
-    PADDLE_THROW(
-        "GetLoDLevel is only used in compile time. The calculation of "
-        "output's actual lod is different among operators so that should be "
-        "set in the runtime kernel.");
+    PADDLE_THROW(platform::errors::PreconditionNotMet(
+        "GetLoDLevel is only used in compile time. The calculation of "
+        "output's actual lod is different among operators so that should be "
+        "set in the runtime kernel."));
   }
   void SetLoDLevel(const std::string& out, int32_t lod_level,
                    size_t j = 0) const override {
-    PADDLE_THROW(
-        "SetLoDLevel is only used in compile time. The calculation of "
-        "output's actual lod is different among operators so that should be "
-        "set in the runtime kernel.");
+    PADDLE_THROW(platform::errors::PreconditionNotMet(
+        "SetLoDLevel is only used in compile time. The calculation of "
+        "output's actual lod is different among operators so that should be "
+        "set in the runtime kernel."));
   }
   bool IsRuntime() const override { return true; }
@@ -794,9 +846,11 @@ class RuntimeInferShapeContext : public InferShapeContext {
   DDim GetInputDim(const std::string& name) const override {
     const std::vector<Variable*>& vars = InputVars(name);
-    PADDLE_ENFORCE_EQ(vars.size(), 1UL,
-                      "Input(%s) should hold one element, but now it holds %d",
-                      name, vars.size());
+    PADDLE_ENFORCE_EQ(
+        vars.size(), 1UL,
+        platform::errors::InvalidArgument(
+            "Input(%s) should hold one element, but now it holds %zu elements.",
+            name, vars.size()));
     return this->GetDim(vars[0]);
   }
@@ -817,9 +871,11 @@ class RuntimeInferShapeContext : public InferShapeContext {
   void SetOutputDim(const std::string& name, const DDim& dim) override {
     auto& vars = OutputVars(name);
-    PADDLE_ENFORCE_EQ(vars.size(), 1UL,
-                      "Output(%s) should hold one element, but now it holds %d",
-                      name, vars.size());
+    PADDLE_ENFORCE_EQ(
+        vars.size(), 1UL,
+        platform::errors::InvalidArgument("Output(%s) should hold one element, "
+                                          "but now it holds %zu elements.",
+                                          name, vars.size()));
     SetDim(vars[0], dim);
   }
@@ -831,16 +887,17 @@ class RuntimeInferShapeContext : public InferShapeContext {
  protected:
   DDim GetDim(Variable* var) const {
-    PADDLE_ENFORCE_NOT_NULL(var);
+    PADDLE_ENFORCE_NOT_NULL(
+        var, platform::errors::InvalidArgument("Input variable is nullptr."));
     if (var->IsType<LoDTensor>()) {
       return var->Get<LoDTensor>().dims();
     } else if (var->IsType<SelectedRows>()) {
       return var->Get<SelectedRows>().GetCompleteDims();
     } else {
-      PADDLE_THROW(
-          "Only LoDTensor/SelectedRows support 'GetDim', but Variables "
-          "type_id is %s.",
-          ToTypeName(var->Type()));
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "Only LoDTensor or SelectedRows support 'GetDim', but input "
+          "Variable's type is %s.",
+          ToTypeName(var->Type())));
     }
   }
@@ -853,7 +910,8 @@ class RuntimeInferShapeContext : public InferShapeContext {
   }
   std::vector<DDim> GetRepeatedDims(const std::string& name) const override {
-    PADDLE_THROW("Only compile time support this method");
+    PADDLE_THROW(platform::errors::PreconditionNotMet(
+        "GetRepeatedDims method only can be used in compile time."));
   }
   void SetDim(Variable* var, const DDim& dim) {
@@ -862,15 +920,22 @@ class RuntimeInferShapeContext : public InferShapeContext {
     } else if (var->IsType<SelectedRows>()) {
       var->GetMutable<SelectedRows>()->set_height(dim[0]);
     } else {
-      PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
-                   ToTypeName(var->Type()));
+      PADDLE_THROW(platform::errors::Unimplemented(
+          "Variable type error, expect LoDTensor or SelectedRows, but "
+          "received (%s).",
+          ToTypeName(var->Type())));
     }
   }
   void SetDims(const std::vector<Variable*>& vars,
                const std::vector<DDim>& dims) {
     size_t length = vars.size();
-    PADDLE_ENFORCE_EQ(length, dims.size());
+    PADDLE_ENFORCE_EQ(length, dims.size(),
+                      platform::errors::InvalidArgument(
+                          "The number of input variables does not match the "
+                          "number of input dimensions, the number of variables "
+                          "is %zu, the number of dimensions is %zu.",
+                          length, dims.size()));
     for (size_t i = 0; i < length; ++i) {
       if (vars[i] == nullptr) {
         continue;
@@ -881,7 +946,8 @@ class RuntimeInferShapeContext : public InferShapeContext {
   void SetRepeatedDims(const std::string& name,
                        const std::vector<DDim>& dims) override {
-    PADDLE_THROW("Only compile time support this method");
+    PADDLE_THROW(platform::errors::PreconditionNotMet(
+        "SetRepeatedDims method only can be used in compile time."));
   }
std::vector<proto::VarType::Type> GetVarTypes( std::vector<proto::VarType::Type> GetVarTypes(
...@@ -901,16 +967,19 @@ class RuntimeInferShapeContext : public InferShapeContext { ...@@ -901,16 +967,19 @@ class RuntimeInferShapeContext : public InferShapeContext {
private: private:
const std::vector<Variable*>& InputVars(const std::string& name) const { const std::vector<Variable*>& InputVars(const std::string& name) const {
auto it = ctx_.inputs.find(name); auto it = ctx_.inputs.find(name);
PADDLE_ENFORCE(it != ctx_.inputs.end(), PADDLE_ENFORCE_NE(
"Operator %s does not have the input %s.", op_.Type(), name); it, ctx_.inputs.end(),
platform::errors::NotFound(
"Operator (%s) does not have the input (%s).", op_.Type(), name));
return it->second; return it->second;
} }
const std::vector<Variable*>& OutputVars(const std::string& name) const { const std::vector<Variable*>& OutputVars(const std::string& name) const {
auto it = ctx_.outputs.find(name); auto it = ctx_.outputs.find(name);
PADDLE_ENFORCE(it != ctx_.outputs.end(), PADDLE_ENFORCE_NE(
"Operator %s does not have the outputs %s.", op_.Type(), it, ctx_.outputs.end(),
name); platform::errors::NotFound(
"Operator (%s) does not have the outputs (%s).", op_.Type(), name));
return it->second; return it->second;
} }
...@@ -928,10 +997,14 @@ static void CheckTensorNANOrInf(const std::string& op_type, ...@@ -928,10 +997,14 @@ static void CheckTensorNANOrInf(const std::string& op_type,
tensor.type() != proto::VarType::FP64) { tensor.type() != proto::VarType::FP64) {
return; return;
} }
PADDLE_ENFORCE(!framework::TensorContainsInf(tensor), PADDLE_ENFORCE_NE(
"Operator %s output Tensor %s contains Inf", op_type, name); framework::TensorContainsInf(tensor), true,
PADDLE_ENFORCE(!framework::TensorContainsNAN(tensor), platform::errors::Fatal("Operator %s output Tensor %s contains Inf.",
"Operator %s output Tensor %s contains NAN", op_type, name); op_type, name));
PADDLE_ENFORCE_NE(
framework::TensorContainsNAN(tensor), true,
platform::errors::Fatal("Operator %s output Tensor %s contains NAN.",
op_type, name));
} }
void OperatorWithKernel::RuntimeInferShape(const Scope& scope, void OperatorWithKernel::RuntimeInferShape(const Scope& scope,
...@@ -1074,10 +1147,11 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx, ...@@ -1074,10 +1147,11 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx,
// check if op[type] has kernel registered. // check if op[type] has kernel registered.
auto& all_op_kernels = AllOpKernels(); auto& all_op_kernels = AllOpKernels();
auto kernels_iter = all_op_kernels.find(type_); auto kernels_iter = all_op_kernels.find(type_);
if (kernels_iter == all_op_kernels.end()) { PADDLE_ENFORCE_NE(
PADDLE_THROW( kernels_iter, all_op_kernels.end(),
"There are no kernels which are registered in the %s operator.", type_); platform::errors::Unavailable(
} "There are no kernels which are registered in the %s operator.",
type_));
OpKernelMap& kernels = kernels_iter->second; OpKernelMap& kernels = kernels_iter->second;
...@@ -1131,10 +1205,10 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx, ...@@ -1131,10 +1205,10 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx,
kernel_iter = kernels.find(expected_kernel_key); kernel_iter = kernels.find(expected_kernel_key);
} }
#endif #endif
if (kernel_iter == kernels.end()) { PADDLE_ENFORCE_NE(kernel_iter, kernels.end(),
PADDLE_THROW("op %s does not have kernel for %s", type_, platform::errors::NotFound(
KernelTypeToString(expected_kernel_key)); "Operator (%s) does not have kernel for %s.", type_,
} KernelTypeToString(expected_kernel_key)));
std::lock_guard<std::mutex> lock(cache_update_mutex_); std::lock_guard<std::mutex> lock(cache_update_mutex_);
if (kernel_type_.get() == nullptr || kernel_func_.get() == nullptr) { if (kernel_type_.get() == nullptr || kernel_func_.get() == nullptr) {
...@@ -1149,13 +1223,14 @@ void OperatorWithKernel::TransferInplaceVarsBack( ...@@ -1149,13 +1223,14 @@ void OperatorWithKernel::TransferInplaceVarsBack(
for (auto& var_name : inplace_vars) { for (auto& var_name : inplace_vars) {
VLOG(3) << "share inplace var " + var_name + " back to it's original scope"; VLOG(3) << "share inplace var " + var_name + " back to it's original scope";
auto* origin_var = scope.FindVar(var_name); auto* origin_var = scope.FindVar(var_name);
PADDLE_ENFORCE_NOT_NULL(origin_var, "The var[%s] should not be nullptr.", PADDLE_ENFORCE_NOT_NULL(origin_var,
var_name); platform::errors::InvalidArgument(
"The variable[%s] is nullptr.", var_name));
auto* original_tensor = auto* original_tensor =
GetMutableLoDTensorOrSelectedRowsValueFromVar(origin_var); GetMutableLoDTensorOrSelectedRowsValueFromVar(origin_var);
auto* var = transfer_scope.FindVar(var_name); auto* var = transfer_scope.FindVar(var_name);
PADDLE_ENFORCE_NOT_NULL(var, "The var[%s] should not be nullptr.", PADDLE_ENFORCE_NOT_NULL(var, platform::errors::InvalidArgument(
var_name); "The variable[%s] is nullptr.", var_name));
auto* transformed_tensor = GetLoDTensorOrSelectedRowsValueFromVar(*var); auto* transformed_tensor = GetLoDTensorOrSelectedRowsValueFromVar(*var);
auto original_dims = original_tensor->dims(); auto original_dims = original_tensor->dims();
original_tensor->ShareDataWith(*transformed_tensor); original_tensor->ShareDataWith(*transformed_tensor);
...@@ -1380,9 +1455,11 @@ proto::VarType::Type OperatorWithKernel::IndicateVarDataType( ...@@ -1380,9 +1455,11 @@ proto::VarType::Type OperatorWithKernel::IndicateVarDataType(
ParseInputDataType(ctx, name, &data_type); ParseInputDataType(ctx, name, &data_type);
PADDLE_ENFORCE_NE( PADDLE_ENFORCE_NE(
data_type, dafault_data_type, data_type, dafault_data_type,
"The Input Variable(%s) of %s Op used to determine kernel data type " platform::errors::InvalidArgument(
"is empty or not LoDTensor or SelectedRows or LoDTensorArray.", "The Input Variable(%s) of (%s) Operator used to determine kernel "
name, Type()); "data type is empty or not LoDTensor or SelectedRows or "
"LoDTensorArray.",
name, Type()));
return data_type; return data_type;
} }
......
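
The hunks above all apply the same migration: a bare PADDLE_ENFORCE(cond, msg) or PADDLE_THROW(msg) becomes a typed comparison macro (PADDLE_ENFORCE_EQ/_NE/_NOT_NULL) carrying a platform::errors::* category such as InvalidArgument or NotFound. A minimal self-contained sketch of that call-site shape, assuming nothing from Paddle itself (ENFORCE_EQ and errors::InvalidArgument below are illustrative stand-ins, not the real macros, which also record the failing expression text and map each category to an error code):

#include <cstddef>
#include <sstream>
#include <stdexcept>
#include <string>

namespace errors {
// Illustrative stand-in for platform::errors::InvalidArgument.
inline std::string InvalidArgument(const std::string& msg) {
  return "InvalidArgumentError: " + msg;
}
}  // namespace errors

// Illustrative stand-in for PADDLE_ENFORCE_EQ: compare, then throw with a
// message that carries the error category and the failing location.
#define ENFORCE_EQ(a, b, msg)                                \
  do {                                                       \
    if (!((a) == (b))) {                                     \
      std::ostringstream os;                                 \
      os << (msg) << " at " << __FILE__ << ":" << __LINE__;  \
      throw std::runtime_error(os.str());                    \
    }                                                        \
  } while (0)

int main() {
  std::size_t vars = 1;
  // Mirrors the SetOutputDim check above: exactly one output expected.
  // Changing `vars` to 2 triggers the InvalidArgument error path.
  ENFORCE_EQ(vars, 1UL,
             errors::InvalidArgument("Output should hold one element."));
}
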
...@@ -495,9 +495,9 @@ TEST(IndicateVarDataTypeTest, other) { ...@@ -495,9 +495,9 @@ TEST(IndicateVarDataTypeTest, other) {
EXPECT_TRUE( EXPECT_TRUE(
ex_msg.find( ex_msg.find(
"The Input Variable(Other) of " "The Input Variable(Other) of "
"indicate_other_data_type_test Op used to " "(indicate_other_data_type_test) Operator used to "
"determine kernel data type " "determine kernel data type "
"is empty or not LoDTensor or SelectedRows or LoDTensorArray") != "is empty or not LoDTensor or SelectedRows or LoDTensorArray.") !=
std::string::npos); std::string::npos);
} }
ASSERT_TRUE(caught); ASSERT_TRUE(caught);
......
...@@ -20,7 +20,10 @@ namespace framework { ...@@ -20,7 +20,10 @@ namespace framework {
void ReaderBase::ReadNext(std::vector<LoDTensor> *out) { void ReaderBase::ReadNext(std::vector<LoDTensor> *out) {
std::lock_guard<std::mutex> lock(mu_); std::lock_guard<std::mutex> lock(mu_);
PADDLE_ENFORCE_EQ(status_, ReaderStatus::kRunning); PADDLE_ENFORCE_EQ(status_, ReaderStatus::kRunning,
platform::errors::Unavailable(
"The current reader has stopped running and cannot "
"continue to read the next batch of data."));
ReadNextImpl(out); ReadNextImpl(out);
} }
......
...@@ -32,17 +32,21 @@ struct RWLock { ...@@ -32,17 +32,21 @@ struct RWLock {
~RWLock() { pthread_rwlock_destroy(&lock_); } ~RWLock() { pthread_rwlock_destroy(&lock_); }
inline void RDLock() { inline void RDLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_rdlock(&lock_), 0, PADDLE_ENFORCE_EQ(
"acquire read lock failed"); pthread_rwlock_rdlock(&lock_), 0,
platform::errors::External("The pthread failed to acquire read lock."));
} }
inline void WRLock() { inline void WRLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_wrlock(&lock_), 0, PADDLE_ENFORCE_EQ(pthread_rwlock_wrlock(&lock_), 0,
"acquire write lock failed"); platform::errors::External(
"The pthread failed to acquire write lock."));
} }
inline void UNLock() { inline void UNLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_unlock(&lock_), 0, "unlock failed"); PADDLE_ENFORCE_EQ(
pthread_rwlock_unlock(&lock_), 0,
platform::errors::External("The pthread failed to unlock."));
} }
private: private:
......
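
The RWLock hunk above enforces a zero return code on every pthread call. A condensed sketch under the same contract, with plain exceptions in place of PADDLE_ENFORCE_EQ; the ReadGuard RAII helper at the end is an assumption of this sketch, not something the diff adds:

#include <pthread.h>
#include <stdexcept>

// Condensed version of the RWLock wrapper above: every pthread call's
// return code is checked, mirroring PADDLE_ENFORCE_EQ(..., 0, ...).
struct RWLock {
  RWLock() { pthread_rwlock_init(&lock_, nullptr); }
  ~RWLock() { pthread_rwlock_destroy(&lock_); }
  void RDLock() {
    if (pthread_rwlock_rdlock(&lock_) != 0)
      throw std::runtime_error("The pthread failed to acquire read lock.");
  }
  void WRLock() {
    if (pthread_rwlock_wrlock(&lock_) != 0)
      throw std::runtime_error("The pthread failed to acquire write lock.");
  }
  void UNLock() {
    if (pthread_rwlock_unlock(&lock_) != 0)
      throw std::runtime_error("The pthread failed to unlock.");
  }
  pthread_rwlock_t lock_;
};

// Hypothetical RAII guard (not in the diff): releases the lock on every
// exit path, including when a later enforce-style check throws.
struct ReadGuard {
  explicit ReadGuard(RWLock* l) : lock(l) { lock->RDLock(); }
  ~ReadGuard() { lock->UNLock(); }
  RWLock* lock;
};
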
...@@ -33,7 +33,8 @@ void CheckInStreamState(std::istream& istre, size_t length) { ...@@ -33,7 +33,8 @@ void CheckInStreamState(std::istream& istre, size_t length) {
VLOG(5) << "Can't read [" << length << "] from file" VLOG(5) << "Can't read [" << length << "] from file"
<< "file seems breakem"; << "file seems breakem";
PADDLE_THROW("Model load error, file seems breaken"); PADDLE_THROW(platform::errors::Unavailable(
"Model load failed, istream state error."));
} }
} }
...@@ -58,10 +59,11 @@ size_t ReadTensorNumber(std::istream& istre) { ...@@ -58,10 +59,11 @@ size_t ReadTensorNumber(std::istream& istre) {
sizeof(char) * tensor_number_mark.size()); sizeof(char) * tensor_number_mark.size());
std::string str_read_tensor_number_mark(tensor_number_mark_buffer, std::string str_read_tensor_number_mark(tensor_number_mark_buffer,
tensor_number_mark.size()); tensor_number_mark.size());
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(tensor_number_mark, str_read_tensor_number_mark,
tensor_number_mark, str_read_tensor_number_mark, platform::errors::InvalidArgument(
"Tensor number mark not match, expect [%s], but read from file is [%]", "Tensor number mark does not match, expect mark is "
tensor_number_mark, str_read_tensor_number_mark); "[%s], but the mark read from file is [%s].",
tensor_number_mark, str_read_tensor_number_mark));
size_t tensor_number = 0; size_t tensor_number = 0;
istre.read(reinterpret_cast<char*>(&tensor_number), sizeof(tensor_number)); istre.read(reinterpret_cast<char*>(&tensor_number), sizeof(tensor_number));
...@@ -79,10 +81,11 @@ std::string ReadTensorName(std::istream& istre) { ...@@ -79,10 +81,11 @@ std::string ReadTensorName(std::istream& istre) {
std::string str_read_tensor_name_mark(name_mark_buffer, std::string str_read_tensor_name_mark(name_mark_buffer,
tensor_name_mark.size()); tensor_name_mark.size());
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(tensor_name_mark, str_read_tensor_name_mark,
tensor_name_mark, str_read_tensor_name_mark, platform::errors::InvalidArgument(
"Tensor name mark not match, expect [%s], but read from file is [%]", "Tensor name mark does not match, expect mark is [%s], "
tensor_name_mark, str_read_tensor_name_mark); "but the mark read from file is [%s].",
tensor_name_mark, str_read_tensor_name_mark));
size_t tensor_name_length = 0; size_t tensor_name_length = 0;
istre.read(reinterpret_cast<char*>(&tensor_name_length), istre.read(reinterpret_cast<char*>(&tensor_name_length),
...@@ -117,16 +120,18 @@ bool SaveStaticNameListToDisk( ...@@ -117,16 +120,18 @@ bool SaveStaticNameListToDisk(
for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) { for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) {
auto var_ptr = scope.FindVar(vec_tensor_name_list[i]); auto var_ptr = scope.FindVar(vec_tensor_name_list[i]);
PADDLE_ENFORCE_NE( PADDLE_ENFORCE_NOT_NULL(
var_ptr, nullptr, var_ptr, platform::errors::NotFound("Variable (%s) is not found when "
"Variable find error, when save model, can't not find vairable [%s], " "saving model, please make sure "
"Please make sure you have run StartUpProgram", "that exe.run(startup_program) has "
vec_tensor_name_list[i]); "been executed.",
vec_tensor_name_list[i]));
Tensor* tensor = var_ptr->GetMutable<LoDTensor>(); Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true, PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed," platform::errors::PreconditionNotMet(
"Please make sure you have run StartUpProgram", "Paramter [%s] is not initialzed, please make sure "
vec_tensor_name_list[i]); "that exe.run(startup_program) has been executed.",
vec_tensor_name_list[i]));
map_tensor[vec_tensor_name_list[i]] = tensor; map_tensor[vec_tensor_name_list[i]] = tensor;
} }
...@@ -145,9 +150,10 @@ bool SaveDygraphVarBaseListToDisk( ...@@ -145,9 +150,10 @@ bool SaveDygraphVarBaseListToDisk(
Tensor* tensor = var_ptr->GetMutable<LoDTensor>(); Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true, PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed," platform::errors::PreconditionNotMet(
"Please make sure you have run StartUpProgram", "Paramter [%s] is not initialzed, please make sure "
vec_var_base_list[i]->Name()); "that exe.run(startup_program) has been executed.",
vec_var_base_list[i]->Name()));
map_tensor[vec_var_base_list[i]->Name()] = tensor; map_tensor[vec_var_base_list[i]->Name()] = tensor;
} }
...@@ -185,34 +191,41 @@ bool LoadStaticNameListFromDisk( ...@@ -185,34 +191,41 @@ bool LoadStaticNameListFromDisk(
for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) { for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) {
auto it = map_load_tensor.find(vec_tensor_name_list[i]); auto it = map_load_tensor.find(vec_tensor_name_list[i]);
PADDLE_ENFORCE(it != map_load_tensor.end(), PADDLE_ENFORCE_NE(it, map_load_tensor.end(),
"Paramete not found in Model file, " platform::errors::NotFound(
"Can not find [%s] in model file [%s]", "Parameter (%s) not found in model file (%s).",
vec_tensor_name_list[i], file_name); vec_tensor_name_list[i], file_name));
auto var_ptr = scope.FindVar(vec_tensor_name_list[i]); auto var_ptr = scope.FindVar(vec_tensor_name_list[i]);
PADDLE_ENFORCE_NE( PADDLE_ENFORCE_NOT_NULL(
var_ptr, nullptr, var_ptr,
"Parameter not created, when load model, can't not find parameter [%s] " platform::errors::PreconditionNotMet(
"please make sure you have run StartUpProgram", "Parameter (%s) is not created when loading model, "
vec_tensor_name_list[i]); "please make sure that exe.run(startup_program) has been executed.",
vec_tensor_name_list[i]));
Tensor* tensor = var_ptr->GetMutable<LoDTensor>(); Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_NE(tensor, nullptr, PADDLE_ENFORCE_NOT_NULL(
"Paramter [%s] not initialzed " tensor,
"please make sure you have run startUpProgram", platform::errors::PreconditionNotMet(
vec_tensor_name_list[i]); "Paramter [%s] is not initialzed, "
"please make sure that exe.run(startup_program) has been executed.",
vec_tensor_name_list[i]));
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true, PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed " platform::errors::PreconditionNotMet(
"please make sure you have run StartUpProgram", "Paramter [%s] is not initialzed, "
vec_tensor_name_list[i]); "please make sure that exe.run(startup_program) has "
"been executed.v",
vec_tensor_name_list[i]));
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
tensor->dims(), it->second->dims(), tensor->dims(), it->second->dims(),
"Shape not matching: the Program requires a parameter with a shape of " platform::errors::InvalidArgument(
"(%s), " "Shape does not match, the program requires a parameter with a "
"while the loaded parameter (namely [ %s ]) has a shape of (%s).", "shape of "
tensor->dims(), vec_tensor_name_list[i], it->second->dims()); "(%s), while the loaded parameter (namely [ %s ]) has a shape of "
"(%s).",
tensor->dims(), vec_tensor_name_list[i], it->second->dims()));
TensorCopySync(*(it->second.get()), tensor->place(), tensor); TensorCopySync(*(it->second.get()), tensor->place(), tensor);
...@@ -239,9 +252,9 @@ bool SaveTensorToDisk(const std::string& file_name, ...@@ -239,9 +252,9 @@ bool SaveTensorToDisk(const std::string& file_name,
MkDirRecursively(DirName(file_name).c_str()); MkDirRecursively(DirName(file_name).c_str());
std::ofstream fout(file_name, std::ios::binary); std::ofstream fout(file_name, std::ios::binary);
if (!fout) { PADDLE_ENFORCE_EQ(
PADDLE_THROW("File open error. Can not open file [%s]", file_name); fout.is_open(), true,
} platform::errors::Unavailable("File (%s) open failed.", file_name));
// first 256 bytes are reserved for future upgrade // first 256 bytes are reserved for future upgrade
char* kReserveBuffer = new char[model_file_reserve_size]; char* kReserveBuffer = new char[model_file_reserve_size];
...@@ -292,9 +305,8 @@ bool SaveTensorToDisk(const std::string& file_name, ...@@ -292,9 +305,8 @@ bool SaveTensorToDisk(const std::string& file_name,
TensorCopySync(*tensor, platform::CPUPlace(), &temp); TensorCopySync(*tensor, platform::CPUPlace(), &temp);
data_ptr = temp.data<void>(); data_ptr = temp.data<void>();
#else #else
PADDLE_THROW( PADDLE_THROW(platform::errors::Unavailable(
"Tensor is in CUDA device, but paddle not compile with CUDA, this " "Tensor is in CUDA device, but paddle not compiled with CUDA."));
"should not happen");
#endif #endif
} }
fout.write(static_cast<const char*>(data_ptr), fout.write(static_cast<const char*>(data_ptr),
...@@ -302,8 +314,9 @@ bool SaveTensorToDisk(const std::string& file_name, ...@@ -302,8 +314,9 @@ bool SaveTensorToDisk(const std::string& file_name,
} }
if (!fout) { if (!fout) {
PADDLE_THROW("Model save failed, data write to model file [%s] error", PADDLE_THROW(platform::errors::Unavailable(
file_name); "Model save failed, error when writing data into model file [%s].",
file_name));
} }
fout.close(); fout.close();
...@@ -316,9 +329,9 @@ bool LoadTensorFromDisk( ...@@ -316,9 +329,9 @@ bool LoadTensorFromDisk(
std::map<std::string, std::shared_ptr<Tensor>>* map_tensor) { std::map<std::string, std::shared_ptr<Tensor>>* map_tensor) {
std::ifstream fin(file_name, std::ios::binary); std::ifstream fin(file_name, std::ios::binary);
if (!fin) { PADDLE_ENFORCE_EQ(
PADDLE_THROW("File open error. Can not open model file [%s]", file_name); fin.is_open(), true,
} platform::errors::Unavailable("File (%s) open failed.", file_name));
ReadReserveBuffer(fin); ReadReserveBuffer(fin);
...@@ -331,7 +344,8 @@ bool LoadTensorFromDisk( ...@@ -331,7 +344,8 @@ bool LoadTensorFromDisk(
uint32_t version; uint32_t version;
fin.read(reinterpret_cast<char*>(&version), sizeof(version)); fin.read(reinterpret_cast<char*>(&version), sizeof(version));
CheckInStreamState(fin, sizeof(version)); CheckInStreamState(fin, sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); PADDLE_ENFORCE_EQ(version, 0U, platform::errors::InvalidArgument(
"Only version 0 tensor is supported."));
proto::VarType::TensorDesc desc; proto::VarType::TensorDesc desc;
{ {
// int32_t size // int32_t size
...@@ -344,7 +358,7 @@ bool LoadTensorFromDisk( ...@@ -344,7 +358,7 @@ bool LoadTensorFromDisk(
CheckInStreamState(fin, sizeof(size)); CheckInStreamState(fin, sizeof(size));
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
desc.ParseFromArray(buf.get(), size), true, desc.ParseFromArray(buf.get(), size), true,
platform::errors::InvalidArgument("Cannot parse tensor desc")); platform::errors::InvalidArgument("Parse tensor desc failed."));
} }
{ // read tensor { // read tensor
......
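
ReadTensorNumber and ReadTensorName above read a fixed ASCII mark from the stream and enforce that it matches the expected constant before trusting the bytes that follow. A sketch of that framing check in isolation, assuming a caller-supplied mark string (the real constants live elsewhere in Paddle, and ReadCountAfterMark is an illustrative name):

#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>

// Sketch of the mark check performed by ReadTensorNumber above: read the
// mark, compare it to the expected constant, then read the count it guards.
size_t ReadCountAfterMark(std::ifstream& fin, const std::string& mark) {
  std::vector<char> buf(mark.size());
  fin.read(buf.data(), static_cast<std::streamsize>(buf.size()));
  std::string read_mark(buf.data(), buf.size());
  if (read_mark != mark) {
    throw std::invalid_argument(
        "Tensor number mark does not match, expected mark is [" + mark +
        "], but the mark read from file is [" + read_mark + "].");
  }
  size_t count = 0;
  fin.read(reinterpret_cast<char*>(&count), sizeof(count));
  if (!fin) {
    throw std::runtime_error("Model load failed, istream state error.");
  }
  return count;
}
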
...@@ -113,7 +113,9 @@ void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows, ...@@ -113,7 +113,9 @@ void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows,
// the 1st field, uint32_t version for SelectedRows // the 1st field, uint32_t version for SelectedRows
uint32_t version; uint32_t version;
is.read(reinterpret_cast<char*>(&version), sizeof(version)); is.read(reinterpret_cast<char*>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); PADDLE_ENFORCE_EQ(version, 0U,
platform::errors::InvalidArgument(
"Only version 0 SelectedRows is supported."));
} }
{ {
// the 2nd field, rows information // the 2nd field, rows information
...@@ -155,24 +157,27 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown, ...@@ -155,24 +157,27 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown,
auto iter = id_to_index_.find(key); auto iter = id_to_index_.find(key);
if (iter == id_to_index_.end()) { if (iter == id_to_index_.end()) {
rwlock_->UNLock(); rwlock_->UNLock();
if (!auto_grown) { PADDLE_ENFORCE_EQ(
PADDLE_THROW("key %d not found", key); auto_grown, true,
} platform::errors::NotFound("Input key(%lld) is not found.", key));
rwlock_->WRLock(); rwlock_->WRLock();
auto map_size = id_to_index_.size(); auto map_size = id_to_index_.size();
auto vector_size = rows_.size(); auto vector_size = rows_.size();
if (map_size != vector_size) { if (map_size != vector_size) {
rwlock_->UNLock(); rwlock_->UNLock();
PADDLE_THROW( PADDLE_THROW(platform::errors::InvalidArgument(
"id_to_index_ size %d should have the same size with rows_ %d", "Row map size(%zu) should be equal to rows size(%zu).", map_size,
map_size, vector_size); vector_size));
} }
auto write_iter = id_to_index_.find(key); auto write_iter = id_to_index_.find(key);
if (write_iter == id_to_index_.end()) { if (write_iter == id_to_index_.end()) {
int row_num = rows_.size(); int row_num = rows_.size();
if (row_num == value_->dims()[0]) { if (row_num == value_->dims()[0]) {
rwlock_->UNLock(); rwlock_->UNLock();
PADDLE_THROW("selected rows is full, then length exceed %d", row_num); PADDLE_THROW(platform::errors::InvalidArgument(
"Selected rows is full, then length exceed the length of first "
"dimension (%d).",
row_num));
} }
// key logic to put a key into id_to_index_ // key logic to put a key into id_to_index_
rows_.push_back(key); rows_.push_back(key);
...@@ -203,15 +208,20 @@ void SelectedRows::SyncIndex() { ...@@ -203,15 +208,20 @@ void SelectedRows::SyncIndex() {
void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value, void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
bool auto_grown, bool is_test) { bool auto_grown, bool is_test) {
PADDLE_ENFORCE(value->IsInitialized(), PADDLE_ENFORCE_EQ(value->IsInitialized(), true,
"The value tensor should be initialized."); platform::errors::InvalidArgument(
"The value tensor is not initialized."));
if (ids.numel() == 0) { if (ids.numel() == 0) {
VLOG(3) << "keys is empty, please check data!"; VLOG(3) << "keys is empty, please check data!";
} else { } else {
int64_t value_width = value_->numel() / value_->dims()[0]; int64_t value_width = value_->numel() / value_->dims()[0];
PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0], PADDLE_ENFORCE_EQ(
"output tensor should have the same shape with table " value_width, value->numel() / value->dims()[0],
"except the dims[0]."); platform::errors::InvalidArgument(
"Output tensor should have the same shape with table "
"except the first dimmension, excepted value width not counting "
"the first dimension is %d, actual value width is %d.",
value_width, value->numel() / value->dims()[0]));
for (int i = 0; i < ids.numel(); ++i) { for (int i = 0; i < ids.numel(); ++i) {
auto id = ids.data<int64_t>()[i]; auto id = ids.data<int64_t>()[i];
int64_t index = AutoGrownIndex(id, auto_grown, is_test); int64_t index = AutoGrownIndex(id, auto_grown, is_test);
......
...@@ -82,7 +82,8 @@ class SelectedRows { ...@@ -82,7 +82,8 @@ class SelectedRows {
int64_t Index(int64_t key) const { int64_t Index(int64_t key) const {
auto it = std::find(rows_.begin(), rows_.end(), key); auto it = std::find(rows_.begin(), rows_.end(), key);
if (it == rows_.end()) { if (it == rows_.end()) {
PADDLE_THROW("id %s not in table", key); PADDLE_THROW(platform::errors::NotFound(
"Input id (%lld) is not in current rows table.", key));
} }
return static_cast<int64_t>(std::distance(rows_.begin(), it)); return static_cast<int64_t>(std::distance(rows_.begin(), it));
} }
......
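
SelectedRows::Index above does a linear scan over rows_ and, with this change, reports a NotFound-style error carrying the missing key. The same contract as a free-function sketch, with a standard exception standing in for PADDLE_THROW(platform::errors::NotFound(...)):

#include <algorithm>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <vector>

// Sketch of the Index contract above: return the position of `key` in the
// rows table, or throw a NotFound-style error. Linear scan, as in the diff.
int64_t Index(const std::vector<int64_t>& rows, int64_t key) {
  auto it = std::find(rows.begin(), rows.end(), key);
  if (it == rows.end()) {
    throw std::out_of_range("Input id (" + std::to_string(key) +
                            ") is not in current rows table.");
  }
  return static_cast<int64_t>(std::distance(rows.begin(), it));
}
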
...@@ -25,20 +25,22 @@ namespace framework { ...@@ -25,20 +25,22 @@ namespace framework {
std::vector<DDim> InferShapeContext::GetReaderDims( std::vector<DDim> InferShapeContext::GetReaderDims(
const std::string &name) const { const std::string &name) const {
const std::vector<std::string> &arg_names = Inputs(name); const std::vector<std::string> &arg_names = Inputs(name);
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(arg_names.size(), 1UL,
arg_names.size(), 1UL, platform::errors::InvalidArgument(
"Reader input '%s' should hold one element, but now it holds %d", name, "Reader input '%s' should hold one element, but now it "
arg_names.size()); "holds %d elements.",
name, arg_names.size()));
return this->GetRepeatedDims(arg_names[0]); return this->GetRepeatedDims(arg_names[0]);
} }
void InferShapeContext::SetReaderDims(const std::string &name, void InferShapeContext::SetReaderDims(const std::string &name,
const std::vector<DDim> &dims) { const std::vector<DDim> &dims) {
const std::vector<std::string> &arg_names = Outputs(name); const std::vector<std::string> &arg_names = Outputs(name);
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(arg_names.size(), 1UL,
arg_names.size(), 1UL, platform::errors::InvalidArgument(
"Reader output '%s' should hold one element, but now it holds %d", name, "Reader output '%s' should hold one element, but now "
arg_names.size()); "it holds %d elements.",
name, arg_names.size()));
return this->SetRepeatedDims(arg_names[0], dims); return this->SetRepeatedDims(arg_names[0], dims);
} }
......
...@@ -94,9 +94,17 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -94,9 +94,17 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place); auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place);
auto dst_cpu_place = BOOST_GET_CONST(platform::CPUPlace, dst_place); auto dst_cpu_place = BOOST_GET_CONST(platform::CPUPlace, dst_place);
auto ctx_place = ctx.GetPlace(); auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true); PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx_place), true,
platform::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
ctx_place));
auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place); auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place); PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place,
platform::errors::Unavailable(
"Source place and context place do not match, source "
"place is %s, context place is %s.",
src_gpu_place, ctx_gpu_place));
auto stream = auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream(); reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
...@@ -106,9 +114,17 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -106,9 +114,17 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto src_cpu_place = BOOST_GET_CONST(platform::CPUPlace, src_place); auto src_cpu_place = BOOST_GET_CONST(platform::CPUPlace, src_place);
auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place); auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place);
auto ctx_place = ctx.GetPlace(); auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true); PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx_place), true,
platform::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
ctx_place));
auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place); auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place);
PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place); PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place,
platform::errors::Unavailable(
"Destination place and context place do not match, "
"destination place is %s, context place is %s.",
dst_gpu_place, ctx_gpu_place));
auto stream = auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream(); reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream); memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream);
...@@ -164,7 +180,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -164,7 +180,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place); auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place);
auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place); auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place);
auto ctx_place = ctx.GetPlace(); auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true); PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx_place), true,
platform::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
ctx_place));
auto stream = auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream(); reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_same_place(src_place, dst_place)) { if (platform::is_same_place(src_place, dst_place)) {
...@@ -180,12 +200,14 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -180,12 +200,14 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
stream); stream);
} else { } else {
PADDLE_THROW("ctx is not belong to dst_gpu_place or src_gpu_place."); PADDLE_THROW(platform::errors::Unavailable(
"Context place dose not match the source and destination place."));
} }
} }
} }
else { // NOLINT else { // NOLINT
PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place); PADDLE_THROW(platform::errors::Unimplemented(
"Copying from %s to %s is not supported.", src_place, dst_place));
} }
#endif #endif
} }
...@@ -298,7 +320,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, ...@@ -298,7 +320,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
nullptr); nullptr);
} }
else { // NOLINT else { // NOLINT
PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place); PADDLE_THROW(platform::errors::Unimplemented(
"Copy from %s to %s is not supported.", src_place, dst_place));
} }
#endif #endif
} }
...@@ -832,7 +855,9 @@ void TensorFromStream(std::istream& is, Tensor* tensor, ...@@ -832,7 +855,9 @@ void TensorFromStream(std::istream& is, Tensor* tensor,
void* GetDstPtrByDLDataType(DLDataType type, framework::Tensor* dst, void* GetDstPtrByDLDataType(DLDataType type, framework::Tensor* dst,
const platform::Place& dst_place) { const platform::Place& dst_place) {
// vector types not currently supported // vector types not currently supported
PADDLE_ENFORCE_LE(type.lanes, 1, "vector types not currently supported"); PADDLE_ENFORCE_LE(type.lanes, 1,
platform::errors::Unimplemented(
"Vector type is not supported currently."));
switch (type.bits) { switch (type.bits) {
case 8: case 8:
...@@ -840,32 +865,37 @@ void* GetDstPtrByDLDataType(DLDataType type, framework::Tensor* dst, ...@@ -840,32 +865,37 @@ void* GetDstPtrByDLDataType(DLDataType type, framework::Tensor* dst,
return static_cast<void*>(dst->mutable_data<int8_t>(dst_place)); return static_cast<void*>(dst->mutable_data<int8_t>(dst_place));
if (type.code == kDLUInt) if (type.code == kDLUInt)
return static_cast<void*>(dst->mutable_data<uint8_t>(dst_place)); return static_cast<void*>(dst->mutable_data<uint8_t>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.", PADDLE_THROW(platform::errors::Unimplemented(
type.code, type.bits); "DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
case 16: case 16:
if (type.code == kDLInt) if (type.code == kDLInt)
return static_cast<void*>(dst->mutable_data<int16_t>(dst_place)); return static_cast<void*>(dst->mutable_data<int16_t>(dst_place));
if (type.code == kDLFloat) if (type.code == kDLFloat)
return static_cast<void*>( return static_cast<void*>(
dst->mutable_data<paddle::platform::float16>(dst_place)); dst->mutable_data<paddle::platform::float16>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.", PADDLE_THROW(platform::errors::Unimplemented(
type.code, type.bits); "DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
case 32: case 32:
if (type.code == kDLInt) if (type.code == kDLInt)
return static_cast<void*>(dst->mutable_data<int32_t>(dst_place)); return static_cast<void*>(dst->mutable_data<int32_t>(dst_place));
if (type.code == kDLFloat) if (type.code == kDLFloat)
return static_cast<void*>(dst->mutable_data<float>(dst_place)); return static_cast<void*>(dst->mutable_data<float>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.", PADDLE_THROW(platform::errors::Unimplemented(
type.code, type.bits); "DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
case 64: case 64:
if (type.code == kDLInt) if (type.code == kDLInt)
return static_cast<void*>(dst->mutable_data<int64_t>(dst_place)); return static_cast<void*>(dst->mutable_data<int64_t>(dst_place));
if (type.code == kDLFloat) if (type.code == kDLFloat)
return static_cast<void*>(dst->mutable_data<double>(dst_place)); return static_cast<void*>(dst->mutable_data<double>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.", PADDLE_THROW(platform::errors::Unimplemented(
type.code, type.bits); "DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
default: default:
PADDLE_THROW("Unsupport type.bits %d", type.bits); PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported DLDataType.bits %d.", type.bits));
} }
} }
......
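
GetDstPtrByDLDataType above dispatches first on type.bits and then on type.code, and every unhandled (bits, code) pair now throws Unimplemented with both values in the message. A condensed sketch of that dispatch shape, with a plain enum standing in for the DLPack kDLInt/kDLUInt/kDLFloat constants and a type-name string standing in for the mutable-data pointer:

#include <stdexcept>
#include <string>

// Plain enum standing in for the DLPack type codes used above.
enum class Code { Int, UInt, Float };

// Condensed sketch of the bits/code dispatch in GetDstPtrByDLDataType:
// every unhandled (bits, code) pair falls through to one error path.
std::string NameFor(Code code, int bits) {
  switch (bits) {
    case 8:
      if (code == Code::Int) return "int8";
      if (code == Code::UInt) return "uint8";
      break;
    case 16:
      if (code == Code::Int) return "int16";
      if (code == Code::Float) return "float16";
      break;
    case 32:
      if (code == Code::Int) return "int32";
      if (code == Code::Float) return "float32";
      break;
    case 64:
      if (code == Code::Int) return "int64";
      if (code == Code::Float) return "float64";
      break;
  }
  throw std::logic_error("Unsupported DLDataType: bits=" +
                         std::to_string(bits));
}
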
...@@ -183,7 +183,11 @@ void TensorToVector(const Tensor& src, std::vector<T>* dst) { ...@@ -183,7 +183,11 @@ void TensorToVector(const Tensor& src, std::vector<T>* dst) {
dst->resize(src.numel()); dst->resize(src.numel());
auto dst_ptr = static_cast<void*>(dst->data()); auto dst_ptr = static_cast<void*>(dst->data());
PADDLE_ENFORCE_EQ(platform::is_cpu_place(src.place()), true); PADDLE_ENFORCE_EQ(
platform::is_cpu_place(src.place()), true,
platform::errors::InvalidArgument(
"The input tensor should be CPU device, but actually it is in %s.",
src.place()));
memory::Copy(dst_place, dst_ptr, memory::Copy(dst_place, dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src.place()), src_ptr, size); BOOST_GET_CONST(platform::CPUPlace, src.place()), src_ptr, size);
......
...@@ -27,8 +27,9 @@ Analyzer::Analyzer() {} ...@@ -27,8 +27,9 @@ Analyzer::Analyzer() {}
void Analyzer::Run(Argument *argument) { RunAnalysis(argument); } void Analyzer::Run(Argument *argument) { RunAnalysis(argument); }
void Analyzer::RunAnalysis(Argument *argument) { void Analyzer::RunAnalysis(Argument *argument) {
PADDLE_ENFORCE(argument->analysis_passes_valid(), PADDLE_ENFORCE_EQ(argument->analysis_passes_valid(), true,
"analsis_passes is not valid in the argument."); platform::errors::InvalidArgument(
"analsis_passes is not valid in the argument."));
const bool disable_logs = argument->disable_logs(); const bool disable_logs = argument->disable_logs();
for (auto &pass : argument->analysis_passes()) { for (auto &pass : argument->analysis_passes()) {
if (!disable_logs) { if (!disable_logs) {
...@@ -38,7 +39,8 @@ void Analyzer::RunAnalysis(Argument *argument) { ...@@ -38,7 +39,8 @@ void Analyzer::RunAnalysis(Argument *argument) {
continue; continue;
auto *ptr = PassRegistry::Global().Retreive(pass); auto *ptr = PassRegistry::Global().Retreive(pass);
PADDLE_ENFORCE_NOT_NULL(ptr, "no analysis pass called %s", pass); PADDLE_ENFORCE_NOT_NULL(ptr, platform::errors::PreconditionNotMet(
"no analysis pass called %s", pass));
ptr->Run(argument); ptr->Run(argument);
} }
} }
......
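
Analyzer::RunAnalysis above looks up each pass by name and now enforces that the registry actually knows it before running. A sketch with a hypothetical closure-based registry (the real PassRegistry stores pass objects rather than closures; Retreive keeps the codebase's spelling):

#include <functional>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

// Hypothetical registry sketch mirroring the lookup used above.
class PassRegistry {
 public:
  using Pass = std::function<void()>;
  void Insert(const std::string& name, Pass pass) {
    passes_[name] = std::move(pass);
  }
  const Pass* Retreive(const std::string& name) const {
    auto it = passes_.find(name);
    return it == passes_.end() ? nullptr : &it->second;
  }

 private:
  std::map<std::string, Pass> passes_;
};

// Mirrors the enforce-then-run loop in Analyzer::RunAnalysis above.
void RunAll(const PassRegistry& registry,
            const std::vector<std::string>& pass_names) {
  for (const auto& name : pass_names) {
    const auto* pass = registry.Retreive(name);
    if (pass == nullptr) {
      throw std::logic_error("no analysis pass called " + name);
    }
    (*pass)();
  }
}
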
...@@ -75,9 +75,14 @@ void TestWord2vecPrediction(const std::string& model_path) { ...@@ -75,9 +75,14 @@ void TestWord2vecPrediction(const std::string& model_path) {
std::vector<PaddleTensor> outputs; std::vector<PaddleTensor> outputs;
CHECK(predictor->Run(slots, &outputs)); CHECK(predictor->Run(slots, &outputs));
PADDLE_ENFORCE_EQ(outputs.size(), 1UL); PADDLE_ENFORCE_EQ(outputs.size(), 1UL,
platform::errors::PreconditionNotMet(
"Output size should be 1, but got %d", outputs.size()));
// Check the output buffer size and result of each tid. // Check the output buffer size and result of each tid.
PADDLE_ENFORCE_EQ(outputs.front().data.length(), 33168UL); PADDLE_ENFORCE_EQ(outputs.front().data.length(), 33168UL,
platform::errors::PreconditionNotMet(
"Output's data length should be 33168 but got %d",
outputs.front().data.length()));
float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815, float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
0.000932706}; 0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float); const size_t num_elements = outputs.front().data.length() / sizeof(float);
......
...@@ -76,53 +76,62 @@ struct Argument { ...@@ -76,53 +76,62 @@ struct Argument {
} }
} }
#define DECL_ARGUMENT_FIELD(field__, Field, type__) \ #define DECL_ARGUMENT_FIELD(field__, Field, type__) \
public: \ public: \
type__& field__() { \ type__& field__() { \
PADDLE_ENFORCE(Has(#field__), "There is no such field"); \ PADDLE_ENFORCE_EQ( \
return field__##_; \ Has(#field__), true, \
} \ platform::errors::PreconditionNotMet("There is no such field")); \
void Set##Field(const type__& x) { \ return field__##_; \
field__##_ = x; \ } \
valid_fields_.insert(#field__); \ void Set##Field(const type__& x) { \
} \ field__##_ = x; \
DECL_ARGUMENT_FIELD_VALID(field__); \ valid_fields_.insert(#field__); \
type__* field__##_ptr() { return &field__##_; } \ } \
\ DECL_ARGUMENT_FIELD_VALID(field__); \
private: \ type__* field__##_ptr() { return &field__##_; } \
\
private: \
type__ field__##_; type__ field__##_;
#define DECL_ARGUMENT_FIELD_VALID(field__) \ #define DECL_ARGUMENT_FIELD_VALID(field__) \
bool field__##_valid() { return Has(#field__); } bool field__##_valid() { return Has(#field__); }
#define DECL_ARGUMENT_UNIQUE_FIELD(field__, Field, type__) \ #define DECL_ARGUMENT_UNIQUE_FIELD(field__, Field, type__) \
public: \ public: \
type__& field__() { \ type__& field__() { \
PADDLE_ENFORCE_NOT_NULL(field__##_); \ PADDLE_ENFORCE_NOT_NULL(field__##_, platform::errors::PreconditionNotMet( \
PADDLE_ENFORCE(Has(#field__)); \ "field should not be null.")); \
return *static_cast<type__*>(field__##_.get()); \ PADDLE_ENFORCE_EQ( \
} \ Has(#field__), true, \
void Set##Field(type__* x) { \ platform::errors::PreconditionNotMet("There is no such field")); \
field__##_ = \ return *static_cast<type__*>(field__##_.get()); \
unique_ptr_t(x, [](void* x) { delete static_cast<type__*>(x); }); \ } \
valid_fields_.insert(#field__); \ void Set##Field(type__* x) { \
} \ field__##_ = \
void Set##Field##NotOwned(type__* x) { \ unique_ptr_t(x, [](void* x) { delete static_cast<type__*>(x); }); \
valid_fields_.insert(#field__); \ valid_fields_.insert(#field__); \
field__##_ = unique_ptr_t(x, [](void* x) {}); \ } \
} \ void Set##Field##NotOwned(type__* x) { \
DECL_ARGUMENT_FIELD_VALID(field__); \ valid_fields_.insert(#field__); \
type__* field__##_ptr() { \ field__##_ = unique_ptr_t(x, [](void* x) {}); \
PADDLE_ENFORCE(Has(#field__)); \ } \
return static_cast<type__*>(field__##_.get()); \ DECL_ARGUMENT_FIELD_VALID(field__); \
} \ type__* field__##_ptr() { \
type__* Release##Field() { \ PADDLE_ENFORCE_EQ( \
PADDLE_ENFORCE(Has(#field__)); \ Has(#field__), true, \
valid_fields_.erase(#field__); \ platform::errors::PreconditionNotMet("There is no such field")); \
return static_cast<type__*>(field__##_.release()); \ return static_cast<type__*>(field__##_.get()); \
} \ } \
\ type__* Release##Field() { \
private: \ PADDLE_ENFORCE_EQ( \
Has(#field__), true, \
platform::errors::PreconditionNotMet("There is no such field")); \
valid_fields_.erase(#field__); \
return static_cast<type__*>(field__##_.release()); \
} \
\
private: \
unique_ptr_t field__##_; unique_ptr_t field__##_;
DECL_ARGUMENT_FIELD(predictor_id, PredictorID, int); DECL_ARGUMENT_FIELD(predictor_id, PredictorID, int);
...@@ -227,8 +236,10 @@ struct Argument { ...@@ -227,8 +236,10 @@ struct Argument {
}; };
#define ARGUMENT_CHECK_FIELD(argument__, fieldname__) \ #define ARGUMENT_CHECK_FIELD(argument__, fieldname__) \
PADDLE_ENFORCE(argument__->Has(#fieldname__), \ PADDLE_ENFORCE_EQ( \
"the argument field [%s] should be set", #fieldname__); argument__->Has(#fieldname__), true, \
platform::errors::PreconditionNotMet( \
"the argument field [%s] should be set", #fieldname__));
} // namespace analysis } // namespace analysis
} // namespace inference } // namespace inference
......
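
Reflowed above, DECL_ARGUMENT_FIELD generates four members per field: a getter that enforces presence, a setter that records validity, a _valid() probe, and a _ptr() accessor. A self-contained sketch of what that expansion amounts to for the single int field declared at the end of the hunk, with a plain exception in place of PADDLE_ENFORCE_EQ:

#include <set>
#include <stdexcept>
#include <string>

// Roughly what DECL_ARGUMENT_FIELD(predictor_id, PredictorID, int) boils
// down to, written out by hand as a sketch.
class Argument {
 public:
  bool Has(const std::string& field) const {
    return valid_fields_.count(field) > 0;
  }
  int& predictor_id() {
    if (!Has("predictor_id"))
      throw std::logic_error("There is no such field");  // PreconditionNotMet
    return predictor_id_;
  }
  void SetPredictorID(const int& x) {
    predictor_id_ = x;
    valid_fields_.insert("predictor_id");
  }
  bool predictor_id_valid() const { return Has("predictor_id"); }
  int* predictor_id_ptr() { return &predictor_id_; }

 private:
  std::set<std::string> valid_fields_;
  int predictor_id_ = 0;
};
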
...@@ -73,12 +73,15 @@ struct DataTypeNamer { ...@@ -73,12 +73,15 @@ struct DataTypeNamer {
template <typename T> template <typename T>
const std::string &repr() const { const std::string &repr() const {
auto x = std::type_index(typeid(T)); auto x = std::type_index(typeid(T));
PADDLE_ENFORCE(dic_.count(x), "unknown type for representation"); PADDLE_ENFORCE_GT(dic_.count(x), 0, platform::errors::PreconditionNotMet(
"unknown type for representation"));
return dic_.at(x); return dic_.at(x);
} }
const std::string &repr(const std::type_index &type) const { // NOLINT const std::string &repr(const std::type_index &type) const { // NOLINT
PADDLE_ENFORCE(dic_.count(type), "unknown type for representation"); PADDLE_ENFORCE_GT(dic_.count(type), 0,
platform::errors::PreconditionNotMet(
"unknown type for representation"));
return dic_.at(type); return dic_.at(type);
} }
...@@ -116,7 +119,9 @@ template <typename T> ...@@ -116,7 +119,9 @@ template <typename T>
class OrderedRegistry { class OrderedRegistry {
public: public:
T *Register(const std::string &name, T *x) { T *Register(const std::string &name, T *x) {
PADDLE_ENFORCE(!dic_.count(name), "duplicate key [%s]", name); PADDLE_ENFORCE_EQ(dic_.count(name), 0,
platform::errors::PreconditionNotMet(
"There exists duplicate key [%s]", name));
dic_[name] = elements_.size(); dic_[name] = elements_.size();
elements_.emplace_back(std::unique_ptr<T>(x)); elements_.emplace_back(std::unique_ptr<T>(x));
return elements_.back().get(); return elements_.back().get();
...@@ -136,14 +141,20 @@ class OrderedRegistry { ...@@ -136,14 +141,20 @@ class OrderedRegistry {
template <typename T> template <typename T>
T &GetFromScope(const framework::Scope &scope, const std::string &name) { T &GetFromScope(const framework::Scope &scope, const std::string &name) {
framework::Variable *var = scope.FindVar(name); framework::Variable *var = scope.FindVar(name);
PADDLE_ENFORCE(var != nullptr); PADDLE_ENFORCE_NOT_NULL(
var, platform::errors::PreconditionNotMet(
"The var which name is %s should not be nullptr.", name));
return *var->GetMutable<T>(); return *var->GetMutable<T>();
} }
static framework::proto::ProgramDesc LoadProgramDesc( static framework::proto::ProgramDesc LoadProgramDesc(
const std::string &model_path) { const std::string &model_path) {
std::ifstream fin(model_path, std::ios::in | std::ios::binary); std::ifstream fin(model_path, std::ios::in | std::ios::binary);
PADDLE_ENFORCE(fin.is_open(), "Cannot open file %s", model_path); PADDLE_ENFORCE_EQ(
fin.is_open(), true,
platform::errors::NotFound(
"Cannot open file %s, please confirm whether the file exists",
model_path));
fin.seekg(0, std::ios::end); fin.seekg(0, std::ios::end);
std::string buffer(fin.tellg(), ' '); std::string buffer(fin.tellg(), ' ');
fin.seekg(0, std::ios::beg); fin.seekg(0, std::ios::beg);
...@@ -188,10 +199,12 @@ static std::string GetDirRoot(const std::string &path) { ...@@ -188,10 +199,12 @@ static std::string GetDirRoot(const std::string &path) {
static std::string GetOrCreateModelOptCacheDir(const std::string &model_root) { static std::string GetOrCreateModelOptCacheDir(const std::string &model_root) {
std::string opt_cache_dir = model_root + "/_opt_cache/"; std::string opt_cache_dir = model_root + "/_opt_cache/";
if (!PathExists(opt_cache_dir)) { if (!PathExists(opt_cache_dir)) {
PADDLE_ENFORCE(MKDIR(opt_cache_dir.c_str()) != -1, PADDLE_ENFORCE_NE(
"Can not create optimize cache directory: %s, Make sure you " MKDIR(opt_cache_dir.c_str()), -1,
"have permission to write", platform::errors::PreconditionNotMet(
opt_cache_dir); "Can not create optimize cache directory: %s, Make sure you "
"have permission to write",
opt_cache_dir));
} }
return opt_cache_dir; return opt_cache_dir;
} }
......
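
LoadProgramDesc above opens the model file in binary mode, enforces is_open(), and sizes its read buffer from tellg(). The same read-whole-file idiom in isolation (ReadWholeFile is an illustrative name, not a Paddle API):

#include <fstream>
#include <stdexcept>
#include <string>

// The read-whole-file idiom used by LoadProgramDesc above: enforce that
// the open succeeded, then size a buffer from tellg() and read into it.
std::string ReadWholeFile(const std::string& path) {
  std::ifstream fin(path, std::ios::in | std::ios::binary);
  if (!fin.is_open()) {
    throw std::runtime_error("Cannot open file " + path +
                             ", please confirm whether the file exists");
  }
  fin.seekg(0, std::ios::end);
  std::string buffer(static_cast<size_t>(fin.tellg()), ' ');
  fin.seekg(0, std::ios::beg);
  fin.read(&buffer[0], static_cast<std::streamsize>(buffer.size()));
  return buffer;
}
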
...@@ -38,7 +38,9 @@ IRPassManager::IRPassManager(Argument *argument) { ...@@ -38,7 +38,9 @@ IRPassManager::IRPassManager(Argument *argument) {
graph_ = std::unique_ptr<Graph>(new Graph(argument->main_program())); graph_ = std::unique_ptr<Graph>(new Graph(argument->main_program()));
if (argument->Has("scope")) { if (argument->Has("scope")) {
auto *scope_ptr = argument->scope_ptr(); auto *scope_ptr = argument->scope_ptr();
PADDLE_ENFORCE(scope_ptr); PADDLE_ENFORCE_NOT_NULL(scope_ptr,
platform::errors::PreconditionNotMet(
"The scope ptr should not be nullptr."));
graph_->SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr); graph_->SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
} }
...@@ -101,13 +103,17 @@ void IRPassManager::CreatePasses(Argument *argument, ...@@ -101,13 +103,17 @@ void IRPassManager::CreatePasses(Argument *argument,
std::string optim_cache_dir = argument->optim_cache_dir(); std::string optim_cache_dir = argument->optim_cache_dir();
bool int8_valid = bool int8_valid =
!(model_from_memory && optim_cache_dir.empty() && enable_int8); !(model_from_memory && optim_cache_dir.empty() && enable_int8);
PADDLE_ENFORCE(int8_valid, PADDLE_ENFORCE_EQ(
"When you are in TRT INT8 mode, and load model from " int8_valid, true,
"memory, you should set optim_cache_dir using " platform::errors::PreconditionNotMet(
"config.SetOptimCacheDir()"); "When you are in TRT INT8 mode, and load model from "
PADDLE_ENFORCE(!(model_from_memory && use_static_engine), "memory, you should set optim_cache_dir using "
"When you are using Paddle-TRT, and also using load model " "config.SetOptimCacheDir()"));
"from memory, you should set the use_static to false."); PADDLE_ENFORCE_EQ(
!(model_from_memory && use_static_engine), true,
platform::errors::PreconditionNotMet(
"When you are using Paddle-TRT, and also using load model "
"from memory, you should set the use_static to false."));
if (!optim_cache_dir.empty()) { if (!optim_cache_dir.empty()) {
pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir)); pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
......
...@@ -123,7 +123,9 @@ void RenameAndGetOutputs( ...@@ -123,7 +123,9 @@ void RenameAndGetOutputs(
auto add_block_var = [&](const std::string &graph_arg, auto add_block_var = [&](const std::string &graph_arg,
const std::string &block_arg) { const std::string &block_arg) {
auto arg_var_node = graph_var_map.find(graph_arg); auto arg_var_node = graph_var_map.find(graph_arg);
PADDLE_ENFORCE(arg_var_node != graph_var_map.end()); PADDLE_ENFORCE_NE(arg_var_node, graph_var_map.end(),
platform::errors::InvalidArgument(
"Can not find %s in graph_var_map", graph_arg));
auto *var_t = block_desc->Var(block_arg); auto *var_t = block_desc->Var(block_arg);
var_t->SetShape(arg_var_node->second->Var()->GetShape()); var_t->SetShape(arg_var_node->second->Var()->GetShape());
var_t->SetDataType(arg_var_node->second->Var()->GetDataType()); var_t->SetDataType(arg_var_node->second->Var()->GetDataType());
...@@ -133,7 +135,10 @@ void RenameAndGetOutputs( ...@@ -133,7 +135,10 @@ void RenameAndGetOutputs(
framework::proto::OpDesc *op = block_desc->Op(index)->Proto(); framework::proto::OpDesc *op = block_desc->Op(index)->Proto();
framework::OpDesc op_desc(*op, nullptr); framework::OpDesc op_desc(*op, nullptr);
auto correspond_node = subgraph_nodes[index]; auto correspond_node = subgraph_nodes[index];
PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type()); PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type(),
platform::errors::PreconditionNotMet(
"We should get %s, but get %s", op->type(),
correspond_node->Name()));
std::unordered_map<std::string, size_t> var2id; std::unordered_map<std::string, size_t> var2id;
std::unordered_map<std::string, framework::ir::Node *> in_vars; std::unordered_map<std::string, framework::ir::Node *> in_vars;
......
...@@ -97,7 +97,9 @@ void TensorRtSubgraphPass::CreateTensorRTOp( ...@@ -97,7 +97,9 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
std::vector<std::string> *repetitive_params) const { std::vector<std::string> *repetitive_params) const {
auto *op_desc = node->Op(); auto *op_desc = node->Op();
auto &subgraph = *framework::ir::Agent(node).subgraph(); auto &subgraph = *framework::ir::Agent(node).subgraph();
PADDLE_ENFORCE(!subgraph.empty()); PADDLE_ENFORCE_EQ(subgraph.empty(), false,
platform::errors::PreconditionNotMet(
"The subgraph should not be empty."));
framework::ProgramDesc *program_desc = framework::ProgramDesc *program_desc =
Get<framework::ProgramDesc *>("program"); Get<framework::ProgramDesc *>("program");
...@@ -194,12 +196,17 @@ void TensorRtSubgraphPass::CreateTensorRTOp( ...@@ -194,12 +196,17 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
// to Tensor. // to Tensor.
std::vector<std::string> output_mapping; std::vector<std::string> output_mapping;
for (auto name : output_names) { for (auto name : output_names) {
PADDLE_ENFORCE(output_name_map.count(name) != 0); PADDLE_ENFORCE_NE(output_name_map.count(name), 0,
platform::errors::PreconditionNotMet(
"The output_name_map should have %s", name));
output_mapping.push_back(output_name_map[name]); output_mapping.push_back(output_name_map[name]);
} }
PADDLE_ENFORCE(!output_mapping.empty()); PADDLE_ENFORCE_EQ(output_mapping.empty(), false,
PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(), platform::errors::PreconditionNotMet(
"the block has no var-desc"); "The output_mapping should not be empty."));
PADDLE_ENFORCE_EQ(
!block_desc.Proto()->vars().empty(), true,
platform::errors::PreconditionNotMet("the block has no var-desc"));
// Set attrs // Set attrs
op_desc->SetType("tensorrt_engine"); op_desc->SetType("tensorrt_engine");
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h" #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
#include <memory>
#include <utility>
#include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/analysis/ir_pass_manager.h" #include "paddle/fluid/inference/analysis/ir_pass_manager.h"
...@@ -31,7 +33,10 @@ void IrAnalysisPass::RunImpl(Argument* argument) { ...@@ -31,7 +33,10 @@ void IrAnalysisPass::RunImpl(Argument* argument) {
// Apply passes. // Apply passes.
IRPassManager the_ir_manager(argument); IRPassManager the_ir_manager(argument);
graph = the_ir_manager.Apply(std::move(graph)); graph = the_ir_manager.Apply(std::move(graph));
PADDLE_ENFORCE_GT(graph->Nodes().size(), 0); PADDLE_ENFORCE_GT(
graph->Nodes().size(), 0,
platform::errors::PreconditionNotMet(
"The graph nodes size should be greater than 0, but got 0"));
argument->SetMainGraph(graph.release()); argument->SetMainGraph(graph.release());
CollectFusionStatis(argument); CollectFusionStatis(argument);
} }
......
...@@ -31,7 +31,9 @@ void IrGraphBuildPass::RunImpl(Argument *argument) { ...@@ -31,7 +31,9 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
if (!argument->scope_valid()) { if (!argument->scope_valid()) {
argument->SetScope(new framework::Scope); argument->SetScope(new framework::Scope);
} }
PADDLE_ENFORCE(argument->use_gpu_valid()); PADDLE_ENFORCE_EQ(argument->use_gpu_valid(), true,
platform::errors::PreconditionNotMet(
"The use_gpu field should be valid"));
// The load program should run on the same device with the inference program, // The load program should run on the same device with the inference program,
// so that the parameters will on the same device, or they will keep copying // so that the parameters will on the same device, or they will keep copying
...@@ -51,14 +53,17 @@ void IrGraphBuildPass::RunImpl(Argument *argument) { ...@@ -51,14 +53,17 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
argument->model_from_memory_valid() && argument->model_from_memory()); argument->model_from_memory_valid() && argument->model_from_memory());
argument->SetMainProgram(program.release()); argument->SetMainProgram(program.release());
} else { } else {
PADDLE_THROW( PADDLE_THROW(platform::errors::PreconditionNotMet(
"either model_dir or (program path and parameter path) should be set."); "either model_dir or (program path and parameter path) should be "
"set."));
} }
auto graph = std::unique_ptr<Graph>(new Graph(argument->main_program())); auto graph = std::unique_ptr<Graph>(new Graph(argument->main_program()));
argument->SetMainGraph(graph.release()); argument->SetMainGraph(graph.release());
auto *scope_ptr = argument->scope_ptr(); auto *scope_ptr = argument->scope_ptr();
PADDLE_ENFORCE(scope_ptr); PADDLE_ENFORCE_NOT_NULL(scope_ptr,
platform::errors::PreconditionNotMet(
"The scope ptr should not be nullptr."));
argument->main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr); argument->main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
} }
......
...@@ -31,7 +31,8 @@ void IrInferCleanGraphPass::RunImpl(Argument* argument) { ...@@ -31,7 +31,8 @@ void IrInferCleanGraphPass::RunImpl(Argument* argument) {
std::unordered_set<const framework::ir::Node*> invalid_nodes; std::unordered_set<const framework::ir::Node*> invalid_nodes;
int valid_op = 0; int valid_op = 0;
for (auto* node : graph.Nodes()) { for (auto* node : graph.Nodes()) {
PADDLE_ENFORCE_NOT_NULL(node); PADDLE_ENFORCE_NOT_NULL(node, platform::errors::PreconditionNotMet(
"The node should not be nullptr."));
if (is_valid_node(node)) { if (is_valid_node(node)) {
invalid_nodes.insert(node); invalid_nodes.insert(node);
} else if (node->IsOp()) { } else if (node->IsOp()) {
......
...@@ -23,8 +23,12 @@ namespace inference { ...@@ -23,8 +23,12 @@ namespace inference {
namespace analysis { namespace analysis {
void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) { void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
PADDLE_ENFORCE(argument->scope_valid()); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE(argument->use_gpu_valid()); argument->scope_valid(), true,
platform::errors::PreconditionNotMet("The scope field should be valid"));
PADDLE_ENFORCE_EQ(argument->use_gpu_valid(), true,
platform::errors::PreconditionNotMet(
"The use_gpu field should be valid"));
platform::Place place; platform::Place place;
...@@ -40,7 +44,9 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) { ...@@ -40,7 +44,9 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
LOG(INFO) << "Sync params from CPU to GPU"; LOG(INFO) << "Sync params from CPU to GPU";
PADDLE_ENFORCE(argument->gpu_device_id_valid()); PADDLE_ENFORCE_EQ(argument->gpu_device_id_valid(), true,
platform::errors::PreconditionNotMet(
"The gpu_device_id field should be valid"));
place = platform::CUDAPlace(argument->gpu_device_id()); place = platform::CUDAPlace(argument->gpu_device_id());
auto *scope = argument->scope_ptr(); auto *scope = argument->scope_ptr();
...@@ -56,7 +62,8 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) { ...@@ -56,7 +62,8 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
continue; continue;
} }
auto *var = scope->FindLocalVar(var_name); auto *var = scope->FindLocalVar(var_name);
PADDLE_ENFORCE(var != nullptr); PADDLE_ENFORCE_NOT_NULL(var, platform::errors::PreconditionNotMet(
"The var should not be nullptr"));
if (var->IsType<framework::LoDTensor>() || if (var->IsType<framework::LoDTensor>() ||
var->IsType<framework::Tensor>()) { var->IsType<framework::Tensor>()) {
auto *t = var->GetMutable<framework::LoDTensor>(); auto *t = var->GetMutable<framework::LoDTensor>();
......
...@@ -224,7 +224,9 @@ void UpdateOpDescsByReuse( ...@@ -224,7 +224,9 @@ void UpdateOpDescsByReuse(
// modify the graph // modify the graph
for (auto input_node : node->inputs) { for (auto input_node : node->inputs) {
PADDLE_ENFORCE(input_node->IsVar()); PADDLE_ENFORCE_EQ(input_node->IsVar(), true,
platform::errors::PreconditionNotMet(
"The input node should be a variable."));
std::string input_node_name = input_node->Name(); std::string input_node_name = input_node->Name();
if (reuse_table.count(input_node_name) && if (reuse_table.count(input_node_name) &&
reuse_table.at(input_node_name) != input_node_name) { reuse_table.at(input_node_name) != input_node_name) {
...@@ -246,7 +248,9 @@ void UpdateOpDescsByReuse( ...@@ -246,7 +248,9 @@ void UpdateOpDescsByReuse(
// modify the graph // modify the graph
for (auto out_node : node->outputs) { for (auto out_node : node->outputs) {
PADDLE_ENFORCE(out_node->IsVar()); PADDLE_ENFORCE_EQ(out_node->IsVar(), true,
platform::errors::PreconditionNotMet(
"The output node should be a variable."));
std::string out_node_name = out_node->Name(); std::string out_node_name = out_node->Name();
if (reuse_table.count(out_node_name) && if (reuse_table.count(out_node_name) &&
reuse_table.at(out_node_name) != out_node_name) { reuse_table.at(out_node_name) != out_node_name) {
......
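UpdateOpDescsByReuse rewrites each op's input and output variable names through a reuse table so that variables sharing memory also share a name in the graph. A hedged sketch of that rewrite with plain standard-library types; RewriteNames and its signature are hypothetical, only the table-lookup logic mirrors the pass:

#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

// Every name that the reuse table maps to a *different* variable is
// replaced by the variable it now shares memory with.
std::vector<std::string> RewriteNames(
    const std::vector<std::string>& names,
    const std::unordered_map<std::string, std::string>& reuse_table) {
  std::vector<std::string> result;
  for (const auto& name : names) {
    auto it = reuse_table.find(name);
    result.push_back(it != reuse_table.end() && it->second != name
                         ? it->second
                         : name);
  }
  return result;
}

int main() {
  std::unordered_map<std::string, std::string> reuse_table = {
      {"tmp_1", "buf_0"}, {"tmp_2", "tmp_2"}};
  for (const auto& n : RewriteNames({"tmp_1", "tmp_2", "x"}, reuse_table))
    std::printf("%s\n", n.c_str());  // buf_0, tmp_2, x
  return 0;
}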
...@@ -230,7 +230,8 @@ void AnalysisConfig::EnableMkldnnBfloat16() { ...@@ -230,7 +230,8 @@ void AnalysisConfig::EnableMkldnnBfloat16() {
MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const { MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_, PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_,
"MkldnnQuantizer was not enabled yet."); platform::errors::PreconditionNotMet(
"MkldnnQuantizer was not enabled yet."));
return mkldnn_quantizer_config_.get(); return mkldnn_quantizer_config_.get();
} }
......
...@@ -169,7 +169,8 @@ bool AnalysisPredictor::PrepareScope( ...@@ -169,7 +169,8 @@ bool AnalysisPredictor::PrepareScope(
if (parent_scope) { if (parent_scope) {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
parent_scope, parent_scope,
"Both program and parent_scope should be set in Clone mode."); platform::errors::PreconditionNotMet(
"Both program and parent_scope should be set in Clone mode."));
scope_ = parent_scope; scope_ = parent_scope;
status_is_cloned_ = true; status_is_cloned_ = true;
} else { } else {
...@@ -235,7 +236,9 @@ bool AnalysisPredictor::PrepareExecutor() { ...@@ -235,7 +236,9 @@ bool AnalysisPredictor::PrepareExecutor() {
executor_->Prepare(sub_scope_, *inference_program_, 0, executor_->Prepare(sub_scope_, *inference_program_, 0,
config_.use_feed_fetch_ops_); config_.use_feed_fetch_ops_);
PADDLE_ENFORCE_NOT_NULL(sub_scope_); PADDLE_ENFORCE_NOT_NULL(sub_scope_,
platform::errors::PreconditionNotMet(
"The sub_scope should not be nullptr."));
return true; return true;
} }
...@@ -297,7 +300,8 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs, ...@@ -297,7 +300,8 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
timer.tic(); timer.tic();
// set feed variable // set feed variable
framework::Scope *scope = sub_scope_ ? sub_scope_ : scope_.get(); framework::Scope *scope = sub_scope_ ? sub_scope_ : scope_.get();
PADDLE_ENFORCE_NOT_NULL(scope, "The scope should not be nullptr."); PADDLE_ENFORCE_NOT_NULL(scope, platform::errors::PreconditionNotMet(
"The scope should not be nullptr."));
if (!SetFeed(inputs, scope)) { if (!SetFeed(inputs, scope)) {
LOG(ERROR) << "fail to set feed"; LOG(ERROR) << "fail to set feed";
return false; return false;
...@@ -399,7 +403,11 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs, ...@@ -399,7 +403,11 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
outputs->resize(fetches_.size()); outputs->resize(fetches_.size());
for (size_t i = 0; i < fetches_.size(); ++i) { for (size_t i = 0; i < fetches_.size(); ++i) {
int idx = BOOST_GET_CONST(int, fetches_[i]->GetAttr("col")); int idx = BOOST_GET_CONST(int, fetches_[i]->GetAttr("col"));
PADDLE_ENFORCE((size_t)idx == i); PADDLE_ENFORCE_EQ(
static_cast<size_t>(idx), i,
platform::errors::InvalidArgument(
"Fetch op's col attr(%d) should be equal to the index(%d)", idx,
i));
framework::FetchType &fetch_var = framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx); framework::GetFetchVariable(*scope, "fetch", idx);
auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var); auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var);
...@@ -435,10 +443,12 @@ void AnalysisPredictor::PrepareArgument() { ...@@ -435,10 +443,12 @@ void AnalysisPredictor::PrepareArgument() {
if (!config_.model_dir().empty()) { if (!config_.model_dir().empty()) {
argument_.SetModelDir(config_.model_dir()); argument_.SetModelDir(config_.model_dir());
} else { } else {
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(config_.params_file().empty(), false,
!config_.params_file().empty(), platform::errors::PreconditionNotMet(
"Either model_dir or (param_file, prog_file) should be set."); "Either model_dir or param_file should be set."));
PADDLE_ENFORCE(!config_.prog_file().empty()); PADDLE_ENFORCE_EQ(config_.prog_file().empty(), false,
platform::errors::PreconditionNotMet(
"Either model_dir or prog_file should be set."));
std::string dir = inference::analysis::GetDirRoot(config_.prog_file()); std::string dir = inference::analysis::GetDirRoot(config_.prog_file());
argument_.SetModelProgramPath(config_.prog_file()); argument_.SetModelProgramPath(config_.prog_file());
...@@ -503,7 +513,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() { ...@@ -503,7 +513,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
PrepareArgument(); PrepareArgument();
Analyzer().Run(&argument_); Analyzer().Run(&argument_);
PADDLE_ENFORCE(argument_.scope_valid()); PADDLE_ENFORCE_EQ(
argument_.scope_valid(), true,
platform::errors::InvalidArgument("The argument scope should be valid."));
VLOG(5) << "to prepare executor"; VLOG(5) << "to prepare executor";
ARGUMENT_CHECK_FIELD((&argument_), ir_analyzed_program); ARGUMENT_CHECK_FIELD((&argument_), ir_analyzed_program);
inference_program_.reset( inference_program_.reset(
...@@ -525,8 +537,10 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor< ...@@ -525,8 +537,10 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
FLAGS_minloglevel = 2; // GLOG_ERROR FLAGS_minloglevel = 2; // GLOG_ERROR
} }
VLOG(3) << "create AnalysisConfig"; VLOG(3) << "create AnalysisConfig";
PADDLE_ENFORCE(config.is_valid(), PADDLE_ENFORCE_EQ(
"Note: Each config can only be used for one predictor."); config.is_valid(), true,
platform::errors::InvalidArgument(
"Note: Each config can only be used for one predictor."));
if (config.use_gpu()) { if (config.use_gpu()) {
static std::once_flag gflags_initialized; static std::once_flag gflags_initialized;
...@@ -623,7 +637,9 @@ bool AnalysisPredictor::MkldnnQuantize() { ...@@ -623,7 +637,9 @@ bool AnalysisPredictor::MkldnnQuantize() {
} }
void AnalysisPredictor::PrepareFeedFetch() { void AnalysisPredictor::PrepareFeedFetch() {
PADDLE_ENFORCE_NOT_NULL(sub_scope_); PADDLE_ENFORCE_NOT_NULL(sub_scope_,
platform::errors::InvalidArgument(
"The sub_scope should not be nullptr."));
CreateFeedFetchVar(sub_scope_); CreateFeedFetchVar(sub_scope_);
for (auto *op : inference_program_->Block(0).AllOps()) { for (auto *op : inference_program_->Block(0).AllOps()) {
if (op->Type() == "feed") { if (op->Type() == "feed") {
...@@ -646,7 +662,8 @@ void AnalysisPredictor::PrepareFeedFetch() { ...@@ -646,7 +662,8 @@ void AnalysisPredictor::PrepareFeedFetch() {
} }
void AnalysisPredictor::CreateFeedFetchVar(framework::Scope *scope) { void AnalysisPredictor::CreateFeedFetchVar(framework::Scope *scope) {
PADDLE_ENFORCE_NOT_NULL(scope); PADDLE_ENFORCE_NOT_NULL(scope, platform::errors::InvalidArgument(
"The scope should not be nullptr."));
auto *var = scope->Var("feed"); auto *var = scope->Var("feed");
var->GetMutable<framework::FeedList>(); var->GetMutable<framework::FeedList>();
var = scope->Var("fetch"); var = scope->Var("fetch");
...@@ -667,7 +684,8 @@ AnalysisPredictor::GetInputTensorShape() { ...@@ -667,7 +684,8 @@ AnalysisPredictor::GetInputTensorShape() {
std::vector<std::string> names = GetInputNames(); std::vector<std::string> names = GetInputNames();
for (std::string name : names) { for (std::string name : names) {
auto *var = inference_program_->Block(0).FindVar(name); auto *var = inference_program_->Block(0).FindVar(name);
PADDLE_ENFORCE_NOT_NULL(var, "input %s does not exist.", name); PADDLE_ENFORCE_NOT_NULL(var, platform::errors::PreconditionNotMet(
"Input %s does not exist.", name));
input_shapes[name] = var->GetShape(); input_shapes[name] = var->GetShape();
} }
return input_shapes; return input_shapes;
...@@ -683,7 +701,11 @@ std::vector<std::string> AnalysisPredictor::GetOutputNames() { ...@@ -683,7 +701,11 @@ std::vector<std::string> AnalysisPredictor::GetOutputNames() {
std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor( std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
const std::string &name) { const std::string &name) {
PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name); PADDLE_ENFORCE_NOT_NULL(
executor_->scope()->FindVar(name),
platform::errors::PreconditionNotMet(
"The variable named %s is not found in the scope of the exector.",
name));
std::unique_ptr<ZeroCopyTensor> res( std::unique_ptr<ZeroCopyTensor> res(
new ZeroCopyTensor(static_cast<void *>(executor_->scope()))); new ZeroCopyTensor(static_cast<void *>(executor_->scope())));
res->input_or_output_ = true; res->input_or_output_ = true;
...@@ -700,7 +722,11 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor( ...@@ -700,7 +722,11 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor( std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
const std::string &name) { const std::string &name) {
PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name); PADDLE_ENFORCE_NOT_NULL(
executor_->scope()->FindVar(name),
platform::errors::PreconditionNotMet(
"he variable named %s is not found in the scope of the exector.",
name));
std::unique_ptr<ZeroCopyTensor> res( std::unique_ptr<ZeroCopyTensor> res(
new ZeroCopyTensor(static_cast<void *>(executor_->scope()))); new ZeroCopyTensor(static_cast<void *>(executor_->scope())));
res->input_or_output_ = false; res->input_or_output_ = false;
...@@ -761,8 +787,11 @@ bool AnalysisPredictor::LoadProgramDesc() { ...@@ -761,8 +787,11 @@ bool AnalysisPredictor::LoadProgramDesc() {
std::string pb_content; std::string pb_content;
// Read binary // Read binary
std::ifstream fin(filename, std::ios::in | std::ios::binary); std::ifstream fin(filename, std::ios::in | std::ios::binary);
PADDLE_ENFORCE(static_cast<bool>(fin.is_open()), "Cannot open file %s", PADDLE_ENFORCE_EQ(
filename); static_cast<bool>(fin.is_open()), true,
platform::errors::NotFound(
"Cannot open file %s, please confirm whether the file is normal.",
filename));
fin.seekg(0, std::ios::end); fin.seekg(0, std::ios::end);
pb_content.resize(fin.tellg()); pb_content.resize(fin.tellg());
fin.seekg(0, std::ios::beg); fin.seekg(0, std::ios::beg);
...@@ -779,7 +808,8 @@ bool AnalysisPredictor::LoadProgramDesc() { ...@@ -779,7 +808,8 @@ bool AnalysisPredictor::LoadProgramDesc() {
bool AnalysisPredictor::LoadParameters() { bool AnalysisPredictor::LoadParameters() {
PADDLE_ENFORCE_NOT_NULL(inference_program_.get(), PADDLE_ENFORCE_NOT_NULL(inference_program_.get(),
"The inference program should be loaded first."); platform::errors::PreconditionNotMet(
"The inference program should be loaded first."));
const auto &global_block = inference_program_->MutableBlock(0); const auto &global_block = inference_program_->MutableBlock(0);
...@@ -855,8 +885,9 @@ void AnalysisPredictor::ClearIntermediateTensor() { ...@@ -855,8 +885,9 @@ void AnalysisPredictor::ClearIntermediateTensor() {
#if PADDLE_WITH_TENSORRT #if PADDLE_WITH_TENSORRT
bool AnalysisPredictor::SaveTrtCalibToDisk() { bool AnalysisPredictor::SaveTrtCalibToDisk() {
PADDLE_ENFORCE(config_.tensorrt_engine_enabled(), PADDLE_ENFORCE_EQ(config_.tensorrt_engine_enabled(), true,
"This func can be invoked only in trt mode"); platform::errors::PreconditionNotMet(
"This func can be invoked only in trt mode"));
auto &block = inference_program_->Block(0); auto &block = inference_program_->Block(0);
for (auto &op_desc : block.AllOps()) { for (auto &op_desc : block.AllOps()) {
if (op_desc->Type() == "tensorrt_engine") { if (op_desc->Type() == "tensorrt_engine") {
......
...@@ -62,9 +62,9 @@ PaddleBuf &PaddleBuf::operator=(const PaddleBuf &other) { ...@@ -62,9 +62,9 @@ PaddleBuf &PaddleBuf::operator=(const PaddleBuf &other) {
if (other.length() && other.data()) if (other.length() && other.data())
memcpy(data_, other.data(), other.length()); memcpy(data_, other.data(), other.length());
else if (other.length()) else if (other.length())
PADDLE_THROW( PADDLE_THROW(platform::errors::InvalidArgument(
"Invalid argument, null pointer data with length %u is passed", "Invalid argument, null pointer data with length %u is passed",
other.length()); other.length()));
length_ = other.length(); length_ = other.length();
memory_owned_ = true; memory_owned_ = true;
...@@ -92,7 +92,8 @@ void PaddleBuf::Resize(size_t length) { ...@@ -92,7 +92,8 @@ void PaddleBuf::Resize(size_t length) {
length_ = length; length_ = length;
memory_owned_ = true; memory_owned_ = true;
} else { } else {
PADDLE_THROW("The memory is allocated externally, can not Resized"); PADDLE_THROW(platform::errors::PreconditionNotMet(
"The memory is allocated externally, can not Resized"));
} }
} }
...@@ -105,7 +106,11 @@ void PaddleBuf::Reset(void *data, size_t length) { ...@@ -105,7 +106,11 @@ void PaddleBuf::Reset(void *data, size_t length) {
void PaddleBuf::Free() { void PaddleBuf::Free() {
if (memory_owned_ && data_) { if (memory_owned_ && data_) {
PADDLE_ENFORCE_GT(length_, 0UL); PADDLE_ENFORCE_GT(
length_, 0UL,
platform::errors::PreconditionNotMet(
"The memory used in PaddleBuf %d should be greater than 0",
length_));
delete[] static_cast<char *>(data_); delete[] static_cast<char *>(data_);
data_ = nullptr; data_ = nullptr;
length_ = 0; length_ = 0;
......
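The PaddleBuf changes hinge on its ownership rule: Resize() may reallocate only memory the buffer owns, while memory attached from outside via Reset() must never be resized, which is exactly what the new PreconditionNotMet throw guards. A small self-contained sketch of that rule; MiniBuf is an illustrative stand-in, not the real PaddleBuf:

#include <cstddef>
#include <cstdlib>
#include <stdexcept>

class MiniBuf {
 public:
  void Resize(size_t length) {
    if (!memory_owned_) {
      throw std::runtime_error(
          "The memory is allocated externally, cannot be resized.");
    }
    data_ = std::realloc(data_, length);  // owned memory may be reallocated
    length_ = length;
  }
  void Reset(void* data, size_t length) {  // attach external memory
    Free();
    data_ = data;
    length_ = length;
    memory_owned_ = false;
  }
  ~MiniBuf() { Free(); }

 private:
  void Free() {
    if (memory_owned_ && data_) std::free(data_);
    data_ = nullptr;
    length_ = 0;
  }
  void* data_ = nullptr;
  size_t length_ = 0;
  bool memory_owned_ = true;
};

int main() {
  MiniBuf buf;
  buf.Resize(32);           // fine: owned memory
  char external[16];
  buf.Reset(external, 16);  // now non-owned
  // buf.Resize(64);        // would throw, as in PaddleBuf::Resize
  return 0;
}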
...@@ -87,7 +87,9 @@ bool NativePaddlePredictor::Init( ...@@ -87,7 +87,9 @@ bool NativePaddlePredictor::Init(
if (parent_scope) { if (parent_scope) {
scope_ = parent_scope; scope_ = parent_scope;
sub_scope_ = &(parent_scope->NewScope()); sub_scope_ = &(parent_scope->NewScope());
PADDLE_ENFORCE_NOT_NULL(sub_scope_, "create sub scope fail"); PADDLE_ENFORCE_NOT_NULL(sub_scope_,
platform::errors::PreconditionNotMet(
"The sub_scope should not be nullptr."));
} else { } else {
paddle::framework::InitDevices(false); paddle::framework::InitDevices(false);
scope_.reset(new paddle::framework::Scope()); scope_.reset(new paddle::framework::Scope());
...@@ -182,7 +184,10 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() { ...@@ -182,7 +184,10 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_)); std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
// Hot fix the bug that result diff in multi-thread. // Hot fix the bug that result diff in multi-thread.
// TODO(Superjomn) re-implement a real clone here. // TODO(Superjomn) re-implement a real clone here.
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<NativePaddlePredictor *>(cls.get())); PADDLE_ENFORCE_NOT_NULL(
dynamic_cast<NativePaddlePredictor *>(cls.get()),
platform::errors::PreconditionNotMet(
"Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) { if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) {
LOG(ERROR) << "fail to call Init"; LOG(ERROR) << "fail to call Init";
return nullptr; return nullptr;
...@@ -224,8 +229,13 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs, ...@@ -224,8 +229,13 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
return false; return false;
} }
PADDLE_ENFORCE_NOT_NULL(input_ptr); PADDLE_ENFORCE_NOT_NULL(input_ptr,
PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data()); platform::errors::InvalidArgument(
"The input_ptr should not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(
inputs[i].data.data(),
platform::errors::InvalidArgument(
"The data of input tensor should not be null."));
if (platform::is_cpu_place(place_)) { if (platform::is_cpu_place(place_)) {
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy. // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(), std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
...@@ -241,7 +251,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs, ...@@ -241,7 +251,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
platform::CPUPlace(), inputs[i].data.data(), platform::CPUPlace(), inputs[i].data.data(),
inputs[i].data.length(), dev_ctx->stream()); inputs[i].data.length(), dev_ctx->stream());
#else #else
PADDLE_THROW("Not compile with CUDA, should not reach here."); PADDLE_THROW(platform::errors::Unavailable(
"Not compile with CUDA, should not reach here."));
#endif #endif
} }
...@@ -287,7 +298,11 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs, ...@@ -287,7 +298,11 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
outputs->resize(fetchs_.size()); outputs->resize(fetchs_.size());
for (size_t i = 0; i < fetchs_.size(); ++i) { for (size_t i = 0; i < fetchs_.size(); ++i) {
int idx = BOOST_GET_CONST(int, fetchs_[i]->GetAttr("col")); int idx = BOOST_GET_CONST(int, fetchs_[i]->GetAttr("col"));
PADDLE_ENFORCE((size_t)idx == i); PADDLE_ENFORCE_EQ(
static_cast<size_t>(idx), i,
platform::errors::InvalidArgument(
"Fetch op's col attr(%d) should be equal to the index(%d)", idx,
i));
framework::FetchType &fetch_var = framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx); framework::GetFetchVariable(*scope, "fetch", idx);
auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var); auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var);
...@@ -318,10 +333,15 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor< ...@@ -318,10 +333,15 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
VLOG(3) << "create NativePaddlePredictor"; VLOG(3) << "create NativePaddlePredictor";
if (config.use_gpu) { if (config.use_gpu) {
// 1. GPU memory // 1. GPU memory
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(config.fraction_of_gpu_memory, 0.f,
config.fraction_of_gpu_memory, 0.f, platform::errors::InvalidArgument(
"fraction_of_gpu_memory in the config should be set to range (0., 1.]"); "fraction_of_gpu_memory in the config should be set "
PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device); "to range (0., 1.]"));
PADDLE_ENFORCE_GE(config.device, 0,
platform::errors::PreconditionNotMet(
"Invalid device id %d, the device id should be "
"greater than or equal to 0.",
config.device));
std::vector<std::string> flags; std::vector<std::string> flags;
if (config.fraction_of_gpu_memory >= 0.0f || if (config.fraction_of_gpu_memory >= 0.0f ||
config.fraction_of_gpu_memory <= 0.95f) { config.fraction_of_gpu_memory <= 0.95f) {
...@@ -336,7 +356,9 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor< ...@@ -336,7 +356,9 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config)); std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
dynamic_cast<NativePaddlePredictor *>(predictor.get())); dynamic_cast<NativePaddlePredictor *>(predictor.get()),
platform::errors::PreconditionNotMet(
"Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) { if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
return nullptr; return nullptr;
} }
......
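Before assembling the gflags for the native predictor, the config's GPU fields are range-checked: fraction_of_gpu_memory must be non-negative and the device id must be at least 0. A hedged standalone sketch of the same checks; MiniGpuConfig and ValidateGpuConfig are hypothetical names:

#include <stdexcept>
#include <string>

// Hypothetical config holder; the real NativeConfig has more fields.
struct MiniGpuConfig {
  bool use_gpu = true;
  float fraction_of_gpu_memory = 0.5f;
  int device = 0;
};

// Mirrors the two PADDLE_ENFORCE_GE checks above.
void ValidateGpuConfig(const MiniGpuConfig& config) {
  if (!config.use_gpu) return;
  if (config.fraction_of_gpu_memory < 0.f) {
    throw std::invalid_argument(
        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
  }
  if (config.device < 0) {
    throw std::invalid_argument("Invalid device id " +
                                std::to_string(config.device));
  }
}

int main() {
  MiniGpuConfig config;
  ValidateGpuConfig(config);  // passes with the defaults above
  return 0;
}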
...@@ -112,16 +112,19 @@ static T convert(const std::string &item, ...@@ -112,16 +112,19 @@ static T convert(const std::string &item,
std::string message = std::string message =
"invalid_argument exception when try to convert : " + item; "invalid_argument exception when try to convert : " + item;
LOG(ERROR) << message; LOG(ERROR) << message;
PADDLE_THROW(message); PADDLE_THROW(platform::errors::InvalidArgument(
"invalid_argument exception when try to convert %s.", item));
} catch (std::out_of_range &e) { } catch (std::out_of_range &e) {
std::string message = std::string message =
"out_of_range exception when try to convert : " + item; "out_of_range exception when try to convert : " + item;
LOG(ERROR) << message; LOG(ERROR) << message;
PADDLE_THROW(message); PADDLE_THROW(platform::errors::InvalidArgument(
"out_of_range exception when try to convert %s.", item));
} catch (...) { } catch (...) {
std::string message = "unexpected exception when try to convert " + item; std::string message = "unexpected exception when try to convert " + item;
LOG(ERROR) << message; LOG(ERROR) << message;
PADDLE_THROW(message); PADDLE_THROW(platform::errors::InvalidArgument(
"unexpected exception when try to convert %s.", item));
} }
return res; return res;
} }
...@@ -353,7 +356,8 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid, ...@@ -353,7 +356,8 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
double batch_latency, int epoch = 1, double batch_latency, int epoch = 1,
const framework::proto::VarType::Type data_type = const framework::proto::VarType::Type data_type =
framework::proto::VarType::FP32) { framework::proto::VarType::FP32) {
PADDLE_ENFORCE_GT(batch_size, 0, "Non-positive batch size."); PADDLE_ENFORCE_GT(batch_size, 0, platform::errors::InvalidArgument(
"Non-positive batch size."));
double sample_latency = batch_latency / batch_size; double sample_latency = batch_latency / batch_size;
LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid
<< " ======"; << " ======";
......
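The convert<T> helper wraps the std::sto* conversions and maps each standard exception onto one typed InvalidArgument error that embeds the offending string. A runnable sketch of the same pattern for int; ConvertToInt is a hypothetical name and std::runtime_error stands in for PADDLE_THROW:

#include <cstdio>
#include <stdexcept>
#include <string>

int ConvertToInt(const std::string& item) {
  try {
    return std::stoi(item);
  } catch (const std::invalid_argument&) {
    // Non-numeric input.
    throw std::runtime_error(
        "invalid_argument exception when trying to convert " + item);
  } catch (const std::out_of_range&) {
    // Numeric, but does not fit in int.
    throw std::runtime_error(
        "out_of_range exception when trying to convert " + item);
  }
}

int main() {
  std::printf("%d\n", ConvertToInt("42"));  // 42
  try {
    ConvertToInt("not-a-number");
  } catch (const std::runtime_error& e) {
    std::printf("%s\n", e.what());
  }
  return 0;
}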
...@@ -62,9 +62,12 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() { ...@@ -62,9 +62,12 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
if (scales_.find(var_name) != scales_.end()) continue; if (scales_.find(var_name) != scales_.end()) continue;
auto* var = predictor_.sub_scope_->FindVar(var_name); auto* var = predictor_.sub_scope_->FindVar(var_name);
PADDLE_ENFORCE(var, "%s is not in the scope", var_name); PADDLE_ENFORCE_NOT_NULL(var,
PADDLE_ENFORCE(var->IsType<LoDTensor>(), platform::errors::PreconditionNotMet(
"Only support lod tensor now."); "%s is not in the scope", var_name));
PADDLE_ENFORCE_EQ(var->IsType<LoDTensor>(), true,
platform::errors::PreconditionNotMet(
"Only support lod tensor now."));
LoDTensor* var_tensor = var->GetMutable<LoDTensor>(); LoDTensor* var_tensor = var->GetMutable<LoDTensor>();
// force unsigned type if already know it // force unsigned type if already know it
...@@ -82,9 +85,11 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() { ...@@ -82,9 +85,11 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
} else if (op->Type() == "transpose2" || } else if (op->Type() == "transpose2" ||
op->Type() == "reshape2" || op->Type() == "pool2d") { op->Type() == "reshape2" || op->Type() == "pool2d") {
auto input_var_name = op->Input("X")[0]; auto input_var_name = op->Input("X")[0];
PADDLE_ENFORCE(scales_.find(input_var_name) != scales_.end(), PADDLE_ENFORCE_NE(
"Input scales must be calculated before the " scales_.find(input_var_name), scales_.end(),
"output scales to infer if output is unsigned."); platform::errors::PreconditionNotMet(
"Input scales must be calculated before the "
"output scales to infer if output is unsigned."));
if (scales_.find(input_var_name) != scales_.end()) { if (scales_.find(input_var_name) != scales_.end()) {
scales_[var_name] = scales_[input_var_name]; scales_[var_name] = scales_[input_var_name];
} }
...@@ -94,10 +99,11 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() { ...@@ -94,10 +99,11 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
is_unsigned = true; is_unsigned = true;
double min_scale = std::numeric_limits<double>::max(); double min_scale = std::numeric_limits<double>::max();
for (auto input_var_name : op->Input("X")) { for (auto input_var_name : op->Input("X")) {
PADDLE_ENFORCE( PADDLE_ENFORCE_NE(
scales_.find(input_var_name) != scales_.end(), scales_.find(input_var_name), scales_.end(),
"Input scales must be calculated before the " platform::errors::PreconditionNotMet(
"output scales to infer if output is unsigned."); "Input scales must be calculated before the "
"output scales to infer if output is unsigned."));
is_unsigned = is_unsigned && scales_[input_var_name].first; is_unsigned = is_unsigned && scales_[input_var_name].first;
min_scale = std::min( min_scale = std::min(
min_scale, min_scale,
...@@ -132,11 +138,12 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale( ...@@ -132,11 +138,12 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale(
auto rule = qconfig_->scale_algo(op_type_name, conn_name); auto rule = qconfig_->scale_algo(op_type_name, conn_name);
if (rule == ScaleAlgo::NONE) return; if (rule == ScaleAlgo::NONE) return;
PADDLE_ENFORCE( PADDLE_ENFORCE_GT(
var_tensor.numel() > 0, var_tensor.numel(), 0,
"MkldnnQuantizer: LoDTensor of variable %s for quantization of op " platform::errors::InvalidArgument(
"%s of connection %s should not be empty.", "MkldnnQuantizer: LoDTensor of variable %s for quantization of op "
var_name, op_type_name, conn_name); "%s of connection %s should not be empty.",
var_name, op_type_name, conn_name));
switch (rule) { switch (rule) {
case ScaleAlgo::MAX: case ScaleAlgo::MAX:
...@@ -205,10 +212,11 @@ AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor( ...@@ -205,10 +212,11 @@ AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor(
float min_val = eigen_tensor.minCoeff(); float min_val = eigen_tensor.minCoeff();
bool is_positive = min_val >= 0.0f; bool is_positive = min_val >= 0.0f;
if (is_unsigned) if (is_unsigned)
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
is_positive, is_positive, true,
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0", platform::errors::InvalidArgument(
min_val); "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val));
int num_quantized_bins = 255; int num_quantized_bins = 255;
...@@ -316,10 +324,11 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor( ...@@ -316,10 +324,11 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
float max_abs = eigen_tensor.abs().maxCoeff(); float max_abs = eigen_tensor.abs().maxCoeff();
float min_val = eigen_tensor.minCoeff(); float min_val = eigen_tensor.minCoeff();
if (is_unsigned) if (is_unsigned)
PADDLE_ENFORCE( PADDLE_ENFORCE_GE(
min_val >= 0.0f, min_val, 0.0f,
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0", platform::errors::InvalidArgument(
min_val); "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val));
LoDTensor scale_tensor = CreateScaleTensor(); LoDTensor scale_tensor = CreateScaleTensor();
scale_tensor.data<double>()[0] = 1.0 / max_abs; scale_tensor.data<double>()[0] = 1.0 / max_abs;
...@@ -330,16 +339,19 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor( ...@@ -330,16 +339,19 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
std::pair<bool, LoDTensor> std::pair<bool, LoDTensor>
AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor( AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor(
const LoDTensor& var_tensor, bool is_unsigned, bool is_transposed) const { const LoDTensor& var_tensor, bool is_unsigned, bool is_transposed) const {
PADDLE_ENFORCE(var_tensor.dims().size() > 0, "Tensor dimension is empty."); PADDLE_ENFORCE_GT(
var_tensor.dims().size(), 0,
platform::errors::InvalidArgument("Tensor dimension is empty."));
ConstEigenVectorArrayMap eigen_tensor{var_tensor.data<float>(), ConstEigenVectorArrayMap eigen_tensor{var_tensor.data<float>(),
var_tensor.numel(), 1}; var_tensor.numel(), 1};
float min_val = eigen_tensor.minCoeff(); float min_val = eigen_tensor.minCoeff();
if (is_unsigned) if (is_unsigned)
PADDLE_ENFORCE( PADDLE_ENFORCE_GE(
min_val >= 0.0f, min_val, 0.0f,
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0", platform::errors::InvalidArgument(
min_val); "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val));
auto dims = var_tensor.dims(); auto dims = var_tensor.dims();
constexpr int num_col_dims = 1; constexpr int num_col_dims = 1;
...@@ -367,17 +379,19 @@ AnalysisPredictor::MkldnnQuantizer::Histogram( ...@@ -367,17 +379,19 @@ AnalysisPredictor::MkldnnQuantizer::Histogram(
const framework::LoDTensor& var_tensor, float min_val, float max_val, const framework::LoDTensor& var_tensor, float min_val, float max_val,
size_t num_bins) const { size_t num_bins) const {
PADDLE_ENFORCE_GT(num_bins, 0, PADDLE_ENFORCE_GT(num_bins, 0,
"MkldnnQuantizer: To calculate Histogram, num_bins (" + platform::errors::InvalidArgument(
std::to_string(num_bins) + ") must be positive."); "MkldnnQuantizer: To calculate Histogram, num_bins (" +
PADDLE_ENFORCE_GT( std::to_string(num_bins) + ") must be positive."));
var_tensor.numel(), 0, PADDLE_ENFORCE_GT(var_tensor.numel(), 0,
"MkldnnQuantizer: To calculate Histogram, the tensor must not be empty."); platform::errors::InvalidArgument(
PADDLE_ENFORCE(max_val >= min_val, "MkldnnQuantizer: To calculate Histogram, the tensor "
"MkldnnQuantizer: To calculate Histogram, max_val (" + "must not be empty."));
std::to_string(max_val) + PADDLE_ENFORCE_GE(max_val, min_val,
") must be greater or equal" platform::errors::InvalidArgument(
"to min_val (" + "MkldnnQuantizer: To calculate Histogram, max_val (" +
std::to_string(min_val) + ")."); std::to_string(max_val) + ") must be greater or equal"
"to min_val (" +
std::to_string(min_val) + ")."));
ConstEigenVectorArrayMap eigen_tensor{var_tensor.data<float>(), ConstEigenVectorArrayMap eigen_tensor{var_tensor.data<float>(),
var_tensor.numel(), 1}; var_tensor.numel(), 1};
auto bin_width = std::abs(max_val - min_val) / num_bins; auto bin_width = std::abs(max_val - min_val) / num_bins;
...@@ -407,7 +421,8 @@ void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const { ...@@ -407,7 +421,8 @@ void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const {
auto graph = std::unique_ptr<Graph>(new Graph(arg.main_program())); auto graph = std::unique_ptr<Graph>(new Graph(arg.main_program()));
arg.SetMainGraph(graph.release()); arg.SetMainGraph(graph.release());
auto* scope_ptr = arg.scope_ptr(); auto* scope_ptr = arg.scope_ptr();
PADDLE_ENFORCE(scope_ptr); PADDLE_ENFORCE_NOT_NULL(scope_ptr, platform::errors::PreconditionNotMet(
"The scope should not be nullptr."));
arg.main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr); arg.main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
auto* builder = predictor_.config_.pass_builder(); auto* builder = predictor_.config_.pass_builder();
...@@ -441,7 +456,9 @@ bool AnalysisPredictor::MkldnnQuantizer::RunQuantizePasses() const { ...@@ -441,7 +456,9 @@ bool AnalysisPredictor::MkldnnQuantizer::RunQuantizePasses() const {
PrepareArgument(); PrepareArgument();
auto& arg = predictor_.argument_; auto& arg = predictor_.argument_;
Analyzer().Run(&arg); Analyzer().Run(&arg);
PADDLE_ENFORCE(arg.scope_valid()); PADDLE_ENFORCE_EQ(
arg.scope_valid(), true,
platform::errors::PreconditionNotMet("The scope should be valid."));
VLOG(5) << "to prepare executor"; VLOG(5) << "to prepare executor";
ARGUMENT_CHECK_FIELD((&arg), ir_analyzed_program); ARGUMENT_CHECK_FIELD((&arg), ir_analyzed_program);
predictor_.inference_program_.reset( predictor_.inference_program_.reset(
...@@ -456,7 +473,8 @@ bool AnalysisPredictor::MkldnnQuantizer::RunWarmup() const { ...@@ -456,7 +473,8 @@ bool AnalysisPredictor::MkldnnQuantizer::RunWarmup() const {
VLOG(3) << "Predictor: run a quantization warmup iteration"; VLOG(3) << "Predictor: run a quantization warmup iteration";
auto warmup_data = qconfig_->warmup_data(); auto warmup_data = qconfig_->warmup_data();
PADDLE_ENFORCE_NOT_NULL(warmup_data, PADDLE_ENFORCE_NOT_NULL(warmup_data,
"Warmup data cannot be NULL in the config."); platform::errors::PreconditionNotMet(
"Warmup data cannot be NULL in the config."));
PrettyLogH1("--- Running warmup iteration for quantization"); PrettyLogH1("--- Running warmup iteration for quantization");
// Run the inference program // Run the inference program
...@@ -469,7 +487,10 @@ bool AnalysisPredictor::MkldnnQuantizer::RunWarmup() const { ...@@ -469,7 +487,10 @@ bool AnalysisPredictor::MkldnnQuantizer::RunWarmup() const {
float AnalysisPredictor::MkldnnQuantizer::SafeEntropy( float AnalysisPredictor::MkldnnQuantizer::SafeEntropy(
std::vector<int> reference_distr_P, int P_sum, std::vector<int> reference_distr_P, int P_sum,
std::vector<int> candidate_distr_Q, int Q_sum) const { std::vector<int> candidate_distr_Q, int Q_sum) const {
PADDLE_ENFORCE_EQ(reference_distr_P.size(), candidate_distr_Q.size()); PADDLE_ENFORCE_EQ(reference_distr_P.size(), candidate_distr_Q.size(),
platform::errors::InvalidArgument(
"The P size %d should be equal to Q size %d",
reference_distr_P.size(), candidate_distr_Q.size()));
float tmp_sum1 = 0; float tmp_sum1 = 0;
float tmp_sum2 = 0; float tmp_sum2 = 0;
for (size_t idx = 0; idx < reference_distr_P.size(); idx++) { for (size_t idx = 0; idx < reference_distr_P.size(); idx++) {
...@@ -479,10 +500,11 @@ float AnalysisPredictor::MkldnnQuantizer::SafeEntropy( ...@@ -479,10 +500,11 @@ float AnalysisPredictor::MkldnnQuantizer::SafeEntropy(
tmp_sum1 += 0; tmp_sum1 += 0;
tmp_sum2 += 0; tmp_sum2 += 0;
} else { } else {
PADDLE_ENFORCE(q_idx != 0, "MkldnnQuantizer: Fatal error!, idx = " + PADDLE_ENFORCE_NE(
std::to_string(idx) + q_idx, 0,
" qindex = 0! p_idx = " + platform::errors::PreconditionNotMet(
std::to_string(p_idx)); "MkldnnQuantizer: Fatal error!, idx = " + std::to_string(idx) +
" qindex = 0! p_idx = " + std::to_string(p_idx)));
} }
tmp_sum1 += p_idx * (log(Q_sum * p_idx)); tmp_sum1 += p_idx * (log(Q_sum * p_idx));
tmp_sum2 += p_idx * (log(P_sum * q_idx)); tmp_sum2 += p_idx * (log(P_sum * q_idx));
......
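SafeEntropy computes the KL divergence between the reference histogram P and the candidate histogram Q used by KL-based calibration: empty reference bins contribute nothing, and an empty candidate bin opposite a non-empty reference bin is a fatal inconsistency. A hedged standalone sketch; the final division by P_sum is an assumption, since the return statement lies outside the hunks shown:

#include <cmath>
#include <cstdio>
#include <stdexcept>
#include <string>
#include <vector>

// KL(P||Q) accumulated via the two partial sums used above:
//   sum_i p_i * log(Q_sum * p_i) - sum_i p_i * log(P_sum * q_i)
float SafeEntropy(const std::vector<int>& P, int P_sum,
                  const std::vector<int>& Q, int Q_sum) {
  if (P.size() != Q.size())
    throw std::invalid_argument("P and Q must have the same size.");
  float tmp_sum1 = 0.f, tmp_sum2 = 0.f;
  for (size_t i = 0; i < P.size(); ++i) {
    int p = P[i], q = Q[i];
    if (p == 0) continue;  // empty reference bin contributes nothing
    if (q == 0)            // non-empty reference vs empty candidate: fatal
      throw std::runtime_error("q == 0 while p != 0 at bin " +
                               std::to_string(i));
    tmp_sum1 += p * std::log(static_cast<float>(Q_sum) * p);
    tmp_sum2 += p * std::log(static_cast<float>(P_sum) * q);
  }
  return (tmp_sum1 - tmp_sum2) / P_sum;  // assumed normalization
}

int main() {
  std::vector<int> P = {1, 0, 3}, Q = {2, 1, 3};
  std::printf("%f\n", SafeEntropy(P, 4, Q, 6));
  return 0;
}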
...@@ -231,6 +231,10 @@ void CpuPassStrategy::EnableMkldnnQuantizer() { ...@@ -231,6 +231,10 @@ void CpuPassStrategy::EnableMkldnnQuantizer() {
void CpuPassStrategy::EnableMkldnnBfloat16() { void CpuPassStrategy::EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
if (!use_mkldnn_bfloat16_) {
passes_.push_back("cpu_bfloat16_placement_pass");
passes_.push_back("cpu_bfloat16_pass");
}
use_mkldnn_bfloat16_ = true; use_mkldnn_bfloat16_ = true;
#else #else
use_mkldnn_bfloat16_ = false; use_mkldnn_bfloat16_ = false;
......
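The guard added around the pass registration makes EnableMkldnnBfloat16() idempotent: the two bfloat16 passes are appended only on the first call. A minimal sketch of that behavior; MiniPassStrategy is an illustrative stand-in for CpuPassStrategy:

#include <cstdio>
#include <string>
#include <vector>

class MiniPassStrategy {
 public:
  void EnableMkldnnBfloat16() {
    // Append the passes only once; repeated calls are no-ops.
    if (!use_mkldnn_bfloat16_) {
      passes_.push_back("cpu_bfloat16_placement_pass");
      passes_.push_back("cpu_bfloat16_pass");
    }
    use_mkldnn_bfloat16_ = true;
  }
  size_t NumPasses() const { return passes_.size(); }

 private:
  std::vector<std::string> passes_;
  bool use_mkldnn_bfloat16_ = false;
};

int main() {
  MiniPassStrategy strategy;
  strategy.EnableMkldnnBfloat16();
  strategy.EnableMkldnnBfloat16();  // second call adds nothing
  std::printf("%zu passes\n", strategy.NumPasses());  // 2
  return 0;
}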
...@@ -34,8 +34,11 @@ class ConcatOpConverter : public OpConverter { ...@@ -34,8 +34,11 @@ class ConcatOpConverter : public OpConverter {
itensors.push_back(engine_->GetITensor(input_name)); itensors.push_back(engine_->GetITensor(input_name));
} }
int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis")); int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
PADDLE_ENFORCE(axis > 0, PADDLE_ENFORCE_GT(axis, 0, platform::errors::InvalidArgument(
"The axis attr of Concat op should be large than 0 for trt"); "The axis attr of Concat"
" op should be larger than 0 for trt. "
"But received %d.",
axis));
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(), auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(),
itensors.size()); itensors.size());
......
...@@ -100,7 +100,9 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op, ...@@ -100,7 +100,9 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op,
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0}; TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
auto* layer = fadd_layer(const_cast<nvinfer1::ITensor*>(X), n_output, n_input, auto* layer = fadd_layer(const_cast<nvinfer1::ITensor*>(X), n_output, n_input,
nv_ksize, weight, bias); nv_ksize, weight, bias);
PADDLE_ENFORCE(layer != nullptr); PADDLE_ENFORCE_NOT_NULL(layer,
platform::errors::Fatal("TensorRT create conv2d"
" layer error."));
layer->setStride(nv_strides); layer->setStride(nv_strides);
layer->setPadding(nv_paddings); layer->setPadding(nv_paddings);
layer->setNbGroups(groups); layer->setNbGroups(groups);
......
...@@ -43,13 +43,30 @@ class ElementwiseWeightOpConverter : public OpConverter { ...@@ -43,13 +43,30 @@ class ElementwiseWeightOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer"; VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer";
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight op_desc.Input("X").size(), 1,
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); platform::errors::InvalidArgument(
"The input op's Input(\"X\").size() "
"should equal to 1, but received Input(\"X\").size() = %u.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(
op_desc.Input("Y").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\").size() "
"should equal to 1, but received Input(\"Y\").size() = %u.",
op_desc.Input("Y").size())); // Y is a weight
PADDLE_ENFORCE_EQ(
op_desc.Output("Out").size(), 1,
platform::errors::InvalidArgument(
"The input op's Output(\"Out\").size() "
"should equal to 1, but reveceid Output(\"Out\").size() = %u.",
op_desc.Output("Out").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front()); auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Y_v = scope.FindVar(op_desc.Input("Y").front()); auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
PADDLE_ENFORCE_NOT_NULL(Y_v); PADDLE_ENFORCE_NOT_NULL(
Y_v, platform::errors::NotFound("Variable %s not found in scope.",
op_desc.Input("Y").front().c_str()));
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>(); auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
float* weight_data = nullptr; float* weight_data = nullptr;
weight_data = weight_data =
...@@ -176,9 +193,24 @@ class ElementwiseTensorOpConverter : public OpConverter { ...@@ -176,9 +193,24 @@ class ElementwiseTensorOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
nvinfer1::ILayer* layer = nullptr; nvinfer1::ILayer* layer = nullptr;
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight op_desc.Input("X").size(), 1,
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); platform::errors::InvalidArgument(
"The input op's Input(\"X\").size() "
"should equal to 1, but received Input(\"X\").size() = %u.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(
op_desc.Input("Y").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\").size() "
"should equal to 1, but received Input(\"Y\").size() = %u.",
op_desc.Input("Y").size())); // Y is a weight
PADDLE_ENFORCE_EQ(
op_desc.Output("Out").size(), 1,
platform::errors::InvalidArgument(
"The input op's Output(\"Out\").size() "
"should equal to 1, but received Output(\"Out\").size() = %u.",
op_desc.Output("Out").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front()); auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Y = engine_->GetITensor(op_desc.Input("Y").front()); auto* Y = engine_->GetITensor(op_desc.Input("Y").front());
......
...@@ -29,38 +29,67 @@ class DefaultIOConverter : public EngineIOConverter { ...@@ -29,38 +29,67 @@ class DefaultIOConverter : public EngineIOConverter {
// NOTE out is GPU memory. // NOTE out is GPU memory.
virtual void operator()(const LoDTensor& in, void* out, virtual void operator()(const LoDTensor& in, void* out,
size_t max_size) override { size_t max_size) override {
PADDLE_ENFORCE(out != nullptr); PADDLE_ENFORCE_NOT_NULL(out,
PADDLE_ENFORCE(stream_ != nullptr); platform::errors::InvalidArgument(
"The input param 'out' must not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(stream_,
platform::errors::PreconditionNotMet(
"You should set up stream_ by SetStream() "
"before you call the operator()."));
const auto& place = in.place(); const auto& place = in.place();
size_t size = in.memory_size(); size_t size = in.memory_size();
PADDLE_ENFORCE_LE(size, max_size); PADDLE_ENFORCE_LE(
size, max_size,
platform::errors::InvalidArgument(
"The input Tensor in's memory_size shoule be less than or equal to "
"the input max_size. But in's memory_size = %u, max_size = %u.",
size, max_size));
if (is_cpu_place(place)) { if (is_cpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size, PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemcpyAsync(
cudaMemcpyHostToDevice, *stream_)); out, in.data<float>(), size, cudaMemcpyHostToDevice, *stream_));
} else if (is_gpu_place(place)) { } else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size, PADDLE_ENFORCE_EQ(
cudaMemcpyDeviceToDevice, *stream_)); 0, cudaMemcpyAsync(out, in.data<float>(), size,
cudaMemcpyDeviceToDevice, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error."));
} else { } else {
PADDLE_THROW("Unknown device for converter"); PADDLE_THROW(platform::errors::NotFound("Unknown device for converter"));
} }
cudaStreamSynchronize(*stream_); cudaStreamSynchronize(*stream_);
} }
// NOTE in is GPU memory. // NOTE in is GPU memory.
virtual void operator()(const void* in, LoDTensor* out, virtual void operator()(const void* in, LoDTensor* out,
size_t max_size) override { size_t max_size) override {
PADDLE_ENFORCE(in != nullptr); PADDLE_ENFORCE_NOT_NULL(in,
PADDLE_ENFORCE(stream_ != nullptr); platform::errors::InvalidArgument(
"The input param 'in' must not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(stream_,
platform::errors::PreconditionNotMet(
"You should set up stream_ by SetStream() "
"before you call the operator()."));
const auto& place = out->place(); const auto& place = out->place();
size_t size = out->memory_size(); size_t size = out->memory_size();
PADDLE_ENFORCE_LE(size, max_size); PADDLE_ENFORCE_LE(
size, max_size,
platform::errors::InvalidArgument(
"The input Tensor out's memory_size shoule be less than or equal "
"to the input max_size. "
"But out's memory_size = %u, max_size = %u.",
size, max_size));
if (is_cpu_place(place)) { if (is_cpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size, PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToHost, *stream_)); cudaMemcpyDeviceToHost, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToHost) error."));
} else if (is_gpu_place(place)) { } else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size, PADDLE_ENFORCE_EQ(
cudaMemcpyDeviceToDevice, *stream_)); 0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToDevice, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error."));
} else { } else {
PADDLE_THROW("Unknown device for converter"); PADDLE_THROW(platform::errors::NotFound("Unknown device for converter"));
} }
cudaStreamSynchronize(*stream_); cudaStreamSynchronize(*stream_);
} }
......
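DefaultIOConverter picks the cudaMemcpy direction from where the source tensor lives and now checks every CUDA return code instead of comparing against a bare 0. A hedged sketch of that dispatch; it requires the CUDA toolkit, and CopyToEngineInput is a hypothetical name:

#include <cuda_runtime.h>
#include <cstddef>
#include <stdexcept>

void CopyToEngineInput(const void* src, void* dst, size_t size,
                       bool src_on_gpu, cudaStream_t stream) {
  // Host memory is pushed HostToDevice, device memory DeviceToDevice.
  cudaMemcpyKind kind =
      src_on_gpu ? cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice;
  if (cudaMemcpyAsync(dst, src, size, kind, stream) != cudaSuccess) {
    throw std::runtime_error("cudaMemcpyAsync error.");
  }
  // The converter synchronizes before returning, as operator() does above.
  if (cudaStreamSynchronize(stream) != cudaSuccess) {
    throw std::runtime_error("cudaStreamSynchronize error.");
  }
}

int main() {
  const size_t size = 4 * sizeof(float);
  float host[4] = {0.f, 1.f, 2.f, 3.f};
  void* device = nullptr;
  if (cudaMalloc(&device, size) != cudaSuccess) return 1;
  CopyToEngineInput(host, device, size, /*src_on_gpu=*/false, nullptr);
  cudaFree(device);
  return 0;
}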
...@@ -44,10 +44,14 @@ class EngineIOConverter { ...@@ -44,10 +44,14 @@ class EngineIOConverter {
static void ConvertInput(const std::string& op_type, const LoDTensor& in, static void ConvertInput(const std::string& op_type, const LoDTensor& in,
void* out, size_t max_size, cudaStream_t* stream) { void* out, size_t max_size, cudaStream_t* stream) {
PADDLE_ENFORCE(stream != nullptr); PADDLE_ENFORCE_NOT_NULL(stream,
platform::errors::InvalidArgument(
"The input stream must not be nullptr."));
auto* converter = Registry<EngineIOConverter>::Global().Lookup( auto* converter = Registry<EngineIOConverter>::Global().Lookup(
op_type, "default" /* default_type */); op_type, "default" /* default_type */);
PADDLE_ENFORCE_NOT_NULL(converter); PADDLE_ENFORCE_NOT_NULL(
converter, platform::errors::Unimplemented(
"The %s in is not supported yet.", op_type.c_str()));
converter->SetStream(stream); converter->SetStream(stream);
(*converter)(in, out, max_size); (*converter)(in, out, max_size);
} }
...@@ -55,10 +59,14 @@ class EngineIOConverter { ...@@ -55,10 +59,14 @@ class EngineIOConverter {
static void ConvertOutput(const std::string& op_type, const void* in, static void ConvertOutput(const std::string& op_type, const void* in,
LoDTensor* out, size_t max_size, LoDTensor* out, size_t max_size,
cudaStream_t* stream) { cudaStream_t* stream) {
PADDLE_ENFORCE(stream != nullptr); PADDLE_ENFORCE_NOT_NULL(stream,
platform::errors::InvalidArgument(
"The input stream must not be nullptr."));
auto* converter = Registry<EngineIOConverter>::Global().Lookup( auto* converter = Registry<EngineIOConverter>::Global().Lookup(
op_type, "default" /* default_type */); op_type, "default" /* default_type */);
PADDLE_ENFORCE_NOT_NULL(converter); PADDLE_ENFORCE_NOT_NULL(
converter, platform::errors::Unimplemented(
"The %s in not supported yet.", op_type.c_str()));
converter->SetStream(stream); converter->SetStream(stream);
(*converter)(in, out, max_size); (*converter)(in, out, max_size);
} }
......
...@@ -53,7 +53,12 @@ class OpConverter { ...@@ -53,7 +53,12 @@ class OpConverter {
OpConverter* it{nullptr}; OpConverter* it{nullptr};
if (op_desc.Type() == "mul") { if (op_desc.Type() == "mul") {
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL); PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL,
platform::errors::InvalidArgument(
"The input op mul's Input(\"Y\")."
"size() should equal to 1, but reveceid "
"Input(\"Y\").size() = %u.",
op_desc.Input("Y").size()));
std::string Y = op_desc.Input("Y")[0]; std::string Y = op_desc.Input("Y")[0];
if (parameters.count(Y)) { if (parameters.count(Y)) {
it = Registry<OpConverter>::Global().Lookup("fc"); it = Registry<OpConverter>::Global().Lookup("fc");
...@@ -66,38 +71,51 @@ class OpConverter { ...@@ -66,38 +71,51 @@ class OpConverter {
// static std::unordered_set<std::string> add_weight_op_set {"add", "mul", // static std::unordered_set<std::string> add_weight_op_set {"add", "mul",
// "sub", "div"}; // "sub", "div"};
static std::unordered_set<std::string> add_weight_op_set{"add", "mul"}; static std::unordered_set<std::string> add_weight_op_set{"add", "mul"};
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL); PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\")."
"size() should equal to 1, but reveceid "
"Input(\"Y\").size() = %u.",
op_desc.Input("Y").size()));
int op_type_len = op_desc.Type().size(); int op_type_len = op_desc.Type().size();
std::string op_type = op_desc.Type().substr(op_type_len - 3, op_type_len); std::string op_type = op_desc.Type().substr(op_type_len - 3, op_type_len);
std::string Y = op_desc.Input("Y")[0]; std::string Y = op_desc.Input("Y")[0];
if (parameters.count(Y)) { if (parameters.count(Y)) {
PADDLE_ENFORCE(add_weight_op_set.count(op_type) > 0, PADDLE_ENFORCE_GT(
"Unsupported elementwise type" + op_type); add_weight_op_set.count(op_type), 0,
platform::errors::Unimplemented("Unsupported elementwise type %s",
op_type.c_str()));
it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type + it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
"_weight"); "_weight");
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", PADDLE_ENFORCE_NOT_NULL(
op_desc.Type()); it, platform::errors::Unimplemented(
"no OpConverter for optype [%s]", op_desc.Type()));
} else { } else {
PADDLE_ENFORCE(add_tensor_op_set.count(op_type) > 0, PADDLE_ENFORCE_GT(
"Unsupported elementwise type" + op_type); add_tensor_op_set.count(op_type), 0,
platform::errors::Unimplemented("Unsupported elementwise type %s",
op_type.c_str()));
it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type + it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
"_tensor"); "_tensor");
} }
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", PADDLE_ENFORCE_NOT_NULL(
op_desc.Type()); it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
} }
if (op_desc.Type() == "depthwise_conv2d") { if (op_desc.Type() == "depthwise_conv2d") {
it = Registry<OpConverter>::Global().Lookup("conv2d"); it = Registry<OpConverter>::Global().Lookup("conv2d");
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", PADDLE_ENFORCE_NOT_NULL(
op_desc.Type()); it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
} }
if (!it) { if (!it) {
it = Registry<OpConverter>::Global().Lookup(op_desc.Type()); it = Registry<OpConverter>::Global().Lookup(op_desc.Type());
} }
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", PADDLE_ENFORCE_NOT_NULL(
op_desc.Type()); it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
it->SetEngine(engine); it->SetEngine(engine);
(*it)(op, scope, test_mode); (*it)(op, scope, test_mode);
...@@ -149,9 +167,13 @@ class OpConverter { ...@@ -149,9 +167,13 @@ class OpConverter {
for (auto& input : inputs) { for (auto& input : inputs) {
if (parameters.count(input)) continue; if (parameters.count(input)) continue;
auto* var = block_desc->FindVar(input); auto* var = block_desc->FindVar(input);
PADDLE_ENFORCE(var, "no variable called %s", input); PADDLE_ENFORCE_NOT_NULL(
PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR, var, platform::errors::NotFound("no variable called %s in block.",
"TensorRT engine only takes LoDTensor as input"); input.c_str()));
PADDLE_ENFORCE_EQ(
var->GetType(), FluidDT::VarType_Type_LOD_TENSOR,
platform::errors::InvalidArgument("TensorRT engine only takes "
"LoDTensor as input"));
auto var_shape = var->GetShape(); auto var_shape = var->GetShape();
if (engine->with_dynamic_shape()) { if (engine->with_dynamic_shape()) {
#if IS_TRT_VERSION_GE(6000) #if IS_TRT_VERSION_GE(6000)
......
...@@ -39,9 +39,22 @@ class PadOpConverter : public OpConverter { ...@@ -39,9 +39,22 @@ class PadOpConverter : public OpConverter {
nvinfer1::Dims input_shape = input->getDimensions(); nvinfer1::Dims input_shape = input->getDimensions();
int nbDims = input_shape.nbDims; int nbDims = input_shape.nbDims;
int pad_size = static_cast<int>(paddings.size()); int pad_size = static_cast<int>(paddings.size());
PADDLE_ENFORCE_GE(nbDims, 2); PADDLE_ENFORCE_GE(
PADDLE_ENFORCE_EQ((nbDims + 1) * 2, pad_size); nbDims, 2,
PADDLE_ENFORCE(pad_value == 0.0, "The pad layer of TRT only support zero."); platform::errors::InvalidArgument(
"Input X[0]'s dimension should greater than or equal to 2. "
"But received %d.",
nbDims));
PADDLE_ENFORCE_EQ(
(nbDims + 1) * 2, pad_size,
platform::errors::InvalidArgument("Input X[0]'s dimension(nbDims for "
"short) should meet the condition:"
"(nbDims + 1) * 2 == pad_size. But "
"received nbDims:%d, pad_size:%d.",
nbDims, pad_size));
PADDLE_ENFORCE_EQ(pad_value, 0.0,
platform::errors::InvalidArgument(
"The pad layer of TRT only support zero."));
nvinfer1::DimsHW pre_pad(paddings[pad_size - 4], paddings[pad_size - 2]); nvinfer1::DimsHW pre_pad(paddings[pad_size - 4], paddings[pad_size - 2]);
nvinfer1::DimsHW post_pad(paddings[pad_size - 3], paddings[pad_size - 1]); nvinfer1::DimsHW post_pad(paddings[pad_size - 3], paddings[pad_size - 1]);
...@@ -50,7 +63,9 @@ class PadOpConverter : public OpConverter { ...@@ -50,7 +63,9 @@ class PadOpConverter : public OpConverter {
*const_cast<nvinfer1::ITensor*>(input), *const_cast<nvinfer1::ITensor*>(input),
pre_pad, post_pad); pre_pad, post_pad);
PADDLE_ENFORCE(layer != nullptr); PADDLE_ENFORCE_NOT_NULL(layer,
platform::errors::External(
"add padding layer to tensorrt engine error"));
auto output_name = op_desc.Output("Out")[0]; auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "pad", {output_name}, test_mode); RreplenishLayerAndOutput(layer, "pad", {output_name}, test_mode);
} }
......
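The pad converter's new precondition pins down the paddings layout: with (nbDims + 1) * 2 == pad_size, the last four entries hold the H and W padding, split into pre and post pads exactly as the two DimsHW constructors are fed. A runnable sketch of that extraction; SplitPaddings and HW are hypothetical:

#include <cstdio>
#include <stdexcept>
#include <vector>

struct HW { int h, w; };

void SplitPaddings(const std::vector<int>& paddings, int nbDims,
                   HW* pre_pad, HW* post_pad) {
  int pad_size = static_cast<int>(paddings.size());
  // Same precondition as the converter: at least 2 dims, paired paddings.
  if (nbDims < 2 || (nbDims + 1) * 2 != pad_size)
    throw std::invalid_argument("(nbDims + 1) * 2 must equal pad_size.");
  *pre_pad = {paddings[pad_size - 4], paddings[pad_size - 2]};
  *post_pad = {paddings[pad_size - 3], paddings[pad_size - 1]};
}

int main() {
  // Paddings laid out as (pre, post) pairs per dim; last two pairs: H, W.
  std::vector<int> paddings = {0, 0, 0, 0, 1, 2, 3, 4};  // nbDims == 3
  HW pre, post;
  SplitPaddings(paddings, 3, &pre, &post);
  std::printf("pre=(%d,%d) post=(%d,%d)\n", pre.h, pre.w, post.h, post.w);
  // prints pre=(1,3) post=(2,4)
  return 0;
}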
...@@ -23,9 +23,8 @@ class SliceOpConverter : public OpConverter { ...@@ -23,9 +23,8 @@ class SliceOpConverter : public OpConverter {
public: public:
void operator()(const framework::proto::OpDesc& op, void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override { const framework::Scope& scope, bool test_mode) override {
// This OP is implemented by trt dynamic shape plugin. // Dynamic shape plugin requires TRT version greater than 6.0.
// Dynamic shape plugin requires TRT version greater than 6.0. // Dynamic shape plugin requires TRT version greater than 6.0.
#if IS_TRT_VERSION_GE(6000)
VLOG(4) << "convert slice op to tensorrt layer"; VLOG(4) << "convert slice op to tensorrt layer";
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
// Declare inputs // Declare inputs
...@@ -38,27 +37,65 @@ class SliceOpConverter : public OpConverter { ...@@ -38,27 +37,65 @@ class SliceOpConverter : public OpConverter {
std::vector<int> ends = std::vector<int> ends =
BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("ends")); BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("ends"));
PADDLE_ENFORCE_EQ(
starts.size(), axes.size(),
platform::errors::InvalidArgument(
"The size of starts must be equal to the size of axes."));
PADDLE_ENFORCE_EQ(
ends.size(), axes.size(),
platform::errors::InvalidArgument(
"The size of ends must be equal to the size of axes."));
auto input_dims = input->getDimensions();
if (!engine_->with_dynamic_shape()) {
// notice that input shape is [CHW] without batch axis when input has
// static shape
for (size_t i = input_dims.nbDims; i > 0; i--) {
input_dims.d[i] = input_dims.d[i - 1];
}
input_dims.d[0] = 1; // fake batchsize, not useful here
for (size_t i = 0; i < axes.size(); i++) {
// split on batch is not supported in TensorRT
PADDLE_ENFORCE_NE(axes[i], 0, platform::errors::InvalidArgument(
"Invalid slice axis. Slice on batch "
"axis is not supported in TensorRT"));
if (starts[i] < 0) {
starts[i] = std::max(starts[i] + input_dims.d[axes[i]], 0);
}
if (ends[i] < 0) {
ends[i] = std::max(ends[i] + input_dims.d[axes[i]], 0);
}
ends[i] = std::min(ends[i], input_dims.d[axes[i]]);
PADDLE_ENFORCE_GT(
ends[i], starts[i],
platform::errors::InvalidArgument(
"Attr(ends) should be greater than attr(starts) in "
"slice op. But received ends = %d, starts = %d.",
ends[i], starts[i]));
}
}
nvinfer1::ILayer* layer = nullptr; nvinfer1::ILayer* layer = nullptr;
if (engine_->with_dynamic_shape()) { if (engine_->with_dynamic_shape()) {
#if IS_TRT_VERSION_GE(6000)
bool ban_fp16 = engine_->disable_trt_plugin_fp16(); bool ban_fp16 = engine_->disable_trt_plugin_fp16();
plugin::SlicePluginDynamic* plugin = plugin::SlicePluginDynamic* plugin =
new plugin::SlicePluginDynamic(starts, ends, ends, ban_fp16); new plugin::SlicePluginDynamic(starts, ends, axes, ban_fp16);
layer = engine_->AddPluginV2(&input, 1, plugin); layer = engine_->AddPluginV2(&input, 1, plugin);
} else { #else
PADDLE_THROW(platform::errors::Fatal( PADDLE_THROW(platform::errors::Fatal(
"You are running the Ernie(Bert) model in static" "You are running the TRT Dynamic Shape mode, need to confirm that "
"shape mode, which is not supported for the time being.\n" "your TRT version is no less than 6.0"));
"You can use the config.SetTRTDynamicShapeInfo(...) interface" #endif
" to set the shape information to run the dynamic shape mode.")); } else {
bool ban_fp16 = engine_->disable_trt_plugin_fp16();
plugin::SlicePlugin* plugin =
new plugin::SlicePlugin(starts, ends, axes, ban_fp16);
layer = engine_->AddPlugin(&input, 1, plugin);
} }
auto output_name = op_desc.Output("Out")[0]; auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "skip_layernorm", {output_name}, test_mode); RreplenishLayerAndOutput(layer, "slice", {output_name}, test_mode);
#else
PADDLE_THROW(platform::errors::Fatal(
"You are running the TRT Dynamic Shape mode, need to confirm that "
"your TRT version is no less than 6.0"));
#endif
} }
}; };
......
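For static shapes the slice converter now normalizes negative starts/ends against the (batch-augmented) input dimensions, clamps ends to the dimension size, and rejects slicing on the batch axis. A hedged standalone sketch of that normalization; NormalizeSlice is a hypothetical name:

#include <algorithm>
#include <cstdio>
#include <stdexcept>
#include <vector>

void NormalizeSlice(const std::vector<int>& dims, const std::vector<int>& axes,
                    std::vector<int>* starts, std::vector<int>* ends) {
  for (size_t i = 0; i < axes.size(); ++i) {
    if (axes[i] == 0)
      throw std::invalid_argument("Slice on batch axis is not supported.");
    int dim = dims[axes[i]];
    int& s = (*starts)[i];
    int& e = (*ends)[i];
    // Negative indices count from the end of the dimension.
    if (s < 0) s = std::max(s + dim, 0);
    if (e < 0) e = std::max(e + dim, 0);
    e = std::min(e, dim);  // clamp to the dimension size
    if (e <= s)
      throw std::invalid_argument("ends must be greater than starts.");
  }
}

int main() {
  std::vector<int> dims = {1, 4, 8};  // index 0 is the fake batch axis
  std::vector<int> axes = {2}, starts = {-6}, ends = {-1};
  NormalizeSlice(dims, axes, &starts, &ends);
  std::printf("start=%d end=%d\n", starts[0], ends[0]);  // start=2 end=7
  return 0;
}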
...@@ -28,11 +28,20 @@ class SwishOpConverter : public OpConverter { ...@@ -28,11 +28,20 @@ class SwishOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
// Declare inputs // Declare inputs
int input_num = op_desc.Input("X").size(); int input_num = op_desc.Input("X").size();
PADDLE_ENFORCE(input_num == 1); PADDLE_ENFORCE_EQ(input_num, 1,
platform::errors::InvalidArgument(
"The input X's size must equal to 1 in TRT swish op."
" But received X's size %d.",
input_num));
auto* input = engine_->GetITensor(op_desc.Input("X")[0]); auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
// Get output // Get output
size_t output_num = op_desc.Output("Out").size(); size_t output_num = op_desc.Output("Out").size();
PADDLE_ENFORCE(output_num == 1); PADDLE_ENFORCE_EQ(
output_num, 1UL,
platform::errors::InvalidArgument(
"The ouput Out's size must equal to 1 in TRT swish op. "
"But received Out's size %u.",
output_num));
// Get attrs // Get attrs
float beta = BOOST_GET_CONST(float, op_desc.GetAttr("beta")); float beta = BOOST_GET_CONST(float, op_desc.GetAttr("beta"));
......
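For context, the op being converted computes swish(x) = x * sigmoid(beta * x). A one-line reference implementation (not the TRT kernel; function name is illustrative):

    #include <cmath>

    // swish(x) = x * sigmoid(beta * x) = x / (1 + exp(-beta * x))
    float SwishRef(float x, float beta) { return x / (1.0f + std::exp(-beta * x)); }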
...@@ -49,7 +49,10 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place, ...@@ -49,7 +49,10 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
const platform::DeviceContext& ctx) { const platform::DeviceContext& ctx) {
auto dims = tensor->dims(); auto dims = tensor->dims();
size_t num_elements = analysis::AccuDims(dims, dims.size()); size_t num_elements = analysis::AccuDims(dims, dims.size());
PADDLE_ENFORCE_GT(num_elements, 0); PADDLE_ENFORCE_GT(
num_elements, 0UL,
platform::errors::PermissionDenied("RandomizeTensor only can be used for "
"tensor which dims is not zero."));
platform::CPUPlace cpu_place; platform::CPUPlace cpu_place;
framework::LoDTensor temp_tensor; framework::LoDTensor temp_tensor;
...@@ -79,7 +82,8 @@ class TRTConvertValidation { ...@@ -79,7 +82,8 @@ class TRTConvertValidation {
scope_(scope), scope_(scope),
if_add_batch_(if_add_batch), if_add_batch_(if_add_batch),
max_batch_size_(max_batch_size) { max_batch_size_(max_batch_size) {
PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0); PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0,
platform::errors::External("cudaStreamCreate error."));
engine_.reset(new TensorRTEngine(max_batch_size, workspace_size)); engine_.reset(new TensorRTEngine(max_batch_size, workspace_size));
engine_->InitNetwork(); engine_->InitNetwork();
} }
...@@ -154,7 +158,12 @@ class TRTConvertValidation { ...@@ -154,7 +158,12 @@ class TRTConvertValidation {
void Execute(int batch_size, void Execute(int batch_size,
std::unordered_set<std::string> neglected_output = {}) { std::unordered_set<std::string> neglected_output = {}) {
// Execute Fluid Op // Execute Fluid Op
PADDLE_ENFORCE_LE(batch_size, max_batch_size_); PADDLE_ENFORCE_LE(batch_size, max_batch_size_,
platform::errors::InvalidArgument(
"Runtime batch_size should be less than or equal to "
"max_batch_size_. "
"But received batch_size:%d, max_batch_size_:%d",
batch_size, max_batch_size_));
platform::CUDADeviceContext ctx(place_); platform::CUDADeviceContext ctx(place_);
op_->Run(scope_, place_); op_->Run(scope_, place_);
cudaStreamSynchronize(stream_); cudaStreamSynchronize(stream_);
......
...@@ -31,6 +31,7 @@ struct SimpleOpTypeSetTeller : public Teller { ...@@ -31,6 +31,7 @@ struct SimpleOpTypeSetTeller : public Teller {
teller_set.insert("fused_embedding_eltwise_layernorm"); teller_set.insert("fused_embedding_eltwise_layernorm");
teller_set.insert("multihead_matmul"); teller_set.insert("multihead_matmul");
teller_set.insert("skip_layernorm"); teller_set.insert("skip_layernorm");
teller_set.insert("slice");
#endif #endif
} }
......
...@@ -26,8 +26,10 @@ namespace inference { ...@@ -26,8 +26,10 @@ namespace inference {
namespace tensorrt { namespace tensorrt {
namespace plugin { namespace plugin {
// Dynamic Plugin below. SlicePlugin *CreateSlicePluginDeserialize(const void *buffer, size_t length) {
#if IS_TRT_VERSION_GE(6000) return new SlicePlugin(buffer, length);
}
REGISTER_TRT_PLUGIN("slice_plugin", CreateSlicePluginDeserialize);
template <typename T> template <typename T>
__global__ void SliceKernel(int num, int dims, const T *input, __global__ void SliceKernel(int num, int dims, const T *input,
...@@ -56,11 +58,196 @@ __global__ void SliceKernel(int num, int dims, const T *input, ...@@ -56,11 +58,196 @@ __global__ void SliceKernel(int num, int dims, const T *input,
} }
} }
SlicePlugin::SlicePlugin(std::vector<int> starts, std::vector<int> ends,
std::vector<int> axes, bool ban_fp16)
: starts_(starts), ends_(ends), axes_(axes), ban_fp16_(ban_fp16) {
cudaEventCreate(&copy_event_);
cudaStreamCreate(&copy_stream_);
}
SlicePlugin::SlicePlugin(void const *serial_data, size_t serial_length) {
deserializeBase(serial_data, serial_length);
DeserializeValue(&serial_data, &serial_length, &starts_);
DeserializeValue(&serial_data, &serial_length, &ends_);
DeserializeValue(&serial_data, &serial_length, &axes_);
DeserializeValue(&serial_data, &serial_length, &ban_fp16_);
cudaEventCreate(&copy_event_);
cudaStreamCreate(&copy_stream_);
}
SlicePlugin::~SlicePlugin() {
cudaStreamDestroy(copy_stream_);
cudaEventDestroy(copy_event_);
cudaFree(offset_temp_data_);
}
SlicePlugin *SlicePlugin::clone() const {
return new SlicePlugin(starts_, ends_, axes_, ban_fp16_);
}
bool SlicePlugin::supportsFormat(nvinfer1::DataType type,
nvinfer1::PluginFormat format) const {
#ifdef SUPPORTS_CUDA_FP16
return ((type == nvinfer1::DataType::kFLOAT ||
type == nvinfer1::DataType::kHALF) &&
(format == nvinfer1::PluginFormat::kNCHW));
#else
return ((type == nvinfer1::DataType::kFLOAT) &&
(format == nvinfer1::PluginFormat::kNCHW));
#endif
}
nvinfer1::Dims SlicePlugin::getOutputDimensions(int index,
const nvinfer1::Dims *inputs,
int nb_input_dims) {
auto in_dims = inputs[0];
nvinfer1::Dims out_dims = in_dims;
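// Note: axes_ index into the full [N, C, H, W] shape, while static-shape
// TRT dims exclude the batch axis, hence the "- 1" when writing out_dims.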
for (size_t i = 0; i < axes_.size(); i++) {
int start = starts_[i];
int end = ends_[i];
out_dims.d[axes_[i] - 1] = end - start;
}
return out_dims;
}
int SlicePlugin::enqueue(int batch_size, const void *const *inputs,
void **outputs, void *workspace, cudaStream_t stream) {
auto input_dims = getInputDims(0);
// notice input dims is [C, H, W], add input batch dim here
auto out_dims = getOutputDimensions(0, &input_dims, 1);
input_dims.nbDims += 1;
out_dims.nbDims += 1;
for (auto i = input_dims.nbDims; i > 0; --i) {
input_dims.d[i] = input_dims.d[i - 1];
out_dims.d[i] = out_dims.d[i - 1];
}
input_dims.d[0] = batch_size;
out_dims.d[0] = batch_size;
auto num_dims = input_dims.nbDims;
size_t out_num = ProductDim(out_dims);
std::vector<int> seg_offsets;
std::vector<int> offsets;
std::vector<int> extends;
offsets.resize(num_dims);
extends.resize(num_dims);
seg_offsets.resize(num_dims);
seg_offsets[num_dims - 1] = 1;
for (int i = num_dims - 2; i >= 0; i--) {
seg_offsets[i] = input_dims.d[i + 1] * seg_offsets[i + 1];
}
for (size_t i = 0; i < num_dims; ++i) {
offsets[i] = 0;
extends[i] = out_dims.d[i];
}
for (size_t i = 0; i < axes_.size(); ++i) {
offsets[axes_[i]] = starts_[i];
}
std::vector<int> offset_info;
for (size_t i = 0; i < num_dims; ++i) {
offset_info.push_back(offsets[i]);
offset_info.push_back(extends[i]);
offset_info.push_back(seg_offsets[i]);
}
if (offset_temp_data_ == nullptr) {
cudaMalloc(&offset_temp_data_, 3 * num_dims * sizeof(int));
}
cudaMemcpyAsync(offset_temp_data_, offset_info.data(),
sizeof(int) * 3 * num_dims, cudaMemcpyHostToDevice,
copy_stream_);
cudaEventRecord(copy_event_, copy_stream_);
cudaStreamWaitEvent(stream, copy_event_, 0);
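// The offsets were copied host-to-device on the dedicated copy_stream_;
// recording copy_event_ and making the compute stream wait on it ensures
// the kernel launched below sees a fully populated offset_temp_data_.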
int threads = 256;
int blocks = (out_num + threads - 1) / threads;
auto input_type = getDataType();
if (input_type == nvinfer1::DataType::kFLOAT) {
const float *input1 = static_cast<const float *>(inputs[0]);
float *output = static_cast<float *>(outputs[0]);
SliceKernel<float><<<blocks, threads, 3 * num_dims * sizeof(int), stream>>>(
out_num, num_dims, input1, offset_temp_data_, output);
} else if (input_type == nvinfer1::DataType::kHALF) {
#ifdef SUPPORTS_CUDA_FP16
const half *input1 = static_cast<const half *>(inputs[0]);
half *output = static_cast<half *>(outputs[0]);
SliceKernel<half><<<blocks, threads, 3 * num_dims * sizeof(int), stream>>>(
out_num, num_dims, input1, offset_temp_data_, output);
#else
PADDLE_THROW(platform::errors::Fatal(
"The cuda archs you specific should greater than 600."));
#endif
} else {
PADDLE_THROW(platform::errors::Fatal(
"The Slice TRT Plugin's input type should be float or half."));
}
return cudaGetLastError() != cudaSuccess;
}
size_t SlicePlugin::getSerializationSize() {
return getBaseSerializationSize() + SerializedSize(getPluginType()) +
SerializedSize(starts_) + SerializedSize(ends_) +
SerializedSize(axes_) + SerializedSize(ban_fp16_);
}
void SlicePlugin::serialize(void *buffer) {
SerializeValue(&buffer, getPluginType());
serializeBase(buffer);
SerializeValue(&buffer, starts_);
SerializeValue(&buffer, ends_);
SerializeValue(&buffer, axes_);
SerializeValue(&buffer, ban_fp16_);
}
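// Note: serialize() must write starts_, ends_, axes_ and ban_fp16_ in
// exactly the order the deserialization constructor reads them back, or
// engines restored from a serialized plan would see corrupted attributes.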
// Dynamic Plugin below.
#if IS_TRT_VERSION_GE(6000)
SlicePluginDynamic::SlicePluginDynamic(std::vector<int> starts,
std::vector<int> ends,
std::vector<int> axes, bool ban_fp16)
: starts_(starts), ends_(ends), axes_(axes), ban_fp16_(ban_fp16) {
cudaEventCreate(&copy_event_);
cudaStreamCreate(&copy_stream_);
}
SlicePluginDynamic::SlicePluginDynamic(void const *serialData,
size_t serialLength) {
DeserializeValue(&serialData, &serialLength, &starts_);
DeserializeValue(&serialData, &serialLength, &ends_);
DeserializeValue(&serialData, &serialLength, &axes_);
DeserializeValue(&serialData, &serialLength, &ban_fp16_);
cudaEventCreate(&copy_event_);
cudaStreamCreate(&copy_stream_);
}
void SlicePluginDynamic::destroy() {
cudaStreamDestroy(copy_stream_);
cudaEventDestroy(copy_event_);
cudaFree(offset_temp_data_);
delete this;
}
int SlicePluginDynamic::initialize() { return 0; } int SlicePluginDynamic::initialize() { return 0; }
size_t SlicePluginDynamic::getSerializationSize() const { return 0; } size_t SlicePluginDynamic::getSerializationSize() const {
size_t size = SerializedSize(starts_) + SerializedSize(ends_) +
SerializedSize(axes_) + SerializedSize(ban_fp16_);
return size;
}
void SlicePluginDynamic::serialize(void *buffer) const {} void SlicePluginDynamic::serialize(void *buffer) const {
SerializeValue(&buffer, starts_);
SerializeValue(&buffer, ends_);
SerializeValue(&buffer, axes_);
SerializeValue(&buffer, ban_fp16_);
}
nvinfer1::DimsExprs SlicePluginDynamic::getOutputDimensions( nvinfer1::DimsExprs SlicePluginDynamic::getOutputDimensions(
int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs, int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs,
...@@ -136,9 +323,9 @@ int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc, ...@@ -136,9 +323,9 @@ int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc,
std::vector<int> offsets; std::vector<int> offsets;
std::vector<int> extends; std::vector<int> extends;
offsets.reserve(num_dims); offsets.resize(num_dims);
extends.reserve(num_dims); extends.resize(num_dims);
seg_offsets.reserve(num_dims); seg_offsets.resize(num_dims);
seg_offsets[num_dims - 1] = 1; seg_offsets[num_dims - 1] = 1;
for (int i = num_dims - 2; i >= 0; i--) { for (int i = num_dims - 2; i >= 0; i--) {
...@@ -160,16 +347,16 @@ int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc, ...@@ -160,16 +347,16 @@ int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc,
offset_info.push_back(seg_offsets[i]); offset_info.push_back(seg_offsets[i]);
} }
framework::Tensor offset_temp_tensor; if (offset_temp_data_ == nullptr) {
cudaMalloc(&offset_temp_data_, 3 * num_dims * sizeof(int));
}
int device_id; cudaMemcpyAsync(offset_temp_data_, offset_info.data(),
cudaGetDevice(&device_id); sizeof(int) * 3 * num_dims, cudaMemcpyHostToDevice,
offset_temp_tensor.Resize({3 * num_dims}); copy_stream_);
auto *offset_temp_data =
offset_temp_tensor.mutable_data<int>(platform::CUDAPlace(device_id));
cudaMemcpyAsync(offset_temp_data, offset_info.data(), cudaEventRecord(copy_event_, copy_stream_);
sizeof(int) * 3 * num_dims, cudaMemcpyHostToDevice, stream); cudaStreamWaitEvent(stream, copy_event_, 0);
int threads = 256; int threads = 256;
int blocks = (out_num + threads - 1) / threads; int blocks = (out_num + threads - 1) / threads;
...@@ -178,13 +365,13 @@ int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc, ...@@ -178,13 +365,13 @@ int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc,
const float *input1 = static_cast<const float *>(inputs[0]); const float *input1 = static_cast<const float *>(inputs[0]);
float *output = static_cast<float *>(outputs[0]); float *output = static_cast<float *>(outputs[0]);
SliceKernel<float><<<blocks, threads, 3 * num_dims * sizeof(int), stream>>>( SliceKernel<float><<<blocks, threads, 3 * num_dims * sizeof(int), stream>>>(
out_num, num_dims, input1, offset_temp_data, output); out_num, num_dims, input1, offset_temp_data_, output);
} else if (input_type == nvinfer1::DataType::kHALF) { } else if (input_type == nvinfer1::DataType::kHALF) {
#ifdef SUPPORTS_CUDA_FP16 #ifdef SUPPORTS_CUDA_FP16
const half *input1 = static_cast<const half *>(inputs[0]); const half *input1 = static_cast<const half *>(inputs[0]);
half *output = static_cast<half *>(outputs[0]); half *output = static_cast<half *>(outputs[0]);
SliceKernel<half><<<blocks, threads, 3 * num_dims * sizeof(int), stream>>>( SliceKernel<half><<<blocks, threads, 3 * num_dims * sizeof(int), stream>>>(
out_num, num_dims, input1, offset_temp_data, output); out_num, num_dims, input1, offset_temp_data_, output);
#else #else
PADDLE_THROW(platform::errors::Fatal( PADDLE_THROW(platform::errors::Fatal(
"The cuda archs you specific should greater than 600.")); "The cuda archs you specific should greater than 600."));
......
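A host-side sketch of the index mapping the SliceKernel is expected to compute from the {offset, extent, seg_offset} triples packed above (illustrative helper, assuming this packing; the kernel body itself is elided in the hunk):

    #include <vector>

    // Decompose the linear output index dimension by dimension, shift each
    // coordinate by the slice start, and rebuild the linear input index
    // from the input strides (seg_offsets).
    int InputIndexFor(int out_idx, int num_dims,
                      const std::vector<int>& offset_info) {
      int t_idx = out_idx;
      int in_idx = 0;
      for (int i = num_dims - 1; i >= 0; --i) {
        const int extent = offset_info[i * 3 + 1];
        const int coord = t_idx % extent;                // output coordinate
        const int shifted = coord + offset_info[i * 3];  // add slice start
        in_idx += shifted * offset_info[i * 3 + 2];      // scale by stride
        t_idx /= extent;
      }
      return in_idx;
    }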
...@@ -26,17 +26,56 @@ namespace inference { ...@@ -26,17 +26,56 @@ namespace inference {
namespace tensorrt { namespace tensorrt {
namespace plugin { namespace plugin {
class SlicePlugin : public PluginTensorRT {
public:
explicit SlicePlugin(std::vector<int> starts, std::vector<int> ends,
std::vector<int> axes, bool ban_fp16);
// It is used for TensorRT deserialization.
// It should not be called by users.
SlicePlugin(void const* serial_data, size_t serial_length);
~SlicePlugin();
SlicePlugin* clone() const override;
const char* getPluginType() const override { return "slice_plugin"; }
int getNbOutputs() const override { return 1; }
int initialize() override { return 0; }
bool supportsFormat(nvinfer1::DataType type,
nvinfer1::PluginFormat format) const override;
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs,
int nb_input_dims) override;
int enqueue(int batch_size, const void* const* inputs, void** outputs,
void* workspace, cudaStream_t stream) override;
protected:
size_t getSerializationSize() override;
// TRT calls this function to serialize the plugin's configuration.
// It should not be called by users.
void serialize(void* buffer) override;
private:
std::vector<int> starts_;
std::vector<int> ends_;
std::vector<int> axes_;
bool ban_fp16_{false};
int* offset_temp_data_{nullptr};
cudaEvent_t copy_event_;
cudaStream_t copy_stream_;
};
#if IS_TRT_VERSION_GE(6000) #if IS_TRT_VERSION_GE(6000)
class SlicePluginDynamic : public DynamicPluginTensorRT { class SlicePluginDynamic : public DynamicPluginTensorRT {
public: public:
explicit SlicePluginDynamic(std::vector<int> starts, std::vector<int> ends, explicit SlicePluginDynamic(std::vector<int> starts, std::vector<int> ends,
std::vector<int> axes, bool ban_fp16) std::vector<int> axes, bool ban_fp16);
: starts_(starts), ends_(ends), axes_(axes), ban_fp16_(ban_fp16) {}
SlicePluginDynamic(void const* serialData, size_t serialLength) {}
nvinfer1::IPluginV2DynamicExt* clone() const override { nvinfer1::IPluginV2DynamicExt* clone() const override {
return new SlicePluginDynamic(starts_, ends_, axes_, ban_fp16_); return new SlicePluginDynamic(starts_, ends_, axes_, ban_fp16_);
} }
SlicePluginDynamic(void const* serialData, size_t serialLength);
const char* getPluginType() const override { return "slice_plugin"; } const char* getPluginType() const override { return "slice_plugin"; }
int getNbOutputs() const override { return 1; } int getNbOutputs() const override { return 1; }
int initialize() override; int initialize() override;
...@@ -72,15 +111,54 @@ class SlicePluginDynamic : public DynamicPluginTensorRT { ...@@ -72,15 +111,54 @@ class SlicePluginDynamic : public DynamicPluginTensorRT {
const nvinfer1::DataType* inputTypes, const nvinfer1::DataType* inputTypes,
int nbInputs) const override; int nbInputs) const override;
void destroy() override { delete this; } void destroy() override;
private: private:
std::vector<int> starts_; std::vector<int> starts_;
std::vector<int> ends_; std::vector<int> ends_;
std::vector<int> axes_; std::vector<int> axes_;
bool ban_fp16_{false}; bool ban_fp16_{false};
int* offset_temp_data_{nullptr};
cudaEvent_t copy_event_;
cudaStream_t copy_stream_;
}; };
class SlicePluginV2Creator : public nvinfer1::IPluginCreator {
public:
SlicePluginV2Creator() {}
const char* getPluginName() const override { return "slice_plugin"; }
const char* getPluginVersion() const override { return "1"; }
const nvinfer1::PluginFieldCollection* getFieldNames() override {
return &field_collection_;
}
nvinfer1::IPluginV2* createPlugin(
const char* name, const nvinfer1::PluginFieldCollection* fc) override {
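// This creator only restores plugins from serialized engines; building a
// plugin from a PluginFieldCollection is not supported here.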
return nullptr;
}
nvinfer1::IPluginV2* deserializePlugin(const char* name,
const void* serialData,
size_t serialLength) override {
auto plugin = new SlicePluginDynamic(serialData, serialLength);
return plugin;
}
void setPluginNamespace(const char* libNamespace) override {
namespace_ = libNamespace;
}
const char* getPluginNamespace() const override { return namespace_.c_str(); }
private:
std::string namespace_;
nvinfer1::PluginFieldCollection field_collection_;
};
REGISTER_TRT_PLUGIN_V2(SlicePluginV2Creator);
#endif #endif
} // namespace plugin } // namespace plugin
......
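With REGISTER_TRT_PLUGIN_V2 in place, the TensorRT runtime can restore the plugin from a serialized engine via the global registry. A sketch of the lookup it performs (assumes TensorRT headers; the type/version strings match the creator above):

    // The type/version strings must match getPluginName()/getPluginVersion().
    auto* creator = getPluginRegistry()->getPluginCreator("slice_plugin", "1");
    // TensorRT then feeds the bytes written by SlicePluginDynamic::serialize()
    // back through creator->deserializePlugin(name, serial_data, length).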
...@@ -480,10 +480,9 @@ if(WITH_GPU AND TENSORRT_FOUND) ...@@ -480,10 +480,9 @@ if(WITH_GPU AND TENSORRT_FOUND)
inference_download_and_uncompress(${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test "ernie_model_4_unserialized.tgz") inference_download_and_uncompress(${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test "ernie_model_4_unserialized.tgz")
endif() endif()
# disable test_trt_dynamic_shape_ernie_ser_deser temporary inference_analysis_test(test_trt_dynamic_shape_ernie_ser_deser SRCS trt_dynamic_shape_ernie_deserialize_test.cc
#inference_analysis_test(test_trt_dynamic_shape_ernie_ser_deser SRCS trt_dynamic_shape_ernie_deserialize_test.cc EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
# EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_unserialized)
# ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_unserialized)
endif() endif()
......
...@@ -245,8 +245,14 @@ TEST(Analyzer_bert, transfer_scope_cache) { ...@@ -245,8 +245,14 @@ TEST(Analyzer_bert, transfer_scope_cache) {
// Since paddle::framework::global_transfer_scope_cache() and // Since paddle::framework::global_transfer_scope_cache() and
// paddle::framework::global_transfer_data_cache() are thread_local, // paddle::framework::global_transfer_data_cache() are thread_local,
// their pointer should be different among different thread id. // their pointer should be different among different thread id.
PADDLE_ENFORCE(global_transfer_scope_cache.size(), threads_num); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE(global_transfer_data_cache.size(), threads_num); global_transfer_scope_cache.size(), threads_num,
paddle::platform::errors::Fatal(
"The size of scope cache is not equal to thread number."));
PADDLE_ENFORCE_EQ(
global_transfer_data_cache.size(), threads_num,
paddle::platform::errors::Fatal(
"The size of data cache is not equal to thread number."));
} }
} // namespace inference } // namespace inference
......
...@@ -69,11 +69,13 @@ void PD_run() { ...@@ -69,11 +69,13 @@ void PD_run() {
PD_DeletePaddleTensor(input); PD_DeletePaddleTensor(input);
int size; int size;
const int* out_shape = PD_GetPaddleTensorShape(out_data, &size); const int* out_shape = PD_GetPaddleTensorShape(out_data, &size);
CHECK(size == 2) << "The Output shape's size is NOT match."; PADDLE_ENFORCE_EQ(size, 2, paddle::platform::errors::InvalidArgument(
"The Output shape's size is NOT match."));
std::vector<int> ref_outshape_size({9, 6}); std::vector<int> ref_outshape_size({9, 6});
for (int i = 0; i < 2; ++i) { for (int i = 0; i < 2; ++i) {
CHECK(out_shape[i] == ref_outshape_size[i]) PADDLE_ENFORCE_EQ(out_shape[i], ref_outshape_size[i],
<< "The Output's shape is NOT match."; paddle::platform::errors::InvalidArgument(
"The Output shape's size is NOT match."));
} }
PD_DeletePaddleBuf(buf); PD_DeletePaddleBuf(buf);
} }
......
...@@ -36,9 +36,9 @@ void zero_copy_run() { ...@@ -36,9 +36,9 @@ void zero_copy_run() {
PD_SwitchIrDebug(config, true); PD_SwitchIrDebug(config, true);
PD_SetModel(config, prog_file.c_str(), params_file.c_str()); PD_SetModel(config, prog_file.c_str(), params_file.c_str());
bool use_feed_fetch = PD_UseFeedFetchOpsEnabled(config); bool use_feed_fetch = PD_UseFeedFetchOpsEnabled(config);
CHECK(!use_feed_fetch) << "NO"; EXPECT_FALSE(use_feed_fetch);
bool specify_input_names = PD_SpecifyInputName(config); bool specify_input_names = PD_SpecifyInputName(config);
CHECK(specify_input_names) << "NO"; EXPECT_TRUE(specify_input_names);
const int batch_size = 1; const int batch_size = 1;
const int channels = 3; const int channels = 3;
...@@ -85,13 +85,13 @@ TEST(PD_AnalysisConfig, profile_mkldnn) { ...@@ -85,13 +85,13 @@ TEST(PD_AnalysisConfig, profile_mkldnn) {
PD_SwitchIrDebug(config, true); PD_SwitchIrDebug(config, true);
PD_EnableMKLDNN(config); PD_EnableMKLDNN(config);
bool mkldnn_enable = PD_MkldnnEnabled(config); bool mkldnn_enable = PD_MkldnnEnabled(config);
CHECK(mkldnn_enable) << "NO"; EXPECT_TRUE(mkldnn_enable);
PD_EnableMkldnnQuantizer(config); PD_EnableMkldnnQuantizer(config);
bool quantizer_enable = PD_MkldnnQuantizerEnabled(config); bool quantizer_enable = PD_MkldnnQuantizerEnabled(config);
CHECK(quantizer_enable) << "NO"; EXPECT_TRUE(quantizer_enable);
PD_EnableMkldnnBfloat16(config); PD_EnableMkldnnBfloat16(config);
bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config); bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config);
CHECK(bfloat16_enable) << "NO"; EXPECT_TRUE(bfloat16_enable);
PD_SetMkldnnCacheCapacity(config, 0); PD_SetMkldnnCacheCapacity(config, 0);
PD_SetModel(config, prog_file.c_str(), params_file.c_str()); PD_SetModel(config, prog_file.c_str(), params_file.c_str());
PD_DeleteAnalysisConfig(config); PD_DeleteAnalysisConfig(config);
......
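The move from glog's CHECK to gtest's EXPECT_* also changes failure behavior: EXPECT_TRUE records the failure and lets the test keep running, so teardown such as PD_DeleteAnalysisConfig still executes, while CHECK aborts the whole binary. A minimal illustration (gtest assumed):

    EXPECT_TRUE(mkldnn_enable);   // on failure: mark the test failed, continue
    // CHECK(mkldnn_enable);      // on failure: log and terminate the process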
...@@ -126,7 +126,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data, ...@@ -126,7 +126,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
std::string turn_mask_pre = "turn_mask_"; std::string turn_mask_pre = "turn_mask_";
auto one_batch = data->NextBatch(); auto one_batch = data->NextBatch();
PADDLE_ENFORCE(!one_batch.response.empty()); PADDLE_ENFORCE(
!one_batch.response.empty(),
paddle::platform::errors::Fatal("The response of one batch is empty."));
int size = one_batch.response[0].size(); int size = one_batch.response[0].size();
CHECK_EQ(size, kMaxTurnLen); CHECK_EQ(size, kMaxTurnLen);
// turn tensor assignment // turn tensor assignment
...@@ -214,11 +216,17 @@ void profile(bool use_mkldnn = false) { ...@@ -214,11 +216,17 @@ void profile(bool use_mkldnn = false) {
input_slots_all, &outputs, FLAGS_num_threads); input_slots_all, &outputs, FLAGS_num_threads);
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
PADDLE_ENFORCE_GT(outputs.size(), 0); PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of outputs should be greater than 0."));
auto output = outputs.back(); auto output = outputs.back();
PADDLE_ENFORCE_GT(output.size(), 0); PADDLE_ENFORCE_GT(output.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
size_t size = GetSize(output[0]); size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0); PADDLE_ENFORCE_GT(size, 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
float *result = static_cast<float *>(output[0].data.data()); float *result = static_cast<float *>(output[0].data.data());
for (size_t i = 0; i < size; i++) { for (size_t i = 0; i < size; i++) {
EXPECT_NEAR(result[i], result_data[i], 1e-3); EXPECT_NEAR(result[i], result_data[i], 1e-3);
......
...@@ -146,8 +146,9 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData( ...@@ -146,8 +146,9 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
auto iterations = test_data.size(); auto iterations = test_data.size();
PADDLE_ENFORCE_LE( PADDLE_ENFORCE_LE(
static_cast<size_t>(num_images), iterations * test_data_batch_size, static_cast<size_t>(num_images), iterations * test_data_batch_size,
"The requested quantization warmup data size " + paddle::platform::errors::Fatal(
std::to_string(num_images) + " is bigger than all test data size."); "The requested quantization warmup data size " +
std::to_string(num_images) + " is bigger than all test data size."));
PaddleTensor images; PaddleTensor images;
images.name = "image"; images.name = "image";
...@@ -237,8 +238,9 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData( ...@@ -237,8 +238,9 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
} }
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
static_cast<size_t>(num_objects), static_cast<size_t>(objects_accum), static_cast<size_t>(num_objects), static_cast<size_t>(objects_accum),
"The requested num of objects " + std::to_string(num_objects) + paddle::platform::errors::Fatal("The requested num of objects " +
" is the same as objects_accum."); std::to_string(num_objects) +
" is the same as objects_accum."));
auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(4); auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(4);
(*warmup_data)[0] = std::move(images); (*warmup_data)[0] = std::move(images);
......
...@@ -98,7 +98,9 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data, ...@@ -98,7 +98,9 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
input_tensor.name = "word"; input_tensor.name = "word";
input_tensor.dtype = PaddleDType::INT64; input_tensor.dtype = PaddleDType::INT64;
TensorAssignData<int64_t>(&input_tensor, {one_batch.data}, one_batch.lod); TensorAssignData<int64_t>(&input_tensor, {one_batch.data}, one_batch.lod);
PADDLE_ENFORCE_EQ(batch_size, static_cast<int>(one_batch.lod.size() - 1)); PADDLE_ENFORCE_EQ(
batch_size, static_cast<int>(one_batch.lod.size() - 1),
paddle::platform::errors::Fatal("The lod size of one batch is invaild."));
input_slots->assign({input_tensor}); input_slots->assign({input_tensor});
} }
...@@ -137,12 +139,17 @@ TEST(Analyzer_LAC, profile) { ...@@ -137,12 +139,17 @@ TEST(Analyzer_LAC, profile) {
24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25, 25, 25, 25, 25, 24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25, 25, 25, 25, 25,
44, 24, 25, 25, 25, 36, 42, 43, 44, 14, 15, 44, 14, 15, 44, 14, 44, 24, 25, 25, 25, 36, 42, 43, 44, 14, 15, 44, 14, 15, 44, 14,
15, 44, 38, 39, 14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23}; 15, 44, 38, 39, 14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23};
PADDLE_ENFORCE_GT(outputs.size(), 0); PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto output = outputs.back(); auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL); PADDLE_ENFORCE_EQ(output.size(), 1UL,
paddle::platform::errors::Fatal(
"The size of output should be equal to 1."));
size_t size = GetSize(output[0]); size_t size = GetSize(output[0]);
size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t); size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t);
PADDLE_ENFORCE_GE(size, batch1_size); PADDLE_ENFORCE_GE(size, batch1_size, paddle::platform::errors::Fatal(
"The size of batch is invaild."));
int64_t *pdata = static_cast<int64_t *>(output[0].data.data()); int64_t *pdata = static_cast<int64_t *>(output[0].data.data());
for (size_t i = 0; i < batch1_size; ++i) { for (size_t i = 0; i < batch1_size; ++i) {
EXPECT_EQ(pdata[i], lac_ref_data[i]); EXPECT_EQ(pdata[i], lac_ref_data[i]);
......
...@@ -117,11 +117,17 @@ void profile(bool memory_load = false) { ...@@ -117,11 +117,17 @@ void profile(bool memory_load = false) {
// the first inference result // the first inference result
const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26, const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26,
48, 39, 38, 16, 25}; 48, 39, 38, 16, 25};
PADDLE_ENFORCE_GT(outputs.size(), 0); PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto output = outputs.back(); auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL); PADDLE_ENFORCE_EQ(output.size(), 1UL,
paddle::platform::errors::Fatal(
"The size of output should be equal to 1."));
size_t size = GetSize(output[0]); size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0); PADDLE_ENFORCE_GT(size, 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
int64_t *result = static_cast<int64_t *>(output[0].data.data()); int64_t *result = static_cast<int64_t *>(output[0].data.data());
for (size_t i = 0; i < std::min<size_t>(11, size); i++) { for (size_t i = 0; i < std::min<size_t>(11, size); i++) {
EXPECT_EQ(result[i], chinese_ner_result_data[i]); EXPECT_EQ(result[i], chinese_ner_result_data[i]);
......
...@@ -136,11 +136,17 @@ TEST(Analyzer_Pyramid_DNN, profile) { ...@@ -136,11 +136,17 @@ TEST(Analyzer_Pyramid_DNN, profile) {
input_slots_all, &outputs, FLAGS_num_threads); input_slots_all, &outputs, FLAGS_num_threads);
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data && !FLAGS_zero_copy) { if (FLAGS_num_threads == 1 && !FLAGS_test_all_data && !FLAGS_zero_copy) {
PADDLE_ENFORCE_GT(outputs.size(), 0); PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto output = outputs.back(); auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL); PADDLE_ENFORCE_EQ(output.size(), 1UL,
paddle::platform::errors::Fatal(
"The size of output should be equal to 1."));
size_t size = GetSize(output[0]); size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0); PADDLE_ENFORCE_GT(size, 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
float *result = static_cast<float *>(output[0].data.data()); float *result = static_cast<float *>(output[0].data.data());
// output is probability, which is in (0, 1). // output is probability, which is in (0, 1).
for (size_t i = 0; i < size; i++) { for (size_t i = 0; i < size; i++) {
......
...@@ -135,11 +135,17 @@ TEST(Analyzer_rnn2, profile) { ...@@ -135,11 +135,17 @@ TEST(Analyzer_rnn2, profile) {
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
// the first inference result // the first inference result
PADDLE_ENFORCE_GT(outputs.size(), 0); PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto output = outputs.back(); auto output = outputs.back();
PADDLE_ENFORCE_GT(output.size(), 0); PADDLE_ENFORCE_GT(output.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
size_t size = GetSize(output[0]); size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0); PADDLE_ENFORCE_GT(size, 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
float *result = static_cast<float *>(output[0].data.data()); float *result = static_cast<float *>(output[0].data.data());
for (size_t i = 0; i < size; i++) { for (size_t i = 0; i < size; i++) {
EXPECT_NEAR(result[i], result_data[i], 1e-3); EXPECT_NEAR(result[i], result_data[i], 1e-3);
......
...@@ -47,7 +47,8 @@ struct DataRecord { ...@@ -47,7 +47,8 @@ struct DataRecord {
num_lines++; num_lines++;
std::vector<std::string> data; std::vector<std::string> data;
split(line, '\t', &data); split(line, '\t', &data);
PADDLE_ENFORCE(data.size() >= 4); PADDLE_ENFORCE_GE(data.size(), 4, paddle::platform::errors::Fatal(
"The size of data should be no less than 4."));
// load title1 data // load title1 data
std::vector<int64_t> title1_data; std::vector<int64_t> title1_data;
split_to_int64(data[0], ' ', &title1_data); split_to_int64(data[0], ' ', &title1_data);
...@@ -120,11 +121,17 @@ TEST(Analyzer_seq_conv1, profile) { ...@@ -120,11 +121,17 @@ TEST(Analyzer_seq_conv1, profile) {
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
// the first inference result // the first inference result
PADDLE_ENFORCE_GT(outputs.size(), 0); PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto output = outputs.back(); auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL); PADDLE_ENFORCE_EQ(output.size(), 1UL,
paddle::platform::errors::Fatal(
"The size of output should be equal to 0."));
size_t size = GetSize(output[0]); size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0); PADDLE_ENFORCE_GT(size, 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
float *result = static_cast<float *>(output[0].data.data()); float *result = static_cast<float *>(output[0].data.data());
// output is probability, which is in (0, 1). // output is probability, which is in (0, 1).
for (size_t i = 0; i < size; i++) { for (size_t i = 0; i < size; i++) {
......
...@@ -56,20 +56,26 @@ struct DataRecord { ...@@ -56,20 +56,26 @@ struct DataRecord {
std::vector<float> slot_data; std::vector<float> slot_data;
split_to_float(data[1], ' ', &slot_data); split_to_float(data[1], ' ', &slot_data);
std::string name = data[0]; std::string name = data[0];
PADDLE_ENFORCE_EQ(slot_data.size() % 11, 0UL, PADDLE_ENFORCE_EQ(
"line %d, %s should be divisible", num_lines, name); slot_data.size() % 11, 0UL,
paddle::platform::errors::Fatal("line %d, %s should be divisible",
num_lines, name));
datasets[name].emplace_back(std::move(slot_data)); datasets[name].emplace_back(std::move(slot_data));
} }
num_samples = num_lines / num_slots; num_samples = num_lines / num_slots;
PADDLE_ENFORCE_EQ(num_samples * num_slots, static_cast<size_t>(num_lines), PADDLE_ENFORCE_EQ(
"num samples should be divisible"); num_samples * num_slots, static_cast<size_t>(num_lines),
PADDLE_ENFORCE_GT(num_samples, 0UL); paddle::platform::errors::Fatal("num samples should be divisible"));
PADDLE_ENFORCE_GT(num_samples, 0UL,
paddle::platform::errors::Fatal(
"The num of samples should be greater than 0."));
} }
void Prepare(int bs) { void Prepare(int bs) {
for (auto it = datasets.begin(); it != datasets.end(); ++it) { for (auto it = datasets.begin(); it != datasets.end(); ++it) {
PADDLE_ENFORCE_EQ(it->second.size(), num_samples, PADDLE_ENFORCE_EQ(
"size of each slot should be equal"); it->second.size(), num_samples,
paddle::platform::errors::Fatal("size of each slot should be equal"));
} }
size_t num_batches = num_samples / bs; size_t num_batches = num_samples / bs;
EXPECT_GT(num_batches, 0UL); EXPECT_GT(num_batches, 0UL);
...@@ -90,8 +96,10 @@ struct DataRecord { ...@@ -90,8 +96,10 @@ struct DataRecord {
std::copy(datas[id].begin(), datas[id].end(), std::copy(datas[id].begin(), datas[id].end(),
std::back_inserter(slot.data[k])); std::back_inserter(slot.data[k]));
size_t len = datas[id].size() / 11; size_t len = datas[id].size() / 11;
PADDLE_ENFORCE_EQ(len * 11, datas[id].size(), PADDLE_ENFORCE_EQ(
"%s %d size should be divisible", slot.name, id); len * 11, datas[id].size(),
paddle::platform::errors::Fatal("%s %d size should be divisible",
slot.name, id));
lod[k + 1] = lod[k] + len; lod[k + 1] = lod[k] + len;
} }
slot.shape.assign({static_cast<int>(lod[bs]), 11}); slot.shape.assign({static_cast<int>(lod[bs]), 11});
......
...@@ -22,7 +22,9 @@ struct DataReader { ...@@ -22,7 +22,9 @@ struct DataReader {
: file(new std::ifstream(path)) {} : file(new std::ifstream(path)) {}
bool NextBatch(std::vector<PaddleTensor> *input, int batch_size) { bool NextBatch(std::vector<PaddleTensor> *input, int batch_size) {
PADDLE_ENFORCE_EQ(batch_size, 1); PADDLE_ENFORCE_EQ(batch_size, 1,
paddle::platform::errors::Fatal(
"The size of batch should be equal to 1."));
std::string line; std::string line;
PaddleTensor tensor; PaddleTensor tensor;
tensor.dtype = PaddleDType::INT64; tensor.dtype = PaddleDType::INT64;
...@@ -81,7 +83,9 @@ TEST(Analyzer_Text_Classification, profile) { ...@@ -81,7 +83,9 @@ TEST(Analyzer_Text_Classification, profile) {
if (FLAGS_num_threads == 1) { if (FLAGS_num_threads == 1) {
// Get output // Get output
PADDLE_ENFORCE_GT(outputs.size(), 0); PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
LOG(INFO) << "get outputs " << outputs.back().size(); LOG(INFO) << "get outputs " << outputs.back().size();
for (auto &output : outputs.back()) { for (auto &output : outputs.back()) {
LOG(INFO) << "output.shape: " << to_string(output.shape); LOG(INFO) << "output.shape: " << to_string(output.shape);
......
...@@ -59,7 +59,9 @@ void SetConfig(AnalysisConfig *cfg) { ...@@ -59,7 +59,9 @@ void SetConfig(AnalysisConfig *cfg) {
} }
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) { void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data."); PADDLE_ENFORCE_EQ(
FLAGS_test_all_data, 0,
paddle::platform::errors::Fatal("Only have single batch of data."));
std::string line; std::string line;
std::ifstream file(FLAGS_infer_data); std::ifstream file(FLAGS_infer_data);
std::getline(file, line); std::getline(file, line);
...@@ -99,7 +101,9 @@ void profile(bool use_mkldnn = false) { ...@@ -99,7 +101,9 @@ void profile(bool use_mkldnn = false) {
auto refer = ProcessALine(line); auto refer = ProcessALine(line);
file.close(); file.close();
PADDLE_ENFORCE_GT(outputs.size(), 0); PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto &output = outputs.back().front(); auto &output = outputs.back().front();
size_t numel = output.data.length() / PaddleDtypeSize(output.dtype); size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
CHECK_EQ(numel, refer.data.size()); CHECK_EQ(numel, refer.data.size());
......
...@@ -12,15 +12,33 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,15 +12,33 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <dirent.h>
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include <glog/logging.h> #include <glog/logging.h>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <unistd.h>
#include "paddle/fluid/inference/tests/api/trt_test_helper.h" #include "paddle/fluid/inference/tests/api/trt_test_helper.h"
namespace paddle { namespace paddle {
namespace inference { namespace inference {
int DeleteCache(const std::string& path) {
  DIR* dir = opendir(path.c_str());
  if (dir == NULL) return 0;
  struct dirent* ptr;
  int ret = 0;
  while ((ptr = readdir(dir)) != NULL) {
    if (std::strcmp(ptr->d_name, ".") == 0 ||
        std::strcmp(ptr->d_name, "..") == 0) {
      continue;
    } else if (ptr->d_type == DT_REG) {  // regular file
      std::string file_rm = path + "/" + ptr->d_name;
      // Remove every cached file instead of returning after the first one.
      ret |= remove(file_rm.c_str());
    }
  }
  closedir(dir);
  return ret;
}
void run(const AnalysisConfig& config, std::vector<float>* out_data) { void run(const AnalysisConfig& config, std::vector<float>* out_data) {
auto predictor = CreatePaddlePredictor(config); auto predictor = CreatePaddlePredictor(config);
auto input_names = predictor->GetInputNames(); auto input_names = predictor->GetInputNames();
...@@ -86,6 +104,11 @@ void run(const AnalysisConfig& config, std::vector<float>* out_data) { ...@@ -86,6 +104,11 @@ void run(const AnalysisConfig& config, std::vector<float>* out_data) {
void trt_ernie(bool with_fp16, std::vector<float> result) { void trt_ernie(bool with_fp16, std::vector<float> result) {
AnalysisConfig config; AnalysisConfig config;
std::string model_dir = FLAGS_infer_model; std::string model_dir = FLAGS_infer_model;
// Delete serialization cache to perform serialization first rather than
// deserialization.
std::string opt_cache_dir = FLAGS_infer_model + "/_opt_cache";
DeleteCache(opt_cache_dir);
SetConfig(&config, model_dir, true /* use_gpu */); SetConfig(&config, model_dir, true /* use_gpu */);
config.SwitchUseFeedFetchOps(false); config.SwitchUseFeedFetchOps(false);
......
...@@ -21,6 +21,7 @@ limitations under the License. */ ...@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/io.h" #include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/port.h" #include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
...@@ -162,7 +163,8 @@ void TestInference(const std::string& dirname, ...@@ -162,7 +163,8 @@ void TestInference(const std::string& dirname,
// int device_id = place.GetDeviceId(); // int device_id = place.GetDeviceId();
paddle::platform::SetDeviceId(0); paddle::platform::SetDeviceId(0);
#else #else
PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); PADDLE_THROW(paddle::platform::errors::Unavailable(
"'CUDAPlace' is not supported in CPU only device."));
#endif #endif
} }
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <random> #include <random>
#include <thread> // NOLINT #include <thread> // NOLINT
#include <vector> #include <vector>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cuda_allocator.h" #include "paddle/fluid/memory/allocation/cuda_allocator.h"
...@@ -41,12 +42,14 @@ TEST(BestFitAllocator, concurrent_cuda) { ...@@ -41,12 +42,14 @@ TEST(BestFitAllocator, concurrent_cuda) {
LockedAllocator concurrent_allocator( LockedAllocator concurrent_allocator(
std::unique_ptr<Allocator>(new BestFitAllocator(cuda_allocation.get()))); std::unique_ptr<Allocator>(new BestFitAllocator(cuda_allocation.get())));
platform::CUDAPlace gpu(0);
platform::CUDADeviceContext dev_ctx(gpu);
auto th_main = [&](std::random_device::result_type seed) { auto th_main = [&](std::random_device::result_type seed) {
std::default_random_engine engine(seed); std::default_random_engine engine(seed);
std::uniform_int_distribution<size_t> dist(1U, 1024U); std::uniform_int_distribution<size_t> dist(1U, 1024U);
platform::CUDAPlace gpu(0);
platform::CUDADeviceContext dev_ctx(gpu);
std::array<size_t, 1024> buf; std::array<size_t, 1024> buf;
for (size_t i = 0; i < 128; ++i) { for (size_t i = 0; i < 128; ++i) {
size_t allocate_size = dist(engine); size_t allocate_size = dist(engine);
......
...@@ -110,10 +110,12 @@ struct VisitDataArgMinMaxFunctor { ...@@ -110,10 +110,12 @@ struct VisitDataArgMinMaxFunctor {
CALL_ARG_MINMAX_FUNCTOR(6); CALL_ARG_MINMAX_FUNCTOR(6);
break; break;
default: default:
PADDLE_THROW( PADDLE_ENFORCE_LE(
"%s operator doesn't supports tensors whose ranks are greater " x_dims.size(), 6,
"than 6.", platform::errors::InvalidArgument(
(EnumArgMinMaxValue == kArgMin ? "argmin" : "argmax")); "%s operator doesn't support tensors whose ranks are greater "
"than 6.",
(EnumArgMinMaxValue == kArgMin ? "argmin" : "argmax")));
break; break;
#undef CALL_ARG_MINMAX_FUNCTOR #undef CALL_ARG_MINMAX_FUNCTOR
} }
...@@ -164,7 +166,8 @@ class ArgMinMaxOp : public framework::OperatorWithKernel { ...@@ -164,7 +166,8 @@ class ArgMinMaxOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_LT( PADDLE_ENFORCE_LT(
axis, x_dims.size(), axis, x_dims.size(),
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"'axis'(%d) must be less than Rank(X)(%d).", axis, x_dims.size())); "'axis'(%d) must be less than Rank(X)(%d) of Input(X).", axis,
x_dims.size()));
const int& dtype = ctx->Attrs().Get<int>("dtype"); const int& dtype = ctx->Attrs().Get<int>("dtype");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
...@@ -192,10 +195,11 @@ class ArgMinMaxOp : public framework::OperatorWithKernel { ...@@ -192,10 +195,11 @@ class ArgMinMaxOp : public framework::OperatorWithKernel {
} }
PADDLE_ENFORCE_LE( PADDLE_ENFORCE_LE(
all_element_num, INT_MAX, all_element_num, INT_MAX,
"The element num of the argmin/argmax input at axis is " platform::errors::InvalidArgument(
"%d, is larger than int32 maximum value:%d, you must " "The element num of the argmin/argmax input at axis is "
"set the dtype of argmin/argmax to 'int64'.", "%d, is larger than int32 maximum value:%d, you must "
all_element_num, INT_MAX); "set the dtype of argmin/argmax to 'int64'.",
all_element_num, INT_MAX));
} }
} }
std::vector<int64_t> vec; std::vector<int64_t> vec;
......
...@@ -52,7 +52,10 @@ class AssignFunctor { ...@@ -52,7 +52,10 @@ class AssignFunctor {
template <typename T> template <typename T>
void operator()(const T &v) const { void operator()(const T &v) const {
PADDLE_THROW("Not support type for assign op %s", typeid(T).name()); PADDLE_ENFORCE_EQ(
true, false,
platform::errors::PermissionDenied(
"Not support type for assign op with type %s", typeid(T).name()));
} }
private: private:
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/cudnn_helper.h"
#include "paddle/fluid/platform/dynload/cudnn.h"
namespace paddle {
namespace operators {
class ScopedRNNBase {
public:
ScopedRNNBase(int seq_length, int batch_size, int input_size, int hidden_size,
int num_layers, float dropout_prob, int seed, int weight_numel,
bool initialized, bool is_bidirec)
: seq_length_(seq_length),
batch_size_(batch_size),
input_size_(input_size),
hidden_size_(hidden_size),
num_layers_(num_layers),
dropout_prob_(dropout_prob),
seed_(seed),
weight_numel_(weight_numel),
initialized_(initialized),
is_bidirec_(is_bidirec) {}
template <typename T>
void Create(const cudnnHandle_t& handle, const platform::Place& place,
const std::vector<int>& sequence_length, size_t* workspace_size,
size_t* reserve_size, framework::Tensor* dropout_state) {
int numDirections = is_bidirec_ ? 2 : 1;
cudnnDataType_t cudnn_type = platform::CudnnDataType<T>::type;
// ------------------- cudnn x, y descriptors ---------------------
std::vector<int> dims_x = {batch_size_, input_size_, 1};
std::vector<int> strides_x = {input_size_, 1, 1};
std::vector<int> dims_y = {batch_size_, hidden_size_ * numDirections, 1};
std::vector<int> strides_y = {hidden_size_ * numDirections, 1, 1};
for (int i = 0; i < seq_length_; ++i) {
x_descs_.emplace_back(x_desc_.descriptor<T>(dims_x, strides_x));
y_descs_.emplace_back(y_desc_.descriptor<T>(dims_y, strides_y));
}
if (!sequence_length.empty()) {
x_seq_desc_.descriptor<T>(seq_length_, batch_size_, input_size_, true,
sequence_length);
y_seq_desc_.descriptor<T>(seq_length_, batch_size_,
hidden_size_ * numDirections, true,
sequence_length);
}
// ------------------- cudnn hx, hy, cx, cy descriptors----------
std::vector<int> dims_hx = {num_layers_ * numDirections, batch_size_,
hidden_size_};
std::vector<int> strides_hx = {hidden_size_ * batch_size_, hidden_size_, 1};
init_h_desc_.descriptor<T>(dims_hx, strides_hx);
init_c_desc_.descriptor<T>(dims_hx, strides_hx);
last_h_desc_.descriptor<T>(dims_hx, strides_hx);
last_c_desc_.descriptor<T>(dims_hx, strides_hx);
// ------------------- cudnn dropout descriptors ---------------------
size_t state_size;
if (!initialized_) {
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cudnnDropoutGetStatesSize(handle, &state_size));
dropout_state->mutable_data<uint8_t>({static_cast<int64_t>(state_size)},
place);
}
dropout_desc_.descriptor(handle, place, initialized_, dropout_prob_,
dropout_state, seed_, state_size);
// ------------------- cudnn rnn descriptors ---------------------
#if CUDNN_VERSION >= 6000
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor_v6(
handle, rnn_desc_.desc(), hidden_size_, num_layers_,
dropout_desc_.desc(), CUDNN_LINEAR_INPUT,
is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
CUDNN_RNN_ALGO_STANDARD, cudnn_type));
#else
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor(
rnn_desc_.desc(), hidden_size_, num_layers_, dropout_desc_.desc(),
CUDNN_LINEAR_INPUT,
is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
cudnn_type));
#endif
if (!sequence_length.empty()) {
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNPaddingMode(
rnn_desc_.desc(), CUDNN_RNN_PADDED_IO_ENABLED));
}
// ------------------- cudnn weights_size ---------------------
size_t weights_size_;
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNParamsSize(
handle, rnn_desc_.desc(), x_descs_[0], &weights_size_, cudnn_type));
PADDLE_ENFORCE_EQ(
weights_size_, sizeof(T) * weight_numel_,
platform::errors::InvalidArgument(
"The cudnn lstm and setting weight size should be same."));
// ------------------- cudnn weight descriptors ---------------------
platform::DataLayout layout = platform::DataLayout::kNCHW;
int dim_tmp = weights_size_ / sizeof(T);
std::vector<int> dim_w = {dim_tmp, 1, 1};
weight_desc_.descriptor<T>(layout, dim_w);
// ------------------- cudnn workspace, reserve size ---------------------
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNWorkspaceSize(
handle, rnn_desc_.desc(), seq_length_, x_descs_.data(),
workspace_size));
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cudnnGetRNNTrainingReserveSize(
handle, rnn_desc_.desc(), seq_length_, x_descs_.data(),
reserve_size));
}
cudnnTensorDescriptor_t* x_descs() { return x_descs_.data(); }
cudnnTensorDescriptor_t* y_descs() { return y_descs_.data(); }
cudnnRNNDataDescriptor_t x_seq_desc() { return x_seq_desc_.desc(); }
cudnnRNNDataDescriptor_t y_seq_desc() { return y_seq_desc_.desc(); }
cudnnTensorDescriptor_t init_h_desc() { return init_h_desc_.desc(); }
cudnnTensorDescriptor_t init_c_desc() { return init_c_desc_.desc(); }
cudnnTensorDescriptor_t last_h_desc() { return last_h_desc_.desc(); }
cudnnTensorDescriptor_t last_c_desc() { return last_c_desc_.desc(); }
cudnnRNNDescriptor_t rnn_desc() { return rnn_desc_.desc(); }
cudnnDropoutDescriptor_t dropout_desc() { return dropout_desc_.desc(); }
cudnnFilterDescriptor_t weight_desc() { return weight_desc_.desc(); }
private:
int seq_length_;
int batch_size_;
int input_size_;
int hidden_size_;
int num_layers_;
float dropout_prob_;
int seed_;
int weight_numel_;
bool initialized_;
bool is_bidirec_;
std::vector<cudnnTensorDescriptor_t> x_descs_;
std::vector<cudnnTensorDescriptor_t> y_descs_;
platform::ScopedTensorDescriptor x_desc_;
platform::ScopedTensorDescriptor y_desc_;
platform::ScopedRNNTensorDescriptor x_seq_desc_;
platform::ScopedRNNTensorDescriptor y_seq_desc_;
platform::ScopedTensorDescriptor init_h_desc_;
platform::ScopedTensorDescriptor init_c_desc_;
platform::ScopedTensorDescriptor last_h_desc_;
platform::ScopedTensorDescriptor last_c_desc_;
platform::ScopedDropoutDescriptor dropout_desc_;
platform::ScopedFilterDescriptor weight_desc_;
platform::ScopedRNNDescriptor rnn_desc_;
};
} // namespace operators
} // namespace paddle
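A hedged usage sketch of the new ScopedRNNBase, mirroring the call site in cudnn_lstm_op.cu.cc further below (all sizes are illustrative placeholders):

    // Assumes `handle` (cudnnHandle_t) and `place` come from the device
    // context, as in the kernel below.
    ScopedRNNBase rnn(seq_len, batch_size, input_size, hidden_size, num_layers,
                      /*dropout_prob=*/0.0f, /*seed=*/0, weight_numel,
                      /*initialized=*/false, /*is_bidirec=*/false);
    size_t workspace_size = 0, reserve_size = 0;
    framework::Tensor dropout_state;
    // An empty sequence_length vector selects the unpadded cudnn code path.
    rnn.Create<float>(handle, place, /*sequence_length=*/{}, &workspace_size,
                      &reserve_size, &dropout_state);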
...@@ -51,6 +51,16 @@ class CudnnLSTMOp : public framework::OperatorWithKernel { ...@@ -51,6 +51,16 @@ class CudnnLSTMOp : public framework::OperatorWithKernel {
"received InitH's rank is %d.", "received InitH's rank is %d.",
init_h_dims.size())); init_h_dims.size()));
if (ctx->HasInput("SequenceLength")) {
auto seq_dims = ctx->GetInputDim("SequenceLength");
PADDLE_ENFORCE_EQ(
in_dims[1], seq_dims[0],
platform::errors::InvalidArgument(
"The size of SequenceLength has to equal the batch_size. But "
"received batch_size is %d and the size of SequenceLength is %d.",
in_dims[1], seq_dims[0]));
}
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
in_dims[1], init_h_dims[1], in_dims[1], init_h_dims[1],
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
...@@ -113,6 +123,12 @@ class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -113,6 +123,12 @@ class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor) the learnable hidden-hidden weights." "(Tensor) the learnable hidden-hidden weights."
" The shape is (N), where N is total weight size of the LSTM. " " The shape is (N), where N is total weight size of the LSTM. "
" cudnn concatenate all the weight to one Tensor"); " cudnn concatenate all the weight to one Tensor");
AddInput("SequenceLength",
"(Tensor) When the input data is padding, "
"set this parameter. This parameter represents "
"the variable sequence lengths in a batch. "
"The size of the vector has to equal the batch_size.")
.AsDispensable();
AddOutput("Reserve", AddOutput("Reserve",
"(Tensor, a temporary output Tensor to store the reserve_data " "(Tensor, a temporary output Tensor to store the reserve_data "
"of cudnn kernel.") "of cudnn kernel.")
...@@ -155,13 +171,6 @@ class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -155,13 +171,6 @@ class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault(1); .SetDefault(1);
AddAttr<bool>("is_test", "True if in test phase.").SetDefault(false); AddAttr<bool>("is_test", "True if in test phase.").SetDefault(false);
AddAttr<int>("seed", "seed to used if fix_seed is True").SetDefault(0); AddAttr<int>("seed", "seed to used if fix_seed is True").SetDefault(0);
AddAttr<std::vector<int>>("sequence_length",
"(vector<int>) When the input data is padding, "
"set this parameter. This parameter represents "
"the variable sequence"
"lengths in a batch. The size of the vector has "
"to equal the batch_size.")
.SetDefault({});
AddComment(R"DOC( AddComment(R"DOC(
CUDNN LSTM implementation CUDNN LSTM implementation
...@@ -243,6 +252,9 @@ class CudnnLSTMGradOpMaker : public framework::SingleGradOpMaker<T> { ...@@ -243,6 +252,9 @@ class CudnnLSTMGradOpMaker : public framework::SingleGradOpMaker<T> {
op->SetInput("InitH", this->Input("InitH")); op->SetInput("InitH", this->Input("InitH"));
op->SetInput("InitC", this->Input("InitC")); op->SetInput("InitC", this->Input("InitC"));
op->SetInput("W", this->Input("W")); op->SetInput("W", this->Input("W"));
if (this->HasInput("SequenceLength")) {
op->SetInput("SequenceLength", this->Input("SequenceLength"));
}
op->SetInput("Reserve", this->Output("Reserve")); op->SetInput("Reserve", this->Output("Reserve"));
op->SetInput("StateOut", this->Output("StateOut")); op->SetInput("StateOut", this->Output("StateOut"));
op->SetInput("Out", this->Output("Out")); op->SetInput("Out", this->Output("Out"));
......
...@@ -13,8 +13,9 @@ See the License for the specific language governing permissions and ...@@ -13,8 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h" #include "paddle/fluid/operators/cudnn_lstm_cache.h"
#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/cudnn_desc.h" #include "paddle/fluid/platform/cudnn_desc.h"
#include "paddle/fluid/platform/cudnn_helper.h" #include "paddle/fluid/platform/cudnn_helper.h"
...@@ -24,6 +25,43 @@ namespace operators { ...@@ -24,6 +25,43 @@ namespace operators {
using LoDTensor = framework::LoDTensor; using LoDTensor = framework::LoDTensor;
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
template <typename T>
void LSTMInferece(const bool &has_seq_length, const cudnnHandle_t &handle,
const int &seq_length, ScopedRNNBase *rnn, const T *x_data,
const T *init_h_data, const T *init_c_data, const T *w_data,
T *out_data, T *last_h_data, T *last_c_data,
framework::Tensor *workspace_data,
const size_t &workspace_size) {
if (!has_seq_length) {
// for inference
// This interface is used when the input/output is unpadded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardInference(
handle, rnn->rnn_desc(), seq_length, rnn->x_descs(), x_data,
rnn->init_h_desc(), init_h_data, rnn->init_c_desc(), init_c_data,
rnn->weight_desc(), w_data, rnn->y_descs(), out_data,
rnn->last_h_desc(), last_h_data, rnn->last_c_desc(), last_c_data,
workspace_data->data<uint8_t>(), workspace_size));
} else {
#if CUDNN_VERSION >= 7201
// for inference
// This interface is used when the input/output is padded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardInferenceEx(
handle, rnn->rnn_desc(), rnn->x_seq_desc(), x_data, rnn->init_h_desc(),
init_h_data, rnn->init_c_desc(), init_c_data, rnn->weight_desc(),
w_data, rnn->y_seq_desc(), out_data, rnn->last_h_desc(), last_h_data,
rnn->last_c_desc(), last_c_data, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, workspace_data->data<uint8_t>(),
workspace_size));
#else
// CUDNN VERSION has to be >= 7.2.1
PADDLE_THROW(platform::errors::Unavailable(
"The padded input is supported by "
"cudnnRNNForwardInferenceEx, but it only works when "
"the version of cudnn is larger than 7.2.1"));
#endif
}
}
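The kernels below read the optional SequenceLength input through operators::GetDataFromTensor<int> (the utils.h include added above). A hedged sketch of what such a helper is assumed to do: stage the tensor on the host if necessary and expose it as a std::vector; the real implementation lives in paddle/fluid/operators/utils.h.

template <typename T>
std::vector<T> GetDataFromTensorSketch(const framework::Tensor* x) {
  framework::Tensor cpu_copy;
  const framework::Tensor* src = x;
  if (!platform::is_cpu_place(x->place())) {
    // lengths may live on the GPU; copy them to the host first
    framework::TensorCopySync(*x, platform::CPUPlace(), &cpu_copy);
    src = &cpu_copy;
  }
  const T* data = src->data<T>();
  return std::vector<T>(data, data + src->numel());
}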
template <typename T> template <typename T>
class CudnnLSTMGPUKernel : public framework::OpKernel<T> { class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
public: public:
...@@ -56,7 +94,13 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> { ...@@ -56,7 +94,13 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
int num_layers = ctx.Attr<int>("num_layers"); int num_layers = ctx.Attr<int>("num_layers");
bool is_test = ctx.Attr<bool>("is_test"); bool is_test = ctx.Attr<bool>("is_test");
int seed = ctx.Attr<int>("seed"); int seed = ctx.Attr<int>("seed");
auto sequence_length = ctx.Attr<std::vector<int>>("sequence_length");
bool has_seq_length = ctx.HasInput("SequenceLength");
std::vector<int> SequenceLength;
if (has_seq_length) {
auto *sequence_length = ctx.Input<Tensor>("SequenceLength");
SequenceLength = operators::GetDataFromTensor<int>(sequence_length);
}
auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>(); auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
auto handle = dev_ctx.cudnn_handle(); auto handle = dev_ctx.cudnn_handle();
...@@ -70,58 +114,32 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> { ...@@ -70,58 +114,32 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
size_t workspace_size; size_t workspace_size;
size_t reserve_size; size_t reserve_size;
platform::ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size, ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size,
num_layers, dropout_prob, seed, weight_numel, num_layers, dropout_prob, seed, weight_numel,
state_initialized, is_bidirec); state_initialized, is_bidirec);
rnn.Create<T>(handle, ctx.GetPlace(), sequence_length, &workspace_size, rnn.Create<T>(handle, ctx.GetPlace(), SequenceLength, &workspace_size,
&reserve_size, state_out); &reserve_size, state_out);
framework::Tensor workspace_data_; framework::Tensor workspace_data_;
workspace_data_.Resize({static_cast<int64_t>(workspace_size)}); workspace_data_.mutable_data<uint8_t>(
workspace_data_.mutable_data<uint8_t>(ctx.GetPlace()); {static_cast<int64_t>(workspace_size)}, ctx.GetPlace());
auto *reserve_data = reserve->mutable_data<uint8_t>( auto *reserve_data = reserve->mutable_data<uint8_t>(
{static_cast<int64_t>(reserve_size)}, ctx.GetPlace()); {static_cast<int64_t>(reserve_size)}, ctx.GetPlace());
if (is_test) { if (is_test) {
if (sequence_length.empty()) { LSTMInferece<T>(has_seq_length, handle, seq_length, &rnn, x_data,
// for inference init_h_data, init_c_data, w_data, out_data, last_h_data,
// This interface is used when the input/output is unpadded. last_c_data, &workspace_data_, workspace_size);
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardInference(
handle, rnn.rnn_desc(), seq_length, rnn.x_desc(), x_data,
rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data,
rnn.w_desc(), w_data, rnn.y_desc(), out_data, rnn.hy_desc(),
last_h_data, rnn.cy_desc(), last_c_data,
workspace_data_.data<uint8_t>(), workspace_size));
} else {
#if CUDNN_VERSION >= 7201
// for inference
// This interface is used when the input/output is padded.
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cudnnRNNForwardInferenceEx(
handle, rnn.rnn_desc(), rnn.x_seq_desc(), x_data, rnn.hx_desc(),
init_h_data, rnn.cx_desc(), init_c_data, rnn.w_desc(), w_data,
rnn.y_seq_desc(), out_data, rnn.hy_desc(), last_h_data,
rnn.cy_desc(), last_c_data, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr,
workspace_data_.data<uint8_t>(), workspace_size));
#else
PADDLE_ENFORCE_NOT_NULL(
nullptr, platform::errors::Unavailable(
"The padded input is supported by "
"cudnnRNNForwardInferenceEx, but it only works when "
"the version of cudnn is larger than 7.2.1"));
#endif
}
} else { } else {
if (sequence_length.empty()) { if (!has_seq_length) {
// for train // for train
// This interface is used when the input/output is unpadded. // This interface is used when the input/output is unpadded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardTraining( PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardTraining(
handle, rnn.rnn_desc(), seq_length, rnn.x_desc(), x_data, handle, rnn.rnn_desc(), seq_length, rnn.x_descs(), x_data,
rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data, rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data,
rnn.w_desc(), w_data, rnn.y_desc(), out_data, rnn.hy_desc(), rnn.weight_desc(), w_data, rnn.y_descs(), out_data,
last_h_data, rnn.cy_desc(), last_c_data, rnn.last_h_desc(), last_h_data, rnn.last_c_desc(), last_c_data,
workspace_data_.data<uint8_t>(), workspace_size, reserve_data, workspace_data_.data<uint8_t>(), workspace_size, reserve_data,
reserve_size)); reserve_size));
} else { } else {
...@@ -130,19 +148,18 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> { ...@@ -130,19 +148,18 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
// This interface is used when the input/output is padded. // This interface is used when the input/output is padded.
PADDLE_ENFORCE_CUDA_SUCCESS( PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cudnnRNNForwardTrainingEx( platform::dynload::cudnnRNNForwardTrainingEx(
handle, rnn.rnn_desc(), rnn.x_seq_desc(), x_data, rnn.hx_desc(), handle, rnn.rnn_desc(), rnn.x_seq_desc(), x_data,
init_h_data, rnn.cx_desc(), init_c_data, rnn.w_desc(), w_data, rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data,
rnn.y_seq_desc(), out_data, rnn.hy_desc(), last_h_data, rnn.weight_desc(), w_data, rnn.y_seq_desc(), out_data,
rnn.cy_desc(), last_c_data, nullptr, nullptr, nullptr, nullptr, rnn.last_h_desc(), last_h_data, rnn.last_c_desc(), last_c_data,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
workspace_data_.data<uint8_t>(), workspace_size, reserve_data, nullptr, workspace_data_.data<uint8_t>(), workspace_size,
reserve_size)); reserve_data, reserve_size));
#else #else
PADDLE_ENFORCE_NOT_NULL( PADDLE_THROW(platform::errors::Unavailable(
nullptr, platform::errors::Unavailable( "The padded input is supported by "
"The padded input is supported by " "cudnnRNNForwardTrainingEx, but it only works when "
"cudnnRNNForwardTrainingEx, but it only works when " "the version of cudnn is larger than 7.2.1"));
"the version of cudnn is larger than 7.2.1"));
#endif #endif
} }
} }
...@@ -203,7 +220,13 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> { ...@@ -203,7 +220,13 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
int hidden_size = ctx.Attr<int>("hidden_size"); int hidden_size = ctx.Attr<int>("hidden_size");
int num_layers = ctx.Attr<int>("num_layers"); int num_layers = ctx.Attr<int>("num_layers");
int seed = ctx.Attr<int>("seed"); int seed = ctx.Attr<int>("seed");
auto sequence_length = ctx.Attr<std::vector<int>>("sequence_length");
bool has_seq_length = ctx.HasInput("SequenceLength");
std::vector<int> SequenceLength;
if (has_seq_length) {
auto *sequence_length = ctx.Input<Tensor>("SequenceLength");
SequenceLength = operators::GetDataFromTensor<int>(sequence_length);
}
int seq_length = input_dims[0]; int seq_length = input_dims[0];
int batch_size = input->dims()[1]; int batch_size = input->dims()[1];
...@@ -213,33 +236,33 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> { ...@@ -213,33 +236,33 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
size_t workspace_size; size_t workspace_size;
size_t reserve_size; size_t reserve_size;
platform::ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size, ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size,
num_layers, dropout_prob, seed, weight_numel, num_layers, dropout_prob, seed, weight_numel, true,
true, is_bidirec); is_bidirec);
rnn.Create<T>(handle, ctx.GetPlace(), sequence_length, &workspace_size, rnn.Create<T>(handle, ctx.GetPlace(), SequenceLength, &workspace_size,
&reserve_size, const_cast<Tensor *>(state_out)); &reserve_size, const_cast<Tensor *>(state_out));
framework::Tensor workspace_data_; framework::Tensor workspace_data_;
workspace_data_.Resize({static_cast<int64_t>(workspace_size)}); workspace_data_.mutable_data<uint8_t>(
workspace_data_.mutable_data<uint8_t>(ctx.GetPlace()); {static_cast<int64_t>(workspace_size)}, ctx.GetPlace());
const uint8_t *reserve_data = reserve->data<uint8_t>(); const uint8_t *reserve_data = reserve->data<uint8_t>();
if (sequence_length.empty()) { if (!has_seq_length) {
// This interface is used when the input/output is unpadded. // This interface is used when the input/output is unpadded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardData( PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardData(
handle, rnn.rnn_desc(), seq_length, rnn.y_desc(), out_data, handle, rnn.rnn_desc(), seq_length, rnn.y_descs(), out_data,
rnn.y_desc(), out_grad_data, rnn.hy_desc(), last_h_grad_data, rnn.y_descs(), out_grad_data, rnn.last_h_desc(), last_h_grad_data,
rnn.cy_desc(), last_c_grad_data, rnn.w_desc(), weight_data, rnn.last_c_desc(), last_c_grad_data, rnn.weight_desc(), weight_data,
rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data, rnn.x_desc(), rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data,
in_grad_data, rnn.hx_desc(), init_h_grad_data, rnn.cx_desc(), rnn.x_descs(), in_grad_data, rnn.init_h_desc(), init_h_grad_data,
init_c_grad_data, workspace_data_.data<uint8_t>(), workspace_size, rnn.init_c_desc(), init_c_grad_data, workspace_data_.data<uint8_t>(),
const_cast<uint8_t *>(reserve_data), reserve_size)); workspace_size, const_cast<uint8_t *>(reserve_data), reserve_size));
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardWeights( PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardWeights(
handle, rnn.rnn_desc(), seq_length, rnn.x_desc(), input->data<T>(), handle, rnn.rnn_desc(), seq_length, rnn.x_descs(), input->data<T>(),
rnn.hx_desc(), init_h->data<T>(), rnn.y_desc(), out->data<T>(), rnn.init_h_desc(), init_h->data<T>(), rnn.y_descs(), out->data<T>(),
workspace_data_.data<uint8_t>(), workspace_size, rnn.w_desc(), workspace_data_.data<uint8_t>(), workspace_size, rnn.weight_desc(),
weight_grad->data<T>(), const_cast<uint8_t *>(reserve_data), weight_grad->data<T>(), const_cast<uint8_t *>(reserve_data),
reserve_size)); reserve_size));
} else { } else {
...@@ -248,27 +271,25 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> { ...@@ -248,27 +271,25 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
// This interface is used when the input/output is padded. // This interface is used when the input/output is padded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardDataEx( PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardDataEx(
handle, rnn.rnn_desc(), rnn.y_seq_desc(), out_data, rnn.y_seq_desc(), handle, rnn.rnn_desc(), rnn.y_seq_desc(), out_data, rnn.y_seq_desc(),
out_grad_data, nullptr, nullptr, rnn.hy_desc(), last_h_grad_data, out_grad_data, nullptr, nullptr, rnn.last_h_desc(), last_h_grad_data,
rnn.cy_desc(), last_c_grad_data, rnn.w_desc(), weight_data, rnn.last_c_desc(), last_c_grad_data, rnn.weight_desc(), weight_data,
rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data, rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data,
rnn.x_seq_desc(), in_grad_data, rnn.hx_desc(), init_h_grad_data, rnn.x_seq_desc(), in_grad_data, rnn.init_h_desc(), init_h_grad_data,
rnn.cx_desc(), init_c_grad_data, nullptr, nullptr, rnn.init_c_desc(), init_c_grad_data, nullptr, nullptr,
workspace_data_.data<uint8_t>(), workspace_size, workspace_data_.data<uint8_t>(), workspace_size,
const_cast<uint8_t *>(reserve_data), reserve_size)); const_cast<uint8_t *>(reserve_data), reserve_size));
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardWeightsEx( PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardWeightsEx(
handle, rnn.rnn_desc(), rnn.x_seq_desc(), input->data<T>(), handle, rnn.rnn_desc(), rnn.x_seq_desc(), input->data<T>(),
rnn.hx_desc(), init_h->data<T>(), rnn.y_seq_desc(), out->data<T>(), rnn.init_h_desc(), init_h->data<T>(), rnn.y_seq_desc(),
workspace_data_.data<uint8_t>(), workspace_size, rnn.w_desc(), out->data<T>(), workspace_data_.data<uint8_t>(), workspace_size,
weight_grad->data<T>(), const_cast<uint8_t *>(reserve_data), rnn.weight_desc(), weight_grad->data<T>(),
reserve_size)); const_cast<uint8_t *>(reserve_data), reserve_size));
#else #else
PADDLE_ENFORCE_NOT_NULL( PADDLE_THROW(platform::errors::Unavailable(
nullptr, "The padded input of rnn is supported by cudnnRNNBackwardDataEx, "
platform::errors::Unavailable( "cudnnRNNBackwardWeightsEx, but it only works when the version "
"The padded input of rnn is supported by cudnnRNNBackwardDataEx, " "of cudnn is larger than 7.2.1"));
"cudnnRNNBackwardWeightsEx, but it only works when the version "
"of cudnn is larger than 7.2.1"));
#endif #endif
} }
} }
......
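Both kernels now fail the padded path with PADDLE_THROW instead of the old PADDLE_ENFORCE_NOT_NULL(nullptr, ...) trick, which was an always-failing check standing in for an unconditional error. A condensed sketch of the guard, under the assumption that 7201 encodes cuDNN 7.2.1:

void RequirePaddedRnnSupport() {
#if CUDNN_VERSION >= 7201
  // the cudnnRNN*Ex padded-sequence entry points are available
#else
  // state the failure directly instead of enforcing that nullptr != nullptr
  PADDLE_THROW(platform::errors::Unavailable(
      "Padded RNN input needs the cudnnRNN*Ex API, which requires "
      "cuDNN 7.2.1 or newer."));
#endif
}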
...@@ -76,7 +76,8 @@ class AllReduceOpKernel : public framework::OpKernel<T> { ...@@ -76,7 +76,8 @@ class AllReduceOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream)); PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream));
} }
#else #else
PADDLE_THROW("PaddlePaddle should compile with GPU."); PADDLE_THROW(platform::errors::PreconditionNotMet(
"PaddlePaddle should compile with GPU."));
#endif #endif
} }
}; };
......
...@@ -58,7 +58,8 @@ template <typename T> ...@@ -58,7 +58,8 @@ template <typename T>
class BroadcastOpKernel : public framework::OpKernel<T> { class BroadcastOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_THROW("Broadcast op can run on gpu place only for now."); PADDLE_THROW(platform::errors::PreconditionNotMet(
"Broadcast op can run on gpu place only for now."));
} }
}; };
......
...@@ -68,10 +68,11 @@ class NCCLBroadcastOpKernel : public framework::OpKernel<T> { ...@@ -68,10 +68,11 @@ class NCCLBroadcastOpKernel : public framework::OpKernel<T> {
<< " From " << root_dev_id << " to " << dev_id; << " From " << root_dev_id << " to " << dev_id;
if (ctx.Attr<bool>("sync_mode")) { if (ctx.Attr<bool>("sync_mode")) {
PADDLE_ENFORCE(cudaStreamSynchronize(stream)); PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream));
} }
#else #else
PADDLE_THROW("PaddlePaddle should compile with GPU."); PADDLE_THROW(platform::errors::PreconditionNotMet(
"PaddlePaddle should compile with GPU."));
#endif #endif
} }
}; };
......
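These collective-op edits follow a consistent error taxonomy: PreconditionNotMet for capabilities missing from the build, Unavailable for features missing from the linked library, InvalidArgument for bad caller input. A small sketch of the build-capability case:

void RequireCudaBuild() {
#ifdef PADDLE_WITH_CUDA
  // compiled with CUDA support; nothing to check
#else
  PADDLE_THROW(platform::errors::PreconditionNotMet(
      "PaddlePaddle should compile with GPU."));
#endif
}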
...@@ -33,9 +33,12 @@ namespace operators { ...@@ -33,9 +33,12 @@ namespace operators {
static void Memcpy(void *dst, const void *src, size_t n, bool copy_to_gpu) { static void Memcpy(void *dst, const void *src, size_t n, bool copy_to_gpu) {
if (copy_to_gpu) { if (copy_to_gpu) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE(cudaMemcpy(dst, src, n, cudaMemcpyHostToDevice)); PADDLE_ENFORCE_CUDA_SUCCESS(
cudaMemcpy(dst, src, n, cudaMemcpyHostToDevice));
#else #else
PADDLE_THROW("Not compiled with cuda"); PADDLE_THROW(
platform::errors::InvalidArgument("Check your paddle version, current "
"version is not compiled with cuda"));
#endif #endif
} else { } else {
std::memcpy(dst, src, n); std::memcpy(dst, src, n);
...@@ -88,11 +91,22 @@ bool TestMain(const platform::Place &place, const framework::DDim &dims, ...@@ -88,11 +91,22 @@ bool TestMain(const platform::Place &place, const framework::DDim &dims,
framework::LoDTensor cpu_out; framework::LoDTensor cpu_out;
auto &out_tensor = scope.FindVar(out_name)->Get<framework::LoDTensor>(); auto &out_tensor = scope.FindVar(out_name)->Get<framework::LoDTensor>();
PADDLE_ENFORCE(scope.kids().empty()); PADDLE_ENFORCE_EQ(scope.kids().empty(), true,
platform::errors::InvalidArgument(
"The scope can not have the child scopes,"
"please check your code."));
if (inplace) { if (inplace) {
PADDLE_ENFORCE_EQ(&out_tensor, x); PADDLE_ENFORCE_EQ(
&out_tensor, x,
platform::errors::InvalidArgument(
"The output tensor should be same as input x in inplace mode,"
" but now is not same."));
} else { } else {
PADDLE_ENFORCE_EQ(&out_tensor, z); PADDLE_ENFORCE_EQ(
&out_tensor, z,
platform::errors::InvalidArgument(
"The output tensor should be same as output z in normal mode,"
" but now is not same."));
} }
if (is_gpu_place) { if (is_gpu_place) {
......
...@@ -92,7 +92,9 @@ class TestElementwiseOpGradGrad { ...@@ -92,7 +92,9 @@ class TestElementwiseOpGradGrad {
auto dst_place = BOOST_GET_CONST(platform::CUDAPlace, place_); auto dst_place = BOOST_GET_CONST(platform::CUDAPlace, place_);
memory::Copy(dst_place, dst, src_place, src, bytes, nullptr); memory::Copy(dst_place, dst, src_place, src, bytes, nullptr);
#else #else
PADDLE_THROW("Not compiled with cuda"); PADDLE_THROW(platform::errors::InvalidArgument(
"Check your paddle version, current version is not compiled with "
"cuda"));
#endif #endif
} }
} }
...@@ -107,7 +109,10 @@ class TestElementwiseOpGradGrad { ...@@ -107,7 +109,10 @@ class TestElementwiseOpGradGrad {
op->Run(scope_, place_); op->Run(scope_, place_);
platform::DeviceContextPool::Instance().Get(place_)->Wait(); platform::DeviceContextPool::Instance().Get(place_)->Wait();
framework::LoDTensor cpu_out; framework::LoDTensor cpu_out;
PADDLE_ENFORCE_EQ(scope_.kids().empty(), true, "scope has child scopes"); PADDLE_ENFORCE_EQ(scope_.kids().empty(), true,
platform::errors::InvalidArgument(
"The scope can not have the child scopes,"
"please check your code."));
// get outputs from scope and compare them with expected_outs // get outputs from scope and compare them with expected_outs
bool all_equal = true; bool all_equal = true;
......
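The test fixes above replace bare PADDLE_ENFORCE(cond) and the two-argument PADDLE_ENFORCE_EQ with the three-argument form, whose typed error carries a printf-style message. A sketch of the contrast, with illustrative tensor names:

void CheckSameNumel(const framework::Tensor& a, const framework::Tensor& b) {
  // Old style reported only that a condition failed:
  //   PADDLE_ENFORCE_EQ(a.numel(), b.numel());
  // The three-argument form names the expectation and both observed values:
  PADDLE_ENFORCE_EQ(a.numel(), b.numel(),
                    platform::errors::InvalidArgument(
                        "Tensors must hold the same number of elements, "
                        "but got %d and %d.",
                        a.numel(), b.numel()));
}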
...@@ -37,8 +37,21 @@ class GatherOp : public framework::OperatorWithKernel { ...@@ -37,8 +37,21 @@ class GatherOp : public framework::OperatorWithKernel {
"Output(Out) of GatherOp should not be null.")); "Output(Out) of GatherOp should not be null."));
auto index_dims = ctx->GetInputDim("Index"); auto index_dims = ctx->GetInputDim("Index");
PADDLE_ENFORCE(index_dims.size() == 1 ||
(index_dims.size() == 2 && index_dims[1] == 1)); if (index_dims.size() == 2) {
PADDLE_ENFORCE_EQ(
index_dims[1], 1,
platform::errors::InvalidArgument(
"The last dim of index should be 1 when it is 2D, but we get %d",
index_dims[1]));
} else {
PADDLE_ENFORCE_EQ(
index_dims.size(), 1,
platform::errors::InvalidArgument(
"The index should be 1D, when it is not 2D, but we get %d",
index_dims.size()));
}
int batch_size = ctx->GetInputDim("Index")[0]; int batch_size = ctx->GetInputDim("Index")[0];
framework::DDim output_dims(ctx->GetInputDim("X")); framework::DDim output_dims(ctx->GetInputDim("X"));
output_dims[0] = batch_size; output_dims[0] = batch_size;
......
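The expanded GatherOp check spells out the rule that the old single PADDLE_ENFORCE compressed away: Index must be 1-D, or 2-D with a trailing dimension of 1. The same rule as a standalone predicate (a sketch, not Paddle API):

bool IsValidGatherIndexShape(const framework::DDim& index_dims) {
  if (index_dims.size() == 2) {
    return index_dims[1] == 1;  // column vector of indices
  }
  return index_dims.size() == 1;  // plain 1-D index list
}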
...@@ -43,7 +43,11 @@ class OverflowOp : public framework::OperatorWithKernel { ...@@ -43,7 +43,11 @@ class OverflowOp : public framework::OperatorWithKernel {
} else if (x_var->IsType<framework::SelectedRows>()) { } else if (x_var->IsType<framework::SelectedRows>()) {
dtype = x_var->Get<framework::SelectedRows>().value().type(); dtype = x_var->Get<framework::SelectedRows>().value().type();
} else { } else {
PADDLE_THROW("Cannot find the input data type by all input data"); PADDLE_ENFORCE_EQ(
true, false,
platform::errors::InvalidArgument(
"The input type mismatch, the type of Input(X) must be Tensor or "
"SelectedRows, please check your input."));
} }
return framework::OpKernelType(framework::proto::VarType::Type(dtype), return framework::OpKernelType(framework::proto::VarType::Type(dtype),
ctx.GetPlace()); ctx.GetPlace());
......
...@@ -57,7 +57,11 @@ class OverflowKernel : public framework::OpKernel<T> { ...@@ -57,7 +57,11 @@ class OverflowKernel : public framework::OpKernel<T> {
auto& in = ctx.Input<framework::SelectedRows>("X")->value(); auto& in = ctx.Input<framework::SelectedRows>("X")->value();
functor(in, out); functor(in, out);
} else { } else {
PADDLE_THROW("Unsupported input type."); PADDLE_ENFORCE_EQ(
true, false,
platform::errors::InvalidArgument(
"The input type mismatch, the type of Input(X) must be Tensor or "
"SelectedRows, please check your input."));
} }
} }
}; };
......
...@@ -22,8 +22,6 @@ class LinspaceOp : public framework::OperatorWithKernel { ...@@ -22,8 +22,6 @@ class LinspaceOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Start"),
"Input(Start) of LinspaceOp should not be null.");
OP_INOUT_CHECK(ctx->HasInput("Start"), "Input", "Start", "linspace"); OP_INOUT_CHECK(ctx->HasInput("Start"), "Input", "Start", "linspace");
OP_INOUT_CHECK(ctx->HasInput("Stop"), "Input", "Stop", "linspace"); OP_INOUT_CHECK(ctx->HasInput("Stop"), "Input", "Stop", "linspace");
OP_INOUT_CHECK(ctx->HasInput("Num"), "Input", "Num", "linspace"); OP_INOUT_CHECK(ctx->HasInput("Num"), "Input", "Num", "linspace");
......
...@@ -63,7 +63,10 @@ class CUDALinspaceKernel : public framework::OpKernel<T> { ...@@ -63,7 +63,10 @@ class CUDALinspaceKernel : public framework::OpKernel<T> {
framework::TensorCopy(*num_t, platform::CPUPlace(), &n); framework::TensorCopy(*num_t, platform::CPUPlace(), &n);
int32_t num = n.data<int32_t>()[0]; int32_t num = n.data<int32_t>()[0];
PADDLE_ENFORCE(num > 0, "The num of linspace op should be larger than 0."); PADDLE_ENFORCE_GT(num, 0, platform::errors::InvalidArgument(
"The num of linspace op should be larger "
"than 0, but received num is %d",
num));
out->Resize(framework::make_ddim({num})); out->Resize(framework::make_ddim({num}));
T* out_data = out->mutable_data<T>(context.GetPlace()); T* out_data = out->mutable_data<T>(context.GetPlace());
......
...@@ -46,7 +46,10 @@ class CPULinspaceKernel : public framework::OpKernel<T> { ...@@ -46,7 +46,10 @@ class CPULinspaceKernel : public framework::OpKernel<T> {
T start = start_t.data<T>()[0]; T start = start_t.data<T>()[0];
T stop = stop_t.data<T>()[0]; T stop = stop_t.data<T>()[0];
PADDLE_ENFORCE(num > 0, "The num of linspace op should be larger than 0."); PADDLE_ENFORCE_GT(num, 0, platform::errors::InvalidArgument(
"The num of linspace op should be larger "
"than 0, but received num is %d",
num));
out->Resize(framework::make_ddim({num})); out->Resize(framework::make_ddim({num}));
......
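Both kernels guard num > 0 before resizing the output to {num}. For reference, a sketch of the values linspace produces once the guard passes (plain C++, independent of the Paddle kernels):

#include <vector>

std::vector<float> LinspaceSketch(float start, float stop, int num) {
  std::vector<float> out(static_cast<size_t>(num));
  if (num == 1) {
    out[0] = start;  // a single sample degenerates to the start point
    return out;
  }
  const float step = (stop - start) / static_cast<float>(num - 1);
  for (int i = 0; i < num; ++i) {
    out[i] = start + step * static_cast<float>(i);
  }
  return out;
}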
...@@ -48,6 +48,7 @@ class QuantOpKernel : public framework::OpKernel<T> { ...@@ -48,6 +48,7 @@ class QuantOpKernel : public framework::OpKernel<T> {
const T* input_data = input->data<T>(); const T* input_data = input->data<T>();
bool is_negative = ctx.Attr<bool>("is_negative_input"); bool is_negative = ctx.Attr<bool>("is_negative_input");
bool bfloat16 = ctx.Attr<bool>("bfloat16");
std::string key = std::string key =
platform::CreateKey(platform::ThreadIDasStr(), src_tz, scale_data, platform::CreateKey(platform::ThreadIDasStr(), src_tz, scale_data,
is_negative, ctx.OutputName("Output")); is_negative, ctx.OutputName("Output"));
...@@ -74,7 +75,10 @@ class QuantOpKernel : public framework::OpKernel<T> { ...@@ -74,7 +75,10 @@ class QuantOpKernel : public framework::OpKernel<T> {
src_md, engine, to_void_cast<T>(input_data)); src_md, engine, to_void_cast<T>(input_data));
std::shared_ptr<mkldnn::memory::desc> dst_md; std::shared_ptr<mkldnn::memory::desc> dst_md;
if (is_negative) { if (bfloat16) {
platform::SetDstMemoryQuantized<paddle::platform::bfloat16>(
ctx, output, dst_tz, engine, dst_md, dst_memory, out_format);
} else if (is_negative) {
platform::SetDstMemoryQuantized<int8_t>(ctx, output, dst_tz, engine, platform::SetDstMemoryQuantized<int8_t>(ctx, output, dst_tz, engine,
dst_md, dst_memory, out_format); dst_md, dst_memory, out_format);
} else { } else {
...@@ -96,7 +100,11 @@ class QuantOpKernel : public framework::OpKernel<T> { ...@@ -96,7 +100,11 @@ class QuantOpKernel : public framework::OpKernel<T> {
dst_memory = std::static_pointer_cast<mkldnn::memory>( dst_memory = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(key_dst_mem)); dev_ctx.GetBlob(key_dst_mem));
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
if (is_negative) {
if (bfloat16) {
dst_memory->set_data_handle(
output->mutable_data<paddle::platform::bfloat16>(place));
} else if (is_negative) {
dst_memory->set_data_handle(output->mutable_data<int8_t>(place)); dst_memory->set_data_handle(output->mutable_data<int8_t>(place));
} else { } else {
dst_memory->set_data_handle(output->mutable_data<uint8_t>(place)); dst_memory->set_data_handle(output->mutable_data<uint8_t>(place));
......
...@@ -40,6 +40,8 @@ void QuantOpMaker::Make() { ...@@ -40,6 +40,8 @@ void QuantOpMaker::Make() {
AddAttr<std::string>("output_format", AddAttr<std::string>("output_format",
"Convert format to NHWC or NCHW during quantization.") "Convert format to NHWC or NCHW during quantization.")
.SetDefault("NHWC"); .SetDefault("NHWC");
AddAttr<bool>("bfloat16", "(bool, default false) Convert to bfloat16")
.SetDefault(false);
AddComment(R"DOC(This op will quantize data from FP32 to INT8)DOC"); AddComment(R"DOC(This op will quantize data from FP32 to INT8)DOC");
} }
......
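With the new bfloat16 attribute, the quantize kernel picks among three destination types. The decision logic, distilled into a sketch (the real kernel binds the choice to mkldnn memory descriptors and mutable_data calls):

enum class QuantDst { kBFloat16, kInt8, kUInt8 };

QuantDst ChooseQuantDst(bool bfloat16, bool is_negative_input) {
  if (bfloat16) {
    return QuantDst::kBFloat16;  // bf16 takes priority over the int8 paths
  }
  if (is_negative_input) {
    return QuantDst::kInt8;  // signed range needed for negative inputs
  }
  return QuantDst::kUInt8;  // non-negative inputs fit unsigned int8
}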
...@@ -60,7 +60,10 @@ class ScaleKernel : public framework::OpKernel<T> { ...@@ -60,7 +60,10 @@ class ScaleKernel : public framework::OpKernel<T> {
out->mutable_data<T>(in->place()); out->mutable_data<T>(in->place());
PADDLE_ENFORCE_EQ(in->dims(), out->dims(), PADDLE_ENFORCE_EQ(in->dims(), out->dims(),
"in and out should have the same dim"); paddle::platform::errors::InvalidArgument(
"the input and output should have the same dim"
"but input dim is %s, output dim is %s",
in->dims(), out->dims()));
auto eigen_out = framework::EigenVector<T>::Flatten(*out); auto eigen_out = framework::EigenVector<T>::Flatten(*out);
auto eigen_in = framework::EigenVector<T>::Flatten(*in); auto eigen_in = framework::EigenVector<T>::Flatten(*in);
......
...@@ -186,10 +186,17 @@ class SumOp : public framework::OperatorWithKernel { ...@@ -186,10 +186,17 @@ class SumOp : public framework::OperatorWithKernel {
} }
} }
} }
PADDLE_THROW("Cannot find the input data type by all input data"); PADDLE_THROW(platform::errors::InvalidArgument(
"Expected each tensor in Input(x) in sum op has be initialized, but "
"some tensor in Input(x) is not be initialized, please check your "
"code.",
framework::ToTypeName(x_vars[0]->Type())));
} }
PADDLE_THROW("Unexpected branch. Input type is %s", PADDLE_THROW(platform::errors::InvalidArgument(
framework::ToTypeName(x_vars[0]->Type())); "Expected type of Input(X) must be Tensor, SelectedRows or "
"LodTensorArray. But got "
"unsupport type: %s.",
framework::ToTypeName(x_vars[0]->Type())));
} }
}; };
......
...@@ -169,8 +169,18 @@ void SumToLoDTensor(const framework::ExecutionContext &context) { ...@@ -169,8 +169,18 @@ void SumToLoDTensor(const framework::ExecutionContext &context) {
auto row_numel = sr_value.numel() / sr_rows.size(); auto row_numel = sr_value.numel() / sr_rows.size();
auto out_dims = out->dims(); auto out_dims = out->dims();
PADDLE_ENFORCE_EQ(sr.height(), out_dims[0]); PADDLE_ENFORCE_EQ(sr.height(), out_dims[0],
PADDLE_ENFORCE_EQ(row_numel, out->numel() / sr.height()); platform::errors::InvalidArgument(
"The table height of input must be same as output, "
"but received input height is %d"
", output height is %d",
sr.height(), out_dims[0]));
PADDLE_ENFORCE_EQ(row_numel, out->numel() / sr.height(),
platform::errors::InvalidArgument(
"The table width of input must be same as output, "
"but received input width is %d"
", output width is %d",
row_numel, out->numel() / sr.height()));
auto *sr_data = sr_value.data<T>(); auto *sr_data = sr_value.data<T>();
auto *sr_out_data = out->data<T>(); auto *sr_out_data = out->data<T>();
...@@ -231,8 +241,11 @@ class SumKernel<platform::CUDADeviceContext, T> ...@@ -231,8 +241,11 @@ class SumKernel<platform::CUDADeviceContext, T>
} else if (out_var->IsType<framework::LoDTensorArray>()) { } else if (out_var->IsType<framework::LoDTensorArray>()) {
LodTensorArrayCompute<platform::CUDADeviceContext, T>(context); LodTensorArrayCompute<platform::CUDADeviceContext, T>(context);
} else { } else {
PADDLE_THROW("Unexpected branch, output variable type is %s", PADDLE_THROW(platform::errors::InvalidArgument(
framework::ToTypeName(out_var->Type())); "Expected type of Output(out) must be Tensor, SelectedRows or "
"LodTensorArray. But got "
"unsupported type: %s.",
framework::ToTypeName(out_var->Type())));
} }
} }
}; };
......
...@@ -182,7 +182,11 @@ class SumKernel : public framework::OpKernel<T> { ...@@ -182,7 +182,11 @@ class SumKernel : public framework::OpKernel<T> {
auto &in_t = in_vars[i]->Get<framework::SelectedRows>(); auto &in_t = in_vars[i]->Get<framework::SelectedRows>();
functor(context.template device_context<DeviceContext>(), in_t, out); functor(context.template device_context<DeviceContext>(), in_t, out);
} else { } else {
PADDLE_THROW("Variable type must be LoDTensor/SelectedRows."); PADDLE_THROW(platform::errors::InvalidArgument(
"Expected type of Input(X) of %d-th must be Tensor, "
"SelectedRows. But got "
"unsupport type: %s.",
framework::ToTypeName(in_vars[i]->Type())));
} }
} }
} else if (out_var->IsType<framework::SelectedRows>()) { } else if (out_var->IsType<framework::SelectedRows>()) {
...@@ -190,8 +194,11 @@ class SumKernel : public framework::OpKernel<T> { ...@@ -190,8 +194,11 @@ class SumKernel : public framework::OpKernel<T> {
} else if (out_var->IsType<framework::LoDTensorArray>()) { } else if (out_var->IsType<framework::LoDTensorArray>()) {
LodTensorArrayCompute<DeviceContext, T>(context); LodTensorArrayCompute<DeviceContext, T>(context);
} else { } else {
PADDLE_THROW("Unexpected branch, output variable type is %s", PADDLE_THROW(platform::errors::InvalidArgument(
framework::ToTypeName(out_var->Type())); "Expected type of Output(out) must be Tensor, SelectedRows, "
"LoDTensorArray. But got "
"unsupport type: %s.",
framework::ToTypeName(out_var->Type())));
} }
} }
}; };
......
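The reworded messages in sum_op document a three-way dispatch on the output variable's type. Condensed into a sketch (the real kernels do the accumulation inside each branch):

void DispatchSumOutput(framework::Variable* out_var) {
  if (out_var->IsType<framework::LoDTensor>()) {
    // dense accumulation path
  } else if (out_var->IsType<framework::SelectedRows>()) {
    // sparse (row-selected) accumulation path
  } else if (out_var->IsType<framework::LoDTensorArray>()) {
    // element-wise accumulation over an array of tensors
  } else {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Expected type of Output(out) must be Tensor, SelectedRows or "
        "LoDTensorArray. But got unsupported type: %s.",
        framework::ToTypeName(out_var->Type())));
  }
}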
...@@ -54,9 +54,11 @@ class CPUUniformRandomKernel : public framework::OpKernel<T> { ...@@ -54,9 +54,11 @@ class CPUUniformRandomKernel : public framework::OpKernel<T> {
tensor = out_var->GetMutable<framework::LoDTensor>(); tensor = out_var->GetMutable<framework::LoDTensor>();
if (!new_shape.empty()) tensor->Resize(framework::make_ddim(new_shape)); if (!new_shape.empty()) tensor->Resize(framework::make_ddim(new_shape));
} else { } else {
PADDLE_THROW( PADDLE_THROW(platform::errors::InvalidArgument(
"uniform_random_op's output only" "Expected type of Output(out) in uniform_random_op must be Tensor, "
"supports SelectedRows and LoDTensor"); "SelectedRows. But got "
"unsupport type: %s.",
framework::ToTypeName(out_var->Type())));
} }
T *data = tensor->mutable_data<T>(ctx.GetPlace()); T *data = tensor->mutable_data<T>(ctx.GetPlace());
......
...@@ -116,9 +116,11 @@ class GPUUniformRandomKernel : public framework::OpKernel<T> { ...@@ -116,9 +116,11 @@ class GPUUniformRandomKernel : public framework::OpKernel<T> {
tensor = out_var->GetMutable<framework::LoDTensor>(); tensor = out_var->GetMutable<framework::LoDTensor>();
if (!new_shape.empty()) tensor->Resize(framework::make_ddim(new_shape)); if (!new_shape.empty()) tensor->Resize(framework::make_ddim(new_shape));
} else { } else {
PADDLE_THROW( PADDLE_THROW(platform::errors::InvalidArgument(
"uniform_random_op's output only" "Expected type of Output(out) in uniform_random_op must be Tensor, "
"supports SelectedRows and LoDTensor"); "SelectedRows. But got "
"unsupport type: %s.",
framework::ToTypeName(out_var->Type())));
} }
T* data = tensor->mutable_data<T>(context.GetPlace()); T* data = tensor->mutable_data<T>(context.GetPlace());
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed")); unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
......
...@@ -50,7 +50,10 @@ inline std::vector<int64_t> GetNewDataFromShapeTensor( ...@@ -50,7 +50,10 @@ inline std::vector<int64_t> GetNewDataFromShapeTensor(
} }
return vec_new_data; return vec_new_data;
} else { } else {
PADDLE_THROW("The dtype of shape tensor must be int32 or int64."); PADDLE_THROW(platform::errors::InvalidArgument(
"Expected dtype of ShapeTensor must be int32, int64. But got "
"unsupport dtype: %s.",
paddle::framework::DataTypeToString(new_data_tensor->type())));
} }
} }
...@@ -84,7 +87,11 @@ inline std::vector<int64_t> GetNewDataFromShapeTensorList( ...@@ -84,7 +87,11 @@ inline std::vector<int64_t> GetNewDataFromShapeTensorList(
vec_new_shape.push_back(*tensor->data<int64_t>()); vec_new_shape.push_back(*tensor->data<int64_t>());
} }
} else { } else {
PADDLE_THROW("The dtype of shape tensor must be int32 or int64."); PADDLE_THROW(platform::errors::InvalidArgument(
"Expected dtype of ShapeTensorList of %d-th must be int32, int64. "
"But got "
"unsupport dtype: %s.",
i, paddle::framework::DataTypeToString(tensor->type())));
} }
} }
......
...@@ -287,6 +287,8 @@ class ScopedTensorDescriptor { ...@@ -287,6 +287,8 @@ class ScopedTensorDescriptor {
return descriptor(CudnnDataType<T>::type, dim, stride); return descriptor(CudnnDataType<T>::type, dim, stride);
} }
inline cudnnTensorDescriptor_t desc() { return desc_; }
private: private:
cudnnTensorDescriptor_t desc_; cudnnTensorDescriptor_t desc_;
DISABLE_COPY_AND_ASSIGN(ScopedTensorDescriptor); DISABLE_COPY_AND_ASSIGN(ScopedTensorDescriptor);
...@@ -329,6 +331,8 @@ class ScopedRNNTensorDescriptor { ...@@ -329,6 +331,8 @@ class ScopedRNNTensorDescriptor {
input_size, time_major, seq_length); input_size, time_major, seq_length);
} }
inline cudnnRNNDataDescriptor_t desc() { return desc_; }
private: private:
cudnnRNNDataDescriptor_t desc_; cudnnRNNDataDescriptor_t desc_;
DISABLE_COPY_AND_ASSIGN(ScopedRNNTensorDescriptor); DISABLE_COPY_AND_ASSIGN(ScopedRNNTensorDescriptor);
...@@ -361,6 +365,7 @@ class ScopedDropoutDescriptor { ...@@ -361,6 +365,7 @@ class ScopedDropoutDescriptor {
} }
return desc_; return desc_;
} }
inline cudnnDropoutDescriptor_t desc() { return desc_; }
private: private:
cudnnDropoutDescriptor_t desc_; cudnnDropoutDescriptor_t desc_;
...@@ -376,7 +381,7 @@ class ScopedRNNDescriptor { ...@@ -376,7 +381,7 @@ class ScopedRNNDescriptor {
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyRNNDescriptor(desc_)); PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyRNNDescriptor(desc_));
} }
inline cudnnRNNDescriptor_t descriptor() { return desc_; } inline cudnnRNNDescriptor_t desc() { return desc_; }
private: private:
cudnnRNNDescriptor_t desc_; cudnnRNNDescriptor_t desc_;
...@@ -419,172 +424,13 @@ class ScopedFilterDescriptor { ...@@ -419,172 +424,13 @@ class ScopedFilterDescriptor {
kernel, groups); kernel, groups);
} }
inline cudnnFilterDescriptor_t desc() { return desc_; }
private: private:
cudnnFilterDescriptor_t desc_; cudnnFilterDescriptor_t desc_;
DISABLE_COPY_AND_ASSIGN(ScopedFilterDescriptor); DISABLE_COPY_AND_ASSIGN(ScopedFilterDescriptor);
}; };
class ScopedRNNBase {
public:
ScopedRNNBase(int seq_length, int batch_size, int input_size, int hidden_size,
int num_layers, float dropout_prob, int seed, int weight_numel,
bool initialized, bool is_bidirec)
: seq_length_(seq_length),
batch_size_(batch_size),
input_size_(input_size),
hidden_size_(hidden_size),
num_layers_(num_layers),
dropout_prob_(dropout_prob),
seed_(seed),
weight_numel_(weight_numel),
initialized_(initialized),
is_bidirec_(is_bidirec) {}
template <typename T>
void Create(const cudnnHandle_t& handle, const platform::Place& place,
std::vector<int> sequence_length, size_t* workspace_size,
size_t* reserve_size, framework::Tensor* dropout_state) {
int numDirections = is_bidirec_ ? 2 : 1;
cudnnDataType_t cudnn_type = platform::CudnnDataType<T>::type;
// ------------------- cudnn x, y descriptors ---------------------
std::vector<int> dims_x = {batch_size_, input_size_, 1};
std::vector<int> strides_x = {input_size_, 1, 1};
std::vector<int> dims_y = {batch_size_, hidden_size_ * numDirections, 1};
std::vector<int> strides_y = {hidden_size_ * numDirections, 1, 1};
for (int i = 0; i < seq_length_; ++i) {
x_desc_.emplace_back(x_d.descriptor<T>(dims_x, strides_x));
y_desc_.emplace_back(y_d.descriptor<T>(dims_y, strides_y));
}
if (!sequence_length.empty()) {
x_seq_desc_ = x_seq_d.descriptor<T>(seq_length_, batch_size_, input_size_,
true, sequence_length);
y_seq_desc_ = y_seq_d.descriptor<T>(seq_length_, batch_size_,
hidden_size_ * numDirections, true,
sequence_length);
}
// ------------------- cudnn hx, hy, cx, cy descriptors----------
std::vector<int> dims_hx = {num_layers_ * numDirections, batch_size_,
hidden_size_};
std::vector<int> strides_hx = {hidden_size_ * batch_size_, hidden_size_, 1};
hx_desc_ = hx_d.descriptor<T>(dims_hx, strides_hx);
cx_desc_ = cx_d.descriptor<T>(dims_hx, strides_hx);
hy_desc_ = hy_d.descriptor<T>(dims_hx, strides_hx);
cy_desc_ = cy_d.descriptor<T>(dims_hx, strides_hx);
// ------------------- cudnn dropout descriptors ---------------------
size_t state_size;
if (!initialized_) {
PADDLE_ENFORCE_CUDA_SUCCESS(
dynload::cudnnDropoutGetStatesSize(handle, &state_size));
dropout_state->mutable_data<uint8_t>({static_cast<int64_t>(state_size)},
place);
}
dropout_desc_ =
dropout_d.descriptor(handle, place, initialized_, dropout_prob_,
dropout_state, seed_, state_size);
// ------------------- cudnn rnn descriptors ---------------------
rnn_desc_ = rnn_d.descriptor();
#if CUDNN_VERSION >= 6000
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor_v6(
handle, rnn_desc_, hidden_size_, num_layers_, dropout_desc_,
CUDNN_LINEAR_INPUT,
is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
CUDNN_RNN_ALGO_STANDARD, cudnn_type));
#else
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor(
rnn_desc_, hidden_size_, num_layers_, dropout_desc_, CUDNN_LINEAR_INPUT,
is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
cudnn_type));
#endif
if (!sequence_length.empty()) {
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNPaddingMode(
rnn_desc_, CUDNN_RNN_PADDED_IO_ENABLED));
}
// ------------------- cudnn weights_size ---------------------
size_t weights_size_;
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNParamsSize(
handle, rnn_desc_, x_desc_[0], &weights_size_, cudnn_type));
PADDLE_ENFORCE_EQ(
weights_size_, sizeof(T) * weight_numel_,
platform::errors::InvalidArgument(
"The cudnn lstm and setting weight size should be same."));
// ------------------- cudnn weight descriptors ---------------------
platform::DataLayout layout = platform::DataLayout::kNCHW;
int dim_tmp = weights_size_ / sizeof(T);
std::vector<int> dim_w = {dim_tmp, 1, 1};
w_desc_ = w_d.descriptor<T>(layout, dim_w);
// ------------------- cudnn workspace, reserve size ---------------------
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNWorkspaceSize(
handle, rnn_desc_, seq_length_, x_desc_.data(), workspace_size));
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cudnnGetRNNTrainingReserveSize(
handle, rnn_desc_, seq_length_, x_desc_.data(), reserve_size));
}
cudnnTensorDescriptor_t* x_desc() { return x_desc_.data(); }
cudnnTensorDescriptor_t* y_desc() { return y_desc_.data(); }
cudnnRNNDataDescriptor_t x_seq_desc() { return x_seq_desc_; }
cudnnRNNDataDescriptor_t y_seq_desc() { return y_seq_desc_; }
cudnnTensorDescriptor_t hx_desc() { return hx_desc_; }
cudnnTensorDescriptor_t cx_desc() { return cx_desc_; }
cudnnTensorDescriptor_t hy_desc() { return hy_desc_; }
cudnnTensorDescriptor_t cy_desc() { return cy_desc_; }
cudnnRNNDescriptor_t rnn_desc() { return rnn_desc_; }
cudnnDropoutDescriptor_t dropout_desc() { return dropout_desc_; }
cudnnFilterDescriptor_t w_desc() { return w_desc_; }
private:
int seq_length_;
int batch_size_;
int input_size_;
int hidden_size_;
int num_layers_;
float dropout_prob_;
int seed_;
int weight_numel_;
bool initialized_;
bool is_bidirec_;
std::vector<cudnnTensorDescriptor_t> x_desc_;
std::vector<cudnnTensorDescriptor_t> y_desc_;
cudnnRNNDataDescriptor_t x_seq_desc_;
cudnnRNNDataDescriptor_t y_seq_desc_;
// A tensor descriptor describing the initial hidden state of the RNN.
cudnnTensorDescriptor_t hx_desc_;
// A tensor descriptor describing the initial cell state for LSTM networks.
cudnnTensorDescriptor_t cx_desc_;
// A tensor descriptor describing the final hidden state of the RNN.
cudnnTensorDescriptor_t hy_desc_;
// A tensor descriptor describing the final cell state for LSTM networks.
cudnnTensorDescriptor_t cy_desc_;
cudnnDropoutDescriptor_t dropout_desc_;
cudnnFilterDescriptor_t w_desc_;
cudnnRNNDescriptor_t rnn_desc_;
ScopedTensorDescriptor x_d;
ScopedTensorDescriptor y_d;
ScopedRNNTensorDescriptor x_seq_d;
ScopedRNNTensorDescriptor y_seq_d;
ScopedTensorDescriptor hx_d;
ScopedTensorDescriptor cx_d;
ScopedTensorDescriptor hy_d;
ScopedTensorDescriptor cy_d;
ScopedDropoutDescriptor dropout_d;
ScopedFilterDescriptor w_d;
ScopedRNNDescriptor rnn_d;
};
class ScopedConvolutionDescriptor { class ScopedConvolutionDescriptor {
public: public:
ScopedConvolutionDescriptor() { ScopedConvolutionDescriptor() {
......
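After this change the kernels construct ScopedRNNBase directly (it moves out of platform:: into the operator's cudnn_lstm_cache.h) and reach descriptors through the renamed accessors. A hedged usage sketch; the handle, place, shapes, and dropout state are assumed to come from the kernel's execution context:

template <typename T>
void BuildRnnDescriptors(const cudnnHandle_t& handle,
                         const platform::Place& place,
                         const std::vector<int>& sequence_length,
                         framework::Tensor* dropout_state, int seq_length,
                         int batch_size, int input_size, int hidden_size,
                         int num_layers, float dropout_prob, int seed,
                         int weight_numel, bool is_bidirec) {
  ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size,
                    num_layers, dropout_prob, seed, weight_numel,
                    /*initialized=*/false, is_bidirec);
  size_t workspace_size = 0;
  size_t reserve_size = 0;
  rnn.Create<T>(handle, place, sequence_length, &workspace_size,
                &reserve_size, dropout_state);
  // Create() leaves every descriptor ready for the cudnnRNN* calls:
  //   rnn.rnn_desc(), rnn.x_descs(), rnn.y_descs(),
  //   rnn.x_seq_desc(), rnn.y_seq_desc(),
  //   rnn.init_h_desc(), rnn.init_c_desc(),
  //   rnn.last_h_desc(), rnn.last_c_desc(), rnn.weight_desc()
}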
...@@ -443,6 +443,13 @@ inline bool HasOpINT8DataType(const paddle::framework::OpDesc* op) { ...@@ -443,6 +443,13 @@ inline bool HasOpINT8DataType(const paddle::framework::OpDesc* op) {
op->GetAttrIfExists<bool>("use_quantizer")); op->GetAttrIfExists<bool>("use_quantizer"));
} }
inline bool HasOpBFLOAT16DataType(const paddle::framework::OpDesc* op) {
return op->GetAttrIfExists<std::string>("mkldnn_data_type") == "bfloat16";
}
inline bool HasOpFLOAT32DataType(const paddle::framework::OpDesc* op) {
return op->GetAttrIfExists<std::string>("mkldnn_data_type") == "float32";
}
enum class RNNReorderType { PP_NTC, PP_TNC, NTC_PP, TNC_PP }; enum class RNNReorderType { PP_NTC, PP_TNC, NTC_PP, TNC_PP };
} // namespace platform } // namespace platform
......
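An illustrative sketch of how the two helpers above would typically be consumed by a placement pass: ops explicitly tagged float32 keep the default kernel, ops tagged bfloat16 are routed to the bf16 pipeline. The function below is hypothetical, not part of this commit:

void MarkBf16Ops(const std::vector<paddle::framework::OpDesc*>& ops) {
  for (auto* op : ops) {
    if (platform::HasOpBFLOAT16DataType(op)) {
      // route this op through the mkldnn bfloat16 kernels
    } else if (platform::HasOpFLOAT32DataType(op)) {
      // keep the default fp32 kernel
    }
  }
}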
...@@ -38,6 +38,7 @@ set(PYBIND_SRCS ...@@ -38,6 +38,7 @@ set(PYBIND_SRCS
imperative.cc imperative.cc
ir.cc ir.cc
inference_api.cc inference_api.cc
compatible.cc
generator_py.cc) generator_py.cc)
if(WITH_GLOO) if(WITH_GLOO)
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/pybind/compatible.h"
#include <memory>
#include <string>
#include "paddle/fluid/framework/op_version_registry.h"
namespace py = pybind11;
using paddle::framework::compatible::PassVersionCheckerRegistrar;
namespace paddle {
namespace pybind {
void BindCompatible(py::module* m) {
py::class_<PassVersionCheckerRegistrar>(*m, "PassVersionChecker")
.def_static("IsCompatible", [](const std::string& name) -> bool {
auto instance = PassVersionCheckerRegistrar::GetInstance();
return instance.IsPassCompatible(name);
});
}
} // namespace pybind
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pybind11/pybind11.h>
namespace paddle {
namespace pybind {
void BindCompatible(pybind11::module *m);
} // namespace pybind
} // namespace paddle
...@@ -184,6 +184,7 @@ void BindVarDsec(pybind11::module *m) { ...@@ -184,6 +184,7 @@ void BindVarDsec(pybind11::module *m) {
.value("FP16", pd::proto::VarType::FP16) .value("FP16", pd::proto::VarType::FP16)
.value("FP32", pd::proto::VarType::FP32) .value("FP32", pd::proto::VarType::FP32)
.value("FP64", pd::proto::VarType::FP64) .value("FP64", pd::proto::VarType::FP64)
.value("BF16", pd::proto::VarType::BF16)
.value("LOD_TENSOR", pd::proto::VarType::LOD_TENSOR) .value("LOD_TENSOR", pd::proto::VarType::LOD_TENSOR)
.value("SELECTED_ROWS", pd::proto::VarType::SELECTED_ROWS) .value("SELECTED_ROWS", pd::proto::VarType::SELECTED_ROWS)
.value("FEED_MINIBATCH", pd::proto::VarType::FEED_MINIBATCH) .value("FEED_MINIBATCH", pd::proto::VarType::FEED_MINIBATCH)
......
...@@ -60,6 +60,7 @@ limitations under the License. */ ...@@ -60,6 +60,7 @@ limitations under the License. */
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/pybind/box_helper_py.h" #include "paddle/fluid/pybind/box_helper_py.h"
#include "paddle/fluid/pybind/compatible.h"
#include "paddle/fluid/pybind/const_value.h" #include "paddle/fluid/pybind/const_value.h"
#include "paddle/fluid/pybind/data_set_py.h" #include "paddle/fluid/pybind/data_set_py.h"
#include "paddle/fluid/pybind/exception.h" #include "paddle/fluid/pybind/exception.h"
...@@ -2619,6 +2620,7 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -2619,6 +2620,7 @@ All parameter, weight, gradient are variables in Paddle.
BindGraph(&m); BindGraph(&m);
BindNode(&m); BindNode(&m);
BindInferenceApi(&m); BindInferenceApi(&m);
BindCompatible(&m);
BindDataset(&m); BindDataset(&m);
BindGenerator(&m); BindGenerator(&m);
#ifdef PADDLE_WITH_CRYPTO #ifdef PADDLE_WITH_CRYPTO
......
...@@ -51,6 +51,17 @@ if %ERRORLEVEL% NEQ 0 ( ...@@ -51,6 +51,17 @@ if %ERRORLEVEL% NEQ 0 (
exit /b 7 exit /b 7
) )
rem ------pre install clcache and init config----------
pip install clcache
:: set USE_CLCACHE to enable clcache
set USE_CLCACHE=1
:: In some scenarios, CLCACHE_HARDLINK can save one file copy.
set CLCACHE_HARDLINK=1
:: If it takes more than 1000s to obtain the right to use the cache, an error will be reported
set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000
:: set maximum cache size to 20G
clcache.exe -M 21474836480
rem ------initialize common variable------ rem ------initialize common variable------
if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0" if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0"
if not defined BRANCH set BRANCH=develop if not defined BRANCH set BRANCH=develop
...@@ -173,7 +184,7 @@ echo Build third_party successfully! ...@@ -173,7 +184,7 @@ echo Build third_party successfully!
set build_times=1 set build_times=1
:build_paddle :build_paddle
echo Build Paddle the %build_times% time: echo Build Paddle the %build_times% time:
msbuild /m:%PARALLEL_PROJECT_COUNT% /p:Configuration=Release /verbosity:minimal paddle.sln msbuild /m:%PARALLEL_PROJECT_COUNT% /p:TrackFileAccess=false /p:CLToolExe=clcache.exe /p:CLToolPath=%PYTHON_ROOT%\Scripts /p:Configuration=Release /verbosity:minimal paddle.sln
if %ERRORLEVEL% NEQ 0 ( if %ERRORLEVEL% NEQ 0 (
set /a build_times=%build_times%+1 set /a build_times=%build_times%+1
if %build_times% GTR 2 ( if %build_times% GTR 2 (
......
...@@ -49,6 +49,7 @@ import paddle.optimizer ...@@ -49,6 +49,7 @@ import paddle.optimizer
import paddle.metric import paddle.metric
import paddle.device import paddle.device
import paddle.incubate.complex as complex import paddle.incubate.complex as complex
import paddle.regularizer
# TODO: define alias in tensor and framework directory # TODO: define alias in tensor and framework directory
......
...@@ -21,6 +21,7 @@ from .parallel import get_rank ...@@ -21,6 +21,7 @@ from .parallel import get_rank
from .parallel import get_world_size from .parallel import get_world_size
from paddle.fluid.dygraph.parallel import prepare_context #DEFINE_ALIAS from paddle.fluid.dygraph.parallel import prepare_context #DEFINE_ALIAS
from paddle.fluid.dygraph.parallel import ParallelEnv #DEFINE_ALIAS from paddle.fluid.dygraph.parallel import ParallelEnv #DEFINE_ALIAS
from paddle.distributed.fleet.dataset import *
from . import collective from . import collective
from .collective import * from .collective import *
...@@ -30,11 +31,8 @@ __all__ = ["spawn"] ...@@ -30,11 +31,8 @@ __all__ = ["spawn"]
# dygraph parallel apis # dygraph parallel apis
__all__ += [ __all__ += [
"init_parallel_env", "init_parallel_env", "get_rank", "get_world_size", "prepare_context",
"get_rank", "ParallelEnv", "InMemoryDataset", "QueueDataset"
"get_world_size",
"prepare_context",
"ParallelEnv",
] ]
# collective apis # collective apis
......
...@@ -19,7 +19,7 @@ from paddle.distributed.utils import get_cluster, logger ...@@ -19,7 +19,7 @@ from paddle.distributed.utils import get_cluster, logger
def get_cloud_cluster(args_node_ips, args_node_ip, args_port, selected_gpus): def get_cloud_cluster(args_node_ips, args_node_ip, args_port, selected_gpus):
""" """
args_node_ips, args_node_ip:string args_node_ips:string, args_node_ip:string, args_port: int, selected_gpus:list
""" """
#you can automatically get ip info while using paddlecloud multi nodes mode. #you can automatically get ip info while using paddlecloud multi nodes mode.
node_ips = os.getenv("PADDLE_TRAINERS") node_ips = os.getenv("PADDLE_TRAINERS")
...@@ -31,6 +31,9 @@ def get_cloud_cluster(args_node_ips, args_node_ip, args_port, selected_gpus): ...@@ -31,6 +31,9 @@ def get_cloud_cluster(args_node_ips, args_node_ip, args_port, selected_gpus):
node_rank = os.getenv("PADDLE_TRAINER_ID") node_rank = os.getenv("PADDLE_TRAINER_ID")
assert node_rank is not None, "PADDLE_TRAINER_ID should not be None" assert node_rank is not None, "PADDLE_TRAINER_ID should not be None"
paddle_ports_num = os.getenv("TRAINER_PORTS_NUM")
assert paddle_ports_num is not None, "TRAINER_PORTS_NUM should not be None"
paddle_ports_num = int(paddle_ports_num)
node_ips = node_ips.split(",") node_ips = node_ips.split(",")
num_nodes = len(node_ips) num_nodes = len(node_ips)
node_rank = int(node_rank) node_rank = int(node_rank)
...@@ -47,32 +50,47 @@ automatically got from PADDLE_TRAINERS(multi nodes) or POD_IP(single node).\ ...@@ -47,32 +50,47 @@ automatically got from PADDLE_TRAINERS(multi nodes) or POD_IP(single node).\
Your input cluster_node_ips: {} doesn't equals to IPs: {} from \ Your input cluster_node_ips: {} doesn't equals to IPs: {} from \
paddlecloud environment.".format(args_node_ips, node_ips)) paddlecloud environment.".format(args_node_ips, node_ips))
started_port = args_port # DISTRIBUTED_TRAINER_ENDPOINTS: new environment since paddlecloud 1.8.4
print("num_nodes:", num_nodes) # e.g: DISTRIBUTED_TRAINER_ENDPOINTS="ip1:port1,ip1:port2,ip1:port3,ip1:port4,ip2:port5,ip2:port6,ip2:port7,ip2:port8"
if num_nodes > 1: trainer_endpoints = os.getenv("DISTRIBUTED_TRAINER_ENDPOINTS")
try: if trainer_endpoints is None:
paddle_port = int(os.getenv("PADDLE_PORT", "")) started_port = args_port
paddle_port_num = int(os.getenv("TRAINER_PORTS_NUM", "")) if num_nodes > 1:
try:
if paddle_port_num >= len( paddle_port = int(os.getenv("PADDLE_PORT", ""))
selected_gpus) and paddle_port != args_port:
logger.warning("Use Cloud specified port:{}.".format( if paddle_ports_num >= len(
paddle_port)) selected_gpus) and paddle_port != args_port:
started_port = paddle_port logger.warning("Use Cloud specified port:{}.".format(
paddle_port))
except Exception as e: started_port = paddle_port
print(e)
pass except Exception as e:
print(e)
if started_port is None: pass
started_port = 6170
if started_port is None:
logger.debug("parsed from args:node_ips:{} \ started_port = 6170
node_ip:{} node_rank:{} started_port:{}" ports = [
.format(node_ips, node_ip, node_rank, started_port)) x for x in range(started_port, started_port + len(selected_gpus))
]
ports = [x for x in range(started_port, started_port + len(selected_gpus))] trainer_endpoints = []
cluster, pod = get_cluster(node_ips, node_ip, ports, selected_gpus) for ip in node_ips:
trainer_endpoints.append(["%s:%d" % (ip, port) for port in ports])
else:
trainer_endpoints_ori = trainer_endpoints.split(",")
trainer_endpoints = []
assert num_nodes * paddle_ports_num == len(trainer_endpoints_ori)
for i in range(num_nodes):
trainer_endpoints.append(trainer_endpoints_ori[
i * paddle_ports_num:(i + 1) * paddle_ports_num])
logger.debug("parsed from args: node_ips:{} \
node_ip:{} node_rank:{} trainer_endpoints:{}"
.format(node_ips, node_ip, node_rank, trainer_endpoints))
cluster, pod = get_cluster(node_ips, node_ip, trainer_endpoints,
selected_gpus)
return cluster, cluster.pods[node_rank] return cluster, cluster.pods[node_rank]
......
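The launcher change above prefers DISTRIBUTED_TRAINER_ENDPOINTS (a flat, comma-separated "ip:port" list) over locally computed ports, slicing it into one block of paddle_ports_num endpoints per node; the same logic is repeated in the fleet copy of cloud_utils below. The slicing rule, sketched in C++ for clarity (the actual code is the Python above):

#include <cassert>
#include <sstream>
#include <string>
#include <vector>

std::vector<std::vector<std::string>> ChunkEndpoints(
    const std::string& flat_endpoints, int num_nodes, int ports_per_node) {
  std::vector<std::string> all;
  std::stringstream ss(flat_endpoints);
  std::string item;
  while (std::getline(ss, item, ',')) {
    all.push_back(item);  // each item is one "ip:port" endpoint
  }
  // mirrors the assert in the Python: every node owns exactly
  // ports_per_node consecutive endpoints
  assert(static_cast<int>(all.size()) == num_nodes * ports_per_node);
  std::vector<std::vector<std::string>> per_node;
  per_node.reserve(num_nodes);
  for (int i = 0; i < num_nodes; ++i) {
    per_node.emplace_back(all.begin() + i * ports_per_node,
                          all.begin() + (i + 1) * ports_per_node);
  }
  return per_node;
}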
...@@ -23,7 +23,6 @@ from .dataset import * ...@@ -23,7 +23,6 @@ from .dataset import *
__all__ = [ __all__ = [
"DistributedStrategy", "DistributedStrategy",
"UtilBase", "UtilBase",
"DatasetFactory",
"UserDefinedRoleMaker", "UserDefinedRoleMaker",
"PaddleCloudRoleMaker", "PaddleCloudRoleMaker",
"Fleet", "Fleet",
......
...@@ -60,7 +60,7 @@ class StrategyCompiler(StrategyCompilerBase): ...@@ -60,7 +60,7 @@ class StrategyCompiler(StrategyCompilerBase):
def _get_valid_strategy(self, dist_strategy, can_not_apply_optimizer_list): def _get_valid_strategy(self, dist_strategy, can_not_apply_optimizer_list):
import copy import copy
valid_strategy = copy.copy(dist_strategy) valid_strategy = copy.deepcopy(dist_strategy)
invalid_optimizers = [] invalid_optimizers = []
for candidate in self._meta_optimizer_candidates: for candidate in self._meta_optimizer_candidates:
is_valid = False is_valid = False
......
...@@ -19,7 +19,7 @@ from paddle.distributed.fleet.launch_utils import get_cluster, logger ...@@ -19,7 +19,7 @@ from paddle.distributed.fleet.launch_utils import get_cluster, logger
def get_cloud_cluster(args_node_ips, selected_gpus, args_port=6170): def get_cloud_cluster(args_node_ips, selected_gpus, args_port=6170):
""" """
args_node_ips, args_node_ip:string args_node_ips:string, selected_gpus:list, args_port: int
""" """
#you can automatically get ip info while using paddlecloud multi nodes mode. #you can automatically get ip info while using paddlecloud multi nodes mode.
node_ips = os.getenv("PADDLE_TRAINERS") node_ips = os.getenv("PADDLE_TRAINERS")
...@@ -31,6 +31,9 @@ def get_cloud_cluster(args_node_ips, selected_gpus, args_port=6170): ...@@ -31,6 +31,9 @@ def get_cloud_cluster(args_node_ips, selected_gpus, args_port=6170):
node_rank = os.getenv("PADDLE_TRAINER_ID") node_rank = os.getenv("PADDLE_TRAINER_ID")
assert node_rank is not None, "PADDLE_TRAINER_ID should not be None" assert node_rank is not None, "PADDLE_TRAINER_ID should not be None"
paddle_ports_num = os.getenv("TRAINER_PORTS_NUM")
assert paddle_ports_num is not None, "TRAINER_PORTS_NUM should not be None"
paddle_ports_num = int(paddle_ports_num)
node_ips = node_ips.split(",") node_ips = node_ips.split(",")
num_nodes = len(node_ips) num_nodes = len(node_ips)
node_rank = int(node_rank) node_rank = int(node_rank)
...@@ -42,32 +45,47 @@ automatically got from PADDLE_TRAINERS(multi nodes) or POD_IP(single node).\ ...@@ -42,32 +45,47 @@ automatically got from PADDLE_TRAINERS(multi nodes) or POD_IP(single node).\
Your input cluster_node_ips: {} doesn't equals to IPs: {} from \ Your input cluster_node_ips: {} doesn't equals to IPs: {} from \
paddlecloud environment.".format(args_node_ips, node_ips)) paddlecloud environment.".format(args_node_ips, node_ips))
started_port = args_port # DISTRIBUTED_TRAINER_ENDPOINTS: new environment since paddlecloud 1.8.4
print("num_nodes:", num_nodes) # e.g: DISTRIBUTED_TRAINER_ENDPOINTS="ip1:port1,ip1:port2,ip1:port3,ip1:port4,ip2:port5,ip2:port6,ip2:port7,ip2:port8"
if num_nodes > 1: trainer_endpoints = os.getenv("DISTRIBUTED_TRAINER_ENDPOINTS")
try: if trainer_endpoints is None:
paddle_port = int(os.getenv("PADDLE_PORT", "")) started_port = args_port
paddle_port_num = int(os.getenv("TRAINER_PORTS_NUM", "")) if num_nodes > 1:
try:
if paddle_port_num >= len( paddle_port = int(os.getenv("PADDLE_PORT", ""))
selected_gpus) and paddle_port != args_port:
logger.warning("Use Cloud specified port:{}.".format( if paddle_ports_num >= len(
paddle_port)) selected_gpus) and paddle_port != args_port:
started_port = paddle_port logger.warning("Use Cloud specified port:{}.".format(
paddle_port))
except Exception as e: started_port = paddle_port
print(e)
pass except Exception as e:
print(e)
if started_port is None: pass
started_port = 6170
if started_port is None:
logger.debug("parsed from args:node_ips:{} \ started_port = 6170
node_ip:{} node_rank:{} started_port:{}" ports = [
.format(node_ips, node_ip, node_rank, started_port)) x for x in range(started_port, started_port + len(selected_gpus))
]
ports = [x for x in range(started_port, started_port + len(selected_gpus))] trainer_endpoints = []
cluster, pod = get_cluster(node_ips, node_ip, ports, selected_gpus) for ip in node_ips:
trainer_endpoints.append(["%s:%d" % (ip, port) for port in ports])
else:
trainer_endpoints_ori = trainer_endpoints.split(",")
trainer_endpoints = []
assert num_nodes * paddle_ports_num == len(trainer_endpoints_ori)
for i in range(num_nodes):
trainer_endpoints.append(trainer_endpoints_ori[
i * paddle_ports_num:(i + 1) * paddle_ports_num])
logger.debug("parsed from args: node_ips:{} \
node_ip:{} node_rank:{} trainer_endpoints:{}"
.format(node_ips, node_ip, node_rank, trainer_endpoints))
cluster, pod = get_cluster(node_ips, node_ip, trainer_endpoints,
selected_gpus)
return cluster, cluster.pods[node_rank] return cluster, cluster.pods[node_rank]
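The new DISTRIBUTED_TRAINER_ENDPOINTS branch boils down to slicing a flat endpoint list into one chunk per node; a standalone sketch with made-up values:

    # Example value of the env var (set by paddlecloud since 1.8.4).
    raw = "ip1:6170,ip1:6171,ip2:6170,ip2:6171"
    num_nodes, paddle_ports_num = 2, 2

    endpoints = raw.split(",")
    assert num_nodes * paddle_ports_num == len(endpoints)
    # One chunk of paddle_ports_num endpoints per node, in node order.
    trainer_endpoints = [
        endpoints[i * paddle_ports_num:(i + 1) * paddle_ports_num]
        for i in range(num_nodes)
    ]
    # -> [['ip1:6170', 'ip1:6171'], ['ip2:6170', 'ip2:6171']]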
...@@ -75,7 +93,8 @@ def use_paddlecloud(): ...@@ -75,7 +93,8 @@ def use_paddlecloud():
node_ips = os.getenv("PADDLE_TRAINERS") node_ips = os.getenv("PADDLE_TRAINERS")
node_ip = os.getenv("POD_IP") node_ip = os.getenv("POD_IP")
node_rank = os.getenv("PADDLE_TRAINER_ID") node_rank = os.getenv("PADDLE_TRAINER_ID")
if node_ips is None or node_ip is None or node_rank is None: paddle_ports_num = os.getenv("TRAINER_PORTS_NUM")
if node_ips is None or node_ip is None or node_rank is None or paddle_ports_num is None:
return False return False
else: else:
return True return True
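An equivalent standalone form of the strengthened check (TRAINER_PORTS_NUM is the newly required fourth variable):

    import os

    def use_paddlecloud():
        # All four env vars must be present for paddlecloud mode.
        required = ("PADDLE_TRAINERS", "POD_IP", "PADDLE_TRAINER_ID",
                    "TRAINER_PORTS_NUM")
        return all(os.getenv(k) is not None for k in required)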
......
...@@ -14,54 +14,11 @@ ...@@ -14,54 +14,11 @@
"""This is definition of dataset class, which is high performance IO.""" """This is definition of dataset class, which is high performance IO."""
import paddle import paddle
import paddle.fluid as fluid
from paddle.fluid.proto import data_feed_pb2 from paddle.fluid.proto import data_feed_pb2
from google.protobuf import text_format from google.protobuf import text_format
import paddle.fluid.core as core import paddle.fluid.core as core
class DatasetFactory(object):
"""
DatasetFactory is a factory which create dataset by its name,
you can create "QueueDataset" or "InMemoryDataset", or "FileInstantDataset",
the default is "QueueDataset".
Example:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
"""
def __init__(self):
""" Init. """
pass
def create_dataset(self, datafeed_class="QueueDataset"):
"""
Create "QueueDataset" or "InMemoryDataset", or "FileInstantDataset",
the default is "QueueDataset".
Args:
datafeed_class(str): datafeed class name, QueueDataset or InMemoryDataset.
Default is QueueDataset.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
"""
try:
dataset = globals()[datafeed_class]()
return dataset
except:
raise ValueError("datafeed class %s does not exist" %
datafeed_class)
class DatasetBase(object): class DatasetBase(object):
""" Base dataset class. """ """ Base dataset class. """
...@@ -75,96 +32,67 @@ class DatasetBase(object): ...@@ -75,96 +32,67 @@ class DatasetBase(object):
self.thread_num = 1 self.thread_num = 1
self.filelist = [] self.filelist = []
def set_pipe_command(self, pipe_command): def init(self,
batch_size=1,
thread_num=1,
use_var=[],
pipe_command="cat",
input_type=0,
fs_name="",
fs_ugi="",
download_cmd="cat"):
""" """
Set pipe command of current dataset should be called only once in user's python scripts to initialize settings of dataset instance.
A pipe command is a UNIX pipeline command that can be used only Normally, it is called by InMemoryDataset or QueueDataset.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_pipe_command("python my_script.py")
Args: Args:
pipe_command(str): pipe command batch_size(int): batch size. It will be effective during training. default is 1.
thread_num(int): thread num, it is the num of readers. default is 1.
use_var(list): list of variables. Variables which you will use. default is [].
pipe_command(str): pipe command of current dataset. A pipe command is a UNIX pipeline command. Default is "cat".
input_type(int): the input type of generated input. 0 is for one sample, 1 is for one batch. Default is 0.
fs_name(str): fs name. default is "".
fs_ugi(str): fs ugi. default is "".
download_cmd(str): customized download command. default is "cat"
"""
self.proto_desc.pipe_command = pipe_command
def set_rank_offset(self, rank_offset):
""" """
Set rank_offset for merge_pv. It set the message of Pv. self._set_batch_size(batch_size)
self._set_thread(thread_num)
Examples: self._set_use_var(use_var)
.. code-block:: python self._set_pipe_command(pipe_command)
self._set_input_type(input_type)
import paddle.fluid as fluid self._set_hdfs_config(fs_name, fs_ugi)
dataset = fluid.DatasetFactory().create_dataset() self._set_download_cmd(download_cmd)
dataset.set_rank_offset("rank_offset")
Args:
rank_offset(str): rank_offset's name
def _set_pipe_command(self, pipe_command):
""" """
self.proto_desc.rank_offset = rank_offset Set pipe command of current dataset
A pipe command is a UNIX pipeline command.
def set_fea_eval(self, record_candidate_size, fea_eval=True):
"""
set fea eval mode for slots shuffle to debug the importance level of
slots(features), fea_eval need to be set True for slots shuffle.
Args:
record_candidate_size(int): size of instances candidate to shuffle
one slot
fea_eval(bool): whether enable fea eval mode to enable slots shuffle.
default is True.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.fleet.dataset.DatasetBase()
dataset.set_fea_eval(1000000, True) dataset._set_pipe_command("python my_script.py")
"""
if fea_eval:
self.dataset.set_fea_eval(fea_eval, record_candidate_size)
self.fea_eval = fea_eval
def slots_shuffle(self, slots):
"""
Slots Shuffle
Slots Shuffle is a shuffle method in slots level, which is usually used
in sparse feature with large scale of instances. To compare the metric, i.e.
auc while doing slots shuffle on one or several slots with baseline to
evaluate the importance level of slots(features).
Args: Args:
slots(list[string]): the set of slots(string) to do slots shuffle. pipe_command(str): pipe command
Examples:
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_merge_by_lineid()
#suppose there is a slot 0
dataset.slots_shuffle(['0'])
""" """
if self.fea_eval: self.proto_desc.pipe_command = pipe_command
slots_set = set(slots)
self.dataset.slots_shuffle(slots_set)
def set_batch_size(self, batch_size): def _set_batch_size(self, batch_size):
""" """
Set batch size. Will be effective during training Set batch size. Will be effective during training
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset() dataset = paddle.distributed.fleet.DatasetBase()
dataset.set_batch_size(128) dataset._set_batch_size(128)
Args: Args:
batch_size(int): batch size batch_size(int): batch size
...@@ -172,32 +100,16 @@ class DatasetBase(object): ...@@ -172,32 +100,16 @@ class DatasetBase(object):
""" """
self.proto_desc.batch_size = batch_size self.proto_desc.batch_size = batch_size
def set_pv_batch_size(self, pv_batch_size): def _set_thread(self, thread_num):
"""
Set pv batch size. It will be effective during enable_pv_merge
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_pv_batch(128)
Args:
pv_batch_size(int): pv batch size
"""
self.proto_desc.pv_batch_size = pv_batch_size
def set_thread(self, thread_num):
""" """
Set thread num, it is the num of readers. Set thread num, it is the num of readers.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset() dataset = paddle.distributed.fleet.DatasetBase()
dataset.set_thread(12) dataset._set_thread(12)
Args: Args:
thread_num(int): thread num thread_num(int): thread num
...@@ -212,8 +124,8 @@ class DatasetBase(object): ...@@ -212,8 +124,8 @@ class DatasetBase(object):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset() dataset = paddle.distributed.fleet.DatasetBase()
dataset.set_filelist(['a.txt', 'b.txt']) dataset.set_filelist(['a.txt', 'b.txt'])
Args: Args:
...@@ -222,19 +134,19 @@ class DatasetBase(object): ...@@ -222,19 +134,19 @@ class DatasetBase(object):
self.dataset.set_filelist(filelist) self.dataset.set_filelist(filelist)
self.filelist = filelist self.filelist = filelist
def set_input_type(self, input_type): def _set_input_type(self, input_type):
self.proto_desc.input_type = input_type self.proto_desc.input_type = input_type
def set_use_var(self, var_list): def _set_use_var(self, var_list):
""" """
Set Variables which you will use. Set Variables which you will use.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset() dataset = paddle.distributed.fleet.DatasetBase()
dataset.set_use_var([data, label]) dataset._set_use_var([data, label])
Args: Args:
var_list(list): variable list var_list(list): variable list
...@@ -253,19 +165,19 @@ class DatasetBase(object): ...@@ -253,19 +165,19 @@ class DatasetBase(object):
slot_var.type = "uint64" slot_var.type = "uint64"
else: else:
raise ValueError( raise ValueError(
"Currently, fluid.dataset only supports dtype=float32 and dtype=int64" "Currently, paddle.distributed.fleet.dataset only supports dtype=float32 and dtype=int64"
) )
def set_hdfs_config(self, fs_name, fs_ugi): def _set_hdfs_config(self, fs_name, fs_ugi):
""" """
Set hdfs config: fs name and ugi Set hdfs config: fs name and ugi
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset() dataset = paddle.distributed.fleet.DatasetBase()
dataset.set_hdfs_config("my_fs_name", "my_fs_ugi") dataset._set_hdfs_config("my_fs_name", "my_fs_ugi")
Args: Args:
fs_name(str): fs name fs_name(str): fs name
...@@ -273,16 +185,16 @@ class DatasetBase(object): ...@@ -273,16 +185,16 @@ class DatasetBase(object):
""" """
self.dataset.set_hdfs_config(fs_name, fs_ugi) self.dataset.set_hdfs_config(fs_name, fs_ugi)
def set_download_cmd(self, download_cmd): def _set_download_cmd(self, download_cmd):
""" """
Set customized download cmd: download_cmd Set customized download cmd: download_cmd
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset() dataset = paddle.distributed.fleet.DatasetBase()
dataset.set_download_cmd("./read_from_afs") dataset._set_download_cmd("./read_from_afs")
Args: Args:
download_cmd(str): customized download command download_cmd(str): customized download command
...@@ -297,22 +209,22 @@ class DatasetBase(object): ...@@ -297,22 +209,22 @@ class DatasetBase(object):
if self.thread_num > len(self.filelist): if self.thread_num > len(self.filelist):
self.thread_num = len(self.filelist) self.thread_num = len(self.filelist)
self.dataset.set_thread_num(self.thread_num) self.dataset.set_thread_num(self.thread_num)
self.dataset.set_data_feed_desc(self.desc()) self.dataset.set_data_feed_desc(self._desc())
self.dataset.create_readers() self.dataset.create_readers()
def _finish_to_run(self): def _finish_to_run(self):
self.dataset.destroy_readers() self.dataset.destroy_readers()
def desc(self): def _desc(self):
""" """
Returns a protobuf message for this DataFeedDesc Returns a protobuf message for this DataFeedDesc
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset() dataset = paddle.distributed.fleet.DatasetBase()
print(dataset.desc()) print(dataset._desc())
Returns: Returns:
A string message A string message
...@@ -330,10 +242,10 @@ class InMemoryDataset(DatasetBase): ...@@ -330,10 +242,10 @@ class InMemoryDataset(DatasetBase):
""" """
InMemoryDataset, it will load data into memory InMemoryDataset, it will load data into memory
and shuffle data before training. and shuffle data before training.
This class should be created by DatasetFactory
Example: Example:
dataset = paddle.fluid.DatasetFactory().create_dataset("InMemoryDataset") import paddle
dataset = paddle.distributed.InMemoryDataset()
""" """
def __init__(self): def __init__(self):
...@@ -351,7 +263,229 @@ class InMemoryDataset(DatasetBase): ...@@ -351,7 +263,229 @@ class InMemoryDataset(DatasetBase):
self.merge_by_lineid = False self.merge_by_lineid = False
self.fleet_send_sleep_seconds = None self.fleet_send_sleep_seconds = None
def set_feed_type(self, data_feed_type): def _init_distributed_settings(self, **kwargs):
"""
should be called only once in user's python scripts to initialize distributed-related settings of dataset instance
Args:
kwargs: Keyword arguments. Currently, we support following keys in **kwargs:
merge_size(int): ins size to merge, if merge_size > 0, set merge by line id,
instances of same line id will be merged after shuffle,
you should parse line id in data generator. default is -1.
parse_ins_id(bool): Set if Dataset need to parse ins_id. default is False.
parse_content(bool): Set if Dataset need to parse content. default is False.
fleet_send_batch_size(int): Set fleet send batch size in one rpc, default is 1024
fleet_send_sleep_seconds(int): Set fleet send sleep time, default is 0
fea_eval(bool): Set if Dataset need to do feature importance evaluation using slots shuffle.
default is False.
candidate_size(int): if fea_eval is set True, set the candidate size used in slots shuffle.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.InMemoryDataset()
dataset.init(
batch_size=1,
thread_num=2,
input_type=1,
pipe_command="cat",
use_var=[])
dataset._init_distributed_settings(
parse_ins_id=True,
parse_content=True,
fea_eval=True,
candidate_size=10000)
"""
merge_size = kwargs.get("merge_size", -1)
if merge_size > 0:
self._set_merge_by_lineid(merge_size)
parse_ins_id = kwargs.get("parse_ins_id", False)
self._set_parse_ins_id(parse_ins_id)
parse_content = kwargs.get("parse_content", False)
self._set_parse_content(parse_content)
fleet_send_batch_size = kwargs.get("fleet_send_batch_size", None)
if fleet_send_batch_size:
self._set_fleet_send_batch_size(fleet_send_batch_size)
fleet_send_sleep_seconds = kwargs.get("fleet_send_sleep_seconds", None)
if fleet_send_sleep_seconds:
self._set_fleet_send_sleep_seconds(fleet_send_sleep_seconds)
fea_eval = kwargs.get("fea_eval", False)
if fea_eval:
candidate_size = kwargs.get("candidate_size", 10000)
self._set_fea_eval(candidate_size, True)
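Taken together with init(), the split works out as below; a sketch using only calls shown in this diff, with placeholder file names:

    import paddle

    dataset = paddle.distributed.InMemoryDataset()
    # Public single-node settings first ...
    dataset.init(batch_size=32, thread_num=4, pipe_command="cat", use_var=[])
    # ... then the distributed-only knobs via the private initializer.
    dataset._init_distributed_settings(
        merge_size=2,
        parse_ins_id=True,
        fea_eval=True,
        candidate_size=10000)
    dataset.set_filelist(["part-000", "part-001"])  # placeholder files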
def update_settings(self, **kwargs):
"""
should be called in user's python scripts to update settings of dataset instance
Args:
kwargs: Keyword arguments. Currently, we support following keys in **kwargs,
including single node settings and advanced distributed related settings:
batch_size(int): batch size. It will be effective during training. default is 1.
thread_num(int): thread num, it is the num of readers. default is 1.
use_var(list): list of variables. Variables which you will use. default is [].
input_type(int): the input type of generated input. 0 is for one sample, 1 is for one batch. Default is 0.
fs_name(str): fs name. default is "".
fs_ugi(str): fs ugi. default is "".
pipe_command(str): pipe command of current dataset. A pipe command is a UNIX pipeline command. Default is "cat".
download_cmd(str): customized download command. default is "cat"
data_feed_type(str): data feed type used in c++ code. default is "MultiSlotInMemoryDataFeed".
queue_num(int): Dataset output queue num, training threads get data from queues. Default is -1, which means it is set to the thread number in C++.
merge_size(int): ins size to merge, if merge_size > 0, set merge by line id,
instances of same line id will be merged after shuffle,
you should parse line id in data generator. default is -1.
parse_ins_id(bool): Set if Dataset need to parse ins_id. default is False.
parse_content(bool): Set if Dataset need to parse content. default is False.
fleet_send_batch_size(int): Set fleet send batch size in one rpc, default is 1024
fleet_send_sleep_seconds(int): Set fleet send sleep time, default is 0
fea_eval(bool): Set if Dataset need to do feature importance evaluation using slots shuffle.
default is False.
candidate_size(int): if fea_eval is set True, set the candidate size used in slots shuffle.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.InMemoryDataset()
dataset.init(
batch_size=1,
thread_num=2,
input_type=1,
pipe_command="cat",
use_var=[])
dataset._init_distributed_settings(
parse_ins_id=True,
parse_content=True,
fea_eval=True,
candidate_size=10000)
dataset.update_settings(batch_size=2)
"""
for key in kwargs:
if key == "pipe_command":
self._set_pipe_command(kwargs[key])
elif key == "batch_size":
self._set_batch_size(kwargs[key])
elif key == "thread_num":
self._set_thread(kwargs[key])
elif key == "use_var":
self._set_use_var(kwargs[key])
elif key == "input_type":
self._set_input_type(kwargs[key])
elif key == "fs_name" and "fs_ugi" in kwargs:
self._set_hdfs_config(kwargs[key], kwargs["fs_ugi"])
elif key == "download_cmd":
self._set_download_cmd(kwargs[key])
elif key == "merge_size" and kwargs.get("merge_size", -1) > 0:
self._set_merge_by_lineid(kwargs[key])
elif key == "parse_ins_id":
self._set_parse_ins_id(kwargs[key])
elif key == "parse_content":
self._set_parse_content(kwargs[key])
elif key == "fleet_send_batch_size":
self._set_fleet_send_batch_size(kwargs[key])
elif key == "fleet_send_sleep_seconds":
self._set_fleet_send_sleep_seconds(kwargs[key])
elif key == "fea_eval" and kwargs[key] == True:
candidate_size = kwargs.get("candidate_size", 10000)
self._set_fea_eval(candidate_size, True)
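One detail worth noting in the dispatch above: fs_name is only applied when fs_ugi arrives in the same call, so HDFS settings travel as a pair. A sketch with placeholder values:

    import paddle

    dataset = paddle.distributed.InMemoryDataset()
    dataset.init(batch_size=1, thread_num=2, pipe_command="cat", use_var=[])
    # fs_name without fs_ugi would be silently ignored by update_settings().
    dataset.update_settings(fs_name="my_fs_name", fs_ugi="my_fs_ugi")
    # Scalar settings can be updated independently later on.
    dataset.update_settings(batch_size=64)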
def init(self, **kwargs):
"""
should be called only once in user's python scripts to initialize settings of dataset instance
Args:
kwargs: Keyword arguments. Currently, we support following keys in **kwargs:
batch_size(int): batch size. It will be effective during training. default is 1.
thread_num(int): thread num, it is the num of readers. default is 1.
use_var(list): list of variables. Variables which you will use. default is [].
input_type(int): the input type of generated input. 0 is for one sample, 1 is for one batch. Default is 0.
fs_name(str): fs name. default is "".
fs_ugi(str): fs ugi. default is "".
pipe_command(str): pipe command of current dataset. A pipe command is a UNIX pipeline command. Default is "cat".
download_cmd(str): customized download command. default is "cat"
data_feed_type(str): data feed type used in c++ code. default is "MultiSlotInMemoryDataFeed".
queue_num(int): Dataset output queue num, training threads get data from queues. Default is -1, which means it is set to the thread number in C++.
Examples:
.. code-block:: python
import os
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
with open("test_queue_dataset_run_a.txt", "w") as f:
data = "2 1 2 2 5 4 2 2 7 2 1 3\n"
data += "2 6 2 2 1 4 2 2 4 2 2 3\n"
data += "2 5 2 2 9 9 2 2 7 2 1 3\n"
data += "2 7 2 2 1 9 2 3 7 2 5 3\n"
f.write(data)
with open("test_queue_dataset_run_b.txt", "w") as f:
data = "2 1 2 2 5 4 2 2 7 2 1 3\n"
data += "2 6 2 2 1 4 2 2 4 2 2 3\n"
data += "2 5 2 2 9 9 2 2 7 2 1 3\n"
data += "2 7 2 2 1 9 2 3 7 2 5 3\n"
f.write(data)
slots = ["slot1", "slot2", "slot3", "slot4"]
slots_vars = []
for slot in slots:
var = fluid.data(
name=slot, shape=[None, 1], dtype="int64", lod_level=1)
slots_vars.append(var)
dataset = paddle.distributed.InMemoryDataset()
dataset.init(
batch_size=1,
thread_num=2,
input_type=1,
pipe_command="cat",
use_var=slots_vars)
dataset.set_filelist(
["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"])
dataset.load_into_memory()
exe = fluid.Executor(fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0))
exe.run(fluid.default_startup_program())
exe.train_from_dataset(fluid.default_main_program(),
dataset)
os.remove("./test_queue_dataset_run_a.txt")
os.remove("./test_queue_dataset_run_b.txt")
"""
batch_size = kwargs.get("batch_size", 1)
thread_num = kwargs.get("thread_num", 1)
use_var = kwargs.get("use_var", [])
input_type = kwargs.get("input_type", 0)
fs_name = kwargs.get("fs_name", "")
fs_ugi = kwargs.get("fs_ugi", "")
pipe_command = kwargs.get("pipe_command", "cat")
download_cmd = kwargs.get("download_cmd", "cat")
super(InMemoryDataset, self).init(
batch_size=batch_size,
thread_num=thread_num,
use_var=use_var,
pipe_command=pipe_command,
input_type=input_type,
fs_name=fs_name,
fs_ugi=fs_ugi,
download_cmd=download_cmd)
data_feed_type = kwargs.get("data_feed_type",
"MultiSlotInMemoryDataFeed")
self._set_feed_type(data_feed_type)
if kwargs.get("queue_num", -1) > 0:
queue_num = kwargs.get("queue_num", -1)
self._set_queue_num(queue_num)
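data_feed_type and queue_num are consumed by InMemoryDataset.init() itself rather than forwarded to the base class; a sketch with illustrative values:

    import paddle

    dataset = paddle.distributed.InMemoryDataset()
    dataset.init(
        batch_size=1,
        thread_num=2,
        input_type=1,
        pipe_command="cat",
        use_var=[],
        data_feed_type="MultiSlotInMemoryDataFeed",  # the default feed type
        queue_num=4)  # values <= 0 keep the C++ default (one queue per thread)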
def _set_feed_type(self, data_feed_type):
""" """
Set data_feed_desc Set data_feed_desc
""" """
...@@ -373,7 +507,7 @@ class InMemoryDataset(DatasetBase): ...@@ -373,7 +507,7 @@ class InMemoryDataset(DatasetBase):
self.dataset.set_parse_logkey(self.parse_logkey) self.dataset.set_parse_logkey(self.parse_logkey)
self.dataset.set_merge_by_sid(self.merge_by_sid) self.dataset.set_merge_by_sid(self.merge_by_sid)
self.dataset.set_enable_pv_merge(self.enable_pv_merge) self.dataset.set_enable_pv_merge(self.enable_pv_merge)
self.dataset.set_data_feed_desc(self.desc()) self.dataset.set_data_feed_desc(self._desc())
self.dataset.create_channel() self.dataset.create_channel()
self.dataset.create_readers() self.dataset.create_readers()
...@@ -387,7 +521,7 @@ class InMemoryDataset(DatasetBase): ...@@ -387,7 +521,7 @@ class InMemoryDataset(DatasetBase):
self.dataset.dynamic_adjust_channel_num(self.thread_num, False) self.dataset.dynamic_adjust_channel_num(self.thread_num, False)
self.dataset.dynamic_adjust_readers_num(self.thread_num) self.dataset.dynamic_adjust_readers_num(self.thread_num)
def set_queue_num(self, queue_num): def _set_queue_num(self, queue_num):
""" """
Set Dataset output queue num, training threads get data from queues Set Dataset output queue num, training threads get data from queues
...@@ -397,17 +531,17 @@ class InMemoryDataset(DatasetBase): ...@@ -397,17 +531,17 @@ class InMemoryDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.InMemoryDataset()
dataset.set_queue_num(12) dataset._set_queue_num(12)
""" """
self.is_user_set_queue_num = True self.is_user_set_queue_num = True
self.queue_num = queue_num self.queue_num = queue_num
def set_parse_ins_id(self, parse_ins_id): def _set_parse_ins_id(self, parse_ins_id):
""" """
Set id Dataset need to parse insid Set if Dataset needs to parse ins_id
Args: Args:
parse_ins_id(bool): if parse ins_id or not parse_ins_id(bool): if parse ins_id or not
...@@ -415,14 +549,14 @@ class InMemoryDataset(DatasetBase): ...@@ -415,14 +549,14 @@ class InMemoryDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.InMemoryDataset()
dataset.set_parse_ins_id(True) dataset._set_parse_ins_id(True)
""" """
self.parse_ins_id = parse_ins_id self.parse_ins_id = parse_ins_id
def set_parse_content(self, parse_content): def _set_parse_content(self, parse_content):
""" """
Set if Dataset needs to parse content Set if Dataset needs to parse content
...@@ -432,120 +566,14 @@ class InMemoryDataset(DatasetBase): ...@@ -432,120 +566,14 @@ class InMemoryDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.InMemoryDataset()
dataset.set_parse_content(True) dataset._set_parse_content(True)
""" """
self.parse_content = parse_content self.parse_content = parse_content
def set_parse_logkey(self, parse_logkey): def _set_fleet_send_batch_size(self, fleet_send_batch_size=1024):
"""
Set if Dataset need to parse logkey
Args:
parse_content(bool): if parse logkey or not
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_parse_logkey(True)
"""
self.parse_logkey = parse_logkey
def set_merge_by_sid(self, merge_by_sid):
"""
Set if Dataset need to merge sid. If not, one ins means one Pv.
Args:
merge_by_sid(bool): if merge sid or not
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_merge_by_sid(True)
"""
self.merge_by_sid = merge_by_sid
def set_enable_pv_merge(self, enable_pv_merge):
"""
Set if Dataset need to merge pv.
Args:
enable_pv_merge(bool): if enable_pv_merge or not
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_enable_pv_merge(True)
"""
self.enable_pv_merge = enable_pv_merge
def preprocess_instance(self):
"""
Merge pv instance and convey it from input_channel to input_pv_channel.
It will be effective when enable_pv_merge_ is True.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
dataset.preprocess_instance()
"""
self.dataset.preprocess_instance()
def set_current_phase(self, current_phase):
"""
Set current phase in train. It is useful for untest.
current_phase : 1 for join, 0 for update.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
dataset.set_current_phase(1)
"""
self.dataset.set_current_phase(current_phase)
def postprocess_instance(self):
"""
Divide pv instance and convey it to input_channel.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
dataset.preprocess_instance()
exe.train_from_dataset(dataset)
dataset.postprocess_instance()
"""
self.dataset.postprocess_instance()
def set_fleet_send_batch_size(self, fleet_send_batch_size=1024):
""" """
Set fleet send batch size, default is 1024 Set fleet send batch size, default is 1024
...@@ -555,14 +583,14 @@ class InMemoryDataset(DatasetBase): ...@@ -555,14 +583,14 @@ class InMemoryDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.InMemoryDataset()
dataset.set_fleet_send_batch_size(800) dataset._set_fleet_send_batch_size(800)
""" """
self.fleet_send_batch_size = fleet_send_batch_size self.fleet_send_batch_size = fleet_send_batch_size
def set_fleet_send_sleep_seconds(self, fleet_send_sleep_seconds=0): def _set_fleet_send_sleep_seconds(self, fleet_send_sleep_seconds=0):
""" """
Set fleet send sleep time, default is 0 Set fleet send sleep time, default is 0
...@@ -572,14 +600,14 @@ class InMemoryDataset(DatasetBase): ...@@ -572,14 +600,14 @@ class InMemoryDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.InMemoryDataset()
dataset.set_fleet_send_sleep_seconds(2) dataset._set_fleet_send_sleep_seconds(2)
""" """
self.fleet_send_sleep_seconds = fleet_send_sleep_seconds self.fleet_send_sleep_seconds = fleet_send_sleep_seconds
def set_merge_by_lineid(self, merge_size=2): def _set_merge_by_lineid(self, merge_size=2):
""" """
Set merge by line id, instances of same line id will be merged after Set merge by line id, instances of same line id will be merged after
shuffle, you should parse line id in data generator. shuffle, you should parse line id in data generator.
...@@ -590,22 +618,22 @@ class InMemoryDataset(DatasetBase): ...@@ -590,22 +618,22 @@ class InMemoryDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.InMemoryDataset()
dataset.set_merge_by_lineid() dataset._set_merge_by_lineid()
""" """
self.dataset.set_merge_by_lineid(merge_size) self.dataset.set_merge_by_lineid(merge_size)
self.merge_by_lineid = True self.merge_by_lineid = True
self.parse_ins_id = True self.parse_ins_id = True
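Note the coupling encoded above: enabling merge-by-lineid also forces parse_ins_id, since merging needs the line id parsed from each instance. A small sketch:

    import paddle

    dataset = paddle.distributed.InMemoryDataset()
    # _set_merge_by_lineid also flips parse_ins_id on as a side effect.
    dataset._set_merge_by_lineid(2)
    assert dataset.merge_by_lineid and dataset.parse_ins_id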
def set_generate_unique_feasigns(self, generate_uni_feasigns, shard_num): def _set_generate_unique_feasigns(self, generate_uni_feasigns, shard_num):
self.dataset.set_generate_unique_feasigns(generate_uni_feasigns) self.dataset.set_generate_unique_feasigns(generate_uni_feasigns)
self.gen_uni_feasigns = generate_uni_feasigns self.gen_uni_feasigns = generate_uni_feasigns
self.local_shard_num = shard_num self.local_shard_num = shard_num
def generate_local_tables_unlock(self, table_id, fea_dim, read_thread_num, def _generate_local_tables_unlock(self, table_id, fea_dim, read_thread_num,
consume_thread_num, shard_num): consume_thread_num, shard_num):
self.dataset.generate_local_tables_unlock( self.dataset.generate_local_tables_unlock(
table_id, fea_dim, read_thread_num, consume_thread_num, shard_num) table_id, fea_dim, read_thread_num, consume_thread_num, shard_num)
...@@ -616,8 +644,8 @@ class InMemoryDataset(DatasetBase): ...@@ -616,8 +644,8 @@ class InMemoryDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"] filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist) dataset.set_filelist(filelist)
dataset.load_into_memory() dataset.load_into_memory()
...@@ -635,8 +663,8 @@ class InMemoryDataset(DatasetBase): ...@@ -635,8 +663,8 @@ class InMemoryDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"] filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist) dataset.set_filelist(filelist)
dataset.preload_into_memory() dataset.preload_into_memory()
...@@ -656,8 +684,8 @@ class InMemoryDataset(DatasetBase): ...@@ -656,8 +684,8 @@ class InMemoryDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"] filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist) dataset.set_filelist(filelist)
dataset.preload_into_memory() dataset.preload_into_memory()
...@@ -673,8 +701,8 @@ class InMemoryDataset(DatasetBase): ...@@ -673,8 +701,8 @@ class InMemoryDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"] filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist) dataset.set_filelist(filelist)
dataset.load_into_memory() dataset.load_into_memory()
...@@ -692,9 +720,9 @@ class InMemoryDataset(DatasetBase): ...@@ -692,9 +720,9 @@ class InMemoryDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"] filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist) dataset.set_filelist(filelist)
dataset.load_into_memory() dataset.load_into_memory()
...@@ -736,9 +764,9 @@ class InMemoryDataset(DatasetBase): ...@@ -736,9 +764,9 @@ class InMemoryDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"] filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist) dataset.set_filelist(filelist)
dataset.load_into_memory() dataset.load_into_memory()
...@@ -751,30 +779,6 @@ class InMemoryDataset(DatasetBase): ...@@ -751,30 +779,6 @@ class InMemoryDataset(DatasetBase):
""" """
self.dataset.release_memory() self.dataset.release_memory()
def get_pv_data_size(self):
"""
Get memory data size of Pv, user can call this function to know the pv num
of ins in all workers after load into memory.
Note:
This function may cause bad performance, because it has barrier
Returns:
The size of memory pv data.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
print dataset.get_pv_data_size()
"""
return self.dataset.get_pv_data_size()
def get_memory_data_size(self, fleet=None): def get_memory_data_size(self, fleet=None):
""" """
Get memory data size, user can call this function to know the num Get memory data size, user can call this function to know the num
...@@ -792,9 +796,9 @@ class InMemoryDataset(DatasetBase): ...@@ -792,9 +796,9 @@ class InMemoryDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"] filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist) dataset.set_filelist(filelist)
dataset.load_into_memory() dataset.load_into_memory()
...@@ -829,9 +833,9 @@ class InMemoryDataset(DatasetBase): ...@@ -829,9 +833,9 @@ class InMemoryDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"] filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist) dataset.set_filelist(filelist)
dataset.load_into_memory() dataset.load_into_memory()
...@@ -849,6 +853,51 @@ class InMemoryDataset(DatasetBase): ...@@ -849,6 +853,51 @@ class InMemoryDataset(DatasetBase):
return global_data_size[0] return global_data_size[0]
return local_data_size[0] return local_data_size[0]
def _set_fea_eval(self, record_candidate_size, fea_eval=True):
"""
Set fea eval mode for slots shuffle, to debug the importance level of
slots (features); fea_eval needs to be set True for slots shuffle.
Args:
record_candidate_size(int): size of instances candidate to shuffle
one slot
fea_eval(bool): whether enable fea eval mode to enable slots shuffle.
default is True.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.InMemoryDataset()
dataset._set_fea_eval(1000000, True)
"""
if fea_eval:
self.dataset.set_fea_eval(fea_eval, record_candidate_size)
self.fea_eval = fea_eval
def slots_shuffle(self, slots):
"""
Slots Shuffle
Slots Shuffle is a shuffle method at the slot level, usually used on
sparse features with a large scale of instances. The metric (e.g. AUC)
with slots shuffle applied to one or several slots is compared against
the baseline to evaluate the importance level of those slots (features).
Args:
slots(list[string]): the set of slots(string) to do slots shuffle.
Examples:
import paddle
dataset = paddle.distributed.InMemoryDataset()
dataset._set_merge_by_lineid()
#suppose there is a slot 0
dataset.slots_shuffle(['0'])
"""
if self.fea_eval:
slots_set = set(slots)
self.dataset.slots_shuffle(slots_set)
class QueueDataset(DatasetBase): class QueueDataset(DatasetBase):
""" """
...@@ -857,19 +906,24 @@ class QueueDataset(DatasetBase): ...@@ -857,19 +906,24 @@ class QueueDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("QueueDataset") dataset = paddle.distributed.QueueDataset()
""" """
def __init__(self): def __init__(self):
""" """
Initialize QueueDataset Initialize QueueDataset
This class should be created by DatasetFactory
""" """
super(QueueDataset, self).__init__() super(QueueDataset, self).__init__()
self.proto_desc.name = "MultiSlotDataFeed" self.proto_desc.name = "MultiSlotDataFeed"
def init(self, **kwargs):
"""
should be called only once in user's python scripts to initialize settings of dataset instance
"""
super(QueueDataset, self).init(**kwargs)
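QueueDataset streams examples from files instead of loading them into memory, so there is no load/shuffle step; a minimal flow under that assumption, with placeholder file names:

    import paddle

    dataset = paddle.distributed.QueueDataset()
    dataset.init(batch_size=32, thread_num=2, pipe_command="cat", use_var=[])
    # No load_into_memory()/local_shuffle() here -- QueueDataset reads
    # directly from the file list while training runs.
    dataset.set_filelist(["a.txt", "b.txt"])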
def _prepare_to_run(self): def _prepare_to_run(self):
""" """
Set data_feed_desc/thread num/filelist before run, Set data_feed_desc/thread num/filelist before run,
...@@ -881,57 +935,9 @@ class QueueDataset(DatasetBase): ...@@ -881,57 +935,9 @@ class QueueDataset(DatasetBase):
self.thread_num = 1 self.thread_num = 1
self.dataset.set_thread_num(self.thread_num) self.dataset.set_thread_num(self.thread_num)
self.dataset.set_filelist(self.filelist) self.dataset.set_filelist(self.filelist)
self.dataset.set_data_feed_desc(self.desc()) self.dataset.set_data_feed_desc(self._desc())
self.dataset.create_readers() self.dataset.create_readers()
def local_shuffle(self):
"""
Local shuffle data.
Local shuffle is not supported in QueueDataset
NotImplementedError will be raised
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("QueueDataset")
dataset.local_shuffle()
Raises:
NotImplementedError: QueueDataset does not support local shuffle
"""
raise NotImplementedError(
"QueueDataset does not support local shuffle, "
"please use InMemoryDataset for local_shuffle")
def global_shuffle(self, fleet=None):
"""
Global shuffle data.
Global shuffle is not supported in QueueDataset
NotImplementedError will be raised
Args:
fleet(Fleet): fleet singleton. Default None.
Examples:
.. code-block:: python
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
dataset = fluid.DatasetFactory().create_dataset("QueueDataset")
dataset.global_shuffle(fleet)
Raises:
NotImplementedError: QueueDataset does not support global shuffle
"""
raise NotImplementedError(
"QueueDataset does not support global shuffle, "
"please use InMemoryDataset for global_shuffle")
class FileInstantDataset(DatasetBase): class FileInstantDataset(DatasetBase):
""" """
...@@ -940,35 +946,22 @@ class FileInstantDataset(DatasetBase): ...@@ -940,35 +946,22 @@ class FileInstantDataset(DatasetBase):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory.create_dataset("FileInstantDataset") dataset = paddle.distributed.fleet.FileInstantDataset()
""" """
def __init__(self): def __init__(self):
""" """
Initialize FileInstantDataset Initialize FileInstantDataset
This class should be created by DatasetFactory
""" """
super(FileInstantDataset, self).__init__() super(FileInstantDataset, self).__init__()
self.proto_desc.name = "MultiSlotFileInstantDataFeed" self.proto_desc.name = "MultiSlotFileInstantDataFeed"
def local_shuffle(self): def init(self, **kwargs):
""" """
Local shuffle should be called only once in user's python scripts to initialize settings of dataset instance
FileInstantDataset does not support local shuffle
""" """
raise NotImplementedError( super(FileInstantDataset, self).init(**kwargs)
"FileInstantDataset does not support local shuffle, "
"please use InMemoryDataset for local_shuffle")
def global_shuffle(self, fleet=None):
"""
Global shuffle
FileInstantDataset does not support global shuffle
"""
raise NotImplementedError(
"FileInstantDataset does not support global shuffle, "
"please use InMemoryDataset for global_shuffle")
class BoxPSDataset(InMemoryDataset): class BoxPSDataset(InMemoryDataset):
...@@ -978,19 +971,119 @@ class BoxPSDataset(InMemoryDataset): ...@@ -978,19 +971,119 @@ class BoxPSDataset(InMemoryDataset):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset") dataset = paddle.distributed.fleet.BoxPSDataset()
""" """
def __init__(self): def __init__(self):
""" """
Initialize BoxPSDataset Initialize BoxPSDataset
This class should be created by DatasetFactory
""" """
super(BoxPSDataset, self).__init__() super(BoxPSDataset, self).__init__()
self.boxps = core.BoxPS(self.dataset) self.boxps = core.BoxPS(self.dataset)
self.proto_desc.name = "PaddleBoxDataFeed" self.proto_desc.name = "PaddleBoxDataFeed"
def init(self, **kwargs):
"""
should be called only once in user's python scripts to initialize settings of dataset instance
"""
super(BoxPSDataset, self).init(**kwargs)
rank_offset = kwargs.get("rank_offset", "")
self._set_rank_offset(rank_offset)
pv_batch_size = kwargs.get("pv_batch_size", 1)
self._set_pv_batch_size(pv_batch_size)
parse_logkey = kwargs.get("parse_logkey", False)
self._set_parse_logkey(parse_logkey)
merge_by_sid = kwargs.get("merge_by_sid", False)
self._set_merge_by_sid(merge_by_sid)
enable_pv_merge = kwargs.get("enable_pv_merge", False)
self._set_enable_pv_merge(enable_pv_merge)
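BoxPSDataset.init() layers five extra kwargs on top of the common ones; a sketch with illustrative values and the defaults as in the code above:

    import paddle

    dataset = paddle.distributed.fleet.BoxPSDataset()
    dataset.init(
        batch_size=32,
        thread_num=2,
        pipe_command="cat",
        use_var=[],
        rank_offset="rank_offset",  # message name used for merge_pv
        pv_batch_size=128,          # default is 1
        parse_logkey=True,          # default is False
        merge_by_sid=True,          # default is False
        enable_pv_merge=True)       # default is False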
def _set_rank_offset(self, rank_offset):
"""
Set rank_offset for merge_pv. It sets the message of Pv.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset._set_rank_offset("rank_offset")
Args:
rank_offset(str): rank_offset's name
"""
self.proto_desc.rank_offset = rank_offset
def _set_pv_batch_size(self, pv_batch_size):
"""
Set pv batch size. It will be effective when enable_pv_merge is set.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset._set_pv_batch_size(128)
Args:
pv_batch_size(int): pv batch size
"""
self.proto_desc.pv_batch_size = pv_batch_size
def _set_parse_logkey(self, parse_logkey):
"""
Set if Dataset needs to parse logkey
Args:
parse_logkey(bool): if parse logkey or not
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset._set_parse_logkey(True)
"""
self.parse_logkey = parse_logkey
def _set_merge_by_sid(self, merge_by_sid):
"""
Set if Dataset needs to merge sid. If not, one ins means one Pv.
Args:
merge_by_sid(bool): if merge sid or not
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset._set_merge_by_sid(True)
"""
self.merge_by_sid = merge_by_sid
def _set_enable_pv_merge(self, enable_pv_merge):
"""
Set if Dataset needs to merge pv.
Args:
enable_pv_merge(bool): if enable_pv_merge or not
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset._set_enable_pv_merge(True)
"""
self.enable_pv_merge = enable_pv_merge
def set_date(self, date): def set_date(self, date):
""" """
Workaround for date Workaround for date
...@@ -1008,8 +1101,8 @@ class BoxPSDataset(InMemoryDataset): ...@@ -1008,8 +1101,8 @@ class BoxPSDataset(InMemoryDataset):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset") dataset = paddle.distributed.fleet.BoxPSDataset()
dataset.begin_pass() dataset.begin_pass()
""" """
self.boxps.begin_pass() self.boxps.begin_pass()
...@@ -1021,8 +1114,8 @@ class BoxPSDataset(InMemoryDataset): ...@@ -1021,8 +1114,8 @@ class BoxPSDataset(InMemoryDataset):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset") dataset = paddle.distributed.fleet.BoxPSDataset()
dataset.end_pass(True) dataset.end_pass(True)
""" """
self.boxps.end_pass(need_save_delta) self.boxps.end_pass(need_save_delta)
...@@ -1034,8 +1127,8 @@ class BoxPSDataset(InMemoryDataset): ...@@ -1034,8 +1127,8 @@ class BoxPSDataset(InMemoryDataset):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset") dataset = paddle.distributed.fleet.BoxPSDataset()
filelist = ["a.txt", "b.txt"] filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist) dataset.set_filelist(filelist)
dataset.preload_into_memory() dataset.preload_into_memory()
...@@ -1049,8 +1142,8 @@ class BoxPSDataset(InMemoryDataset): ...@@ -1049,8 +1142,8 @@ class BoxPSDataset(InMemoryDataset):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset") dataset = paddle.distributed.fleet.BoxPSDataset()
filelist = ["a.txt", "b.txt"] filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist) dataset.set_filelist(filelist)
dataset.load_into_memory() dataset.load_into_memory()
...@@ -1064,8 +1157,8 @@ class BoxPSDataset(InMemoryDataset): ...@@ -1064,8 +1157,8 @@ class BoxPSDataset(InMemoryDataset):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset") dataset = paddle.distributed.fleet.BoxPSDataset()
filelist = ["a.txt", "b.txt"] filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist) dataset.set_filelist(filelist)
dataset.preload_into_memory() dataset.preload_into_memory()
...@@ -1093,11 +1186,90 @@ class BoxPSDataset(InMemoryDataset): ...@@ -1093,11 +1186,90 @@ class BoxPSDataset(InMemoryDataset):
slots(list[string]): the set of slots(string) to do slots shuffle. slots(list[string]): the set of slots(string) to do slots shuffle.
Examples: Examples:
import paddle.fluid as fluid import paddle
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset = paddle.distributed.fleet.BoxPSDataset()
dataset._set_merge_by_lineid() dataset._set_merge_by_lineid()
#suppose there is a slot 0 #suppose there is a slot 0
dataset.slots_shuffle(['0']) dataset.slots_shuffle(['0'])
""" """
slots_set = set(slots) slots_set = set(slots)
self.boxps.slots_shuffle(slots_set) self.boxps.slots_shuffle(slots_set)
def set_current_phase(self, current_phase):
"""
Set current phase in train. It is useful for unit tests.
current_phase : 1 for join, 0 for update.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
dataset.set_current_phase(1)
"""
self.dataset.set_current_phase(current_phase)
def get_pv_data_size(self):
"""
Get memory data size of Pv, user can call this function to know the pv num
of ins in all workers after load into memory.
Note:
This function may cause bad performance, because it has a barrier.
Returns:
The size of memory pv data.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
print(dataset.get_pv_data_size())
"""
return self.dataset.get_pv_data_size()
def preprocess_instance(self):
"""
Merge pv instance and convey it from input_channel to input_pv_channel.
It will be effective when enable_pv_merge_ is True.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
dataset.preprocess_instance()
"""
self.dataset.preprocess_instance()
def postprocess_instance(self):
"""
Divide pv instance and convey it to input_channel.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
dataset.preprocess_instance()
exe.train_from_dataset(dataset)
dataset.postprocess_instance()
"""
self.dataset.postprocess_instance()
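The pv-merge methods moved here bracket a training pass; a hypothetical helper showing the order they go in (exe, program and a prepared dataset are assumed to exist, as in the docstring examples):

    def run_pv_pass(exe, program, dataset):
        # Order matters: merge pv instances before training, split after.
        dataset.load_into_memory()
        dataset.preprocess_instance()      # ins -> pv instances
        exe.train_from_dataset(program, dataset)
        dataset.postprocess_instance()     # pv instances -> ins
        dataset.release_memory()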
...@@ -157,17 +157,20 @@ def get_cluster_from_args(args, gpus): ...@@ -157,17 +157,20 @@ def get_cluster_from_args(args, gpus):
free_ports = [x for x in range(start_port, start_port + len(gpus))] free_ports = [x for x in range(start_port, start_port + len(gpus))]
return get_cluster(node_ips, node_ip, free_ports, gpus) trainer_endpoints = []
for ip in node_ips:
trainer_endpoints.append(["%s:%d" % (ip, port) for port in free_ports])
return get_cluster(node_ips, node_ip, trainer_endpoints, gpus)
def get_gpus(gpus): def get_gpus(gpus):
if gpus is None: if gpus is None:
gpus_num = fluid.core.get_cuda_device_count() gpus_num = fluid.core.get_cuda_device_count()
gpus = [str(x) for x in range(0, gpus_num)] res_gpus = [str(x) for x in range(0, gpus_num)]
else: else:
cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES") cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES")
if cuda_visible_devices is None or cuda_visible_devices == "": if cuda_visible_devices is None or cuda_visible_devices == "":
gpus = [x.strip() for x in gpus.split(',')] res_gpus = [x.strip() for x in gpus.split(',')]
else: else:
# change gpus into relative values # change gpus into relative values
# e.g. CUDA_VISIBLE_DEVICES=4,5,6,7; args.gpus=4,5,6,7; # e.g. CUDA_VISIBLE_DEVICES=4,5,6,7; args.gpus=4,5,6,7;
...@@ -177,12 +180,16 @@ def get_gpus(gpus): ...@@ -177,12 +180,16 @@ def get_gpus(gpus):
assert x in cuda_visible_devices_list, "Can't find "\ assert x in cuda_visible_devices_list, "Can't find "\
"your gpus %s in CUDA_VISIBLE_DEVICES[%s]."\ "your gpus %s in CUDA_VISIBLE_DEVICES[%s]."\
% (x, cuda_visible_devices) % (x, cuda_visible_devices)
gpus = [ res_gpus = [
cuda_visible_devices_list.index(x.strip()) cuda_visible_devices_list.index(x.strip())
for x in gpus.split(',') for x in gpus.split(',')
] ]
logger.info("Change selected_gpus into reletive values. --ips:{} "
"will change into relative_ips:{} according to your "
"CUDA_VISIBLE_DEVICES:{}".format(
gpus, res_gpus, cuda_visible_devices_list))
return gpus return res_gpus
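The remapping above, isolated as plain Python (values are illustrative):

    import os

    os.environ["CUDA_VISIBLE_DEVICES"] = "4,5,6,7"
    gpus = "4,5"

    visible = os.environ["CUDA_VISIBLE_DEVICES"].split(',')
    # Map physical device ids to their position in the visible list.
    res_gpus = [visible.index(x.strip()) for x in gpus.split(',')]
    # res_gpus == [0, 1]: physical device 4 is slot 0 of the visible list.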
def launch_collective(args): def launch_collective(args):
......
...@@ -227,18 +227,23 @@ def get_logger(log_level=20, name="root"): ...@@ -227,18 +227,23 @@ def get_logger(log_level=20, name="root"):
return logger return logger
def get_cluster(node_ips, node_ip, paddle_ports, selected_gpus): def get_cluster(node_ips, node_ip, trainer_endpoints, selected_gpus):
assert type(paddle_ports) is list, "paddle_ports must be list" assert type(trainer_endpoints) is list, "trainer_endpoints must be list"
cluster = Cluster(hdfs=None) cluster = Cluster(hdfs=None)
trainer_rank = 0 trainer_rank = 0
for node_rank, ip in enumerate(node_ips): for node_rank, ip in enumerate(node_ips):
pod = Pod() pod = Pod()
pod.rank = node_rank pod.rank = node_rank
pod.addr = ip pod.addr = ip
cur_node_endpoints = trainer_endpoints[node_rank]
# when using paddlecloud, there may be more endpoints than selected_gpus (user defined)
assert len(cur_node_endpoints) >= len(
selected_gpus
), "current trainer_endpoints size should be greater than or equal to selected_gpus size."
for i in range(len(selected_gpus)): for i in range(len(selected_gpus)):
trainer = Trainer() trainer = Trainer()
trainer.gpus.append(selected_gpus[i]) trainer.gpus.append(selected_gpus[i])
trainer.endpoint = "%s:%d" % (ip, paddle_ports[i]) trainer.endpoint = "%s" % (cur_node_endpoints[i])
trainer.rank = trainer_rank trainer.rank = trainer_rank
trainer_rank += 1 trainer_rank += 1
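With endpoints precomputed per node, the trainer loop just indexes into them; a standalone sketch with made-up addresses:

    trainer_endpoints = [["10.0.0.1:6170", "10.0.0.1:6171"],
                         ["10.0.0.2:6170", "10.0.0.2:6171"]]
    selected_gpus = ["0", "1"]

    trainer_rank = 0
    for node_rank, cur_node_endpoints in enumerate(trainer_endpoints):
        # paddlecloud may hand out more endpoints than selected GPUs.
        assert len(cur_node_endpoints) >= len(selected_gpus)
        for i, gpu in enumerate(selected_gpus):
            print(trainer_rank, gpu, cur_node_endpoints[i])
            trainer_rank += 1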
...@@ -424,10 +429,6 @@ def start_local_trainers(cluster, ...@@ -424,10 +429,6 @@ def start_local_trainers(cluster,
len(pod.trainers), len(pod.trainers),
pretty_print_envs(proc_env, ("Distributed Envs", pretty_print_envs(proc_env, ("Distributed Envs",
"Value")))) "Value"))))
logger.info(
"More details for debug about commands and environments are written in {}/run.sh".
format(log_dir))
fn = None fn = None
if log_dir is not None: if log_dir is not None:
os.system("mkdir -p {}".format(log_dir)) os.system("mkdir -p {}".format(log_dir))
......
...@@ -38,7 +38,7 @@ class RecomputeOptimizer(MetaOptimizerBase): ...@@ -38,7 +38,7 @@ class RecomputeOptimizer(MetaOptimizerBase):
list(user_defined_strategy.recompute_configs["checkpoints"])) list(user_defined_strategy.recompute_configs["checkpoints"]))
def _can_apply(self): def _can_apply(self):
if self.role_maker._is_collective: if not self.role_maker._is_collective:
return False return False
if self.user_defined_strategy.recompute == True: if self.user_defined_strategy.recompute == True:
......
@@ -160,18 +160,21 @@ def get_cluster_from_args(args, selected_gpus):
         x for x in range(started_port, started_port + len(selected_gpus))
     ]
-    return get_cluster(node_ips, node_ip, free_ports, selected_gpus)
+    trainer_endpoints = []
+    for ip in node_ips:
+        trainer_endpoints.append(["%s:%d" % (ip, port) for port in free_ports])
+    return get_cluster(node_ips, node_ip, trainer_endpoints, selected_gpus)

 def get_gpus(selected_gpus):
     if selected_gpus is None:
         from paddle.fluid import core
         gpus_num = core.get_cuda_device_count()
-        selected_gpus = [str(x) for x in range(0, gpus_num)]
+        gpus = [str(x) for x in range(0, gpus_num)]
     else:
         cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES")
         if cuda_visible_devices is None or cuda_visible_devices == "":
-            selected_gpus = [x.strip() for x in selected_gpus.split(',')]
+            gpus = [x.strip() for x in selected_gpus.split(',')]
         else:
             # change selected_gpus into relative values
             # e.g. CUDA_VISIBLE_DEVICES=4,5,6,7; args.selected_gpus=4,5,6,7;
@@ -181,12 +184,16 @@ def get_gpus(selected_gpus):
                 assert x in cuda_visible_devices_list, "Can't find "\
                     "your selected_gpus %s in CUDA_VISIBLE_DEVICES[%s]."\
                     % (x, cuda_visible_devices)
-            selected_gpus = [
+            gpus = [
                 cuda_visible_devices_list.index(x.strip())
                 for x in selected_gpus.split(',')
             ]
+            logger.info("Change selected_gpus into reletive values. --ips:{} "
+                        "will change into relative_ips:{} according to your "
+                        "CUDA_VISIBLE_DEVICES:{}".format(
+                            selected_gpus, gpus, cuda_visible_devices_list))

-    return selected_gpus
+    return gpus

 def get_cluster_and_pod(args):
...
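The relative-index conversion above is easier to see with concrete values. A minimal, self-contained sketch of the same mapping, using made-up device ids (not part of this commit):

import os

# Hypothetical setup: four physical GPUs exposed, user selects 4 and 5.
os.environ["CUDA_VISIBLE_DEVICES"] = "4,5,6,7"
selected_gpus = "4,5"

cuda_visible_devices_list = os.environ["CUDA_VISIBLE_DEVICES"].split(',')
# Physical ids 4 and 5 sit at positions 0 and 1 of CUDA_VISIBLE_DEVICES,
# so the launcher hands the relative indices [0, 1] to the trainers.
gpus = [
    cuda_visible_devices_list.index(x.strip())
    for x in selected_gpus.split(',')
]
print(gpus)  # [0, 1]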
@@ -227,18 +227,23 @@ def get_logger(log_level, name="root"):
     return logger

-def get_cluster(node_ips, node_ip, paddle_ports, selected_gpus):
-    assert type(paddle_ports) is list, "paddle_ports must be list"
+def get_cluster(node_ips, node_ip, trainer_endpoints, selected_gpus):
+    assert type(trainer_endpoints) is list, "trainer_endpoints must be list"
     cluster = Cluster(hdfs=None)
     trainer_rank = 0
     for node_rank, ip in enumerate(node_ips):
         pod = Pod()
         pod.rank = node_rank
         pod.addr = ip
+        cur_node_endpoints = trainer_endpoints[node_rank]
+        # when use paddlecloud, endpoints may > selected_gpus(user_defined)
+        assert len(cur_node_endpoints) >= len(
+            selected_gpus
+        ), "current trainer_endpoints size should be greater equal than selected_gpus size."
         for i in range(len(selected_gpus)):
             trainer = Trainer()
             trainer.gpus.append(selected_gpus[i])
-            trainer.endpoint = "%s:%d" % (ip, paddle_ports[i])
+            trainer.endpoint = "%s" % (cur_node_endpoints[i])
             trainer.rank = trainer_rank
             trainer_rank += 1
@@ -253,7 +258,8 @@ def terminate_local_procs(procs):
     for p in procs:
         if p.proc.poll() is None:
             p.proc.terminate()
-            p.log_fn.close()
+            if p.log_fn:
+                p.log_fn.close()
             logger.debug("terminate process id:{}".format(p.proc.pid))

     #wait all process terminiated
...
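For reference, the reshaped trainer_endpoints argument is a list of per-node endpoint lists, indexed by node rank. A hedged sketch with invented IPs and ports, mirroring the get_cluster_from_args change above:

# Hypothetical values; only the data shape matters here.
node_ips = ["192.168.0.1", "192.168.0.2"]
free_ports = [6170, 6171]
selected_gpus = [0, 1]

trainer_endpoints = [
    ["%s:%d" % (ip, port) for port in free_ports] for ip in node_ips
]
# [['192.168.0.1:6170', '192.168.0.1:6171'],
#  ['192.168.0.2:6170', '192.168.0.2:6171']]
# Each pod reads trainer_endpoints[node_rank], which must be at least as
# long as selected_gpus:
# cluster = get_cluster(node_ips, node_ips[0], trainer_endpoints, selected_gpus)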
@@ -143,7 +143,7 @@ class PostTrainingQuantization(object):
                  weight_quantize_type='channel_wise_abs_max',
                  optimize_model=False,
                  is_use_cache_file=False,
-                 cache_dir="./temp_post_training"):
+                 cache_dir=None):
         '''
         Constructor.
@@ -206,13 +206,8 @@ class PostTrainingQuantization(object):
             `conv2d/depthwise_conv2d + bn`, the weights scale for all channel will
             be different. In address this problem, fuse the pattern before
             quantization. Default False.
-            is_use_cache_file(bool, optional): If set is_use_cache_file as False,
-                all temp data will be saved in memory. If set is_use_cache_file as True,
-                it will save temp data to disk. When the fp32 model is complex or
-                the number of calibrate data is large, we should set is_use_cache_file
-                as True. Defalut is False.
-            cache_dir(str, optional): When is_use_cache_file is True, set cache_dir as
-                the directory for saving temp data. Default is ./temp_post_training.
+            is_use_cache_file(bool, optional): This param is deprecated.
+            cache_dir(str, optional): This param is deprecated.
         Returns:
             None
@@ -302,10 +297,6 @@ class PostTrainingQuantization(object):
         assert op_type in self._support_quantize_op_type, \
             op_type + " is not supported for quantization."
         self._optimize_model = optimize_model
-        self._is_use_cache_file = is_use_cache_file
-        self._cache_dir = cache_dir
-        if self._is_use_cache_file and not os.path.exists(self._cache_dir):
-            os.mkdir(self._cache_dir)

         # Define variables
         self._place = self._executor.place
@@ -317,11 +308,17 @@ class PostTrainingQuantization(object):
         self._out_scale_op_list = _out_scale_op_list
         self._quantized_weight_var_name = set()
         self._quantized_act_var_name = set()
-        self.weight_op_pairs = {}
+        self._weight_op_pairs = {}
+        # The vars for alog = KL
+        self._sampling_act_abs_min_max = {}
+        self._sampling_act_histogram = {}
         self._sampling_data = {}
         self._quantized_var_kl_threshold = {}
+        self._histogram_bins = 2048
+        # The vars for algo = min_max
         self._quantized_var_min = {}
         self._quantized_var_max = {}
+        # The vars for algo = abs_max
         self._quantized_var_abs_max = {}

     def quantize(self):
@@ -339,6 +336,25 @@ class PostTrainingQuantization(object):
         self._collect_target_varnames()
         self._set_activation_persistable()

+        if self._algo == "KL":
+            _logger.info("Preparation stage ...")
+            batch_id = 0
+            for data in self._data_loader():
+                self._executor.run(program=self._program,
+                                   feed=data,
+                                   fetch_list=self._fetch_list,
+                                   return_numpy=False,
+                                   scope=self._scope)
+                self._collect_activation_abs_min_max()
+                if batch_id % 5 == 0:
+                    _logger.info("Run batch: " + str(batch_id))
+                batch_id += 1
+                if self._batch_nums and batch_id >= self._batch_nums:
+                    break
+            _logger.info("Finish preparation stage, all batch:" + str(batch_id))
+            self._init_sampling_act_histogram()
+
+        _logger.info("Sampling stage ...")
         batch_id = 0
         for data in self._data_loader():
             self._executor.run(program=self._program,
@@ -346,17 +362,13 @@ class PostTrainingQuantization(object):
                                fetch_list=self._fetch_list,
                                return_numpy=False,
                                scope=self._scope)
-            if self._algo == "KL":
-                self._sample_data(batch_id)
-            else:
-                self._sample_threshold()
+            self._sampling()

             if batch_id % 5 == 0:
                 _logger.info("Run batch: " + str(batch_id))
             batch_id += 1
             if self._batch_nums and batch_id >= self._batch_nums:
                 break
-        _logger.info("Finish all batch: " + str(batch_id))
+        _logger.info("Finish sampling stage, all batch: " + str(batch_id))

         self._reset_activation_persistable()
@@ -397,6 +409,7 @@ class PostTrainingQuantization(object):
             target_vars=self._fetch_list,
             executor=self._executor,
             main_program=self._program)
+        _logger.info("The quantized model is saved in " + save_model_path)

     def _load_model_data(self):
         '''
@@ -454,7 +467,7 @@ class PostTrainingQuantization(object):
             for var_name in var_name_list:
                 if var_name in persistable_var_names:
                     self._quantized_weight_var_name.add(var_name)
-                    self.weight_op_pairs[var_name] = op_type
+                    self._weight_op_pairs[var_name] = op_type
                 else:
                     self._quantized_act_var_name.add(var_name)
@@ -494,20 +507,18 @@ class PostTrainingQuantization(object):
             if var.name in self._quantized_act_var_name:
                 var.persistable = False

-    def _sample_threshold(self):
+    def _sampling(self):
         '''
-        Sample the input threshold(min, max, or abs_max) in every iterations.
+        Sample the min/max, abs_max or histogram in every iterations.
         '''
-        assert self._algo in ["abs_max", "min_max"], \
-            "The algo should be abs_max or min_max for _sample_threshold."
         if self._algo == "abs_max":
-            self._sample_threshold_abs_max()
+            self._sample_abs_max()
         elif self._algo == "min_max":
-            self._sample_threshold_min_max()
+            self._sample_min_max()
+        elif self._algo == "KL":
+            self._sample_histogram()

-    def _sample_threshold_abs_max(self):
-        assert self._algo == "abs_max", \
-            "The algo should be abs_max for _sample_threshold_abs_max."
+    def _sample_abs_max(self):
         # Only calculate abs_max value for weight for once
         if self._quantized_var_abs_max == {}:
             for var_name in self._quantized_weight_var_name:
@@ -516,7 +527,7 @@ class PostTrainingQuantization(object):
                     abs_max_value = float(np.max(np.abs(var_tensor)))
                 elif self._weight_quantize_type == "channel_wise_abs_max":
                     abs_max_value = []
-                    if self.weight_op_pairs[
+                    if self._weight_op_pairs[
                             var_name] in _channelwise_quant_axis1_ops:
                         for i in range(var_tensor.shape[1]):
                             abs_max_value.append(
@@ -534,9 +545,7 @@ class PostTrainingQuantization(object):
                 (abs_max_value > self._quantized_var_abs_max[var_name]):
                 self._quantized_var_abs_max[var_name] = abs_max_value

-    def _sample_threshold_min_max(self):
-        assert self._algo == "min_max", \
-            "The algo should be min_max for _sample_threshold_min_max."
+    def _sample_min_max(self):
         if self._quantized_var_min == {} and self._quantized_var_max == {}:
             for var_name in self._quantized_weight_var_name:
                 var_tensor = _load_variable_data(self._scope, var_name)
@@ -546,7 +555,7 @@ class PostTrainingQuantization(object):
                 elif self._weight_quantize_type == "channel_wise_abs_max":
                     min_value = []
                     max_value = []
-                    if self.weight_op_pairs[
+                    if self._weight_op_pairs[
                             var_name] in _channelwise_quant_axis1_ops:
                         for i in range(var_tensor.shape[1]):
                             min_value.append(float(np.min(var_tensor[:, i])))
@@ -569,6 +578,14 @@ class PostTrainingQuantization(object):
                 (max_value > self._quantized_var_max[var_name]):
                 self._quantized_var_max[var_name] = max_value

+    def _sample_histogram(self):
+        for var_name in self._quantized_act_var_name:
+            var_tensor = _load_variable_data(self._scope, var_name)
+            var_tensor_abs = np.abs(var_tensor)
+            bins = self._sampling_act_histogram[var_name][1]
+            hist, _ = np.histogram(var_tensor_abs, bins=bins)
+            self._sampling_act_histogram[var_name][0] += hist
+
     def _save_input_threhold(self):
         '''
         Save input threshold to the quantized op.
@@ -585,27 +602,36 @@ class PostTrainingQuantization(object):
                 op._set_attr(var_name + ".max",
                              self._quantized_var_max[var_name])

-    def _sample_data(self, iter):
+    def _collect_activation_abs_min_max(self):
         '''
-        Sample the tensor data of quantized variables,
-        applied in every iteration.
+        Collect the abs_min and abs_max for all activation. When algo = KL,
+        get the min and max value, and then calculate the threshold.
         '''
-        assert self._algo == "KL", "The algo should be KL to sample data."
-        if self._is_use_cache_file:
-            for var_name in self._quantized_act_var_name:
-                var_tensor = _load_variable_data(self._scope, var_name)
-                var_tensor = var_tensor.ravel()
-                save_path = os.path.join(
-                    self._cache_dir,
-                    var_name.replace("/", ".") + "_" + str(iter) + ".npy")
-                np.save(save_path, var_tensor)
-        else:
-            for var_name in self._quantized_act_var_name:
-                if var_name not in self._sampling_data:
-                    self._sampling_data[var_name] = []
-                var_tensor = _load_variable_data(self._scope, var_name)
-                var_tensor = var_tensor.ravel()
-                self._sampling_data[var_name].append(var_tensor)
+        for var_name in self._quantized_act_var_name:
+            var_tensor = _load_variable_data(self._scope, var_name)
+            var_tensor = np.abs(var_tensor)
+            min_value = float(np.min(var_tensor))
+            max_value = float(np.max(var_tensor))
+            if var_name not in self._sampling_act_abs_min_max:
+                self._sampling_act_abs_min_max[
+                    var_name] = [min_value, max_value]
+            else:
+                if min_value < self._sampling_act_abs_min_max[var_name][0]:
+                    self._sampling_act_abs_min_max[var_name][0] = min_value
+                if max_value > self._sampling_act_abs_min_max[var_name][1]:
+                    self._sampling_act_abs_min_max[var_name][1] = max_value
+
+    def _init_sampling_act_histogram(self):
+        '''
+        Based on the min/max value, init the sampling_act_histogram.
+        '''
+        for var_name in self._quantized_act_var_name:
+            if var_name not in self._sampling_act_histogram:
+                min_val = self._sampling_act_abs_min_max[var_name][0]
+                max_val = self._sampling_act_abs_min_max[var_name][1]
+                hist, hist_edeges = np.histogram(
+                    [], bins=self._histogram_bins, range=(min_val, max_val))
+                self._sampling_act_histogram[var_name] = [hist, hist_edeges]

     def _calculate_kl_threshold(self):
         '''
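A NumPy detail carries the two histogram helpers above: np.histogram over an empty array with an explicit range returns all-zero counts plus fixed bin edges, and passing those edges back as bins accumulates later batches into the same buckets. A standalone sketch with made-up shapes and ranges:

import numpy as np

histogram_bins = 2048
abs_min, abs_max = 0.0, 6.3  # as collected in the preparation stage

# Empty input + explicit range yields zero counts and fixed edges.
hist, edges = np.histogram([], bins=histogram_bins, range=(abs_min, abs_max))

for _ in range(3):  # stand-in for the per-batch sampling loop
    activation = np.abs(np.random.randn(4, 16).astype("float32"))
    batch_hist, _ = np.histogram(activation, bins=edges)
    hist += batch_hist  # counts accumulate across batches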
@@ -621,7 +647,7 @@ class PostTrainingQuantization(object):
             weight_threshold = float(np.max(np.abs(weight_data)))
         elif self._weight_quantize_type == "channel_wise_abs_max":
             weight_threshold = []
-            if self.weight_op_pairs[
+            if self._weight_op_pairs[
                     var_name] in _channelwise_quant_axis1_ops:
                 for i in range(weight_data.shape[1]):
                     weight_threshold.append(
@@ -632,25 +658,10 @@ class PostTrainingQuantization(object):
                         float(np.max(np.abs(weight_data[i]))))
             self._quantized_var_kl_threshold[var_name] = weight_threshold

-        # KL threshold for activations
-        if self._is_use_cache_file:
-            for var_name in self._quantized_act_var_name:
-                sampling_data = []
-                filenames = [f for f in os.listdir(self._cache_dir) \
-                    if re.match(var_name.replace("/", ".") + '_[0-9]+.npy', f)]
-                for filename in filenames:
-                    file_path = os.path.join(self._cache_dir, filename)
-                    sampling_data.append(np.load(file_path))
-                    os.remove(file_path)
-                sampling_data = np.concatenate(sampling_data)
-                self._quantized_var_kl_threshold[var_name] = \
-                    self._get_kl_scaling_factor(np.abs(sampling_data))
-        else:
-            for var_name in self._quantized_act_var_name:
-                self._sampling_data[var_name] = np.concatenate(
-                    self._sampling_data[var_name])
-                self._quantized_var_kl_threshold[var_name] = \
-                    self._get_kl_scaling_factor(np.abs(self._sampling_data[var_name]))
+        for var_name in self._quantized_act_var_name:
+            hist, hist_edeges = self._sampling_act_histogram[var_name]
+            self._quantized_var_kl_threshold[var_name] = \
+                self._get_kl_scaling_factor(hist, hist_edeges)

     def _update_program(self):
         '''
@@ -765,22 +776,15 @@ class PostTrainingQuantization(object):
         for var_name in out_var_names:
             analysis_and_save_info(op, var_name)

-    def _get_kl_scaling_factor(self, activation_blob, num_quantized_bins=255):
+    def _get_kl_scaling_factor(self, hist, hist_edeges, num_quantized_bins=255):
         '''
         Using the KL-divergenc method to get the more precise scaling factor.
         '''
-        max_val = np.max(activation_blob)
-        min_val = np.min(activation_blob)
-        if min_val >= 0:
-            hist, hist_edeges = np.histogram(
-                activation_blob, bins=2048, range=(min_val, max_val))
-            ending_iter = 2047
-            starting_iter = int(ending_iter * 0.7)
-        else:
-            _logger.error("Please first apply abs to activation_blob.")
+        ending_iter = self._histogram_bins - 1
+        starting_iter = int(ending_iter * 0.7)
         bin_width = hist_edeges[1] - hist_edeges[0]
-        P_sum = len(np.array(activation_blob).ravel())
+        P_sum = np.sum(np.array(hist).ravel())
         min_kl_divergence = 0
         min_kl_index = 0
         kl_inited = False
...
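The refactored _get_kl_scaling_factor now searches the pre-collected histogram rather than raw activations: each candidate clipping point from starting_iter to ending_iter defines a reference distribution P (with the clipped tail folded into the last bin) and a num_quantized_bins-bucket approximation Q, and the cut with the smallest KL divergence wins. A simplified, self-contained sketch of that idea, not the exact Paddle implementation:

import numpy as np

def _kl(p, q):
    p = p / p.sum()
    q = q / q.sum()
    mask = p > 0  # bins that P never uses contribute nothing
    return float(np.sum(p[mask] * np.log(p[mask] / np.maximum(q[mask], 1e-12))))

def kl_threshold(hist, hist_edges, num_quantized_bins=255):
    # Assumes a non-empty histogram with uniform bin width.
    bin_width = hist_edges[1] - hist_edges[0]
    ending = len(hist) - 1
    best_kl, best_cut = float("inf"), ending
    for cut in range(int(ending * 0.7), ending + 1):
        p = hist[:cut + 1].astype(np.float64)
        p[-1] += hist[cut + 1:].sum()  # fold the clipped tail into the last bin
        # Down-sample P into num_quantized_bins buckets, then expand back
        # to build the quantized approximation Q.
        idx = (np.arange(cut + 1) * num_quantized_bins) // (cut + 1)
        sums = np.bincount(idx, weights=p, minlength=num_quantized_bins)
        counts = np.bincount(idx, minlength=num_quantized_bins)
        q = sums[idx] / np.maximum(counts[idx], 1)
        kl = _kl(p, q)
        if kl < best_kl:
            best_kl, best_cut = kl, cut
    return (best_cut + 0.5) * bin_width  # threshold at the winning bin's center

Fed with the hist/edges pair accumulated during sampling, kl_threshold(hist, edges) plays the role of the new hist/hist_edeges arguments above.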
@@ -19,6 +19,7 @@ import six
 import pickle
 import numpy as np

+import paddle
 from paddle import compat as cpt
 from paddle.fluid import core
 from paddle.fluid import framework
@@ -182,9 +183,9 @@ class _ProgramHolder(object):
         super(_ProgramHolder, self).__init__()

         # input, output, persistable var info
-        self._input_names = []
-        self._persistable_names = []
+        self._input_descs = []
         self._output_descs = []
+        self._persistable_names = []

         # execution scope
         self._inner_scope = core.Scope()
@@ -207,11 +208,11 @@ class _ProgramHolder(object):
         return self._train_program_desc

     @property
-    def input_names(self):
-        return self._input_names
+    def input_descs(self):
+        return self._input_descs

     @property
-    def output_decs(self):
+    def output_descs(self):
         return self._output_descs

     @property
@@ -233,7 +234,8 @@ class _ProgramHolder(object):
                 ops_to_remove.append(i)
                 feed_var_name = cpt.to_bytes(op.input('X')[0])
                 root_block._remove_var(feed_var_name)
-                self._input_names.append(cpt.to_bytes(op.output('Out')[0]))
+                self._input_descs.append(
+                    root_block.find_var(cpt.to_bytes(op.output('Out')[0])))
             elif op.type() == 'scale' and op.output('Out')[0].startswith(
                     'save_infer_model/scale_'):
                 ops_to_remove.append(i)
@@ -257,7 +259,7 @@ class _ProgramHolder(object):
             root_block._remove_op(op_idx, op_idx + 1)

         # 2. Input processing, reverse feed vars
-        self._input_names.reverse()
+        self._input_descs.reverse()

         # 3. Output processing, add scale for outputs
         tmp_program = _build_program_by_desc(program_desc)
@@ -738,7 +740,7 @@ class TranslatedLayer(layers.Layer):
             if isinstance(value, np.ndarray):
                 var = core.VarBase(
                     value=value,
-                    name=program_holder.input_names[i],
+                    name=program_holder.input_descs[i].name(),
                     persistable=False,
                     place=framework._current_expected_place(),
                     zero_copy=True)
@@ -746,7 +748,7 @@ class TranslatedLayer(layers.Layer):
                 var = value
                 # NOTE: we changed var name here,
                 # but it may be an important name set by user
-            var.name = program_holder.input_names[i]
+            var.name = program_holder.input_descs[i].name()
             input_vars.append(var)

         persistable_vars = []
@@ -762,7 +764,7 @@ class TranslatedLayer(layers.Layer):
                 % var_name)

         output_vars = []
-        for var_desc in program_holder.output_decs:
+        for var_desc in program_holder.output_descs:
             var = core.VarBase(var_desc.dtype(),
                                var_desc.shape(),
                                var_desc.name(), var_desc.type(), False)
@@ -913,11 +915,7 @@ class TranslatedLayer(layers.Layer):
                 program = translated_layer.program()
         """
         # 1. get program holder
-        program_holder = self._program_holder_dict.get(method_name, None)
-        if program_holder is None:
-            raise ValueError(
-                "The method `%s` is not exists in loaded TranslatedLayer." %
-                method_name)
+        program_holder = self._get_program_holder(method_name)

         # 2. get inference program desc
         program_desc = program_holder.infer_program
@@ -925,3 +923,44 @@ class TranslatedLayer(layers.Layer):
         # 3. construct program
         program = _build_program_by_desc(program_desc)
         return program
+
+    def _get_program_holder(self, method_name='forward'):
+        program_holder = self._program_holder_dict.get(method_name, None)
+        if program_holder is None:
+            raise ValueError(
+                "The method `%s` does not exist in loaded TranslatedLayer." %
+                method_name)
+        return program_holder
+
+    def _input_spec(self, method_name='forward'):
+        # 1. get program holder
+        program_holder = self._get_program_holder(method_name)
+
+        # 2. build input spec by input desc
+        input_spec = []
+        for var_desc in program_holder.input_descs:
+            spec = paddle.static.InputSpec(
+                shape=var_desc.shape(),
+                dtype=var_desc.dtype(),
+                name=var_desc.name())
+            input_spec.append(spec)
+
+        return input_spec
+
+    def _output_spec(self, method_name='forward'):
+        # 1. get program holder
+        program_holder = self._get_program_holder(method_name)
+
+        # 2. build output spec by output desc
+        output_spec = []
+        for var_desc in program_holder.output_descs:
+            # NOTE(chenweihang): InputSpec describes a tensor, not just input.
+            # Maybe the name is not good enough. Here we use InputSpec to
+            # construct the description of Output tensor
+            spec = paddle.static.InputSpec(
+                shape=var_desc.shape(),
+                dtype=var_desc.dtype(),
+                name=var_desc.name())
+            output_spec.append(spec)
+
+        return output_spec
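A hedged usage sketch for the new spec helpers, assuming a model previously saved with paddle.jit.save (both methods are private, exactly as added above; the path is hypothetical):

import paddle

layer = paddle.jit.load("./example.model")

for spec in layer._input_spec():   # one InputSpec per feed variable
    print(spec.name, spec.shape, spec.dtype)
for spec in layer._output_spec():  # InputSpec reused to describe outputs
    print(spec.name, spec.shape, spec.dtype)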
@@ -1726,13 +1726,13 @@ class DatasetLoader(DataLoaderBase):
             logging.warn('thread_num {} which is set in Dataset is ignored'.
                          format(dataset.thread_num))
-            dataset.set_thread(thread_num)
+            dataset._set_thread(thread_num)

         if isinstance(dataset, paddle.distributed.fleet.dataset.
                       InMemoryDataset) and dataset.queue_num > thread_num:
             logging.warn("queue_num {} which is set in Dataset is ignored".
                          format(dataset.queue_num))
-            dataset.set_queue_num(thread_num)
+            dataset._set_queue_num(thread_num)

         self._dataset = dataset
         use_slots = [
...
@@ -102,6 +102,7 @@ if(WIN32)
 endif()

+LIST(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_new)
 LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint)
 LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint1)
 LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint2)
@@ -399,17 +400,17 @@ py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op ENVS ${G
 py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op ENVS ${GC_ENVS})
 py_test_modules(test_imperative_resnet MODULES test_imperative_resnet ENVS
     FLAGS_cudnn_deterministic=1 SERIAL)
-set_tests_properties(test_imperative_resnet PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
+set_tests_properties(test_imperative_resnet PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
 py_test_modules(test_imperative_resnet_sorted_gradient MODULES test_imperative_resnet_sorted_gradient ENVS
     FLAGS_cudnn_deterministic=1 SERIAL)
-set_tests_properties(test_imperative_resnet_sorted_gradient PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
+set_tests_properties(test_imperative_resnet_sorted_gradient PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
 py_test_modules(test_imperative_mnist MODULES test_imperative_mnist ENVS
     FLAGS_cudnn_deterministic=1)
 py_test_modules(test_imperative_mnist_sorted_gradient MODULES test_imperative_mnist_sorted_gradient ENVS
     FLAGS_cudnn_deterministic=1)
 py_test_modules(test_imperative_se_resnext MODULES test_imperative_se_resnext ENVS
     FLAGS_cudnn_deterministic=1 SERIAL)
-set_tests_properties(test_imperative_se_resnext PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
+set_tests_properties(test_imperative_se_resnext PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
 py_test_modules(test_imperative_ocr_attention_model MODULES test_imperative_ocr_attention_model ENVS
     FLAGS_cudnn_deterministic=1 SERIAL)
 py_test_modules(test_install_check MODULES test_install_check ENVS
...
@@ -208,14 +208,16 @@ class TestDistCTR2x2(FleetDistRunnerBase):
         filelist = train_file_list

         # config dataset
-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset()
-        dataset.set_batch_size(batch_size)
-        dataset.set_use_var(self.feeds)
+        dataset = paddle.distributed.QueueDataset()
         pipe_command = 'python ctr_dataset_reader.py'
-        dataset.set_pipe_command(pipe_command)
+        dataset.init(
+            batch_size=batch_size,
+            use_var=self.feeds,
+            pipe_command=pipe_command,
+            thread_num=thread_num)
         dataset.set_filelist(filelist)
-        dataset.set_thread(thread_num)

         for epoch_id in range(1):
             pass_start = time.time()
...
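The same migration recurs throughout the test files below: the DatasetFactory string lookup plus one setter per option collapses into a direct constructor and a single keyword-argument init(). A condensed before/after sketch (values illustrative; slots_vars stands for any list of feed variables):

# Before: factory lookup plus one setter per option.
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
    "QueueDataset")
dataset.set_batch_size(32)
dataset.set_thread(3)
dataset.set_pipe_command("cat")
dataset.set_use_var(slots_vars)

# After: direct class plus a single init() call.
dataset = paddle.distributed.QueueDataset()
dataset.init(
    batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars)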
@@ -114,14 +114,14 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2):
         filelist.append(train_file_path)

         # config dataset
-        dataset = paddle.fleet.DatasetFactory().create_dataset()
-        dataset.set_batch_size(batch_size)
-        dataset.set_use_var(self.feeds)
+        dataset = paddle.distributed.QueueDataset()
+        dataset._set_batch_size(batch_size)
+        dataset._set_use_var(self.feeds)
         pipe_command = 'python ctr_dataset_reader.py'
-        dataset.set_pipe_command(pipe_command)
+        dataset._set_pipe_command(pipe_command)
         dataset.set_filelist(filelist)
-        dataset.set_thread(thread_num)
+        dataset._set_thread(thread_num)

         for epoch_id in range(1):
             pass_start = time.time()
...
@@ -183,14 +183,14 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase):
         print("filelist: {}".format(filelist))

         # config dataset
-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset()
-        dataset.set_batch_size(batch_size)
-        dataset.set_use_var(self.feeds)
+        dataset = paddle.distributed.QueueDataset()
+        dataset._set_batch_size(batch_size)
+        dataset._set_use_var(self.feeds)
         pipe_command = 'python ctr_dataset_reader.py'
-        dataset.set_pipe_command(pipe_command)
+        dataset._set_pipe_command(pipe_command)
         dataset.set_filelist(filelist)
-        dataset.set_thread(thread_num)
+        dataset._set_thread(thread_num)

         for epoch_id in range(1):
             pass_start = time.time()
...
@@ -17,6 +17,7 @@ import numpy as np
 from inference_pass_test import InferencePassTest
 import paddle.fluid as fluid
 import paddle.fluid.core as core
+from paddle.fluid.core import PassVersionChecker


 class TransposeFlattenConcatFusePassTest(InferencePassTest):
@@ -45,6 +46,37 @@ class TransposeFlattenConcatFusePassTest(InferencePassTest):
             use_gpu = True
             self.check_output_with_option(use_gpu)

+            PassVersionChecker.IsCompatible('transpose_flatten_concat_fuse_pass')
+
+
+class TransposeFlattenConcatFusePassWithAxisTest(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data1 = fluid.data(name="data1", shape=[5, 5, 5], dtype="float32")
+            data2 = fluid.data(name="data2", shape=[5, 5, 5], dtype="float32")
+            trans1 = fluid.layers.transpose(data1, perm=[2, 1, 0])
+            trans2 = fluid.layers.transpose(data2, perm=[2, 1, 0])
+            flatt1 = fluid.layers.flatten(trans1, axis=2)
+            flatt2 = fluid.layers.flatten(trans2, axis=2)
+            concat_out = fluid.layers.concat([flatt1, flatt2], axis=1)
+            # There is no parameters for above structure.
+            # Hence, append a batch_norm to avoid failure caused by load_combined.
+            out = fluid.layers.batch_norm(concat_out, is_test=True)
+
+        self.feeds = {
+            "data1": np.random.random([5, 5, 5]).astype("float32"),
+            "data2": np.random.random([5, 5, 5]).astype("float32")
+        }
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        # There is no cpu pass for transpose_flatten_concat_fuse
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(use_gpu)
+
+            PassVersionChecker.IsCompatible('transpose_flatten_concat_fuse_pass')
+

 if __name__ == "__main__":
     unittest.main()
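PassVersionChecker.IsCompatible is called above without asserting its result. If it returns a bool, as the name suggests, a stricter variant inside test_check_output would make the test actually fail on an incompatible pass version:

# Hypothetical tightening; assumes IsCompatible returns a bool.
self.assertTrue(
    PassVersionChecker.IsCompatible('transpose_flatten_concat_fuse_pass'))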
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import AnalysisConfig


class PadOpTRTTest(InferencePassTest):
    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(
                name="data", shape=[1, 3, 128, 128], dtype="float32")
            pad_out = fluid.layers.pad(x=data,
                                       paddings=[0, 0, 0, 0, 0, 1, 1, 2],
                                       pad_value=0.0)
            out = fluid.layers.batch_norm(pad_out, is_test=True)

        self.feeds = {
            "data": np.random.random((1, 3, 128, 128)).astype("float32")
        }
        self.enable_trt = True
        self.trt_parameters = PadOpTRTTest.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [out]

    def test_check_output(self):
        use_gpu = [False]
        if core.is_compiled_with_cuda():
            use_gpu.append(True)
        for i in range(len(use_gpu)):
            self.check_output_with_option(use_gpu[i])


if __name__ == "__main__":
    unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import AnalysisConfig


#normal starts && ends
class SlicePluginTRTTest1(InferencePassTest):
    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(name="data", shape=[3, 3, 3, 3], dtype="float32")
            axes = [1, 3]
            starts = [0, 1]
            ends = [2, 3]
            slice_out = fluid.layers.slice(
                data, axes=axes, starts=starts, ends=ends)
            out = fluid.layers.batch_norm(slice_out, is_test=True)

        self.feeds = {
            "data": np.random.random((3, 3, 3, 3)).astype("float32"),
        }
        # Diff occurred between GPU and TRT.
        # In order to provide TRT CI ASAP, this test for trt part
        # is disabled temporarily.
        self.enable_trt = True
        self.trt_parameters = SlicePluginTRTTest1.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [out]

    def test_check_output(self):
        use_gpu = [False]
        if core.is_compiled_with_cuda():
            use_gpu.append(True)
        for i in range(len(use_gpu)):
            self.check_output_with_option(use_gpu[i])


#negative starts && ends
class SlicePluginTRTTest2(InferencePassTest):
    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(name="data", shape=[3, 3, 3, 3], dtype="float32")
            axes = [2, 3]
            starts = [-3, -2]
            ends = [-1, 3]
            slice_out = fluid.layers.slice(
                data, axes=axes, starts=starts, ends=ends)
            out = fluid.layers.batch_norm(slice_out, is_test=True)

        self.feeds = {
            "data": np.random.random((3, 3, 3, 3)).astype("float32"),
        }
        # Diff occurred between GPU and TRT.
        # In order to provide TRT CI ASAP, this test for trt part
        # is disabled temporarily.
        self.enable_trt = True
        self.trt_parameters = SlicePluginTRTTest2.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [out]

    def test_check_output(self):
        use_gpu = [False]
        if core.is_compiled_with_cuda():
            use_gpu.append(True)
        for i in range(len(use_gpu)):
            self.check_output_with_option(use_gpu[i])


#exceeded bound starts && ends
class SlicePluginTRTTest3(InferencePassTest):
    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(name="data", shape=[3, 3, 3, 3], dtype="float32")
            axes = [2, 3]
            starts = [-5, -2]
            ends = [-1, 8]
            slice_out = fluid.layers.slice(
                data, axes=axes, starts=starts, ends=ends)
            out = fluid.layers.batch_norm(slice_out, is_test=True)

        self.feeds = {
            "data": np.random.random((3, 3, 3, 3)).astype("float32"),
        }
        # Diff occurred between GPU and TRT.
        # In order to provide TRT CI ASAP, this test for trt part
        # is disabled temporarily.
        self.enable_trt = True
        self.trt_parameters = SlicePluginTRTTest3.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [out]

    def test_check_output(self):
        use_gpu = [False]
        if core.is_compiled_with_cuda():
            use_gpu.append(True)
        for i in range(len(use_gpu)):
            self.check_output_with_option(use_gpu[i])


#fp16
class SlicePluginTRTTest4(InferencePassTest):
    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(name="data", shape=[3, 3, 3, 3], dtype="float32")
            axes = [2, 3]
            starts = [-5, -2]
            ends = [-1, 8]
            slice_out = fluid.layers.slice(
                data, axes=axes, starts=starts, ends=ends)
            out = fluid.layers.batch_norm(slice_out, is_test=True)

        self.feeds = {
            "data": np.random.random((3, 3, 3, 3)).astype("float32"),
        }
        # Diff occurred between GPU and TRT.
        # In order to provide TRT CI ASAP, this test for trt part
        # is disabled temporarily.
        self.enable_trt = True
        self.trt_parameters = SlicePluginTRTTest4.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Half, False, False)
        self.fetch_list = [out]

    def test_check_output(self):
        use_gpu = [False]
        if core.is_compiled_with_cuda():
            use_gpu.append(True)
        for i in range(len(use_gpu)):
            self.check_output_with_option(use_gpu[i])


if __name__ == "__main__":
    unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid.core as core


class TestBroadcastOpCpu(OpTest):
    def setUp(self):
        self.op_type = "broadcast"
        input = np.random.random((100, 2)).astype("float32")
        np_out = input[:]
        self.inputs = {"X": input}
        self.attrs = {"sync_mode": False, "root": 0}
        self.outputs = {"Out": np_out}

    def test_check_output_cpu(self):
        try:
            self.check_output_with_place(place=core.CPUPlace())
        except:
            print("do not support cpu test, skip")


if __name__ == "__main__":
    unittest.main()
@@ -38,26 +38,22 @@ class TestDataset(unittest.TestCase):
     def test_dataset_create(self):
         """ Testcase for dataset create. """
         try:
-            dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-                "InMemoryDataset")
+            dataset = paddle.distributed.InMemoryDataset()
         except:
             self.assertTrue(False)

         try:
-            dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-                "QueueDataset")
+            dataset = paddle.distributed.QueueDataset()
         except:
             self.assertTrue(False)

         try:
-            dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-                "FileInstantDataset")
+            dataset = paddle.distributed.fleet.dataset.FileInstantDataset()
         except:
             self.assertTrue(False)

         try:
-            dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-                "MyOwnDataset")
+            dataset = paddle.distributed.fleet.dataset.MyOwnDataset()
             self.assertTrue(False)
         except:
             self.assertTrue(True)
@@ -95,18 +91,18 @@ class TestDataset(unittest.TestCase):
                 name=slot, shape=[1], dtype="int64", lod_level=1)
             slots_vars.append(var)

-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-            "InMemoryDataset")
-        dataset.set_batch_size(32)
-        dataset.set_thread(3)
+        dataset = paddle.distributed.InMemoryDataset()
+        dataset.init(
+            batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars)
+        dataset.update_settings(pipe_command="cat1")
+        dataset._init_distributed_settings(
+            parse_ins_id=True,
+            parse_content=True,
+            fea_eval=True,
+            candidate_size=10000)
         dataset.set_filelist(
             ["test_run_with_dump_a.txt", "test_run_with_dump_b.txt"])
-        dataset.set_parse_ins_id(True)
-        dataset.set_parse_content(True)
-        dataset.set_pipe_command("cat")
-        dataset.set_use_var(slots_vars)
         dataset.load_into_memory()
-        dataset.set_fea_eval(10000, True)
         dataset.local_shuffle()

         exe = fluid.Executor(fluid.CPUPlace())
@@ -176,14 +172,14 @@ class TestDataset(unittest.TestCase):
                 name=slot, shape=[1], dtype="int64", lod_level=1)
             slots_vars.append(var)

-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-            "InMemoryDataset")
-        dataset.set_batch_size(32)
-        dataset.set_thread(3)
+        dataset = paddle.distributed.InMemoryDataset()
+        dataset.init(
+            batch_size=32,
+            thread_num=3,
+            pipe_command="cat",
+            download_cmd="cat",
+            use_var=slots_vars)
         dataset.set_filelist([filename1, filename2])
-        dataset.set_pipe_command("cat")
-        dataset.set_download_cmd("cat")
-        dataset.set_use_var(slots_vars)
         dataset.load_into_memory()
         exe = fluid.Executor(fluid.CPUPlace())
         exe.run(fluid.default_startup_program())
@@ -228,22 +224,19 @@ class TestDataset(unittest.TestCase):
                 name=slot, shape=[1], dtype="int64", lod_level=1)
             slots_vars.append(var)

-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-            "InMemoryDataset")
-        dataset.set_batch_size(32)
-        dataset.set_thread(3)
+        dataset = paddle.distributed.InMemoryDataset()
+        dataset.init(
+            batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars)
+        dataset._init_distributed_settings(fea_eval=True, candidate_size=1)
         dataset.set_filelist([
             "test_in_memory_dataset_run_a.txt",
             "test_in_memory_dataset_run_b.txt"
         ])
-        dataset.set_pipe_command("cat")
-        dataset.set_use_var(slots_vars)
         dataset.load_into_memory()
-        dataset.set_fea_eval(1, True)
         dataset.slots_shuffle(["slot1"])
         dataset.local_shuffle()
-        dataset.set_generate_unique_feasigns(True, 15)
-        dataset.generate_local_tables_unlock(0, 11, 1, 25, 15)
+        dataset._set_generate_unique_feasigns(True, 15)
+        dataset._generate_local_tables_unlock(0, 11, 1, 25, 15)
         exe = fluid.Executor(fluid.CPUPlace())
         exe.run(fluid.default_startup_program())
         if self.use_data_loader:
@@ -300,17 +293,14 @@ class TestDataset(unittest.TestCase):
                 name=slot, shape=[1], dtype="float32", lod_level=1)
             slots_vars.append(var)

-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-            "InMemoryDataset")
-        dataset.set_batch_size(32)
-        dataset.set_thread(1)
-        dataset.set_parse_ins_id(True)
+        dataset = paddle.distributed.InMemoryDataset()
+        dataset.init(
+            batch_size=32, thread_num=1, pipe_command="cat", use_var=slots_vars)
+        dataset._init_distributed_settings(parse_ins_id=True)
         dataset.set_filelist([
             "test_in_memory_dataset_masterpatch_a.txt",
             "test_in_memory_dataset_masterpatch_b.txt"
         ])
-        dataset.set_pipe_command("cat")
-        dataset.set_use_var(slots_vars)
         dataset.load_into_memory()
         dataset.local_shuffle()
@@ -325,7 +315,8 @@ class TestDataset(unittest.TestCase):
         except Exception as e:
             self.assertTrue(False)

-        dataset.set_merge_by_lineid(2)
+        #dataset._set_merge_by_lineid(2)
+        dataset.update_settings(merge_size=2)
         dataset.dataset.merge_by_lineid()

         os.remove("./test_in_memory_dataset_masterpatch_a.txt")
@@ -367,17 +358,14 @@ class TestDataset(unittest.TestCase):
             name="slot4", shape=[1], dtype="float32", lod_level=0)
         slots_vars = [var1, var2, var3, var4]

-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-            "InMemoryDataset")
-        dataset.set_batch_size(32)
-        dataset.set_thread(1)
-        dataset.set_parse_ins_id(True)
+        dataset = paddle.distributed.InMemoryDataset()
+        dataset.init(
+            batch_size=32, thread_num=1, pipe_command="cat", use_var=slots_vars)
+        dataset._init_distributed_settings(parse_ins_id=True)
         dataset.set_filelist([
             "test_in_memory_dataset_masterpatch1_a.txt",
             "test_in_memory_dataset_masterpatch1_b.txt"
         ])
-        dataset.set_pipe_command("cat")
-        dataset.set_use_var(slots_vars)
         dataset.load_into_memory()
         dataset.local_shuffle()
@@ -392,7 +380,7 @@ class TestDataset(unittest.TestCase):
         except Exception as e:
             self.assertTrue(False)

-        dataset.set_merge_by_lineid(2)
+        dataset._set_merge_by_lineid(2)
         dataset.dataset.merge_by_lineid()

         os.remove("./test_in_memory_dataset_masterpatch1_a.txt")
@@ -423,16 +411,13 @@ class TestDataset(unittest.TestCase):
                 name=slot, shape=[1], dtype="float32", lod_level=1)
             slots_vars.append(var)

-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-            "InMemoryDataset")
-        dataset.set_batch_size(32)
-        dataset.set_thread(3)
+        dataset = paddle.distributed.InMemoryDataset()
+        dataset.init(
+            batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars)
         dataset.set_filelist([
             "test_in_memory_dataset_run_a.txt",
             "test_in_memory_dataset_run_b.txt"
         ])
-        dataset.set_pipe_command("cat")
-        dataset.set_use_var(slots_vars)
         dataset.load_into_memory()
         dataset.local_shuffle()
@@ -473,9 +458,9 @@ class TestDataset(unittest.TestCase):
         except Exception as e:
             self.assertTrue(False)

-        dataset.set_merge_by_lineid(2)
-        dataset.set_parse_ins_id(False)
-        dataset.set_fleet_send_sleep_seconds(2)
+        dataset._set_merge_by_lineid(2)
+        dataset._set_parse_ins_id(False)
+        dataset._set_fleet_send_sleep_seconds(2)
         dataset.preload_into_memory()
         dataset.wait_preload_done()
         dataset.release_memory()
@@ -483,10 +468,25 @@ class TestDataset(unittest.TestCase):
         dataset.wait_preload_done()
         dataset.dataset.merge_by_lineid()
         dataset.release_memory()
-        dataset.set_merge_by_lineid(30)
-        dataset.set_parse_ins_id(False)
+        dataset._set_merge_by_lineid(30)
+        dataset._set_parse_ins_id(False)
         dataset.load_into_memory()
         dataset.dataset.merge_by_lineid()
+        dataset.update_settings(
+            batch_size=1,
+            thread_num=2,
+            input_type=1,
+            pipe_command="cat",
+            use_var=[],
+            fs_name="",
+            fs_ugi="",
+            download_cmd="cat",
+            merge_size=-1,
+            parse_ins_id=False,
+            parse_content=False,
+            fleet_send_batch_size=2,
+            fleet_send_sleep_seconds=2,
+            fea_eval=True)
         fleet_ptr = fluid.core.Fleet()
         fleet_ptr.set_client2client_config(1, 1, 1)
         fleet_ptr.get_cache_threshold(0)
@@ -517,14 +517,11 @@ class TestDataset(unittest.TestCase):
                 name=slot, shape=[1], dtype="int64", lod_level=1)
             slots_vars.append(var)

-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-            "QueueDataset")
-        dataset.set_batch_size(32)
-        dataset.set_thread(3)
+        dataset = paddle.distributed.QueueDataset()
+        dataset.init(
+            batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars)
         dataset.set_filelist(
             ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"])
-        dataset.set_pipe_command("cat")
-        dataset.set_use_var(slots_vars)

         exe = fluid.Executor(fluid.CPUPlace())
         exe.run(fluid.default_startup_program())
@@ -543,12 +540,9 @@ class TestDataset(unittest.TestCase):
         except Exception as e:
             self.assertTrue(False)

-        dataset2 = paddle.distributed.fleet.DatasetFactory().create_dataset(
-            "QueueDataset")
-        dataset2.set_use_var(slots_vars)
-        dataset2.set_batch_size(32)
-        dataset2.set_thread(3)
-        dataset2.set_pipe_command("cat")
+        dataset2 = paddle.distributed.QueueDataset()
+        dataset2.init(
+            batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars)
         dataset.set_filelist([])
         try:
             exe.train_from_dataset(fluid.default_main_program(), dataset2)
@@ -585,14 +579,11 @@ class TestDataset(unittest.TestCase):
                 name=slot, shape=[1], dtype="float32", lod_level=1)
             slots_vars.append(var)

-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-            "QueueDataset")
-        dataset.set_batch_size(32)
-        dataset.set_thread(3)
+        dataset = paddle.distributed.QueueDataset()
+        dataset.init(
+            batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars)
         dataset.set_filelist(
             ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"])
-        dataset.set_pipe_command("cat")
-        dataset.set_use_var(slots_vars)

         exe = fluid.Executor(fluid.CPUPlace() if not core.is_compiled_with_cuda(
         ) else fluid.CUDAPlace(0))
@@ -641,15 +632,15 @@ class TestDataset(unittest.TestCase):
                 name=slot, shape=[None, 1], dtype="int64", lod_level=1)
             slots_vars.append(var)

-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-            "InMemoryDataset")
-        dataset.set_input_type(1)
-        dataset.set_batch_size(1)
-        dataset.set_thread(2)
+        dataset = paddle.distributed.InMemoryDataset()
+        dataset.init(
+            batch_size=1,
+            thread_num=2,
+            input_type=1,
+            pipe_command="cat",
+            use_var=slots_vars)
         dataset.set_filelist(
             ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"])
-        dataset.set_pipe_command("cat")
-        dataset.set_use_var(slots_vars)
         dataset.load_into_memory()

         exe = fluid.Executor(fluid.CPUPlace() if not core.is_compiled_with_cuda(
@@ -721,13 +712,10 @@ class TestDatasetWithFetchHandler(unittest.TestCase):
             inputs(list): inputs of get_dataset
             files(list): files of get_dataset
         """
-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-            "QueueDataset")
-        dataset.set_batch_size(32)
-        dataset.set_thread(3)
+        dataset = paddle.distributed.QueueDataset()
+        dataset.init(
+            batch_size=32, thread_num=3, pipe_command="cat", use_var=inputs)
         dataset.set_filelist(files)
-        dataset.set_pipe_command("cat")
-        dataset.set_use_var(inputs)
         return dataset

     def setUp(self):
@@ -879,16 +867,17 @@ class TestDataset2(unittest.TestCase):
         except ImportError as e:
             print("warning: no mpi4py")
         exe.run(startup_program)
-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-            "InMemoryDataset")
-        dataset.set_batch_size(32)
-        dataset.set_thread(3)
+        dataset = paddle.distributed.InMemoryDataset()
+        dataset.init(
+            batch_size=32,
+            thread_num=3,
+            pipe_command="cat",
+            use_var=slots_vars)
         dataset.set_filelist([
             "test_in_memory_dataset2_run_a.txt",
             "test_in_memory_dataset2_run_b.txt"
         ])
-        dataset.set_pipe_command("cat")
-        dataset.set_use_var(slots_vars)
         dataset.load_into_memory()
         fleet._opt_info = None
         fleet._fleet_ptr = None
@@ -949,16 +938,16 @@ class TestDataset2(unittest.TestCase):
         except ImportError as e:
             print("warning: no mpi4py")
         exe.run(startup_program)
-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-            "InMemoryDataset")
-        dataset.set_batch_size(32)
-        dataset.set_thread(3)
+        dataset = paddle.distributed.InMemoryDataset()
+        dataset.init(
+            batch_size=32,
+            thread_num=3,
+            pipe_command="cat",
+            use_var=slots_vars)
         dataset.set_filelist([
             "test_in_memory_dataset2_run2_a.txt",
             "test_in_memory_dataset2_run2_b.txt"
         ])
-        dataset.set_pipe_command("cat")
-        dataset.set_use_var(slots_vars)
         dataset.load_into_memory()
         try:
             dataset.global_shuffle(fleet)
@@ -966,14 +955,11 @@ class TestDataset2(unittest.TestCase):
             print("warning: catch expected error")
         fleet._opt_info = None
         fleet._fleet_ptr = None
-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-            "InMemoryDataset")
-        dataset.set_rank_offset("")
-        dataset.set_pv_batch_size(1)
-        dataset.set_hdfs_config("", "")
+        dataset = paddle.distributed.InMemoryDataset()
+        dataset.init(fs_name="", fs_ugi="")
         d = paddle.distributed.fleet.DatasetBase()
         try:
-            dataset.set_feed_type("MultiSlotInMemoryDataFeed")
+            dataset._set_feed_type("MultiSlotInMemoryDataFeed")
         except:
             print("warning: catch expected error")
         dataset.thread_num = 0
@@ -981,9 +967,6 @@ class TestDataset2(unittest.TestCase):
             dataset._prepare_to_run()
         except:
             print("warning: catch expected error")
-        dataset.set_parse_logkey(True)
-        dataset.set_merge_by_sid(True)
-        dataset.set_enable_pv_merge(True)
         try:
             dataset.preprocess_instance()
         except:
@@ -996,16 +979,15 @@ class TestDataset2(unittest.TestCase):
             dataset.postprocess_instance()
         except:
             print("warning: catch expected error")
-        dataset.set_fleet_send_batch_size(1024)
+        dataset._set_fleet_send_batch_size(1024)
         try:
             dataset.global_shuffle()
         except:
             print("warning: catch expected error")
-        dataset.get_pv_data_size()
+        #dataset.get_pv_data_size()
         dataset.get_memory_data_size()
         dataset.get_shuffle_data_size()
-        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
-            "QueueDataset")
+        dataset = paddle.distributed.QueueDataset()
         try:
             dataset.local_shuffle()
         except:
...@@ -1027,6 +1009,120 @@ class TestDataset2(unittest.TestCase): ...@@ -1027,6 +1009,120 @@ class TestDataset2(unittest.TestCase):
os.remove("./test_in_memory_dataset2_run2_a.txt") os.remove("./test_in_memory_dataset2_run2_a.txt")
os.remove("./test_in_memory_dataset2_run2_b.txt") os.remove("./test_in_memory_dataset2_run2_b.txt")
def test_bosps_dataset_fleet2(self):
"""
Testcase for BoxPSDataset from create to run.
"""
with open("test_in_memory_dataset2_run2_a.txt", "w") as f:
data = "1 1 2 3 3 4 5 5 5 5 1 1\n"
data += "1 2 2 3 4 4 6 6 6 6 1 2\n"
data += "1 3 2 3 5 4 7 7 7 7 1 3\n"
f.write(data)
with open("test_in_memory_dataset2_run2_b.txt", "w") as f:
data = "1 4 2 3 3 4 5 5 5 5 1 4\n"
data += "1 5 2 3 4 4 6 6 6 6 1 5\n"
data += "1 6 2 3 5 4 7 7 7 7 1 6\n"
data += "1 7 2 3 6 4 8 8 8 8 1 7\n"
f.write(data)
train_program = fluid.Program()
startup_program = fluid.Program()
scope = fluid.Scope()
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
with fluid.program_guard(train_program, startup_program):
slots = ["slot1_ff", "slot2_ff", "slot3_ff", "slot4_ff"]
slots_vars = []
for slot in slots:
var = fluid.layers.data(\
name=slot, shape=[1], dtype="float32", lod_level=1)
slots_vars.append(var)
fake_cost = \
fluid.layers.elementwise_sub(slots_vars[0], slots_vars[-1])
fake_cost = fluid.layers.mean(fake_cost)
with fluid.scope_guard(scope):
place = fluid.CPUPlace()
exe = fluid.Executor(place)
try:
fleet.init()
except ImportError as e:
print("warning: no mpi4py")
adam = fluid.optimizer.Adam(learning_rate=0.000005)
try:
adam = fleet.distributed_optimizer(
adam,
strategy={
"fs_uri": "fs_uri_xxx",
"fs_user": "fs_user_xxx",
"fs_passwd": "fs_passwd_xxx",
"fs_hadoop_bin": "fs_hadoop_bin_xxx"
})
adam.minimize([fake_cost], [scope])
except AttributeError as e:
print("warning: no mpi")
except ImportError as e:
print("warning: no mpi4py")
exe.run(startup_program)
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset.init(
batch_size=32,
thread_num=3,
pipe_command="cat",
use_var=slots_vars)
dataset.set_filelist([
"test_in_memory_dataset2_run2_a.txt",
"test_in_memory_dataset2_run2_b.txt"
])
dataset.load_into_memory()
try:
dataset.global_shuffle(fleet)
except:
print("warning: catch expected error")
fleet._opt_info = None
fleet._fleet_ptr = None
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset.init(
rank_offset="",
pv_batch_size=1,
fs_name="",
fs_ugi="",
data_feed_type="MultiSlotInMemoryDataFeed",
parse_logkey=True,
merge_by_sid=True,
enable_pv_merge=True)
d = paddle.distributed.fleet.DatasetBase()
try:
dataset._set_feed_type("MultiSlotInMemoryDataFeed")
except:
print("warning: catch expected error")
dataset.thread_num = 0
try:
dataset._prepare_to_run()
except:
print("warning: catch expected error")
dataset._set_parse_logkey(True)
dataset._set_merge_by_sid(True)
dataset._set_enable_pv_merge(True)
try:
dataset.preprocess_instance()
except:
print("warning: catch expected error")
try:
dataset.set_current_phase(1)
except:
print("warning: catch expected error")
try:
dataset.postprocess_instance()
except:
print("warning: catch expected error")
dataset._set_fleet_send_batch_size(1024)
try:
dataset.global_shuffle()
except:
print("warning: catch expected error")
#dataset.get_pv_data_size()
dataset.get_memory_data_size()
dataset.get_shuffle_data_size()
if __name__ == '__main__':
unittest.main()
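The common thread in these dataset test changes is the API migration: the old fleet DatasetFactory plus per-field setters is replaced by constructing a concrete dataset class and configuring it through a single init() call. A minimal sketch of the new pattern, assuming slots_vars is the list of feed variables and the file names are placeholders:

import paddle

dataset = paddle.distributed.InMemoryDataset()
# one init() call replaces set_batch_size/set_thread/set_pipe_command/set_use_var
dataset.init(
    batch_size=32,
    thread_num=3,
    pipe_command="cat",
    use_var=slots_vars)
dataset.set_filelist(["part-000.txt", "part-001.txt"])  # placeholder file names
dataset.load_into_memory()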
@@ -97,9 +97,11 @@ class DatasetLoaderTestBase(unittest.TestCase):
def check_batch_number(self, place, randomize_batch_num=False):
main_prog, startup_prog, feeds = self.build_network()
if self.dataset_name == "QueueDataset":
    dataset = paddle.distributed.QueueDataset()
else:
    dataset = paddle.distributed.InMemoryDataset()
dataset._set_batch_size(BATCH_SIZE)
if isinstance(place, fluid.CPUPlace):
file_num = 10
@@ -128,8 +130,8 @@ class DatasetLoaderTestBase(unittest.TestCase):
fake_reader(batch_num=BATCH_NUM + random_delta_batch_size[i]))
dataset.set_filelist(filelist)
dataset._set_use_var(feeds)
dataset._set_pipe_command("cat")
if self.dataset_name == 'InMemoryDataset':
dataset.load_into_memory()
...
@@ -141,7 +141,7 @@ class TestFleetLambMetaOptimizer(unittest.TestCase):
ops = [op.type for op in avg_cost.block.ops]
self.assertIn('lamb', ops)
self.assertIn('cast', ops)
self.assertIn('check_finite_and_unscale', ops)
if __name__ == "__main__":
...
@@ -145,7 +145,7 @@ class TestFleetLarsMetaOptimizer(unittest.TestCase):
ops = [op.type for op in avg_cost.block.ops]
self.assertIn('lars_momentum', ops)
self.assertIn('cast', ops)
self.assertIn('check_finite_and_unscale', ops)
if __name__ == "__main__":
...
@@ -79,9 +79,9 @@ if [ -f $file_1 ]; then
rm $file_1
fi
# test use DISTRIBUTED_TRAINER_ENDPOINTS env in paddlecloud
unset PADDLE_PORT
export DISTRIBUTED_TRAINER_ENDPOINTS=127.0.0.1:6170,127.0.0.1:6171,127.0.0.2:6170,127.0.0.2:6171
echo ""
echo "paddle.distributed.launch async poll process test"
...
@@ -163,10 +163,9 @@ class TestCloudRoleMaker2(unittest.TestCase):
data = "1 1 1 1\n"
f.write(data)
dataset = paddle.distributed.InMemoryDataset()
dataset.set_filelist(["test_fleet_gloo_role_maker_1.txt"])
dataset._set_use_var([show, label])
dataset.load_into_memory()
dataset.get_memory_data_size(fleet)
dataset.get_shuffle_data_size(fleet)
...
@@ -48,9 +48,9 @@ if [ -f $file_1 ]; then
rm $file_1
fi
# test use DISTRIBUTED_TRAINER_ENDPOINTS env in paddlecloud
unset PADDLE_PORT
export DISTRIBUTED_TRAINER_ENDPOINTS=127.0.0.1:6170,127.0.0.1:6171,127.0.0.2:6170,127.0.0.2:6171
echo ""
echo "paddle.distributed.launch async poll process test"
...
@@ -400,7 +400,8 @@ class TestCUDNNLstmOp(OpTest):
'Input': input,
'W': flat_w,
'InitH': init_h,
'InitC': init_c,
'SequenceLength': self.sequence_length
}
self.attrs = {
'dropout_prob': 0.0,
@@ -408,7 +409,6 @@ class TestCUDNNLstmOp(OpTest):
'input_size': input_size,
'hidden_size': hidden_size,
'num_layers': 1,
}
self.outputs = {
'Out': output,
@@ -436,13 +436,6 @@ class TestCUDNNLstmOp(OpTest):
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestCUDNNLstmOp2(TestCUDNNLstmOp):
def set_attrs(self):
self.num_layers = 2
...
@@ -52,18 +52,17 @@ class TestDatasetWithStat(unittest.TestCase):
name=slot, shape=[1], dtype="int64", lod_level=1)
slots_vars.append(var)
dataset = paddle.distributed.InMemoryDataset()
dataset._set_batch_size(32)
dataset._set_thread(3)
dataset.set_filelist([
    "test_in_memory_dataset_run_a.txt",
    "test_in_memory_dataset_run_b.txt"
])
dataset._set_pipe_command("cat")
dataset._set_use_var(slots_vars)
dataset.load_into_memory()
dataset._set_fea_eval(1, True)
dataset.slots_shuffle(["slot1"])
exe = fluid.Executor(fluid.CPUPlace())
...
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
from functools import partial
import contextlib
import numpy as np
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
import paddle.fluid.framework as framework
import paddle.fluid.optimizer as optimizer
import paddle.regularizer as regularizer
from paddle.fluid.backward import append_backward
def bow_net(data,
label,
dict_dim,
is_sparse=False,
emb_dim=8,
hid_dim=8,
hid_dim2=6,
class_dim=2):
"""
BOW net
This model is from https://github.com/PaddlePaddle/models:
fluid/PaddleNLP/text_classification/nets.py
"""
emb = fluid.layers.embedding(
input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim])
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
bow_tanh = fluid.layers.tanh(bow)
fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
return avg_cost
class TestRegularizer(unittest.TestCase):
def setUp(self):
self.word_dict = paddle.dataset.imdb.word_dict()
reader = paddle.batch(
paddle.dataset.imdb.train(self.word_dict), batch_size=1)()
self.train_data = [next(reader) for _ in range(1)]
def get_places(self):
places = [core.CPUPlace()]
if core.is_compiled_with_cuda():
places.append(core.CUDAPlace(0))
return places
@contextlib.contextmanager
def scope_prog_guard(self, main_prog, startup_prog):
scope = fluid.core.Scope()
with fluid.unique_name.guard():
with fluid.scope_guard(scope):
with fluid.program_guard(main_prog, startup_prog):
yield
def run_program(self, place, feed_list):
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
exe.run(fluid.default_startup_program())
main_prog = fluid.default_main_program()
param_list = [var.name for var in main_prog.block(0).all_parameters()]
param_sum = []
for data in self.train_data:
out = exe.run(main_prog,
feed=feeder.feed(data),
fetch_list=param_list)
p_sum = 0
for v in out:
p_sum += np.sum(np.abs(v))
param_sum.append(p_sum)
return param_sum
def check_l2decay_regularizer(self, place, model):
paddle.manual_seed(1)
paddle.framework.random._manual_program_seed(1)
main_prog = fluid.framework.Program()
startup_prog = fluid.framework.Program()
with self.scope_prog_guard(
main_prog=main_prog, startup_prog=startup_prog):
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
avg_cost = model(data, label, len(self.word_dict))
optimizer = fluid.optimizer.Adagrad(
learning_rate=0.1,
regularization=paddle.regularizer.L2Decay(1.0))
optimizer.minimize(avg_cost)
param_sum = self.run_program(place, [data, label])
return param_sum
def check_l2decay(self, place, model):
paddle.manual_seed(1)
paddle.framework.random._manual_program_seed(1)
main_prog = fluid.framework.Program()
startup_prog = fluid.framework.Program()
with self.scope_prog_guard(
main_prog=main_prog, startup_prog=startup_prog):
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
avg_cost_l2 = model(data, label, len(self.word_dict))
param_list = fluid.default_main_program().block(0).all_parameters()
para_sum = []
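# Manually build the same penalty that L2Decay(coeff=1.0) applies in
# check_l2decay_regularizer: add 0.5 * sum(param ** 2) to the loss,
# whose gradient is coeff * param.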
for para in param_list:
para_mul = fluid.layers.square(x=para)
para_sum.append(fluid.layers.reduce_sum(input=para_mul))
avg_cost_l2 += fluid.layers.sums(para_sum) * .5
optimizer = fluid.optimizer.Adagrad(learning_rate=0.1)
optimizer.minimize(avg_cost_l2)
param_sum = self.run_program(place, [data, label])
return param_sum
def test_l2(self):
for place in self.get_places():
dense_sparse_p_sum = []
for sparse in [True, False]:
model = partial(bow_net, is_sparse=sparse)
framework_l2 = self.check_l2decay_regularizer(place, model)
l2 = self.check_l2decay(place, model)
assert len(l2) == len(framework_l2)
for i in range(len(l2)):
assert np.isclose(a=framework_l2[i], b=l2[i], rtol=5e-5)
dense_sparse_p_sum.append(framework_l2)
assert len(dense_sparse_p_sum[0]) == len(dense_sparse_p_sum[1])
for i in range(len(dense_sparse_p_sum[0])):
assert np.isclose(
a=dense_sparse_p_sum[0][i],
b=dense_sparse_p_sum[1][i],
rtol=5e-5)
def test_repeated_regularization(self):
l1 = paddle.regularizer.L1Decay(0.1)
l2 = paddle.regularizer.L2Decay(0.01)
fc_param_attr = fluid.ParamAttr(regularizer=l1)
with fluid.program_guard(fluid.Program(), fluid.Program()):
x = fluid.layers.uniform_random([2, 2, 3])
out = fluid.layers.fc(x, 5, param_attr=fc_param_attr)
loss = fluid.layers.reduce_sum(out)
sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
sgd.minimize(loss)
with fluid.dygraph.guard():
input = fluid.dygraph.to_variable(
np.random.randn(3, 2).astype('float32'))
paddle.manual_seed(1)
paddle.framework.random._manual_program_seed(1)
linear1 = fluid.dygraph.Linear(
2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr)
linear2 = fluid.dygraph.Linear(
2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr)
loss1 = linear1(input)
loss1.backward()
# set l2 regularizer in optimizer, but l1 in fluid.ParamAttr
fluid.optimizer.SGD(parameter_list=linear1.parameters(),
learning_rate=1e-2,
regularization=l2).minimize(loss1)
# only set l1 in fluid.ParamAttr
loss2 = linear2(input)
loss2.backward()
fluid.optimizer.SGD(parameter_list=linear2.parameters(),
learning_rate=1e-2).minimize(loss2)
# they should both be applied by l1, and keep the same
self.assertTrue(
np.allclose(linear1.weight.numpy(), linear2.weight.numpy()),
"weight should use the regularization in fluid.ParamAttr!")
self.assertTrue(
np.allclose(linear1.bias.numpy(), linear2.bias.numpy()),
"bias should use the regularization in fluid.ParamAttr!")
if __name__ == '__main__':
unittest.main()
@@ -49,7 +49,10 @@ class LinearNet(nn.Layer):
super(LinearNet, self).__init__()
self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
@paddle.jit.to_static(input_spec=[
    paddle.static.InputSpec(
        shape=[None, IMAGE_SIZE], dtype='float32', name='x')
])
def forward(self, x):
return self._linear(x)
@@ -152,6 +155,34 @@ class TestTranslatedLayer(unittest.TestCase):
with self.assertRaises(ValueError):
program = translated_layer.program('not_exists')
def test_get_input_spec(self):
# load
translated_layer = paddle.jit.load(self.model_path)
expect_spec = [
paddle.static.InputSpec(
shape=[None, IMAGE_SIZE], dtype='float32', name='x')
]
actual_spec = translated_layer._input_spec()
for spec_x, spec_y in zip(expect_spec, actual_spec):
self.assertEqual(spec_x, spec_y)
def test_get_output_spec(self):
# load
translated_layer = paddle.jit.load(self.model_path)
expect_spec = [
paddle.static.InputSpec(
shape=[None, CLASS_NUM],
dtype='float32',
name='translated_layer/scale_0.tmp_1')
]
actual_spec = translated_layer._output_spec()
for spec_x, spec_y in zip(expect_spec, actual_spec):
self.assertEqual(spec_x, spec_y)
if __name__ == '__main__':
unittest.main()
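The tests above depend on the InputSpec declared in @paddle.jit.to_static being saved together with the model. A rough round-trip sketch, assuming a writable model directory (the path name here is hypothetical; the real tests use self.model_path from setUp):

import paddle

layer = LinearNet()
paddle.jit.save(layer, "example_model/linear")  # hypothetical path; records the declared input spec
loaded = paddle.jit.load("example_model/linear")
# the loaded TranslatedLayer can report the specs asserted in the tests
print(loaded._input_spec())
print(loaded._output_spec())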
@@ -26,4 +26,5 @@ NEED_TO_FIX_OP_LIST = [
'squared_l2_distance',
'tree_conv',
'cvm',
'cudnn_lstm',
]
@@ -12,8 +12,134 @@
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ['L1Decay', 'L2Decay']

import paddle.fluid as fluid
class L1Decay(fluid.regularizer.L1Decay):
"""
Implement the L1 Weight Decay Regularization, which encourages the weights to be sparse.
It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_paddle_optimizer_Momentum` ).
When set in ``ParamAttr`` , it only takes effect for trainable parameters in this layer. When set in
``optimizer`` , it takes effect for all trainable parameters. When set together, ``ParamAttr`` has
higher priority than ``optimizer`` , which means that for a trainable parameter, if regularizer is defined
in its ParamAttr, then the regularizer in Optimizer will be ignored. Otherwise the regularizer
in Optimizer will be used.
In the implementation, the formula of L1 Weight Decay Regularization is as follows:
.. math::
L1WeightDecay = reg\_coeff * sign(parameter)
Args:
coeff (float, optional): regularization coefficient. Default: 0.0.
Examples:
.. code-block:: python
# Example1: set Regularizer in optimizer
import paddle
from paddle.regularizer import L1Decay
import numpy as np
paddle.disable_static()
inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
inp = paddle.to_tensor(inp)
out = linear(inp)
loss = paddle.mean(out)
beta1 = paddle.to_tensor([0.9], dtype="float32")
beta2 = paddle.to_tensor([0.99], dtype="float32")
momentum = paddle.optimizer.Momentum(
learning_rate=0.1,
parameters=linear.parameters(),
weight_decay=L1Decay(0.0001))
back = out.backward()
momentum.step()
momentum.clear_grad()
# Example2: set Regularizer in parameters
# Set L1 regularization in parameters.
# Global regularizer does not take effect on my_conv2d for this case.
from paddle.nn import Conv2d
from paddle import ParamAttr
from paddle.regularizer import L1Decay
my_conv2d = Conv2d(
in_channels=10,
out_channels=10,
kernel_size=1,
stride=1,
padding=0,
weight_attr=ParamAttr(regularizer=L1Decay(coeff=0.01)),
bias_attr=False)
"""
def __init__(self, coeff=0.0):
super(L1Decay, self).__init__(coeff)
class L2Decay(fluid.regularizer.L2Decay):
"""
Implement the L2 Weight Decay Regularization, which helps to prevent the model over-fitting.
It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_paddle_optimizer_Momentum` ).
When set in ``ParamAttr`` , it only takes effect for trainable parameters in this layer. When set in
``optimizer`` , it takes effect for all trainable parameters. When set together, ``ParamAttr`` has
higher priority than ``optimizer`` , which means that for a trainable parameter, if regularizer is defined
in its ParamAttr, then the regularizer in Optimizer will be ignored. Otherwise the regularizer
in Optimizer will be used.
In the implementation, the formula of L2 Weight Decay Regularization is as follows:
.. math::
L2WeightDecay = reg\_coeff * parameter
Args:
coeff (float, optional): regularization coefficient. Default: 0.0.
Examples:
.. code-block:: python
# Example1: set Regularizer in optimizer
import paddle
from paddle.regularizer import L2Decay
import numpy as np
paddle.disable_static()
inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
inp = paddle.to_tensor(inp)
out = linear(inp)
loss = paddle.mean(out)
beta1 = paddle.to_tensor([0.9], dtype="float32")
beta2 = paddle.to_tensor([0.99], dtype="float32")
momentum = paddle.optimizer.Momentum(
learning_rate=0.1,
parameters=linear.parameters(),
weight_decay=L2Decay(0.0001))
back = out.backward()
momentum.step()
momentum.clear_grad()
# Example2: set Regularizer in parameters
# Set L2 regularization in parameters.
# Global regularizer does not take effect on my_conv2d for this case.
from paddle.nn import Conv2d
from paddle import ParamAttr
from paddle.regularizer import L2Decay
my_conv2d = Conv2d(
in_channels=10,
out_channels=10,
kernel_size=1,
stride=1,
padding=0,
weight_attr=ParamAttr(regularizer=L2Decay(coeff=0.01)),
bias_attr=False)
"""
def __init__(self, coeff=0.0):
super(L2Decay, self).__init__(coeff)
@@ -37,7 +37,11 @@ def get_cluster_from_args(selected_gpus):
free_ports = find_free_ports(len(selected_gpus))
if free_ports is not None:
free_ports = list(free_ports)
trainer_endpoints = []
for ip in node_ips:
    trainer_endpoints.append(["%s:%d" % (ip, port) for port in free_ports])
return get_cluster(node_ips, node_ip, trainer_endpoints, selected_gpus)
def get_gpus(selected_gpus):
...
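For the shape of the new argument, with hypothetical values:

# hypothetical values showing what get_cluster now receives
node_ips = ["10.0.0.1", "10.0.0.2"]
free_ports = [6170, 6171]
trainer_endpoints = [["%s:%d" % (ip, p) for p in free_ports] for ip in node_ips]
# -> [['10.0.0.1:6170', '10.0.0.1:6171'], ['10.0.0.2:6170', '10.0.0.2:6171']]
# one endpoint list per node, replacing the old flat free_ports argument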
@@ -16,12 +16,13 @@ from .profiler import ProfilerOptions
from .profiler import Profiler
from .profiler import get_profiler
from .deprecated import deprecated
from ..fluid.framework import unique_name
from ..fluid.framework import load_op_library
from ..fluid.framework import require_version
from . import download
__all__ = ['dump_config', 'deprecated', 'download']
#TODO: define new api under this directory
__all__ += ['unique_name', 'load_op_library', 'require_version']
:: Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
::
:: Licensed under the Apache License, Version 2.0 (the "License");
:: you may not use this file except in compliance with the License.
:: You may obtain a copy of the License at
::
:: http://www.apache.org/licenses/LICENSE-2.0
::
:: Unless required by applicable law or agreed to in writing, software
:: distributed under the License is distributed on an "AS IS" BASIS,
:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
:: See the License for the specific language governing permissions and
:: limitations under the License.
::
:: ===============================
:: Build Paddle compile environment
:: ===============================
:: Description:
::
:: Install compile environment for xly CI.
::
:: Include:
:: 1. CMake 3.17.0
:: 2. Git 2.28.0
:: 3. Python 3.7.8
:: 4. Visual Studio 2015 with update 3
:: 5. CUDA 10 [miss cudnn]
:: 6. java jre [not complete]
:: 7. xly agent [not complete]
:: Echo command is not required.
@echo off
:: ===== start step 0: wget tool =====
:: Download wget for Windows when the wget tool is not present.
echo ">>>>>>>> step [0/7]: wget tool"
wget --help > nul 2> nul || call:install_wget
goto cmake
:install_wget
echo There is no wget on this PC, will download wget 1.20.
echo Download package from https://eternallybored.org/misc/wget/1.20/64/wget.exe ...
certutil -urlcache -split -f https://eternallybored.org/misc/wget/1.20/64/wget.exe > nul 2> nul
if %errorlevel% == 0 (
echo Download wget tool into %cd% success.
) else (
echo Error***** Download wget tool failed, please download it before rerun.
exit /b 1
)
goto :eof
:: ===== end step 0: wget tool =====
:: ===== start step 1: cmake =====
:: Download CMake-3.17.0 and add it to PATH when it is not installed.
:: TODO: limit version >= 3.17.0
:cmake
echo ">>>>>>>> step [1/7]: CMake 3.17.0"
cmake --help > nul 2> nul || call :install_cmake
goto git
:install_cmake
echo There is no cmake on this PC, will install cmake-3.17.0.
echo Download package from https://cmake.org/files/v3.17/cmake-3.17.0-win64-x64.msi ...
wget -O cmake-3.17.0-win64-x64.msi https://cmake.org/files/v3.17/cmake-3.17.0-win64-x64.msi
echo Install cmake-3.17.0 ...
:: /passive [silent installation]
:: /norestart [do not restart]
:: ADD_CMAKE_TO_PATH = System [add CMake to the system PATH for all users]
start /wait cmake-3.17.0-win64-x64.msi /passive /norestart ADD_CMAKE_TO_PATH=System
if %errorlevel% == 0 (
echo Install CMake-3.17.0 success!
) else (
echo Error***** Install Cmake-3.17.0 failed, please re-install it manually.
)
del cmake-3.17.0-win64-x64.msi
goto :eof
:: ===== end step 1: cmake =====
:: ===== start step 2: Git =====
:: Download Git-2.28.0 and add it to PATH when it is not installed.
:: TODO: limit version >= 2.28.0
:git
echo ">>>>>>>> step [2/8]: Git 2.28.0"
git --help > nul 2> nul || call :install_git
goto python
:install_git
echo There is no git on this PC, will install Git-2.28.0.
echo Download package from https://github.com/git-for-windows/git/releases/download/v2.28.0.windows.1/Git-2.28.0-64-bit.exe ...
wget -O Git-2.28.0-64-bit.exe https://github.com/git-for-windows/git/releases/download/v2.28.0.windows.1/Git-2.28.0-64-bit.exe
echo Install Git-2.28.0 ...
:: /SILENT [silent install]
:: /ALLUSERS [add path for all users]
:: /NORESTART [do not restart]
start /wait Git-2.28.0-64-bit.exe /SILENT /ALLUSERS /NORESTART
if %errorlevel% == 0 (
echo Install Git-2.28.0 success!
) else (
echo Error***** Install Git-2.28.0 failed, please re-install it manually.
)
del Git-2.28.0-64-bit.exe
goto :eof
:: ===== end step 2: Git =====
:: ===== start step 3: Python =====
:: Download Python-3.7.8 and add it to PATH when it is not installed.
:: TODO: limit version >= 3.7.8
:python
echo ">>>>>>>> step [3/7]: Python 3.7.8"
python -V 2>&1 | findstr /C:"Python 3.7.8" > nul 2> nul || call :install_python
goto vs2015
:install_python
echo There is no Python on this PC, will install Python-3.7.8.
echo Download package from https://npm.taobao.org/mirrors/python/3.7.8/python-3.7.8-amd64.exe ...
wget -O python-3.7.8-amd64.exe https://npm.taobao.org/mirrors/python/3.7.8/python-3.7.8-amd64.exe
echo Install Python-3.7.8 ...
:: /passive [silent install]
:: InstallAllUsers [add path for all users]
:: PrependPath [add script/install into PATH]
:: TargetDir [install directory]
start /wait python-3.7.8-amd64.exe /passive InstallAllUsers=1 PrependPath=1 TargetDir=C:\Python37
if %errorlevel% == 0 (
echo Install python-3.7.8 success!
) else (
echo Error***** Install python-3.7.8 failed, please re-install it manually.
)
del python-3.7.8-amd64.exe
goto :eof
:: ===== end step 3: Python =====
:: ===== start step 4: Visual Studio 2015 =====
:: Download Visual Studio 2015 when it is not installed.
:vs2015
echo ">>>>>>>> step [4/7]: Visual Studio 2015"
cmd /C "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 > nul 2> nul || call :install_visual_studio
goto :cuda10
:install_visual_studio
echo There is no Visual Studio on this PC, will install VS2015.
echo Download package from "https://download.my.visualstudio.com/pr/en_visual_studio_professional_2015_with_update_3_x86_x64_web_installer_8922978.exe"
wget -O vs_installer.exe "https://download.my.visualstudio.com/pr/en_visual_studio_professional_2015_with_update_3_x86_x64_web_installer_8922978.exe?t=9ee7a96d-ca80-4b84-af2c-7dd86996a0aa&e=1600103404&h=3cdea1e81c04aa4e846f5314972c46eb&su=1"
echo Install Visual Studio 2015 ...
:: /passive [silent install]
:: /norestart [no restart]
:: /NoRefresh [no refresh]
:: /InstallSelectableItems NativeLanguageSupport_Group [select Visual C++ for installing]
start /wait vs_installer.exe /passive /norestart /NoRefresh /InstallSelectableItems NativeLanguageSupport_Group
if %errorlevel% == 0 (
echo Install Visual Studio 2015 success!
) else (
echo Error***** Install Visual Studio 2015 failed, please re-install it manually.
)
del vs_installer.exe
goto :eof
:: ===== end step 4: Visual Studio 2015 =====
:: ===== start step 5: CUDA 10 =====
:cuda10
echo ">>>>>>>> step [5/7]: CUDA 10.0"
nvcc --version > nul 2> nul || call :install_cuda
goto java-jre
:install_cuda
echo There is no CUDA on this PC, will install CUDA-10.0.
echo Download package from "https://developer.download.nvidia.cn/compute/cuda/10.0/secure/Prod/network_installers/cuda_10.0.130_win10_network.exe"
wget -O cuda_installer.exe "https://developer.download.nvidia.cn/compute/cuda/10.0/secure/Prod/network_installers/cuda_10.0.130_win10_network.exe?hG7oBtA2CnxZG7d39onmBdtzrIa2cOukrmW8I0qk3h36vb2Sj0yYGjMElJlxlNhjx8Xu5RlbmdBhCWvP2QcEqMjCoKCXe5lOgr5uIIso_7LqrotgQHbZRZSVBYRT4bIAHPVSPrr4_4KczKvI9Nf3mbO9RJ2Vj6ECD5QphRMJBus0KKNVxO1gsplVL5qaCnE"
echo Install CUDA-10.0 ...
:: -s [silent install]
start /wait cuda_installer.exe -s
if %errorlevel% == 0 (
echo Install CUDA-10.0 success!
) else (
echo Error***** Install CUDA-10.0 failed, please re-install it manually.
)
del cuda_installer.exe
goto :eof
:: ===== end step 5: CUDA 10 =====
:: ===== start step 6: java jre =====
:java-jre
echo ">>>>>>>> step [6/7]: java jre"
goto xly-agent
:: ===== end step 6: java jre =====
:: ===== start step 7: xly agent =====
:xly-agent
echo ">>>>>>>> step [7/7]: xly agent"
goto :eof
:: ===== end step 7: xly agent =====
\ No newline at end of file