"paddle/fluid/pybind/compatible.h" does not exist at "0bb9c80ef960d777c5937f8fed8ddf75f2ac6a18"
Commit c6e0ee6d authored by J jingqinghe
......@@ -386,7 +386,7 @@ function(cc_test_run TARGET_NAME)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
# No unit test should exceed 2 minutes.
if (APPLE OR WIN32)
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150)
else()
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 120)
endif()
......@@ -748,7 +748,7 @@ function(py_test TARGET_NAME)
endif()
if (APPLE OR WIN32)
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150)
else()
# No unit test should exceed 2 minutes in Linux.
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 120)
......
......@@ -138,12 +138,17 @@ function(op_library TARGET)
# And for detail pybind information, please see generated paddle/pybind/pybind.h.
file(READ ${TARGET}.cc TARGET_CONTENT)
string(REGEX MATCH "REGISTER_OPERATOR\\(.*REGISTER_OPERATOR\\(" multi_register "${TARGET_CONTENT}")
string(REGEX MATCH "REGISTER_OPERATOR\\([a-z0-9_]*," one_register "${multi_register}")
# [ \t\r\n]* is used for blank characters
string(REGEX MATCH "REGISTER_OPERATOR\\([ \t\r\n]*[a-z0-9_]*," one_register "${multi_register}")
if (one_register STREQUAL "")
string(REPLACE "_op" "" TARGET "${TARGET}")
else ()
string(REPLACE "REGISTER_OPERATOR(" "" TARGET "${one_register}")
string(REPLACE "," "" TARGET "${TARGET}")
# [ \t\r\n]+ is used for blank characters.
# Here we use '+' instead of '*' since it is a REPLACE operation.
string(REGEX REPLACE "[ \t\r\n]+" "" TARGET "${TARGET}")
endif()
# pybind USE_NO_KERNEL_OP
......
......@@ -102,6 +102,8 @@ if(WITH_MKLDNN)
pass_library(conv_concat_relu_mkldnn_fuse_pass inference DIR mkldnn)
pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
pass_library(scale_matmul_fuse_pass inference DIR mkldnn)
pass_library(cpu_bfloat16_placement_pass inference DIR mkldnn)
pass_library(cpu_bfloat16_pass inference DIR mkldnn)
pass_library(fc_mkldnn_pass inference DIR mkldnn)
pass_library(cpu_quantize_placement_pass base DIR mkldnn)
pass_library(cpu_quantize_pass inference DIR mkldnn)
......@@ -162,4 +164,6 @@ endif()
cc_test(test_cpu_quantize_squash_pass SRCS mkldnn/cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor)
cc_test(test_reshape_transpose_matmul_mkldnn_fuse_pass SRCS mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc DEPS reshape_transpose_matmul_mkldnn_fuse_pass)
cc_test(test_matmul_transpose_reshape_fuse_pass SRCS mkldnn/matmul_transpose_reshape_fuse_pass_tester.cc DEPS matmul_transpose_reshape_fuse_pass)
cc_test(test_cpu_bfloat16_placement_pass SRCS mkldnn/cpu_bfloat16_placement_pass_tester.cc DEPS cpu_bfloat16_placement_pass)
cc_test(test_cpu_bfloat16_pass SRCS mkldnn/cpu_bfloat16_pass_tester.cc DEPS cpu_bfloat16_pass)
endif ()
......@@ -1892,6 +1892,82 @@ PDNode *patterns::QuantizePlacement::operator()(
return op;
}
PDNode *patterns::Bfloat16Placement::operator()(
const std::unordered_set<std::string> &bfloat16_enabled_op_types) {
std::unordered_set<std::string> supported_op_types =
std::unordered_set<std::string>();
if (!bfloat16_enabled_op_types.empty()) {
supported_op_types = bfloat16_enabled_op_types;
}
auto *op = pattern->NewNode(op_repr())->assert_is_ops(supported_op_types);
return op;
}
PDNode *patterns::OrphanedBfloat16::operator()() {
auto *prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
prev_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"float32";
});
auto *prev_out = pattern->NewNode(prev_out_repr())->AsOutput();
auto *op = pattern->NewNode(op_repr())->assert_is_op();
op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"bfloat16";
});
auto *op_out = pattern->NewNode(op_out_repr())->AsOutput();
auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op();
next_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"float32";
});
prev_op->LinksTo({prev_out});
op->LinksFrom({prev_out}).LinksTo({op_out});
next_op->LinksFrom({op_out});
return next_op;
}
PDNode *patterns::LastBfloat16Ops::operator()() {
auto *op = pattern->NewNode(op_repr())->assert_is_op();
op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"bfloat16";
});
auto *op_out = pattern->NewNode(op_out_repr())->AsOutput();
auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op();
next_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") !=
"bfloat16";
});
op->LinksTo({op_out});
next_op->LinksFrom({op_out});
return next_op;
}
PDNode *patterns::FirstBfloat16Ops::operator()() {
auto *prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
prev_op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") !=
"bfloat16";
});
auto *op_in = pattern->NewNode(op_in_repr())->AsOutput();
auto *op = pattern->NewNode(op_repr())->assert_is_op();
op->assert_more([&](Node *node) {
return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
"bfloat16";
});
prev_op->LinksTo({op_in});
op->LinksFrom({op_in});
return op;
}
PDNode *patterns::MKLDNNInPlace::operator()() {
const std::unordered_set<std::string> &supported_op_types = {
"abs",
......
......@@ -1129,6 +1129,47 @@ struct QuantizePlacement : public PatternBase {
PATTERN_DECL_NODE(op);
};
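// Bfloat16Placement matches ops eligible for bfloat16 execution;
// OrphanedBfloat16 matches a bfloat16 op sandwiched between two float32 ops
// (so the placement pass can revert it to float32); FirstBfloat16Ops and
// LastBfloat16Ops match the entry and exit points of a bfloat16 region, where
// quantize and dequantize ops are later inserted by cpu_bfloat16_pass.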
struct Bfloat16Placement : public PatternBase {
Bfloat16Placement(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "bfloat16_placement") {}
PDNode* operator()(
const std::unordered_set<std::string>& bfloat16_enabled_op_types);
PATTERN_DECL_NODE(op);
};
struct OrphanedBfloat16 : public PatternBase {
OrphanedBfloat16(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "orphaned_bfloat16") {}
PDNode* operator()();
PATTERN_DECL_NODE(prev_op);
PATTERN_DECL_NODE(prev_out);
PATTERN_DECL_NODE(op);
PATTERN_DECL_NODE(op_out);
PATTERN_DECL_NODE(next_op);
};
struct LastBfloat16Ops : public PatternBase {
LastBfloat16Ops(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "last_bfloat16_ops") {}
PDNode* operator()();
PATTERN_DECL_NODE(op);
PATTERN_DECL_NODE(op_out);
PATTERN_DECL_NODE(next_op);
};
struct FirstBfloat16Ops : public PatternBase {
FirstBfloat16Ops(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "first_bfloat16_ops") {}
PDNode* operator()();
PATTERN_DECL_NODE(prev_op);
PATTERN_DECL_NODE(op_in);
PATTERN_DECL_NODE(op);
};
// Pattern used for enforcing in-place computation on DNNL ops that support
// it: softmax, batch_norm and layer_norm.
struct MKLDNNInPlace : public PatternBase {
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace framework {
namespace ir {
using string::PrettyLogDetail;
void UnlinkNodes(ir::Node* a, ir::Node* b) {
a->outputs.erase(std::remove(a->outputs.begin(), a->outputs.end(), b),
a->outputs.end());
b->inputs.erase(std::remove(b->inputs.begin(), b->inputs.end(), a),
b->inputs.end());
}
void CPUBFloat16Pass::SetInputDataType(ir::Graph* graph) const {
GraphPatternDetector gpd;
patterns::FirstBfloat16Ops bfloat16_ops{gpd.mutable_pattern(),
"first_bfloat16_ops"};
bfloat16_ops();
int quantize_counter = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(op_in, op_in, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_ops);
if (op->Op()->Type() != "conv2d" && prev_op->Op()->Type() != "quantize") {
VarDesc quantize_out_desc(patterns::PDNodeName("quantize", "out"));
auto* quantize_out_node = g->CreateVarNode(&quantize_out_desc);
// create a quantize op node
OpDesc q_desc;
q_desc.SetType("quantize");
q_desc.SetInput("Input", std::vector<std::string>({op_in->Name()}));
q_desc.SetOutput("Output",
std::vector<std::string>({quantize_out_node->Name()}));
q_desc.SetAttr("Scale", 1.f);
q_desc.SetAttr("bfloat16", true);
q_desc.SetAttr("output_format", Has("data_layout")
? Get<std::string>("data_layout")
: "NCHW");
auto quantize_op = g->CreateOpNode(&q_desc); // OpDesc will be copied.
std::string op_input_name;
for (auto name : op->Op()->InputNames()) {
for (auto input_name : op->Op()->Input(name)) {
if (input_name == op_in->Name()) op_input_name = name;
}
}
PADDLE_ENFORCE_NE(
op_input_name.empty(), true,
platform::errors::NotFound(
"Operator before operator should have input as op output"));
op->Op()->SetInput(op_input_name,
std::vector<std::string>({quantize_out_node->Name()}));
UnlinkNodes(op_in, op);
IR_NODE_LINK_TO(op_in, quantize_op);
IR_NODE_LINK_TO(quantize_op, quantize_out_node);
IR_NODE_LINK_TO(quantize_out_node, op);
quantize_counter++;
}
};
gpd(graph, handler);
PrettyLogDetail("--- added %d quantize op before bfloat16 op",
quantize_counter);
}
void CPUBFloat16Pass::SetOutputDataType(ir::Graph* graph) const {
GraphPatternDetector gpd;
patterns::LastBfloat16Ops bfloat16_ops{gpd.mutable_pattern(),
"last_bfloat16_ops"};
bfloat16_ops();
int force_fp32_counter = 0, dequantize_counter = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(op_out, op_out, bfloat16_ops);
GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, bfloat16_ops);
if ((op->Op()->HasAttr("force_fp32_output") ||
op->Op()->HasProtoAttr("force_fp32_output")) &&
!op->Op()->GetAttrIfExists<bool>("fuse_residual_connection")) {
op->Op()->SetAttr("force_fp32_output", true);
force_fp32_counter++;
} else if (op->Op()->Type() != "prior_box") {
// Create dequantize input variable
VarDesc dequantize_in_desc(patterns::PDNodeName("dequantize", "in"));
auto* dequantize_in_node = g->CreateVarNode(&dequantize_in_desc);
// create a dequantize op node for output.
OpDesc deq_desc;
deq_desc.SetType("dequantize");
deq_desc.SetInput("Input",
std::vector<std::string>({dequantize_in_node->Name()}));
deq_desc.SetOutput("Output", std::vector<std::string>({op_out->Name()}));
deq_desc.SetAttr("Scale", 1.0f);
auto dequantize_op = g->CreateOpNode(&deq_desc);
std::string op_output_name;
for (auto name : op->Op()->OutputNames()) {
for (auto output_name : op->Op()->Output(name)) {
if (output_name == op_out->Name()) op_output_name = name;
}
}
PADDLE_ENFORCE_NE(
op_output_name.empty(), true,
platform::errors::NotFound(
"Operator after operator should have input as op output"));
op->Op()->SetOutput(op_output_name, std::vector<std::string>(
{dequantize_in_node->Name()}));
UnlinkNodes(op, op_out);
IR_NODE_LINK_TO(op, dequantize_in_node);
IR_NODE_LINK_TO(dequantize_in_node, dequantize_op);
IR_NODE_LINK_TO(dequantize_op, op_out);
dequantize_counter++;
}
};
gpd(graph, handler);
PrettyLogDetail("--- added %d dequantize op and used %d force_fp32_output",
dequantize_counter, force_fp32_counter);
}
void CPUBFloat16Pass::ApplyImpl(ir::Graph* graph) const {
SetInputDataType(graph);
SetOutputDataType(graph);
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(cpu_bfloat16_pass, paddle::framework::ir::CPUBFloat16Pass);
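For context, a minimal usage sketch (an assumption, not part of this commit) of how a caller might run the newly registered pass; it mirrors the PreparePass helper in the tester further down:

#include "paddle/fluid/framework/ir/pass.h"  // PassRegistry, Pass, ir::Graph

// Sketch only: fetch the pass from the registry and run it. Assumes
// USE_PASS(cpu_bfloat16_pass) is declared in the linking translation unit.
std::unique_ptr<paddle::framework::ir::Graph> ApplyCpuBfloat16Pass(
    std::unique_ptr<paddle::framework::ir::Graph> graph) {
  auto pass =
      paddle::framework::ir::PassRegistry::Instance().Get("cpu_bfloat16_pass");
  // Apply() takes ownership of the raw graph and returns the transformed one.
  graph.reset(pass->Apply(graph.release()));
  return graph;
}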
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include "paddle/fluid/framework/ir/pass.h"
namespace paddle {
namespace framework {
namespace ir {
class CPUBFloat16Pass : public Pass {
protected:
void SetInputDataType(ir::Graph* graph) const;
void SetOutputDataType(ir::Graph* graph) const;
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs, bool use_mkldnn,
const std::string& mkldnn_data_type = "float32",
const bool force_fp32_output = false) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
op->SetAttr("use_mkldnn", use_mkldnn);
op->SetAttr("name", name);
if (type == "conv2d") {
op->SetInput("Input", {inputs[0]});
op->SetOutput("Output", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
op->SetAttr("force_fp32_output", force_fp32_output);
} else if (type == "pool2d" || type == "transpose2" || type == "reshape2" ||
type == "dropout") {
op->SetInput("X", {inputs[0]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
} else if (type == "fc") {
op->SetInput("Input", {inputs[0]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
} else if (type == "concat") {
op->SetInput("X", inputs);
op->SetOutput("Out", outputs);
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
} else if (type == "matmul" || type == "elementwise_add") {
op->SetInput("X", {inputs[0]});
if (inputs.size() > 1) op->SetInput("Y", {inputs[1]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
}
}
void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
const std::initializer_list<std::string> variable_names,
int* original_nodes_num, int* current_nodes_num) {
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
graph->reset(pass->Apply(graph->release()));
*original_nodes_num = (*graph)->Nodes().size();
(*graph).reset(pass->Apply((*graph).release()));
*current_nodes_num = (*graph)->Nodes().size();
}
static const std::initializer_list<std::string> variable_names{
"z", "a", "b", "c", "d", "e", "f", "g", "h", "i"};
ProgramDesc BuildProgramDesc(bool use_mkldnn) {
ProgramDesc prog;
for (auto& v : variable_names) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "dropout", "Dropout1", {"z"}, {"a"}, use_mkldnn, "float32");
SetOp(&prog, "conv2d", "Conv1", {"a"}, {"b"}, use_mkldnn, "bfloat16");
SetOp(&prog, "pool2d", "Pool1", {"b"}, {"c"}, use_mkldnn, "bfloat16");
SetOp(&prog, "conv2d", "Conv1", {"c"}, {"d"}, use_mkldnn, "bfloat16");
SetOp(&prog, "dropout", "Dropout2", {"d"}, {"e"}, use_mkldnn, "float32");
SetOp(&prog, "transpose2", "Transpose1", {"e"}, {"f"}, use_mkldnn,
"bfloat16");
SetOp(&prog, "reshape2", "Reshape1", {"f"}, {"g"}, use_mkldnn, "bfloat16");
SetOp(&prog, "concat", "Concat1", {"g"}, {"h"}, use_mkldnn, "bfloat16");
SetOp(&prog, "dropout", "Dropout3", {"h"}, {"i"}, use_mkldnn, "float32");
return prog;
}
void MainTest(const ProgramDesc& prog, int conv_count, int pool_count,
int transpose_count, int quant_count, int dequant_count,
int added_nodes_count) {
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
int original_nodes_num, current_nodes_num;
PreparePass(&graph, prog, variable_names, &original_nodes_num,
&current_nodes_num);
int quantize_nodes_count = 0;
int dequantize_nodes_count = 0;
int conv2d_nodes_count = 0;
int pool2d_nodes_count = 0;
int transpose2_nodes_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
auto* op = node->Op();
if (op->Type() == "conv2d") {
conv2d_nodes_count++;
} else if (op->Type() == "pool2d") {
pool2d_nodes_count++;
} else if (op->Type() == "transpose2") {
transpose2_nodes_count++;
} else if (op->Type() == "quantize") {
quantize_nodes_count++;
} else if (op->Type() == "dequantize") {
dequantize_nodes_count++;
}
}
}
EXPECT_EQ(conv2d_nodes_count, conv_count);
EXPECT_EQ(pool2d_nodes_count, pool_count);
EXPECT_EQ(transpose2_nodes_count, transpose_count);
EXPECT_EQ(quantize_nodes_count, quant_count);
EXPECT_EQ(dequantize_nodes_count, dequant_count);
EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
}
TEST(CpuQuantizePass, quantize) {
bool use_mkldnn = true;
// 1 quantize + 1 dequantize
int added_nodes = 2;
MainTest(BuildProgramDesc(use_mkldnn), 2, 1, 1, 1, 2, added_nodes);
}
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(cpu_bfloat16_pass);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h"
#include <string>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace framework {
namespace ir {
using string::PrettyLogDetail;
void CPUBfloat16PlacementPass::SetMkldnnDataType(
ir::Graph* graph, int* bfloat16_operators) const {
const auto& op_types_list =
Get<std::unordered_set<std::string>>("bfloat16_enabled_op_types");
// Set mkldnn_data_type to bfloat16 for all operators that are listed in the
// bfloat16_enabled_op_types set or that are matched by the Bfloat16Placement
// pattern.
GraphPatternDetector gpd;
patterns::Bfloat16Placement bfloat16_placement_pattern{gpd.mutable_pattern(),
"bfloat16_placement"};
bfloat16_placement_pattern(op_types_list);
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_placement_pattern);
if ((op->Op()->HasAttr("mkldnn_data_type") ||
op->Op()->HasProtoAttr("mkldnn_data_type")) &&
!platform::HasOpINT8DataType(op->Op())) {
op->Op()->SetAttr("mkldnn_data_type", std::string("bfloat16"));
(*bfloat16_operators)++;
}
};
gpd(graph, handler);
}
void CPUBfloat16PlacementPass::RemoveOrhanedOperators(
ir::Graph* graph, int* bfloat16_operators) const {
// find orphaned bfloat16 operator that is between two float32 operators
// revert mkldnn_data_type attr to float32
GraphPatternDetector gpd;
patterns::OrphanedBfloat16 orphaned_bfloat16_pattern{gpd.mutable_pattern(),
"orphaned_bfloat16"};
orphaned_bfloat16_pattern();
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(op, op, orphaned_bfloat16_pattern);
op->Op()->SetAttr("mkldnn_data_type", std::string("float32"));
(*bfloat16_operators)--;
};
gpd(graph, handler);
}
void CPUBfloat16PlacementPass::ApplyImpl(ir::Graph* graph) const {
int bfloat16_operators = 0;
SetMkldnnDataType(graph, &bfloat16_operators);
RemoveOrhanedOperators(graph, &bfloat16_operators);
PrettyLogDetail("--- marked %d operators to bfloat16 ",
bfloat16_operators);
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(cpu_bfloat16_placement_pass,
paddle::framework::ir::CPUBfloat16PlacementPass)
// a vector of operator type names with bfloat16 support ("conv2d" etc.)
// the second param is the default value for this vector
.DefaultPassAttr("bfloat16_enabled_op_types",
new std::unordered_set<std::string>());
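For context, a hedged sketch (not part of this commit) of overriding the default empty set declared above with a caller-supplied whitelist, mirroring the MainTest helper in the placement-pass tester below:

#include <unordered_set>
#include "paddle/fluid/framework/ir/pass.h"  // PassRegistry, Pass, ir::Graph

// Sketch only: run the placement pass so that only whitelisted op types are
// marked with mkldnn_data_type = "bfloat16". The whitelist is illustrative.
void MarkBfloat16Ops(std::unique_ptr<paddle::framework::ir::Graph>* graph) {
  auto pass = paddle::framework::ir::PassRegistry::Instance().Get(
      "cpu_bfloat16_placement_pass");
  pass->Set("bfloat16_enabled_op_types",
            new std::unordered_set<std::string>({"conv2d", "pool2d"}));
  graph->reset(pass->Apply(graph->release()));
}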
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include "paddle/fluid/framework/ir/pass.h"
namespace paddle {
namespace framework {
namespace ir {
/*
* Specifies which operators should be run on bfloat16.
*/
class CPUBfloat16PlacementPass : public Pass {
protected:
void SetMkldnnDataType(ir::Graph* graph, int* bfloat16_operators) const;
void RemoveOrhanedOperators(ir::Graph* graph, int* bfloat16_operators) const;
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs,
const std::string& mkldnn_data_type = "float32") {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
op->SetAttr("mkldnn_data_type", mkldnn_data_type);
if (type == "conv2d") {
op->SetAttr("name", name);
op->SetInput("Input", {inputs[0]});
} else if (type == "relu") {
op->SetInput("X", inputs);
} else if (type == "concat") {
op->SetAttr("axis", 1);
op->SetInput("X", {inputs[0], inputs[1]});
} else if (type == "pool2d") {
op->SetInput("X", {inputs[0]});
} else {
FAIL() << "Unexpected operator type.";
}
op->SetOutput("Out", {outputs[0]});
}
// operator mkldnn_data_type
// ---------------------------------------
// (a,b)->concat->c float32
// c->conv->f float32
// f->relu->g float32
// g->pool->h float32
// h->conv->k float32
// k->pool->l float32
ProgramDesc BuildProgramDesc() {
ProgramDesc prog;
for (auto& v :
std::vector<std::string>({"a", "b", "c", "f", "g", "h", "k", "l"})) {
prog.MutableBlock(0)->Var(v);
}
SetOp(&prog, "concat", "concat1", {"a", "b"}, {"c"});
SetOp(&prog, "conv2d", "conv1", {"c"}, {"f"});
SetOp(&prog, "relu", "relu1", {"f"}, {"g"});
SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"});
SetOp(&prog, "conv2d", "conv2", {"h"}, {"k"});
SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"});
return prog;
}
void MainTest(std::initializer_list<std::string> bfloat16_enabled_op_types,
unsigned expected_bfloat16_data_type_count) {
auto prog = BuildProgramDesc();
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_placement_pass");
pass->Set("bfloat16_enabled_op_types",
new std::unordered_set<std::string>(bfloat16_enabled_op_types));
graph.reset(pass->Apply(graph.release()));
unsigned bfloat16_data_type_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
if (platform::HasOpBFLOAT16DataType(node->Op())) {
++bfloat16_data_type_count;
}
}
}
EXPECT_EQ(bfloat16_data_type_count, expected_bfloat16_data_type_count);
}
void DefaultAttrTest(unsigned expected_bfloat16_data_type_count) {
auto prog = BuildProgramDesc();
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_placement_pass");
graph.reset(pass->Apply(graph.release()));
unsigned bfloat16_data_type_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
if (platform::HasOpBFLOAT16DataType(node->Op())) {
++bfloat16_data_type_count;
}
}
}
EXPECT_EQ(bfloat16_data_type_count, expected_bfloat16_data_type_count);
}
TEST(Bfloat16PlacementPass, enable_all) {
MainTest({"conv2d", "pool2d", "relu", "concat"}, 6);
}
TEST(Bfloat16PlacementPass, enabled_conv_and_pool) {
// 2 conv2d + 2 pool2d - 1 orphaned conv2d
MainTest({"conv2d", "pool2d"}, 3);
}
TEST(Bfloat16PlacementPass, default_attr_value) { DefaultAttrTest(0); }
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(cpu_bfloat16_placement_pass);
......@@ -20,6 +20,7 @@
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
namespace paddle {
namespace framework {
......@@ -145,3 +146,11 @@ void TransposeFlattenConcatFusePass::ApplyImpl(ir::Graph *graph) const {
REGISTER_PASS(transpose_flatten_concat_fuse_pass,
paddle::framework::ir::TransposeFlattenConcatFusePass);
REGISTER_PASS_CAPABILITY(transpose_flatten_concat_fuse_pass)
.AddCombination(
paddle::framework::compatible::OpVersionComparatorCombination()
.EQ("transpose", 0)
.EQ("transpose2", 0)
.EQ("flatten", 0)
.EQ("concat", 0)
.EQ("fusion_transpose_flatten_concat", 0));
......@@ -69,7 +69,8 @@ class OpInfo {
const OpCreator& Creator() const {
PADDLE_ENFORCE_NOT_NULL(creator_,
"Operator's Creator has not been registered");
platform::errors::NotFound(
"Operator's Creator has not been registered."));
return creator_;
}
......@@ -79,11 +80,12 @@ class OpInfo {
std::string type = proto_ ? proto_->type() : "unknown";
PADDLE_ENFORCE_NOT_NULL(
grad_op_maker_,
platform::errors::NotFound(
"Operator %s's GradOpMaker has not been "
"registered.\nPlease check whether %s_op has "
"grad_op.\nIf not, please set stop_gradient to True "
"registered.\nPlease check whether (%s) operator has "
"gradient operator.\nIf not, please set stop_gradient to be True "
"for its input and output variables using var.stop_gradient=True.",
type.c_str(), type.c_str());
type.c_str(), type.c_str()));
return grad_op_maker_;
}
......@@ -100,11 +102,12 @@ class OpInfo {
std::string type = proto_ ? proto_->type() : "unknown";
PADDLE_ENFORCE_NOT_NULL(
dygraph_grad_op_maker_,
platform::errors::NotFound(
"Operator %s's DygraphGradOpMaker has not been "
"registered.\nPlease check whether %s_op has "
"grad_op.\nIf not, please set stop_gradient to True "
"registered.\nPlease check whether (%s) operator has "
"gradient operator.\nIf not, please set stop_gradient to be True "
"for its input and output variables using var.stop_gradient=True.",
type.c_str(), type.c_str());
type.c_str(), type.c_str()));
return dygraph_grad_op_maker_;
}
......@@ -130,14 +133,17 @@ class OpInfoMap {
}
void Insert(const std::string& type, const OpInfo& info) {
PADDLE_ENFORCE(!Has(type), "Operator %s has been registered", type);
PADDLE_ENFORCE_NE(Has(type), true,
platform::errors::AlreadyExists(
"Operator (%s) has been registered.", type));
map_.insert({type, info});
}
const OpInfo& Get(const std::string& type) const {
auto op_info_ptr = GetNullable(type);
PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not been registered",
type);
PADDLE_ENFORCE_NOT_NULL(
op_info_ptr,
platform::errors::NotFound("Operator (%s) is not registered.", type));
return *op_info_ptr;
}
......
......@@ -33,10 +33,18 @@ size_t OpKernelType::Hash::operator()(const OpKernelType& key) const {
cur_loc += OpKernelType::kLibBits;
int customized_value = key.customized_type_value_;
PADDLE_ENFORCE(customized_value < (1 << OpKernelType::kCustomizeBits));
PADDLE_ENFORCE_LT(customized_value, (1 << OpKernelType::kCustomizeBits),
platform::errors::Unavailable(
"Too many custom OpKernel attribute values, expected "
"maximum value is %d, received value is %d.",
(1 << OpKernelType::kCustomizeBits), customized_value));
customized_value = customized_value << cur_loc;
cur_loc += OpKernelType::kCustomizeBits;
PADDLE_ENFORCE(cur_loc < 64);
PADDLE_ENFORCE_LT(cur_loc, 64,
platform::errors::Unavailable(
"Too many OpKernel attribute values, expected maximum "
"value is 64, received value is %d.",
cur_loc));
std::hash<int> hasher;
return hasher(place + data_type + data_layout + library_type +
......
......@@ -43,7 +43,9 @@ OpProtoAndCheckerMaker::VariableBuilder OpProtoAndCheckerMaker::AddOutput(
void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() {
std::unordered_set<std::string> names;
auto checker = [&](const std::string& name) {
PADDLE_ENFORCE(!names.count(name), "[%s] is duplicated", name);
PADDLE_ENFORCE_EQ(
names.count(name), 0,
platform::errors::AlreadyExists("Attribute [%s] is duplicated.", name));
names.insert(name);
};
for (auto& attr : proto_->attrs()) {
......
......@@ -54,9 +54,10 @@ class Registrar {
template <typename... ARGS>
struct OperatorRegistrar : public Registrar {
explicit OperatorRegistrar(const char* op_type) {
if (OpInfoMap::Instance().Has(op_type)) {
PADDLE_THROW("'%s' is registered more than once.", op_type);
}
PADDLE_ENFORCE_EQ(
OpInfoMap::Instance().Has(op_type), false,
platform::errors::AlreadyExists(
"Operator '%s' is registered more than once.", op_type));
static_assert(sizeof...(ARGS) != 0,
"OperatorRegistrar should be invoked at least by OpClass");
OpInfo info;
......
......@@ -58,7 +58,8 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
AddInput("input", "input of cosine op").AsDuplicable();
AddOutput("output", "output of cosine op").AsIntermediate();
auto my_checker = [](int i) {
PADDLE_ENFORCE(i % 2 == 0, "'test_attr' must be even!");
PADDLE_ENFORCE_EQ(i % 2, 0, platform::errors::InvalidArgument(
"'test_attr' must be even!"));
};
AddAttr<int>("test_attr", "a simple test attribute")
.AddCustomChecker(my_checker);
......
......@@ -152,10 +152,10 @@ class OpVersionRegistrar {
return instance;
}
OpVersion& Register(const std::string& op_type) {
if (op_version_map_.find(op_type) != op_version_map_.end()) {
PADDLE_THROW("'%s' is registered in operator version more than once.",
op_type);
}
PADDLE_ENFORCE_EQ(
op_version_map_.find(op_type), op_version_map_.end(),
platform::errors::AlreadyExists(
"'%s' is registered in operator version more than once.", op_type));
op_version_map_.insert({op_type, OpVersion()});
return op_version_map_[op_type];
}
......
......@@ -164,15 +164,20 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
VLOG(4) << place << " " << DebugStringEx(&scope);
if (platform::is_gpu_place(place)) {
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW("Cannot run operator on place %s", place);
PADDLE_THROW(platform::errors::Unavailable(
"Cannot run operator on place %s, please recompile paddle or "
"reinstall Paddle with CUDA support.",
place));
#else
auto dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
platform::SetDeviceId(dev_id);
#endif
} else if (platform::is_xpu_place(place)) {
#ifndef PADDLE_WITH_XPU
PADDLE_THROW(platform::errors::Unimplemented(
"Cannot run operator on place %s", place));
PADDLE_THROW(platform::errors::Unavailable(
"Cannot run operator on place %s, please recompile paddle or "
"reinstall Paddle with XPU support.",
place));
#else
auto dev_id = BOOST_GET_CONST(platform::XPUPlace, place).device;
platform::SetXPUDeviceId(dev_id);
......@@ -214,7 +219,7 @@ std::string OperatorBase::Input(const std::string& name) const {
auto& ins = Inputs(name);
PADDLE_ENFORCE_LE(
ins.size(), 1UL,
platform::errors::AlreadyExists(
platform::errors::InvalidArgument(
"Operator %s's input %s should contain only one variable.", type_,
name));
return ins.empty() ? kEmptyVarName : ins[0];
......@@ -223,8 +228,10 @@ std::string OperatorBase::Input(const std::string& name) const {
const std::vector<std::string>& OperatorBase::Inputs(
const std::string& name) const {
auto it = inputs_.find(name);
PADDLE_ENFORCE(it != inputs_.end(), "Operator %s does not have the input %s.",
type_, name);
PADDLE_ENFORCE_NE(
it, inputs_.end(),
platform::errors::NotFound("Operator %s does not have the input %s.",
type_, name));
return it->second;
}
......@@ -238,17 +245,21 @@ bool OperatorBase::HasOutputs(const std::string& name) const {
std::string OperatorBase::Output(const std::string& name) const {
auto& outs = Outputs(name);
PADDLE_ENFORCE_LE(outs.size(), 1UL,
"Operator %s's output %s should contain only one variable.",
type_, name);
PADDLE_ENFORCE_LE(
outs.size(), 1UL,
platform::errors::InvalidArgument(
"Operator %s's output %s should contain only one variable.", type_,
name));
return outs.empty() ? kEmptyVarName : outs[0];
}
const std::vector<std::string>& OperatorBase::Outputs(
const std::string& name) const {
auto it = outputs_.find(name);
PADDLE_ENFORCE(it != outputs_.end(),
"Operator %s does not have an output called %s.", type_, name);
PADDLE_ENFORCE_NE(
it, outputs_.end(),
platform::errors::NotFound(
"Operator %s does not have an output called %s.", type_, name));
return it->second;
}
......@@ -391,16 +402,19 @@ void OperatorBase::CheckAllInputOutputSet() const {
for (auto& in : info_->Proto().inputs()) {
if (!in.dispensable()) {
PADDLE_ENFORCE(inputs_.find(in.name()) != inputs_.end(),
"Operator %s's input, %s, is not set", Type(), in.name());
PADDLE_ENFORCE_NE(
inputs_.find(in.name()), inputs_.end(),
platform::errors::NotFound("Operator %s's input (%s) is not set.",
Type(), in.name()));
}
}
for (auto& out : info_->Proto().outputs()) {
if (!out.dispensable()) {
PADDLE_ENFORCE(outputs_.find(out.name()) != outputs_.end(),
"Operator %s's output, %s, is not set", Type(),
out.name());
PADDLE_ENFORCE_NE(
outputs_.find(out.name()), outputs_.end(),
platform::errors::NotFound("Operator %s's output (%s) is not set.",
Type(), out.name()));
}
}
}
......@@ -428,8 +442,9 @@ const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var) {
} else if (var.IsType<SelectedRows>()) {
return &(var.Get<SelectedRows>().value());
} else {
PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
ToTypeName(var.Type()));
PADDLE_THROW(platform::errors::InvalidArgument(
"Variable type is %s, expect LoDTensor or SelectedRows.",
ToTypeName(var.Type())));
}
}
......@@ -439,8 +454,9 @@ Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var) {
} else if (var->IsType<SelectedRows>()) {
return var->GetMutable<SelectedRows>()->mutable_value();
} else {
PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
ToTypeName(var->Type()));
PADDLE_THROW(platform::errors::InvalidArgument(
"Variable type is %s, expect LoDTensor or SelectedRows.",
ToTypeName(var->Type())));
}
}
......@@ -462,7 +478,7 @@ const Variable* ExecutionContext::InputVar(const std::string& name) const {
PADDLE_ENFORCE_LE(
it->second.size(), 1UL,
platform::errors::AlreadyExists(
platform::errors::InvalidArgument(
"Operator %s's input %s should contain only one variable.",
op_.Type(), name));
return it->second.empty() ? nullptr : it->second[0];
......@@ -472,9 +488,11 @@ Variable* ExecutionContext::OutputVar(const std::string& name) const {
auto it = ctx_.outputs.find(name);
if (it == ctx_.outputs.end()) return nullptr;
PADDLE_ENFORCE_LE(it->second.size(), 1UL,
PADDLE_ENFORCE_LE(
it->second.size(), 1UL,
platform::errors::InvalidArgument(
"Operator %s's output %s should contain only one variable.",
op_.Type(), name);
op_.Type(), name));
return it->second.empty() ? nullptr : it->second[0];
}
......@@ -497,10 +515,11 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
std::transform(vars.begin(), vars.end(), std::back_inserter(res),
[&](const Variable* var) -> const Tensor* {
if (var == nullptr) return nullptr;
PADDLE_ENFORCE(
var->IsType<LoDTensor>(),
"should be LoDTensor, but the received type is %s",
ToTypeName(var->Type()));
PADDLE_ENFORCE_EQ(var->IsType<LoDTensor>(), true,
platform::errors::InvalidArgument(
"Input variable should be LoDTensor, "
"but the received type is %s.",
ToTypeName(var->Type())));
return &(var->Get<LoDTensor>());
});
return res;
......@@ -558,8 +577,10 @@ class RuntimeInferShapeContext : public InferShapeContext {
}
const auto& in = it->second;
if (in.size() == 0) return false;
PADDLE_ENFORCE_EQ(in.size(), 1UL,
"Input %s should not have more than one inputs", name);
PADDLE_ENFORCE_EQ(
in.size(), 1UL,
platform::errors::InvalidArgument(
"Input %s should not contain more than one inputs.", name));
return in[0] != nullptr;
}
......@@ -574,8 +595,10 @@ class RuntimeInferShapeContext : public InferShapeContext {
if (out.size() == 0) {
return false;
}
PADDLE_ENFORCE_EQ(out.size(), 1UL,
"Output %s should not have more than one outputs", name);
PADDLE_ENFORCE_EQ(
out.size(), 1UL,
platform::errors::InvalidArgument(
"Output %s should not contain more than one outputs.", name));
return out[0] != nullptr;
}
......@@ -644,16 +667,31 @@ class RuntimeInferShapeContext : public InferShapeContext {
size_t j = 0) override {
auto in_it = ctx_.inputs.find(in);
auto out_it = ctx_.outputs.find(out);
PADDLE_ENFORCE(in_it != ctx_.inputs.end() && in_it->second.size() > i,
"Inputs %s should have %llu argument", in, i);
PADDLE_ENFORCE(out_it != ctx_.outputs.end() && out_it->second.size() > j,
"Outputs %s should have %llu argument", out, j);
PADDLE_ENFORCE_NE(
in_it, ctx_.inputs.end(),
platform::errors::NotFound("Input %s does not exist.", in));
PADDLE_ENFORCE_NE(
out_it, ctx_.outputs.end(),
platform::errors::NotFound("Output %s does not exist.", out));
PADDLE_ENFORCE_LT(i, in_it->second.size(),
platform::errors::InvalidArgument(
"The index of input dimension is out of range, "
"excepted index less than %zu, but received %zu.",
in_it->second.size(), i));
PADDLE_ENFORCE_LT(j, out_it->second.size(),
platform::errors::InvalidArgument(
"The index of output dimension is out of range, "
"excepted index less than %zu, but received %zu.",
out_it->second.size(), j));
Variable* in_var = in_it->second[i];
Variable* out_var = out_it->second[j];
PADDLE_ENFORCE(in_var->Type() == out_var->Type(),
"The type of %s and %s is not the same.", in, out);
PADDLE_ENFORCE_EQ(
in_var->Type(), out_var->Type(),
platform::errors::InvalidArgument(
"The type of input (%s) and output (%s) are inconsistent.", in,
out));
if (in_var->IsType<framework::SelectedRows>()) {
auto& in_sele_rows = in_var->Get<framework::SelectedRows>();
......@@ -666,9 +704,9 @@ class RuntimeInferShapeContext : public InferShapeContext {
auto* out_lod_tensor = out_var->GetMutable<framework::LoDTensor>();
out_lod_tensor->Resize(in_lod_tensor.dims());
} else {
PADDLE_THROW(
PADDLE_THROW(platform::errors::Unimplemented(
"Currently, the input type of ShareDim only can be LoDTensor "
"or SelectedRows.");
"or SelectedRows."));
}
}
......@@ -721,16 +759,30 @@ class RuntimeInferShapeContext : public InferShapeContext {
size_t j = 0) const override {
auto in_it = ctx_.inputs.find(in);
auto out_it = ctx_.outputs.find(out);
PADDLE_ENFORCE(in_it != ctx_.inputs.end() && in_it->second.size() > i,
"Inputs %s should have %llu argument", in, i);
PADDLE_ENFORCE(out_it != ctx_.outputs.end() && out_it->second.size() > j,
"Outputs %s should have %llu argument", out, j);
PADDLE_ENFORCE_NE(
in_it, ctx_.inputs.end(),
platform::errors::NotFound("Input %s does not exist.", in));
PADDLE_ENFORCE_NE(
out_it, ctx_.outputs.end(),
platform::errors::NotFound("Output %s does not exist.", out));
PADDLE_ENFORCE_LT(i, in_it->second.size(),
platform::errors::InvalidArgument(
"The index of input dimension is out of range, "
"excepted index less than %zu, but received %zu.",
in_it->second.size(), i));
PADDLE_ENFORCE_LT(j, out_it->second.size(),
platform::errors::InvalidArgument(
"The index of output dimension is out of range, "
"excepted index less than %zu, but received %zu.",
out_it->second.size(), j));
Variable* in_var = in_it->second.at(i);
if (!in_var->IsType<LoDTensor>()) return;
Variable* out_var = out_it->second.at(j);
PADDLE_ENFORCE(out_var->IsType<LoDTensor>(),
"The %d-th output of Output(%s) must be LoDTensor.", j, out);
PADDLE_ENFORCE_EQ(
out_var->IsType<LoDTensor>(), true,
platform::errors::InvalidArgument(
"The %zu-th output of Output(%s) must be LoDTensor.", j, out));
auto& in_tensor = in_var->Get<LoDTensor>();
auto* out_tensor = out_var->GetMutable<LoDTensor>();
out_tensor->set_lod(in_tensor.lod());
......@@ -757,18 +809,18 @@ class RuntimeInferShapeContext : public InferShapeContext {
}
int32_t GetLoDLevel(const std::string& in, size_t i = 0) const override {
PADDLE_THROW(
PADDLE_THROW(platform::errors::PreconditionNotMet(
"GetLoDLevel is only used in compile time. The calculation of "
"output's actual lod is different among operators so that should be "
"set in the runtime kernel.");
"set in the runtime kernel."));
}
void SetLoDLevel(const std::string& out, int32_t lod_level,
size_t j = 0) const override {
PADDLE_THROW(
PADDLE_THROW(platform::errors::PreconditionNotMet(
"SetLoDLevel is only used in compile time. The calculation of "
"output's actual lod is different among operators so that should be "
"set in the runtime kernel.");
"set in the runtime kernel."));
}
bool IsRuntime() const override { return true; }
......@@ -794,9 +846,11 @@ class RuntimeInferShapeContext : public InferShapeContext {
DDim GetInputDim(const std::string& name) const override {
const std::vector<Variable*>& vars = InputVars(name);
PADDLE_ENFORCE_EQ(vars.size(), 1UL,
"Input(%s) should hold one element, but now it holds %d",
name, vars.size());
PADDLE_ENFORCE_EQ(
vars.size(), 1UL,
platform::errors::InvalidArgument(
"Input(%s) should hold one element, but now it holds %zu elements.",
name, vars.size()));
return this->GetDim(vars[0]);
}
......@@ -817,9 +871,11 @@ class RuntimeInferShapeContext : public InferShapeContext {
void SetOutputDim(const std::string& name, const DDim& dim) override {
auto& vars = OutputVars(name);
PADDLE_ENFORCE_EQ(vars.size(), 1UL,
"Output(%s) should hold one element, but now it holds %d",
name, vars.size());
PADDLE_ENFORCE_EQ(
vars.size(), 1UL,
platform::errors::InvalidArgument("Output(%s) should hold one element, "
"but now it holds %zu elements.",
name, vars.size()));
SetDim(vars[0], dim);
}
......@@ -831,16 +887,17 @@ class RuntimeInferShapeContext : public InferShapeContext {
protected:
DDim GetDim(Variable* var) const {
PADDLE_ENFORCE_NOT_NULL(var);
PADDLE_ENFORCE_NOT_NULL(
var, platform::errors::InvalidArgument("Input variable is nullptr."));
if (var->IsType<LoDTensor>()) {
return var->Get<LoDTensor>().dims();
} else if (var->IsType<SelectedRows>()) {
return var->Get<SelectedRows>().GetCompleteDims();
} else {
PADDLE_THROW(
"Only LoDTensor/SelectedRows support 'GetDim', but Variables "
"type_id is %s.",
ToTypeName(var->Type()));
PADDLE_THROW(platform::errors::InvalidArgument(
"Only LoDTensor or SelectedRows support 'GetDim', but input "
"Variable's type is %s.",
ToTypeName(var->Type())));
}
}
......@@ -853,7 +910,8 @@ class RuntimeInferShapeContext : public InferShapeContext {
}
std::vector<DDim> GetRepeatedDims(const std::string& name) const override {
PADDLE_THROW("Only compile time support this method");
PADDLE_THROW(platform::errors::PreconditionNotMet(
"GetRepeatedDims method only ban be used in compile time."));
}
void SetDim(Variable* var, const DDim& dim) {
......@@ -862,15 +920,22 @@ class RuntimeInferShapeContext : public InferShapeContext {
} else if (var->IsType<SelectedRows>()) {
var->GetMutable<SelectedRows>()->set_height(dim[0]);
} else {
PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
ToTypeName(var->Type()));
PADDLE_THROW(platform::errors::Unimplemented(
"Variable type error, expect LoDTensor or SelectedRows, but received "
"(%s).",
ToTypeName(var->Type())));
}
}
void SetDims(const std::vector<Variable*>& vars,
const std::vector<DDim>& dims) {
size_t length = vars.size();
PADDLE_ENFORCE_EQ(length, dims.size());
PADDLE_ENFORCE_EQ(length, dims.size(),
platform::errors::InvalidArgument(
"The number of input variables do not match the "
"number of input dimensions, the number of variables "
"is %zu, the number of dimensions is %zu.",
length, dims.size()));
for (size_t i = 0; i < length; ++i) {
if (vars[i] == nullptr) {
continue;
......@@ -881,7 +946,8 @@ class RuntimeInferShapeContext : public InferShapeContext {
void SetRepeatedDims(const std::string& name,
const std::vector<DDim>& dims) override {
PADDLE_THROW("Only compile time support this method");
PADDLE_THROW(platform::errors::PreconditionNotMet(
"SetRepeatedDims method only can be used in compile time."));
}
std::vector<proto::VarType::Type> GetVarTypes(
......@@ -901,16 +967,19 @@ class RuntimeInferShapeContext : public InferShapeContext {
private:
const std::vector<Variable*>& InputVars(const std::string& name) const {
auto it = ctx_.inputs.find(name);
PADDLE_ENFORCE(it != ctx_.inputs.end(),
"Operator %s does not have the input %s.", op_.Type(), name);
PADDLE_ENFORCE_NE(
it, ctx_.inputs.end(),
platform::errors::NotFound(
"Operator (%s) does not have the input (%s).", op_.Type(), name));
return it->second;
}
const std::vector<Variable*>& OutputVars(const std::string& name) const {
auto it = ctx_.outputs.find(name);
PADDLE_ENFORCE(it != ctx_.outputs.end(),
"Operator %s does not have the outputs %s.", op_.Type(),
name);
PADDLE_ENFORCE_NE(
it, ctx_.outputs.end(),
platform::errors::NotFound(
"Operator (%s) does not have the outputs (%s).", op_.Type(), name));
return it->second;
}
......@@ -928,10 +997,14 @@ static void CheckTensorNANOrInf(const std::string& op_type,
tensor.type() != proto::VarType::FP64) {
return;
}
PADDLE_ENFORCE(!framework::TensorContainsInf(tensor),
"Operator %s output Tensor %s contains Inf", op_type, name);
PADDLE_ENFORCE(!framework::TensorContainsNAN(tensor),
"Operator %s output Tensor %s contains NAN", op_type, name);
PADDLE_ENFORCE_NE(
framework::TensorContainsInf(tensor), true,
platform::errors::Fatal("Operator %s output Tensor %s contains Inf.",
op_type, name));
PADDLE_ENFORCE_NE(
framework::TensorContainsNAN(tensor), true,
platform::errors::Fatal("Operator %s output Tensor %s contains NAN.",
op_type, name));
}
void OperatorWithKernel::RuntimeInferShape(const Scope& scope,
......@@ -1074,10 +1147,11 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx,
// check if op[type] has kernel registered.
auto& all_op_kernels = AllOpKernels();
auto kernels_iter = all_op_kernels.find(type_);
if (kernels_iter == all_op_kernels.end()) {
PADDLE_THROW(
"There are no kernels which are registered in the %s operator.", type_);
}
PADDLE_ENFORCE_NE(
kernels_iter, all_op_kernels.end(),
platform::errors::Unavailable(
"There are no kernels which are registered in the %s operator.",
type_));
OpKernelMap& kernels = kernels_iter->second;
......@@ -1131,10 +1205,10 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx,
kernel_iter = kernels.find(expected_kernel_key);
}
#endif
if (kernel_iter == kernels.end()) {
PADDLE_THROW("op %s does not have kernel for %s", type_,
KernelTypeToString(expected_kernel_key));
}
PADDLE_ENFORCE_NE(kernel_iter, kernels.end(),
platform::errors::NotFound(
"Operator (%s) does not have kernel for %s.", type_,
KernelTypeToString(expected_kernel_key)));
std::lock_guard<std::mutex> lock(cache_update_mutex_);
if (kernel_type_.get() == nullptr || kernel_func_.get() == nullptr) {
......@@ -1149,13 +1223,14 @@ void OperatorWithKernel::TransferInplaceVarsBack(
for (auto& var_name : inplace_vars) {
VLOG(3) << "share inplace var " + var_name + " back to it's original scope";
auto* origin_var = scope.FindVar(var_name);
PADDLE_ENFORCE_NOT_NULL(origin_var, "The var[%s] should not be nullptr.",
var_name);
PADDLE_ENFORCE_NOT_NULL(origin_var,
platform::errors::InvalidArgument(
"The variable[%s] is nullptr.", var_name));
auto* original_tensor =
GetMutableLoDTensorOrSelectedRowsValueFromVar(origin_var);
auto* var = transfer_scope.FindVar(var_name);
PADDLE_ENFORCE_NOT_NULL(var, "The var[%s] should not be nullptr.",
var_name);
PADDLE_ENFORCE_NOT_NULL(var, platform::errors::InvalidArgument(
"The variable[%s] is nullptr.", var_name));
auto* transformed_tensor = GetLoDTensorOrSelectedRowsValueFromVar(*var);
auto original_dims = original_tensor->dims();
original_tensor->ShareDataWith(*transformed_tensor);
......@@ -1380,9 +1455,11 @@ proto::VarType::Type OperatorWithKernel::IndicateVarDataType(
ParseInputDataType(ctx, name, &data_type);
PADDLE_ENFORCE_NE(
data_type, dafault_data_type,
"The Input Variable(%s) of %s Op used to determine kernel data type "
"is empty or not LoDTensor or SelectedRows or LoDTensorArray.",
name, Type());
platform::errors::InvalidArgument(
"The Input Variable(%s) of (%s) Operator used to determine kernel "
"data type is empty or not LoDTensor or SelectedRows or "
"LoDTensorArray.",
name, Type()));
return data_type;
}
......
......@@ -495,9 +495,9 @@ TEST(IndicateVarDataTypeTest, other) {
EXPECT_TRUE(
ex_msg.find(
"The Input Variable(Other) of "
"indicate_other_data_type_test Op used to "
"(indicate_other_data_type_test) Operator used to "
"determine kernel data type "
"is empty or not LoDTensor or SelectedRows or LoDTensorArray") !=
"is empty or not LoDTensor or SelectedRows or LoDTensorArray.") !=
std::string::npos);
}
ASSERT_TRUE(caught);
......
......@@ -20,7 +20,10 @@ namespace framework {
void ReaderBase::ReadNext(std::vector<LoDTensor> *out) {
std::lock_guard<std::mutex> lock(mu_);
PADDLE_ENFORCE_EQ(status_, ReaderStatus::kRunning);
PADDLE_ENFORCE_EQ(status_, ReaderStatus::kRunning,
platform::errors::Unavailable(
"The current reader has stopped running and cannot "
"continue to read the next batch of data."));
ReadNextImpl(out);
}
......
......@@ -32,17 +32,21 @@ struct RWLock {
~RWLock() { pthread_rwlock_destroy(&lock_); }
inline void RDLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_rdlock(&lock_), 0,
"acquire read lock failed");
PADDLE_ENFORCE_EQ(
pthread_rwlock_rdlock(&lock_), 0,
platform::errors::External("The pthread failed to acquire read lock."));
}
inline void WRLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_wrlock(&lock_), 0,
"acquire write lock failed");
platform::errors::External(
"The pthread failed to acquire write lock."));
}
inline void UNLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_unlock(&lock_), 0, "unlock failed");
PADDLE_ENFORCE_EQ(
pthread_rwlock_unlock(&lock_), 0,
platform::errors::External("The pthread failed to unlock."));
}
private:
......
......@@ -33,7 +33,8 @@ void CheckInStreamState(std::istream& istre, size_t length) {
VLOG(5) << "Can't read [" << length << "] from file"
<< "file seems breakem";
PADDLE_THROW("Model load error, file seems breaken");
PADDLE_THROW(platform::errors::Unavailable(
"Model load failed, istream state error."));
}
}
......@@ -58,10 +59,11 @@ size_t ReadTensorNumber(std::istream& istre) {
sizeof(char) * tensor_number_mark.size());
std::string str_read_tensor_number_mark(tensor_number_mark_buffer,
tensor_number_mark.size());
PADDLE_ENFORCE_EQ(
tensor_number_mark, str_read_tensor_number_mark,
"Tensor number mark not match, expect [%s], but read from file is [%]",
tensor_number_mark, str_read_tensor_number_mark);
PADDLE_ENFORCE_EQ(tensor_number_mark, str_read_tensor_number_mark,
platform::errors::InvalidArgument(
"Tensor number mark does not match, expect mark is "
"[%s], but the mark read from file is [%s].",
tensor_number_mark, str_read_tensor_number_mark));
size_t tensor_number = 0;
istre.read(reinterpret_cast<char*>(&tensor_number), sizeof(tensor_number));
......@@ -79,10 +81,11 @@ std::string ReadTensorName(std::istream& istre) {
std::string str_read_tensor_name_mark(name_mark_buffer,
tensor_name_mark.size());
PADDLE_ENFORCE_EQ(
tensor_name_mark, str_read_tensor_name_mark,
"Tensor name mark not match, expect [%s], but read from file is [%]",
tensor_name_mark, str_read_tensor_name_mark);
PADDLE_ENFORCE_EQ(tensor_name_mark, str_read_tensor_name_mark,
platform::errors::InvalidArgument(
"Tensor name mark does not match, expect mark is [%s], "
"but the mark read from file is [%s].",
tensor_name_mark, str_read_tensor_name_mark));
size_t tensor_name_length = 0;
istre.read(reinterpret_cast<char*>(&tensor_name_length),
......@@ -117,16 +120,18 @@ bool SaveStaticNameListToDisk(
for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) {
auto var_ptr = scope.FindVar(vec_tensor_name_list[i]);
PADDLE_ENFORCE_NE(
var_ptr, nullptr,
"Variable find error, when save model, can't not find vairable [%s], "
"Please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
PADDLE_ENFORCE_NOT_NULL(
var_ptr, platform::errors::NotFound("Variable (%s) is not found when "
"saving model, please make sure "
"that exe.run(startup_program) has "
"been executed.",
vec_tensor_name_list[i]));
Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed,"
"Please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
platform::errors::PreconditionNotMet(
"Paramter [%s] is not initialzed, please make sure "
"that exe.run(startup_program) has been executed.",
vec_tensor_name_list[i]));
map_tensor[vec_tensor_name_list[i]] = tensor;
}
......@@ -145,9 +150,10 @@ bool SaveDygraphVarBaseListToDisk(
Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed,"
"Please make sure you have run StartUpProgram",
vec_var_base_list[i]->Name());
platform::errors::PreconditionNotMet(
"Paramter [%s] is not initialzed, please make sure "
"that exe.run(startup_program) has been executed.",
vec_var_base_list[i]->Name()));
map_tensor[vec_var_base_list[i]->Name()] = tensor;
}
......@@ -185,34 +191,41 @@ bool LoadStaticNameListFromDisk(
for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) {
auto it = map_load_tensor.find(vec_tensor_name_list[i]);
PADDLE_ENFORCE(it != map_load_tensor.end(),
"Paramete not found in Model file, "
"Can not find [%s] in model file [%s]",
vec_tensor_name_list[i], file_name);
PADDLE_ENFORCE_NE(it, map_load_tensor.end(),
platform::errors::NotFound(
"Parameter (%s) not found in model file (%s).",
vec_tensor_name_list[i], file_name));
auto var_ptr = scope.FindVar(vec_tensor_name_list[i]);
PADDLE_ENFORCE_NE(
var_ptr, nullptr,
"Parameter not created, when load model, can't not find parameter [%s] "
"please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
PADDLE_ENFORCE_NOT_NULL(
var_ptr,
platform::errors::PreconditionNotMet(
"Parameter (%s) is not created when loading model, "
"please make sure that exe.run(startup_program) has been executed.",
vec_tensor_name_list[i]));
Tensor* tensor = var_ptr->GetMutable<LoDTensor>();
PADDLE_ENFORCE_NE(tensor, nullptr,
"Paramter [%s] not initialzed "
"please make sure you have run startUpProgram",
vec_tensor_name_list[i]);
PADDLE_ENFORCE_NOT_NULL(
tensor,
platform::errors::PreconditionNotMet(
"Paramter [%s] is not initialzed, "
"please make sure that exe.run(startup_program) has been executed.",
vec_tensor_name_list[i]));
PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true,
"Paramter [%s] not initialzed "
"please make sure you have run StartUpProgram",
vec_tensor_name_list[i]);
platform::errors::PreconditionNotMet(
"Paramter [%s] is not initialzed, "
"please make sure that exe.run(startup_program) has "
"been executed.v",
vec_tensor_name_list[i]));
PADDLE_ENFORCE_EQ(
tensor->dims(), it->second->dims(),
"Shape not matching: the Program requires a parameter with a shape of "
"(%s), "
"while the loaded parameter (namely [ %s ]) has a shape of (%s).",
tensor->dims(), vec_tensor_name_list[i], it->second->dims());
platform::errors::InvalidArgument(
"Shape does not match, the program requires a parameter with a "
"shape of "
"(%s), while the loaded parameter (namely [ %s ]) has a shape of "
"(%s).",
tensor->dims(), vec_tensor_name_list[i], it->second->dims()));
TensorCopySync(*(it->second.get()), tensor->place(), tensor);
......@@ -239,9 +252,9 @@ bool SaveTensorToDisk(const std::string& file_name,
MkDirRecursively(DirName(file_name).c_str());
std::ofstream fout(file_name, std::ios::binary);
if (!fout) {
PADDLE_THROW("File open error. Can not open file [%s]", file_name);
}
PADDLE_ENFORCE_EQ(
fout.is_open(), true,
platform::errors::Unavailable("File (%s) open failed.", file_name));
  // first 256 bytes are reserved for future upgrade
char* kReserveBuffer = new char[model_file_reserve_size];
......@@ -292,9 +305,8 @@ bool SaveTensorToDisk(const std::string& file_name,
TensorCopySync(*tensor, platform::CPUPlace(), &temp);
data_ptr = temp.data<void>();
#else
PADDLE_THROW(
"Tensor is in CUDA device, but paddle not compile with CUDA, this "
"should not happen");
PADDLE_THROW(platform::errors::Unavailable(
"Tensor is in CUDA device, but paddle not compiled with CUDA."));
#endif
}
fout.write(static_cast<const char*>(data_ptr),
......@@ -302,8 +314,9 @@ bool SaveTensorToDisk(const std::string& file_name,
}
if (!fout) {
PADDLE_THROW("Model save failed, data write to model file [%s] error",
file_name);
PADDLE_THROW(platform::errors::Unavailable(
"Model save failed, error when writing data into model file [%s].",
file_name));
}
fout.close();
......@@ -316,9 +329,9 @@ bool LoadTensorFromDisk(
std::map<std::string, std::shared_ptr<Tensor>>* map_tensor) {
std::ifstream fin(file_name, std::ios::binary);
if (!fin) {
PADDLE_THROW("File open error. Can not open model file [%s]", file_name);
}
PADDLE_ENFORCE_EQ(
fin.is_open(), true,
platform::errors::Unavailable("File (%s) open failed.", file_name));
ReadReserveBuffer(fin);
......@@ -331,7 +344,8 @@ bool LoadTensorFromDisk(
uint32_t version;
fin.read(reinterpret_cast<char*>(&version), sizeof(version));
CheckInStreamState(fin, sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
PADDLE_ENFORCE_EQ(version, 0U, platform::errors::InvalidArgument(
"Only version 0 tensor is supported."));
proto::VarType::TensorDesc desc;
{
// int32_t size
......@@ -344,7 +358,7 @@ bool LoadTensorFromDisk(
CheckInStreamState(fin, sizeof(size));
PADDLE_ENFORCE_EQ(
desc.ParseFromArray(buf.get(), size), true,
platform::errors::InvalidArgument("Cannot parse tensor desc"));
platform::errors::InvalidArgument("Parse tensor desc failed."));
}
{ // read tensor
......
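All of the hunks above apply one mechanical rule: a bare-string PADDLE_ENFORCE / PADDLE_THROW becomes an explicit comparison macro (PADDLE_ENFORCE_EQ / _NE / _NOT_NULL / _GT, ...) whose last argument is a typed platform::errors message. A minimal sketch of the rule; the variable names and the NotFound / Unavailable categories below are illustrative, not copied from a specific hunk:

  // before: condition, format string and arguments are packed into one call,
  // so the thrown error carries no category.
  //   PADDLE_ENFORCE(var_ptr != nullptr, "can not find variable [%s]", name);
  //   PADDLE_THROW("File open error. Can not open file [%s]", file_name);

  // after: the comparison is explicit and the message is wrapped in an
  // error class from platform::errors.
  PADDLE_ENFORCE_NOT_NULL(
      var_ptr,
      platform::errors::NotFound("Variable [%s] is not found.", name));
  PADDLE_ENFORCE_EQ(
      fout.is_open(), true,
      platform::errors::Unavailable("File (%s) open failed.", file_name));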
......@@ -113,7 +113,9 @@ void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows,
    // the 1st field, uint32_t version for SelectedRows
uint32_t version;
is.read(reinterpret_cast<char*>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
PADDLE_ENFORCE_EQ(version, 0U,
platform::errors::InvalidArgument(
"Only version 0 SelectedRows is supported."));
}
{
    // the 2nd field, rows information
......@@ -155,24 +157,27 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown,
auto iter = id_to_index_.find(key);
if (iter == id_to_index_.end()) {
rwlock_->UNLock();
if (!auto_grown) {
PADDLE_THROW("key %d not found", key);
}
PADDLE_ENFORCE_EQ(
auto_grown, true,
platform::errors::NotFound("Input key(%lld) is not found.", key));
rwlock_->WRLock();
auto map_size = id_to_index_.size();
auto vector_size = rows_.size();
if (map_size != vector_size) {
rwlock_->UNLock();
PADDLE_THROW(
"id_to_index_ size %d should have the same size with rows_ %d",
map_size, vector_size);
PADDLE_THROW(platform::errors::InvalidArgument(
"Row map size(%zu) should be equal to rows size(%zu).", map_size,
vector_size));
}
auto write_iter = id_to_index_.find(key);
if (write_iter == id_to_index_.end()) {
int row_num = rows_.size();
if (row_num == value_->dims()[0]) {
rwlock_->UNLock();
PADDLE_THROW("selected rows is full, then length exceed %d", row_num);
PADDLE_THROW(platform::errors::InvalidArgument(
"Selected rows is full, then length exceed the length of first "
"dimension (%d).",
row_num));
}
// key logic to put a key into id_to_index_
rows_.push_back(key);
......@@ -203,15 +208,20 @@ void SelectedRows::SyncIndex() {
void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
bool auto_grown, bool is_test) {
PADDLE_ENFORCE(value->IsInitialized(),
"The value tensor should be initialized.");
PADDLE_ENFORCE_EQ(value->IsInitialized(), true,
platform::errors::InvalidArgument(
"The value tensor is not initialized."));
if (ids.numel() == 0) {
VLOG(3) << "keys is empty, please check data!";
} else {
int64_t value_width = value_->numel() / value_->dims()[0];
PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0],
"output tensor should have the same shape with table "
"except the dims[0].");
PADDLE_ENFORCE_EQ(
value_width, value->numel() / value->dims()[0],
platform::errors::InvalidArgument(
"Output tensor should have the same shape with table "
"except the first dimmension, excepted value width not counting "
"the first dimension is %d, actual value width is %d.",
value_width, value->numel() / value->dims()[0]));
for (int i = 0; i < ids.numel(); ++i) {
auto id = ids.data<int64_t>()[i];
int64_t index = AutoGrownIndex(id, auto_grown, is_test);
......
......@@ -82,7 +82,8 @@ class SelectedRows {
int64_t Index(int64_t key) const {
auto it = std::find(rows_.begin(), rows_.end(), key);
if (it == rows_.end()) {
PADDLE_THROW("id %s not in table", key);
PADDLE_THROW(platform::errors::NotFound(
"Input id (%lld) is not in current rows table.", key));
}
return static_cast<int64_t>(std::distance(rows_.begin(), it));
}
......
......@@ -25,20 +25,22 @@ namespace framework {
std::vector<DDim> InferShapeContext::GetReaderDims(
const std::string &name) const {
const std::vector<std::string> &arg_names = Inputs(name);
PADDLE_ENFORCE_EQ(
arg_names.size(), 1UL,
"Reader input '%s' should hold one element, but now it holds %d", name,
arg_names.size());
PADDLE_ENFORCE_EQ(arg_names.size(), 1UL,
platform::errors::InvalidArgument(
"Reader input '%s' should hold one element, but now it "
"holds %d elements.",
name, arg_names.size()));
return this->GetRepeatedDims(arg_names[0]);
}
void InferShapeContext::SetReaderDims(const std::string &name,
const std::vector<DDim> &dims) {
const std::vector<std::string> &arg_names = Outputs(name);
PADDLE_ENFORCE_EQ(
arg_names.size(), 1UL,
"Reader output '%s' should hold one element, but now it holds %d", name,
arg_names.size());
PADDLE_ENFORCE_EQ(arg_names.size(), 1UL,
platform::errors::InvalidArgument(
"Reader output '%s' should hold one element, but now "
"it holds %d elements.",
name, arg_names.size()));
return this->SetRepeatedDims(arg_names[0], dims);
}
......
......@@ -94,9 +94,17 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place);
auto dst_cpu_place = BOOST_GET_CONST(platform::CPUPlace, dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true);
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx_place), true,
platform::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
ctx_place));
auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place,
platform::errors::Unavailable(
"Source place and context place do not match, source "
"place is %s, context place is %s.",
src_gpu_place, ctx_gpu_place));
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
......@@ -106,9 +114,17 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto src_cpu_place = BOOST_GET_CONST(platform::CPUPlace, src_place);
auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true);
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx_place), true,
platform::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
ctx_place));
auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place);
PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place);
PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place,
platform::errors::Unavailable(
"Destination place and context place do not match, "
"destination place is %s, context place is %s.",
dst_gpu_place, ctx_gpu_place));
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream);
......@@ -164,7 +180,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place);
auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true);
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx_place), true,
platform::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
ctx_place));
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_same_place(src_place, dst_place)) {
......@@ -180,12 +200,14 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
stream);
} else {
PADDLE_THROW("ctx is not belong to dst_gpu_place or src_gpu_place.");
PADDLE_THROW(platform::errors::Unavailable(
"Context place dose not match the source and destination place."));
}
}
}
else { // NOLINT
PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place);
PADDLE_THROW(platform::errors::Unimplemented(
"Copying from %s to %s is not supported.", src_place, dst_place));
}
#endif
}
......@@ -298,7 +320,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
nullptr);
}
else { // NOLINT
PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place);
PADDLE_THROW(platform::errors::Unimplemented(
"Copy from %s to %s is not supported.", src_place, dst_place));
}
#endif
}
......@@ -832,7 +855,9 @@ void TensorFromStream(std::istream& is, Tensor* tensor,
void* GetDstPtrByDLDataType(DLDataType type, framework::Tensor* dst,
const platform::Place& dst_place) {
// vector types not currently supported
PADDLE_ENFORCE_LE(type.lanes, 1, "vector types not currently supported");
PADDLE_ENFORCE_LE(type.lanes, 1,
platform::errors::Unimplemented(
"Vector type is not supported currently."));
switch (type.bits) {
case 8:
......@@ -840,32 +865,37 @@ void* GetDstPtrByDLDataType(DLDataType type, framework::Tensor* dst,
return static_cast<void*>(dst->mutable_data<int8_t>(dst_place));
if (type.code == kDLUInt)
return static_cast<void*>(dst->mutable_data<uint8_t>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.",
type.code, type.bits);
PADDLE_THROW(platform::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
case 16:
if (type.code == kDLInt)
return static_cast<void*>(dst->mutable_data<int16_t>(dst_place));
if (type.code == kDLFloat)
return static_cast<void*>(
dst->mutable_data<paddle::platform::float16>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.",
type.code, type.bits);
PADDLE_THROW(platform::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
case 32:
if (type.code == kDLInt)
return static_cast<void*>(dst->mutable_data<int32_t>(dst_place));
if (type.code == kDLFloat)
return static_cast<void*>(dst->mutable_data<float>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.",
type.code, type.bits);
PADDLE_THROW(platform::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
case 64:
if (type.code == kDLInt)
return static_cast<void*>(dst->mutable_data<int64_t>(dst_place));
if (type.code == kDLFloat)
return static_cast<void*>(dst->mutable_data<double>(dst_place));
PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.",
type.code, type.bits);
PADDLE_THROW(platform::errors::Unimplemented(
"DLDataType code <%d> is illegal when DLDataType.bits is <%d>.",
type.code, type.bits));
default:
PADDLE_THROW("Unsupport type.bits %d", type.bits);
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported DLDataType.bits %d.", type.bits));
}
}
......
......@@ -183,7 +183,11 @@ void TensorToVector(const Tensor& src, std::vector<T>* dst) {
dst->resize(src.numel());
auto dst_ptr = static_cast<void*>(dst->data());
PADDLE_ENFORCE_EQ(platform::is_cpu_place(src.place()), true);
PADDLE_ENFORCE_EQ(
platform::is_cpu_place(src.place()), true,
platform::errors::InvalidArgument(
"The input tensor should be CPU device, but actually it is in %s.",
src.place()));
memory::Copy(dst_place, dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src.place()), src_ptr, size);
......
......@@ -27,8 +27,9 @@ Analyzer::Analyzer() {}
void Analyzer::Run(Argument *argument) { RunAnalysis(argument); }
void Analyzer::RunAnalysis(Argument *argument) {
PADDLE_ENFORCE(argument->analysis_passes_valid(),
"analsis_passes is not valid in the argument.");
PADDLE_ENFORCE_EQ(argument->analysis_passes_valid(), true,
platform::errors::InvalidArgument(
"analsis_passes is not valid in the argument."));
const bool disable_logs = argument->disable_logs();
for (auto &pass : argument->analysis_passes()) {
if (!disable_logs) {
......@@ -38,7 +39,8 @@ void Analyzer::RunAnalysis(Argument *argument) {
continue;
auto *ptr = PassRegistry::Global().Retreive(pass);
PADDLE_ENFORCE_NOT_NULL(ptr, "no analysis pass called %s", pass);
PADDLE_ENFORCE_NOT_NULL(ptr, platform::errors::PreconditionNotMet(
"no analysis pass called %s", pass));
ptr->Run(argument);
}
}
......
......@@ -75,9 +75,14 @@ void TestWord2vecPrediction(const std::string& model_path) {
std::vector<PaddleTensor> outputs;
CHECK(predictor->Run(slots, &outputs));
PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
PADDLE_ENFORCE_EQ(outputs.size(), 1UL,
platform::errors::PreconditionNotMet(
"Output size should be 1, but got %d", outputs.size()));
// Check the output buffer size and result of each tid.
PADDLE_ENFORCE_EQ(outputs.front().data.length(), 33168UL);
PADDLE_ENFORCE_EQ(outputs.front().data.length(), 33168UL,
platform::errors::PreconditionNotMet(
"Output's data length should be 33168 but got %d",
outputs.front().data.length()));
float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float);
......
......@@ -79,7 +79,9 @@ struct Argument {
#define DECL_ARGUMENT_FIELD(field__, Field, type__) \
public: \
type__& field__() { \
PADDLE_ENFORCE(Has(#field__), "There is no such field"); \
PADDLE_ENFORCE_EQ( \
Has(#field__), true, \
platform::errors::PreconditionNotMet("There is no such field")); \
return field__##_; \
} \
void Set##Field(const type__& x) { \
......@@ -98,8 +100,11 @@ struct Argument {
#define DECL_ARGUMENT_UNIQUE_FIELD(field__, Field, type__) \
public: \
type__& field__() { \
PADDLE_ENFORCE_NOT_NULL(field__##_); \
PADDLE_ENFORCE(Has(#field__)); \
PADDLE_ENFORCE_NOT_NULL(field__##_, platform::errors::PreconditionNotMet( \
"filed should not be null.")); \
PADDLE_ENFORCE_EQ( \
Has(#field__), true, \
platform::errors::PreconditionNotMet("There is no such field")); \
return *static_cast<type__*>(field__##_.get()); \
} \
void Set##Field(type__* x) { \
......@@ -113,11 +118,15 @@ struct Argument {
} \
DECL_ARGUMENT_FIELD_VALID(field__); \
type__* field__##_ptr() { \
PADDLE_ENFORCE(Has(#field__)); \
PADDLE_ENFORCE_EQ( \
Has(#field__), true, \
platform::errors::PreconditionNotMet("There is no such field")); \
return static_cast<type__*>(field__##_.get()); \
} \
type__* Release##Field() { \
PADDLE_ENFORCE(Has(#field__)); \
PADDLE_ENFORCE_EQ( \
Has(#field__), true, \
platform::errors::PreconditionNotMet("There is no such field")); \
valid_fields_.erase(#field__); \
return static_cast<type__*>(field__##_.release()); \
} \
......@@ -227,8 +236,10 @@ struct Argument {
};
#define ARGUMENT_CHECK_FIELD(argument__, fieldname__) \
PADDLE_ENFORCE(argument__->Has(#fieldname__), \
"the argument field [%s] should be set", #fieldname__);
PADDLE_ENFORCE_EQ( \
argument__->Has(#fieldname__), true, \
platform::errors::PreconditionNotMet( \
"the argument field [%s] should be set", #fieldname__));
} // namespace analysis
} // namespace inference
......
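For reference, the ARGUMENT_CHECK_FIELD macro rewritten at the end of this hunk is consumed later in this same patch (for example in AnalysisPredictor::OptimizeInferenceProgram and in the MKL-DNN quantizer); the call below is copied from those hunks and only shows what the new PreconditionNotMet message reports:

  // If the IR analysis passes did not populate the field, this now throws
  // PreconditionNotMet("the argument field [ir_analyzed_program] should be set").
  ARGUMENT_CHECK_FIELD((&argument_), ir_analyzed_program);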
......@@ -73,12 +73,15 @@ struct DataTypeNamer {
template <typename T>
const std::string &repr() const {
auto x = std::type_index(typeid(T));
PADDLE_ENFORCE(dic_.count(x), "unknown type for representation");
PADDLE_ENFORCE_GT(dic_.count(x), 0, platform::errors::PreconditionNotMet(
"unknown type for representation"));
return dic_.at(x);
}
const std::string &repr(const std::type_index &type) const { // NOLINT
PADDLE_ENFORCE(dic_.count(type), "unknown type for representation");
PADDLE_ENFORCE_GT(dic_.count(type), 0,
platform::errors::PreconditionNotMet(
"unknown type for representation"));
return dic_.at(type);
}
......@@ -116,7 +119,9 @@ template <typename T>
class OrderedRegistry {
public:
T *Register(const std::string &name, T *x) {
PADDLE_ENFORCE(!dic_.count(name), "duplicate key [%s]", name);
PADDLE_ENFORCE_EQ(dic_.count(name), 0,
platform::errors::PreconditionNotMet(
"There exists duplicate key [%s]", name));
dic_[name] = elements_.size();
elements_.emplace_back(std::unique_ptr<T>(x));
return elements_.back().get();
......@@ -136,14 +141,20 @@ class OrderedRegistry {
template <typename T>
T &GetFromScope(const framework::Scope &scope, const std::string &name) {
framework::Variable *var = scope.FindVar(name);
PADDLE_ENFORCE(var != nullptr);
PADDLE_ENFORCE_NOT_NULL(
var, platform::errors::PreconditionNotMet(
"The var which name is %s should not be nullptr.", name));
return *var->GetMutable<T>();
}
static framework::proto::ProgramDesc LoadProgramDesc(
const std::string &model_path) {
std::ifstream fin(model_path, std::ios::in | std::ios::binary);
PADDLE_ENFORCE(fin.is_open(), "Cannot open file %s", model_path);
PADDLE_ENFORCE_EQ(
fin.is_open(), true,
platform::errors::NotFound(
"Cannot open file %s, please confirm whether the file exists",
model_path));
fin.seekg(0, std::ios::end);
std::string buffer(fin.tellg(), ' ');
fin.seekg(0, std::ios::beg);
......@@ -188,10 +199,12 @@ static std::string GetDirRoot(const std::string &path) {
static std::string GetOrCreateModelOptCacheDir(const std::string &model_root) {
std::string opt_cache_dir = model_root + "/_opt_cache/";
if (!PathExists(opt_cache_dir)) {
PADDLE_ENFORCE(MKDIR(opt_cache_dir.c_str()) != -1,
PADDLE_ENFORCE_NE(
MKDIR(opt_cache_dir.c_str()), -1,
platform::errors::PreconditionNotMet(
"Can not create optimize cache directory: %s, Make sure you "
"have permission to write",
opt_cache_dir);
opt_cache_dir));
}
return opt_cache_dir;
}
......
......@@ -38,7 +38,9 @@ IRPassManager::IRPassManager(Argument *argument) {
graph_ = std::unique_ptr<Graph>(new Graph(argument->main_program()));
if (argument->Has("scope")) {
auto *scope_ptr = argument->scope_ptr();
PADDLE_ENFORCE(scope_ptr);
PADDLE_ENFORCE_NOT_NULL(scope_ptr,
platform::errors::PreconditionNotMet(
"The scope ptr should not be nullptr."));
graph_->SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
}
......@@ -101,13 +103,17 @@ void IRPassManager::CreatePasses(Argument *argument,
std::string optim_cache_dir = argument->optim_cache_dir();
bool int8_valid =
!(model_from_memory && optim_cache_dir.empty() && enable_int8);
PADDLE_ENFORCE(int8_valid,
PADDLE_ENFORCE_EQ(
int8_valid, true,
platform::errors::PreconditionNotMet(
"When you are in TRT INT8 mode, and load model from "
"memory, you should set optim_cache_dir using "
"config.SetOptimCacheDir()");
PADDLE_ENFORCE(!(model_from_memory && use_static_engine),
"config.SetOptimCacheDir()"));
PADDLE_ENFORCE_EQ(
!(model_from_memory && use_static_engine), true,
platform::errors::PreconditionNotMet(
"When you are using Paddle-TRT, and also using load model "
"from memory, you should set the use_static to false.");
"from memory, you should set the use_static to false."));
if (!optim_cache_dir.empty()) {
pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
......
......@@ -123,7 +123,9 @@ void RenameAndGetOutputs(
auto add_block_var = [&](const std::string &graph_arg,
const std::string &block_arg) {
auto arg_var_node = graph_var_map.find(graph_arg);
PADDLE_ENFORCE(arg_var_node != graph_var_map.end());
PADDLE_ENFORCE_NE(arg_var_node, graph_var_map.end(),
platform::errors::InvalidArgument(
"Can not find %s in graph_var_map", graph_arg));
auto *var_t = block_desc->Var(block_arg);
var_t->SetShape(arg_var_node->second->Var()->GetShape());
var_t->SetDataType(arg_var_node->second->Var()->GetDataType());
......@@ -133,7 +135,10 @@ void RenameAndGetOutputs(
framework::proto::OpDesc *op = block_desc->Op(index)->Proto();
framework::OpDesc op_desc(*op, nullptr);
auto correspond_node = subgraph_nodes[index];
PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type());
PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type(),
platform::errors::PreconditionNotMet(
"We should get %s, but get %s", op->type(),
correspond_node->Name()));
std::unordered_map<std::string, size_t> var2id;
std::unordered_map<std::string, framework::ir::Node *> in_vars;
......
......@@ -97,7 +97,9 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
std::vector<std::string> *repetitive_params) const {
auto *op_desc = node->Op();
auto &subgraph = *framework::ir::Agent(node).subgraph();
PADDLE_ENFORCE(!subgraph.empty());
PADDLE_ENFORCE_EQ(subgraph.empty(), false,
platform::errors::PreconditionNotMet(
"The subgraph should not be empty."));
framework::ProgramDesc *program_desc =
Get<framework::ProgramDesc *>("program");
......@@ -194,12 +196,17 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
// to Tensor.
std::vector<std::string> output_mapping;
for (auto name : output_names) {
PADDLE_ENFORCE(output_name_map.count(name) != 0);
PADDLE_ENFORCE_NE(output_name_map.count(name), 0,
platform::errors::PreconditionNotMet(
"The output_name_map should have %s", name));
output_mapping.push_back(output_name_map[name]);
}
PADDLE_ENFORCE(!output_mapping.empty());
PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(),
"the block has no var-desc");
PADDLE_ENFORCE_EQ(output_mapping.empty(), false,
platform::errors::PreconditionNotMet(
"The output_mapping should not be empty."));
PADDLE_ENFORCE_EQ(
!block_desc.Proto()->vars().empty(), true,
platform::errors::PreconditionNotMet("the block has no var-desc"));
// Set attrs
op_desc->SetType("tensorrt_engine");
......
......@@ -13,6 +13,8 @@
// limitations under the License.
#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
#include <memory>
#include <utility>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/analysis/ir_pass_manager.h"
......@@ -31,7 +33,10 @@ void IrAnalysisPass::RunImpl(Argument* argument) {
// Apply passes.
IRPassManager the_ir_manager(argument);
graph = the_ir_manager.Apply(std::move(graph));
PADDLE_ENFORCE_GT(graph->Nodes().size(), 0);
PADDLE_ENFORCE_GT(
graph->Nodes().size(), 0,
platform::errors::PreconditionNotMet(
"The graph nodes size should be greater than 0, but got 0"));
argument->SetMainGraph(graph.release());
CollectFusionStatis(argument);
}
......
......@@ -31,7 +31,9 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
if (!argument->scope_valid()) {
argument->SetScope(new framework::Scope);
}
PADDLE_ENFORCE(argument->use_gpu_valid());
PADDLE_ENFORCE_EQ(argument->use_gpu_valid(), true,
platform::errors::PreconditionNotMet(
"The use_gpu field should be valid"));
// The load program should run on the same device with the inference program,
// so that the parameters will on the same device, or they will keep copying
......@@ -51,14 +53,17 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
argument->model_from_memory_valid() && argument->model_from_memory());
argument->SetMainProgram(program.release());
} else {
PADDLE_THROW(
"either model_dir or (program path and parameter path) should be set.");
PADDLE_THROW(platform::errors::PreconditionNotMet(
"either model_dir or (program path and parameter path) should be "
"set."));
}
auto graph = std::unique_ptr<Graph>(new Graph(argument->main_program()));
argument->SetMainGraph(graph.release());
auto *scope_ptr = argument->scope_ptr();
PADDLE_ENFORCE(scope_ptr);
PADDLE_ENFORCE_NOT_NULL(scope_ptr,
platform::errors::PreconditionNotMet(
"The scope ptr should not be nullptr."));
argument->main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
}
......
......@@ -31,7 +31,8 @@ void IrInferCleanGraphPass::RunImpl(Argument* argument) {
std::unordered_set<const framework::ir::Node*> invalid_nodes;
int valid_op = 0;
for (auto* node : graph.Nodes()) {
PADDLE_ENFORCE_NOT_NULL(node);
PADDLE_ENFORCE_NOT_NULL(node, platform::errors::PreconditionNotMet(
"The node should not be nullptr."));
if (is_valid_node(node)) {
invalid_nodes.insert(node);
} else if (node->IsOp()) {
......
......@@ -23,8 +23,12 @@ namespace inference {
namespace analysis {
void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
PADDLE_ENFORCE(argument->scope_valid());
PADDLE_ENFORCE(argument->use_gpu_valid());
PADDLE_ENFORCE_EQ(
argument->scope_valid(), true,
platform::errors::PreconditionNotMet("The scope field should be valid"));
PADDLE_ENFORCE_EQ(argument->use_gpu_valid(), true,
platform::errors::PreconditionNotMet(
"The use_gpu field should be valid"));
platform::Place place;
......@@ -40,7 +44,9 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
LOG(INFO) << "Sync params from CPU to GPU";
PADDLE_ENFORCE(argument->gpu_device_id_valid());
PADDLE_ENFORCE_EQ(argument->gpu_device_id_valid(), true,
platform::errors::PreconditionNotMet(
"The gpu_device_id field should be valid"));
place = platform::CUDAPlace(argument->gpu_device_id());
auto *scope = argument->scope_ptr();
......@@ -56,7 +62,8 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
continue;
}
auto *var = scope->FindLocalVar(var_name);
PADDLE_ENFORCE(var != nullptr);
PADDLE_ENFORCE_NOT_NULL(var, platform::errors::PreconditionNotMet(
"The var should not be nullptr"));
if (var->IsType<framework::LoDTensor>() ||
var->IsType<framework::Tensor>()) {
auto *t = var->GetMutable<framework::LoDTensor>();
......
......@@ -224,7 +224,9 @@ void UpdateOpDescsByReuse(
// modify the graph
for (auto input_node : node->inputs) {
PADDLE_ENFORCE(input_node->IsVar());
PADDLE_ENFORCE_EQ(input_node->IsVar(), true,
platform::errors::PreconditionNotMet(
"The input node should be a variable."));
std::string input_node_name = input_node->Name();
if (reuse_table.count(input_node_name) &&
reuse_table.at(input_node_name) != input_node_name) {
......@@ -246,7 +248,9 @@ void UpdateOpDescsByReuse(
// modify the graph
for (auto out_node : node->outputs) {
PADDLE_ENFORCE(out_node->IsVar());
PADDLE_ENFORCE_EQ(out_node->IsVar(), true,
platform::errors::PreconditionNotMet(
"The output node should be a variable."));
std::string out_node_name = out_node->Name();
if (reuse_table.count(out_node_name) &&
reuse_table.at(out_node_name) != out_node_name) {
......
......@@ -230,7 +230,8 @@ void AnalysisConfig::EnableMkldnnBfloat16() {
MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_,
"MkldnnQuantizer was not enabled yet.");
platform::errors::PreconditionNotMet(
"MkldnnQuantizer was not enabled yet."));
return mkldnn_quantizer_config_.get();
}
......
......@@ -169,7 +169,8 @@ bool AnalysisPredictor::PrepareScope(
if (parent_scope) {
PADDLE_ENFORCE_NOT_NULL(
parent_scope,
"Both program and parent_scope should be set in Clone mode.");
platform::errors::PreconditionNotMet(
"Both program and parent_scope should be set in Clone mode."));
scope_ = parent_scope;
status_is_cloned_ = true;
} else {
......@@ -235,7 +236,9 @@ bool AnalysisPredictor::PrepareExecutor() {
executor_->Prepare(sub_scope_, *inference_program_, 0,
config_.use_feed_fetch_ops_);
PADDLE_ENFORCE_NOT_NULL(sub_scope_);
PADDLE_ENFORCE_NOT_NULL(sub_scope_,
platform::errors::PreconditionNotMet(
"The sub_scope should not be nullptr."));
return true;
}
......@@ -297,7 +300,8 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
timer.tic();
// set feed variable
framework::Scope *scope = sub_scope_ ? sub_scope_ : scope_.get();
PADDLE_ENFORCE_NOT_NULL(scope, "The scope should not be nullptr.");
PADDLE_ENFORCE_NOT_NULL(scope, platform::errors::PreconditionNotMet(
"The scope should not be nullptr."));
if (!SetFeed(inputs, scope)) {
LOG(ERROR) << "fail to set feed";
return false;
......@@ -399,7 +403,11 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
outputs->resize(fetches_.size());
for (size_t i = 0; i < fetches_.size(); ++i) {
int idx = BOOST_GET_CONST(int, fetches_[i]->GetAttr("col"));
PADDLE_ENFORCE((size_t)idx == i);
PADDLE_ENFORCE_EQ(
static_cast<size_t>(idx), i,
platform::errors::InvalidArgument(
"Fetch op's col attr(%d) should be equal to the index(%d)", idx,
i));
framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx);
auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var);
......@@ -435,10 +443,12 @@ void AnalysisPredictor::PrepareArgument() {
if (!config_.model_dir().empty()) {
argument_.SetModelDir(config_.model_dir());
} else {
PADDLE_ENFORCE(
!config_.params_file().empty(),
"Either model_dir or (param_file, prog_file) should be set.");
PADDLE_ENFORCE(!config_.prog_file().empty());
PADDLE_ENFORCE_EQ(config_.params_file().empty(), false,
platform::errors::PreconditionNotMet(
"Either model_dir or param_file should be set."));
PADDLE_ENFORCE_EQ(config_.prog_file().empty(), false,
platform::errors::PreconditionNotMet(
"Either model_dir or prog_file should be set."));
std::string dir = inference::analysis::GetDirRoot(config_.prog_file());
argument_.SetModelProgramPath(config_.prog_file());
......@@ -503,7 +513,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
PrepareArgument();
Analyzer().Run(&argument_);
PADDLE_ENFORCE(argument_.scope_valid());
PADDLE_ENFORCE_EQ(
argument_.scope_valid(), true,
platform::errors::InvalidArgument("The argument scope should be valid."));
VLOG(5) << "to prepare executor";
ARGUMENT_CHECK_FIELD((&argument_), ir_analyzed_program);
inference_program_.reset(
......@@ -525,8 +537,10 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
FLAGS_minloglevel = 2; // GLOG_ERROR
}
VLOG(3) << "create AnalysisConfig";
PADDLE_ENFORCE(config.is_valid(),
"Note: Each config can only be used for one predictor.");
PADDLE_ENFORCE_EQ(
config.is_valid(), true,
platform::errors::InvalidArgument(
"Note: Each config can only be used for one predictor."));
if (config.use_gpu()) {
static std::once_flag gflags_initialized;
......@@ -623,7 +637,9 @@ bool AnalysisPredictor::MkldnnQuantize() {
}
void AnalysisPredictor::PrepareFeedFetch() {
PADDLE_ENFORCE_NOT_NULL(sub_scope_);
PADDLE_ENFORCE_NOT_NULL(sub_scope_,
platform::errors::InvalidArgument(
"The sub_scope should not be nullptr."));
CreateFeedFetchVar(sub_scope_);
for (auto *op : inference_program_->Block(0).AllOps()) {
if (op->Type() == "feed") {
......@@ -646,7 +662,8 @@ void AnalysisPredictor::PrepareFeedFetch() {
}
void AnalysisPredictor::CreateFeedFetchVar(framework::Scope *scope) {
PADDLE_ENFORCE_NOT_NULL(scope);
PADDLE_ENFORCE_NOT_NULL(scope, platform::errors::InvalidArgument(
"The scope should not be nullptr."));
auto *var = scope->Var("feed");
var->GetMutable<framework::FeedList>();
var = scope->Var("fetch");
......@@ -667,7 +684,8 @@ AnalysisPredictor::GetInputTensorShape() {
std::vector<std::string> names = GetInputNames();
for (std::string name : names) {
auto *var = inference_program_->Block(0).FindVar(name);
PADDLE_ENFORCE_NOT_NULL(var, "input %s does not exist.", name);
PADDLE_ENFORCE_NOT_NULL(var, platform::errors::PreconditionNotMet(
"Input %s does not exist.", name));
input_shapes[name] = var->GetShape();
}
return input_shapes;
......@@ -683,7 +701,11 @@ std::vector<std::string> AnalysisPredictor::GetOutputNames() {
std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
const std::string &name) {
PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name);
PADDLE_ENFORCE_NOT_NULL(
executor_->scope()->FindVar(name),
platform::errors::PreconditionNotMet(
"The variable named %s is not found in the scope of the exector.",
name));
std::unique_ptr<ZeroCopyTensor> res(
new ZeroCopyTensor(static_cast<void *>(executor_->scope())));
res->input_or_output_ = true;
......@@ -700,7 +722,11 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
const std::string &name) {
PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name);
PADDLE_ENFORCE_NOT_NULL(
executor_->scope()->FindVar(name),
platform::errors::PreconditionNotMet(
"he variable named %s is not found in the scope of the exector.",
name));
std::unique_ptr<ZeroCopyTensor> res(
new ZeroCopyTensor(static_cast<void *>(executor_->scope())));
res->input_or_output_ = false;
......@@ -761,8 +787,11 @@ bool AnalysisPredictor::LoadProgramDesc() {
std::string pb_content;
// Read binary
std::ifstream fin(filename, std::ios::in | std::ios::binary);
PADDLE_ENFORCE(static_cast<bool>(fin.is_open()), "Cannot open file %s",
filename);
PADDLE_ENFORCE_EQ(
static_cast<bool>(fin.is_open()), true,
platform::errors::NotFound(
"Cannot open file %s, please confirm whether the file is normal.",
filename));
fin.seekg(0, std::ios::end);
pb_content.resize(fin.tellg());
fin.seekg(0, std::ios::beg);
......@@ -779,7 +808,8 @@ bool AnalysisPredictor::LoadProgramDesc() {
bool AnalysisPredictor::LoadParameters() {
PADDLE_ENFORCE_NOT_NULL(inference_program_.get(),
"The inference program should be loaded first.");
platform::errors::PreconditionNotMet(
"The inference program should be loaded first."));
const auto &global_block = inference_program_->MutableBlock(0);
......@@ -855,8 +885,9 @@ void AnalysisPredictor::ClearIntermediateTensor() {
#if PADDLE_WITH_TENSORRT
bool AnalysisPredictor::SaveTrtCalibToDisk() {
PADDLE_ENFORCE(config_.tensorrt_engine_enabled(),
"This func can be invoked only in trt mode");
PADDLE_ENFORCE_EQ(config_.tensorrt_engine_enabled(), true,
platform::errors::PreconditionNotMet(
"This func can be invoked only in trt mode"));
auto &block = inference_program_->Block(0);
for (auto &op_desc : block.AllOps()) {
if (op_desc->Type() == "tensorrt_engine") {
......
......@@ -62,9 +62,9 @@ PaddleBuf &PaddleBuf::operator=(const PaddleBuf &other) {
if (other.length() && other.data())
memcpy(data_, other.data(), other.length());
else if (other.length())
PADDLE_THROW(
PADDLE_THROW(platform::errors::InvalidArgument(
"Invalid argument, null pointer data with length %u is passed",
other.length());
other.length()));
length_ = other.length();
memory_owned_ = true;
......@@ -92,7 +92,8 @@ void PaddleBuf::Resize(size_t length) {
length_ = length;
memory_owned_ = true;
} else {
PADDLE_THROW("The memory is allocated externally, can not Resized");
PADDLE_THROW(platform::errors::PreconditionNotMet(
"The memory is allocated externally, can not Resized"));
}
}
......@@ -105,7 +106,11 @@ void PaddleBuf::Reset(void *data, size_t length) {
void PaddleBuf::Free() {
if (memory_owned_ && data_) {
PADDLE_ENFORCE_GT(length_, 0UL);
PADDLE_ENFORCE_GT(
length_, 0UL,
platform::errors::PreconditionNotMet(
"The memory used in PaddleBuf %d should be greater than 0",
length_));
delete[] static_cast<char *>(data_);
data_ = nullptr;
length_ = 0;
......
......@@ -87,7 +87,9 @@ bool NativePaddlePredictor::Init(
if (parent_scope) {
scope_ = parent_scope;
sub_scope_ = &(parent_scope->NewScope());
PADDLE_ENFORCE_NOT_NULL(sub_scope_, "create sub scope fail");
PADDLE_ENFORCE_NOT_NULL(sub_scope_,
platform::errors::PreconditionNotMet(
"The sub_scope should not be nullptr."));
} else {
paddle::framework::InitDevices(false);
scope_.reset(new paddle::framework::Scope());
......@@ -182,7 +184,10 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
// Hot fix the bug that result diff in multi-thread.
// TODO(Superjomn) re-implement a real clone here.
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<NativePaddlePredictor *>(cls.get()));
PADDLE_ENFORCE_NOT_NULL(
dynamic_cast<NativePaddlePredictor *>(cls.get()),
platform::errors::PreconditionNotMet(
"Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) {
LOG(ERROR) << "fail to call Init";
return nullptr;
......@@ -224,8 +229,13 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
return false;
}
PADDLE_ENFORCE_NOT_NULL(input_ptr);
PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data());
PADDLE_ENFORCE_NOT_NULL(input_ptr,
platform::errors::InvalidArgument(
"The input_ptr should not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(
inputs[i].data.data(),
platform::errors::InvalidArgument(
"The data of input tensor should not be null."));
if (platform::is_cpu_place(place_)) {
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
......@@ -241,7 +251,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
platform::CPUPlace(), inputs[i].data.data(),
inputs[i].data.length(), dev_ctx->stream());
#else
PADDLE_THROW("Not compile with CUDA, should not reach here.");
PADDLE_THROW(platform::errors::Unavailable(
"Not compile with CUDA, should not reach here."));
#endif
}
......@@ -287,7 +298,11 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
outputs->resize(fetchs_.size());
for (size_t i = 0; i < fetchs_.size(); ++i) {
int idx = BOOST_GET_CONST(int, fetchs_[i]->GetAttr("col"));
PADDLE_ENFORCE((size_t)idx == i);
PADDLE_ENFORCE_EQ(
static_cast<size_t>(idx), i,
platform::errors::InvalidArgument(
"Fetch op's col attr(%d) should be equal to the index(%d)", idx,
i));
framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx);
auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var);
......@@ -318,10 +333,15 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
VLOG(3) << "create NativePaddlePredictor";
if (config.use_gpu) {
// 1. GPU memory
PADDLE_ENFORCE_GE(
config.fraction_of_gpu_memory, 0.f,
"fraction_of_gpu_memory in the config should be set to range (0., 1.]");
PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
PADDLE_ENFORCE_GE(config.fraction_of_gpu_memory, 0.f,
platform::errors::InvalidArgument(
"fraction_of_gpu_memory in the config should be set "
"to range (0., 1.]"));
PADDLE_ENFORCE_GE(config.device, 0,
platform::errors::PreconditionNotMet(
"Invalid device id %d, the device id should be "
"greater than or equal to 0.",
config.device));
std::vector<std::string> flags;
if (config.fraction_of_gpu_memory >= 0.0f ||
config.fraction_of_gpu_memory <= 0.95f) {
......@@ -336,7 +356,9 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
PADDLE_ENFORCE_NOT_NULL(
dynamic_cast<NativePaddlePredictor *>(predictor.get()));
dynamic_cast<NativePaddlePredictor *>(predictor.get()),
platform::errors::PreconditionNotMet(
"Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
return nullptr;
}
......
......@@ -112,16 +112,19 @@ static T convert(const std::string &item,
std::string message =
"invalid_argument exception when try to convert : " + item;
LOG(ERROR) << message;
PADDLE_THROW(message);
PADDLE_THROW(platform::errors::InvalidArgument(
"invalid_argument exception when try to convert %s.", item));
} catch (std::out_of_range &e) {
std::string message =
"out_of_range exception when try to convert : " + item;
LOG(ERROR) << message;
PADDLE_THROW(message);
PADDLE_THROW(platform::errors::InvalidArgument(
"out_of_range exception when try to convert %s.", item));
} catch (...) {
std::string message = "unexpected exception when try to convert " + item;
LOG(ERROR) << message;
PADDLE_THROW(message);
PADDLE_THROW(platform::errors::InvalidArgument(
"unexpected exception when try to convert %s.", item));
}
return res;
}
......@@ -353,7 +356,8 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
double batch_latency, int epoch = 1,
const framework::proto::VarType::Type data_type =
framework::proto::VarType::FP32) {
PADDLE_ENFORCE_GT(batch_size, 0, "Non-positive batch size.");
PADDLE_ENFORCE_GT(batch_size, 0, platform::errors::InvalidArgument(
"Non-positive batch size."));
double sample_latency = batch_latency / batch_size;
LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid
<< " ======";
......
......@@ -62,9 +62,12 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
if (scales_.find(var_name) != scales_.end()) continue;
auto* var = predictor_.sub_scope_->FindVar(var_name);
PADDLE_ENFORCE(var, "%s is not in the scope", var_name);
PADDLE_ENFORCE(var->IsType<LoDTensor>(),
"Only support lod tensor now.");
PADDLE_ENFORCE_NOT_NULL(var,
platform::errors::PreconditionNotMet(
"%s is not in the scope", var_name));
PADDLE_ENFORCE_EQ(var->IsType<LoDTensor>(), true,
platform::errors::PreconditionNotMet(
"Only support lod tensor now."));
LoDTensor* var_tensor = var->GetMutable<LoDTensor>();
// force unsigned type if already know it
......@@ -82,9 +85,11 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
} else if (op->Type() == "transpose2" ||
op->Type() == "reshape2" || op->Type() == "pool2d") {
auto input_var_name = op->Input("X")[0];
PADDLE_ENFORCE(scales_.find(input_var_name) != scales_.end(),
PADDLE_ENFORCE_NE(
scales_.find(input_var_name), scales_.end(),
platform::errors::PreconditionNotMet(
"Input scales must be calculated before the "
"output scales to infer if output is unsigned.");
"output scales to infer if output is unsigned."));
if (scales_.find(input_var_name) != scales_.end()) {
scales_[var_name] = scales_[input_var_name];
}
......@@ -94,10 +99,11 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
is_unsigned = true;
double min_scale = std::numeric_limits<double>::max();
for (auto input_var_name : op->Input("X")) {
PADDLE_ENFORCE(
scales_.find(input_var_name) != scales_.end(),
PADDLE_ENFORCE_NE(
scales_.find(input_var_name), scales_.end(),
platform::errors::PreconditionNotMet(
"Input scales must be calculated before the "
"output scales to infer if output is unsigned.");
"output scales to infer if output is unsigned."));
is_unsigned = is_unsigned && scales_[input_var_name].first;
min_scale = std::min(
min_scale,
......@@ -132,11 +138,12 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale(
auto rule = qconfig_->scale_algo(op_type_name, conn_name);
if (rule == ScaleAlgo::NONE) return;
PADDLE_ENFORCE(
var_tensor.numel() > 0,
PADDLE_ENFORCE_GT(
var_tensor.numel(), 0,
platform::errors::InvalidArgument(
"MkldnnQuantizer: LoDTensor of variable %s for quantization of op "
"%s of connection %s should not be empty.",
var_name, op_type_name, conn_name);
var_name, op_type_name, conn_name));
switch (rule) {
case ScaleAlgo::MAX:
......@@ -205,10 +212,11 @@ AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor(
float min_val = eigen_tensor.minCoeff();
bool is_positive = min_val >= 0.0f;
if (is_unsigned)
PADDLE_ENFORCE(
is_positive,
PADDLE_ENFORCE_EQ(
is_positive, true,
platform::errors::InvalidArgument(
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val);
min_val));
int num_quantized_bins = 255;
......@@ -316,10 +324,11 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
float max_abs = eigen_tensor.abs().maxCoeff();
float min_val = eigen_tensor.minCoeff();
if (is_unsigned)
PADDLE_ENFORCE(
min_val >= 0.0f,
PADDLE_ENFORCE_GE(
min_val, 0.0f,
platform::errors::InvalidArgument(
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val);
min_val));
LoDTensor scale_tensor = CreateScaleTensor();
scale_tensor.data<double>()[0] = 1.0 / max_abs;
......@@ -330,16 +339,19 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
std::pair<bool, LoDTensor>
AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor(
const LoDTensor& var_tensor, bool is_unsigned, bool is_transposed) const {
PADDLE_ENFORCE(var_tensor.dims().size() > 0, "Tensor dimension is empty.");
PADDLE_ENFORCE_GT(
var_tensor.dims().size(), 0,
platform::errors::InvalidArgument("Tensor dimension is empty."));
ConstEigenVectorArrayMap eigen_tensor{var_tensor.data<float>(),
var_tensor.numel(), 1};
float min_val = eigen_tensor.minCoeff();
if (is_unsigned)
PADDLE_ENFORCE(
min_val >= 0.0f,
PADDLE_ENFORCE_GE(
min_val, 0.0f,
platform::errors::InvalidArgument(
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val);
min_val));
auto dims = var_tensor.dims();
constexpr int num_col_dims = 1;
......@@ -367,17 +379,19 @@ AnalysisPredictor::MkldnnQuantizer::Histogram(
const framework::LoDTensor& var_tensor, float min_val, float max_val,
size_t num_bins) const {
PADDLE_ENFORCE_GT(num_bins, 0,
platform::errors::InvalidArgument(
"MkldnnQuantizer: To calculate Histogram, num_bins (" +
std::to_string(num_bins) + ") must be positive.");
PADDLE_ENFORCE_GT(
var_tensor.numel(), 0,
"MkldnnQuantizer: To calculate Histogram, the tensor must not be empty.");
PADDLE_ENFORCE(max_val >= min_val,
std::to_string(num_bins) + ") must be positive."));
PADDLE_ENFORCE_GT(var_tensor.numel(), 0,
platform::errors::InvalidArgument(
"MkldnnQuantizer: To calculate Histogram, the tensor "
"must not be empty."));
PADDLE_ENFORCE_GE(max_val, min_val,
platform::errors::InvalidArgument(
"MkldnnQuantizer: To calculate Histogram, max_val (" +
std::to_string(max_val) +
") must be greater or equal"
std::to_string(max_val) + ") must be greater or equal"
"to min_val (" +
std::to_string(min_val) + ").");
std::to_string(min_val) + ")."));
ConstEigenVectorArrayMap eigen_tensor{var_tensor.data<float>(),
var_tensor.numel(), 1};
auto bin_width = std::abs(max_val - min_val) / num_bins;
......@@ -407,7 +421,8 @@ void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const {
auto graph = std::unique_ptr<Graph>(new Graph(arg.main_program()));
arg.SetMainGraph(graph.release());
auto* scope_ptr = arg.scope_ptr();
PADDLE_ENFORCE(scope_ptr);
PADDLE_ENFORCE_NOT_NULL(scope_ptr, platform::errors::PreconditionNotMet(
"The scope should not be nullptr."));
arg.main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
auto* builder = predictor_.config_.pass_builder();
......@@ -441,7 +456,9 @@ bool AnalysisPredictor::MkldnnQuantizer::RunQuantizePasses() const {
PrepareArgument();
auto& arg = predictor_.argument_;
Analyzer().Run(&arg);
PADDLE_ENFORCE(arg.scope_valid());
PADDLE_ENFORCE_EQ(
arg.scope_valid(), true,
platform::errors::PreconditionNotMet("The scope should be valid."));
VLOG(5) << "to prepare executor";
ARGUMENT_CHECK_FIELD((&arg), ir_analyzed_program);
predictor_.inference_program_.reset(
......@@ -456,7 +473,8 @@ bool AnalysisPredictor::MkldnnQuantizer::RunWarmup() const {
VLOG(3) << "Predictor: run a quantization warmup iteration";
auto warmup_data = qconfig_->warmup_data();
PADDLE_ENFORCE_NOT_NULL(warmup_data,
"Warmup data cannot be NULL in the config.");
platform::errors::PreconditionNotMet(
"Warmup data cannot be NULL in the config."));
PrettyLogH1("--- Running warmup iteration for quantization");
// Run the inference program
......@@ -469,7 +487,10 @@ bool AnalysisPredictor::MkldnnQuantizer::RunWarmup() const {
float AnalysisPredictor::MkldnnQuantizer::SafeEntropy(
std::vector<int> reference_distr_P, int P_sum,
std::vector<int> candidate_distr_Q, int Q_sum) const {
PADDLE_ENFORCE_EQ(reference_distr_P.size(), candidate_distr_Q.size());
PADDLE_ENFORCE_EQ(reference_distr_P.size(), candidate_distr_Q.size(),
platform::errors::InvalidArgument(
"The P size %d should be equal to Q size %d",
reference_distr_P.size(), candidate_distr_Q.size()));
float tmp_sum1 = 0;
float tmp_sum2 = 0;
for (size_t idx = 0; idx < reference_distr_P.size(); idx++) {
......@@ -479,10 +500,11 @@ float AnalysisPredictor::MkldnnQuantizer::SafeEntropy(
tmp_sum1 += 0;
tmp_sum2 += 0;
} else {
PADDLE_ENFORCE(q_idx != 0, "MkldnnQuantizer: Fatal error!, idx = " +
std::to_string(idx) +
" qindex = 0! p_idx = " +
std::to_string(p_idx));
PADDLE_ENFORCE_NE(
q_idx, 0,
platform::errors::PreconditionNotMet(
"MkldnnQuantizer: Fatal error!, idx = " + std::to_string(idx) +
" qindex = 0! p_idx = " + std::to_string(p_idx)));
}
tmp_sum1 += p_idx * (log(Q_sum * p_idx));
tmp_sum2 += p_idx * (log(P_sum * q_idx));
......
......@@ -231,6 +231,10 @@ void CpuPassStrategy::EnableMkldnnQuantizer() {
void CpuPassStrategy::EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN
if (!use_mkldnn_bfloat16_) {
passes_.push_back("cpu_bfloat16_placement_pass");
passes_.push_back("cpu_bfloat16_pass");
}
use_mkldnn_bfloat16_ = true;
#else
use_mkldnn_bfloat16_ = false;
......
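The two new cpu_bfloat16 passes are appended here only when the user-facing switch is on. A hedged usage sketch of how a client would reach this code path, assuming the usual AnalysisConfig flow (the model path is a placeholder and the snippet is not part of this diff):

  paddle::AnalysisConfig config;
  config.SetModel("/path/to/model_dir");  // placeholder path
  config.EnableMKLDNN();                  // bfloat16 assumes oneDNN kernels are enabled
  config.EnableMkldnnBfloat16();          // appends cpu_bfloat16_placement_pass + cpu_bfloat16_pass
  auto predictor = paddle::CreatePaddlePredictor(config);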
......@@ -34,8 +34,11 @@ class ConcatOpConverter : public OpConverter {
itensors.push_back(engine_->GetITensor(input_name));
}
int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
PADDLE_ENFORCE(axis > 0,
"The axis attr of Concat op should be large than 0 for trt");
PADDLE_ENFORCE_GT(axis, 0, platform::errors::InvalidArgument(
"The axis attr of Concat"
" op should be larger than 0 for trt. "
"But received %d.",
axis));
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(),
itensors.size());
......
......@@ -100,7 +100,9 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op,
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
auto* layer = fadd_layer(const_cast<nvinfer1::ITensor*>(X), n_output, n_input,
nv_ksize, weight, bias);
PADDLE_ENFORCE(layer != nullptr);
PADDLE_ENFORCE_NOT_NULL(layer,
platform::errors::Fatal("TensorRT create conv2d"
" layer error."));
layer->setStride(nv_strides);
layer->setPadding(nv_paddings);
layer->setNbGroups(groups);
......
......@@ -43,13 +43,30 @@ class ElementwiseWeightOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer";
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
PADDLE_ENFORCE_EQ(
op_desc.Input("X").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"X\").size() "
"should equal to 1, but received Input(\"X\").size() = %u.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(
op_desc.Input("Y").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\").size() "
"should equal to 1, but received Input(\"Y\").size() = %u.",
op_desc.Input("Y").size())); // Y is a weight
PADDLE_ENFORCE_EQ(
op_desc.Output("Out").size(), 1,
platform::errors::InvalidArgument(
"The input op's Output(\"Out\").size() "
"should equal to 1, but reveceid Output(\"Out\").size() = %u.",
op_desc.Output("Out").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
PADDLE_ENFORCE_NOT_NULL(Y_v);
PADDLE_ENFORCE_NOT_NULL(
Y_v, platform::errors::NotFound("Variable %s not found in scope.",
op_desc.Input("Y").front().c_str()));
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
float* weight_data = nullptr;
weight_data =
......@@ -176,9 +193,24 @@ class ElementwiseTensorOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
nvinfer1::ILayer* layer = nullptr;
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
PADDLE_ENFORCE_EQ(
op_desc.Input("X").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"X\").size() "
"should equal to 1, but received Input(\"X\").size() = %u.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(
op_desc.Input("Y").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\").size() "
"should equal to 1, but received Input(\"Y\").size() = %u.",
op_desc.Input("Y").size())); // Y is a weight
PADDLE_ENFORCE_EQ(
op_desc.Output("Out").size(), 1,
platform::errors::InvalidArgument(
"The input op's Output(\"Out\").size() "
"should equal to 1, but received Output(\"Out\").size() = %u.",
op_desc.Output("Out").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Y = engine_->GetITensor(op_desc.Input("Y").front());
......
......@@ -29,38 +29,67 @@ class DefaultIOConverter : public EngineIOConverter {
// NOTE out is GPU memory.
virtual void operator()(const LoDTensor& in, void* out,
size_t max_size) override {
PADDLE_ENFORCE(out != nullptr);
PADDLE_ENFORCE(stream_ != nullptr);
PADDLE_ENFORCE_NOT_NULL(out,
platform::errors::InvalidArgument(
"The input param 'out' must not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(stream_,
platform::errors::PreconditionNotMet(
"You should set up stream_ by SetStream() "
"before you call the operator()."));
const auto& place = in.place();
size_t size = in.memory_size();
PADDLE_ENFORCE_LE(size, max_size);
PADDLE_ENFORCE_LE(
size, max_size,
platform::errors::InvalidArgument(
"The input Tensor in's memory_size shoule be less than or equal to "
"the input max_size. But in's memory_size = %u, max_size = %u.",
size, max_size));
if (is_cpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size,
cudaMemcpyHostToDevice, *stream_));
PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemcpyAsync(
out, in.data<float>(), size, cudaMemcpyHostToDevice, *stream_));
} else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size,
cudaMemcpyDeviceToDevice, *stream_));
PADDLE_ENFORCE_EQ(
0, cudaMemcpyAsync(out, in.data<float>(), size,
cudaMemcpyDeviceToDevice, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error."));
} else {
PADDLE_THROW("Unknown device for converter");
PADDLE_THROW(platform::errors::NotFound("Unknown device for converter"));
}
cudaStreamSynchronize(*stream_);
}
// NOTE in is GPU memory.
virtual void operator()(const void* in, LoDTensor* out,
size_t max_size) override {
PADDLE_ENFORCE(in != nullptr);
PADDLE_ENFORCE(stream_ != nullptr);
PADDLE_ENFORCE_NOT_NULL(in,
platform::errors::InvalidArgument(
"The input param 'in' must not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(stream_,
platform::errors::PreconditionNotMet(
"You should set up stream_ by SetStream() "
"before you call the operator()."));
const auto& place = out->place();
size_t size = out->memory_size();
PADDLE_ENFORCE_LE(size, max_size);
PADDLE_ENFORCE_LE(
size, max_size,
platform::errors::InvalidArgument(
"The input Tensor out's memory_size shoule be less than or equal "
"to the input max_size. "
"But out's memory_size = %u, max_size = %u.",
size, max_size));
if (is_cpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToHost, *stream_));
cudaMemcpyDeviceToHost, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToHost) error."));
} else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToDevice, *stream_));
PADDLE_ENFORCE_EQ(
0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToDevice, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error."));
} else {
PADDLE_THROW("Unknown device for converter");
PADDLE_THROW(platform::errors::NotFound("Unknown device for converter"));
}
cudaStreamSynchronize(*stream_);
}
......
......@@ -44,10 +44,14 @@ class EngineIOConverter {
static void ConvertInput(const std::string& op_type, const LoDTensor& in,
void* out, size_t max_size, cudaStream_t* stream) {
PADDLE_ENFORCE(stream != nullptr);
PADDLE_ENFORCE_NOT_NULL(stream,
platform::errors::InvalidArgument(
"The input stream must not be nullptr."));
auto* converter = Registry<EngineIOConverter>::Global().Lookup(
op_type, "default" /* default_type */);
PADDLE_ENFORCE_NOT_NULL(converter);
PADDLE_ENFORCE_NOT_NULL(
converter, platform::errors::Unimplemented(
"The %s in is not supported yet.", op_type.c_str()));
converter->SetStream(stream);
(*converter)(in, out, max_size);
}
......@@ -55,10 +59,14 @@ class EngineIOConverter {
static void ConvertOutput(const std::string& op_type, const void* in,
LoDTensor* out, size_t max_size,
cudaStream_t* stream) {
PADDLE_ENFORCE(stream != nullptr);
PADDLE_ENFORCE_NOT_NULL(stream,
platform::errors::InvalidArgument(
"The input stream must not be nullptr."));
auto* converter = Registry<EngineIOConverter>::Global().Lookup(
op_type, "default" /* default_type */);
PADDLE_ENFORCE_NOT_NULL(converter);
PADDLE_ENFORCE_NOT_NULL(
converter, platform::errors::Unimplemented(
"The %s in not supported yet.", op_type.c_str()));
converter->SetStream(stream);
(*converter)(in, out, max_size);
}
......
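For reference, ConvertInput/ConvertOutput are the intended entry points into the converter registry; a caller sketch (the op type, buffers and sizes here are only illustrative) looks like:

    cudaStream_t stream;
    cudaStreamCreate(&stream);
    // Copy the LoDTensor `input` into the raw GPU buffer `gpu_buf`, using the
    // converter registered for "mul" (falling back to the default converter).
    EngineIOConverter::ConvertInput("mul", input, gpu_buf, max_size, &stream);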
......@@ -53,7 +53,12 @@ class OpConverter {
OpConverter* it{nullptr};
if (op_desc.Type() == "mul") {
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL,
platform::errors::InvalidArgument(
"The input op mul's Input(\"Y\")."
"size() should equal to 1, but reveceid "
"Input(\"Y\").size() = %u.",
op_desc.Input("Y").size()));
std::string Y = op_desc.Input("Y")[0];
if (parameters.count(Y)) {
it = Registry<OpConverter>::Global().Lookup("fc");
......@@ -66,38 +71,51 @@ class OpConverter {
// static std::unordered_set<std::string> add_weight_op_set {"add", "mul",
// "sub", "div"};
static std::unordered_set<std::string> add_weight_op_set{"add", "mul"};
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\")."
"size() should equal to 1, but reveceid "
"Input(\"Y\").size() = %u.",
op_desc.Input("Y").size()));
int op_type_len = op_desc.Type().size();
std::string op_type = op_desc.Type().substr(op_type_len - 3, op_type_len);
std::string Y = op_desc.Input("Y")[0];
if (parameters.count(Y)) {
PADDLE_ENFORCE(add_weight_op_set.count(op_type) > 0,
"Unsupported elementwise type" + op_type);
PADDLE_ENFORCE_GT(
add_weight_op_set.count(op_type), 0,
platform::errors::Unimplemented("Unsupported elementwise type %s",
op_type.c_str()));
it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
"_weight");
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]",
op_desc.Type());
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented(
"no OpConverter for optype [%s]", op_desc.Type()));
} else {
PADDLE_ENFORCE(add_tensor_op_set.count(op_type) > 0,
"Unsupported elementwise type" + op_type);
PADDLE_ENFORCE_GT(
add_tensor_op_set.count(op_type), 0,
platform::errors::Unimplemented("Unsupported elementwise type %s",
op_type.c_str()));
it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
"_tensor");
}
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]",
op_desc.Type());
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
}
if (op_desc.Type() == "depthwise_conv2d") {
it = Registry<OpConverter>::Global().Lookup("conv2d");
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]",
op_desc.Type());
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
}
if (!it) {
it = Registry<OpConverter>::Global().Lookup(op_desc.Type());
}
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]",
op_desc.Type());
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
it->SetEngine(engine);
(*it)(op, scope, test_mode);
......@@ -149,9 +167,13 @@ class OpConverter {
for (auto& input : inputs) {
if (parameters.count(input)) continue;
auto* var = block_desc->FindVar(input);
PADDLE_ENFORCE(var, "no variable called %s", input);
PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR,
"TensorRT engine only takes LoDTensor as input");
PADDLE_ENFORCE_NOT_NULL(
var, platform::errors::NotFound("no variable called %s in block.",
input.c_str()));
PADDLE_ENFORCE_EQ(
var->GetType(), FluidDT::VarType_Type_LOD_TENSOR,
platform::errors::InvalidArgument("TensorRT engine only takes "
"LoDTensor as input"));
auto var_shape = var->GetShape();
if (engine->with_dynamic_shape()) {
#if IS_TRT_VERSION_GE(6000)
......
......@@ -39,9 +39,22 @@ class PadOpConverter : public OpConverter {
nvinfer1::Dims input_shape = input->getDimensions();
int nbDims = input_shape.nbDims;
int pad_size = static_cast<int>(paddings.size());
PADDLE_ENFORCE_GE(nbDims, 2);
PADDLE_ENFORCE_EQ((nbDims + 1) * 2, pad_size);
PADDLE_ENFORCE(pad_value == 0.0, "The pad layer of TRT only support zero.");
PADDLE_ENFORCE_GE(
nbDims, 2,
platform::errors::InvalidArgument(
"Input X[0]'s dimension should greater than or equal to 2. "
"But received %d.",
nbDims));
PADDLE_ENFORCE_EQ(
(nbDims + 1) * 2, pad_size,
platform::errors::InvalidArgument("Input X[0]'s dimension(nbDims for "
"short) should meet the condition:"
"(nbDims + 1) * 2 == pad_size. But "
"received nbDims:%d, pad_size:%d.",
nbDims, pad_size));
PADDLE_ENFORCE_EQ(pad_value, 0.0,
platform::errors::InvalidArgument(
"The pad layer of TRT only support zero."));
nvinfer1::DimsHW pre_pad(paddings[pad_size - 4], paddings[pad_size - 2]);
nvinfer1::DimsHW post_pad(paddings[pad_size - 3], paddings[pad_size - 1]);
......@@ -50,7 +63,9 @@ class PadOpConverter : public OpConverter {
*const_cast<nvinfer1::ITensor*>(input),
pre_pad, post_pad);
PADDLE_ENFORCE(layer != nullptr);
PADDLE_ENFORCE_NOT_NULL(layer,
platform::errors::External(
"add padding layer to tensorrt engine error"));
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "pad", {output_name}, test_mode);
}
......
......@@ -23,9 +23,8 @@ class SliceOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
    // This OP is implemented by trt dynamic shape plugin.
// Dynamic shape plugin requires TRT version greater than 6.0.
#if IS_TRT_VERSION_GE(6000)
    // This OP is implemented by trt dynamic shape plugin.
// Dynamic shape plugin requires TRT version greater than 6.0.
VLOG(4) << "convert slice op to tensorrt layer";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
......@@ -38,27 +37,65 @@ class SliceOpConverter : public OpConverter {
std::vector<int> ends =
BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("ends"));
PADDLE_ENFORCE_EQ(
starts.size(), axes.size(),
platform::errors::InvalidArgument(
"The size of starts must be equal to the size of axes."));
PADDLE_ENFORCE_EQ(
ends.size(), axes.size(),
platform::errors::InvalidArgument(
"The size of ends must be equal to the size of axes."));
auto input_dims = input->getDimensions();
if (!engine_->with_dynamic_shape()) {
// notice that input shape is [CHW] without batch axis when input has
// static shape
for (size_t i = input_dims.nbDims; i > 0; i--) {
input_dims.d[i] = input_dims.d[i - 1];
}
input_dims.d[0] = 1; // fake batchsize, not useful here
for (size_t i = 0; i < axes.size(); i++) {
// split on batch is not supported in TensorRT
PADDLE_ENFORCE_NE(axes[i], 0, platform::errors::InvalidArgument(
"Invalid slice axis. Slice on batch "
"axis is not supported in TensorRT"));
if (starts[i] < 0) {
starts[i] = std::max(starts[i] + input_dims.d[axes[i]], 0);
}
if (ends[i] < 0) {
ends[i] = std::max(ends[i] + input_dims.d[axes[i]], 0);
}
ends[i] = std::min(ends[i], input_dims.d[axes[i]]);
PADDLE_ENFORCE_GT(
ends[i], starts[i],
platform::errors::InvalidArgument(
"Attr(ends) should be greater than attr(starts) in "
"slice op. But received ends = %d, starts = %d.",
ends[i], starts[i]));
}
}
nvinfer1::ILayer* layer = nullptr;
if (engine_->with_dynamic_shape()) {
#if IS_TRT_VERSION_GE(6000)
bool ban_fp16 = engine_->disable_trt_plugin_fp16();
plugin::SlicePluginDynamic* plugin =
new plugin::SlicePluginDynamic(starts, ends, ends, ban_fp16);
new plugin::SlicePluginDynamic(starts, ends, axes, ban_fp16);
layer = engine_->AddPluginV2(&input, 1, plugin);
} else {
PADDLE_THROW(platform::errors::Fatal(
"You are running the Ernie(Bert) model in static"
"shape mode, which is not supported for the time being.\n"
"You can use the config.SetTRTDynamicShapeInfo(...) interface"
" to set the shape information to run the dynamic shape mode."));
}
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "skip_layernorm", {output_name}, test_mode);
#else
PADDLE_THROW(platform::errors::Fatal(
"You are running the TRT Dynamic Shape mode, need to confirm that "
"your TRT version is no less than 6.0"));
#endif
} else {
bool ban_fp16 = engine_->disable_trt_plugin_fp16();
plugin::SlicePlugin* plugin =
new plugin::SlicePlugin(starts, ends, axes, ban_fp16);
layer = engine_->AddPlugin(&input, 1, plugin);
}
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "slice", {output_name}, test_mode);
}
};
......
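The negative start/end normalization added to the slice converter above is easiest to verify with a small worked case (a hypothetical example, not taken from the unit tests):

    // Sliced axis has extent 10, starts = {-3}, ends = {-1}:
    //   start = max(-3 + 10, 0) = 7
    //   end   = max(-1 + 10, 0) = 9, then clamped to min(9, 10) = 9
    // The slice therefore covers indices [7, 9), and the enforced
    // condition ends[i] > starts[i] holds.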
......@@ -28,11 +28,20 @@ class SwishOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
int input_num = op_desc.Input("X").size();
PADDLE_ENFORCE(input_num == 1);
PADDLE_ENFORCE_EQ(input_num, 1,
platform::errors::InvalidArgument(
"The input X's size must equal to 1 in TRT swish op."
" But received X's size %d.",
input_num));
auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
// Get output
size_t output_num = op_desc.Output("Out").size();
PADDLE_ENFORCE(output_num == 1);
PADDLE_ENFORCE_EQ(
output_num, 1UL,
platform::errors::InvalidArgument(
"The ouput Out's size must equal to 1 in TRT swish op. "
"But received Out's size %u.",
output_num));
// Get attrs
float beta = BOOST_GET_CONST(float, op_desc.GetAttr("beta"));
......
......@@ -49,7 +49,10 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
const platform::DeviceContext& ctx) {
auto dims = tensor->dims();
size_t num_elements = analysis::AccuDims(dims, dims.size());
PADDLE_ENFORCE_GT(num_elements, 0);
PADDLE_ENFORCE_GT(
num_elements, 0UL,
      platform::errors::PermissionDenied("RandomizeTensor can only be used for "
                                         "a tensor whose dims are not zero."));
platform::CPUPlace cpu_place;
framework::LoDTensor temp_tensor;
......@@ -79,7 +82,8 @@ class TRTConvertValidation {
scope_(scope),
if_add_batch_(if_add_batch),
max_batch_size_(max_batch_size) {
PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0,
platform::errors::External("cudaStreamCreate error."));
engine_.reset(new TensorRTEngine(max_batch_size, workspace_size));
engine_->InitNetwork();
}
......@@ -154,7 +158,12 @@ class TRTConvertValidation {
void Execute(int batch_size,
std::unordered_set<std::string> neglected_output = {}) {
// Execute Fluid Op
PADDLE_ENFORCE_LE(batch_size, max_batch_size_);
PADDLE_ENFORCE_LE(batch_size, max_batch_size_,
platform::errors::InvalidArgument(
"Runtime batch_size should be less than or equal to "
"max_batch_size_. "
"But received batch_size:%d, max_batch_size_:%d",
batch_size, max_batch_size_));
platform::CUDADeviceContext ctx(place_);
op_->Run(scope_, place_);
cudaStreamSynchronize(stream_);
......
......@@ -31,6 +31,7 @@ struct SimpleOpTypeSetTeller : public Teller {
teller_set.insert("fused_embedding_eltwise_layernorm");
teller_set.insert("multihead_matmul");
teller_set.insert("skip_layernorm");
teller_set.insert("slice");
#endif
}
......
......@@ -26,8 +26,10 @@ namespace inference {
namespace tensorrt {
namespace plugin {
// Dynamic Plugin below.
#if IS_TRT_VERSION_GE(6000)
SlicePlugin *CreateSlicePluginDeserialize(const void *buffer, size_t length) {
return new SlicePlugin(buffer, length);
}
REGISTER_TRT_PLUGIN("slice_plugin", CreateSlicePluginDeserialize);
template <typename T>
__global__ void SliceKernel(int num, int dims, const T *input,
......@@ -56,11 +58,196 @@ __global__ void SliceKernel(int num, int dims, const T *input,
}
}
SlicePlugin::SlicePlugin(std::vector<int> starts, std::vector<int> ends,
std::vector<int> axes, bool ban_fp16)
: starts_(starts), ends_(ends), axes_(axes), ban_fp16_(ban_fp16) {
cudaEventCreate(&copy_event_);
cudaStreamCreate(&copy_stream_);
}
SlicePlugin::SlicePlugin(void const *serial_data, size_t serial_length) {
deserializeBase(serial_data, serial_length);
DeserializeValue(&serial_data, &serial_length, &starts_);
DeserializeValue(&serial_data, &serial_length, &ends_);
DeserializeValue(&serial_data, &serial_length, &axes_);
DeserializeValue(&serial_data, &serial_length, &ban_fp16_);
cudaEventCreate(&copy_event_);
cudaStreamCreate(&copy_stream_);
}
SlicePlugin::~SlicePlugin() {
cudaStreamDestroy(copy_stream_);
cudaEventDestroy(copy_event_);
cudaFree(offset_temp_data_);
}
SlicePlugin *SlicePlugin::clone() const {
return new SlicePlugin(starts_, ends_, axes_, ban_fp16_);
}
bool SlicePlugin::supportsFormat(nvinfer1::DataType type,
nvinfer1::PluginFormat format) const {
#ifdef SUPPORTS_CUDA_FP16
return ((type == nvinfer1::DataType::kFLOAT ||
type == nvinfer1::DataType::kHALF) &&
(format == nvinfer1::PluginFormat::kNCHW));
#else
return ((type == nvinfer1::DataType::kFLOAT) &&
(format == nvinfer1::PluginFormat::kNCHW));
#endif
}
nvinfer1::Dims SlicePlugin::getOutputDimensions(int index,
const nvinfer1::Dims *inputs,
int nb_input_dims) {
auto in_dims = inputs[0];
nvinfer1::Dims out_dims = in_dims;
for (size_t i = 0; i < axes_.size(); i++) {
int start = starts_[i];
int end = ends_[i];
out_dims.d[axes_[i] - 1] = end - start;
}
return out_dims;
}
int SlicePlugin::enqueue(int batch_size, const void *const *inputs,
void **outputs, void *workspace, cudaStream_t stream) {
auto input_dims = getInputDims(0);
// notice input dims is [C, H, W], add input batch dim here
auto out_dims = getOutputDimensions(0, &input_dims, 1);
input_dims.nbDims += 1;
out_dims.nbDims += 1;
for (auto i = input_dims.nbDims; i > 0; --i) {
input_dims.d[i] = input_dims.d[i - 1];
out_dims.d[i] = out_dims.d[i - 1];
}
input_dims.d[0] = batch_size;
out_dims.d[0] = batch_size;
auto num_dims = input_dims.nbDims;
size_t out_num = ProductDim(out_dims);
std::vector<int> seg_offsets;
std::vector<int> offsets;
std::vector<int> extends;
offsets.resize(num_dims);
extends.resize(num_dims);
seg_offsets.resize(num_dims);
seg_offsets[num_dims - 1] = 1;
for (int i = num_dims - 2; i >= 0; i--) {
seg_offsets[i] = input_dims.d[i + 1] * seg_offsets[i + 1];
}
for (size_t i = 0; i < num_dims; ++i) {
offsets[i] = 0;
extends[i] = out_dims.d[i];
}
for (size_t i = 0; i < axes_.size(); ++i) {
offsets[axes_[i]] = starts_[i];
}
std::vector<int> offset_info;
for (size_t i = 0; i < num_dims; ++i) {
offset_info.push_back(offsets[i]);
offset_info.push_back(extends[i]);
offset_info.push_back(seg_offsets[i]);
}
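  // Layout sketch (a hedged example, not from the tests): for input dims
  // [N, C, H, W] = [1, 3, 224, 224] with axes = {2}, starts = {10}, ends = {20}:
  //   seg_offsets = {3 * 224 * 224, 224 * 224, 224, 1}
  //   offsets     = {0, 0, 10, 0}
  //   extends     = {1, 3, 10, 224}
  // offset_info interleaves them per dimension as
  //   [offset_0, extent_0, seg_offset_0, offset_1, extent_1, seg_offset_1, ...]
  // and is copied to device memory below for SliceKernel to index into the input.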
if (offset_temp_data_ == nullptr) {
cudaMalloc(&offset_temp_data_, 3 * num_dims * sizeof(int));
}
cudaMemcpyAsync(offset_temp_data_, offset_info.data(),
sizeof(int) * 3 * num_dims, cudaMemcpyHostToDevice,
copy_stream_);
cudaEventRecord(copy_event_, copy_stream_);
cudaStreamWaitEvent(stream, copy_event_, 0);
int threads = 256;
int blocks = (out_num + threads - 1) / threads;
auto input_type = getDataType();
if (input_type == nvinfer1::DataType::kFLOAT) {
const float *input1 = static_cast<const float *>(inputs[0]);
float *output = static_cast<float *>(outputs[0]);
SliceKernel<float><<<blocks, threads, 3 * num_dims * sizeof(int), stream>>>(
out_num, num_dims, input1, offset_temp_data_, output);
} else if (input_type == nvinfer1::DataType::kHALF) {
#ifdef SUPPORTS_CUDA_FP16
const half *input1 = static_cast<const half *>(inputs[0]);
half *output = static_cast<half *>(outputs[0]);
SliceKernel<half><<<blocks, threads, 3 * num_dims * sizeof(int), stream>>>(
out_num, num_dims, input1, offset_temp_data_, output);
#else
PADDLE_THROW(platform::errors::Fatal(
"The cuda archs you specific should greater than 600."));
#endif
} else {
PADDLE_THROW(platform::errors::Fatal(
"The Slice TRT Plugin's input type should be float or half."));
}
return cudaGetLastError() != cudaSuccess;
}
size_t SlicePlugin::getSerializationSize() {
return getBaseSerializationSize() + SerializedSize(getPluginType()) +
SerializedSize(starts_) + SerializedSize(ends_) +
SerializedSize(axes_) + SerializedSize(ban_fp16_);
}
void SlicePlugin::serialize(void *buffer) {
SerializeValue(&buffer, getPluginType());
serializeBase(buffer);
SerializeValue(&buffer, starts_);
SerializeValue(&buffer, ends_);
SerializeValue(&buffer, axes_);
SerializeValue(&buffer, ban_fp16_);
}
// Dynamic Plugin below.
#if IS_TRT_VERSION_GE(6000)
SlicePluginDynamic::SlicePluginDynamic(std::vector<int> starts,
std::vector<int> ends,
std::vector<int> axes, bool ban_fp16)
: starts_(starts), ends_(ends), axes_(axes), ban_fp16_(ban_fp16) {
cudaEventCreate(&copy_event_);
cudaStreamCreate(&copy_stream_);
}
SlicePluginDynamic::SlicePluginDynamic(void const *serialData,
size_t serialLength) {
DeserializeValue(&serialData, &serialLength, &starts_);
DeserializeValue(&serialData, &serialLength, &ends_);
DeserializeValue(&serialData, &serialLength, &axes_);
DeserializeValue(&serialData, &serialLength, &ban_fp16_);
cudaEventCreate(&copy_event_);
cudaStreamCreate(&copy_stream_);
}
void SlicePluginDynamic::destroy() {
cudaStreamDestroy(copy_stream_);
cudaEventDestroy(copy_event_);
cudaFree(offset_temp_data_);
delete this;
}
int SlicePluginDynamic::initialize() { return 0; }
size_t SlicePluginDynamic::getSerializationSize() const { return 0; }
void SlicePluginDynamic::serialize(void *buffer) const {}
size_t SlicePluginDynamic::getSerializationSize() const {
  size_t size = SerializedSize(starts_) + SerializedSize(ends_) +
                SerializedSize(axes_) + SerializedSize(ban_fp16_);
  return size;
}
void SlicePluginDynamic::serialize(void *buffer) const {
SerializeValue(&buffer, starts_);
SerializeValue(&buffer, ends_);
SerializeValue(&buffer, axes_);
SerializeValue(&buffer, ban_fp16_);
}
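// Note: the field order here (starts_, ends_, axes_, ban_fp16_) must stay in sync
// with the DeserializeValue calls in the deserialization constructor and with
// getSerializationSize() above.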
nvinfer1::DimsExprs SlicePluginDynamic::getOutputDimensions(
int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs,
......@@ -136,9 +323,9 @@ int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc,
std::vector<int> offsets;
std::vector<int> extends;
offsets.reserve(num_dims);
extends.reserve(num_dims);
seg_offsets.reserve(num_dims);
offsets.resize(num_dims);
extends.resize(num_dims);
seg_offsets.resize(num_dims);
seg_offsets[num_dims - 1] = 1;
for (int i = num_dims - 2; i >= 0; i--) {
......@@ -160,16 +347,16 @@ int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc,
offset_info.push_back(seg_offsets[i]);
}
framework::Tensor offset_temp_tensor;
if (offset_temp_data_ == nullptr) {
cudaMalloc(&offset_temp_data_, 3 * num_dims * sizeof(int));
}
int device_id;
cudaGetDevice(&device_id);
offset_temp_tensor.Resize({3 * num_dims});
auto *offset_temp_data =
offset_temp_tensor.mutable_data<int>(platform::CUDAPlace(device_id));
cudaMemcpyAsync(offset_temp_data_, offset_info.data(),
sizeof(int) * 3 * num_dims, cudaMemcpyHostToDevice,
copy_stream_);
cudaMemcpyAsync(offset_temp_data, offset_info.data(),
sizeof(int) * 3 * num_dims, cudaMemcpyHostToDevice, stream);
cudaEventRecord(copy_event_, copy_stream_);
cudaStreamWaitEvent(stream, copy_event_, 0);
int threads = 256;
int blocks = (out_num + threads - 1) / threads;
......@@ -178,13 +365,13 @@ int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc,
const float *input1 = static_cast<const float *>(inputs[0]);
float *output = static_cast<float *>(outputs[0]);
SliceKernel<float><<<blocks, threads, 3 * num_dims * sizeof(int), stream>>>(
out_num, num_dims, input1, offset_temp_data, output);
out_num, num_dims, input1, offset_temp_data_, output);
} else if (input_type == nvinfer1::DataType::kHALF) {
#ifdef SUPPORTS_CUDA_FP16
const half *input1 = static_cast<const half *>(inputs[0]);
half *output = static_cast<half *>(outputs[0]);
SliceKernel<half><<<blocks, threads, 3 * num_dims * sizeof(int), stream>>>(
out_num, num_dims, input1, offset_temp_data, output);
out_num, num_dims, input1, offset_temp_data_, output);
#else
PADDLE_THROW(platform::errors::Fatal(
"The cuda archs you specific should greater than 600."));
......
......@@ -26,17 +26,56 @@ namespace inference {
namespace tensorrt {
namespace plugin {
class SlicePlugin : public PluginTensorRT {
public:
explicit SlicePlugin(std::vector<int> starts, std::vector<int> ends,
std::vector<int> axes, bool ban_fp16);
// It was used for tensorrt deserialization.
// It should not be called by users.
SlicePlugin(void const* serial_data, size_t serial_length);
~SlicePlugin();
SlicePlugin* clone() const override;
const char* getPluginType() const override { return "slice_plugin"; }
int getNbOutputs() const override { return 1; }
int initialize() override { return 0; }
bool supportsFormat(nvinfer1::DataType type,
nvinfer1::PluginFormat format) const override;
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs,
int nb_input_dims) override;
int enqueue(int batch_size, const void* const* inputs, void** outputs,
void* workspace, cudaStream_t stream) override;
protected:
size_t getSerializationSize() override;
// TRT will call this func to serialize the configuration of TRT
// It should not be called by users.
void serialize(void* buffer) override;
private:
std::vector<int> starts_;
std::vector<int> ends_;
std::vector<int> axes_;
bool ban_fp16_{false};
int* offset_temp_data_{nullptr};
cudaEvent_t copy_event_;
cudaStream_t copy_stream_;
};
#if IS_TRT_VERSION_GE(6000)
class SlicePluginDynamic : public DynamicPluginTensorRT {
public:
explicit SlicePluginDynamic(std::vector<int> starts, std::vector<int> ends,
std::vector<int> axes, bool ban_fp16)
: starts_(starts), ends_(ends), axes_(axes), ban_fp16_(ban_fp16) {}
SlicePluginDynamic(void const* serialData, size_t serialLength) {}
std::vector<int> axes, bool ban_fp16);
nvinfer1::IPluginV2DynamicExt* clone() const override {
return new SlicePluginDynamic(starts_, ends_, axes_, ban_fp16_);
}
SlicePluginDynamic(void const* serialData, size_t serialLength);
const char* getPluginType() const override { return "slice_plugin"; }
int getNbOutputs() const override { return 1; }
int initialize() override;
......@@ -72,15 +111,54 @@ class SlicePluginDynamic : public DynamicPluginTensorRT {
const nvinfer1::DataType* inputTypes,
int nbInputs) const override;
void destroy() override { delete this; }
void destroy() override;
private:
std::vector<int> starts_;
std::vector<int> ends_;
std::vector<int> axes_;
bool ban_fp16_{false};
int* offset_temp_data_{nullptr};
cudaEvent_t copy_event_;
cudaStream_t copy_stream_;
};
class SlicePluginV2Creator : public nvinfer1::IPluginCreator {
public:
SlicePluginV2Creator() {}
const char* getPluginName() const override { return "slice_plugin"; }
const char* getPluginVersion() const override { return "1"; }
const nvinfer1::PluginFieldCollection* getFieldNames() override {
return &field_collection_;
}
nvinfer1::IPluginV2* createPlugin(
const char* name, const nvinfer1::PluginFieldCollection* fc) override {
return nullptr;
}
nvinfer1::IPluginV2* deserializePlugin(const char* name,
const void* serialData,
size_t serialLength) override {
auto plugin = new SlicePluginDynamic(serialData, serialLength);
return plugin;
}
void setPluginNamespace(const char* libNamespace) override {
namespace_ = libNamespace;
}
const char* getPluginNamespace() const override { return namespace_.c_str(); }
private:
std::string namespace_;
nvinfer1::PluginFieldCollection field_collection_;
};
REGISTER_TRT_PLUGIN_V2(SlicePluginV2Creator);
#endif
} // namespace plugin
......
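The new SlicePluginV2Creator is what REGISTER_TRT_PLUGIN_V2 exposes to TensorRT's global plugin registry. A rough sketch of how a deserializing engine would locate it (assuming TensorRT >= 6; the lookup call is standard TensorRT API, not part of this diff):

    auto* creator = getPluginRegistry()->getPluginCreator("slice_plugin", "1");
    // TensorRT then calls creator->deserializePlugin(name, serial_data, serial_length),
    // which rebuilds a SlicePluginDynamic from the serialized fields.

The plugin name and version strings therefore have to match getPluginType()/getPluginVersion() on the plugin itself.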
......@@ -480,10 +480,9 @@ if(WITH_GPU AND TENSORRT_FOUND)
inference_download_and_uncompress(${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test "ernie_model_4_unserialized.tgz")
endif()
# disable test_trt_dynamic_shape_ernie_ser_deser temporary
#inference_analysis_test(test_trt_dynamic_shape_ernie_ser_deser SRCS trt_dynamic_shape_ernie_deserialize_test.cc
# EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
# ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_unserialized)
inference_analysis_test(test_trt_dynamic_shape_ernie_ser_deser SRCS trt_dynamic_shape_ernie_deserialize_test.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_unserialized)
endif()
......
......@@ -245,8 +245,14 @@ TEST(Analyzer_bert, transfer_scope_cache) {
// Since paddle::framework::global_transfer_scope_cache() and
// paddle::framework::global_transfer_data_cache() are thread_local,
// their pointer should be different among different thread id.
PADDLE_ENFORCE(global_transfer_scope_cache.size(), threads_num);
PADDLE_ENFORCE(global_transfer_data_cache.size(), threads_num);
PADDLE_ENFORCE_EQ(
global_transfer_scope_cache.size(), threads_num,
paddle::platform::errors::Fatal(
"The size of scope cache is not equal to thread number."));
PADDLE_ENFORCE_EQ(
global_transfer_data_cache.size(), threads_num,
paddle::platform::errors::Fatal(
"The size of data cache is not equal to thread number."));
}
} // namespace inference
......
......@@ -69,11 +69,13 @@ void PD_run() {
PD_DeletePaddleTensor(input);
int size;
const int* out_shape = PD_GetPaddleTensorShape(out_data, &size);
CHECK(size == 2) << "The Output shape's size is NOT match.";
PADDLE_ENFORCE_EQ(size, 2, paddle::platform::errors::InvalidArgument(
"The Output shape's size is NOT match."));
std::vector<int> ref_outshape_size({9, 6});
for (int i = 0; i < 2; ++i) {
CHECK(out_shape[i] == ref_outshape_size[i])
<< "The Output's shape is NOT match.";
PADDLE_ENFORCE_EQ(out_shape[i], ref_outshape_size[i],
paddle::platform::errors::InvalidArgument(
"The Output shape's size is NOT match."));
}
PD_DeletePaddleBuf(buf);
}
......
......@@ -36,9 +36,9 @@ void zero_copy_run() {
PD_SwitchIrDebug(config, true);
PD_SetModel(config, prog_file.c_str(), params_file.c_str());
bool use_feed_fetch = PD_UseFeedFetchOpsEnabled(config);
CHECK(!use_feed_fetch) << "NO";
EXPECT_FALSE(use_feed_fetch);
bool specify_input_names = PD_SpecifyInputName(config);
CHECK(specify_input_names) << "NO";
EXPECT_TRUE(specify_input_names);
const int batch_size = 1;
const int channels = 3;
......@@ -85,13 +85,13 @@ TEST(PD_AnalysisConfig, profile_mkldnn) {
PD_SwitchIrDebug(config, true);
PD_EnableMKLDNN(config);
bool mkldnn_enable = PD_MkldnnEnabled(config);
CHECK(mkldnn_enable) << "NO";
EXPECT_TRUE(mkldnn_enable);
PD_EnableMkldnnQuantizer(config);
bool quantizer_enable = PD_MkldnnQuantizerEnabled(config);
CHECK(quantizer_enable) << "NO";
EXPECT_TRUE(quantizer_enable);
PD_EnableMkldnnBfloat16(config);
bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config);
CHECK(bfloat16_enable) << "NO";
EXPECT_TRUE(bfloat16_enable);
PD_SetMkldnnCacheCapacity(config, 0);
PD_SetModel(config, prog_file.c_str(), params_file.c_str());
PD_DeleteAnalysisConfig(config);
......
......@@ -126,7 +126,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
std::string turn_mask_pre = "turn_mask_";
auto one_batch = data->NextBatch();
PADDLE_ENFORCE(!one_batch.response.empty());
PADDLE_ENFORCE(
!one_batch.response.empty(),
paddle::platform::errors::Fatal("The response of one batch is empty."));
int size = one_batch.response[0].size();
CHECK_EQ(size, kMaxTurnLen);
// turn tensor assignment
......@@ -214,11 +216,17 @@ void profile(bool use_mkldnn = false) {
input_slots_all, &outputs, FLAGS_num_threads);
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of outputs should be greater than 0."));
auto output = outputs.back();
PADDLE_ENFORCE_GT(output.size(), 0);
PADDLE_ENFORCE_GT(output.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0);
PADDLE_ENFORCE_GT(size, 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
float *result = static_cast<float *>(output[0].data.data());
for (size_t i = 0; i < size; i++) {
EXPECT_NEAR(result[i], result_data[i], 1e-3);
......
......@@ -146,8 +146,9 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
auto iterations = test_data.size();
PADDLE_ENFORCE_LE(
static_cast<size_t>(num_images), iterations * test_data_batch_size,
paddle::platform::errors::Fatal(
"The requested quantization warmup data size " +
std::to_string(num_images) + " is bigger than all test data size.");
std::to_string(num_images) + " is bigger than all test data size."));
PaddleTensor images;
images.name = "image";
......@@ -237,8 +238,9 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
}
PADDLE_ENFORCE_EQ(
static_cast<size_t>(num_objects), static_cast<size_t>(objects_accum),
"The requested num of objects " + std::to_string(num_objects) +
" is the same as objects_accum.");
paddle::platform::errors::Fatal("The requested num of objects " +
std::to_string(num_objects) +
" is the same as objects_accum."));
auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(4);
(*warmup_data)[0] = std::move(images);
......
......@@ -98,7 +98,9 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
input_tensor.name = "word";
input_tensor.dtype = PaddleDType::INT64;
TensorAssignData<int64_t>(&input_tensor, {one_batch.data}, one_batch.lod);
PADDLE_ENFORCE_EQ(batch_size, static_cast<int>(one_batch.lod.size() - 1));
PADDLE_ENFORCE_EQ(
batch_size, static_cast<int>(one_batch.lod.size() - 1),
      paddle::platform::errors::Fatal("The lod size of one batch is invalid."));
input_slots->assign({input_tensor});
}
......@@ -137,12 +139,17 @@ TEST(Analyzer_LAC, profile) {
24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25, 25, 25, 25, 25,
44, 24, 25, 25, 25, 36, 42, 43, 44, 14, 15, 44, 14, 15, 44, 14,
15, 44, 38, 39, 14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23};
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL);
PADDLE_ENFORCE_EQ(output.size(), 1UL,
paddle::platform::errors::Fatal(
"The size of output should be equal to 1."));
size_t size = GetSize(output[0]);
size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t);
PADDLE_ENFORCE_GE(size, batch1_size);
PADDLE_ENFORCE_GE(size, batch1_size, paddle::platform::errors::Fatal(
"The size of batch is invaild."));
int64_t *pdata = static_cast<int64_t *>(output[0].data.data());
for (size_t i = 0; i < batch1_size; ++i) {
EXPECT_EQ(pdata[i], lac_ref_data[i]);
......
......@@ -117,11 +117,17 @@ void profile(bool memory_load = false) {
// the first inference result
const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26,
48, 39, 38, 16, 25};
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL);
PADDLE_ENFORCE_EQ(output.size(), 1UL,
paddle::platform::errors::Fatal(
"The size of output should be equal to 1."));
size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0);
PADDLE_ENFORCE_GT(size, 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
int64_t *result = static_cast<int64_t *>(output[0].data.data());
for (size_t i = 0; i < std::min<size_t>(11, size); i++) {
EXPECT_EQ(result[i], chinese_ner_result_data[i]);
......
......@@ -136,11 +136,17 @@ TEST(Analyzer_Pyramid_DNN, profile) {
input_slots_all, &outputs, FLAGS_num_threads);
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data && !FLAGS_zero_copy) {
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL);
PADDLE_ENFORCE_EQ(output.size(), 1UL,
paddle::platform::errors::Fatal(
"The size of output should be equal to 1."));
size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0);
PADDLE_ENFORCE_GT(size, 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
float *result = static_cast<float *>(output[0].data.data());
// output is probability, which is in (0, 1).
for (size_t i = 0; i < size; i++) {
......
......@@ -135,11 +135,17 @@ TEST(Analyzer_rnn2, profile) {
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
// the first inference result
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto output = outputs.back();
PADDLE_ENFORCE_GT(output.size(), 0);
PADDLE_ENFORCE_GT(output.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0);
PADDLE_ENFORCE_GT(size, 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
float *result = static_cast<float *>(output[0].data.data());
for (size_t i = 0; i < size; i++) {
EXPECT_NEAR(result[i], result_data[i], 1e-3);
......
......@@ -47,7 +47,8 @@ struct DataRecord {
num_lines++;
std::vector<std::string> data;
split(line, '\t', &data);
PADDLE_ENFORCE(data.size() >= 4);
PADDLE_ENFORCE_GT(data.size(), 4, paddle::platform::errors::Fatal(
"The size of data is invaild."));
// load title1 data
std::vector<int64_t> title1_data;
split_to_int64(data[0], ' ', &title1_data);
......@@ -120,11 +121,17 @@ TEST(Analyzer_seq_conv1, profile) {
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
// the first inference result
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL);
PADDLE_ENFORCE_EQ(output.size(), 1UL,
paddle::platform::errors::Fatal(
"The size of output should be equal to 0."));
size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0);
PADDLE_ENFORCE_GT(size, 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
float *result = static_cast<float *>(output[0].data.data());
// output is probability, which is in (0, 1).
for (size_t i = 0; i < size; i++) {
......
......@@ -56,20 +56,26 @@ struct DataRecord {
std::vector<float> slot_data;
split_to_float(data[1], ' ', &slot_data);
std::string name = data[0];
PADDLE_ENFORCE_EQ(slot_data.size() % 11, 0UL,
"line %d, %s should be divisible", num_lines, name);
PADDLE_ENFORCE_EQ(
slot_data.size() % 11, 0UL,
paddle::platform::errors::Fatal("line %d, %s should be divisible",
num_lines, name));
datasets[name].emplace_back(std::move(slot_data));
}
num_samples = num_lines / num_slots;
PADDLE_ENFORCE_EQ(num_samples * num_slots, static_cast<size_t>(num_lines),
"num samples should be divisible");
PADDLE_ENFORCE_GT(num_samples, 0UL);
PADDLE_ENFORCE_EQ(
num_samples * num_slots, static_cast<size_t>(num_lines),
paddle::platform::errors::Fatal("num samples should be divisible"));
PADDLE_ENFORCE_GT(num_samples, 0UL,
paddle::platform::errors::Fatal(
"The num of samples should be greater than 0."));
}
void Prepare(int bs) {
for (auto it = datasets.begin(); it != datasets.end(); ++it) {
PADDLE_ENFORCE_EQ(it->second.size(), num_samples,
"size of each slot should be equal");
PADDLE_ENFORCE_EQ(
it->second.size(), num_samples,
paddle::platform::errors::Fatal("size of each slot should be equal"));
}
size_t num_batches = num_samples / bs;
EXPECT_GT(num_batches, 0UL);
......@@ -90,8 +96,10 @@ struct DataRecord {
std::copy(datas[id].begin(), datas[id].end(),
std::back_inserter(slot.data[k]));
size_t len = datas[id].size() / 11;
PADDLE_ENFORCE_EQ(len * 11, datas[id].size(),
"%s %d size should be divisible", slot.name, id);
PADDLE_ENFORCE_EQ(
len * 11, datas[id].size(),
paddle::platform::errors::Fatal("%s %d size should be divisible",
slot.name, id));
lod[k + 1] = lod[k] + len;
}
slot.shape.assign({static_cast<int>(lod[bs]), 11});
......
......@@ -22,7 +22,9 @@ struct DataReader {
: file(new std::ifstream(path)) {}
bool NextBatch(std::vector<PaddleTensor> *input, int batch_size) {
PADDLE_ENFORCE_EQ(batch_size, 1);
PADDLE_ENFORCE_EQ(batch_size, 1,
paddle::platform::errors::Fatal(
"The size of batch should be equal to 1."));
std::string line;
PaddleTensor tensor;
tensor.dtype = PaddleDType::INT64;
......@@ -81,7 +83,9 @@ TEST(Analyzer_Text_Classification, profile) {
if (FLAGS_num_threads == 1) {
// Get output
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
LOG(INFO) << "get outputs " << outputs.back().size();
for (auto &output : outputs.back()) {
LOG(INFO) << "output.shape: " << to_string(output.shape);
......
......@@ -59,7 +59,9 @@ void SetConfig(AnalysisConfig *cfg) {
}
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data.");
PADDLE_ENFORCE_EQ(
FLAGS_test_all_data, 0,
paddle::platform::errors::Fatal("Only have single batch of data."));
std::string line;
std::ifstream file(FLAGS_infer_data);
std::getline(file, line);
......@@ -99,7 +101,9 @@ void profile(bool use_mkldnn = false) {
auto refer = ProcessALine(line);
file.close();
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_GT(outputs.size(), 0,
paddle::platform::errors::Fatal(
"The size of output should be greater than 0."));
auto &output = outputs.back().front();
size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
CHECK_EQ(numel, refer.data.size());
......
......@@ -12,15 +12,33 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <dirent.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <unistd.h>
#include "paddle/fluid/inference/tests/api/trt_test_helper.h"
namespace paddle {
namespace inference {
int DeleteCache(std::string path) {
DIR* dir = opendir(path.c_str());
if (dir == NULL) return 0;
struct dirent* ptr;
while ((ptr = readdir(dir)) != NULL) {
if (std::strcmp(ptr->d_name, ".") == 0 ||
std::strcmp(ptr->d_name, "..") == 0) {
continue;
} else if (ptr->d_type == 8) {
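      // d_type == 8 corresponds to DT_REG, i.e. a regular (cache) file.
      // Note that the function returns after removing the first such file.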
std::string file_rm = path + "/" + ptr->d_name;
return remove(file_rm.c_str());
}
}
return 0;
}
void run(const AnalysisConfig& config, std::vector<float>* out_data) {
auto predictor = CreatePaddlePredictor(config);
auto input_names = predictor->GetInputNames();
......@@ -86,6 +104,11 @@ void run(const AnalysisConfig& config, std::vector<float>* out_data) {
void trt_ernie(bool with_fp16, std::vector<float> result) {
AnalysisConfig config;
std::string model_dir = FLAGS_infer_model;
// Delete serialization cache to perform serialization first rather than
// deserialization.
std::string opt_cache_dir = FLAGS_infer_model + "/_opt_cache";
DeleteCache(opt_cache_dir);
SetConfig(&config, model_dir, true /* use_gpu */);
config.SwitchUseFeedFetchOps(false);
......
......@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h"
......@@ -162,7 +163,8 @@ void TestInference(const std::string& dirname,
// int device_id = place.GetDeviceId();
paddle::platform::SetDeviceId(0);
#else
PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
PADDLE_THROW(paddle::platform::errors::Unavailable(
"'CUDAPlace' is not supported in CPU only device."));
#endif
}
......
......@@ -16,6 +16,7 @@
#include <random>
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
......@@ -41,12 +42,14 @@ TEST(BestFitAllocator, concurrent_cuda) {
LockedAllocator concurrent_allocator(
std::unique_ptr<Allocator>(new BestFitAllocator(cuda_allocation.get())));
platform::CUDAPlace gpu(0);
platform::CUDADeviceContext dev_ctx(gpu);
auto th_main = [&](std::random_device::result_type seed) {
std::default_random_engine engine(seed);
std::uniform_int_distribution<size_t> dist(1U, 1024U);
platform::CUDAPlace gpu(0);
platform::CUDADeviceContext dev_ctx(gpu);
std::array<size_t, 1024> buf;
for (size_t i = 0; i < 128; ++i) {
size_t allocate_size = dist(engine);
......
......@@ -110,10 +110,12 @@ struct VisitDataArgMinMaxFunctor {
CALL_ARG_MINMAX_FUNCTOR(6);
break;
default:
PADDLE_THROW(
PADDLE_ENFORCE_LE(
x_dims.size(), 6,
platform::errors::InvalidArgument(
"%s operator doesn't supports tensors whose ranks are greater "
"than 6.",
(EnumArgMinMaxValue == kArgMin ? "argmin" : "argmax"));
(EnumArgMinMaxValue == kArgMin ? "argmin" : "argmax")));
break;
#undef CALL_ARG_MINMAX_FUNCTOR
}
......@@ -164,7 +166,8 @@ class ArgMinMaxOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_LT(
axis, x_dims.size(),
platform::errors::InvalidArgument(
"'axis'(%d) must be less than Rank(X)(%d).", axis, x_dims.size()));
"'axis'(%d) must be less than Rank(X)(%d) of Input(X).", axis,
x_dims.size()));
const int& dtype = ctx->Attrs().Get<int>("dtype");
PADDLE_ENFORCE_EQ(
......@@ -192,10 +195,11 @@ class ArgMinMaxOp : public framework::OperatorWithKernel {
}
PADDLE_ENFORCE_LE(
all_element_num, INT_MAX,
platform::errors::InvalidArgument(
"The element num of the argmin/argmax input at axis is "
"%d, is larger than int32 maximum value:%d, you must "
"set the dtype of argmin/argmax to 'int64'.",
all_element_num, INT_MAX);
all_element_num, INT_MAX));
}
}
std::vector<int64_t> vec;
......
......@@ -52,7 +52,10 @@ class AssignFunctor {
template <typename T>
void operator()(const T &v) const {
PADDLE_THROW("Not support type for assign op %s", typeid(T).name());
PADDLE_ENFORCE_EQ(
true, false,
platform::errors::PermissionDenied(
"Not support type for assign op with type %s", typeid(T).name()));
}
private:
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/cudnn_helper.h"
#include "paddle/fluid/platform/dynload/cudnn.h"
namespace paddle {
namespace operators {
class ScopedRNNBase {
public:
ScopedRNNBase(int seq_length, int batch_size, int input_size, int hidden_size,
int num_layers, float dropout_prob, int seed, int weight_numel,
bool initialized, bool is_bidirec)
: seq_length_(seq_length),
batch_size_(batch_size),
input_size_(input_size),
hidden_size_(hidden_size),
num_layers_(num_layers),
dropout_prob_(dropout_prob),
seed_(seed),
weight_numel_(weight_numel),
initialized_(initialized),
is_bidirec_(is_bidirec) {}
template <typename T>
void Create(const cudnnHandle_t& handle, const platform::Place& place,
const std::vector<int>& sequence_length, size_t* workspace_size,
size_t* reserve_size, framework::Tensor* dropout_state) {
int numDirections = is_bidirec_ ? 2 : 1;
cudnnDataType_t cudnn_type = platform::CudnnDataType<T>::type;
// ------------------- cudnn x, y descriptors ---------------------
std::vector<int> dims_x = {batch_size_, input_size_, 1};
std::vector<int> strides_x = {input_size_, 1, 1};
std::vector<int> dims_y = {batch_size_, hidden_size_ * numDirections, 1};
std::vector<int> strides_y = {hidden_size_ * numDirections, 1, 1};
for (int i = 0; i < seq_length_; ++i) {
x_descs_.emplace_back(x_desc_.descriptor<T>(dims_x, strides_x));
y_descs_.emplace_back(y_desc_.descriptor<T>(dims_y, strides_y));
}
if (!sequence_length.empty()) {
x_seq_desc_.descriptor<T>(seq_length_, batch_size_, input_size_, true,
sequence_length);
y_seq_desc_.descriptor<T>(seq_length_, batch_size_,
hidden_size_ * numDirections, true,
sequence_length);
}
// ------------------- cudnn hx, hy, cx, cy descriptors----------
std::vector<int> dims_hx = {num_layers_ * numDirections, batch_size_,
hidden_size_};
std::vector<int> strides_hx = {hidden_size_ * batch_size_, hidden_size_, 1};
init_h_desc_.descriptor<T>(dims_hx, strides_hx);
init_c_desc_.descriptor<T>(dims_hx, strides_hx);
last_h_desc_.descriptor<T>(dims_hx, strides_hx);
last_c_desc_.descriptor<T>(dims_hx, strides_hx);
// ------------------- cudnn dropout descriptors ---------------------
size_t state_size;
if (!initialized_) {
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cudnnDropoutGetStatesSize(handle, &state_size));
dropout_state->mutable_data<uint8_t>({static_cast<int64_t>(state_size)},
place);
}
dropout_desc_.descriptor(handle, place, initialized_, dropout_prob_,
dropout_state, seed_, state_size);
// ------------------- cudnn rnn descriptors ---------------------
#if CUDNN_VERSION >= 6000
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor_v6(
handle, rnn_desc_.desc(), hidden_size_, num_layers_,
dropout_desc_.desc(), CUDNN_LINEAR_INPUT,
is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
CUDNN_RNN_ALGO_STANDARD, cudnn_type));
#else
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor(
rnn_desc_.desc(), hidden_size_, num_layers_, dropout_desc_.desc(),
CUDNN_LINEAR_INPUT,
is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
cudnn_type));
#endif
if (!sequence_length.empty()) {
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNPaddingMode(
rnn_desc_.desc(), CUDNN_RNN_PADDED_IO_ENABLED));
}
// ------------------- cudnn weights_size ---------------------
size_t weights_size_;
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNParamsSize(
handle, rnn_desc_.desc(), x_descs_[0], &weights_size_, cudnn_type));
PADDLE_ENFORCE_EQ(
weights_size_, sizeof(T) * weight_numel_,
platform::errors::InvalidArgument(
"The cudnn lstm and setting weight size should be same."));
// ------------------- cudnn weight descriptors ---------------------
platform::DataLayout layout = platform::DataLayout::kNCHW;
int dim_tmp = weights_size_ / sizeof(T);
std::vector<int> dim_w = {dim_tmp, 1, 1};
weight_desc_.descriptor<T>(layout, dim_w);
// ------------------- cudnn workspace, reserve size ---------------------
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNWorkspaceSize(
handle, rnn_desc_.desc(), seq_length_, x_descs_.data(),
workspace_size));
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cudnnGetRNNTrainingReserveSize(
handle, rnn_desc_.desc(), seq_length_, x_descs_.data(),
reserve_size));
}
cudnnTensorDescriptor_t* x_descs() { return x_descs_.data(); }
cudnnTensorDescriptor_t* y_descs() { return y_descs_.data(); }
cudnnRNNDataDescriptor_t x_seq_desc() { return x_seq_desc_.desc(); }
cudnnRNNDataDescriptor_t y_seq_desc() { return y_seq_desc_.desc(); }
cudnnTensorDescriptor_t init_h_desc() { return init_h_desc_.desc(); }
cudnnTensorDescriptor_t init_c_desc() { return init_c_desc_.desc(); }
cudnnTensorDescriptor_t last_h_desc() { return last_h_desc_.desc(); }
cudnnTensorDescriptor_t last_c_desc() { return last_c_desc_.desc(); }
cudnnRNNDescriptor_t rnn_desc() { return rnn_desc_.desc(); }
cudnnDropoutDescriptor_t dropout_desc() { return dropout_desc_.desc(); }
cudnnFilterDescriptor_t weight_desc() { return weight_desc_.desc(); }
private:
int seq_length_;
int batch_size_;
int input_size_;
int hidden_size_;
int num_layers_;
float dropout_prob_;
int seed_;
int weight_numel_;
bool initialized_;
bool is_bidirec_;
std::vector<cudnnTensorDescriptor_t> x_descs_;
std::vector<cudnnTensorDescriptor_t> y_descs_;
platform::ScopedTensorDescriptor x_desc_;
platform::ScopedTensorDescriptor y_desc_;
platform::ScopedRNNTensorDescriptor x_seq_desc_;
platform::ScopedRNNTensorDescriptor y_seq_desc_;
platform::ScopedTensorDescriptor init_h_desc_;
platform::ScopedTensorDescriptor init_c_desc_;
platform::ScopedTensorDescriptor last_h_desc_;
platform::ScopedTensorDescriptor last_c_desc_;
platform::ScopedDropoutDescriptor dropout_desc_;
platform::ScopedFilterDescriptor weight_desc_;
platform::ScopedRNNDescriptor rnn_desc_;
};
} // namespace operators
} // namespace paddle
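As used by the updated LSTM kernels below, the typical call sequence for ScopedRNNBase is (a sketch only; the handle, place, tensors and scalar arguments are assumed to be prepared by the kernel):

    ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size, num_layers,
                      dropout_prob, seed, weight_numel, state_initialized, is_bidirec);
    size_t workspace_size = 0, reserve_size = 0;
    rnn.Create<float>(handle, place, sequence_length, &workspace_size,
                      &reserve_size, &dropout_state);
    // Afterwards rnn.x_descs(), rnn.weight_desc(), rnn.rnn_desc(), ... feed the
    // cudnnRNNForward*/cudnnRNNBackward* calls shown in the kernel diffs below.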
......@@ -51,6 +51,16 @@ class CudnnLSTMOp : public framework::OperatorWithKernel {
"received InitH's rank is %d.",
init_h_dims.size()));
if (ctx->HasInput("SequenceLength")) {
auto seq_dims = ctx->GetInputDim("SequenceLength");
PADDLE_ENFORCE_EQ(
in_dims[1], seq_dims[0],
platform::errors::InvalidArgument(
"The size of SequenceLength has to equal the batch_size. But "
"received batch_size is %d and the size of SequenceLength is %d.",
in_dims[1], seq_dims[0]));
}
PADDLE_ENFORCE_EQ(
in_dims[1], init_h_dims[1],
platform::errors::InvalidArgument(
......@@ -113,6 +123,12 @@ class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor) the learnable hidden-hidden weights."
" The shape is (N), where N is total weight size of the LSTM. "
" cudnn concatenate all the weight to one Tensor");
AddInput("SequenceLength",
"(Tensor) When the input data is padding, "
"set this parameter. This parameter represents "
"the variable sequence lengths in a batch. "
"The size of the vector has to equal the batch_size.")
.AsDispensable();
AddOutput("Reserve",
"(Tensor, a temporary output Tensor to store the reserve_data "
"of cudnn kernel.")
......@@ -155,13 +171,6 @@ class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault(1);
AddAttr<bool>("is_test", "True if in test phase.").SetDefault(false);
AddAttr<int>("seed", "seed to used if fix_seed is True").SetDefault(0);
AddAttr<std::vector<int>>("sequence_length",
"(vector<int>) When the input data is padding, "
"set this parameter. This parameter represents "
"the variable sequence"
"lengths in a batch. The size of the vector has "
"to equal the batch_size.")
.SetDefault({});
AddComment(R"DOC(
CUDNN LSTM implementation
......@@ -243,6 +252,9 @@ class CudnnLSTMGradOpMaker : public framework::SingleGradOpMaker<T> {
op->SetInput("InitH", this->Input("InitH"));
op->SetInput("InitC", this->Input("InitC"));
op->SetInput("W", this->Input("W"));
if (this->HasInput("SequenceLength")) {
op->SetInput("SequenceLength", this->Input("SequenceLength"));
}
op->SetInput("Reserve", this->Output("Reserve"));
op->SetInput("StateOut", this->Output("StateOut"));
op->SetInput("Out", this->Output("Out"));
......
......@@ -13,8 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#include "paddle/fluid/operators/cudnn_lstm_cache.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/cudnn_desc.h"
#include "paddle/fluid/platform/cudnn_helper.h"
......@@ -24,6 +25,43 @@ namespace operators {
using LoDTensor = framework::LoDTensor;
using Tensor = framework::Tensor;
template <typename T>
void LSTMInferece(const bool &has_seq_length, const cudnnHandle_t &handle,
const int &seq_length, ScopedRNNBase *rnn, const T *x_data,
const T *init_h_data, const T *init_c_data, const T *w_data,
T *out_data, T *last_h_data, T *last_c_data,
framework::Tensor *workspace_data,
const size_t &workspace_size) {
if (!has_seq_length) {
// for inference
// This interface is used when the input/output is unpadded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardInference(
handle, rnn->rnn_desc(), seq_length, rnn->x_descs(), x_data,
rnn->init_h_desc(), init_h_data, rnn->init_c_desc(), init_c_data,
rnn->weight_desc(), w_data, rnn->y_descs(), out_data,
rnn->last_h_desc(), last_h_data, rnn->last_c_desc(), last_c_data,
workspace_data->data<uint8_t>(), workspace_size));
} else {
#if CUDNN_VERSION >= 7201
// for inference
// This interface is used when the input/output is padded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardInferenceEx(
handle, rnn->rnn_desc(), rnn->x_seq_desc(), x_data, rnn->init_h_desc(),
init_h_data, rnn->init_c_desc(), init_c_data, rnn->weight_desc(),
w_data, rnn->y_seq_desc(), out_data, rnn->last_h_desc(), last_h_data,
rnn->last_c_desc(), last_c_data, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, workspace_data->data<uint8_t>(),
workspace_size));
#else
    // CUDNN VERSION has to be >= 7.2.1
PADDLE_THROW(platform::errors::Unavailable(
"The padded input is supported by "
"cudnnRNNForwardInferenceEx, but it only works when "
"the version of cudnn is larger than 7.2.1"));
#endif
}
}
template <typename T>
class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
public:
......@@ -56,7 +94,13 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
int num_layers = ctx.Attr<int>("num_layers");
bool is_test = ctx.Attr<bool>("is_test");
int seed = ctx.Attr<int>("seed");
auto sequence_length = ctx.Attr<std::vector<int>>("sequence_length");
bool has_seq_length = ctx.HasInput("SequenceLength");
std::vector<int> SequenceLength;
if (has_seq_length) {
auto *sequence_length = ctx.Input<Tensor>("SequenceLength");
SequenceLength = operators::GetDataFromTensor<int>(sequence_length);
}
auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
auto handle = dev_ctx.cudnn_handle();
......@@ -70,58 +114,32 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
size_t workspace_size;
size_t reserve_size;
platform::ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size,
ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size,
num_layers, dropout_prob, seed, weight_numel,
state_initialized, is_bidirec);
rnn.Create<T>(handle, ctx.GetPlace(), sequence_length, &workspace_size,
rnn.Create<T>(handle, ctx.GetPlace(), SequenceLength, &workspace_size,
&reserve_size, state_out);
framework::Tensor workspace_data_;
workspace_data_.Resize({static_cast<int64_t>(workspace_size)});
workspace_data_.mutable_data<uint8_t>(ctx.GetPlace());
workspace_data_.mutable_data<uint8_t>(
{static_cast<int64_t>(workspace_size)}, ctx.GetPlace());
auto *reserve_data = reserve->mutable_data<uint8_t>(
{static_cast<int64_t>(reserve_size)}, ctx.GetPlace());
if (is_test) {
if (sequence_length.empty()) {
// for inference
// This interface is used when the input/output is unpadded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardInference(
handle, rnn.rnn_desc(), seq_length, rnn.x_desc(), x_data,
rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data,
rnn.w_desc(), w_data, rnn.y_desc(), out_data, rnn.hy_desc(),
last_h_data, rnn.cy_desc(), last_c_data,
workspace_data_.data<uint8_t>(), workspace_size));
LSTMInferece<T>(has_seq_length, handle, seq_length, &rnn, x_data,
init_h_data, init_c_data, w_data, out_data, last_h_data,
last_c_data, &workspace_data_, workspace_size);
} else {
#if CUDNN_VERSION >= 7201
// for inference
// This interface is used when the input/output is padded.
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cudnnRNNForwardInferenceEx(
handle, rnn.rnn_desc(), rnn.x_seq_desc(), x_data, rnn.hx_desc(),
init_h_data, rnn.cx_desc(), init_c_data, rnn.w_desc(), w_data,
rnn.y_seq_desc(), out_data, rnn.hy_desc(), last_h_data,
rnn.cy_desc(), last_c_data, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr,
workspace_data_.data<uint8_t>(), workspace_size));
#else
PADDLE_ENFORCE_NOT_NULL(
nullptr, platform::errors::Unavailable(
"The padded input is supported by "
"cudnnRNNForwardInferenceEx, but it only works when "
"the version of cudnn is larger than 7.2.1"));
#endif
}
} else {
if (sequence_length.empty()) {
if (!has_seq_length) {
// for train
// This interface is used when the input/output is unpadded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardTraining(
handle, rnn.rnn_desc(), seq_length, rnn.x_desc(), x_data,
rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data,
rnn.w_desc(), w_data, rnn.y_desc(), out_data, rnn.hy_desc(),
last_h_data, rnn.cy_desc(), last_c_data,
handle, rnn.rnn_desc(), seq_length, rnn.x_descs(), x_data,
rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data,
rnn.weight_desc(), w_data, rnn.y_descs(), out_data,
rnn.last_h_desc(), last_h_data, rnn.last_c_desc(), last_c_data,
workspace_data_.data<uint8_t>(), workspace_size, reserve_data,
reserve_size));
} else {
......@@ -130,16 +148,15 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
// This interface is used when the input/output is padded.
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cudnnRNNForwardTrainingEx(
handle, rnn.rnn_desc(), rnn.x_seq_desc(), x_data, rnn.hx_desc(),
init_h_data, rnn.cx_desc(), init_c_data, rnn.w_desc(), w_data,
rnn.y_seq_desc(), out_data, rnn.hy_desc(), last_h_data,
rnn.cy_desc(), last_c_data, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr,
workspace_data_.data<uint8_t>(), workspace_size, reserve_data,
reserve_size));
handle, rnn.rnn_desc(), rnn.x_seq_desc(), x_data,
rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data,
rnn.weight_desc(), w_data, rnn.y_seq_desc(), out_data,
rnn.last_h_desc(), last_h_data, rnn.last_c_desc(), last_c_data,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, workspace_data_.data<uint8_t>(), workspace_size,
reserve_data, reserve_size));
#else
PADDLE_ENFORCE_NOT_NULL(
nullptr, platform::errors::Unavailable(
PADDLE_THROW(platform::errors::Unavailable(
"The padded input is supported by "
"cudnnRNNForwardTrainingEx, but it only works when "
"the version of cudnn is larger than 7.2.1"));
......@@ -203,7 +220,13 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
int hidden_size = ctx.Attr<int>("hidden_size");
int num_layers = ctx.Attr<int>("num_layers");
int seed = ctx.Attr<int>("seed");
auto sequence_length = ctx.Attr<std::vector<int>>("sequence_length");
bool has_seq_length = ctx.HasInput("SequenceLength");
std::vector<int> SequenceLength;
if (has_seq_length) {
auto *sequence_length = ctx.Input<Tensor>("SequenceLength");
SequenceLength = operators::GetDataFromTensor<int>(sequence_length);
}
int seq_length = input_dims[0];
int batch_size = input->dims()[1];
......@@ -213,33 +236,33 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
size_t workspace_size;
size_t reserve_size;
platform::ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size,
num_layers, dropout_prob, seed, weight_numel,
true, is_bidirec);
ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size,
num_layers, dropout_prob, seed, weight_numel, true,
is_bidirec);
rnn.Create<T>(handle, ctx.GetPlace(), sequence_length, &workspace_size,
rnn.Create<T>(handle, ctx.GetPlace(), SequenceLength, &workspace_size,
&reserve_size, const_cast<Tensor *>(state_out));
framework::Tensor workspace_data_;
workspace_data_.Resize({static_cast<int64_t>(workspace_size)});
workspace_data_.mutable_data<uint8_t>(ctx.GetPlace());
workspace_data_.mutable_data<uint8_t>(
{static_cast<int64_t>(workspace_size)}, ctx.GetPlace());
const uint8_t *reserve_data = reserve->data<uint8_t>();
if (sequence_length.empty()) {
if (!has_seq_length) {
// This interface is used when the input/output is unpadded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardData(
handle, rnn.rnn_desc(), seq_length, rnn.y_desc(), out_data,
rnn.y_desc(), out_grad_data, rnn.hy_desc(), last_h_grad_data,
rnn.cy_desc(), last_c_grad_data, rnn.w_desc(), weight_data,
rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data, rnn.x_desc(),
in_grad_data, rnn.hx_desc(), init_h_grad_data, rnn.cx_desc(),
init_c_grad_data, workspace_data_.data<uint8_t>(), workspace_size,
const_cast<uint8_t *>(reserve_data), reserve_size));
handle, rnn.rnn_desc(), seq_length, rnn.y_descs(), out_data,
rnn.y_descs(), out_grad_data, rnn.last_h_desc(), last_h_grad_data,
rnn.last_c_desc(), last_c_grad_data, rnn.weight_desc(), weight_data,
rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data,
rnn.x_descs(), in_grad_data, rnn.init_h_desc(), init_h_grad_data,
rnn.init_c_desc(), init_c_grad_data, workspace_data_.data<uint8_t>(),
workspace_size, const_cast<uint8_t *>(reserve_data), reserve_size));
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardWeights(
handle, rnn.rnn_desc(), seq_length, rnn.x_desc(), input->data<T>(),
rnn.hx_desc(), init_h->data<T>(), rnn.y_desc(), out->data<T>(),
workspace_data_.data<uint8_t>(), workspace_size, rnn.w_desc(),
handle, rnn.rnn_desc(), seq_length, rnn.x_descs(), input->data<T>(),
rnn.init_h_desc(), init_h->data<T>(), rnn.y_descs(), out->data<T>(),
workspace_data_.data<uint8_t>(), workspace_size, rnn.weight_desc(),
weight_grad->data<T>(), const_cast<uint8_t *>(reserve_data),
reserve_size));
} else {
......@@ -248,24 +271,22 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
// This interface is used when the input/output is padded.
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardDataEx(
handle, rnn.rnn_desc(), rnn.y_seq_desc(), out_data, rnn.y_seq_desc(),
out_grad_data, nullptr, nullptr, rnn.hy_desc(), last_h_grad_data,
rnn.cy_desc(), last_c_grad_data, rnn.w_desc(), weight_data,
rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data,
rnn.x_seq_desc(), in_grad_data, rnn.hx_desc(), init_h_grad_data,
rnn.cx_desc(), init_c_grad_data, nullptr, nullptr,
out_grad_data, nullptr, nullptr, rnn.last_h_desc(), last_h_grad_data,
rnn.last_c_desc(), last_c_grad_data, rnn.weight_desc(), weight_data,
rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data,
rnn.x_seq_desc(), in_grad_data, rnn.init_h_desc(), init_h_grad_data,
rnn.init_c_desc(), init_c_grad_data, nullptr, nullptr,
workspace_data_.data<uint8_t>(), workspace_size,
const_cast<uint8_t *>(reserve_data), reserve_size));
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardWeightsEx(
handle, rnn.rnn_desc(), rnn.x_seq_desc(), input->data<T>(),
rnn.hx_desc(), init_h->data<T>(), rnn.y_seq_desc(), out->data<T>(),
workspace_data_.data<uint8_t>(), workspace_size, rnn.w_desc(),
weight_grad->data<T>(), const_cast<uint8_t *>(reserve_data),
reserve_size));
rnn.init_h_desc(), init_h->data<T>(), rnn.y_seq_desc(),
out->data<T>(), workspace_data_.data<uint8_t>(), workspace_size,
rnn.weight_desc(), weight_grad->data<T>(),
const_cast<uint8_t *>(reserve_data), reserve_size));
#else
PADDLE_ENFORCE_NOT_NULL(
nullptr,
platform::errors::Unavailable(
PADDLE_THROW(platform::errors::Unavailable(
"The padded input of rnn is supported by cudnnRNNBackwardDataEx, "
"cudnnRNNBackwardWeightsEx, but it only works when the version "
"of cudnn is larger than 7.2.1"));
......
......@@ -76,7 +76,8 @@ class AllReduceOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream));
}
#else
PADDLE_THROW("PaddlePaddle should compile with GPU.");
PADDLE_THROW(platform::errors::PreconditionNotMet(
"PaddlePaddle should compile with GPU."));
#endif
}
};
......
......@@ -58,7 +58,8 @@ template <typename T>
class BroadcastOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_THROW("Broadcast op can run on gpu place only for now.");
PADDLE_THROW(platform::errors::PreconditionNotMet(
"Broadcast op can run on gpu place only for now."));
}
};
......
......@@ -68,10 +68,11 @@ class NCCLBroadcastOpKernel : public framework::OpKernel<T> {
<< " From " << root_dev_id << " to " << dev_id;
if (ctx.Attr<bool>("sync_mode")) {
PADDLE_ENFORCE(cudaStreamSynchronize(stream));
PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream));
}
#else
PADDLE_THROW("PaddlePaddle should compile with GPU.");
PADDLE_THROW(platform::errors::PreconditionNotMet(
"PaddlePaddle should compile with GPU."));
#endif
}
};
......
......@@ -33,9 +33,12 @@ namespace operators {
static void Memcpy(void *dst, const void *src, size_t n, bool copy_to_gpu) {
if (copy_to_gpu) {
#ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE(cudaMemcpy(dst, src, n, cudaMemcpyHostToDevice));
PADDLE_ENFORCE_CUDA_SUCCESS(
cudaMemcpy(dst, src, n, cudaMemcpyHostToDevice));
#else
PADDLE_THROW("Not compiled with cuda");
PADDLE_THROW(
platform::errors::InvalidArgument("Check your paddle version, current "
"version is not compiled with cuda"));
#endif
} else {
std::memcpy(dst, src, n);
......@@ -88,11 +91,22 @@ bool TestMain(const platform::Place &place, const framework::DDim &dims,
framework::LoDTensor cpu_out;
auto &out_tensor = scope.FindVar(out_name)->Get<framework::LoDTensor>();
PADDLE_ENFORCE(scope.kids().empty());
PADDLE_ENFORCE_EQ(scope.kids().empty(), true,
platform::errors::InvalidArgument(
"The scope can not have the child scopes,"
"please check your code."));
if (inplace) {
PADDLE_ENFORCE_EQ(&out_tensor, x);
PADDLE_ENFORCE_EQ(
&out_tensor, x,
platform::errors::InvalidArgument(
"The output tensor should be same as input x in inplace mode,"
" but now is not same."));
} else {
PADDLE_ENFORCE_EQ(&out_tensor, z);
PADDLE_ENFORCE_EQ(
&out_tensor, z,
platform::errors::InvalidArgument(
"The output tensor should be same as output z in normal mode,"
" but now is not same."));
}
if (is_gpu_place) {
......
......@@ -92,7 +92,9 @@ class TestElementwiseOpGradGrad {
auto dst_place = BOOST_GET_CONST(platform::CUDAPlace, place_);
memory::Copy(dst_place, dst, src_place, src, bytes, nullptr);
#else
PADDLE_THROW("Not compiled with cuda");
PADDLE_THROW(platform::errors::InvalidArgument(
"Check your paddle version, current version is not compiled with "
"cuda"));
#endif
}
}
......@@ -107,7 +109,10 @@ class TestElementwiseOpGradGrad {
op->Run(scope_, place_);
platform::DeviceContextPool::Instance().Get(place_)->Wait();
framework::LoDTensor cpu_out;
PADDLE_ENFORCE_EQ(scope_.kids().empty(), true, "scope has child scopes");
PADDLE_ENFORCE_EQ(scope_.kids().empty(), true,
platform::errors::InvalidArgument(
"The scope can not have the child scopes,"
"please check your code."));
// get outputs from scope and compare them with expected_outs
bool all_equal = true;
......
......@@ -37,8 +37,21 @@ class GatherOp : public framework::OperatorWithKernel {
"Output(Out) of GatherOp should not be null."));
auto index_dims = ctx->GetInputDim("Index");
PADDLE_ENFORCE(index_dims.size() == 1 ||
(index_dims.size() == 2 && index_dims[1] == 1));
if (index_dims.size() == 2) {
PADDLE_ENFORCE_EQ(
index_dims[1], 1,
platform::errors::InvalidArgument(
"The last dim of index should be 1 when it is 2D, but we get %d",
index_dims[1]));
} else {
PADDLE_ENFORCE_EQ(
index_dims.size(), 1,
platform::errors::InvalidArgument(
"The index should be 1D, when it is not 2D, but we get %d",
index_dims.size()));
}
int batch_size = ctx->GetInputDim("Index")[0];
framework::DDim output_dims(ctx->GetInputDim("X"));
output_dims[0] = batch_size;
......
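For readers of the shape check above, a short Python sketch of the index shapes the gather op accepts. This is an editorial illustration, not part of the diff; it assumes the fluid.data and fluid.layers.gather APIs of this release.

import numpy as np
import paddle.fluid as fluid

x = fluid.data(name="x", shape=[3, 4], dtype="float32")
index_1d = fluid.data(name="index", shape=[2], dtype="int32")  # 1-D index: accepted
out = fluid.layers.gather(x, index_1d)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
res, = exe.run(feed={"x": np.arange(12).reshape(3, 4).astype("float32"),
                     "index": np.array([2, 0], dtype="int32")},
               fetch_list=[out])
# A 2-D index of shape [N, 1] is also accepted; any other rank triggers the error above.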
......@@ -43,7 +43,11 @@ class OverflowOp : public framework::OperatorWithKernel {
} else if (x_var->IsType<framework::SelectedRows>()) {
dtype = x_var->Get<framework::SelectedRows>().value().type();
} else {
PADDLE_THROW("Cannot find the input data type by all input data");
PADDLE_ENFORCE_EQ(
true, false,
platform::errors::InvalidArgument(
"The input type mismatch, the type of Input(X) must be Tensor or "
"SelectedRows, please check your input."));
}
return framework::OpKernelType(framework::proto::VarType::Type(dtype),
ctx.GetPlace());
......
......@@ -57,7 +57,11 @@ class OverflowKernel : public framework::OpKernel<T> {
auto& in = ctx.Input<framework::SelectedRows>("X")->value();
functor(in, out);
} else {
PADDLE_THROW("Unsupported input type.");
PADDLE_ENFORCE_EQ(
true, false,
platform::errors::InvalidArgument(
"The input type mismatch, the type of Input(X) must be Tensor or "
"SelectedRows, please check your input."));
}
}
};
......
......@@ -22,8 +22,6 @@ class LinspaceOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Start"),
"Input(Start) of LinspaceOp should not be null.");
OP_INOUT_CHECK(ctx->HasInput("Start"), "Input", "Start", "linspace");
OP_INOUT_CHECK(ctx->HasInput("Stop"), "Input", "Stop", "linspace");
OP_INOUT_CHECK(ctx->HasInput("Num"), "Input", "Num", "linspace");
......
......@@ -63,7 +63,10 @@ class CUDALinspaceKernel : public framework::OpKernel<T> {
framework::TensorCopy(*num_t, platform::CPUPlace(), &n);
int32_t num = n.data<int32_t>()[0];
PADDLE_ENFORCE(num > 0, "The num of linspace op should be larger than 0.");
PADDLE_ENFORCE_GT(num, 0, platform::errors::InvalidArgument(
"The num of linspace op should be larger "
"than 0, but received num is %d",
num));
out->Resize(framework::make_ddim({num}));
T* out_data = out->mutable_data<T>(context.GetPlace());
......
......@@ -46,7 +46,10 @@ class CPULinspaceKernel : public framework::OpKernel<T> {
T start = start_t.data<T>()[0];
T stop = stop_t.data<T>()[0];
PADDLE_ENFORCE(num > 0, "The num of linspace op should be larger than 0.");
PADDLE_ENFORCE_GT(num, 0, platform::errors::InvalidArgument(
"The num of linspace op should be larger "
"than 0, but received num is %d",
num));
out->Resize(framework::make_ddim({num}));
......
......@@ -48,6 +48,7 @@ class QuantOpKernel : public framework::OpKernel<T> {
const T* input_data = input->data<T>();
bool is_negative = ctx.Attr<bool>("is_negative_input");
bool bfloat16 = ctx.Attr<bool>("bfloat16");
std::string key =
platform::CreateKey(platform::ThreadIDasStr(), src_tz, scale_data,
is_negative, ctx.OutputName("Output"));
......@@ -74,7 +75,10 @@ class QuantOpKernel : public framework::OpKernel<T> {
src_md, engine, to_void_cast<T>(input_data));
std::shared_ptr<mkldnn::memory::desc> dst_md;
if (is_negative) {
if (bfloat16) {
platform::SetDstMemoryQuantized<paddle::platform::bfloat16>(
ctx, output, dst_tz, engine, dst_md, dst_memory, out_format);
} else if (is_negative) {
platform::SetDstMemoryQuantized<int8_t>(ctx, output, dst_tz, engine,
dst_md, dst_memory, out_format);
} else {
......@@ -96,7 +100,11 @@ class QuantOpKernel : public framework::OpKernel<T> {
dst_memory = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(key_dst_mem));
auto place = ctx.GetPlace();
if (is_negative) {
if (bfloat16) {
dst_memory->set_data_handle(
output->mutable_data<paddle::platform::bfloat16>(place));
} else if (is_negative) {
dst_memory->set_data_handle(output->mutable_data<int8_t>(place));
} else {
dst_memory->set_data_handle(output->mutable_data<uint8_t>(place));
......
......@@ -40,6 +40,8 @@ void QuantOpMaker::Make() {
AddAttr<std::string>("output_format",
"Convert format to NHWC or NCHW during quantization.")
.SetDefault("NHWC");
AddAttr<bool>("bfloat16", "(bool, default false) Convert to bfloat16")
.SetDefault(false);
AddComment(R"DOC(This op will quantize data from FP32 to INT8)DOC");
}
......
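As background for the new bfloat16 attribute, here is a small numeric sketch (plain NumPy, not Paddle code) of what converting float32 data to bfloat16 means: keep the sign, exponent and top 7 mantissa bits, i.e. the upper 16 bits of the float32 bit pattern. The sketch uses simple truncation; real conversions typically round to nearest.

import numpy as np

x = np.array([3.1415927], dtype=np.float32)
bits = x.view(np.uint32)                   # raw IEEE-754 bit pattern
bf16_bits = bits & np.uint32(0xFFFF0000)   # drop the low 16 mantissa bits
x_bf16 = bf16_bits.view(np.float32)
print(x[0], x_bf16[0])                     # 3.1415927 3.140625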
......@@ -60,7 +60,10 @@ class ScaleKernel : public framework::OpKernel<T> {
out->mutable_data<T>(in->place());
PADDLE_ENFORCE_EQ(in->dims(), out->dims(),
"in and out should have the same dim");
paddle::platform::errors::InvalidArgument(
"the input and output should have the same dim"
"but input dim is %s, output dim is %s",
in->dims(), out->dims()));
auto eigen_out = framework::EigenVector<T>::Flatten(*out);
auto eigen_in = framework::EigenVector<T>::Flatten(*in);
......
......@@ -186,10 +186,17 @@ class SumOp : public framework::OperatorWithKernel {
}
}
}
PADDLE_THROW("Cannot find the input data type by all input data");
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected each tensor in Input(x) in sum op has be initialized, but "
"some tensor in Input(x) is not be initialized, please check your "
"code.",
framework::ToTypeName(x_vars[0]->Type())));
}
PADDLE_THROW("Unexpected branch. Input type is %s",
framework::ToTypeName(x_vars[0]->Type()));
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected type of Input(X) must be Tensor, SelectedRows or "
"LodTensorArray. But got "
"unsupport type: %s.",
framework::ToTypeName(x_vars[0]->Type())));
}
};
......
......@@ -169,8 +169,18 @@ void SumToLoDTensor(const framework::ExecutionContext &context) {
auto row_numel = sr_value.numel() / sr_rows.size();
auto out_dims = out->dims();
PADDLE_ENFORCE_EQ(sr.height(), out_dims[0]);
PADDLE_ENFORCE_EQ(row_numel, out->numel() / sr.height());
PADDLE_ENFORCE_EQ(sr.height(), out_dims[0],
platform::errors::InvalidArgument(
"The table height of input must be same as output, "
"but received input height is %d"
", output height is %d",
sr.height(), out_dims[0]));
PADDLE_ENFORCE_EQ(row_numel, out->numel() / sr.height(),
platform::errors::InvalidArgument(
"The table width of input must be same as output, "
"but received input width is %d"
", output width is %d",
row_numel, out->numel() / sr.height()));
auto *sr_data = sr_value.data<T>();
auto *sr_out_data = out->data<T>();
......@@ -231,8 +241,11 @@ class SumKernel<platform::CUDADeviceContext, T>
} else if (out_var->IsType<framework::LoDTensorArray>()) {
LodTensorArrayCompute<platform::CUDADeviceContext, T>(context);
} else {
PADDLE_THROW("Unexpected branch, output variable type is %s",
framework::ToTypeName(out_var->Type()));
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected type of Ouput(out) must be Tensor, SelectedRows or "
"LodTensorArray. But got "
"unsupport type: %s.",
framework::ToTypeName(out_var->Type())));
}
}
};
......
......@@ -182,7 +182,11 @@ class SumKernel : public framework::OpKernel<T> {
auto &in_t = in_vars[i]->Get<framework::SelectedRows>();
functor(context.template device_context<DeviceContext>(), in_t, out);
} else {
PADDLE_THROW("Variable type must be LoDTensor/SelectedRows.");
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected type of Input(X) of %d-th must be Tensor, "
"SelectedRows. But got "
"unsupport type: %s.",
framework::ToTypeName(in_vars[i]->Type())));
}
}
} else if (out_var->IsType<framework::SelectedRows>()) {
......@@ -190,8 +194,11 @@ class SumKernel : public framework::OpKernel<T> {
} else if (out_var->IsType<framework::LoDTensorArray>()) {
LodTensorArrayCompute<DeviceContext, T>(context);
} else {
PADDLE_THROW("Unexpected branch, output variable type is %s",
framework::ToTypeName(out_var->Type()));
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected type of Output(out) must be Tensor, SelectedRows, "
"LoDTensorArray. But got "
"unsupport type: %s.",
framework::ToTypeName(out_var->Type())));
}
}
};
......
......@@ -54,9 +54,11 @@ class CPUUniformRandomKernel : public framework::OpKernel<T> {
tensor = out_var->GetMutable<framework::LoDTensor>();
if (!new_shape.empty()) tensor->Resize(framework::make_ddim(new_shape));
} else {
PADDLE_THROW(
"uniform_random_op's output only"
"supports SelectedRows and LoDTensor");
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected type of Output(out) in uniform_random_op must be Tensor, "
"SelectedRows. But got "
"unsupport type: %s.",
framework::ToTypeName(out_var->Type())));
}
T *data = tensor->mutable_data<T>(ctx.GetPlace());
......
......@@ -116,9 +116,11 @@ class GPUUniformRandomKernel : public framework::OpKernel<T> {
tensor = out_var->GetMutable<framework::LoDTensor>();
if (!new_shape.empty()) tensor->Resize(framework::make_ddim(new_shape));
} else {
PADDLE_THROW(
"uniform_random_op's output only"
"supports SelectedRows and LoDTensor");
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected type of Output(out) in uniform_random_op must be Tensor, "
"SelectedRows. But got "
"unsupport type: %s.",
framework::ToTypeName(out_var->Type())));
}
T* data = tensor->mutable_data<T>(context.GetPlace());
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
......
......@@ -50,7 +50,10 @@ inline std::vector<int64_t> GetNewDataFromShapeTensor(
}
return vec_new_data;
} else {
PADDLE_THROW("The dtype of shape tensor must be int32 or int64.");
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected dtype of ShapeTensor must be int32, int64. But got "
"unsupport dtype: %s.",
paddle::framework::DataTypeToString(new_data_tensor->type())));
}
}
......@@ -84,7 +87,11 @@ inline std::vector<int64_t> GetNewDataFromShapeTensorList(
vec_new_shape.push_back(*tensor->data<int64_t>());
}
} else {
PADDLE_THROW("The dtype of shape tensor must be int32 or int64.");
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected dtype of ShapeTensorList of %d-th must be int32, int64. "
"But got "
"unsupport dtype: %s.",
i, paddle::framework::DataTypeToString(tensor->type())));
}
}
......
......@@ -287,6 +287,8 @@ class ScopedTensorDescriptor {
return descriptor(CudnnDataType<T>::type, dim, stride);
}
inline cudnnTensorDescriptor_t desc() { return desc_; }
private:
cudnnTensorDescriptor_t desc_;
DISABLE_COPY_AND_ASSIGN(ScopedTensorDescriptor);
......@@ -329,6 +331,8 @@ class ScopedRNNTensorDescriptor {
input_size, time_major, seq_length);
}
inline cudnnRNNDataDescriptor_t desc() { return desc_; }
private:
cudnnRNNDataDescriptor_t desc_;
DISABLE_COPY_AND_ASSIGN(ScopedRNNTensorDescriptor);
......@@ -361,6 +365,7 @@ class ScopedDropoutDescriptor {
}
return desc_;
}
inline cudnnDropoutDescriptor_t desc() { return desc_; }
private:
cudnnDropoutDescriptor_t desc_;
......@@ -376,7 +381,7 @@ class ScopedRNNDescriptor {
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyRNNDescriptor(desc_));
}
inline cudnnRNNDescriptor_t descriptor() { return desc_; }
inline cudnnRNNDescriptor_t desc() { return desc_; }
private:
cudnnRNNDescriptor_t desc_;
......@@ -419,172 +424,13 @@ class ScopedFilterDescriptor {
kernel, groups);
}
inline cudnnFilterDescriptor_t desc() { return desc_; }
private:
cudnnFilterDescriptor_t desc_;
DISABLE_COPY_AND_ASSIGN(ScopedFilterDescriptor);
};
class ScopedRNNBase {
public:
ScopedRNNBase(int seq_length, int batch_size, int input_size, int hidden_size,
int num_layers, float dropout_prob, int seed, int weight_numel,
bool initialized, bool is_bidirec)
: seq_length_(seq_length),
batch_size_(batch_size),
input_size_(input_size),
hidden_size_(hidden_size),
num_layers_(num_layers),
dropout_prob_(dropout_prob),
seed_(seed),
weight_numel_(weight_numel),
initialized_(initialized),
is_bidirec_(is_bidirec) {}
template <typename T>
void Create(const cudnnHandle_t& handle, const platform::Place& place,
std::vector<int> sequence_length, size_t* workspace_size,
size_t* reserve_size, framework::Tensor* dropout_state) {
int numDirections = is_bidirec_ ? 2 : 1;
cudnnDataType_t cudnn_type = platform::CudnnDataType<T>::type;
// ------------------- cudnn x, y descriptors ---------------------
std::vector<int> dims_x = {batch_size_, input_size_, 1};
std::vector<int> strides_x = {input_size_, 1, 1};
std::vector<int> dims_y = {batch_size_, hidden_size_ * numDirections, 1};
std::vector<int> strides_y = {hidden_size_ * numDirections, 1, 1};
for (int i = 0; i < seq_length_; ++i) {
x_desc_.emplace_back(x_d.descriptor<T>(dims_x, strides_x));
y_desc_.emplace_back(y_d.descriptor<T>(dims_y, strides_y));
}
if (!sequence_length.empty()) {
x_seq_desc_ = x_seq_d.descriptor<T>(seq_length_, batch_size_, input_size_,
true, sequence_length);
y_seq_desc_ = y_seq_d.descriptor<T>(seq_length_, batch_size_,
hidden_size_ * numDirections, true,
sequence_length);
}
// ------------------- cudnn hx, hy, cx, cy descriptors----------
std::vector<int> dims_hx = {num_layers_ * numDirections, batch_size_,
hidden_size_};
std::vector<int> strides_hx = {hidden_size_ * batch_size_, hidden_size_, 1};
hx_desc_ = hx_d.descriptor<T>(dims_hx, strides_hx);
cx_desc_ = cx_d.descriptor<T>(dims_hx, strides_hx);
hy_desc_ = hy_d.descriptor<T>(dims_hx, strides_hx);
cy_desc_ = cy_d.descriptor<T>(dims_hx, strides_hx);
// ------------------- cudnn dropout descriptors ---------------------
size_t state_size;
if (!initialized_) {
PADDLE_ENFORCE_CUDA_SUCCESS(
dynload::cudnnDropoutGetStatesSize(handle, &state_size));
dropout_state->mutable_data<uint8_t>({static_cast<int64_t>(state_size)},
place);
}
dropout_desc_ =
dropout_d.descriptor(handle, place, initialized_, dropout_prob_,
dropout_state, seed_, state_size);
// ------------------- cudnn rnn descriptors ---------------------
rnn_desc_ = rnn_d.descriptor();
#if CUDNN_VERSION >= 6000
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor_v6(
handle, rnn_desc_, hidden_size_, num_layers_, dropout_desc_,
CUDNN_LINEAR_INPUT,
is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
CUDNN_RNN_ALGO_STANDARD, cudnn_type));
#else
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor(
rnn_desc_, hidden_size_, num_layers_, dropout_desc_, CUDNN_LINEAR_INPUT,
is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
cudnn_type));
#endif
if (!sequence_length.empty()) {
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNPaddingMode(
rnn_desc_, CUDNN_RNN_PADDED_IO_ENABLED));
}
// ------------------- cudnn weights_size ---------------------
size_t weights_size_;
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNParamsSize(
handle, rnn_desc_, x_desc_[0], &weights_size_, cudnn_type));
PADDLE_ENFORCE_EQ(
weights_size_, sizeof(T) * weight_numel_,
platform::errors::InvalidArgument(
"The cudnn lstm and setting weight size should be same."));
// ------------------- cudnn weight descriptors ---------------------
platform::DataLayout layout = platform::DataLayout::kNCHW;
int dim_tmp = weights_size_ / sizeof(T);
std::vector<int> dim_w = {dim_tmp, 1, 1};
w_desc_ = w_d.descriptor<T>(layout, dim_w);
// ------------------- cudnn workspace, reserve size ---------------------
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNWorkspaceSize(
handle, rnn_desc_, seq_length_, x_desc_.data(), workspace_size));
PADDLE_ENFORCE_CUDA_SUCCESS(
platform::dynload::cudnnGetRNNTrainingReserveSize(
handle, rnn_desc_, seq_length_, x_desc_.data(), reserve_size));
}
cudnnTensorDescriptor_t* x_desc() { return x_desc_.data(); }
cudnnTensorDescriptor_t* y_desc() { return y_desc_.data(); }
cudnnRNNDataDescriptor_t x_seq_desc() { return x_seq_desc_; }
cudnnRNNDataDescriptor_t y_seq_desc() { return y_seq_desc_; }
cudnnTensorDescriptor_t hx_desc() { return hx_desc_; }
cudnnTensorDescriptor_t cx_desc() { return cx_desc_; }
cudnnTensorDescriptor_t hy_desc() { return hy_desc_; }
cudnnTensorDescriptor_t cy_desc() { return cy_desc_; }
cudnnRNNDescriptor_t rnn_desc() { return rnn_desc_; }
cudnnDropoutDescriptor_t dropout_desc() { return dropout_desc_; }
cudnnFilterDescriptor_t w_desc() { return w_desc_; }
private:
int seq_length_;
int batch_size_;
int input_size_;
int hidden_size_;
int num_layers_;
float dropout_prob_;
int seed_;
int weight_numel_;
bool initialized_;
bool is_bidirec_;
std::vector<cudnnTensorDescriptor_t> x_desc_;
std::vector<cudnnTensorDescriptor_t> y_desc_;
cudnnRNNDataDescriptor_t x_seq_desc_;
cudnnRNNDataDescriptor_t y_seq_desc_;
// A tensor descriptor describing the initial hidden state of the RNN.
cudnnTensorDescriptor_t hx_desc_;
// A tensor descriptor describing the initial cell state for LSTM networks.
cudnnTensorDescriptor_t cx_desc_;
// A tensor descriptor describing the final hidden state of the RNN.
cudnnTensorDescriptor_t hy_desc_;
// A tensor descriptor describing the final cell state for LSTM networks.
cudnnTensorDescriptor_t cy_desc_;
cudnnDropoutDescriptor_t dropout_desc_;
cudnnFilterDescriptor_t w_desc_;
cudnnRNNDescriptor_t rnn_desc_;
ScopedTensorDescriptor x_d;
ScopedTensorDescriptor y_d;
ScopedRNNTensorDescriptor x_seq_d;
ScopedRNNTensorDescriptor y_seq_d;
ScopedTensorDescriptor hx_d;
ScopedTensorDescriptor cx_d;
ScopedTensorDescriptor hy_d;
ScopedTensorDescriptor cy_d;
ScopedDropoutDescriptor dropout_d;
ScopedFilterDescriptor w_d;
ScopedRNNDescriptor rnn_d;
};
class ScopedConvolutionDescriptor {
public:
ScopedConvolutionDescriptor() {
......
......@@ -443,6 +443,13 @@ inline bool HasOpINT8DataType(const paddle::framework::OpDesc* op) {
op->GetAttrIfExists<bool>("use_quantizer"));
}
inline bool HasOpBFLOAT16DataType(const paddle::framework::OpDesc* op) {
return op->GetAttrIfExists<std::string>("mkldnn_data_type") == "bfloat16";
}
inline bool HasOpFLOAT32DataType(const paddle::framework::OpDesc* op) {
return op->GetAttrIfExists<std::string>("mkldnn_data_type") == "float32";
}
enum class RNNReorderType { PP_NTC, PP_TNC, NTC_PP, TNC_PP };
} // namespace platform
......
......@@ -38,6 +38,7 @@ set(PYBIND_SRCS
imperative.cc
ir.cc
inference_api.cc
compatible.cc
generator_py.cc)
if(WITH_GLOO)
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/pybind/compatible.h"
#include <memory>
#include <string>
#include "paddle/fluid/framework/op_version_registry.h"
namespace py = pybind11;
using paddle::framework::compatible::PassVersionCheckerRegistrar;
namespace paddle {
namespace pybind {
void BindCompatible(py::module* m) {
py::class_<PassVersionCheckerRegistrar>(*m, "PassVersionChecker")
.def_static("IsCompatible", [](const std::string& name) -> bool {
auto instance = PassVersionCheckerRegistrar::GetInstance();
return instance.IsPassCompatible(name);
});
}
} // namespace pybind
} // namespace paddle
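A short usage sketch of the binding above from the Python side. This is an editorial illustration under the assumption that the binding is exposed through paddle.fluid.core; the pass name is only a placeholder.

import paddle.fluid.core as core

# Returns True if the named pass is registered as compatible with this build;
# "fc_fuse_pass" is a placeholder pass name for illustration.
print(core.PassVersionChecker.IsCompatible("fc_fuse_pass"))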
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pybind11/pybind11.h>
namespace paddle {
namespace pybind {
void BindCompatible(pybind11::module *m);
} // namespace pybind
} // namespace paddle
......@@ -184,6 +184,7 @@ void BindVarDsec(pybind11::module *m) {
.value("FP16", pd::proto::VarType::FP16)
.value("FP32", pd::proto::VarType::FP32)
.value("FP64", pd::proto::VarType::FP64)
.value("BF16", pd::proto::VarType::BF16)
.value("LOD_TENSOR", pd::proto::VarType::LOD_TENSOR)
.value("SELECTED_ROWS", pd::proto::VarType::SELECTED_ROWS)
.value("FEED_MINIBATCH", pd::proto::VarType::FEED_MINIBATCH)
......
......@@ -60,6 +60,7 @@ limitations under the License. */
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/pybind/box_helper_py.h"
#include "paddle/fluid/pybind/compatible.h"
#include "paddle/fluid/pybind/const_value.h"
#include "paddle/fluid/pybind/data_set_py.h"
#include "paddle/fluid/pybind/exception.h"
......@@ -2619,6 +2620,7 @@ All parameter, weight, gradient are variables in Paddle.
BindGraph(&m);
BindNode(&m);
BindInferenceApi(&m);
BindCompatible(&m);
BindDataset(&m);
BindGenerator(&m);
#ifdef PADDLE_WITH_CRYPTO
......
......@@ -51,6 +51,17 @@ if %ERRORLEVEL% NEQ 0 (
exit /b 7
)
rem ------pre install clcache and init config----------
pip install clcache
:: set USE_CLCACHE to enable clcache
set USE_CLCACHE=1
:: In some scenarios, CLCACHE_HARDLINK can save one file copy.
set CLCACHE_HARDLINK=1
:: If it takes more than 1000s to obtain the right to use the cache, an error will be reported
set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000
:: set maximum cache size to 20G
clcache.exe -M 21474836480
rem ------initialize common variable------
if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0"
if not defined BRANCH set BRANCH=develop
......@@ -173,7 +184,7 @@ echo Build third_party successfully!
set build_times=1
:build_paddle
echo Build Paddle the %build_times% time:
msbuild /m:%PARALLEL_PROJECT_COUNT% /p:Configuration=Release /verbosity:minimal paddle.sln
msbuild /m:%PARALLEL_PROJECT_COUNT% /p:TrackFileAccess=false /p:CLToolExe=clcache.exe /p:CLToolPath=%PYTHON_ROOT%\Scripts /p:Configuration=Release /verbosity:minimal paddle.sln
if %ERRORLEVEL% NEQ 0 (
set /a build_times=%build_times%+1
if %build_times% GTR 2 (
......
......@@ -49,6 +49,7 @@ import paddle.optimizer
import paddle.metric
import paddle.device
import paddle.incubate.complex as complex
import paddle.regularizer
# TODO: define alias in tensor and framework directory
......
......@@ -21,6 +21,7 @@ from .parallel import get_rank
from .parallel import get_world_size
from paddle.fluid.dygraph.parallel import prepare_context #DEFINE_ALIAS
from paddle.fluid.dygraph.parallel import ParallelEnv #DEFINE_ALIAS
from paddle.distributed.fleet.dataset import *
from . import collective
from .collective import *
......@@ -30,11 +31,8 @@ __all__ = ["spawn"]
# dygraph parallel apis
__all__ += [
"init_parallel_env",
"get_rank",
"get_world_size",
"prepare_context",
"ParallelEnv",
"init_parallel_env", "get_rank", "get_world_size", "prepare_context",
"ParallelEnv", "InMemoryDataset", "QueueDataset"
]
# collective apis
......
......@@ -19,7 +19,7 @@ from paddle.distributed.utils import get_cluster, logger
def get_cloud_cluster(args_node_ips, args_node_ip, args_port, selected_gpus):
"""
args_node_ips, args_node_ip:string
args_node_ips:string, args_node_ip:string, args_port: int, selected_gpus:list
"""
#you can automatically get ip info while using paddlecloud multi nodes mode.
node_ips = os.getenv("PADDLE_TRAINERS")
......@@ -31,6 +31,9 @@ def get_cloud_cluster(args_node_ips, args_node_ip, args_port, selected_gpus):
node_rank = os.getenv("PADDLE_TRAINER_ID")
assert node_rank is not None, "PADDLE_TRAINER_ID should not be None"
paddle_ports_num = int(os.getenv("TRAINER_PORTS_NUM"))
assert paddle_ports_num is not None, "TRAINER_PORTS_NUM should not be None"
node_ips = node_ips.split(",")
num_nodes = len(node_ips)
node_rank = int(node_rank)
......@@ -47,14 +50,16 @@ automatically got from PADDLE_TRAINERS(multi nodes) or POD_IP(single node).\
Your input cluster_node_ips: {} doesn't equals to IPs: {} from \
paddlecloud environment.".format(args_node_ips, node_ips))
# DISTRIBUTED_TRAINER_ENDPOINTS: new environment since paddlecloud 1.8.4
# e.g: DISTRIBUTED_TRAINER_ENDPOINTS="ip1:port1,ip1:port2,ip1:port3,ip1:port4,ip2:port5,ip2:port6,ip2:port7,ip2:port8"
trainer_endpoints = os.getenv("DISTRIBUTED_TRAINER_ENDPOINTS")
if trainer_endpoints is None:
started_port = args_port
print("num_nodes:", num_nodes)
if num_nodes > 1:
try:
paddle_port = int(os.getenv("PADDLE_PORT", ""))
paddle_port_num = int(os.getenv("TRAINER_PORTS_NUM", ""))
if paddle_port_num >= len(
if paddle_ports_num >= len(
selected_gpus) and paddle_port != args_port:
logger.warning("Use Cloud specified port:{}.".format(
paddle_port))
......@@ -66,13 +71,26 @@ paddlecloud environment.".format(args_node_ips, node_ips))
if started_port is None:
started_port = 6170
logger.debug("parsed from args:node_ips:{} \
node_ip:{} node_rank:{} started_port:{}"
.format(node_ips, node_ip, node_rank, started_port))
ports = [x for x in range(started_port, started_port + len(selected_gpus))]
cluster, pod = get_cluster(node_ips, node_ip, ports, selected_gpus)
ports = [
x for x in range(started_port, started_port + len(selected_gpus))
]
trainer_endpoints = []
for ip in node_ips:
trainer_endpoints.append(["%s:%d" % (ip, port) for port in ports])
else:
trainer_endpoints_ori = trainer_endpoints.split(",")
trainer_endpoints = []
assert num_nodes * paddle_ports_num == len(trainer_endpoints_ori)
for i in range(num_nodes):
trainer_endpoints.append(trainer_endpoints_ori[
i * paddle_ports_num:(i + 1) * paddle_ports_num])
logger.debug("parsed from args: node_ips:{} \
node_ip:{} node_rank:{} trainer_endpoints:{}"
.format(node_ips, node_ip, node_rank, trainer_endpoints))
cluster, pod = get_cluster(node_ips, node_ip, trainer_endpoints,
selected_gpus)
return cluster, cluster.pods[node_rank]
......
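To make the endpoint handling above concrete, a small standalone sketch (plain Python, illustrative values only) of how a DISTRIBUTED_TRAINER_ENDPOINTS string is grouped into per-node endpoint lists:

endpoints = "ip1:6170,ip1:6171,ip2:6170,ip2:6171".split(",")
paddle_ports_num = 2   # TRAINER_PORTS_NUM
num_nodes = 2          # len(PADDLE_TRAINERS.split(","))
assert num_nodes * paddle_ports_num == len(endpoints)
per_node = [endpoints[i * paddle_ports_num:(i + 1) * paddle_ports_num]
            for i in range(num_nodes)]
print(per_node)        # [['ip1:6170', 'ip1:6171'], ['ip2:6170', 'ip2:6171']]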
......@@ -23,7 +23,6 @@ from .dataset import *
__all__ = [
"DistributedStrategy",
"UtilBase",
"DatasetFactory",
"UserDefinedRoleMaker",
"PaddleCloudRoleMaker",
"Fleet",
......
......@@ -60,7 +60,7 @@ class StrategyCompiler(StrategyCompilerBase):
def _get_valid_strategy(self, dist_strategy, can_not_apply_optimizer_list):
import copy
valid_strategy = copy.copy(dist_strategy)
valid_strategy = copy.deepcopy(dist_strategy)
invalid_optimizers = []
for candidate in self._meta_optimizer_candidates:
is_valid = False
......
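For context on the copy.copy to copy.deepcopy change above, a minimal standalone illustration (the dictionary keys are made up and are not the real DistributedStrategy fields): with a shallow copy, mutating a nested field of the copied strategy also mutates the caller's original, while a deep copy keeps them independent.

import copy

original = {"amp": True, "amp_configs": {"init_loss_scaling": 1024.0}}

shallow = copy.copy(original)
shallow["amp_configs"]["init_loss_scaling"] = 1.0
print(original["amp_configs"]["init_loss_scaling"])   # 1.0 -- the original was mutated too

original = {"amp": True, "amp_configs": {"init_loss_scaling": 1024.0}}
deep = copy.deepcopy(original)
deep["amp_configs"]["init_loss_scaling"] = 1.0
print(original["amp_configs"]["init_loss_scaling"])   # 1024.0 -- the original is untouched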
......@@ -19,7 +19,7 @@ from paddle.distributed.fleet.launch_utils import get_cluster, logger
def get_cloud_cluster(args_node_ips, selected_gpus, args_port=6170):
"""
args_node_ips, args_node_ip:string
args_node_ips:string, selected_gpus:list, args_port: int
"""
#you can automatically get ip info while using paddlecloud multi nodes mode.
node_ips = os.getenv("PADDLE_TRAINERS")
......@@ -31,6 +31,9 @@ def get_cloud_cluster(args_node_ips, selected_gpus, args_port=6170):
node_rank = os.getenv("PADDLE_TRAINER_ID")
assert node_rank is not None, "PADDLE_TRAINER_ID should not be None"
paddle_ports_num = int(os.getenv("TRAINER_PORTS_NUM"))
assert paddle_ports_num is not None, "TRAINER_PORTS_NUM should not be None"
node_ips = node_ips.split(",")
num_nodes = len(node_ips)
node_rank = int(node_rank)
......@@ -42,14 +45,16 @@ automatically got from PADDLE_TRAINERS(multi nodes) or POD_IP(single node).\
Your input cluster_node_ips: {} doesn't equals to IPs: {} from \
paddlecloud environment.".format(args_node_ips, node_ips))
# DISTRIBUTED_TRAINER_ENDPOINTS: new environment since paddlecloud 1.8.4
# e.g: DISTRIBUTED_TRAINER_ENDPOINTS="ip1:port1,ip1:port2,ip1:port3,ip1:port4,ip2:port5,ip2:port6,ip2:port7,ip2:port8"
trainer_endpoints = os.getenv("DISTRIBUTED_TRAINER_ENDPOINTS")
if trainer_endpoints is None:
started_port = args_port
print("num_nodes:", num_nodes)
if num_nodes > 1:
try:
paddle_port = int(os.getenv("PADDLE_PORT", ""))
paddle_port_num = int(os.getenv("TRAINER_PORTS_NUM", ""))
if paddle_port_num >= len(
if paddle_ports_num >= len(
selected_gpus) and paddle_port != args_port:
logger.warning("Use Cloud specified port:{}.".format(
paddle_port))
......@@ -61,13 +66,26 @@ paddlecloud environment.".format(args_node_ips, node_ips))
if started_port is None:
started_port = 6170
logger.debug("parsed from args:node_ips:{} \
node_ip:{} node_rank:{} started_port:{}"
.format(node_ips, node_ip, node_rank, started_port))
ports = [x for x in range(started_port, started_port + len(selected_gpus))]
cluster, pod = get_cluster(node_ips, node_ip, ports, selected_gpus)
ports = [
x for x in range(started_port, started_port + len(selected_gpus))
]
trainer_endpoints = []
for ip in node_ips:
trainer_endpoints.append(["%s:%d" % (ip, port) for port in ports])
else:
trainer_endpoints_ori = trainer_endpoints.split(",")
trainer_endpoints = []
assert num_nodes * paddle_ports_num == len(trainer_endpoints_ori)
for i in range(num_nodes):
trainer_endpoints.append(trainer_endpoints_ori[
i * paddle_ports_num:(i + 1) * paddle_ports_num])
logger.debug("parsed from args: node_ips:{} \
node_ip:{} node_rank:{} trainer_endpoints:{}"
.format(node_ips, node_ip, node_rank, trainer_endpoints))
cluster, pod = get_cluster(node_ips, node_ip, trainer_endpoints,
selected_gpus)
return cluster, cluster.pods[node_rank]
......@@ -75,7 +93,8 @@ def use_paddlecloud():
node_ips = os.getenv("PADDLE_TRAINERS")
node_ip = os.getenv("POD_IP")
node_rank = os.getenv("PADDLE_TRAINER_ID")
if node_ips is None or node_ip is None or node_rank is None:
paddle_ports_num = os.getenv("TRAINER_PORTS_NUM")
if node_ips is None or node_ip is None or node_rank is None or paddle_ports_num is None:
return False
else:
return True
......
......@@ -14,54 +14,11 @@
"""This is definition of dataset class, which is high performance IO."""
import paddle
import paddle.fluid as fluid
from paddle.fluid.proto import data_feed_pb2
from google.protobuf import text_format
import paddle.fluid.core as core
class DatasetFactory(object):
"""
DatasetFactory is a factory which create dataset by its name,
you can create "QueueDataset" or "InMemoryDataset", or "FileInstantDataset",
the default is "QueueDataset".
Example:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
"""
def __init__(self):
""" Init. """
pass
def create_dataset(self, datafeed_class="QueueDataset"):
"""
Create "QueueDataset" or "InMemoryDataset", or "FileInstantDataset",
the default is "QueueDataset".
Args:
datafeed_class(str): datafeed class name, QueueDataset or InMemoryDataset.
Default is QueueDataset.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
"""
try:
dataset = globals()[datafeed_class]()
return dataset
except:
raise ValueError("datafeed class %s does not exist" %
datafeed_class)
class DatasetBase(object):
""" Base dataset class. """
......@@ -75,96 +32,67 @@ class DatasetBase(object):
self.thread_num = 1
self.filelist = []
def set_pipe_command(self, pipe_command):
def init(self,
batch_size=1,
thread_num=1,
use_var=[],
pipe_command="cat",
input_type=0,
fs_name="",
fs_ugi="",
download_cmd="cat"):
"""
Set pipe command of current dataset
A pipe command is a UNIX pipeline command used to pre-process the input data (e.g. "cat" or "python my_script.py").
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_pipe_command("python my_script.py")
should be called only once in user's python scripts to initialize settings of the dataset instance.
Normally, it is called by InMemoryDataset or QueueDataset.
Args:
pipe_command(str): pipe command
batch_size(int): batch size. It will be effective during training. default is 1.
thread_num(int): thread num, it is the num of readers. default is 1.
use_var(list): list of variables. Variables which you will use. default is [].
pipe_command(str): pipe command of current dataset. A pipe command is a UNIX pipeline command used to pre-process the input data, e.g. "cat" or "python my_script.py". default is "cat"
input_type(int): the input type of generated input. 0 is for one sample, 1 is for one batch. default is 0.
fs_name(str): fs name. default is "".
fs_ugi(str): fs ugi. default is "".
download_cmd(str): customized download command. default is "cat"
"""
self.proto_desc.pipe_command = pipe_command
def set_rank_offset(self, rank_offset):
"""
Set rank_offset for merge_pv. It set the message of Pv.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_rank_offset("rank_offset")
Args:
rank_offset(str): rank_offset's name
self._set_batch_size(batch_size)
self._set_thread(thread_num)
self._set_use_var(use_var)
self._set_pipe_command(pipe_command)
self._set_input_type(input_type)
self._set_hdfs_config(fs_name, fs_ugi)
self._set_download_cmd(download_cmd)
def _set_pipe_command(self, pipe_command):
"""
self.proto_desc.rank_offset = rank_offset
def set_fea_eval(self, record_candidate_size, fea_eval=True):
"""
set fea eval mode for slots shuffle to debug the importance level of
slots(features), fea_eval need to be set True for slots shuffle.
Args:
record_candidate_size(int): size of instances candidate to shuffle
one slot
fea_eval(bool): whether enable fea eval mode to enable slots shuffle.
default is True.
Set pipe command of current dataset
A pipe command is a UNIX pipeline command used to pre-process the input data (e.g. "cat" or "python my_script.py").
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_fea_eval(1000000, True)
"""
if fea_eval:
self.dataset.set_fea_eval(fea_eval, record_candidate_size)
self.fea_eval = fea_eval
def slots_shuffle(self, slots):
"""
Slots Shuffle
Slots Shuffle is a shuffle method in slots level, which is usually used
in sparse feature with large scale of instances. To compare the metric, i.e.
auc while doing slots shuffle on one or several slots with baseline to
evaluate the importance level of slots(features).
import paddle
dataset = paddle.distributed.fleet.dataset.DatasetBase()
dataset._set_pipe_command("python my_script.py")
Args:
slots(list[string]): the set of slots(string) to do slots shuffle.
pipe_command(str): pipe command
Examples:
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_merge_by_lineid()
#suppose there is a slot 0
dataset.slots_shuffle(['0'])
"""
if self.fea_eval:
slots_set = set(slots)
self.dataset.slots_shuffle(slots_set)
self.proto_desc.pipe_command = pipe_command
def set_batch_size(self, batch_size):
def _set_batch_size(self, batch_size):
"""
Set batch size. Will be effective during training
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_batch_size(128)
import paddle
dataset = paddle.distributed.fleet.DatasetBase()
dataset._set_batch_size(128)
Args:
batch_size(int): batch size
......@@ -172,32 +100,16 @@ class DatasetBase(object):
"""
self.proto_desc.batch_size = batch_size
def set_pv_batch_size(self, pv_batch_size):
"""
Set pv batch size. It will be effective during enable_pv_merge
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_pv_batch(128)
Args:
pv_batch_size(int): pv batch size
"""
self.proto_desc.pv_batch_size = pv_batch_size
def set_thread(self, thread_num):
def _set_thread(self, thread_num):
"""
Set thread num, it is the num of readers.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_thread(12)
import paddle
dataset = paddle.distributed.fleet.DatasetBase()
dataset._set_thread(12)
Args:
thread_num(int): thread num
......@@ -212,8 +124,8 @@ class DatasetBase(object):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
import paddle
dataset = paddle.distributed.fleet.DatasetBase()
dataset.set_filelist(['a.txt', 'b.txt'])
Args:
......@@ -222,19 +134,19 @@ class DatasetBase(object):
self.dataset.set_filelist(filelist)
self.filelist = filelist
def set_input_type(self, input_type):
def _set_input_type(self, input_type):
self.proto_desc.input_type = input_type
def set_use_var(self, var_list):
def _set_use_var(self, var_list):
"""
Set Variables which you will use.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_use_var([data, label])
import paddle
dataset = paddle.distributed.fleet.DatasetBase()
dataset._set_use_var([data, label])
Args:
var_list(list): variable list
......@@ -253,19 +165,19 @@ class DatasetBase(object):
slot_var.type = "uint64"
else:
raise ValueError(
"Currently, fluid.dataset only supports dtype=float32 and dtype=int64"
"Currently, paddle.distributed.fleet.dataset only supports dtype=float32 and dtype=int64"
)
def set_hdfs_config(self, fs_name, fs_ugi):
def _set_hdfs_config(self, fs_name, fs_ugi):
"""
Set hdfs config: fs name and ugi
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_hdfs_config("my_fs_name", "my_fs_ugi")
import paddle
dataset = paddle.distributed.fleet.DatasetBase()
dataset._set_hdfs_config("my_fs_name", "my_fs_ugi")
Args:
fs_name(str): fs name
......@@ -273,16 +185,16 @@ class DatasetBase(object):
"""
self.dataset.set_hdfs_config(fs_name, fs_ugi)
def set_download_cmd(self, download_cmd):
def _set_download_cmd(self, download_cmd):
"""
Set customized download cmd: download_cmd
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_download_cmd("./read_from_afs")
import paddle
dataset = paddle.distributed.fleet.DatasetBase()
dataset._set_download_cmd("./read_from_afs")
Args:
download_cmd(str): customized download command
......@@ -297,22 +209,22 @@ class DatasetBase(object):
if self.thread_num > len(self.filelist):
self.thread_num = len(self.filelist)
self.dataset.set_thread_num(self.thread_num)
self.dataset.set_data_feed_desc(self.desc())
self.dataset.set_data_feed_desc(self._desc())
self.dataset.create_readers()
def _finish_to_run(self):
self.dataset.destroy_readers()
def desc(self):
def _desc(self):
"""
Returns a protobuf message for this DataFeedDesc
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
print(dataset.desc())
import paddle
dataset = paddle.distributed.fleet.DatasetBase()
print(dataset._desc())
Returns:
A string message
......@@ -330,10 +242,10 @@ class InMemoryDataset(DatasetBase):
"""
InMemoryDataset loads data into memory
and shuffles the data before training.
This class should be created by DatasetFactory
Example:
dataset = paddle.fluid.DatasetFactory().create_dataset("InMemoryDataset")
import paddle
dataset = paddle.distributed.InMemoryDataset()
"""
def __init__(self):
......@@ -351,7 +263,229 @@ class InMemoryDataset(DatasetBase):
self.merge_by_lineid = False
self.fleet_send_sleep_seconds = None
def set_feed_type(self, data_feed_type):
def _init_distributed_settings(self, **kwargs):
"""
should be called only once in user's python scripts to initialize distributed-related settings of the dataset instance
Args:
kwargs: Keyword arguments. Currently, the following keys are supported in **kwargs:
merge_size(int): ins size to merge. If merge_size > 0, merging is done by line id:
instances with the same line id will be merged after shuffle,
and you should parse the line id in the data generator. default is -1.
parse_ins_id(bool): Set if Dataset needs to parse ins_id. default is False.
parse_content(bool): Set if Dataset needs to parse content. default is False.
fleet_send_batch_size(int): Set fleet send batch size in one rpc, default is 1024
fleet_send_sleep_seconds(int): Set fleet send sleep time, default is 0
fea_eval(bool): Set if Dataset need to do feature importance evaluation using slots shuffle.
default is False.
candidate_size(int): if fea_eval is set True, set the candidate size used in slots shuffle.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.InMemoryDataset()
dataset.init(
batch_size=1,
thread_num=2,
input_type=1,
pipe_command="cat",
use_var=[])
dataset._init_distributed_settings(
parse_ins_id=True,
parse_content=True,
fea_eval=True,
candidate_size=10000)
"""
merge_size = kwargs.get("merge_size", -1)
if merge_size > 0:
self._set_merge_by_lineid(merge_size)
parse_ins_id = kwargs.get("parse_ins_id", False)
self._set_parse_ins_id(parse_ins_id)
parse_content = kwargs.get("parse_content", False)
self._set_parse_content(parse_content)
fleet_send_batch_size = kwargs.get("fleet_send_batch_size", None)
if fleet_send_batch_size:
self._set_fleet_send_batch_size(fleet_send_batch_size)
fleet_send_sleep_seconds = kwargs.get("fleet_send_sleep_seconds", None)
if fleet_send_sleep_seconds:
self._set_fleet_send_sleep_seconds(fleet_send_sleep_seconds)
fea_eval = kwargs.get("fea_eval", False)
if fea_eval:
candidate_size = kwargs.get("candidate_size", 10000)
self._set_fea_eval(candidate_size, True)
def update_settings(self, **kwargs):
"""
should be called in user's python scripts to update settings of the dataset instance
Args:
kwargs: Keyword arguments. Currently, the following keys are supported in **kwargs,
including single node settings and advanced distributed related settings:
batch_size(int): batch size. It will be effective during training. default is 1.
thread_num(int): thread num, it is the num of readers. default is 1.
use_var(list): list of variables. Variables which you will use. default is [].
input_type(int): the input type of generated input. 0 is for one sample, 1 is for one batch. default is 0.
fs_name(str): fs name. default is "".
fs_ugi(str): fs ugi. default is "".
pipe_command(str): pipe command of current dataset. A pipe command is a UNIX pipeline command used to pre-process the input data, e.g. "cat" or "python my_script.py". default is "cat"
download_cmd(str): customized download command. default is "cat"
data_feed_type(str): data feed type used in c++ code. default is "MultiSlotInMemoryDataFeed".
queue_num(int): Dataset output queue num; training threads get data from queues. default is -1, which means it is set to the same value as the thread number in c++.
merge_size(int): ins size to merge. If merge_size > 0, merging is done by line id:
instances with the same line id will be merged after shuffle,
and you should parse the line id in the data generator. default is -1.
parse_ins_id(bool): Set if Dataset needs to parse ins_id. default is False.
parse_content(bool): Set if Dataset needs to parse content. default is False.
fleet_send_batch_size(int): Set fleet send batch size in one rpc, default is 1024
fleet_send_sleep_seconds(int): Set fleet send sleep time, default is 0
fea_eval(bool): Set if Dataset need to do feature importance evaluation using slots shuffle.
default is False.
candidate_size(int): if fea_eval is set True, set the candidate size used in slots shuffle.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.InMemoryDataset()
dataset.init(
batch_size=1,
thread_num=2,
input_type=1,
pipe_command="cat",
use_var=[])
dataset._init_distributed_settings(
parse_ins_id=True,
parse_content=True,
fea_eval=True,
candidate_size=10000)
dataset.update_settings(batch_size=2)
"""
for key in kwargs:
if key == "pipe_command":
self._set_pipe_command(kwargs[key])
elif key == "batch_size":
self._set_batch_size(kwargs[key])
elif key == "thread_num":
self._set_thread(kwargs[key])
elif key == "use_var":
self._set_use_var(kwargs[key])
elif key == "input_type":
self._set_input_type(kwargs[key])
elif key == "fs_name" and "fs_ugi" in kwargs:
self._set_hdfs_config(kwargs[key], kwargs["fs_ugi"])
elif key == "download_cmd":
self._set_download_cmd(kwargs[key])
elif key == "merge_size" and kwargs.get("merge_size", -1) > 0:
self._set_merge_by_lineid(kwargs[key])
elif key == "parse_ins_id":
self._set_parse_ins_id(kwargs[key])
elif key == "parse_content":
self._set_parse_content(kwargs[key])
elif key == "fleet_send_batch_size":
self._set_fleet_send_batch_size(kwargs[key])
elif key == "fleet_send_sleep_seconds":
self._set_fleet_send_sleep_seconds(kwargs[key])
elif key == "fea_eval" and kwargs[key] == True:
candidate_size = kwargs.get("candidate_size", 10000)
self._set_fea_eval(candidate_size, True)
def init(self, **kwargs):
"""
should be called only once in user's python scripts to initialize settings of the dataset instance
Args:
kwargs: Keyword arguments. Currently, the following keys are supported in **kwargs:
batch_size(int): batch size. It will be effective during training. default is 1.
thread_num(int): thread num, it is the num of readers. default is 1.
use_var(list): list of variables. Variables which you will use. default is [].
input_type(int): the input type of generated input. 0 is for one sample, 1 is for one batch. default is 0.
fs_name(str): fs name. default is "".
fs_ugi(str): fs ugi. default is "".
pipe_command(str): pipe command of current dataset. A pipe command is a UNIX pipeline command used to pre-process the input data, e.g. "cat" or "python my_script.py". default is "cat"
download_cmd(str): customized download command. default is "cat"
data_feed_type(str): data feed type used in c++ code. default is "MultiSlotInMemoryDataFeed".
queue_num(int): Dataset output queue num; training threads get data from queues. default is -1, which means it is set to the same value as the thread number in c++.
Examples:
.. code-block:: python
import paddle
with open("test_queue_dataset_run_a.txt", "w") as f:
data = "2 1 2 2 5 4 2 2 7 2 1 3\n"
data += "2 6 2 2 1 4 2 2 4 2 2 3\n"
data += "2 5 2 2 9 9 2 2 7 2 1 3\n"
data += "2 7 2 2 1 9 2 3 7 2 5 3\n"
f.write(data)
with open("test_queue_dataset_run_b.txt", "w") as f:
data = "2 1 2 2 5 4 2 2 7 2 1 3\n"
data += "2 6 2 2 1 4 2 2 4 2 2 3\n"
data += "2 5 2 2 9 9 2 2 7 2 1 3\n"
data += "2 7 2 2 1 9 2 3 7 2 5 3\n"
f.write(data)
slots = ["slot1", "slot2", "slot3", "slot4"]
slots_vars = []
for slot in slots:
var = fluid.data(
name=slot, shape=[None, 1], dtype="int64", lod_level=1)
slots_vars.append(var)
dataset = paddle.distributed.InMemoryDataset()
dataset.init(
batch_size=1,
thread_num=2,
input_type=1,
pipe_command="cat",
use_var=slots_vars)
dataset.set_filelist(
["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"])
dataset.load_into_memory()
exe = fluid.Executor(fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0))
exe.run(fluid.default_startup_program())
exe.train_from_dataset(fluid.default_main_program(),
dataset)
os.remove("./test_queue_dataset_run_a.txt")
os.remove("./test_queue_dataset_run_b.txt")
"""
batch_size = kwargs.get("batch_size", 1)
thread_num = kwargs.get("thread_num", 1)
use_var = kwargs.get("use_var", [])
input_type = kwargs.get("input_type", 0)
fs_name = kwargs.get("fs_name", "")
fs_ugi = kwargs.get("fs_ugi", "")
pipe_command = kwargs.get("pipe_command", "cat")
download_cmd = kwargs.get("download_cmd", "cat")
super(InMemoryDataset, self).init(
batch_size=batch_size,
thread_num=thread_num,
use_var=use_var,
pipe_command=pipe_command,
input_type=input_type,
fs_name=fs_name,
fs_ugi=fs_ugi,
download_cmd=download_cmd)
data_feed_type = kwargs.get("data_feed_type",
"MultiSlotInMemoryDataFeed")
self._set_feed_type(data_feed_type)
if kwargs.get("queue_num", -1) > 0:
queue_num = kwargs.get("queue_num", -1)
self._set_queue_num(queue_num)
def _set_feed_type(self, data_feed_type):
"""
Set data_feed_desc
"""
......@@ -373,7 +507,7 @@ class InMemoryDataset(DatasetBase):
self.dataset.set_parse_logkey(self.parse_logkey)
self.dataset.set_merge_by_sid(self.merge_by_sid)
self.dataset.set_enable_pv_merge(self.enable_pv_merge)
self.dataset.set_data_feed_desc(self.desc())
self.dataset.set_data_feed_desc(self._desc())
self.dataset.create_channel()
self.dataset.create_readers()
......@@ -387,7 +521,7 @@ class InMemoryDataset(DatasetBase):
self.dataset.dynamic_adjust_channel_num(self.thread_num, False)
self.dataset.dynamic_adjust_readers_num(self.thread_num)
def set_queue_num(self, queue_num):
def _set_queue_num(self, queue_num):
"""
Set Dataset output queue num, training threads get data from queues
......@@ -397,17 +531,17 @@ class InMemoryDataset(DatasetBase):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_queue_num(12)
import paddle
dataset = paddle.distributed.InMemoryDataset()
dataset._set_queue_num(12)
"""
self.is_user_set_queue_num = True
self.queue_num = queue_num
def set_parse_ins_id(self, parse_ins_id):
def _set_parse_ins_id(self, parse_ins_id):
"""
Set id Dataset need to parse insid
Set whether the Dataset needs to parse ins_id
Args:
parse_ins_id(bool): if parse ins_id or not
......@@ -415,14 +549,14 @@ class InMemoryDataset(DatasetBase):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_parse_ins_id(True)
import paddle
dataset = paddle.distributed.InMemoryDataset()
dataset._set_parse_ins_id(True)
"""
self.parse_ins_id = parse_ins_id
def set_parse_content(self, parse_content):
def _set_parse_content(self, parse_content):
"""
Set whether the Dataset needs to parse content
......@@ -432,120 +566,14 @@ class InMemoryDataset(DatasetBase):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_parse_content(True)
import paddle
dataset = paddle.distributed.InMemoryDataset()
dataset._set_parse_content(True)
"""
self.parse_content = parse_content
def set_parse_logkey(self, parse_logkey):
"""
Set if Dataset need to parse logkey
Args:
parse_content(bool): if parse logkey or not
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_parse_logkey(True)
"""
self.parse_logkey = parse_logkey
def set_merge_by_sid(self, merge_by_sid):
"""
Set if Dataset need to merge sid. If not, one ins means one Pv.
Args:
merge_by_sid(bool): if merge sid or not
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_merge_by_sid(True)
"""
self.merge_by_sid = merge_by_sid
def set_enable_pv_merge(self, enable_pv_merge):
"""
Set if Dataset need to merge pv.
Args:
enable_pv_merge(bool): if enable_pv_merge or not
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_enable_pv_merge(True)
"""
self.enable_pv_merge = enable_pv_merge
def preprocess_instance(self):
"""
Merge pv instance and convey it from input_channel to input_pv_channel.
It will be effective when enable_pv_merge_ is True.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
dataset.preprocess_instance()
"""
self.dataset.preprocess_instance()
def set_current_phase(self, current_phase):
"""
Set current phase in train. It is useful for untest.
current_phase : 1 for join, 0 for update.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
dataset.set_current_phase(1)
"""
self.dataset.set_current_phase(current_phase)
def postprocess_instance(self):
"""
Divide pv instance and convey it to input_channel.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
dataset.preprocess_instance()
exe.train_from_dataset(dataset)
dataset.postprocess_instance()
"""
self.dataset.postprocess_instance()
def set_fleet_send_batch_size(self, fleet_send_batch_size=1024):
def _set_fleet_send_batch_size(self, fleet_send_batch_size=1024):
"""
Set fleet send batch size, default is 1024
......@@ -555,14 +583,14 @@ class InMemoryDataset(DatasetBase):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_fleet_send_batch_size(800)
import paddle
dataset = paddle.distributed.InMemoryDataset()
dataset._set_fleet_send_batch_size(800)
"""
self.fleet_send_batch_size = fleet_send_batch_size
def set_fleet_send_sleep_seconds(self, fleet_send_sleep_seconds=0):
def _set_fleet_send_sleep_seconds(self, fleet_send_sleep_seconds=0):
"""
Set fleet send sleep time, default is 0
......@@ -572,14 +600,14 @@ class InMemoryDataset(DatasetBase):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_fleet_send_sleep_seconds(2)
import paddle
dataset = paddle.distributed.InMemoryDataset()
dataset._set_fleet_send_sleep_seconds(2)
"""
self.fleet_send_sleep_seconds = fleet_send_sleep_seconds
def set_merge_by_lineid(self, merge_size=2):
def _set_merge_by_lineid(self, merge_size=2):
"""
Set merge by line id: instances with the same line id will be merged after
shuffle. You should parse the line id in your data generator.
......@@ -590,21 +618,21 @@ class InMemoryDataset(DatasetBase):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_merge_by_lineid()
import paddle
dataset = paddle.distributed.InMemoryDataset()
dataset._set_merge_by_lineid()
"""
self.dataset.set_merge_by_lineid(merge_size)
self.merge_by_lineid = True
self.parse_ins_id = True
def set_generate_unique_feasigns(self, generate_uni_feasigns, shard_num):
def _set_generate_unique_feasigns(self, generate_uni_feasigns, shard_num):
self.dataset.set_generate_unique_feasigns(generate_uni_feasigns)
self.gen_uni_feasigns = generate_uni_feasigns
self.local_shard_num = shard_num
def generate_local_tables_unlock(self, table_id, fea_dim, read_thread_num,
def _generate_local_tables_unlock(self, table_id, fea_dim, read_thread_num,
consume_thread_num, shard_num):
self.dataset.generate_local_tables_unlock(
table_id, fea_dim, read_thread_num, consume_thread_num, shard_num)
......@@ -616,8 +644,8 @@ class InMemoryDataset(DatasetBase):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
import paddle
dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
......@@ -635,8 +663,8 @@ class InMemoryDataset(DatasetBase):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
import paddle
dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.preload_into_memory()
......@@ -656,8 +684,8 @@ class InMemoryDataset(DatasetBase):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
import paddle
dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.preload_into_memory()
......@@ -673,8 +701,8 @@ class InMemoryDataset(DatasetBase):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
import paddle
dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
......@@ -692,9 +720,9 @@ class InMemoryDataset(DatasetBase):
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
......@@ -736,9 +764,9 @@ class InMemoryDataset(DatasetBase):
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
......@@ -751,30 +779,6 @@ class InMemoryDataset(DatasetBase):
"""
self.dataset.release_memory()
def get_pv_data_size(self):
"""
Get memory data size of Pv, user can call this function to know the pv num
of ins in all workers after load into memory.
Note:
This function may cause bad performance, because it has barrier
Returns:
The size of memory pv data.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
print dataset.get_pv_data_size()
"""
return self.dataset.get_pv_data_size()
def get_memory_data_size(self, fleet=None):
"""
Get memory data size, user can call this function to know the num
......@@ -792,9 +796,9 @@ class InMemoryDataset(DatasetBase):
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
......@@ -829,9 +833,9 @@ class InMemoryDataset(DatasetBase):
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset = paddle.distributed.InMemoryDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
......@@ -849,6 +853,51 @@ class InMemoryDataset(DatasetBase):
return global_data_size[0]
return local_data_size[0]
def _set_fea_eval(self, record_candidate_size, fea_eval=True):
"""
Set fea eval mode for slots shuffle, to debug the importance level of
slots (features). fea_eval needs to be set to True to enable slots shuffle.
Args:
record_candidate_size(int): size of instances candidate to shuffle
one slot
fea_eval(bool): whether enable fea eval mode to enable slots shuffle.
default is True.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.InMemoryDataset()
dataset._set_fea_eval(1000000, True)
"""
if fea_eval:
self.dataset.set_fea_eval(fea_eval, record_candidate_size)
self.fea_eval = fea_eval
def slots_shuffle(self, slots):
"""
Slots Shuffle
Slots Shuffle is a shuffle method at the slot level, usually used for
sparse features with a large number of instances. It compares a metric (e.g.
AUC) computed after shuffling one or several slots against the baseline,
in order to evaluate the importance level of those slots (features).
Args:
slots(list[string]): the set of slots(string) to do slots shuffle.
Examples:
import paddle
dataset = paddle.distributed.InMemoryDataset()
dataset.set_merge_by_lineid()
#suppose there is a slot 0
dataset.slots_shuffle(['0'])
"""
if self.fea_eval:
slots_set = set(slots)
self.dataset.slots_shuffle(slots_set)
class QueueDataset(DatasetBase):
"""
......@@ -857,19 +906,24 @@ class QueueDataset(DatasetBase):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("QueueDataset")
import paddle
dataset = paddle.distributed.QueueDataset()
"""
def __init__(self):
"""
Initialize QueueDataset
This class should be created by DatasetFactory
"""
super(QueueDataset, self).__init__()
self.proto_desc.name = "MultiSlotDataFeed"
def init(self, **kwargs):
"""
should be called only once in user's python scripts to initialize settings of dataset instance
"""
super(QueueDataset, self).init(**kwargs)
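A minimal usage sketch of QueueDataset with the new init() API, mirroring the tests further down in this diff (file names are placeholders):

import paddle
import paddle.fluid as fluid

slots_vars = [
    fluid.data(name="slot1", shape=[None, 1], dtype="int64", lod_level=1)
]
dataset = paddle.distributed.QueueDataset()
dataset.init(
    batch_size=32, thread_num=2, pipe_command="cat", use_var=slots_vars)
dataset.set_filelist(["a.txt", "b.txt"])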
def _prepare_to_run(self):
"""
Set data_feed_desc/thread num/filelist before run,
......@@ -881,115 +935,154 @@ class QueueDataset(DatasetBase):
self.thread_num = 1
self.dataset.set_thread_num(self.thread_num)
self.dataset.set_filelist(self.filelist)
self.dataset.set_data_feed_desc(self.desc())
self.dataset.set_data_feed_desc(self._desc())
self.dataset.create_readers()
def local_shuffle(self):
"""
Local shuffle data.
Local shuffle is not supported in QueueDataset
NotImplementedError will be raised
class FileInstantDataset(DatasetBase):
"""
FileInstantDataset, which processes data in a streaming manner.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("QueueDataset")
dataset.local_shuffle()
Raises:
NotImplementedError: QueueDataset does not support local shuffle
import paddle
dataset = paddle.distributed.fleet.FileInstantDataset()
"""
def __init__(self):
"""
raise NotImplementedError(
"QueueDataset does not support local shuffle, "
"please use InMemoryDataset for local_shuffle")
Initialize FileInstantDataset
"""
super(FileInstantDataset, self).__init__()
self.proto_desc.name = "MultiSlotFileInstantDataFeed"
def global_shuffle(self, fleet=None):
def init(self, **kwargs):
"""
Global shuffle data.
should be called only once in user's python scripts to initialize settings of dataset instance
"""
super(FileInstantDataset, self).init(**kwargs)
Global shuffle is not supported in QueueDataset
NotImplementedError will be raised
Args:
fleet(Fleet): fleet singleton. Default None.
class BoxPSDataset(InMemoryDataset):
"""
BoxPSDataset: derived from InMemoryDataset.
Examples:
.. code-block:: python
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
dataset = fluid.DatasetFactory().create_dataset("QueueDataset")
dataset.global_shuffle(fleet)
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
"""
Raises:
NotImplementedError: QueueDataset does not support global shuffle
def __init__(self):
"""
Initialize BoxPSDataset
"""
super(BoxPSDataset, self).__init__()
self.boxps = core.BoxPS(self.dataset)
self.proto_desc.name = "PaddleBoxDataFeed"
def init(self, **kwargs):
"""
should be called only once in user's python scripts to initialize settings of dataset instance
"""
raise NotImplementedError(
"QueueDataset does not support global shuffle, "
"please use InMemoryDataset for global_shuffle")
super(BoxPSDataset, self).init(**kwargs)
rank_offset = kwargs.get("rank_offset", "")
self._set_rank_offset(rank_offset)
pv_batch_size = kwargs.get("pv_batch_size", 1)
self._set_pv_batch_size(pv_batch_size)
parse_logkey = kwargs.get("parse_logkey", False)
self._set_parse_logkey(parse_logkey)
merge_by_sid = kwargs.get("merge_by_sid", False)
self._set_merge_by_sid(merge_by_sid)
enable_pv_merge = kwargs.get("enable_pv_merge", False)
self._set_enable_pv_merge(enable_pv_merge)
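A hypothetical sketch of passing the BoxPS-specific keys handled above through init() (values and slots_vars are illustrative):

import paddle

dataset = paddle.distributed.fleet.BoxPSDataset()
dataset.init(
    batch_size=128,
    thread_num=4,
    use_var=slots_vars,        # assumed list of feed variables
    pipe_command="cat",
    rank_offset="rank_offset",
    pv_batch_size=128,
    parse_logkey=True,
    merge_by_sid=True,
    enable_pv_merge=True)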
class FileInstantDataset(DatasetBase):
def _set_rank_offset(self, rank_offset):
"""
FileInstantDataset, it will process data streamly.
Set rank_offset for merge_pv. It sets the message of the Pv.
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory.create_dataset("FileInstantDataset")
"""
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset._set_rank_offset("rank_offset")
def __init__(self):
"""
Initialize FileInstantDataset
This class should be created by DatasetFactory
"""
super(FileInstantDataset, self).__init__()
self.proto_desc.name = "MultiSlotFileInstantDataFeed"
Args:
rank_offset(str): rank_offset's name
def local_shuffle(self):
"""
Local shuffle
FileInstantDataset does not support local shuffle
self.proto_desc.rank_offset = rank_offset
def _set_pv_batch_size(self, pv_batch_size):
"""
raise NotImplementedError(
"FileInstantDataset does not support local shuffle, "
"please use InMemoryDataset for local_shuffle")
Set pv batch size. It takes effect when enable_pv_merge is enabled.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset._set_pv_batch_size(128)
Args:
pv_batch_size(int): pv batch size
def global_shuffle(self, fleet=None):
"""
Global shuffle
FileInstantDataset does not support global shuffle
self.proto_desc.pv_batch_size = pv_batch_size
def _set_parse_logkey(self, parse_logkey):
"""
raise NotImplementedError(
"FileInstantDataset does not support global shuffle, "
"please use InMemoryDataset for global_shuffle")
Set whether the Dataset needs to parse logkey
Args:
parse_logkey(bool): if parse logkey or not
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset._set_parse_logkey(True)
class BoxPSDataset(InMemoryDataset):
"""
BoxPSDataset: derived from InMemoryDataset.
self.parse_logkey = parse_logkey
def _set_merge_by_sid(self, merge_by_sid):
"""
Set whether the Dataset needs to merge by sid. If not, one ins means one Pv.
Args:
merge_by_sid(bool): if merge sid or not
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset")
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset._set_merge_by_sid(True)
"""
self.merge_by_sid = merge_by_sid
def __init__(self):
def _set_enable_pv_merge(self, enable_pv_merge):
"""
Initialize BoxPSDataset
This class should be created by DatasetFactory
Set whether the Dataset needs to merge pv.
Args:
enable_pv_merge(bool): if enable_pv_merge or not
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset._set_enable_pv_merge(True)
"""
super(BoxPSDataset, self).__init__()
self.boxps = core.BoxPS(self.dataset)
self.proto_desc.name = "PaddleBoxDataFeed"
self.enable_pv_merge = enable_pv_merge
def set_date(self, date):
"""
......@@ -1008,8 +1101,8 @@ class BoxPSDataset(InMemoryDataset):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset")
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset.begin_pass()
"""
self.boxps.begin_pass()
......@@ -1021,8 +1114,8 @@ class BoxPSDataset(InMemoryDataset):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset")
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset.end_pass(True)
"""
self.boxps.end_pass(need_save_delta)
......@@ -1034,8 +1127,8 @@ class BoxPSDataset(InMemoryDataset):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset")
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.preload_into_memory()
......@@ -1049,8 +1142,8 @@ class BoxPSDataset(InMemoryDataset):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset")
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
......@@ -1064,8 +1157,8 @@ class BoxPSDataset(InMemoryDataset):
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset")
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.preload_into_memory()
......@@ -1093,11 +1186,90 @@ class BoxPSDataset(InMemoryDataset):
slots(list[string]): the set of slots(string) to do slots shuffle.
Examples:
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset.set_merge_by_lineid()
#suppose there is a slot 0
dataset.slots_shuffle(['0'])
"""
slots_set = set(slots)
self.boxps.slots_shuffle(slots_set)
def set_current_phase(self, current_phase):
"""
Set the current phase in training. It is useful for unit tests.
current_phase : 1 for join, 0 for update.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
dataset.set_current_phase(1)
"""
self.dataset.set_current_phase(current_phase)
def get_pv_data_size(self):
"""
Get memory data size of Pv. Users can call this function to know the number
of pv instances across all workers after loading into memory.
Note:
This function may cause bad performance, because it has barrier
Returns:
The size of memory pv data.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
print(dataset.get_pv_data_size())
"""
return self.dataset.get_pv_data_size()
def preprocess_instance(self):
"""
Merge pv instances and convey them from input_channel to input_pv_channel.
It takes effect when enable_pv_merge_ is True.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
dataset.preprocess_instance()
"""
self.dataset.preprocess_instance()
def postprocess_instance(self):
"""
Divide pv instances and convey them to input_channel.
Examples:
.. code-block:: python
import paddle
dataset = paddle.distributed.fleet.BoxPSDataset()
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
dataset.preprocess_instance()
exe.train_from_dataset(dataset)
dataset.postprocess_instance()
"""
self.dataset.postprocess_instance()
......@@ -157,17 +157,20 @@ def get_cluster_from_args(args, gpus):
free_ports = [x for x in range(start_port, start_port + len(gpus))]
return get_cluster(node_ips, node_ip, free_ports, gpus)
trainer_endpoints = []
for ip in node_ips:
trainer_endpoints.append(["%s:%d" % (ip, port) for port in free_ports])
return get_cluster(node_ips, node_ip, trainer_endpoints, gpus)
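To make the new signature concrete, a small sketch of the trainer_endpoints layout built above (IPs and ports are made up):

node_ips = ["192.168.0.1", "192.168.0.2"]
free_ports = [6170, 6171]
trainer_endpoints = []
for ip in node_ips:
    trainer_endpoints.append(["%s:%d" % (ip, port) for port in free_ports])
# trainer_endpoints == [["192.168.0.1:6170", "192.168.0.1:6171"],
#                       ["192.168.0.2:6170", "192.168.0.2:6171"]]
# get_cluster() indexes this list by node_rank to assign one endpoint per GPU.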
def get_gpus(gpus):
if gpus is None:
gpus_num = fluid.core.get_cuda_device_count()
gpus = [str(x) for x in range(0, gpus_num)]
res_gpus = [str(x) for x in range(0, gpus_num)]
else:
cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES")
if cuda_visible_devices is None or cuda_visible_devices == "":
gpus = [x.strip() for x in gpus.split(',')]
res_gpus = [x.strip() for x in gpus.split(',')]
else:
# change gpus into relative values
# e.g. CUDA_VISIBLE_DEVICES=4,5,6,7; args.gpus=4,5,6,7;
......@@ -177,12 +180,16 @@ def get_gpus(gpus):
assert x in cuda_visible_devices_list, "Can't find "\
"your gpus %s in CUDA_VISIBLE_DEVICES[%s]."\
% (x, cuda_visible_devices)
gpus = [
res_gpus = [
cuda_visible_devices_list.index(x.strip())
for x in gpus.split(',')
]
logger.info("Change selected_gpus into reletive values. --ips:{} "
"will change into relative_ips:{} according to your "
"CUDA_VISIBLE_DEVICES:{}".format(
gpus, res_gpus, cuda_visible_devices_list))
return gpus
return res_gpus
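A quick sketch of the relative remapping performed above when CUDA_VISIBLE_DEVICES is set (values are illustrative):

# Suppose CUDA_VISIBLE_DEVICES=4,5,6,7 and the user requests gpus "4,5".
cuda_visible_devices_list = "4,5,6,7".split(',')
gpus = "4,5"
res_gpus = [
    cuda_visible_devices_list.index(x.strip()) for x in gpus.split(',')
]
# res_gpus == [0, 1], i.e. indices relative to the visible devices.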
def launch_collective(args):
......
......@@ -227,18 +227,23 @@ def get_logger(log_level=20, name="root"):
return logger
def get_cluster(node_ips, node_ip, paddle_ports, selected_gpus):
assert type(paddle_ports) is list, "paddle_ports must be list"
def get_cluster(node_ips, node_ip, trainer_endpoints, selected_gpus):
assert type(trainer_endpoints) is list, "trainer_endpoints must be list"
cluster = Cluster(hdfs=None)
trainer_rank = 0
for node_rank, ip in enumerate(node_ips):
pod = Pod()
pod.rank = node_rank
pod.addr = ip
cur_node_endpoints = trainer_endpoints[node_rank]
# when use paddlecloud, endpoints may > selected_gpus(user_defined)
assert len(cur_node_endpoints) >= len(
selected_gpus
), "current trainer_endpoints size should be greater equal than selected_gpus size."
for i in range(len(selected_gpus)):
trainer = Trainer()
trainer.gpus.append(selected_gpus[i])
trainer.endpoint = "%s:%d" % (ip, paddle_ports[i])
trainer.endpoint = "%s" % (cur_node_endpoints[i])
trainer.rank = trainer_rank
trainer_rank += 1
......@@ -424,10 +429,6 @@ def start_local_trainers(cluster,
len(pod.trainers),
pretty_print_envs(proc_env, ("Distributed Envs",
"Value"))))
logger.info(
"More details for debug about commands and environments are written in {}/run.sh".
format(log_dir))
fn = None
if log_dir is not None:
os.system("mkdir -p {}".format(log_dir))
......
......@@ -38,7 +38,7 @@ class RecomputeOptimizer(MetaOptimizerBase):
list(user_defined_strategy.recompute_configs["checkpoints"]))
def _can_apply(self):
if self.role_maker._is_collective:
if not self.role_maker._is_collective:
return False
if self.user_defined_strategy.recompute == True:
......
......@@ -160,18 +160,21 @@ def get_cluster_from_args(args, selected_gpus):
x for x in range(started_port, started_port + len(selected_gpus))
]
return get_cluster(node_ips, node_ip, free_ports, selected_gpus)
trainer_endpoints = []
for ip in node_ips:
trainer_endpoints.append(["%s:%d" % (ip, port) for port in free_ports])
return get_cluster(node_ips, node_ip, trainer_endpoints, selected_gpus)
def get_gpus(selected_gpus):
if selected_gpus is None:
from paddle.fluid import core
gpus_num = core.get_cuda_device_count()
selected_gpus = [str(x) for x in range(0, gpus_num)]
gpus = [str(x) for x in range(0, gpus_num)]
else:
cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES")
if cuda_visible_devices is None or cuda_visible_devices == "":
selected_gpus = [x.strip() for x in selected_gpus.split(',')]
gpus = [x.strip() for x in selected_gpus.split(',')]
else:
# change selected_gpus into relative values
# e.g. CUDA_VISIBLE_DEVICES=4,5,6,7; args.selected_gpus=4,5,6,7;
......@@ -181,12 +184,16 @@ def get_gpus(selected_gpus):
assert x in cuda_visible_devices_list, "Can't find "\
"your selected_gpus %s in CUDA_VISIBLE_DEVICES[%s]."\
% (x, cuda_visible_devices)
selected_gpus = [
gpus = [
cuda_visible_devices_list.index(x.strip())
for x in selected_gpus.split(',')
]
logger.info("Change selected_gpus into reletive values. --ips:{} "
"will change into relative_ips:{} according to your "
"CUDA_VISIBLE_DEVICES:{}".format(
selected_gpus, gpus, cuda_visible_devices_list))
return selected_gpus
return gpus
def get_cluster_and_pod(args):
......
......@@ -227,18 +227,23 @@ def get_logger(log_level, name="root"):
return logger
def get_cluster(node_ips, node_ip, paddle_ports, selected_gpus):
assert type(paddle_ports) is list, "paddle_ports must be list"
def get_cluster(node_ips, node_ip, trainer_endpoints, selected_gpus):
assert type(trainer_endpoints) is list, "trainer_endpoints must be list"
cluster = Cluster(hdfs=None)
trainer_rank = 0
for node_rank, ip in enumerate(node_ips):
pod = Pod()
pod.rank = node_rank
pod.addr = ip
cur_node_endpoints = trainer_endpoints[node_rank]
# when use paddlecloud, endpoints may > selected_gpus(user_defined)
assert len(cur_node_endpoints) >= len(
selected_gpus
), "current trainer_endpoints size should be greater equal than selected_gpus size."
for i in range(len(selected_gpus)):
trainer = Trainer()
trainer.gpus.append(selected_gpus[i])
trainer.endpoint = "%s:%d" % (ip, paddle_ports[i])
trainer.endpoint = "%s" % (cur_node_endpoints[i])
trainer.rank = trainer_rank
trainer_rank += 1
......@@ -253,6 +258,7 @@ def terminate_local_procs(procs):
for p in procs:
if p.proc.poll() is None:
p.proc.terminate()
if p.log_fn:
p.log_fn.close()
logger.debug("terminate process id:{}".format(p.proc.pid))
......
......@@ -143,7 +143,7 @@ class PostTrainingQuantization(object):
weight_quantize_type='channel_wise_abs_max',
optimize_model=False,
is_use_cache_file=False,
cache_dir="./temp_post_training"):
cache_dir=None):
'''
Constructor.
......@@ -206,13 +206,8 @@ class PostTrainingQuantization(object):
`conv2d/depthwise_conv2d + bn`, the weights scale for all channel will
be different. To address this problem, fuse the pattern before
quantization. Default False.
is_use_cache_file(bool, optional): If set is_use_cache_file as False,
all temp data will be saved in memory. If set is_use_cache_file as True,
it will save temp data to disk. When the fp32 model is complex or
the number of calibrate data is large, we should set is_use_cache_file
as True. Defalut is False.
cache_dir(str, optional): When is_use_cache_file is True, set cache_dir as
the directory for saving temp data. Default is ./temp_post_training.
is_use_cache_file(bool, optional): This param is deprecated.
cache_dir(str, optional): This param is deprecated.
Returns:
None
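Since is_use_cache_file and cache_dir are now ignored, a construction sketch no longer needs them. Arguments other than those shown in this diff (executor, model_dir, sample_generator, batch_size, batch_nums, algo) are assumed from the usual constructor:

from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization

# exe, model_dir and sample_generator are assumed to be prepared by the caller
ptq = PostTrainingQuantization(
    executor=exe,
    model_dir=model_dir,
    sample_generator=sample_generator,
    batch_size=16,
    batch_nums=10,
    algo="KL",
    weight_quantize_type='channel_wise_abs_max',
    optimize_model=False)
ptq.quantize()
ptq.save_quantized_model("./quantized_model")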
......@@ -302,10 +297,6 @@ class PostTrainingQuantization(object):
assert op_type in self._support_quantize_op_type, \
op_type + " is not supported for quantization."
self._optimize_model = optimize_model
self._is_use_cache_file = is_use_cache_file
self._cache_dir = cache_dir
if self._is_use_cache_file and not os.path.exists(self._cache_dir):
os.mkdir(self._cache_dir)
# Define variables
self._place = self._executor.place
......@@ -317,11 +308,17 @@ class PostTrainingQuantization(object):
self._out_scale_op_list = _out_scale_op_list
self._quantized_weight_var_name = set()
self._quantized_act_var_name = set()
self.weight_op_pairs = {}
self._weight_op_pairs = {}
# The vars for algo = KL
self._sampling_act_abs_min_max = {}
self._sampling_act_histogram = {}
self._sampling_data = {}
self._quantized_var_kl_threshold = {}
self._histogram_bins = 2048
# The vars for algo = min_max
self._quantized_var_min = {}
self._quantized_var_max = {}
# The vars for algo = abs_max
self._quantized_var_abs_max = {}
def quantize(self):
......@@ -339,6 +336,8 @@ class PostTrainingQuantization(object):
self._collect_target_varnames()
self._set_activation_persistable()
if self._algo == "KL":
_logger.info("Preparation stage ...")
batch_id = 0
for data in self._data_loader():
self._executor.run(program=self._program,
......@@ -346,17 +345,30 @@ class PostTrainingQuantization(object):
fetch_list=self._fetch_list,
return_numpy=False,
scope=self._scope)
if self._algo == "KL":
self._sample_data(batch_id)
else:
self._sample_threshold()
self._collect_activation_abs_min_max()
if batch_id % 5 == 0:
_logger.info("Run batch: " + str(batch_id))
batch_id += 1
if self._batch_nums and batch_id >= self._batch_nums:
break
_logger.info("Finish preparation stage, all batch:" + str(batch_id))
self._init_sampling_act_histogram()
_logger.info("Sampling stage ...")
batch_id = 0
for data in self._data_loader():
self._executor.run(program=self._program,
feed=data,
fetch_list=self._fetch_list,
return_numpy=False,
scope=self._scope)
self._sampling()
if batch_id % 5 == 0:
_logger.info("Run batch: " + str(batch_id))
batch_id += 1
if self._batch_nums and batch_id >= self._batch_nums:
break
_logger.info("Finish all batch: " + str(batch_id))
_logger.info("Finish sampling stage, all batch: " + str(batch_id))
self._reset_activation_persistable()
......@@ -397,6 +409,7 @@ class PostTrainingQuantization(object):
target_vars=self._fetch_list,
executor=self._executor,
main_program=self._program)
_logger.info("The quantized model is saved in " + save_model_path)
def _load_model_data(self):
'''
......@@ -454,7 +467,7 @@ class PostTrainingQuantization(object):
for var_name in var_name_list:
if var_name in persistable_var_names:
self._quantized_weight_var_name.add(var_name)
self.weight_op_pairs[var_name] = op_type
self._weight_op_pairs[var_name] = op_type
else:
self._quantized_act_var_name.add(var_name)
......@@ -494,20 +507,18 @@ class PostTrainingQuantization(object):
if var.name in self._quantized_act_var_name:
var.persistable = False
def _sample_threshold(self):
def _sampling(self):
'''
Sample the input threshold(min, max, or abs_max) in every iterations.
Sample the min/max, abs_max or histogram in every iteration.
'''
assert self._algo in ["abs_max", "min_max"], \
"The algo should be abs_max or min_max for _sample_threshold."
if self._algo == "abs_max":
self._sample_threshold_abs_max()
self._sample_abs_max()
elif self._algo == "min_max":
self._sample_threshold_min_max()
self._sample_min_max()
elif self._algo == "KL":
self._sample_histogram()
def _sample_threshold_abs_max(self):
assert self._algo == "abs_max", \
"The algo should be abs_max for _sample_threshold_abs_max."
def _sample_abs_max(self):
# Only calculate abs_max value for weight for once
if self._quantized_var_abs_max == {}:
for var_name in self._quantized_weight_var_name:
......@@ -516,7 +527,7 @@ class PostTrainingQuantization(object):
abs_max_value = float(np.max(np.abs(var_tensor)))
elif self._weight_quantize_type == "channel_wise_abs_max":
abs_max_value = []
if self.weight_op_pairs[
if self._weight_op_pairs[
var_name] in _channelwise_quant_axis1_ops:
for i in range(var_tensor.shape[1]):
abs_max_value.append(
......@@ -534,9 +545,7 @@ class PostTrainingQuantization(object):
(abs_max_value > self._quantized_var_abs_max[var_name]):
self._quantized_var_abs_max[var_name] = abs_max_value
def _sample_threshold_min_max(self):
assert self._algo == "min_max", \
"The algo should be min_max for _sample_threshold_min_max."
def _sample_min_max(self):
if self._quantized_var_min == {} and self._quantized_var_max == {}:
for var_name in self._quantized_weight_var_name:
var_tensor = _load_variable_data(self._scope, var_name)
......@@ -546,7 +555,7 @@ class PostTrainingQuantization(object):
elif self._weight_quantize_type == "channel_wise_abs_max":
min_value = []
max_value = []
if self.weight_op_pairs[
if self._weight_op_pairs[
var_name] in _channelwise_quant_axis1_ops:
for i in range(var_tensor.shape[1]):
min_value.append(float(np.min(var_tensor[:, i])))
......@@ -569,6 +578,14 @@ class PostTrainingQuantization(object):
(max_value > self._quantized_var_max[var_name]):
self._quantized_var_max[var_name] = max_value
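The channel-wise branches above reduce over different axes depending on the op type; a small NumPy sketch of the statistics being collected (the set _channelwise_quant_axis1_ops is assumed to be defined elsewhere in this file):

import numpy as np

var_tensor = np.random.randn(64, 32).astype("float32")  # a fake weight tensor

# default case: one abs-max per channel along axis 0
per_channel_axis0 = [
    float(np.max(np.abs(var_tensor[i]))) for i in range(var_tensor.shape[0])
]

# ops listed in _channelwise_quant_axis1_ops: reduce along axis 1 instead
per_channel_axis1 = [
    float(np.max(np.abs(var_tensor[:, i]))) for i in range(var_tensor.shape[1])
]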
def _sample_histogram(self):
for var_name in self._quantized_act_var_name:
var_tensor = _load_variable_data(self._scope, var_name)
var_tensor_abs = np.abs(var_tensor)
bins = self._sampling_act_histogram[var_name][1]
hist, _ = np.histogram(var_tensor_abs, bins=bins)
self._sampling_act_histogram[var_name][0] += hist
def _save_input_threhold(self):
'''
Save input threshold to the quantized op.
......@@ -585,27 +602,36 @@ class PostTrainingQuantization(object):
op._set_attr(var_name + ".max",
self._quantized_var_max[var_name])
def _sample_data(self, iter):
def _collect_activation_abs_min_max(self):
'''
Sample the tensor data of quantized variables,
applied in every iteration.
Collect the abs_min and abs_max for all activations. When algo = KL,
get the min and max values, and then calculate the threshold.
'''
assert self._algo == "KL", "The algo should be KL to sample data."
if self._is_use_cache_file:
for var_name in self._quantized_act_var_name:
var_tensor = _load_variable_data(self._scope, var_name)
var_tensor = var_tensor.ravel()
save_path = os.path.join(
self._cache_dir,
var_name.replace("/", ".") + "_" + str(iter) + ".npy")
np.save(save_path, var_tensor)
var_tensor = np.abs(var_tensor)
min_value = float(np.min(var_tensor))
max_value = float(np.max(var_tensor))
if var_name not in self._sampling_act_abs_min_max:
self._sampling_act_abs_min_max[
var_name] = [min_value, max_value]
else:
if min_value < self._sampling_act_abs_min_max[var_name][0]:
self._sampling_act_abs_min_max[var_name][0] = min_value
if max_value > self._sampling_act_abs_min_max[var_name][1]:
self._sampling_act_abs_min_max[var_name][1] = max_value
def _init_sampling_act_histogram(self):
'''
Based on the min/max value, init the sampling_act_histogram.
'''
for var_name in self._quantized_act_var_name:
if var_name not in self._sampling_data:
self._sampling_data[var_name] = []
var_tensor = _load_variable_data(self._scope, var_name)
var_tensor = var_tensor.ravel()
self._sampling_data[var_name].append(var_tensor)
if var_name not in self._sampling_act_histogram:
min_val = self._sampling_act_abs_min_max[var_name][0]
max_val = self._sampling_act_abs_min_max[var_name][1]
hist, hist_edeges = np.histogram(
[], bins=self._histogram_bins, range=(min_val, max_val))
self._sampling_act_histogram[var_name] = [hist, hist_edeges]
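Putting the two passes together, a simplified sketch of how the activation histograms are built before computing the KL thresholds (variable names and data are made up):

import numpy as np

histogram_bins = 2048
abs_min_max = {}
histograms = {}

# pass 1: track the abs min/max of every activation across batches
for batch in range(3):
    act = np.abs(np.random.randn(100).astype("float32"))  # fake activation
    lo, hi = float(np.min(act)), float(np.max(act))
    cur = abs_min_max.setdefault("act_var", [lo, hi])
    cur[0], cur[1] = min(cur[0], lo), max(cur[1], hi)

# init: fix the bin edges once from the observed range
lo, hi = abs_min_max["act_var"]
hist, edges = np.histogram([], bins=histogram_bins, range=(lo, hi))
histograms["act_var"] = [hist, edges]

# pass 2: accumulate per-batch histograms into the fixed bins
for batch in range(3):
    act = np.abs(np.random.randn(100).astype("float32"))
    h, _ = np.histogram(act, bins=histograms["act_var"][1])
    histograms["act_var"][0] += h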
def _calculate_kl_threshold(self):
'''
......@@ -621,7 +647,7 @@ class PostTrainingQuantization(object):
weight_threshold = float(np.max(np.abs(weight_data)))
elif self._weight_quantize_type == "channel_wise_abs_max":
weight_threshold = []
if self.weight_op_pairs[
if self._weight_op_pairs[
var_name] in _channelwise_quant_axis1_ops:
for i in range(weight_data.shape[1]):
weight_threshold.append(
......@@ -632,25 +658,10 @@ class PostTrainingQuantization(object):
float(np.max(np.abs(weight_data[i]))))
self._quantized_var_kl_threshold[var_name] = weight_threshold
# KL threshold for activations
if self._is_use_cache_file:
for var_name in self._quantized_act_var_name:
sampling_data = []
filenames = [f for f in os.listdir(self._cache_dir) \
if re.match(var_name.replace("/", ".") + '_[0-9]+.npy', f)]
for filename in filenames:
file_path = os.path.join(self._cache_dir, filename)
sampling_data.append(np.load(file_path))
os.remove(file_path)
sampling_data = np.concatenate(sampling_data)
hist, hist_edeges = self._sampling_act_histogram[var_name]
self._quantized_var_kl_threshold[var_name] = \
self._get_kl_scaling_factor(np.abs(sampling_data))
else:
for var_name in self._quantized_act_var_name:
self._sampling_data[var_name] = np.concatenate(
self._sampling_data[var_name])
self._quantized_var_kl_threshold[var_name] = \
self._get_kl_scaling_factor(np.abs(self._sampling_data[var_name]))
self._get_kl_scaling_factor(hist, hist_edeges)
def _update_program(self):
'''
......@@ -765,22 +776,15 @@ class PostTrainingQuantization(object):
for var_name in out_var_names:
analysis_and_save_info(op, var_name)
def _get_kl_scaling_factor(self, activation_blob, num_quantized_bins=255):
def _get_kl_scaling_factor(self, hist, hist_edeges, num_quantized_bins=255):
'''
Using the KL-divergenc method to get the more precise scaling factor.
'''
max_val = np.max(activation_blob)
min_val = np.min(activation_blob)
if min_val >= 0:
hist, hist_edeges = np.histogram(
activation_blob, bins=2048, range=(min_val, max_val))
ending_iter = 2047
ending_iter = self._histogram_bins - 1
starting_iter = int(ending_iter * 0.7)
else:
_logger.error("Please first apply abs to activation_blob.")
bin_width = hist_edeges[1] - hist_edeges[0]
P_sum = len(np.array(activation_blob).ravel())
P_sum = np.sum(np.array(hist).ravel())
min_kl_divergence = 0
min_kl_index = 0
kl_inited = False
......
......@@ -19,6 +19,7 @@ import six
import pickle
import numpy as np
import paddle
from paddle import compat as cpt
from paddle.fluid import core
from paddle.fluid import framework
......@@ -182,9 +183,9 @@ class _ProgramHolder(object):
super(_ProgramHolder, self).__init__()
# input, output, persistable var info
self._input_names = []
self._persistable_names = []
self._input_descs = []
self._output_descs = []
self._persistable_names = []
# execution scope
self._inner_scope = core.Scope()
......@@ -207,11 +208,11 @@ class _ProgramHolder(object):
return self._train_program_desc
@property
def input_names(self):
return self._input_names
def input_descs(self):
return self._input_descs
@property
def output_decs(self):
def output_descs(self):
return self._output_descs
@property
......@@ -233,7 +234,8 @@ class _ProgramHolder(object):
ops_to_remove.append(i)
feed_var_name = cpt.to_bytes(op.input('X')[0])
root_block._remove_var(feed_var_name)
self._input_names.append(cpt.to_bytes(op.output('Out')[0]))
self._input_descs.append(
root_block.find_var(cpt.to_bytes(op.output('Out')[0])))
elif op.type() == 'scale' and op.output('Out')[0].startswith(
'save_infer_model/scale_'):
ops_to_remove.append(i)
......@@ -257,7 +259,7 @@ class _ProgramHolder(object):
root_block._remove_op(op_idx, op_idx + 1)
# 2. Input processing, reverse feed vars
self._input_names.reverse()
self._input_descs.reverse()
# 3. Output processing, add scale for outputs
tmp_program = _build_program_by_desc(program_desc)
......@@ -738,7 +740,7 @@ class TranslatedLayer(layers.Layer):
if isinstance(value, np.ndarray):
var = core.VarBase(
value=value,
name=program_holder.input_names[i],
name=program_holder.input_descs[i].name(),
persistable=False,
place=framework._current_expected_place(),
zero_copy=True)
......@@ -746,7 +748,7 @@ class TranslatedLayer(layers.Layer):
var = value
# NOTE: we changed var name here,
# but it may be an important name set by user
var.name = program_holder.input_names[i]
var.name = program_holder.input_descs[i].name()
input_vars.append(var)
persistable_vars = []
......@@ -762,7 +764,7 @@ class TranslatedLayer(layers.Layer):
% var_name)
output_vars = []
for var_desc in program_holder.output_decs:
for var_desc in program_holder.output_descs:
var = core.VarBase(var_desc.dtype(),
var_desc.shape(),
var_desc.name(), var_desc.type(), False)
......@@ -913,11 +915,7 @@ class TranslatedLayer(layers.Layer):
program = translated_layer.program()
"""
# 1. get program holder
program_holder = self._program_holder_dict.get(method_name, None)
if program_holder is None:
raise ValueError(
"The method `%s` is not exists in loaded TranslatedLayer." %
method_name)
program_holder = self._get_program_holder(method_name)
# 2. get inference program desc
program_desc = program_holder.infer_program
......@@ -925,3 +923,44 @@ class TranslatedLayer(layers.Layer):
# 3. construct program
program = _build_program_by_desc(program_desc)
return program
def _get_program_holder(self, method_name='forward'):
program_holder = self._program_holder_dict.get(method_name, None)
if program_holder is None:
raise ValueError(
"The method `%s` does not exist in loaded TranslatedLayer." %
method_name)
return program_holder
def _input_spec(self, method_name='forward'):
# 1. get program holder
program_holder = self._get_program_holder(method_name)
# 2. build input spec by input desc
input_spec = []
for var_desc in program_holder.input_descs:
spec = paddle.static.InputSpec(
shape=var_desc.shape(),
dtype=var_desc.dtype(),
name=var_desc.name())
input_spec.append(spec)
return input_spec
def _output_spec(self, method_name='forward'):
# 1. get program holder
program_holder = self._get_program_holder(method_name)
# 2. build output spec by output desc
output_spec = []
for var_desc in program_holder.output_descs:
# NOTE(chenweihang): InputSpec describes a tensor, not just input.
# Maybe the name is not good enough. Here we use InputSpec to
# construct the description of Output tensor
spec = paddle.static.InputSpec(
shape=var_desc.shape(),
dtype=var_desc.dtype(),
name=var_desc.name())
output_spec.append(spec)
return output_spec
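A short usage sketch of the new helpers, assuming a model previously saved with paddle.jit.save under the hypothetical path "./saved_infer_model":

import paddle

layer = paddle.jit.load("./saved_infer_model")
# the private helpers added above expose the I/O signature of the loaded program
for spec in layer._input_spec():
    print(spec.name, spec.shape, spec.dtype)
for spec in layer._output_spec():
    print(spec.name, spec.shape, spec.dtype)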
......@@ -1726,13 +1726,13 @@ class DatasetLoader(DataLoaderBase):
logging.warn('thread_num {} which is set in Dataset is ignored'.
format(dataset.thread_num))
dataset.set_thread(thread_num)
dataset._set_thread(thread_num)
if isinstance(dataset, paddle.distributed.fleet.dataset.
InMemoryDataset) and dataset.queue_num > thread_num:
logging.warn("queue_num {} which is set in Dataset is ignored".
format(dataset.queue_num))
dataset.set_queue_num(thread_num)
dataset._set_queue_num(thread_num)
self._dataset = dataset
use_slots = [
......
......@@ -102,6 +102,7 @@ if(WIN32)
endif()
LIST(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_new)
LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint)
LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint1)
LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint2)
......@@ -399,17 +400,17 @@ py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op ENVS ${G
py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op ENVS ${GC_ENVS})
py_test_modules(test_imperative_resnet MODULES test_imperative_resnet ENVS
FLAGS_cudnn_deterministic=1 SERIAL)
set_tests_properties(test_imperative_resnet PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
set_tests_properties(test_imperative_resnet PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
py_test_modules(test_imperative_resnet_sorted_gradient MODULES test_imperative_resnet_sorted_gradient ENVS
FLAGS_cudnn_deterministic=1 SERIAL)
set_tests_properties(test_imperative_resnet_sorted_gradient PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
set_tests_properties(test_imperative_resnet_sorted_gradient PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
py_test_modules(test_imperative_mnist MODULES test_imperative_mnist ENVS
FLAGS_cudnn_deterministic=1)
py_test_modules(test_imperative_mnist_sorted_gradient MODULES test_imperative_mnist_sorted_gradient ENVS
FLAGS_cudnn_deterministic=1)
py_test_modules(test_imperative_se_resnext MODULES test_imperative_se_resnext ENVS
FLAGS_cudnn_deterministic=1 SERIAL)
set_tests_properties(test_imperative_se_resnext PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
set_tests_properties(test_imperative_se_resnext PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY")
py_test_modules(test_imperative_ocr_attention_model MODULES test_imperative_ocr_attention_model ENVS
FLAGS_cudnn_deterministic=1 SERIAL)
py_test_modules(test_install_check MODULES test_install_check ENVS
......
......@@ -208,14 +208,16 @@ class TestDistCTR2x2(FleetDistRunnerBase):
filelist = train_file_list
# config dataset
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset()
dataset.set_batch_size(batch_size)
dataset.set_use_var(self.feeds)
dataset = paddle.distributed.QueueDataset()
pipe_command = 'python ctr_dataset_reader.py'
dataset.set_pipe_command(pipe_command)
dataset.init(
batch_size=batch_size,
use_var=self.feeds,
pipe_command=pipe_command,
thread_num=thread_num)
dataset.set_filelist(filelist)
dataset.set_thread(thread_num)
for epoch_id in range(1):
pass_start = time.time()
......
......@@ -114,14 +114,14 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2):
filelist.append(train_file_path)
# config dataset
dataset = paddle.fleet.DatasetFactory().create_dataset()
dataset.set_batch_size(batch_size)
dataset.set_use_var(self.feeds)
dataset = paddle.distributed.QueueDataset()
dataset._set_batch_size(batch_size)
dataset._set_use_var(self.feeds)
pipe_command = 'python ctr_dataset_reader.py'
dataset.set_pipe_command(pipe_command)
dataset._set_pipe_command(pipe_command)
dataset.set_filelist(filelist)
dataset.set_thread(thread_num)
dataset._set_thread(thread_num)
for epoch_id in range(1):
pass_start = time.time()
......
......@@ -183,14 +183,14 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase):
print("filelist: {}".format(filelist))
# config dataset
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset()
dataset.set_batch_size(batch_size)
dataset.set_use_var(self.feeds)
dataset = paddle.distributed.QueueDataset()
dataset._set_batch_size(batch_size)
dataset._set_use_var(self.feeds)
pipe_command = 'python ctr_dataset_reader.py'
dataset.set_pipe_command(pipe_command)
dataset._set_pipe_command(pipe_command)
dataset.set_filelist(filelist)
dataset.set_thread(thread_num)
dataset._set_thread(thread_num)
for epoch_id in range(1):
pass_start = time.time()
......
......@@ -17,6 +17,7 @@ import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import PassVersionChecker
class TransposeFlattenConcatFusePassTest(InferencePassTest):
......@@ -45,6 +46,37 @@ class TransposeFlattenConcatFusePassTest(InferencePassTest):
use_gpu = True
self.check_output_with_option(use_gpu)
PassVersionChecker.IsCompatible('transpose_flatten_concat_fuse_pass')
class TransposeFlattenConcatFusePassWithAxisTest(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data1 = fluid.data(name="data1", shape=[5, 5, 5], dtype="float32")
data2 = fluid.data(name="data2", shape=[5, 5, 5], dtype="float32")
trans1 = fluid.layers.transpose(data1, perm=[2, 1, 0])
trans2 = fluid.layers.transpose(data2, perm=[2, 1, 0])
flatt1 = fluid.layers.flatten(trans1, axis=2)
flatt2 = fluid.layers.flatten(trans2, axis=2)
concat_out = fluid.layers.concat([flatt1, flatt2], axis=1)
# There are no parameters for the above structure.
# Hence, append a batch_norm to avoid failure caused by load_combined.
out = fluid.layers.batch_norm(concat_out, is_test=True)
self.feeds = {
"data1": np.random.random([5, 5, 5]).astype("float32"),
"data2": np.random.random([5, 5, 5]).astype("float32")
}
self.fetch_list = [out]
def test_check_output(self):
# There is no cpu pass for transpose_flatten_concat_fuse
if core.is_compiled_with_cuda():
use_gpu = True
self.check_output_with_option(use_gpu)
PassVersionChecker.IsCompatible('transpose_flatten_concat_fuse_pass')
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import AnalysisConfig
class PadOpTRTTest(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(
name="data", shape=[1, 3, 128, 128], dtype="float32")
pad_out = fluid.layers.pad(x=data,
paddings=[0, 0, 0, 0, 0, 1, 1, 2],
pad_value=0.0)
out = fluid.layers.batch_norm(pad_out, is_test=True)
self.feeds = {
"data": np.random.random((1, 3, 128, 128)).astype("float32")
}
self.enable_trt = True
self.trt_parameters = PadOpTRTTest.TensorRTParam(
1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
self.fetch_list = [out]
def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import AnalysisConfig
#normal starts && ends
class SlicePluginTRTTest1(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[3, 3, 3, 3], dtype="float32")
axes = [1, 3]
starts = [0, 1]
ends = [2, 3]
slice_out = fluid.layers.slice(
data, axes=axes, starts=starts, ends=ends)
out = fluid.layers.batch_norm(slice_out, is_test=True)
self.feeds = {
"data": np.random.random((3, 3, 3, 3)).astype("float32"),
}
# Diff occurred between GPU and TRT.
# In order to provide TRT CI ASAP, this test for trt part
# is disabled temporarily.
self.enable_trt = True
self.trt_parameters = SlicePluginTRTTest1.TensorRTParam(
1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
self.fetch_list = [out]
def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])
#negative starts && ends
class SlicePluginTRTTest2(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[3, 3, 3, 3], dtype="float32")
axes = [2, 3]
starts = [-3, -2]
ends = [-1, 3]
slice_out = fluid.layers.slice(
data, axes=axes, starts=starts, ends=ends)
out = fluid.layers.batch_norm(slice_out, is_test=True)
self.feeds = {
"data": np.random.random((3, 3, 3, 3)).astype("float32"),
}
# Diff occurred between GPU and TRT.
# In order to provide TRT CI ASAP, this test for trt part
# is disabled temporarily.
self.enable_trt = True
self.trt_parameters = SlicePluginTRTTest2.TensorRTParam(
1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
self.fetch_list = [out]
def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])
#exceeded bound starts && ends
class SlicePluginTRTTest3(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[3, 3, 3, 3], dtype="float32")
axes = [2, 3]
starts = [-5, -2]
ends = [-1, 8]
slice_out = fluid.layers.slice(
data, axes=axes, starts=starts, ends=ends)
out = fluid.layers.batch_norm(slice_out, is_test=True)
self.feeds = {
"data": np.random.random((3, 3, 3, 3)).astype("float32"),
}
# Diff occurred between GPU and TRT.
# In order to provide TRT CI ASAP, this test for trt part
# is disabled temporarily.
self.enable_trt = True
self.trt_parameters = SlicePluginTRTTest3.TensorRTParam(
1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
self.fetch_list = [out]
def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])
#fp16
class SlicePluginTRTTest4(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[3, 3, 3, 3], dtype="float32")
axes = [2, 3]
starts = [-5, -2]
ends = [-1, 8]
slice_out = fluid.layers.slice(
data, axes=axes, starts=starts, ends=ends)
out = fluid.layers.batch_norm(slice_out, is_test=True)
self.feeds = {
"data": np.random.random((3, 3, 3, 3)).astype("float32"),
}
# Diff occurred between GPU and TRT.
# In order to provide TRT CI ASAP, this test for trt part
# is disabled temporarily.
self.enable_trt = True
self.trt_parameters = SlicePluginTRTTest4.TensorRTParam(
1 << 30, 32, 1, AnalysisConfig.Precision.Half, False, False)
self.fetch_list = [out]
def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid.core as core
class TestBroadcastOpCpu(OpTest):
def setUp(self):
self.op_type = "broadcast"
input = np.random.random((100, 2)).astype("float32")
np_out = input[:]
self.inputs = {"X": input}
self.attrs = {"sync_mode": False, "root": 0}
self.outputs = {"Out": np_out}
def test_check_output_cpu(self):
try:
self.check_output_with_place(place=core.CPUPlace())
except:
print("do not support cpu test, skip")
if __name__ == "__main__":
unittest.main()
......@@ -38,26 +38,22 @@ class TestDataset(unittest.TestCase):
def test_dataset_create(self):
""" Testcase for dataset create. """
try:
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"InMemoryDataset")
dataset = paddle.distributed.InMemoryDataset()
except:
self.assertTrue(False)
try:
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"QueueDataset")
dataset = paddle.distributed.QueueDataset()
except:
self.assertTrue(False)
try:
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"FileInstantDataset")
dataset = paddle.distributed.fleet.dataset.FileInstantDataset()
except:
self.assertTrue(False)
try:
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"MyOwnDataset")
dataset = paddle.distributed.fleet.dataset.MyOwnDataset()
self.assertTrue(False)
except:
self.assertTrue(True)
......@@ -95,18 +91,18 @@ class TestDataset(unittest.TestCase):
name=slot, shape=[1], dtype="int64", lod_level=1)
slots_vars.append(var)
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"InMemoryDataset")
dataset.set_batch_size(32)
dataset.set_thread(3)
dataset = paddle.distributed.InMemoryDataset()
dataset.init(
batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars)
dataset.update_settings(pipe_command="cat1")
dataset._init_distributed_settings(
parse_ins_id=True,
parse_content=True,
fea_eval=True,
candidate_size=10000)
dataset.set_filelist(
["test_run_with_dump_a.txt", "test_run_with_dump_b.txt"])
dataset.set_parse_ins_id(True)
dataset.set_parse_content(True)
dataset.set_pipe_command("cat")
dataset.set_use_var(slots_vars)
dataset.load_into_memory()
dataset.set_fea_eval(10000, True)
dataset.local_shuffle()
exe = fluid.Executor(fluid.CPUPlace())
......@@ -176,14 +172,14 @@ class TestDataset(unittest.TestCase):
name=slot, shape=[1], dtype="int64", lod_level=1)
slots_vars.append(var)
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"InMemoryDataset")
dataset.set_batch_size(32)
dataset.set_thread(3)
dataset = paddle.distributed.InMemoryDataset()
dataset.init(
batch_size=32,
thread_num=3,
pipe_command="cat",
download_cmd="cat",
use_var=slots_vars)
dataset.set_filelist([filename1, filename2])
dataset.set_pipe_command("cat")
dataset.set_download_cmd("cat")
dataset.set_use_var(slots_vars)
dataset.load_into_memory()
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
......@@ -228,22 +224,19 @@ class TestDataset(unittest.TestCase):
name=slot, shape=[1], dtype="int64", lod_level=1)
slots_vars.append(var)
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"InMemoryDataset")
dataset.set_batch_size(32)
dataset.set_thread(3)
dataset = paddle.distributed.InMemoryDataset()
dataset.init(
batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars)
dataset._init_distributed_settings(fea_eval=True, candidate_size=1)
dataset.set_filelist([
"test_in_memory_dataset_run_a.txt",
"test_in_memory_dataset_run_b.txt"
])
dataset.set_pipe_command("cat")
dataset.set_use_var(slots_vars)
dataset.load_into_memory()
dataset.set_fea_eval(1, True)
dataset.slots_shuffle(["slot1"])
dataset.local_shuffle()
dataset.set_generate_unique_feasigns(True, 15)
dataset.generate_local_tables_unlock(0, 11, 1, 25, 15)
dataset._set_generate_unique_feasigns(True, 15)
dataset._generate_local_tables_unlock(0, 11, 1, 25, 15)
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
if self.use_data_loader:
......@@ -300,17 +293,14 @@ class TestDataset(unittest.TestCase):
name=slot, shape=[1], dtype="float32", lod_level=1)
slots_vars.append(var)
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"InMemoryDataset")
dataset.set_batch_size(32)
dataset.set_thread(1)
dataset.set_parse_ins_id(True)
dataset = paddle.distributed.InMemoryDataset()
dataset.init(
batch_size=32, thread_num=1, pipe_command="cat", use_var=slots_vars)
dataset._init_distributed_settings(parse_ins_id=True)
dataset.set_filelist([
"test_in_memory_dataset_masterpatch_a.txt",
"test_in_memory_dataset_masterpatch_b.txt"
])
dataset.set_pipe_command("cat")
dataset.set_use_var(slots_vars)
dataset.load_into_memory()
dataset.local_shuffle()
......@@ -325,7 +315,8 @@ class TestDataset(unittest.TestCase):
except Exception as e:
self.assertTrue(False)
dataset.set_merge_by_lineid(2)
#dataset._set_merge_by_lineid(2)
dataset.update_settings(merge_size=2)
dataset.dataset.merge_by_lineid()
os.remove("./test_in_memory_dataset_masterpatch_a.txt")
......@@ -367,17 +358,14 @@ class TestDataset(unittest.TestCase):
name="slot4", shape=[1], dtype="float32", lod_level=0)
slots_vars = [var1, var2, var3, var4]
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"InMemoryDataset")
dataset.set_batch_size(32)
dataset.set_thread(1)
dataset.set_parse_ins_id(True)
dataset = paddle.distributed.InMemoryDataset()
dataset.init(
batch_size=32, thread_num=1, pipe_command="cat", use_var=slots_vars)
dataset._init_distributed_settings(parse_ins_id=True)
dataset.set_filelist([
"test_in_memory_dataset_masterpatch1_a.txt",
"test_in_memory_dataset_masterpatch1_b.txt"
])
dataset.set_pipe_command("cat")
dataset.set_use_var(slots_vars)
dataset.load_into_memory()
dataset.local_shuffle()
......@@ -392,7 +380,7 @@ class TestDataset(unittest.TestCase):
except Exception as e:
self.assertTrue(False)
dataset.set_merge_by_lineid(2)
dataset._set_merge_by_lineid(2)
dataset.dataset.merge_by_lineid()
os.remove("./test_in_memory_dataset_masterpatch1_a.txt")
......@@ -423,16 +411,13 @@ class TestDataset(unittest.TestCase):
name=slot, shape=[1], dtype="float32", lod_level=1)
slots_vars.append(var)
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"InMemoryDataset")
dataset.set_batch_size(32)
dataset.set_thread(3)
dataset = paddle.distributed.InMemoryDataset()
dataset.init(
batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars)
dataset.set_filelist([
"test_in_memory_dataset_run_a.txt",
"test_in_memory_dataset_run_b.txt"
])
dataset.set_pipe_command("cat")
dataset.set_use_var(slots_vars)
dataset.load_into_memory()
dataset.local_shuffle()
......@@ -473,9 +458,9 @@ class TestDataset(unittest.TestCase):
except Exception as e:
self.assertTrue(False)
dataset.set_merge_by_lineid(2)
dataset.set_parse_ins_id(False)
dataset.set_fleet_send_sleep_seconds(2)
dataset._set_merge_by_lineid(2)
dataset._set_parse_ins_id(False)
dataset._set_fleet_send_sleep_seconds(2)
dataset.preload_into_memory()
dataset.wait_preload_done()
dataset.release_memory()
......@@ -483,10 +468,25 @@ class TestDataset(unittest.TestCase):
dataset.wait_preload_done()
dataset.dataset.merge_by_lineid()
dataset.release_memory()
dataset.set_merge_by_lineid(30)
dataset.set_parse_ins_id(False)
dataset._set_merge_by_lineid(30)
dataset._set_parse_ins_id(False)
dataset.load_into_memory()
dataset.dataset.merge_by_lineid()
dataset.update_settings(
batch_size=1,
thread_num=2,
input_type=1,
pipe_command="cat",
use_var=[],
fs_name="",
fs_ugi="",
download_cmd="cat",
merge_size=-1,
parse_ins_id=False,
parse_content=False,
fleet_send_batch_size=2,
fleet_send_sleep_seconds=2,
fea_eval=True)
fleet_ptr = fluid.core.Fleet()
fleet_ptr.set_client2client_config(1, 1, 1)
fleet_ptr.get_cache_threshold(0)
......@@ -517,14 +517,11 @@ class TestDataset(unittest.TestCase):
name=slot, shape=[1], dtype="int64", lod_level=1)
slots_vars.append(var)
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"QueueDataset")
dataset.set_batch_size(32)
dataset.set_thread(3)
dataset = paddle.distributed.QueueDataset()
dataset.init(
batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars)
dataset.set_filelist(
["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"])
dataset.set_pipe_command("cat")
dataset.set_use_var(slots_vars)
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
......@@ -543,12 +540,9 @@ class TestDataset(unittest.TestCase):
except Exception as e:
self.assertTrue(False)
dataset2 = paddle.distributed.fleet.DatasetFactory().create_dataset(
"QueueDataset")
dataset2.set_use_var(slots_vars)
dataset2.set_batch_size(32)
dataset2.set_thread(3)
dataset2.set_pipe_command("cat")
dataset2 = paddle.distributed.QueueDataset()
dataset2.init(
batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars)
dataset.set_filelist([])
try:
exe.train_from_dataset(fluid.default_main_program(), dataset2)
......@@ -585,14 +579,11 @@ class TestDataset(unittest.TestCase):
name=slot, shape=[1], dtype="float32", lod_level=1)
slots_vars.append(var)
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"QueueDataset")
dataset.set_batch_size(32)
dataset.set_thread(3)
dataset = paddle.distributed.QueueDataset()
dataset.init(
batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars)
dataset.set_filelist(
["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"])
dataset.set_pipe_command("cat")
dataset.set_use_var(slots_vars)
exe = fluid.Executor(fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0))
......@@ -641,15 +632,15 @@ class TestDataset(unittest.TestCase):
name=slot, shape=[None, 1], dtype="int64", lod_level=1)
slots_vars.append(var)
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"InMemoryDataset")
dataset.set_input_type(1)
dataset.set_batch_size(1)
dataset.set_thread(2)
dataset = paddle.distributed.InMemoryDataset()
dataset.init(
batch_size=1,
thread_num=2,
input_type=1,
pipe_command="cat",
use_var=slots_vars)
dataset.set_filelist(
["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"])
dataset.set_pipe_command("cat")
dataset.set_use_var(slots_vars)
dataset.load_into_memory()
exe = fluid.Executor(fluid.CPUPlace() if not core.is_compiled_with_cuda(
......@@ -721,13 +712,10 @@ class TestDatasetWithFetchHandler(unittest.TestCase):
inputs(list): inputs of get_dataset
files(list): files of get_dataset
"""
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"QueueDataset")
dataset.set_batch_size(32)
dataset.set_thread(3)
dataset = paddle.distributed.QueueDataset()
dataset.init(
batch_size=32, thread_num=3, pipe_command="cat", use_var=inputs)
dataset.set_filelist(files)
dataset.set_pipe_command("cat")
dataset.set_use_var(inputs)
return dataset
def setUp(self):
......@@ -879,16 +867,17 @@ class TestDataset2(unittest.TestCase):
except ImportError as e:
print("warning: no mpi4py")
exe.run(startup_program)
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"InMemoryDataset")
dataset.set_batch_size(32)
dataset.set_thread(3)
dataset = paddle.distributed.InMemoryDataset()
dataset.init(
batch_size=32,
thread_num=3,
pipe_command="cat",
use_var=slots_vars)
dataset.set_filelist([
"test_in_memory_dataset2_run_a.txt",
"test_in_memory_dataset2_run_b.txt"
])
dataset.set_pipe_command("cat")
dataset.set_use_var(slots_vars)
dataset.load_into_memory()
fleet._opt_info = None
fleet._fleet_ptr = None
......@@ -949,16 +938,16 @@ class TestDataset2(unittest.TestCase):
except ImportError as e:
print("warning: no mpi4py")
exe.run(startup_program)
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"InMemoryDataset")
dataset.set_batch_size(32)
dataset.set_thread(3)
dataset = paddle.distributed.InMemoryDataset()
dataset.init(
batch_size=32,
thread_num=3,
pipe_command="cat",
use_var=slots_vars)
dataset.set_filelist([
"test_in_memory_dataset2_run2_a.txt",
"test_in_memory_dataset2_run2_b.txt"
])
dataset.set_pipe_command("cat")
dataset.set_use_var(slots_vars)
dataset.load_into_memory()
try:
dataset.global_shuffle(fleet)
......@@ -966,14 +955,11 @@ class TestDataset2(unittest.TestCase):
print("warning: catch expected error")
fleet._opt_info = None
fleet._fleet_ptr = None
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"InMemoryDataset")
dataset.set_rank_offset("")
dataset.set_pv_batch_size(1)
dataset.set_hdfs_config("", "")
dataset = paddle.distributed.InMemoryDataset()
dataset.init(fs_name="", fs_ugi="")
d = paddle.distributed.fleet.DatasetBase()
try:
dataset.set_feed_type("MultiSlotInMemoryDataFeed")
dataset._set_feed_type("MultiSlotInMemoryDataFeed")
except:
print("warning: catch expected error")
dataset.thread_num = 0
......@@ -981,9 +967,6 @@ class TestDataset2(unittest.TestCase):
dataset._prepare_to_run()
except:
print("warning: catch expected error")
dataset.set_parse_logkey(True)
dataset.set_merge_by_sid(True)
dataset.set_enable_pv_merge(True)
try:
dataset.preprocess_instance()
except:
......@@ -996,16 +979,15 @@ class TestDataset2(unittest.TestCase):
dataset.postprocess_instance()
except:
print("warning: catch expected error")
dataset.set_fleet_send_batch_size(1024)
dataset._set_fleet_send_batch_size(1024)
try:
dataset.global_shuffle()
except:
print("warning: catch expected error")
dataset.get_pv_data_size()
#dataset.get_pv_data_size()
dataset.get_memory_data_size()
dataset.get_shuffle_data_size()
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"QueueDataset")
dataset = paddle.distributed.QueueDataset()
try:
dataset.local_shuffle()
except:
......@@ -1027,6 +1009,120 @@ class TestDataset2(unittest.TestCase):
os.remove("./test_in_memory_dataset2_run2_a.txt")
os.remove("./test_in_memory_dataset2_run2_b.txt")
def test_bosps_dataset_fleet2(self):
"""
Testcase for BoxPSDataset from create to run.
"""
with open("test_in_memory_dataset2_run2_a.txt", "w") as f:
data = "1 1 2 3 3 4 5 5 5 5 1 1\n"
data += "1 2 2 3 4 4 6 6 6 6 1 2\n"
data += "1 3 2 3 5 4 7 7 7 7 1 3\n"
f.write(data)
with open("test_in_memory_dataset2_run2_b.txt", "w") as f:
data = "1 4 2 3 3 4 5 5 5 5 1 4\n"
data += "1 5 2 3 4 4 6 6 6 6 1 5\n"
data += "1 6 2 3 5 4 7 7 7 7 1 6\n"
data += "1 7 2 3 6 4 8 8 8 8 1 7\n"
f.write(data)
train_program = fluid.Program()
startup_program = fluid.Program()
scope = fluid.Scope()
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
with fluid.program_guard(train_program, startup_program):
slots = ["slot1_ff", "slot2_ff", "slot3_ff", "slot4_ff"]
slots_vars = []
for slot in slots:
var = fluid.layers.data(\
name=slot, shape=[1], dtype="float32", lod_level=1)
slots_vars.append(var)
fake_cost = \
fluid.layers.elementwise_sub(slots_vars[0], slots_vars[-1])
fake_cost = fluid.layers.mean(fake_cost)
with fluid.scope_guard(scope):
place = fluid.CPUPlace()
exe = fluid.Executor(place)
try:
fleet.init()
except ImportError as e:
print("warning: no mpi4py")
adam = fluid.optimizer.Adam(learning_rate=0.000005)
try:
adam = fleet.distributed_optimizer(
adam,
strategy={
"fs_uri": "fs_uri_xxx",
"fs_user": "fs_user_xxx",
"fs_passwd": "fs_passwd_xxx",
"fs_hadoop_bin": "fs_hadoop_bin_xxx"
})
adam.minimize([fake_cost], [scope])
except AttributeError as e:
print("warning: no mpi")
except ImportError as e:
print("warning: no mpi4py")
exe.run(startup_program)
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset.init(
batch_size=32,
thread_num=3,
pipe_command="cat",
use_var=slots_vars)
dataset.set_filelist([
"test_in_memory_dataset2_run2_a.txt",
"test_in_memory_dataset2_run2_b.txt"
])
dataset.load_into_memory()
try:
dataset.global_shuffle(fleet)
except:
print("warning: catch expected error")
fleet._opt_info = None
fleet._fleet_ptr = None
dataset = paddle.distributed.fleet.BoxPSDataset()
dataset.init(
rank_offset="",
pv_batch_size=1,
fs_name="",
fs_ugi="",
data_feed_type="MultiSlotInMemoryDataFeed",
parse_logkey=True,
merge_by_sid=True,
enable_pv_merge=True)
d = paddle.distributed.fleet.DatasetBase()
try:
dataset._set_feed_type("MultiSlotInMemoryDataFeed")
except:
print("warning: catch expected error")
dataset.thread_num = 0
try:
dataset._prepare_to_run()
except:
print("warning: catch expected error")
dataset._set_parse_logkey(True)
dataset._set_merge_by_sid(True)
dataset._set_enable_pv_merge(True)
try:
dataset.preprocess_instance()
except:
print("warning: catch expected error")
try:
dataset.set_current_phase(1)
except:
print("warning: catch expected error")
try:
dataset.postprocess_instance()
except:
print("warning: catch expected error")
dataset._set_fleet_send_batch_size(1024)
try:
dataset.global_shuffle()
except:
print("warning: catch expected error")
#dataset.get_pv_data_size()
dataset.get_memory_data_size()
dataset.get_shuffle_data_size()
if __name__ == '__main__':
unittest.main()
......@@ -97,9 +97,11 @@ class DatasetLoaderTestBase(unittest.TestCase):
def check_batch_number(self, place, randomize_batch_num=False):
main_prog, startup_prog, feeds = self.build_network()
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
self.dataset_name)
dataset.set_batch_size(BATCH_SIZE)
if self.dataset_name == "QueueDataset":
dataset = paddle.distributed.QueueDataset()
else:
dataset = paddle.distributed.InMemoryDataset()
dataset._set_batch_size(BATCH_SIZE)
if isinstance(place, fluid.CPUPlace):
file_num = 10
......@@ -128,8 +130,8 @@ class DatasetLoaderTestBase(unittest.TestCase):
fake_reader(batch_num=BATCH_NUM + random_delta_batch_size[i]))
dataset.set_filelist(filelist)
dataset.set_use_var(feeds)
dataset.set_pipe_command("cat")
dataset._set_use_var(feeds)
dataset._set_pipe_command("cat")
if self.dataset_name == 'InMemoryDataset':
dataset.load_into_memory()
......
......@@ -141,7 +141,7 @@ class TestFleetLambMetaOptimizer(unittest.TestCase):
ops = [op.type for op in avg_cost.block.ops]
self.assertIn('lamb', ops)
self.assertIn('cast', ops)
self.assertIn('isfinite', ops)
self.assertIn('check_finite_and_unscale', ops)
if __name__ == "__main__":
......
......@@ -145,7 +145,7 @@ class TestFleetLarsMetaOptimizer(unittest.TestCase):
ops = [op.type for op in avg_cost.block.ops]
self.assertIn('lars_momentum', ops)
self.assertIn('cast', ops)
self.assertIn('isfinite', ops)
self.assertIn('check_finite_and_unscale', ops)
if __name__ == "__main__":
......
......@@ -79,9 +79,9 @@ if [ -f $file_1 ]; then
rm $file_1
fi
# test use DISTRIBUTED_TRAINER_ENDPOINTS env in paddlecloud
unset PADDLE_PORT
unset TRAINER_PORTS_NUM
export DISTRIBUTED_TRAINER_ENDPOINTS=127.0.0.1:6170,127.0.0.1:6171,127.0.0.2:6170,127.0.0.2:6171
echo ""
echo "paddle.distributed.launch async poll process test"
......
......@@ -163,10 +163,9 @@ class TestCloudRoleMaker2(unittest.TestCase):
data = "1 1 1 1\n"
f.write(data)
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"InMemoryDataset")
dataset = paddle.distributed.InMemoryDataset()
dataset.set_filelist(["test_fleet_gloo_role_maker_1.txt"])
dataset.set_use_var([show, label])
dataset._set_use_var([show, label])
dataset.load_into_memory()
dataset.get_memory_data_size(fleet)
dataset.get_shuffle_data_size(fleet)
......
......@@ -48,9 +48,9 @@ if [ -f $file_1 ]; then
rm $file_1
fi
# test use DISTRIBUTED_TRAINER_ENDPOINTS env in paddlecloud
unset PADDLE_PORT
unset TRAINER_PORTS_NUM
export DISTRIBUTED_TRAINER_ENDPOINTS=127.0.0.1:6170,127.0.0.1:6171,127.0.0.2:6170,127.0.0.2:6171
echo ""
echo "paddle.distributed.launch async poll process test"
......
......@@ -400,7 +400,8 @@ class TestCUDNNLstmOp(OpTest):
'Input': input,
'W': flat_w,
'InitH': init_h,
'InitC': init_c
'InitC': init_c,
'SequenceLength': self.sequence_length
}
self.attrs = {
'dropout_prob': 0.0,
......@@ -408,7 +409,6 @@ class TestCUDNNLstmOp(OpTest):
'input_size': input_size,
'hidden_size': hidden_size,
'num_layers': 1,
'sequence_length': self.sequence_length.tolist()
}
self.outputs = {
'Out': output,
......@@ -436,13 +436,6 @@ class TestCUDNNLstmOp(OpTest):
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNLstmOp2(TestCUDNNLstmOp):
def set_attrs(self):
self.sequence_length = np.array([], dtype=np.int32)
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNLstmOp3(TestCUDNNLstmOp):
def set_attrs(self):
self.num_layers = 2
......
......@@ -52,18 +52,17 @@ class TestDatasetWithStat(unittest.TestCase):
name=slot, shape=[1], dtype="int64", lod_level=1)
slots_vars.append(var)
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
"InMemoryDataset")
dataset.set_batch_size(32)
dataset.set_thread(3)
dataset = paddle.distributed.InMemoryDataset()
dataset._set_batch_size(32)
dataset._set_thread(3)
dataset.set_filelist([
"test_in_memory_dataset_run_a.txt",
"test_in_memory_dataset_run_b.txt"
])
dataset.set_pipe_command("cat")
dataset.set_use_var(slots_vars)
dataset._set_pipe_command("cat")
dataset._set_use_var(slots_vars)
dataset.load_into_memory()
dataset.set_fea_eval(1, True)
dataset._set_fea_eval(1, True)
dataset.slots_shuffle(["slot1"])
exe = fluid.Executor(fluid.CPUPlace())
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
from functools import partial
import contextlib
import numpy as np
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
import paddle.fluid.framework as framework
import paddle.fluid.optimizer as optimizer
import paddle.regularizer as regularizer
from paddle.fluid.backward import append_backward
def bow_net(data,
label,
dict_dim,
is_sparse=False,
emb_dim=8,
hid_dim=8,
hid_dim2=6,
class_dim=2):
"""
BOW net
This model is from https://github.com/PaddlePaddle/models:
fluid/PaddleNLP/text_classification/nets.py
"""
emb = fluid.layers.embedding(
input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim])
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
bow_tanh = fluid.layers.tanh(bow)
fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
return avg_cost
class TestRegularizer(unittest.TestCase):
def setUp(self):
self.word_dict = paddle.dataset.imdb.word_dict()
reader = paddle.batch(
paddle.dataset.imdb.train(self.word_dict), batch_size=1)()
self.train_data = [next(reader) for _ in range(1)]
def get_places(self):
places = [core.CPUPlace()]
if core.is_compiled_with_cuda():
places.append(core.CUDAPlace(0))
return places
@contextlib.contextmanager
def scope_prog_guard(self, main_prog, startup_prog):
scope = fluid.core.Scope()
with fluid.unique_name.guard():
with fluid.scope_guard(scope):
with fluid.program_guard(main_prog, startup_prog):
yield
def run_program(self, place, feed_list):
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
exe.run(fluid.default_startup_program())
main_prog = fluid.default_main_program()
param_list = [var.name for var in main_prog.block(0).all_parameters()]
param_sum = []
for data in self.train_data:
out = exe.run(main_prog,
feed=feeder.feed(data),
fetch_list=param_list)
p_sum = 0
for v in out:
p_sum += np.sum(np.abs(v))
param_sum.append(p_sum)
return param_sum
def check_l2decay_regularizer(self, place, model):
paddle.manual_seed(1)
paddle.framework.random._manual_program_seed(1)
main_prog = fluid.framework.Program()
startup_prog = fluid.framework.Program()
with self.scope_prog_guard(
main_prog=main_prog, startup_prog=startup_prog):
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
avg_cost = model(data, label, len(self.word_dict))
optimizer = fluid.optimizer.Adagrad(
learning_rate=0.1,
regularization=paddle.regularizer.L2Decay(1.0))
optimizer.minimize(avg_cost)
param_sum = self.run_program(place, [data, label])
return param_sum
def check_l2decay(self, place, model):
paddle.manual_seed(1)
paddle.framework.random._manual_program_seed(1)
main_prog = fluid.framework.Program()
startup_prog = fluid.framework.Program()
with self.scope_prog_guard(
main_prog=main_prog, startup_prog=startup_prog):
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
avg_cost_l2 = model(data, label, len(self.word_dict))
param_list = fluid.default_main_program().block(0).all_parameters()
para_sum = []
for para in param_list:
para_mul = fluid.layers.square(x=para)
para_sum.append(fluid.layers.reduce_sum(input=para_mul))
avg_cost_l2 += fluid.layers.sums(para_sum) * .5
optimizer = fluid.optimizer.Adagrad(learning_rate=0.1)
optimizer.minimize(avg_cost_l2)
param_sum = self.run_program(place, [data, label])
return param_sum
def test_l2(self):
for place in self.get_places():
dense_sparse_p_sum = []
for sparse in [True, False]:
model = partial(bow_net, is_sparse=sparse)
framework_l2 = self.check_l2decay_regularizer(place, model)
l2 = self.check_l2decay(place, model)
assert len(l2) == len(framework_l2)
for i in range(len(l2)):
assert np.isclose(a=framework_l2[i], b=l2[i], rtol=5e-5)
dense_sparse_p_sum.append(framework_l2)
assert len(dense_sparse_p_sum[0]) == len(dense_sparse_p_sum[1])
for i in range(len(dense_sparse_p_sum[0])):
assert np.isclose(
a=dense_sparse_p_sum[0][i],
b=dense_sparse_p_sum[1][i],
rtol=5e-5)
def test_repeated_regularization(self):
l1 = paddle.regularizer.L1Decay(0.1)
l2 = paddle.regularizer.L2Decay(0.01)
fc_param_attr = fluid.ParamAttr(regularizer=l1)
with fluid.program_guard(fluid.Program(), fluid.Program()):
x = fluid.layers.uniform_random([2, 2, 3])
out = fluid.layers.fc(x, 5, param_attr=fc_param_attr)
loss = fluid.layers.reduce_sum(out)
sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
sgd.minimize(loss)
with fluid.dygraph.guard():
input = fluid.dygraph.to_variable(
np.random.randn(3, 2).astype('float32'))
paddle.manual_seed(1)
paddle.framework.random._manual_program_seed(1)
linear1 = fluid.dygraph.Linear(
2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr)
linear2 = fluid.dygraph.Linear(
2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr)
loss1 = linear1(input)
loss1.backward()
# set l2 regularizer in optimizer, but l1 in fluid.ParamAttr
fluid.optimizer.SGD(parameter_list=linear1.parameters(),
learning_rate=1e-2,
regularization=l2).minimize(loss1)
# only set l1 in fluid.ParamAttr
loss2 = linear2(input)
loss2.backward()
fluid.optimizer.SGD(parameter_list=linear2.parameters(),
learning_rate=1e-2).minimize(loss2)
# they should both be applied by l1, and keep the same
self.assertTrue(
np.allclose(linear1.weight.numpy(), linear2.weight.numpy()),
"weight should use the regularization in fluid.ParamAttr!")
self.assertTrue(
np.allclose(linear1.bias.numpy(), linear2.bias.numpy()),
"bias should use the regularization in fluid.ParamAttr!")
if __name__ == '__main__':
unittest.main()
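For reference, the equivalence exercised by test_l2 above can be written out explicitly (this note is editorial, not part of the patch). With coefficient coeff, the L2Decay regularizer adds coeff * w to each parameter's gradient, which is exactly the gradient of the explicit penalty that check_l2decay builds from fluid.layers.square and fluid.layers.sums:

.. math::

    loss_{l2} = loss + \frac{coeff}{2} \sum_i \|w_i\|_2^2
    \quad\Longrightarrow\quad
    \frac{\partial loss_{l2}}{\partial w_i} = \frac{\partial loss}{\partial w_i} + coeff \cdot w_i

check_l2decay_regularizer uses L2Decay(1.0) and check_l2decay adds fluid.layers.sums(para_sum) * .5 (coeff implicitly 1.0), so both programs apply the same per-parameter update and the accumulated parameter sums agree within rtol=5e-5.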
......@@ -49,7 +49,10 @@ class LinearNet(nn.Layer):
super(LinearNet, self).__init__()
self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
@paddle.jit.to_static
@paddle.jit.to_static(input_spec=[
paddle.static.InputSpec(
shape=[None, IMAGE_SIZE], dtype='float32', name='x')
])
def forward(self, x):
return self._linear(x)
......@@ -152,6 +155,34 @@ class TestTranslatedLayer(unittest.TestCase):
with self.assertRaises(ValueError):
program = translated_layer.program('not_exists')
def test_get_input_spec(self):
# load
translated_layer = paddle.jit.load(self.model_path)
expect_spec = [
paddle.static.InputSpec(
shape=[None, IMAGE_SIZE], dtype='float32', name='x')
]
actual_spec = translated_layer._input_spec()
for spec_x, spec_y in zip(expect_spec, actual_spec):
self.assertEqual(spec_x, spec_y)
def test_get_output_spec(self):
# load
translated_layer = paddle.jit.load(self.model_path)
expect_spec = [
paddle.static.InputSpec(
shape=[None, CLASS_NUM],
dtype='float32',
name='translated_layer/scale_0.tmp_1')
]
actual_spec = translated_layer._output_spec()
for spec_x, spec_y in zip(expect_spec, actual_spec):
self.assertEqual(spec_x, spec_y)
if __name__ == '__main__':
unittest.main()
......@@ -26,4 +26,5 @@ NEED_TO_FIX_OP_LIST = [
'squared_l2_distance',
'tree_conv',
'cvm',
'cudnn_lstm',
]
......@@ -12,8 +12,134 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO: define the regularizer functions
# __all__ = ['L1Decay',
# 'L1DecayRegularizer',
# 'L2Decay',
# 'L2DecayRegularizer']
__all__ = ['L1Decay', 'L2Decay']
import paddle.fluid as fluid
class L1Decay(fluid.regularizer.L1Decay):
"""
Implement the L1 Weight Decay Regularization, which encourages the weights to be sparse.
It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_paddle_optimizer_Momentum` ).
When set in ``ParamAttr`` , it only takes effect for trainable parameters in this layer. When set in
``optimizer`` , it takes effect for all trainable parameters. When set together, ``ParamAttr`` has
higher priority than ``optimizer`` , which means that for a trainable parameter, if regularizer is defined
in its ParamAttr, then the regularizer in Optimizer will be ignored. Otherwise the regularizer
in Optimizer will be used.
In the implementation, the formula of L1 Weight Decay Regularization is as follows:
.. math::
L1WeightDecay = reg\_coeff * sign(parameter)
Args:
coeff(float, optional): regularization coeff. Default:0.0.
Examples:
.. code-block:: python
# Example1: set Regularizer in optimizer
import paddle
from paddle.regularizer import L1Decay
import numpy as np
paddle.disable_static()
inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
inp = paddle.to_tensor(inp)
out = linear(inp)
loss = paddle.mean(out)
beta1 = paddle.to_tensor([0.9], dtype="float32")
beta2 = paddle.to_tensor([0.99], dtype="float32")
momentum = paddle.optimizer.Momentum(
learning_rate=0.1,
parameters=linear.parameters(),
weight_decay=L1Decay(0.0001))
back = out.backward()
momentum.step()
momentum.clear_grad()
# Example2: set Regularizer in parameters
# Set L1 regularization in parameters.
# Global regularizer does not take effect on my_conv2d for this case.
from paddle.nn import Conv2d
from paddle import ParamAttr
from paddle.regularizer import L2Decay
my_conv2d = Conv2d(
in_channels=10,
out_channels=10,
kernel_size=1,
stride=1,
padding=0,
weight_attr=ParamAttr(regularizer=L2Decay(coeff=0.01)),
bias_attr=False)
"""
def __init__(self, coeff=0.0):
super(L1Decay, self).__init__(coeff)
class L2Decay(fluid.regularizer.L2Decay):
"""
Implement the L2 Weight Decay Regularization, which helps to prevent the model over-fitting.
It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_paddle_optimizer_Momentum` ).
When set in ``ParamAttr`` , it only takes effect for trainable parameters in this layer. When set in
``optimizer`` , it takes effect for all trainable parameters. When set together, ``ParamAttr`` has
higher priority than ``optimizer`` , which means that for a trainable parameter, if regularizer is defined
in its ParamAttr, then the regularizer in Optimizer will be ignored. Otherwise the regularizer
in Optimizer will be used.
In the implementation, the formula of L2 Weight Decay Regularization is as follows:
.. math::
L2WeightDecay = reg\_coeff * parameter
Args:
coeff(float, optional): regularization coeff. Default:0.0.
Examples:
.. code-block:: python
# Example1: set Regularizer in optimizer
import paddle
from paddle.regularizer import L2Decay
import numpy as np
paddle.disable_static()
inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
inp = paddle.to_tensor(inp)
out = linear(inp)
loss = paddle.mean(out)
beta1 = paddle.to_tensor([0.9], dtype="float32")
beta2 = paddle.to_tensor([0.99], dtype="float32")
momentum = paddle.optimizer.Momentum(
learning_rate=0.1,
parameters=linear.parameters(),
weight_decay=L2Decay(0.0001))
back = out.backward()
momentum.step()
momentum.clear_grad()
# Example2: set Regularizer in parameters
# Set L2 regularization in parameters.
# Global regularizer does not take effect on my_conv2d for this case.
from paddle.nn import Conv2d
from paddle import ParamAttr
from paddle.regularizer import L2Decay
my_conv2d = Conv2d(
in_channels=10,
out_channels=10,
kernel_size=1,
stride=1,
padding=0,
weight_attr=ParamAttr(regularizer=L2Decay(coeff=0.01)),
bias_attr=False)
"""
def __init__(self, coeff=0.0):
super(L2Decay, self).__init__(coeff)
......@@ -37,7 +37,11 @@ def get_cluster_from_args(selected_gpus):
free_ports = find_free_ports(len(selected_gpus))
if free_ports is not None:
free_ports = list(free_ports)
return get_cluster(node_ips, node_ip, free_ports, selected_gpus)
trainer_endpoints = []
for ip in node_ips:
trainer_endpoints.append(["%s:%d" % (ip, port) for port in free_ports])
return get_cluster(node_ips, node_ip, trainer_endpoints, selected_gpus)
def get_gpus(selected_gpus):
......
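The change above passes per-node endpoint lists to get_cluster instead of a flat port list. A tiny worked example of the structure that loop produces, using made-up IPs and ports purely for illustration:

# hypothetical inputs, for illustration only
node_ips = ["10.0.0.1", "10.0.0.2"]
free_ports = [6170, 6171]

trainer_endpoints = []
for ip in node_ips:
    trainer_endpoints.append(["%s:%d" % (ip, port) for port in free_ports])

# trainer_endpoints now holds one endpoint list per node:
# [["10.0.0.1:6170", "10.0.0.1:6171"],
#  ["10.0.0.2:6170", "10.0.0.2:6171"]]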
......@@ -16,12 +16,13 @@ from .profiler import ProfilerOptions
from .profiler import Profiler
from .profiler import get_profiler
from .deprecated import deprecated
from ..fluid.framework import unique_name
from ..fluid.framework import load_op_library
from ..fluid.framework import require_version
from . import download
__all__ = ['dump_config', 'deprecated', 'download']
#TODO: define new api under this directory
# __all__ = ['unique_name',
# 'load_op_library',
# 'require_version']
__all__ += ['unique_name', 'load_op_library', 'require_version']
:: Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
::
:: Licensed under the Apache License, Version 2.0 (the "License");
:: you may not use this file except in compliance with the License.
:: You may obtain a copy of the License at
::
:: http://www.apache.org/licenses/LICENSE-2.0
::
:: Unless required by applicable law or agreed to in writing, software
:: distributed under the License is distributed on an "AS IS" BASIS,
:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
:: See the License for the specific language governing permissions and
:: limitations under the License.
::
:: ===============================
:: Build Paddle compile environment
:: ===============================
:: Description:
::
:: Install compile environment for xly CI.
::
:: Include:
:: 1. CMake 3.17.0
:: 2. Git 2.28.0
:: 3. Python 3.7.8
:: 4. Visual Studio 2015 with update 3
:: 5. CUDA 10 [miss cudnn]
:: 6. java jre [not complete]
:: 7. xly agent [not complete]
:: Echo command is not required.
@echo off
:: ===== start step 0: wget tool =====
:: Download wget for Windows when the wget tool is not available.
echo ">>>>>>>> step [0/7]: wget tool"
wget --help > nul 2> nul || call:install_wget
goto cmake
:install_wget
echo wget is not installed on this PC, will download wget 1.20.
echo Download package from https://eternallybored.org/misc/wget/1.20/64/wget.exe ...
certutil -urlcache -split -f https://eternallybored.org/misc/wget/1.20/64/wget.exe > nul 2> nul
if %errorlevel% == 0 (
echo Download wget tool into %cd% success.
) else (
echo Error***** Download wget tool failed, please download it before rerun.
exit /b 1
)
goto :eof
:: ===== end step 0: wget tool =====
:: ===== start step 1: cmake =====
:: Download CMake-3.17.0 and add it to PATH when it is not installed.
:: TODO: limit version >= 3.17.0
:cmake
echo ">>>>>>>> step [1/7]: CMake 3.17.0"
cmake --help > nul 2> nul || call :install_cmake
goto git
:install_cmake
echo cmake is not installed on this PC, will install cmake-3.17.0.
echo Download package from https://cmake.org/files/v3.17/cmake-3.17.0-win64-x64.msi ...
wget -O cmake-3.17.0-win64-x64.msi https://cmake.org/files/v3.17/cmake-3.17.0-win64-x64.msi
echo Install cmake-3.17.0 ...
:: /passive [silent installation]
:: /norestart [do not restart]
:: ADD_CMAKE_TO_PATH = System [add CMake to the system PATH for all users]
start /wait cmake-3.17.0-win64-x64.msi /passive /norestart ADD_CMAKE_TO_PATH=System
if %errorlevel% == 0 (
echo Install CMake-3.17.0 success!
) else (
echo Error***** Install Cmake-3.17.0 failed, please re-install it manually.
)
del cmake-3.17.0-win64-x64.msi
goto :eof
:: ===== end step 1: cmake =====
:: ===== start step 2: Git =====
:: Download Git-2.28.0 and add it to PATH when it is not installed.
:: TODO: limit version >= 2.28.0
:git
echo ">>>>>>>> step [2/8]: Git 2.28.0"
git --help > nul 2> nul || call :install_git
goto python
:install_git
echo git is not installed on this PC, will install Git-2.28.0.
echo Download package from https://github.com/git-for-windows/git/releases/download/v2.28.0.windows.1/Git-2.28.0-64-bit.exe ...
wget -O Git-2.28.0-64-bit.exe https://github.com/git-for-windows/git/releases/download/v2.28.0.windows.1/Git-2.28.0-64-bit.exe
echo Install Git-2.28.0 ...
:: /SILENT [silent install]
:: /ALLUSERS [add path for all users]
:: /NORESTART [do not restart]
start /wait Git-2.28.0-64-bit.exe /SILENT /ALLUSERS /NORESTART
if %errorlevel% == 0 (
echo Install Git-2.28.0 success!
) else (
echo Error***** Install Git-2.28.0 failed, please re-install it manually.
)
del Git-2.28.0-64-bit.exe
goto :eof
:: ===== end step 2: Git =====
:: ===== start step 3: Python =====
:: Download Python-3.7.8 and add it to PATH when it is not installed.
:: TODO: limit version >= 3.7.8
:python
echo ">>>>>>>> step [3/7]: Python 3.7.8"
python -V 2>&1 | findstr /C:"Python 3.7.8" > nul 2> nul || call :install_python
goto vs2015
:install_python
echo Python is not installed on this PC, will install Python-3.7.8.
echo Download package from https://npm.taobao.org/mirrors/python/3.7.8/python-3.7.8-amd64.exe ...
wget -O python-3.7.8-amd64.exe https://npm.taobao.org/mirrors/python/3.7.8/python-3.7.8-amd64.exe
echo Install Python-3.7.8 ...
:: /passive [silent install]
:: InstallAllUsers [add path for all users]
:: PrependPath [add script/install into PATH]
:: TargetDir [install directory]
start /wait python-3.7.8-amd64.exe /passive InstallAllUsers=1 PrependPath=1 TargetDir=C:\Python37
if %errorlevel% == 0 (
echo Install python-3.7.8 success!
) else (
echo Error***** Install python-3.7.8 failed, please re-install it manually.
)
del python-3.7.8-amd64.exe
goto :eof
:: ===== end step 3: Python =====
:: ===== start step 4: Visual Studio 2015 =====
:: Download Visual Studio 2015 when it is not installed.
:vs2015
echo ">>>>>>>> step [4/7]: Visual Studio 2015"
cmd /C "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 > nul 2> nul || call :install_visual_studio
goto :cuda10
:install_visual_studio
echo Visual Studio is not installed on this PC, will install VS2015.
echo Download package from "https://download.my.visualstudio.com/pr/en_visual_studio_professional_2015_with_update_3_x86_x64_web_installer_8922978.exe"
wget -O vs_installer.exe "https://download.my.visualstudio.com/pr/en_visual_studio_professional_2015_with_update_3_x86_x64_web_installer_8922978.exe?t=9ee7a96d-ca80-4b84-af2c-7dd86996a0aa&e=1600103404&h=3cdea1e81c04aa4e846f5314972c46eb&su=1"
echo Install Visual Studio 2015 ...
:: /passive [silent install]
:: /norestart [no restart]
:: /NoRefresh [no refresh]
:: /InstallSelectableItems NativeLanguageSupport_Group [select Visual C++ for installing]
start /wait vs_installer.exe /passive /norestart /NoRefresh /InstallSelectableItems NativeLanguageSupport_Group
if %errorlevel% == 0 (
echo Install Visual Studio 2015 success!
) else (
echo Error***** Install Visual Studio 2015 failed, please re-install it manually.
)
del vs_installer.exe
goto :eof
:: ===== end step 4: Visual Studio 2015 =====
:: ===== start step 5: CUDA 10 =====
:cuda10
echo ">>>>>>>> step [5/7]: CUDA 10.0"
nvcc --version > nul 2> nul || call :install_cuda
goto java-jre
:install_cuda
echo CUDA is not installed on this PC, will install CUDA-10.0.
echo Download package from "https://developer.download.nvidia.cn/compute/cuda/10.0/secure/Prod/network_installers/cuda_10.0.130_win10_network.exe"
wget -O cuda_installer.exe "https://developer.download.nvidia.cn/compute/cuda/10.0/secure/Prod/network_installers/cuda_10.0.130_win10_network.exe?hG7oBtA2CnxZG7d39onmBdtzrIa2cOukrmW8I0qk3h36vb2Sj0yYGjMElJlxlNhjx8Xu5RlbmdBhCWvP2QcEqMjCoKCXe5lOgr5uIIso_7LqrotgQHbZRZSVBYRT4bIAHPVSPrr4_4KczKvI9Nf3mbO9RJ2Vj6ECD5QphRMJBus0KKNVxO1gsplVL5qaCnE"
echo Install CUDA-10.0 ...
:: -s [silent install]
start /wait cuda_installer.exe -s
if %errorlevel% == 0 (
echo Install CUDA-10.0 success!
) else (
echo Error***** Install CUDA-10.0 failed, please re-install it manually.
)
del cuda_installer.exe
goto :eof
:: ===== end step 5: CUDA 10 =====
:: ===== start step 6: java jre =====
:java-jre
echo ">>>>>>>> step [6/7]: java jre"
goto xly-agent
:: ===== end step 6: java jre =====
:: ===== start step 7: xly agent =====
:xly-agent
echo ">>>>>>>> step [7/7]: xly agent"
goto :eof
:: ===== end step 7: xly agent =====
\ No newline at end of file