diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index 1956e5c39ea2524d8a8e2650eb08f8d58f410b73..b0a6dfe29020781e57d57861137861366864abdb 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -386,7 +386,7 @@ function(cc_test_run TARGET_NAME)
     set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
     # No unit test should exceed 2 minutes.
     if (APPLE OR WIN32)
-      set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
+      set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150)
     else()
       set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 120)
     endif()
@@ -748,7 +748,7 @@ function(py_test TARGET_NAME)
     endif()
 
     if (APPLE OR WIN32)
-      set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
+      set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150)
     else()
       # No unit test should exceed 2 minutes in Linux.
       set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 120)
diff --git a/cmake/operators.cmake b/cmake/operators.cmake
index f60a6dc3f0c89dd345b04ea3a1e213de770e5760..aea972ab3db2af862f5230ea6c1eabeed8b611c5 100644
--- a/cmake/operators.cmake
+++ b/cmake/operators.cmake
@@ -138,12 +138,17 @@ function(op_library TARGET)
     # And for detail pybind information, please see generated paddle/pybind/pybind.h.
     file(READ ${TARGET}.cc TARGET_CONTENT)
     string(REGEX MATCH "REGISTER_OPERATOR\\(.*REGISTER_OPERATOR\\(" multi_register "${TARGET_CONTENT}")
-    string(REGEX MATCH "REGISTER_OPERATOR\\([a-z0-9_]*," one_register "${multi_register}")
+    # [ \t\r\n]* matches any whitespace between the macro name and the operator type.
+    string(REGEX MATCH "REGISTER_OPERATOR\\([ \t\r\n]*[a-z0-9_]*," one_register "${multi_register}")
+
    if (one_register STREQUAL "")
      string(REPLACE "_op" "" TARGET "${TARGET}")
    else ()
      string(REPLACE "REGISTER_OPERATOR(" "" TARGET "${one_register}")
      string(REPLACE "," "" TARGET "${TARGET}")
+      # [ \t\r\n]+ matches the whitespace left over from the match above.
+      # '+' is used here instead of '*' because REPLACE needs a non-empty match.
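+      # For example, a match "REGISTER_OPERATOR(\n  relu," has already been
+      # reduced to "\n  relu" by the two REPLACE calls above; the REPLACE below
+      # then strips the remaining whitespace, leaving TARGET set to "relu".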
+      string(REGEX REPLACE "[ \t\r\n]+" "" TARGET "${TARGET}")
    endif()
 
    # pybind USE_NO_KERNEL_OP
diff --git a/paddle/fluid/framework/distributed_strategy.proto b/paddle/fluid/framework/distributed_strategy.proto
index edd1700ae7284c77883af6abd2cd7d511097685f..df482f43346c57cc59af42936b6a7308b76cbd3a 100644
--- a/paddle/fluid/framework/distributed_strategy.proto
+++ b/paddle/fluid/framework/distributed_strategy.proto
@@ -41,6 +41,11 @@ message LocalSGDConfig {
   optional int32 begin_step = 2 [ default = 1 ];
 }
 
+message AdaptiveLocalSGDConfig {
+  optional int32 init_k_steps = 1 [ default = 1 ];
+  optional int32 begin_step = 2 [ default = 1 ];
+}
+
 message GradientMergeConfig {
   optional int32 k_steps = 1 [ default = 1 ];
   optional bool avg = 2 [ default = true ];
@@ -121,6 +126,7 @@ message DistributedStrategy {
   optional bool cudnn_exhaustive_search = 21 [ default = true ];
   optional int32 conv_workspace_size_limit = 22 [ default = 4000 ];
   optional bool cudnn_batchnorm_spatial_persistent = 23 [ default = true ];
+  optional bool adaptive_localsgd = 24 [ default = false ];
 
   optional RecomputeConfig recompute_configs = 101;
   optional AMPConfig amp_configs = 102;
@@ -131,6 +137,7 @@ message DistributedStrategy {
   optional AsyncConfig a_sync_configs = 107;
   optional LarsConfig lars_configs = 108;
   optional LambConfig lamb_configs = 109;
+  optional AdaptiveLocalSGDConfig adaptive_localsgd_configs = 110;
 
   optional BuildStrategy build_strategy = 201;
   optional ExecutionStrategy execution_strategy = 202;
 }
diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index 8787aa8a94a44c2c36868fea4b88ede5f91b19f4..5bb833f613529a81d5ae4e18fc5ad7cd1136354b 100644
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -102,6 +102,8 @@ if(WITH_MKLDNN)
         pass_library(conv_concat_relu_mkldnn_fuse_pass inference DIR mkldnn)
         pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
         pass_library(scale_matmul_fuse_pass inference DIR mkldnn)
+        pass_library(cpu_bfloat16_placement_pass inference DIR mkldnn)
+        pass_library(cpu_bfloat16_pass inference DIR mkldnn)
         pass_library(fc_mkldnn_pass inference DIR mkldnn)
         pass_library(cpu_quantize_placement_pass base DIR mkldnn)
         pass_library(cpu_quantize_pass inference DIR mkldnn)
@@ -162,4 +164,6 @@ endif()
   cc_test(test_cpu_quantize_squash_pass SRCS mkldnn/cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor)
   cc_test(test_reshape_transpose_matmul_mkldnn_fuse_pass SRCS mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc DEPS reshape_transpose_matmul_mkldnn_fuse_pass)
   cc_test(test_matmul_transpose_reshape_fuse_pass SRCS mkldnn/matmul_transpose_reshape_fuse_pass_tester.cc DEPS matmul_transpose_reshape_fuse_pass)
+  cc_test(test_cpu_bfloat16_placement_pass SRCS mkldnn/cpu_bfloat16_placement_pass_tester.cc DEPS cpu_bfloat16_placement_pass)
+  cc_test(test_cpu_bfloat16_pass SRCS mkldnn/cpu_bfloat16_pass_tester.cc DEPS cpu_bfloat16_pass)
 endif ()
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc
index 3d65fe595373fa98ba237f04134c75d4a60a7242..9c1eaa99a3ca04ddbeecab639d5587d5509e3f00 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -1892,6 +1892,82 @@ PDNode *patterns::QuantizePlacement::operator()(
   return op;
 }
 
+PDNode *patterns::Bfloat16Placement::operator()(
+    const std::unordered_set<std::string> &bfloat16_enabled_op_types) {
+  std::unordered_set<std::string> supported_op_types =
+      std::unordered_set<std::string>();
+  if (!bfloat16_enabled_op_types.empty()) {
+    supported_op_types = bfloat16_enabled_op_types;
+  }
+  auto *op = pattern->NewNode(op_repr())->assert_is_ops(supported_op_types);
+  return op;
+}
+
+PDNode *patterns::OrphanedBfloat16::operator()() {
+  auto *prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
+  prev_op->assert_more([&](Node *node) {
+    return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
+           "float32";
+  });
+  auto *prev_out = pattern->NewNode(prev_out_repr())->AsOutput();
+
+  auto *op = pattern->NewNode(op_repr())->assert_is_op();
+  op->assert_more([&](Node *node) {
+    return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
+           "bfloat16";
+  });
+  auto *op_out = pattern->NewNode(op_out_repr())->AsOutput();
+
+  auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op();
+  next_op->assert_more([&](Node *node) {
+    return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
+           "float32";
+  });
+
+  prev_op->LinksTo({prev_out});
+  op->LinksFrom({prev_out}).LinksTo({op_out});
+  next_op->LinksFrom({op_out});
+  return next_op;
+}
+
+PDNode *patterns::LastBfloat16Ops::operator()() {
+  auto *op = pattern->NewNode(op_repr())->assert_is_op();
+  op->assert_more([&](Node *node) {
+    return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
+           "bfloat16";
+  });
+  auto *op_out = pattern->NewNode(op_out_repr())->AsOutput();
+
+  auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op();
+  next_op->assert_more([&](Node *node) {
+    return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") !=
+           "bfloat16";
+  });
+
+  op->LinksTo({op_out});
+  next_op->LinksFrom({op_out});
+  return next_op;
+}
+
+PDNode *patterns::FirstBfloat16Ops::operator()() {
+  auto *prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
+  prev_op->assert_more([&](Node *node) {
+    return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") !=
+           "bfloat16";
+  });
+  auto *op_in = pattern->NewNode(op_in_repr())->AsOutput();
+
+  auto *op = pattern->NewNode(op_repr())->assert_is_op();
+  op->assert_more([&](Node *node) {
+    return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
+           "bfloat16";
+  });
+
+  prev_op->LinksTo({op_in});
+  op->LinksFrom({op_in});
+  return op;
+}
+
 PDNode *patterns::MKLDNNInPlace::operator()() {
   const std::unordered_set<std::string> &supported_op_types = {
       "abs",
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h
index 0803265884165bc754489b18d07c0d277a4bd92b..053c1fe832b0088d2abdd3f8eb40a0042e5e2dfe 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -1129,6 +1129,47 @@ struct QuantizePlacement : public PatternBase {
   PATTERN_DECL_NODE(op);
 };
 
+struct Bfloat16Placement : public PatternBase {
+  Bfloat16Placement(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "bfloat16_placement") {}
+  PDNode* operator()(
+      const std::unordered_set<std::string>& bfloat16_enabled_op_types);
+
+  PATTERN_DECL_NODE(op);
+};
+
+struct OrphanedBfloat16 : public PatternBase {
+  OrphanedBfloat16(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "orphaned_bfloat16") {}
+  PDNode* operator()();
+
+  PATTERN_DECL_NODE(prev_op);
+  PATTERN_DECL_NODE(prev_out);
+  PATTERN_DECL_NODE(op);
+  PATTERN_DECL_NODE(op_out);
+  PATTERN_DECL_NODE(next_op);
+};
+
+struct LastBfloat16Ops : public PatternBase {
+  LastBfloat16Ops(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "last_bfloat16_ops") {}
+  PDNode* operator()();
+
+  PATTERN_DECL_NODE(op);
+  PATTERN_DECL_NODE(op_out);
+  PATTERN_DECL_NODE(next_op);
+};
+
+struct FirstBfloat16Ops : public PatternBase {
+  FirstBfloat16Ops(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "first_bfloat16_ops") {}
+  PDNode* operator()();
+
+  PATTERN_DECL_NODE(prev_op);
+  PATTERN_DECL_NODE(op_in);
+  PATTERN_DECL_NODE(op);
+};
+
 // Pattern used for enforcing inplace computation for in-place computation
 // supporting DNNL ops. softmax, batch_norm and layer_norm
 struct MKLDNNInPlace : public PatternBase {
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc
new file mode 100644
index 0000000000000000000000000000000000000000..df498865245fc8054f9521026e0b5cd6906b136f
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc
@@ -0,0 +1,159 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+#include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/fluid/string/pretty_log.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+using string::PrettyLogDetail;
+
+void UnlinkNodes(ir::Node* a, ir::Node* b) {
+  a->outputs.erase(std::remove(a->outputs.begin(), a->outputs.end(), b),
+                   a->outputs.end());
+  b->inputs.erase(std::remove(b->inputs.begin(), b->inputs.end(), a),
+                  b->inputs.end());
+}
+
+void CPUBFloat16Pass::SetInputDataType(ir::Graph* graph) const {
+  GraphPatternDetector gpd;
+  patterns::FirstBfloat16Ops bfloat16_ops{gpd.mutable_pattern(),
+                                          "first_bfloat16_ops"};
+  bfloat16_ops();
+  int quantize_counter = 0;
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, bfloat16_ops);
+    GET_IR_NODE_FROM_SUBGRAPH(op_in, op_in, bfloat16_ops);
+    GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_ops);
+
+    if (op->Op()->Type() != "conv2d" && prev_op->Op()->Type() != "quantize") {
+      VarDesc quantize_out_desc(patterns::PDNodeName("quantize", "out"));
+      auto* quantize_out_node = g->CreateVarNode(&quantize_out_desc);
+
+      // create a quantize op node
+      OpDesc q_desc;
+      q_desc.SetType("quantize");
+      q_desc.SetInput("Input", std::vector<std::string>({op_in->Name()}));
+      q_desc.SetOutput("Output",
+                       std::vector<std::string>({quantize_out_node->Name()}));
+      q_desc.SetAttr("Scale", 1.f);
+      q_desc.SetAttr("bfloat16", true);
+      q_desc.SetAttr("output_format", Has("data_layout")
+                                          ? Get<std::string>("data_layout")
+                                          : "NCHW");
+      auto quantize_op = g->CreateOpNode(&q_desc);  // OpDesc will be copied.
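+      // The loop below looks up which formal input slot of `op` currently
+      // consumes `op_in`; that slot is then redirected to the quantize output,
+      // rewiring   op_in -> op
+      // into       op_in -> quantize -> quantize_out -> op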
+
+      std::string op_input_name;
+      for (auto name : op->Op()->InputNames()) {
+        for (auto input_name : op->Op()->Input(name)) {
+          if (input_name == op_in->Name()) op_input_name = name;
+        }
+      }
+
+      PADDLE_ENFORCE_NE(
+          op_input_name.empty(), true,
+          platform::errors::NotFound(
+              "Expected the matched input variable to be one of this "
+              "operator's inputs."));
+
+      op->Op()->SetInput(op_input_name,
+                         std::vector<std::string>({quantize_out_node->Name()}));
+
+      UnlinkNodes(op_in, op);
+      IR_NODE_LINK_TO(op_in, quantize_op);
+      IR_NODE_LINK_TO(quantize_op, quantize_out_node);
+      IR_NODE_LINK_TO(quantize_out_node, op);
+      quantize_counter++;
+    }
+  };
+  gpd(graph, handler);
+  PrettyLogDetail("---    added %d quantize ops before bfloat16 ops",
+                  quantize_counter);
+}
+
+void CPUBFloat16Pass::SetOutputDataType(ir::Graph* graph) const {
+  GraphPatternDetector gpd;
+  patterns::LastBfloat16Ops bfloat16_ops{gpd.mutable_pattern(),
+                                         "last_bfloat16_ops"};
+  bfloat16_ops();
+  int force_fp32_counter = 0, dequantize_counter = 0;
+
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_ops);
+    GET_IR_NODE_FROM_SUBGRAPH(op_out, op_out, bfloat16_ops);
+    GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, bfloat16_ops);
+
+    if ((op->Op()->HasAttr("force_fp32_output") ||
+         op->Op()->HasProtoAttr("force_fp32_output")) &&
+        !op->Op()->GetAttrIfExists<bool>("fuse_residual_connection")) {
+      op->Op()->SetAttr("force_fp32_output", true);
+      force_fp32_counter++;
+    } else if (op->Op()->Type() != "prior_box") {
+      // Create dequantize input variable
+      VarDesc dequantize_in_desc(patterns::PDNodeName("dequantize", "in"));
+      auto* dequantize_in_node = g->CreateVarNode(&dequantize_in_desc);
+
+      // create a dequantize op node for output.
+      OpDesc deq_desc;
+      deq_desc.SetType("dequantize");
+      deq_desc.SetInput("Input",
+                        std::vector<std::string>({dequantize_in_node->Name()}));
+      deq_desc.SetOutput("Output", std::vector<std::string>({op_out->Name()}));
+      deq_desc.SetAttr("Scale", 1.0f);
+      auto dequantize_op = g->CreateOpNode(&deq_desc);
+
+      std::string op_output_name;
+      for (auto name : op->Op()->OutputNames()) {
+        for (auto output_name : op->Op()->Output(name)) {
+          if (output_name == op_out->Name()) op_output_name = name;
+        }
+      }
+
+      PADDLE_ENFORCE_NE(
+          op_output_name.empty(), true,
+          platform::errors::NotFound(
+              "Expected the matched output variable to be one of this "
+              "operator's outputs."));
+
+      op->Op()->SetOutput(op_output_name, std::vector<std::string>(
+                                              {dequantize_in_node->Name()}));
+
+      UnlinkNodes(op, op_out);
+      IR_NODE_LINK_TO(op, dequantize_in_node);
+      IR_NODE_LINK_TO(dequantize_in_node, dequantize_op);
+      IR_NODE_LINK_TO(dequantize_op, op_out);
+      dequantize_counter++;
+    }
+  };
+  gpd(graph, handler);
+  PrettyLogDetail("---    added %d dequantize ops and used %d force_fp32_output",
+                  dequantize_counter, force_fp32_counter);
+}
+
+void CPUBFloat16Pass::ApplyImpl(ir::Graph* graph) const {
+  SetInputDataType(graph);
+  SetOutputDataType(graph);
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_PASS(cpu_bfloat16_pass, paddle::framework::ir::CPUBFloat16Pass);
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h
new file mode 100644
index 0000000000000000000000000000000000000000..3a7271f7ddc59a2bdcab8457bc34d5c5c6397268
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h
@@ -0,0 +1,34 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <string>
+
+#include "paddle/fluid/framework/ir/pass.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+class CPUBFloat16Pass : public Pass {
+ protected:
+  void SetInputDataType(ir::Graph* graph) const;
+  void SetOutputDataType(ir::Graph* graph) const;
+  void ApplyImpl(ir::Graph* graph) const override;
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc
new file mode 100644
index 0000000000000000000000000000000000000000..15109db98321343e73fb0c3839e4f7ddf2490948
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc
@@ -0,0 +1,145 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
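+
+// A minimal usage sketch of the pass under test (assumes an existing
+// ProgramDesc `prog`; this mirrors the PreparePass helper below):
+//
+//   std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+//   auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
+//   graph.reset(pass->Apply(graph.release()));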
+
+#include <gtest/gtest.h>
+
+#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
+#include "paddle/fluid/framework/naive_executor.h"
+#include "paddle/fluid/imperative/type_defs.h"
+#include "paddle/fluid/platform/place.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
+           const std::vector<std::string>& inputs,
+           const std::vector<std::string>& outputs, bool use_mkldnn,
+           const std::string& mkldnn_data_type = "float32",
+           const bool force_fp32_output = false) {
+  auto* op = prog->MutableBlock(0)->AppendOp();
+  op->SetType(type);
+  op->SetAttr("use_mkldnn", use_mkldnn);
+  op->SetAttr("name", name);
+
+  if (type == "conv2d") {
+    op->SetInput("Input", {inputs[0]});
+    op->SetOutput("Output", {outputs[0]});
+    op->SetAttr("mkldnn_data_type", mkldnn_data_type);
+    op->SetAttr("force_fp32_output", force_fp32_output);
+  } else if (type == "pool2d" || type == "transpose2" || type == "reshape2" ||
+             type == "dropout") {
+    op->SetInput("X", {inputs[0]});
+    op->SetOutput("Out", {outputs[0]});
+    op->SetAttr("mkldnn_data_type", mkldnn_data_type);
+  } else if (type == "fc") {
+    op->SetInput("Input", {inputs[0]});
+    op->SetOutput("Out", {outputs[0]});
+    op->SetAttr("mkldnn_data_type", mkldnn_data_type);
+  } else if (type == "concat") {
+    op->SetInput("X", inputs);
+    op->SetOutput("Out", outputs);
+    op->SetAttr("mkldnn_data_type", mkldnn_data_type);
+  } else if (type == "matmul" || type == "elementwise_add") {
+    op->SetInput("X", {inputs[0]});
+    if (inputs.size() > 1) op->SetInput("Y", {inputs[1]});
+    op->SetOutput("Out", {outputs[0]});
+    op->SetAttr("mkldnn_data_type", mkldnn_data_type);
+  }
+}
+
+void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
+                 const std::initializer_list<std::string> variable_names,
+                 int* original_nodes_num, int* current_nodes_num) {
+  auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
+
+  graph->reset(pass->Apply(graph->release()));
+
+  *original_nodes_num = (*graph)->Nodes().size();
+  (*graph).reset(pass->Apply((*graph).release()));
+  *current_nodes_num = (*graph)->Nodes().size();
+}
+
+static const std::initializer_list<std::string> variable_names{
+    "z", "a", "b", "c", "d", "e", "f", "g", "h", "i"};
+
+ProgramDesc BuildProgramDesc(bool use_mkldnn) {
+  ProgramDesc prog;
+  for (auto& v : variable_names) {
+    prog.MutableBlock(0)->Var(v);
+  }
+  SetOp(&prog, "dropout", "Dropout1", {"z"}, {"a"}, use_mkldnn, "float32");
+  SetOp(&prog, "conv2d", "Conv1", {"a"}, {"b"}, use_mkldnn, "bfloat16");
+  SetOp(&prog, "pool2d", "Pool1", {"b"}, {"c"}, use_mkldnn, "bfloat16");
+  SetOp(&prog, "conv2d", "Conv1", {"c"}, {"d"}, use_mkldnn, "bfloat16");
+  SetOp(&prog, "dropout", "Dropout2", {"d"}, {"e"}, use_mkldnn, "float32");
+  SetOp(&prog, "transpose2", "Transpose1", {"e"}, {"f"}, use_mkldnn,
+        "bfloat16");
+  SetOp(&prog, "reshape2", "Reshape1", {"f"}, {"g"}, use_mkldnn, "bfloat16");
+  SetOp(&prog, "concat", "Concat1", {"g"}, {"h"}, use_mkldnn, "bfloat16");
+  SetOp(&prog, "dropout", "Dropout3", {"h"}, {"i"}, use_mkldnn, "float32");
+
+  return prog;
+}
+
+void MainTest(const ProgramDesc& prog, int conv_count, int pool_count,
+              int transpose_count, int quant_count, int dequant_count,
+              int added_nodes_count) {
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+  int original_nodes_num, current_nodes_num;
+  PreparePass(&graph, prog, variable_names, &original_nodes_num,
+              &current_nodes_num);
+
+  int quantize_nodes_count = 0;
+  int dequantize_nodes_count = 0;
+  int conv2d_nodes_count = 0;
+  int pool2d_nodes_count = 0;
+  int transpose2_nodes_count = 0;
+
+  for (auto* node : graph->Nodes()) {
+    if (node->IsOp()) {
+      auto* op = node->Op();
+      if (op->Type() == "conv2d") {
+        conv2d_nodes_count++;
+      } else if (op->Type() == "pool2d") {
+        pool2d_nodes_count++;
+      } else if (op->Type() == "transpose2") {
+        transpose2_nodes_count++;
+      } else if (op->Type() == "quantize") {
+        quantize_nodes_count++;
+      } else if (op->Type() == "dequantize") {
+        dequantize_nodes_count++;
+      }
+    }
+  }
+  EXPECT_EQ(conv2d_nodes_count, conv_count);
+  EXPECT_EQ(pool2d_nodes_count, pool_count);
+  EXPECT_EQ(transpose2_nodes_count, transpose_count);
+  EXPECT_EQ(quantize_nodes_count, quant_count);
+  EXPECT_EQ(dequantize_nodes_count, dequant_count);
+  EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
+}
+
+TEST(CpuBfloat16Pass, quantize) {
+  bool use_mkldnn = true;
+  // 1 quantize + 1 dequantize
+  int added_nodes = 2;
+  MainTest(BuildProgramDesc(use_mkldnn), 2, 1, 1, 1, 2, added_nodes);
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+USE_PASS(cpu_bfloat16_pass);
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3d7a9c1107bbaac04a3a478014520a9b340b1d5f
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc
@@ -0,0 +1,91 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h"
+
+#include <string>
+#include <unordered_set>
+
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+#include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/fluid/string/pretty_log.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+using string::PrettyLogDetail;
+
+void CPUBfloat16PlacementPass::SetMkldnnDataType(
+    ir::Graph* graph, int* bfloat16_operators) const {
+  const auto& op_types_list =
+      Get<std::unordered_set<std::string>>("bfloat16_enabled_op_types");
+  // Set mkldnn_data_type to bfloat16 for all operators that are listed in
+  // bfloat16_enabled_op_types or, when the list is empty, matched by the
+  // Bfloat16Placement pattern.
+  GraphPatternDetector gpd;
+  patterns::Bfloat16Placement bfloat16_placement_pattern{gpd.mutable_pattern(),
+                                                         "bfloat16_placement"};
+  bfloat16_placement_pattern(op_types_list);
+
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    GET_IR_NODE_FROM_SUBGRAPH(op, op, bfloat16_placement_pattern);
+
+    if ((op->Op()->HasAttr("mkldnn_data_type") ||
+         op->Op()->HasProtoAttr("mkldnn_data_type")) &&
+        !platform::HasOpINT8DataType(op->Op())) {
+      op->Op()->SetAttr("mkldnn_data_type", std::string("bfloat16"));
+      (*bfloat16_operators)++;
+    }
+  };
+  gpd(graph, handler);
+}
+
+void CPUBfloat16PlacementPass::RemoveOrphanedOperators(
+    ir::Graph* graph, int* bfloat16_operators) const {
+  // Find orphaned bfloat16 operators, i.e. ones squeezed between two float32
+  // operators, and revert their mkldnn_data_type attribute to float32.
+  GraphPatternDetector gpd;
+  patterns::OrphanedBfloat16 orphaned_bfloat16_pattern{gpd.mutable_pattern(),
+                                                       "orphaned_bfloat16"};
+  orphaned_bfloat16_pattern();
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    GET_IR_NODE_FROM_SUBGRAPH(op, op, orphaned_bfloat16_pattern);
+
+    op->Op()->SetAttr("mkldnn_data_type", std::string("float32"));
+    (*bfloat16_operators)--;
+  };
+  gpd(graph, handler);
+}
+
+void CPUBfloat16PlacementPass::ApplyImpl(ir::Graph* graph) const {
+  int bfloat16_operators = 0;
+  SetMkldnnDataType(graph, &bfloat16_operators);
+  RemoveOrphanedOperators(graph, &bfloat16_operators);
+  PrettyLogDetail("---    marked %d operators to bfloat16",
+                  bfloat16_operators);
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_PASS(cpu_bfloat16_placement_pass,
+              paddle::framework::ir::CPUBfloat16PlacementPass)
+    // a set of operator type names with bfloat16 support ("conv2d" etc.);
+    // the second param is the default value for this attribute
+    .DefaultPassAttr("bfloat16_enabled_op_types",
+                     new std::unordered_set<std::string>());
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h
new file mode 100644
index 0000000000000000000000000000000000000000..1911b1a3cb32a6a23585e8240c462aa84e8d869b
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h
@@ -0,0 +1,38 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <string>
+
+#include "paddle/fluid/framework/ir/pass.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+/*
+ * Specifies which operators should run with bfloat16 kernels.
+ */
+class CPUBfloat16PlacementPass : public Pass {
+ protected:
+  void SetMkldnnDataType(ir::Graph* graph, int* bfloat16_operators) const;
+
+  void RemoveOrphanedOperators(ir::Graph* graph,
+                               int* bfloat16_operators) const;
+
+  void ApplyImpl(ir::Graph* graph) const override;
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b9797a4bfcc0048083e059cb003746e3278a039b
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc
@@ -0,0 +1,132 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+
+#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h"
+#include "paddle/fluid/platform/mkldnn_helper.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
+           const std::vector<std::string>& inputs,
+           const std::vector<std::string>& outputs,
+           const std::string& mkldnn_data_type = "float32") {
+  auto* op = prog->MutableBlock(0)->AppendOp();
+
+  op->SetType(type);
+  op->SetAttr("mkldnn_data_type", mkldnn_data_type);
+
+  if (type == "conv2d") {
+    op->SetAttr("name", name);
+    op->SetInput("Input", {inputs[0]});
+  } else if (type == "relu") {
+    op->SetInput("X", inputs);
+  } else if (type == "concat") {
+    op->SetAttr("axis", 1);
+    op->SetInput("X", {inputs[0], inputs[1]});
+  } else if (type == "pool2d") {
+    op->SetInput("X", {inputs[0]});
+  } else {
+    FAIL() << "Unexpected operator type.";
+  }
+  op->SetOutput("Out", {outputs[0]});
+}
+
+// operator           mkldnn_data_type
+// ---------------------------------------
+// (a,b)->concat->c   float32
+// c->conv->f         float32
+// f->relu->g         float32
+// g->pool->h         float32
+// h->conv->k         float32
+// k->pool->l         float32
+ProgramDesc BuildProgramDesc() {
+  ProgramDesc prog;
+
+  for (auto& v :
+       std::vector<std::string>({"a", "b", "c", "f", "g", "h", "k", "l"})) {
+    prog.MutableBlock(0)->Var(v);
+  }
+
+  SetOp(&prog, "concat", "concat1", {"a", "b"}, {"c"});
+  SetOp(&prog, "conv2d", "conv1", {"c"}, {"f"});
+  SetOp(&prog, "relu", "relu1", {"f"}, {"g"});
+  SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"});
+  SetOp(&prog, "conv2d", "conv2", {"h"}, {"k"});
+  SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"});
+
+  return prog;
+}
+
+void MainTest(std::initializer_list<std::string> bfloat16_enabled_op_types,
+              unsigned expected_bfloat16_data_type_count) {
+  auto prog = BuildProgramDesc();
+
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+
+  auto pass = PassRegistry::Instance().Get("cpu_bfloat16_placement_pass");
+  pass->Set("bfloat16_enabled_op_types",
+            new std::unordered_set<std::string>(bfloat16_enabled_op_types));
+
+  graph.reset(pass->Apply(graph.release()));
+
+  unsigned bfloat16_data_type_count = 0;
+
+  for (auto* node : graph->Nodes()) {
+    if (node->IsOp()) {
+      if (platform::HasOpBFLOAT16DataType(node->Op())) {
+        ++bfloat16_data_type_count;
+      }
+    }
+  }
+
+  EXPECT_EQ(bfloat16_data_type_count, expected_bfloat16_data_type_count);
+}
+
+void DefaultAttrTest(unsigned expected_bfloat16_data_type_count) {
+  auto prog = BuildProgramDesc();
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+  auto pass = PassRegistry::Instance().Get("cpu_bfloat16_placement_pass");
+  graph.reset(pass->Apply(graph.release()));
+
+  unsigned bfloat16_data_type_count = 0;
+  for (auto* node : graph->Nodes()) {
+    if (node->IsOp()) {
+      if (platform::HasOpBFLOAT16DataType(node->Op())) {
+        ++bfloat16_data_type_count;
+      }
+    }
+  }
+  EXPECT_EQ(bfloat16_data_type_count, expected_bfloat16_data_type_count);
+}
+
+TEST(Bfloat16PlacementPass, enable_all) {
+  MainTest({"conv2d", "pool2d", "relu", "concat"}, 6);
+}
+
+TEST(Bfloat16PlacementPass, enabled_conv_and_pool) {
+  // 2 conv2d + 2 pool2d - 1 orphaned conv2d
+  MainTest({"conv2d", "pool2d"}, 3);
+}
+
+TEST(Bfloat16PlacementPass, default_attr_value) { DefaultAttrTest(0); }
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+USE_PASS(cpu_bfloat16_placement_pass);
diff --git a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc
index 9a0a5f07a7080593d8f13e07788c703edb92c7ad..405cefa99ebbbe147fc96f63567e13607732780e 100644
--- a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc
@@ -20,6 +20,7 @@
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
 #include "paddle/fluid/framework/ir/node.h"
 #include "paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h"
+#include "paddle/fluid/framework/op_version_registry.h"
 
 namespace paddle {
 namespace framework {
@@ -145,3 +146,11 @@ void TransposeFlattenConcatFusePass::ApplyImpl(ir::Graph *graph) const {
 
 REGISTER_PASS(transpose_flatten_concat_fuse_pass,
               paddle::framework::ir::TransposeFlattenConcatFusePass);
+REGISTER_PASS_CAPABILITY(transpose_flatten_concat_fuse_pass)
+    .AddCombination(
+        paddle::framework::compatible::OpVersionComparatorCombination()
+            .EQ("transpose", 0)
+            .EQ("transpose2", 0)
+            .EQ("flatten", 0)
+            .EQ("concat", 0)
+            .EQ("fusion_transpose_flatten_concat", 0));
diff --git a/paddle/fluid/framework/op_info.h b/paddle/fluid/framework/op_info.h
index 171f08390765a64997bff28d80f9360f5da2cd1a..89b499975790060a3a3e3f665c35f8545922e6a7 100644
--- a/paddle/fluid/framework/op_info.h
+++ b/paddle/fluid/framework/op_info.h
@@ -69,7 +69,8 @@ class OpInfo {
 
   const OpCreator& Creator() const {
     PADDLE_ENFORCE_NOT_NULL(creator_,
-                            "Operator's Creator has not been registered");
+                            platform::errors::NotFound(
+                                "Operator's Creator has not been registered."));
     return creator_;
   }
 
@@ -79,11 +80,12 @@ class OpInfo {
     std::string type = proto_ ?
proto_->type() : "unknown"; PADDLE_ENFORCE_NOT_NULL( grad_op_maker_, - "Operator %s's GradOpMaker has not been " - "registered.\nPlease check whether %s_op has " - "grad_op.\nIf not, please set stop_gradient to True " - "for its input and output variables using var.stop_gradient=True.", - type.c_str(), type.c_str()); + platform::errors::NotFound( + "Operator %s's GradOpMaker has not been " + "registered.\nPlease check whether (%s) operator has " + "gradient operator.\nIf not, please set stop_gradient to be True " + "for its input and output variables using var.stop_gradient=True.", + type.c_str(), type.c_str())); return grad_op_maker_; } @@ -100,11 +102,12 @@ class OpInfo { std::string type = proto_ ? proto_->type() : "unknown"; PADDLE_ENFORCE_NOT_NULL( dygraph_grad_op_maker_, - "Operator %s's DygraphGradOpMaker has not been " - "registered.\nPlease check whether %s_op has " - "grad_op.\nIf not, please set stop_gradient to True " - "for its input and output variables using var.stop_gradient=True.", - type.c_str(), type.c_str()); + platform::errors::NotFound( + "Operator %s's DygraphGradOpMaker has not been " + "registered.\nPlease check whether (%s) operator has " + "gradient operator.\nIf not, please set stop_gradient to be True " + "for its input and output variables using var.stop_gradient=True.", + type.c_str(), type.c_str())); return dygraph_grad_op_maker_; } @@ -130,14 +133,17 @@ class OpInfoMap { } void Insert(const std::string& type, const OpInfo& info) { - PADDLE_ENFORCE(!Has(type), "Operator %s has been registered", type); + PADDLE_ENFORCE_NE(Has(type), true, + platform::errors::AlreadyExists( + "Operator (%s) has been registered.", type)); map_.insert({type, info}); } const OpInfo& Get(const std::string& type) const { auto op_info_ptr = GetNullable(type); - PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not been registered", - type); + PADDLE_ENFORCE_NOT_NULL( + op_info_ptr, + platform::errors::NotFound("Operator (%s) is not registered.", type)); return *op_info_ptr; } diff --git a/paddle/fluid/framework/op_kernel_type.cc b/paddle/fluid/framework/op_kernel_type.cc index 6d4801e4a0eed7083e671e1d49b8628dfb280cf9..e64c3674e7433bb1d9e54f89b89e5f1e2c521648 100644 --- a/paddle/fluid/framework/op_kernel_type.cc +++ b/paddle/fluid/framework/op_kernel_type.cc @@ -33,10 +33,18 @@ size_t OpKernelType::Hash::operator()(const OpKernelType& key) const { cur_loc += OpKernelType::kLibBits; int customized_value = key.customized_type_value_; - PADDLE_ENFORCE(customized_value < (1 << OpKernelType::kCustomizeBits)); + PADDLE_ENFORCE_LT(customized_value, (1 << OpKernelType::kCustomizeBits), + platform::errors::Unavailable( + "Too many custom OpKernel attribute values, expected " + "maximum value is %d, received value is %d.", + (1 << OpKernelType::kCustomizeBits), customized_value)); customized_value = customized_value << cur_loc; cur_loc += OpKernelType::kCustomizeBits; - PADDLE_ENFORCE(cur_loc < 64); + PADDLE_ENFORCE_LT(cur_loc, 64, + platform::errors::Unavailable( + "Too many OpKernel attribute values, expected maximum " + "value is 64, received value is %d.", + cur_loc)); std::hash hasher; return hasher(place + data_type + data_layout + library_type + diff --git a/paddle/fluid/framework/op_proto_maker.cc b/paddle/fluid/framework/op_proto_maker.cc index 3408ab262c16197b92e407d0af6043c8a062b5d4..357c4fb5e57fb5b9172631ca57fbdbfeb19b3143 100644 --- a/paddle/fluid/framework/op_proto_maker.cc +++ b/paddle/fluid/framework/op_proto_maker.cc @@ -43,7 +43,9 @@ 
OpProtoAndCheckerMaker::VariableBuilder OpProtoAndCheckerMaker::AddOutput( void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() { std::unordered_set names; auto checker = [&](const std::string& name) { - PADDLE_ENFORCE(!names.count(name), "[%s] is duplicated", name); + PADDLE_ENFORCE_EQ( + names.count(name), 0, + platform::errors::AlreadyExists("Attribute [%s] is duplicated.", name)); names.insert(name); }; for (auto& attr : proto_->attrs()) { diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index d8159d6a5c294b85d8d5ab9bbee3b95a5eba793f..6408fadf90ae32adf048156d1369cf22a76d20ea 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -54,9 +54,10 @@ class Registrar { template struct OperatorRegistrar : public Registrar { explicit OperatorRegistrar(const char* op_type) { - if (OpInfoMap::Instance().Has(op_type)) { - PADDLE_THROW("'%s' is registered more than once.", op_type); - } + PADDLE_ENFORCE_EQ( + OpInfoMap::Instance().Has(op_type), false, + platform::errors::AlreadyExists( + "Operator '%s' is registered more than once.", op_type)); static_assert(sizeof...(ARGS) != 0, "OperatorRegistrar should be invoked at least by OpClass"); OpInfo info; diff --git a/paddle/fluid/framework/op_registry_test.cc b/paddle/fluid/framework/op_registry_test.cc index 21d3454467603c58c9513351eba2c09ef6eeacba..45fe66d7db3b546604b640008e0ab61eaa84390e 100644 --- a/paddle/fluid/framework/op_registry_test.cc +++ b/paddle/fluid/framework/op_registry_test.cc @@ -58,7 +58,8 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { AddInput("input", "input of cosine op").AsDuplicable(); AddOutput("output", "output of cosine op").AsIntermediate(); auto my_checker = [](int i) { - PADDLE_ENFORCE(i % 2 == 0, "'test_attr' must be even!"); + PADDLE_ENFORCE_EQ(i % 2, 0, platform::errors::InvalidArgument( + "'test_attr' must be even!")); }; AddAttr("test_attr", "a simple test attribute") .AddCustomChecker(my_checker); diff --git a/paddle/fluid/framework/op_version_registry.h b/paddle/fluid/framework/op_version_registry.h index 5edd70e035f98f408c0104297e084771cd158f53..fea043a0ff311f7b940331b9d392296c331590e9 100644 --- a/paddle/fluid/framework/op_version_registry.h +++ b/paddle/fluid/framework/op_version_registry.h @@ -152,10 +152,10 @@ class OpVersionRegistrar { return instance; } OpVersion& Register(const std::string& op_type) { - if (op_version_map_.find(op_type) != op_version_map_.end()) { - PADDLE_THROW("'%s' is registered in operator version more than once.", - op_type); - } + PADDLE_ENFORCE_EQ( + op_version_map_.find(op_type), op_version_map_.end(), + platform::errors::AlreadyExists( + "'%s' is registered in operator version more than once.", op_type)); op_version_map_.insert({op_type, OpVersion()}); return op_version_map_[op_type]; } diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index ca2705f154c4f45dfccd954b23209c71701adce5..21fc293e84179da72be8cc5ee50de46a00fe9a0d 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -164,15 +164,20 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { VLOG(4) << place << " " << DebugStringEx(&scope); if (platform::is_gpu_place(place)) { #ifndef PADDLE_WITH_CUDA - PADDLE_THROW("Cannot run operator on place %s", place); + PADDLE_THROW(platform::errors::Unavailable( + "Cannot run operator on place %s, please recompile paddle or " + "reinstall Paddle with CUDA support.", + place)); #else 
auto dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device; platform::SetDeviceId(dev_id); #endif } else if (platform::is_xpu_place(place)) { #ifndef PADDLE_WITH_XPU - PADDLE_THROW(platform::errors::Unimplemented( - "Cannot run operator on place %s", place)); + PADDLE_THROW(platform::errors::Unavailable( + "Cannot run operator on place %s, please recompile paddle or " + "reinstall Paddle with XPU support.", + place)); #else auto dev_id = BOOST_GET_CONST(platform::XPUPlace, place).device; platform::SetXPUDeviceId(dev_id); @@ -214,7 +219,7 @@ std::string OperatorBase::Input(const std::string& name) const { auto& ins = Inputs(name); PADDLE_ENFORCE_LE( ins.size(), 1UL, - platform::errors::AlreadyExists( + platform::errors::InvalidArgument( "Operator %s's input %s should contain only one variable.", type_, name)); return ins.empty() ? kEmptyVarName : ins[0]; @@ -223,8 +228,10 @@ std::string OperatorBase::Input(const std::string& name) const { const std::vector& OperatorBase::Inputs( const std::string& name) const { auto it = inputs_.find(name); - PADDLE_ENFORCE(it != inputs_.end(), "Operator %s does not have the input %s.", - type_, name); + PADDLE_ENFORCE_NE( + it, inputs_.end(), + platform::errors::NotFound("Operator %s does not have the input %s.", + type_, name)); return it->second; } @@ -238,17 +245,21 @@ bool OperatorBase::HasOutputs(const std::string& name) const { std::string OperatorBase::Output(const std::string& name) const { auto& outs = Outputs(name); - PADDLE_ENFORCE_LE(outs.size(), 1UL, - "Operator %s's output %s should contain only one variable.", - type_, name); + PADDLE_ENFORCE_LE( + outs.size(), 1UL, + platform::errors::InvalidArgument( + "Operator %s's output %s should contain only one variable.", type_, + name)); return outs.empty() ? 
kEmptyVarName : outs[0]; } const std::vector& OperatorBase::Outputs( const std::string& name) const { auto it = outputs_.find(name); - PADDLE_ENFORCE(it != outputs_.end(), - "Operator %s does not have an output called %s.", type_, name); + PADDLE_ENFORCE_NE( + it, outputs_.end(), + platform::errors::NotFound( + "Operator %s does not have an output called %s.", type_, name)); return it->second; } @@ -391,16 +402,19 @@ void OperatorBase::CheckAllInputOutputSet() const { for (auto& in : info_->Proto().inputs()) { if (!in.dispensable()) { - PADDLE_ENFORCE(inputs_.find(in.name()) != inputs_.end(), - "Operator %s's input, %s, is not set", Type(), in.name()); + PADDLE_ENFORCE_NE( + inputs_.find(in.name()), inputs_.end(), + platform::errors::NotFound("Operator %s's input (%s) is not set.", + Type(), in.name())); } } for (auto& out : info_->Proto().outputs()) { if (!out.dispensable()) { - PADDLE_ENFORCE(outputs_.find(out.name()) != outputs_.end(), - "Operator %s's output, %s, is not set", Type(), - out.name()); + PADDLE_ENFORCE_NE( + outputs_.find(out.name()), outputs_.end(), + platform::errors::NotFound("Operator %s's output (%s) is not set.", + Type(), out.name())); } } } @@ -428,8 +442,9 @@ const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var) { } else if (var.IsType()) { return &(var.Get().value()); } else { - PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.", - ToTypeName(var.Type())); + PADDLE_THROW(platform::errors::InvalidArgument( + "Variable type is %s, expect LoDTensor or SelectedRows.", + ToTypeName(var.Type()))); } } @@ -439,8 +454,9 @@ Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var) { } else if (var->IsType()) { return var->GetMutable()->mutable_value(); } else { - PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.", - ToTypeName(var->Type())); + PADDLE_THROW(platform::errors::InvalidArgument( + "Variable type is %s, expect LoDTensor or SelectedRows.", + ToTypeName(var->Type()))); } } @@ -462,7 +478,7 @@ const Variable* ExecutionContext::InputVar(const std::string& name) const { PADDLE_ENFORCE_LE( it->second.size(), 1UL, - platform::errors::AlreadyExists( + platform::errors::InvalidArgument( "Operator %s's input %s should contain only one variable.", op_.Type(), name)); return it->second.empty() ? nullptr : it->second[0]; @@ -472,9 +488,11 @@ Variable* ExecutionContext::OutputVar(const std::string& name) const { auto it = ctx_.outputs.find(name); if (it == ctx_.outputs.end()) return nullptr; - PADDLE_ENFORCE_LE(it->second.size(), 1UL, - "Operator %s's output %s should contain only one variable.", - op_.Type(), name); + PADDLE_ENFORCE_LE( + it->second.size(), 1UL, + platform::errors::InvalidArgument( + "Operator %s's output %s should contain only one variable.", + op_.Type(), name)); return it->second.empty() ? 
nullptr : it->second[0];
 }
 
@@ -497,10 +515,11 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
   std::transform(vars.begin(), vars.end(), std::back_inserter(res),
                  [&](const Variable* var) -> const Tensor* {
                    if (var == nullptr) return nullptr;
-                   PADDLE_ENFORCE(
-                       var->IsType<LoDTensor>(),
-                       "should be LoDTensor, but the received type is %s",
-                       ToTypeName(var->Type()));
+                   PADDLE_ENFORCE_EQ(var->IsType<LoDTensor>(), true,
+                                     platform::errors::InvalidArgument(
+                                         "Input variable should be LoDTensor, "
+                                         "but the received type is %s.",
+                                         ToTypeName(var->Type())));
                    return &(var->Get<LoDTensor>());
                  });
   return res;
@@ -558,8 +577,10 @@ class RuntimeInferShapeContext : public InferShapeContext {
     }
     const auto& in = it->second;
     if (in.size() == 0) return false;
-    PADDLE_ENFORCE_EQ(in.size(), 1UL,
-                      "Input %s should not have more than one inputs", name);
+    PADDLE_ENFORCE_EQ(
+        in.size(), 1UL,
+        platform::errors::InvalidArgument(
+            "Input %s should not contain more than one input.", name));
     return in[0] != nullptr;
   }
 
@@ -574,8 +595,10 @@ class RuntimeInferShapeContext : public InferShapeContext {
     if (out.size() == 0) {
       return false;
     }
-    PADDLE_ENFORCE_EQ(out.size(), 1UL,
-                      "Output %s should not have more than one outputs", name);
+    PADDLE_ENFORCE_EQ(
+        out.size(), 1UL,
+        platform::errors::InvalidArgument(
+            "Output %s should not contain more than one output.", name));
     return out[0] != nullptr;
   }
 
@@ -644,16 +667,31 @@ class RuntimeInferShapeContext : public InferShapeContext {
                 size_t j = 0) override {
     auto in_it = ctx_.inputs.find(in);
     auto out_it = ctx_.outputs.find(out);
-    PADDLE_ENFORCE(in_it != ctx_.inputs.end() && in_it->second.size() > i,
-                   "Inputs %s should have %llu argument", in, i);
-    PADDLE_ENFORCE(out_it != ctx_.outputs.end() && out_it->second.size() > j,
-                   "Outputs %s should have %llu argument", out, j);
+    PADDLE_ENFORCE_NE(
+        in_it, ctx_.inputs.end(),
+        platform::errors::NotFound("Input %s does not exist.", in));
+    PADDLE_ENFORCE_NE(
+        out_it, ctx_.outputs.end(),
+        platform::errors::NotFound("Output %s does not exist.", out));
+    PADDLE_ENFORCE_LT(i, in_it->second.size(),
+                      platform::errors::InvalidArgument(
+                          "The index of input dimension is out of range, "
+                          "expected index less than %zu, but received %zu.",
+                          in_it->second.size(), i));
+    PADDLE_ENFORCE_LT(j, out_it->second.size(),
+                      platform::errors::InvalidArgument(
+                          "The index of output dimension is out of range, "
+                          "expected index less than %zu, but received %zu.",
+                          out_it->second.size(), j));
 
     Variable* in_var = in_it->second[i];
     Variable* out_var = out_it->second[j];
 
-    PADDLE_ENFORCE(in_var->Type() == out_var->Type(),
-                   "The type of %s and %s is not the same.", in, out);
+    PADDLE_ENFORCE_EQ(
+        in_var->Type(), out_var->Type(),
+        platform::errors::InvalidArgument(
+            "The type of input (%s) and output (%s) are inconsistent.", in,
+            out));
 
     if (in_var->IsType<framework::SelectedRows>()) {
       auto& in_sele_rows = in_var->Get<framework::SelectedRows>();
@@ -666,9 +704,9 @@ class RuntimeInferShapeContext : public InferShapeContext {
       auto* out_lod_tensor = out_var->GetMutable<framework::LoDTensor>();
       out_lod_tensor->Resize(in_lod_tensor.dims());
     } else {
-      PADDLE_THROW(
+      PADDLE_THROW(platform::errors::Unimplemented(
           "Currently, the input type of ShareDim only can be LoDTensor "
-          "or SelectedRows.");
+          "or SelectedRows."));
     }
   }
 
@@ -721,16 +759,30 @@ class RuntimeInferShapeContext : public InferShapeContext {
                 size_t j = 0) const override {
     auto in_it = ctx_.inputs.find(in);
     auto out_it = ctx_.outputs.find(out);
-    PADDLE_ENFORCE(in_it != ctx_.inputs.end() && in_it->second.size() > i,
-                   "Inputs %s should have %llu argument", in, i);
-    PADDLE_ENFORCE(out_it != ctx_.outputs.end() && out_it->second.size() > j,
-                   "Outputs %s should have %llu argument", out, j);
+    PADDLE_ENFORCE_NE(
+        in_it, ctx_.inputs.end(),
+        platform::errors::NotFound("Input %s does not exist.", in));
+    PADDLE_ENFORCE_NE(
+        out_it, ctx_.outputs.end(),
+        platform::errors::NotFound("Output %s does not exist.", out));
+    PADDLE_ENFORCE_LT(i, in_it->second.size(),
+                      platform::errors::InvalidArgument(
+                          "The index of input dimension is out of range, "
+                          "expected index less than %zu, but received %zu.",
+                          in_it->second.size(), i));
+    PADDLE_ENFORCE_LT(j, out_it->second.size(),
+                      platform::errors::InvalidArgument(
+                          "The index of output dimension is out of range, "
+                          "expected index less than %zu, but received %zu.",
+                          out_it->second.size(), j));
 
     Variable* in_var = in_it->second.at(i);
     if (!in_var->IsType<LoDTensor>()) return;
     Variable* out_var = out_it->second.at(j);
-    PADDLE_ENFORCE(out_var->IsType<LoDTensor>(),
-                   "The %d-th output of Output(%s) must be LoDTensor.", j, out);
+    PADDLE_ENFORCE_EQ(
+        out_var->IsType<LoDTensor>(), true,
+        platform::errors::InvalidArgument(
+            "The %zu-th output of Output(%s) must be LoDTensor.", j, out));
     auto& in_tensor = in_var->Get<LoDTensor>();
     auto* out_tensor = out_var->GetMutable<LoDTensor>();
     out_tensor->set_lod(in_tensor.lod());
@@ -757,18 +809,18 @@ class RuntimeInferShapeContext : public InferShapeContext {
   }
 
   int32_t GetLoDLevel(const std::string& in, size_t i = 0) const override {
-    PADDLE_THROW(
+    PADDLE_THROW(platform::errors::PreconditionNotMet(
         "GetLoDLevel is only used in compile time. The calculation of "
         "output's actual lod is different among operators so that should be "
-        "set in the runtime kernel.");
+        "set in the runtime kernel."));
   }
 
   void SetLoDLevel(const std::string& out, int32_t lod_level,
                    size_t j = 0) const override {
-    PADDLE_THROW(
+    PADDLE_THROW(platform::errors::PreconditionNotMet(
        "SetLoDLevel is only used in compile time. The calculation of "
        "output's actual lod is different among operators so that should be "
-        "set in the runtime kernel.");
+        "set in the runtime kernel."));
   }
 
   bool IsRuntime() const override { return true; }
@@ -794,9 +846,11 @@ class RuntimeInferShapeContext : public InferShapeContext {
 
   DDim GetInputDim(const std::string& name) const override {
     const std::vector<Variable*>& vars = InputVars(name);
-    PADDLE_ENFORCE_EQ(vars.size(), 1UL,
-                      "Input(%s) should hold one element, but now it holds %d",
-                      name, vars.size());
+    PADDLE_ENFORCE_EQ(
+        vars.size(), 1UL,
+        platform::errors::InvalidArgument(
+            "Input(%s) should hold one element, but now it holds %zu elements.",
+            name, vars.size()));
     return this->GetDim(vars[0]);
   }
 
@@ -817,9 +871,11 @@ class RuntimeInferShapeContext : public InferShapeContext {
 
   void SetOutputDim(const std::string& name, const DDim& dim) override {
     auto& vars = OutputVars(name);
-    PADDLE_ENFORCE_EQ(vars.size(), 1UL,
-                      "Output(%s) should hold one element, but now it holds %d",
-                      name, vars.size());
+    PADDLE_ENFORCE_EQ(
+        vars.size(), 1UL,
+        platform::errors::InvalidArgument("Output(%s) should hold one element, "
+                                          "but now it holds %zu elements.",
+                                          name, vars.size()));
     SetDim(vars[0], dim);
   }
 
@@ -831,16 +887,17 @@ class RuntimeInferShapeContext : public InferShapeContext {
 
  protected:
   DDim GetDim(Variable* var) const {
-    PADDLE_ENFORCE_NOT_NULL(var);
+    PADDLE_ENFORCE_NOT_NULL(
+        var, platform::errors::InvalidArgument("Input variable is nullptr."));
     if (var->IsType<LoDTensor>()) {
       return var->Get<LoDTensor>().dims();
     } else if (var->IsType<SelectedRows>()) {
       return var->Get<SelectedRows>().GetCompleteDims();
     } else {
-      PADDLE_THROW(
-          "Only LoDTensor/SelectedRows support 'GetDim', but Variables "
-          "type_id is %s.",
-          ToTypeName(var->Type()));
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "Only LoDTensor or SelectedRows support 'GetDim', but input "
+          "Variable's type is %s.",
+          ToTypeName(var->Type())));
     }
   }
 
@@ -853,7 +910,8 @@ class RuntimeInferShapeContext : public InferShapeContext {
   }
 
   std::vector<DDim> GetRepeatedDims(const std::string& name) const override {
-    PADDLE_THROW("Only compile time support this method");
+    PADDLE_THROW(platform::errors::PreconditionNotMet(
+        "GetRepeatedDims method only can be used in compile time."));
   }
 
   void SetDim(Variable* var, const DDim& dim) {
@@ -862,15 +920,22 @@ class RuntimeInferShapeContext : public InferShapeContext {
     } else if (var->IsType<SelectedRows>()) {
       var->GetMutable<SelectedRows>()->set_height(dim[0]);
     } else {
-      PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
-                   ToTypeName(var->Type()));
+      PADDLE_THROW(platform::errors::Unimplemented(
+          "Variable type error, expect LoDTensor or SelectedRows, but received "
+          "(%s).",
+          ToTypeName(var->Type())));
     }
   }
 
   void SetDims(const std::vector<Variable*>& vars,
               const std::vector<DDim>& dims) {
     size_t length = vars.size();
-    PADDLE_ENFORCE_EQ(length, dims.size());
+    PADDLE_ENFORCE_EQ(length, dims.size(),
+                      platform::errors::InvalidArgument(
+                          "The number of input variables does not match the "
+                          "number of input dimensions, the number of variables "
+                          "is %zu, the number of dimensions is %zu.",
+                          length, dims.size()));
     for (size_t i = 0; i < length; ++i) {
       if (vars[i] == nullptr) {
        continue;
@@ -881,7 +946,8 @@ class RuntimeInferShapeContext : public InferShapeContext {
 
   void SetRepeatedDims(const std::string& name,
                        const std::vector<DDim>& dims) override {
-    PADDLE_THROW("Only compile time support this method");
+    PADDLE_THROW(platform::errors::PreconditionNotMet(
+        "SetRepeatedDims method only can be used in compile time."));
   }
 
   std::vector<proto::VarType::Type> GetVarTypes(
@@ -901,16 +967,19 @@
class RuntimeInferShapeContext : public InferShapeContext { private: const std::vector& InputVars(const std::string& name) const { auto it = ctx_.inputs.find(name); - PADDLE_ENFORCE(it != ctx_.inputs.end(), - "Operator %s does not have the input %s.", op_.Type(), name); + PADDLE_ENFORCE_NE( + it, ctx_.inputs.end(), + platform::errors::NotFound( + "Operator (%s) does not have the input (%s).", op_.Type(), name)); return it->second; } const std::vector& OutputVars(const std::string& name) const { auto it = ctx_.outputs.find(name); - PADDLE_ENFORCE(it != ctx_.outputs.end(), - "Operator %s does not have the outputs %s.", op_.Type(), - name); + PADDLE_ENFORCE_NE( + it, ctx_.outputs.end(), + platform::errors::NotFound( + "Operator (%s) does not have the outputs (%s).", op_.Type(), name)); return it->second; } @@ -928,10 +997,14 @@ static void CheckTensorNANOrInf(const std::string& op_type, tensor.type() != proto::VarType::FP64) { return; } - PADDLE_ENFORCE(!framework::TensorContainsInf(tensor), - "Operator %s output Tensor %s contains Inf", op_type, name); - PADDLE_ENFORCE(!framework::TensorContainsNAN(tensor), - "Operator %s output Tensor %s contains NAN", op_type, name); + PADDLE_ENFORCE_NE( + framework::TensorContainsInf(tensor), true, + platform::errors::Fatal("Operator %s output Tensor %s contains Inf.", + op_type, name)); + PADDLE_ENFORCE_NE( + framework::TensorContainsNAN(tensor), true, + platform::errors::Fatal("Operator %s output Tensor %s contains NAN.", + op_type, name)); } void OperatorWithKernel::RuntimeInferShape(const Scope& scope, @@ -1074,10 +1147,11 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx, // check if op[type] has kernel registered. auto& all_op_kernels = AllOpKernels(); auto kernels_iter = all_op_kernels.find(type_); - if (kernels_iter == all_op_kernels.end()) { - PADDLE_THROW( - "There are no kernels which are registered in the %s operator.", type_); - } + PADDLE_ENFORCE_NE( + kernels_iter, all_op_kernels.end(), + platform::errors::Unavailable( + "There are no kernels which are registered in the %s operator.", + type_)); OpKernelMap& kernels = kernels_iter->second; @@ -1131,10 +1205,10 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx, kernel_iter = kernels.find(expected_kernel_key); } #endif - if (kernel_iter == kernels.end()) { - PADDLE_THROW("op %s does not have kernel for %s", type_, - KernelTypeToString(expected_kernel_key)); - } + PADDLE_ENFORCE_NE(kernel_iter, kernels.end(), + platform::errors::NotFound( + "Operator (%s) does not have kernel for %s.", type_, + KernelTypeToString(expected_kernel_key))); std::lock_guard lock(cache_update_mutex_); if (kernel_type_.get() == nullptr || kernel_func_.get() == nullptr) { @@ -1149,13 +1223,14 @@ void OperatorWithKernel::TransferInplaceVarsBack( for (auto& var_name : inplace_vars) { VLOG(3) << "share inplace var " + var_name + " back to it's original scope"; auto* origin_var = scope.FindVar(var_name); - PADDLE_ENFORCE_NOT_NULL(origin_var, "The var[%s] should not be nullptr.", - var_name); + PADDLE_ENFORCE_NOT_NULL(origin_var, + platform::errors::InvalidArgument( + "The variable[%s] is nullptr.", var_name)); auto* original_tensor = GetMutableLoDTensorOrSelectedRowsValueFromVar(origin_var); auto* var = transfer_scope.FindVar(var_name); - PADDLE_ENFORCE_NOT_NULL(var, "The var[%s] should not be nullptr.", - var_name); + PADDLE_ENFORCE_NOT_NULL(var, platform::errors::InvalidArgument( + "The variable[%s] is nullptr.", var_name)); auto* transformed_tensor = 
GetLoDTensorOrSelectedRowsValueFromVar(*var); auto original_dims = original_tensor->dims(); original_tensor->ShareDataWith(*transformed_tensor); @@ -1380,9 +1455,11 @@ proto::VarType::Type OperatorWithKernel::IndicateVarDataType( ParseInputDataType(ctx, name, &data_type); PADDLE_ENFORCE_NE( data_type, dafault_data_type, - "The Input Variable(%s) of %s Op used to determine kernel data type " - "is empty or not LoDTensor or SelectedRows or LoDTensorArray.", - name, Type()); + platform::errors::InvalidArgument( + "The Input Variable(%s) of (%s) Operator used to determine kernel " + "data type is empty or not LoDTensor or SelectedRows or " + "LoDTensorArray.", + name, Type())); return data_type; } diff --git a/paddle/fluid/framework/operator_test.cc b/paddle/fluid/framework/operator_test.cc index c4ce627ff1f940f1625b8650b243d64af2641612..218fc8880bb276a75ed1dd71b04fcd9f387c9a54 100644 --- a/paddle/fluid/framework/operator_test.cc +++ b/paddle/fluid/framework/operator_test.cc @@ -495,9 +495,9 @@ TEST(IndicateVarDataTypeTest, other) { EXPECT_TRUE( ex_msg.find( "The Input Variable(Other) of " - "indicate_other_data_type_test Op used to " + "(indicate_other_data_type_test) Operator used to " "determine kernel data type " - "is empty or not LoDTensor or SelectedRows or LoDTensorArray") != + "is empty or not LoDTensor or SelectedRows or LoDTensorArray.") != std::string::npos); } ASSERT_TRUE(caught); diff --git a/paddle/fluid/framework/reader.cc b/paddle/fluid/framework/reader.cc index d3513fb7dbed0413e61796d8a843c38fbbcf93dc..b418339bf32965a454e5b240bb728c4cb41e03ba 100644 --- a/paddle/fluid/framework/reader.cc +++ b/paddle/fluid/framework/reader.cc @@ -20,7 +20,10 @@ namespace framework { void ReaderBase::ReadNext(std::vector *out) { std::lock_guard lock(mu_); - PADDLE_ENFORCE_EQ(status_, ReaderStatus::kRunning); + PADDLE_ENFORCE_EQ(status_, ReaderStatus::kRunning, + platform::errors::Unavailable( + "The current reader has stopped running and cannot " + "continue to read the next batch of data.")); ReadNextImpl(out); } diff --git a/paddle/fluid/framework/rw_lock.h b/paddle/fluid/framework/rw_lock.h index f8aa87519a2fc1a14765887e95c96883d7b4589f..9b74a55304077c6c13a55f36ea8cf3b6dfbe5b9c 100644 --- a/paddle/fluid/framework/rw_lock.h +++ b/paddle/fluid/framework/rw_lock.h @@ -32,17 +32,21 @@ struct RWLock { ~RWLock() { pthread_rwlock_destroy(&lock_); } inline void RDLock() { - PADDLE_ENFORCE_EQ(pthread_rwlock_rdlock(&lock_), 0, - "acquire read lock failed"); + PADDLE_ENFORCE_EQ( + pthread_rwlock_rdlock(&lock_), 0, + platform::errors::External("The pthread failed to acquire read lock.")); } inline void WRLock() { PADDLE_ENFORCE_EQ(pthread_rwlock_wrlock(&lock_), 0, - "acquire write lock failed"); + platform::errors::External( + "The pthread failed to acquire write lock.")); } inline void UNLock() { - PADDLE_ENFORCE_EQ(pthread_rwlock_unlock(&lock_), 0, "unlock failed"); + PADDLE_ENFORCE_EQ( + pthread_rwlock_unlock(&lock_), 0, + platform::errors::External("The pthread failed to unlock.")); } private: diff --git a/paddle/fluid/framework/save_load_util.cc b/paddle/fluid/framework/save_load_util.cc index fbbbfd66b3d8c39d0ccaa7d998bb5c5e9860df4e..602b431995cc59ab67e1a32ac09a3557432c3539 100644 --- a/paddle/fluid/framework/save_load_util.cc +++ b/paddle/fluid/framework/save_load_util.cc @@ -33,7 +33,8 @@ void CheckInStreamState(std::istream& istre, size_t length) { VLOG(5) << "Can't read [" << length << "] from file" << "file seems breakem"; - PADDLE_THROW("Model load error, file seems breaken"); + 
PADDLE_THROW(platform::errors::Unavailable( + "Model load failed, istream state error.")); } } @@ -58,10 +59,11 @@ size_t ReadTensorNumber(std::istream& istre) { sizeof(char) * tensor_number_mark.size()); std::string str_read_tensor_number_mark(tensor_number_mark_buffer, tensor_number_mark.size()); - PADDLE_ENFORCE_EQ( - tensor_number_mark, str_read_tensor_number_mark, - "Tensor number mark not match, expect [%s], but read from file is [%]", - tensor_number_mark, str_read_tensor_number_mark); + PADDLE_ENFORCE_EQ(tensor_number_mark, str_read_tensor_number_mark, + platform::errors::InvalidArgument( + "Tensor number mark does not match, expected mark is " + "[%s], but the mark read from file is [%s].", + tensor_number_mark, str_read_tensor_number_mark)); size_t tensor_number = 0; istre.read(reinterpret_cast(&tensor_number), sizeof(tensor_number)); @@ -79,10 +81,11 @@ std::string ReadTensorName(std::istream& istre) { std::string str_read_tensor_name_mark(name_mark_buffer, tensor_name_mark.size()); - PADDLE_ENFORCE_EQ( - tensor_name_mark, str_read_tensor_name_mark, - "Tensor name mark not match, expect [%s], but read from file is [%]", - tensor_name_mark, str_read_tensor_name_mark); + PADDLE_ENFORCE_EQ(tensor_name_mark, str_read_tensor_name_mark, + platform::errors::InvalidArgument( + "Tensor name mark does not match, expected mark is [%s], " + "but the mark read from file is [%s].", + tensor_name_mark, str_read_tensor_name_mark)); size_t tensor_name_length = 0; istre.read(reinterpret_cast(&tensor_name_length), @@ -117,16 +120,18 @@ bool SaveStaticNameListToDisk( for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) { auto var_ptr = scope.FindVar(vec_tensor_name_list[i]); - PADDLE_ENFORCE_NE( - var_ptr, nullptr, - "Variable find error, when save model, can't not find vairable [%s], " - "Please make sure you have run StartUpProgram", - vec_tensor_name_list[i]); + PADDLE_ENFORCE_NOT_NULL( + var_ptr, platform::errors::NotFound("Variable (%s) is not found when " + "saving model, please make sure " + "that exe.run(startup_program) has " + "been executed.", + vec_tensor_name_list[i])); Tensor* tensor = var_ptr->GetMutable(); PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true, - "Paramter [%s] not initialzed," - "Please make sure you have run StartUpProgram", - vec_tensor_name_list[i]); + platform::errors::PreconditionNotMet( + "Parameter [%s] is not initialized, please make sure " + "that exe.run(startup_program) has been executed.", + vec_tensor_name_list[i])); map_tensor[vec_tensor_name_list[i]] = tensor; } @@ -145,9 +150,10 @@ bool SaveDygraphVarBaseListToDisk( Tensor* tensor = var_ptr->GetMutable(); PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true, - "Paramter [%s] not initialzed," - "Please make sure you have run StartUpProgram", - vec_var_base_list[i]->Name()); + platform::errors::PreconditionNotMet( + "Parameter [%s] is not initialized, please make sure " + "that exe.run(startup_program) has been executed.", + vec_var_base_list[i]->Name())); map_tensor[vec_var_base_list[i]->Name()] = tensor; } @@ -185,34 +191,41 @@ bool LoadStaticNameListFromDisk( for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) { auto it = map_load_tensor.find(vec_tensor_name_list[i]); - PADDLE_ENFORCE(it != map_load_tensor.end(), - "Paramete not found in Model file, " - "Can not find [%s] in model file [%s]", - vec_tensor_name_list[i], file_name); + PADDLE_ENFORCE_NE(it, map_load_tensor.end(), + platform::errors::NotFound( + "Parameter (%s) not found in model file (%s).", + vec_tensor_name_list[i], file_name)); auto var_ptr =
scope.FindVar(vec_tensor_name_list[i]); - PADDLE_ENFORCE_NE( - var_ptr, nullptr, - "Parameter not created, when load model, can't not find parameter [%s] " - "please make sure you have run StartUpProgram", - vec_tensor_name_list[i]); + PADDLE_ENFORCE_NOT_NULL( + var_ptr, + platform::errors::PreconditionNotMet( + "Parameter (%s) is not created when loading model, " + "please make sure that exe.run(startup_program) has been executed.", + vec_tensor_name_list[i])); Tensor* tensor = var_ptr->GetMutable(); - PADDLE_ENFORCE_NE(tensor, nullptr, - "Paramter [%s] not initialzed " - "please make sure you have run startUpProgram", - vec_tensor_name_list[i]); + PADDLE_ENFORCE_NOT_NULL( + tensor, + platform::errors::PreconditionNotMet( + "Parameter [%s] is not initialized, " + "please make sure that exe.run(startup_program) has been executed.", + vec_tensor_name_list[i])); PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true, - "Paramter [%s] not initialzed " - "please make sure you have run StartUpProgram", - vec_tensor_name_list[i]); + platform::errors::PreconditionNotMet( + "Parameter [%s] is not initialized, " + "please make sure that exe.run(startup_program) has " + "been executed.", + vec_tensor_name_list[i])); PADDLE_ENFORCE_EQ( tensor->dims(), it->second->dims(), - "Shape not matching: the Program requires a parameter with a shape of " - "(%s), " - "while the loaded parameter (namely [ %s ]) has a shape of (%s).", - tensor->dims(), vec_tensor_name_list[i], it->second->dims()); + platform::errors::InvalidArgument( + "Shape does not match, the program requires a parameter with a " + "shape of " + "(%s), while the loaded parameter (namely [ %s ]) has a shape of " + "(%s).", + tensor->dims(), vec_tensor_name_list[i], it->second->dims())); TensorCopySync(*(it->second.get()), tensor->place(), tensor); @@ -239,9 +252,9 @@ bool SaveTensorToDisk(const std::string& file_name, MkDirRecursively(DirName(file_name).c_str()); std::ofstream fout(file_name, std::ios::binary); - if (!fout) { - PADDLE_THROW("File open error. Can not open file [%s]", file_name); - } + PADDLE_ENFORCE_EQ( + fout.is_open(), true, + platform::errors::Unavailable("File (%s) open failed.", file_name)); // first 256 byte for reserve for fulture upgrade char* kReserveBuffer = new char[model_file_reserve_size]; @@ -292,9 +305,8 @@ bool SaveTensorToDisk(const std::string& file_name, TensorCopySync(*tensor, platform::CPUPlace(), &temp); data_ptr = temp.data(); #else - PADDLE_THROW( - "Tensor is in CUDA device, but paddle not compile with CUDA, this " - "should not happen"); + PADDLE_THROW(platform::errors::Unavailable( + "Tensor is in CUDA device, but paddle is not compiled with CUDA.")); #endif } fout.write(static_cast(data_ptr), @@ -302,8 +314,9 @@ bool SaveTensorToDisk(const std::string& file_name, } if (!fout) { - PADDLE_THROW("Model save failed, data write to model file [%s] error", - file_name); + PADDLE_THROW(platform::errors::Unavailable( + "Model save failed, error when writing data into model file [%s].", + file_name)); } fout.close(); @@ -316,9 +329,9 @@ bool LoadTensorFromDisk( std::map>* map_tensor) { std::ifstream fin(file_name, std::ios::binary); - if (!fin) { - PADDLE_THROW("File open error. 
Can not open model file [%s]", file_name); - } + PADDLE_ENFORCE_EQ( + fin.is_open(), true, + platform::errors::Unavailable("File (%s) open failed.", file_name)); ReadReserveBuffer(fin); @@ -331,7 +344,8 @@ bool LoadTensorFromDisk( uint32_t version; fin.read(reinterpret_cast(&version), sizeof(version)); CheckInStreamState(fin, sizeof(version)); - PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); + PADDLE_ENFORCE_EQ(version, 0U, platform::errors::InvalidArgument( + "Only version 0 tensor is supported.")); proto::VarType::TensorDesc desc; { // int32_t size @@ -344,7 +358,7 @@ bool LoadTensorFromDisk( CheckInStreamState(fin, sizeof(size)); PADDLE_ENFORCE_EQ( desc.ParseFromArray(buf.get(), size), true, - platform::errors::InvalidArgument("Cannot parse tensor desc")); + platform::errors::InvalidArgument("Parse tensor desc failed.")); } { // read tensor diff --git a/paddle/fluid/framework/selected_rows.cc b/paddle/fluid/framework/selected_rows.cc index 54a818250b45e593de4110f56e42a04a9ea65e00..1f402ea9dd33626a43a4d03b96256b2c2841c8b4 100644 --- a/paddle/fluid/framework/selected_rows.cc +++ b/paddle/fluid/framework/selected_rows.cc @@ -113,7 +113,9 @@ void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows, // the 1st field, unit32_t version for SelectedRows uint32_t version; is.read(reinterpret_cast(&version), sizeof(version)); - PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); + PADDLE_ENFORCE_EQ(version, 0U, + platform::errors::InvalidArgument( + "Only version 0 SelectedRows is supported.")); } { // the 2st field, rows information @@ -155,24 +157,27 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown, auto iter = id_to_index_.find(key); if (iter == id_to_index_.end()) { rwlock_->UNLock(); - if (!auto_grown) { - PADDLE_THROW("key %d not found", key); - } + PADDLE_ENFORCE_EQ( + auto_grown, true, + platform::errors::NotFound("Input key(%lld) is not found.", key)); rwlock_->WRLock(); auto map_size = id_to_index_.size(); auto vector_size = rows_.size(); if (map_size != vector_size) { rwlock_->UNLock(); - PADDLE_THROW( - "id_to_index_ size %d should have the same size with rows_ %d", - map_size, vector_size); + PADDLE_THROW(platform::errors::InvalidArgument( + "Row map size(%zu) should be equal to rows size(%zu).", map_size, + vector_size)); } auto write_iter = id_to_index_.find(key); if (write_iter == id_to_index_.end()) { int row_num = rows_.size(); if (row_num == value_->dims()[0]) { rwlock_->UNLock(); - PADDLE_THROW("selected rows is full, then length exceed %d", row_num); + PADDLE_THROW(platform::errors::InvalidArgument( + "Selected rows is full, the length exceeds the length of the first " + "dimension (%d).", + row_num)); } // key logic to put a key into id_to_index_ rows_.push_back(key); @@ -203,15 +208,20 @@ void SelectedRows::SyncIndex() { void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value, bool auto_grown, bool is_test) { - PADDLE_ENFORCE(value->IsInitialized(), - "The value tensor should be initialized."); + PADDLE_ENFORCE_EQ(value->IsInitialized(), true, + platform::errors::InvalidArgument( + "The value tensor is not initialized.")); if (ids.numel() == 0) { VLOG(3) << "keys is empty, please check data!"; } else { int64_t value_width = value_->numel() / value_->dims()[0]; - PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0], - "output tensor should have the same shape with table " - "except the dims[0]."); + PADDLE_ENFORCE_EQ( + value_width, value->numel() / value->dims()[0], + 
platform::errors::InvalidArgument( + "Output tensor should have the same shape as the table " + "except the first dimension, expected value width not counting " + "the first dimension is %d, actual value width is %d.", + value_width, value->numel() / value->dims()[0])); for (int i = 0; i < ids.numel(); ++i) { auto id = ids.data()[i]; int64_t index = AutoGrownIndex(id, auto_grown, is_test); diff --git a/paddle/fluid/framework/selected_rows.h b/paddle/fluid/framework/selected_rows.h index 5f733139419dbc1769d9eb4efe7e793f8fb2752f..285af1d55302a49cae058fccdd5edd13aa28137e 100644 --- a/paddle/fluid/framework/selected_rows.h +++ b/paddle/fluid/framework/selected_rows.h @@ -82,7 +82,8 @@ class SelectedRows { int64_t Index(int64_t key) const { auto it = std::find(rows_.begin(), rows_.end(), key); if (it == rows_.end()) { - PADDLE_THROW("id %s not in table", key); + PADDLE_THROW(platform::errors::NotFound( + "Input id (%lld) is not in current rows table.", key)); } return static_cast(std::distance(rows_.begin(), it)); } diff --git a/paddle/fluid/framework/shape_inference.cc b/paddle/fluid/framework/shape_inference.cc index 4ac872ac3d3bf918678f5294a4c35097c3fb18ab..f5bb3f68007043ad37ea32e7047c5fc546b80931 100644 --- a/paddle/fluid/framework/shape_inference.cc +++ b/paddle/fluid/framework/shape_inference.cc @@ -25,20 +25,22 @@ namespace framework { std::vector InferShapeContext::GetReaderDims( const std::string &name) const { const std::vector &arg_names = Inputs(name); - PADDLE_ENFORCE_EQ( - arg_names.size(), 1UL, - "Reader input '%s' should hold one element, but now it holds %d", name, - arg_names.size()); + PADDLE_ENFORCE_EQ(arg_names.size(), 1UL, + platform::errors::InvalidArgument( + "Reader input '%s' should hold one element, but now it " + "holds %d elements.", + name, arg_names.size())); return this->GetRepeatedDims(arg_names[0]); } void InferShapeContext::SetReaderDims(const std::string &name, const std::vector &dims) { const std::vector &arg_names = Outputs(name); - PADDLE_ENFORCE_EQ( - arg_names.size(), 1UL, - "Reader output '%s' should hold one element, but now it holds %d", name, - arg_names.size()); + PADDLE_ENFORCE_EQ(arg_names.size(), 1UL, + platform::errors::InvalidArgument( + "Reader output '%s' should hold one element, but now " + "it holds %d elements.", + name, arg_names.size())); return this->SetRepeatedDims(arg_names[0], dims); } diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index c3626c5c9e0506f12ca77aac5086cb18e272a771..0e3d11b9f0257905cbede334afd0ad84ff15cb5c 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -94,9 +94,17 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place); auto dst_cpu_place = BOOST_GET_CONST(platform::CPUPlace, dst_place); auto ctx_place = ctx.GetPlace(); - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx_place), true, + platform::errors::PreconditionNotMet( + "Context place error, expected GPUPlace, but actually %s.", + ctx_place)); auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place); - PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place); + PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place, + platform::errors::Unavailable( + "Source place and context place do not match, source " + "place is %s, context place is %s.", + src_gpu_place, ctx_gpu_place)); auto stream = reinterpret_cast(ctx).stream(); 
memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); @@ -106,9 +114,17 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, auto src_cpu_place = BOOST_GET_CONST(platform::CPUPlace, src_place); auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place); auto ctx_place = ctx.GetPlace(); - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx_place), true, + platform::errors::PreconditionNotMet( + "Context place error, expected GPUPlace, but actually %s.", + ctx_place)); auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place); - PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place); + PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place, + platform::errors::Unavailable( + "Destination place and context place do not match, " + "destination place is %s, context place is %s.", + dst_gpu_place, ctx_gpu_place)); auto stream = reinterpret_cast(ctx).stream(); memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream); @@ -164,7 +180,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place); auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place); auto ctx_place = ctx.GetPlace(); - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx_place), true, + platform::errors::PreconditionNotMet( + "Context place error, expected GPUPlace, but actually %s.", + ctx_place)); auto stream = reinterpret_cast(ctx).stream(); if (platform::is_same_place(src_place, dst_place)) { @@ -180,12 +200,14 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); } else { - PADDLE_THROW("ctx is not belong to dst_gpu_place or src_gpu_place."); + PADDLE_THROW(platform::errors::Unavailable( + "Context place does not match the source and destination place.")); } } } else { // NOLINT - PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place); + PADDLE_THROW(platform::errors::Unimplemented( + "Copying from %s to %s is not supported.", src_place, dst_place)); } #endif } @@ -298,7 +320,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, nullptr); } else { // NOLINT - PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place); + PADDLE_THROW(platform::errors::Unimplemented( + "Copy from %s to %s is not supported.", src_place, dst_place)); } #endif } @@ -832,7 +855,9 @@ void TensorFromStream(std::istream& is, Tensor* tensor, void* GetDstPtrByDLDataType(DLDataType type, framework::Tensor* dst, const platform::Place& dst_place) { // vector types not currently supported - PADDLE_ENFORCE_LE(type.lanes, 1, "vector types not currently supported"); + PADDLE_ENFORCE_LE(type.lanes, 1, + platform::errors::Unimplemented( + "Vector type is not supported currently.")); switch (type.bits) { case 8: @@ -840,32 +865,37 @@ void* GetDstPtrByDLDataType(DLDataType type, framework::Tensor* dst, return static_cast(dst->mutable_data(dst_place)); if (type.code == kDLUInt) return static_cast(dst->mutable_data(dst_place)); - PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.", - type.code, type.bits); + PADDLE_THROW(platform::errors::Unimplemented( + "DLDataType code <%d> is illegal when DLDataType.bits is <%d>.", + type.code, type.bits)); case 16: if (type.code == kDLInt) return static_cast(dst->mutable_data(dst_place)); 
if (type.code == kDLFloat) return static_cast( dst->mutable_data(dst_place)); - PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.", - type.code, type.bits); + PADDLE_THROW(platform::errors::Unimplemented( + "DLDataType code <%d> is illegal when DLDataType.bits is <%d>.", + type.code, type.bits)); case 32: if (type.code == kDLInt) return static_cast(dst->mutable_data(dst_place)); if (type.code == kDLFloat) return static_cast(dst->mutable_data(dst_place)); - PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.", - type.code, type.bits); + PADDLE_THROW(platform::errors::Unimplemented( + "DLDataType code <%d> is illegal when DLDataType.bits is <%d>.", + type.code, type.bits)); case 64: if (type.code == kDLInt) return static_cast(dst->mutable_data(dst_place)); if (type.code == kDLFloat) return static_cast(dst->mutable_data(dst_place)); - PADDLE_THROW("There is no this type.code <%d> when type.bits is <%d>.", - type.code, type.bits); + PADDLE_THROW(platform::errors::Unimplemented( + "DLDataType code <%d> is illegal when DLDataType.bits is <%d>.", + type.code, type.bits)); default: - PADDLE_THROW("Unsupport type.bits %d", type.bits); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported DLDataType.bits %d.", type.bits)); } } diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h index fce0142b41d3ae9b2a6fcd4f16d38b0492fbd806..a0408dbc3dbb4ffca70ef322d93b662f1b953f7b 100644 --- a/paddle/fluid/framework/tensor_util.h +++ b/paddle/fluid/framework/tensor_util.h @@ -183,7 +183,11 @@ void TensorToVector(const Tensor& src, std::vector* dst) { dst->resize(src.numel()); auto dst_ptr = static_cast(dst->data()); - PADDLE_ENFORCE_EQ(platform::is_cpu_place(src.place()), true); + PADDLE_ENFORCE_EQ( + platform::is_cpu_place(src.place()), true, + platform::errors::InvalidArgument( + "The input tensor should be on CPU device, but actually it is in %s.", + src.place())); memory::Copy(dst_place, dst_ptr, BOOST_GET_CONST(platform::CPUPlace, src.place()), src_ptr, size); diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc index d6d0371edaa78cde603a7f7d77473682be57df31..be7d6ab868022b5e9e1f073aad441decba0dbf00 100644 --- a/paddle/fluid/inference/analysis/analyzer.cc +++ b/paddle/fluid/inference/analysis/analyzer.cc @@ -27,8 +27,9 @@ Analyzer::Analyzer() {} void Analyzer::Run(Argument *argument) { RunAnalysis(argument); } void Analyzer::RunAnalysis(Argument *argument) { - PADDLE_ENFORCE(argument->analysis_passes_valid(), - "analsis_passes is not valid in the argument."); + PADDLE_ENFORCE_EQ(argument->analysis_passes_valid(), true, + platform::errors::InvalidArgument( + "analysis_passes is not valid in the argument.")); const bool disable_logs = argument->disable_logs(); for (auto &pass : argument->analysis_passes()) { if (!disable_logs) { @@ -38,7 +39,8 @@ void Analyzer::RunAnalysis(Argument *argument) { continue; auto *ptr = PassRegistry::Global().Retreive(pass); - PADDLE_ENFORCE_NOT_NULL(ptr, "no analysis pass called %s", pass); + PADDLE_ENFORCE_NOT_NULL(ptr, platform::errors::PreconditionNotMet( + "No analysis pass called %s.", pass)); ptr->Run(argument); } } diff --git a/paddle/fluid/inference/analysis/analyzer_tester.cc b/paddle/fluid/inference/analysis/analyzer_tester.cc index 79784fcb9bf31e8fac972053b1a4ec6180d45afa..135ef6a970621cea6ee1418f751ffc37406628db 100644 --- a/paddle/fluid/inference/analysis/analyzer_tester.cc +++ b/paddle/fluid/inference/analysis/analyzer_tester.cc @@ -75,9 
+75,14 @@ void TestWord2vecPrediction(const std::string& model_path) { std::vector outputs; CHECK(predictor->Run(slots, &outputs)); - PADDLE_ENFORCE_EQ(outputs.size(), 1UL); + PADDLE_ENFORCE_EQ(outputs.size(), 1UL, + platform::errors::PreconditionNotMet( + "Output size should be 1, but got %d", outputs.size())); // Check the output buffer size and result of each tid. - PADDLE_ENFORCE_EQ(outputs.front().data.length(), 33168UL); + PADDLE_ENFORCE_EQ(outputs.front().data.length(), 33168UL, + platform::errors::PreconditionNotMet( + "Output's data length should be 33168, but got %d", + outputs.front().data.length())); float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815, 0.000932706}; const size_t num_elements = outputs.front().data.length() / sizeof(float); diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index 8d28b8ace26ae51b8fb6b3dcb240c08b1686b143..40ca3e85868fbbba19d81336aed1a8cbee58ec54 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -76,53 +76,62 @@ struct Argument { } } -#define DECL_ARGUMENT_FIELD(field__, Field, type__) \ - public: \ - type__& field__() { \ - PADDLE_ENFORCE(Has(#field__), "There is no such field"); \ - return field__##_; \ - } \ - void Set##Field(const type__& x) { \ - field__##_ = x; \ - valid_fields_.insert(#field__); \ - } \ - DECL_ARGUMENT_FIELD_VALID(field__); \ - type__* field__##_ptr() { return &field__##_; } \ - \ - private: \ +#define DECL_ARGUMENT_FIELD(field__, Field, type__) \ + public: \ + type__& field__() { \ + PADDLE_ENFORCE_EQ( \ + Has(#field__), true, \ + platform::errors::PreconditionNotMet("There is no such field")); \ + return field__##_; \ + } \ + void Set##Field(const type__& x) { \ + field__##_ = x; \ + valid_fields_.insert(#field__); \ + } \ + DECL_ARGUMENT_FIELD_VALID(field__); \ + type__* field__##_ptr() { return &field__##_; } \ + \ + private: \ type__ field__##_; #define DECL_ARGUMENT_FIELD_VALID(field__) \ bool field__##_valid() { return Has(#field__); } -#define DECL_ARGUMENT_UNIQUE_FIELD(field__, Field, type__) \ - public: \ - type__& field__() { \ - PADDLE_ENFORCE_NOT_NULL(field__##_); \ - PADDLE_ENFORCE(Has(#field__)); \ - return *static_cast(field__##_.get()); \ - } \ - void Set##Field(type__* x) { \ - field__##_ = \ - unique_ptr_t(x, [](void* x) { delete static_cast(x); }); \ - valid_fields_.insert(#field__); \ - } \ - void Set##Field##NotOwned(type__* x) { \ - valid_fields_.insert(#field__); \ - field__##_ = unique_ptr_t(x, [](void* x) {}); \ - } \ - DECL_ARGUMENT_FIELD_VALID(field__); \ - type__* field__##_ptr() { \ - PADDLE_ENFORCE(Has(#field__)); \ - return static_cast(field__##_.get()); \ - } \ - type__* Release##Field() { \ - PADDLE_ENFORCE(Has(#field__)); \ - valid_fields_.erase(#field__); \ - return static_cast(field__##_.release()); \ - } \ - \ - private: \ +#define DECL_ARGUMENT_UNIQUE_FIELD(field__, Field, type__) \ + public: \ + type__& field__() { \ + PADDLE_ENFORCE_NOT_NULL(field__##_, platform::errors::PreconditionNotMet( \ + "field should not be null.")); \ + PADDLE_ENFORCE_EQ( \ + Has(#field__), true, \ + platform::errors::PreconditionNotMet("There is no such field")); \ + return *static_cast(field__##_.get()); \ + } \ + void Set##Field(type__* x) { \ + field__##_ = \ + unique_ptr_t(x, [](void* x) { delete static_cast(x); }); \ + valid_fields_.insert(#field__); \ + } \ + void Set##Field##NotOwned(type__* x) { \ + valid_fields_.insert(#field__); \ + field__##_ = unique_ptr_t(x, [](void* x) {}); 
\ + } \ + DECL_ARGUMENT_FIELD_VALID(field__); \ + type__* field__##_ptr() { \ + PADDLE_ENFORCE_EQ( \ + Has(#field__), true, \ + platform::errors::PreconditionNotMet("There is no such field")); \ + return static_cast(field__##_.get()); \ + } \ + type__* Release##Field() { \ + PADDLE_ENFORCE_EQ( \ + Has(#field__), true, \ + platform::errors::PreconditionNotMet("There is no such field")); \ + valid_fields_.erase(#field__); \ + return static_cast(field__##_.release()); \ + } \ + \ + private: \ unique_ptr_t field__##_; DECL_ARGUMENT_FIELD(predictor_id, PredictorID, int); @@ -227,8 +236,10 @@ struct Argument { }; #define ARGUMENT_CHECK_FIELD(argument__, fieldname__) \ - PADDLE_ENFORCE(argument__->Has(#fieldname__), \ - "the argument field [%s] should be set", #fieldname__); + PADDLE_ENFORCE_EQ( \ + argument__->Has(#fieldname__), true, \ + platform::errors::PreconditionNotMet( \ + "the argument field [%s] should be set", #fieldname__)); } // namespace analysis } // namespace inference diff --git a/paddle/fluid/inference/analysis/helper.h b/paddle/fluid/inference/analysis/helper.h index a48058400241b030f17557156a4d973fca92fd8d..730fe35853a96a3427c26f1fa5662118a638f731 100644 --- a/paddle/fluid/inference/analysis/helper.h +++ b/paddle/fluid/inference/analysis/helper.h @@ -73,12 +73,15 @@ struct DataTypeNamer { template const std::string &repr() const { auto x = std::type_index(typeid(T)); - PADDLE_ENFORCE(dic_.count(x), "unknown type for representation"); + PADDLE_ENFORCE_GT(dic_.count(x), 0, platform::errors::PreconditionNotMet( + "unknown type for representation")); return dic_.at(x); } const std::string &repr(const std::type_index &type) const { // NOLINT - PADDLE_ENFORCE(dic_.count(type), "unknown type for representation"); + PADDLE_ENFORCE_GT(dic_.count(type), 0, + platform::errors::PreconditionNotMet( + "unknown type for representation")); return dic_.at(type); } @@ -116,7 +119,9 @@ template class OrderedRegistry { public: T *Register(const std::string &name, T *x) { - PADDLE_ENFORCE(!dic_.count(name), "duplicate key [%s]", name); + PADDLE_ENFORCE_EQ(dic_.count(name), 0, + platform::errors::PreconditionNotMet( + "There exists duplicate key [%s]", name)); dic_[name] = elements_.size(); elements_.emplace_back(std::unique_ptr(x)); return elements_.back().get(); } @@ -136,14 +141,20 @@ class OrderedRegistry { template T &GetFromScope(const framework::Scope &scope, const std::string &name) { framework::Variable *var = scope.FindVar(name); - PADDLE_ENFORCE(var != nullptr); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::PreconditionNotMet( + "The variable named %s should not be nullptr.", name)); return *var->GetMutable(); } static framework::proto::ProgramDesc LoadProgramDesc( const std::string &model_path) { std::ifstream fin(model_path, std::ios::in | std::ios::binary); - PADDLE_ENFORCE(fin.is_open(), "Cannot open file %s", model_path); + PADDLE_ENFORCE_EQ( + fin.is_open(), true, + platform::errors::NotFound( + "Cannot open file %s, please confirm whether the file exists", + model_path)); fin.seekg(0, std::ios::end); std::string buffer(fin.tellg(), ' '); fin.seekg(0, std::ios::beg); @@ -188,10 +199,12 @@ static std::string GetDirRoot(const std::string &path) { static std::string GetOrCreateModelOptCacheDir(const std::string &model_root) { std::string opt_cache_dir = model_root + "/_opt_cache/"; if (!PathExists(opt_cache_dir)) { - PADDLE_ENFORCE(MKDIR(opt_cache_dir.c_str()) != -1, - "Can not create optimize cache directory: %s, Make sure you " - "have permission to write", - opt_cache_dir); 
+ PADDLE_ENFORCE_NE( + MKDIR(opt_cache_dir.c_str()), -1, + platform::errors::PreconditionNotMet( + "Can not create optimize cache directory: %s, make sure you " + "have permission to write", + opt_cache_dir)); } return opt_cache_dir; } diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index d52d71f148c36fa456aaa703c0df2dbccd901205..d136f5033e7e3783ec6c44bbacb94047c718b935 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -38,7 +38,9 @@ IRPassManager::IRPassManager(Argument *argument) { graph_ = std::unique_ptr(new Graph(argument->main_program())); if (argument->Has("scope")) { auto *scope_ptr = argument->scope_ptr(); - PADDLE_ENFORCE(scope_ptr); + PADDLE_ENFORCE_NOT_NULL(scope_ptr, + platform::errors::PreconditionNotMet( + "The scope ptr should not be nullptr.")); graph_->SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr); } @@ -101,13 +103,17 @@ void IRPassManager::CreatePasses(Argument *argument, std::string optim_cache_dir = argument->optim_cache_dir(); bool int8_valid = !(model_from_memory && optim_cache_dir.empty() && enable_int8); - PADDLE_ENFORCE(int8_valid, - "When you are in TRT INT8 mode, and load model from " - "memory, you should set optim_cache_dir using " - "config.SetOptimCacheDir()"); - PADDLE_ENFORCE(!(model_from_memory && use_static_engine), - "When you are using Paddle-TRT, and also using load model " - "from memory, you should set the use_static to false."); + PADDLE_ENFORCE_EQ( + int8_valid, true, + platform::errors::PreconditionNotMet( + "When you are in TRT INT8 mode, and load model from " + "memory, you should set optim_cache_dir using " + "config.SetOptimCacheDir()")); + PADDLE_ENFORCE_EQ( + !(model_from_memory && use_static_engine), true, + platform::errors::PreconditionNotMet( + "When you are using Paddle-TRT, and also using load model " + "from memory, you should set the use_static to false.")); if (!optim_cache_dir.empty()) { pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir)); diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc index b3bfafb0a116018fe2d624f390f355b348e3f847..ebb19fd486cc89c69d70de3fa98954b9ee415f1a 100644 --- a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc @@ -123,7 +123,9 @@ void RenameAndGetOutputs( auto add_block_var = [&](const std::string &graph_arg, const std::string &block_arg) { auto arg_var_node = graph_var_map.find(graph_arg); - PADDLE_ENFORCE(arg_var_node != graph_var_map.end()); + PADDLE_ENFORCE_NE(arg_var_node, graph_var_map.end(), + platform::errors::InvalidArgument( + "Can not find %s in graph_var_map", graph_arg)); auto *var_t = block_desc->Var(block_arg); var_t->SetShape(arg_var_node->second->Var()->GetShape()); var_t->SetDataType(arg_var_node->second->Var()->GetDataType()); @@ -133,7 +135,10 @@ void RenameAndGetOutputs( framework::proto::OpDesc *op = block_desc->Op(index)->Proto(); framework::OpDesc op_desc(*op, nullptr); auto correspond_node = subgraph_nodes[index]; - PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type()); + PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type(), + platform::errors::PreconditionNotMet( + "We should get %s, but got %s", op->type(), + correspond_node->Name())); std::unordered_map var2id; std::unordered_map in_vars; diff --git 
a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc index 7ef072277fb7f1f13c14b38d64cea6d1f4584b76..46612c1c5b7065a1f87e09117818df8a15e2bd8b 100644 --- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc @@ -97,7 +97,9 @@ void TensorRtSubgraphPass::CreateTensorRTOp( std::vector *repetitive_params) const { auto *op_desc = node->Op(); auto &subgraph = *framework::ir::Agent(node).subgraph(); - PADDLE_ENFORCE(!subgraph.empty()); + PADDLE_ENFORCE_EQ(subgraph.empty(), false, + platform::errors::PreconditionNotMet( + "The subgraph should not be empty.")); framework::ProgramDesc *program_desc = Get("program"); @@ -194,12 +196,17 @@ void TensorRtSubgraphPass::CreateTensorRTOp( // to Tensor. std::vector output_mapping; for (auto name : output_names) { - PADDLE_ENFORCE(output_name_map.count(name) != 0); + PADDLE_ENFORCE_NE(output_name_map.count(name), 0, + platform::errors::PreconditionNotMet( + "The output_name_map should have %s", name)); output_mapping.push_back(output_name_map[name]); } - PADDLE_ENFORCE(!output_mapping.empty()); - PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(), - "the block has no var-desc"); + PADDLE_ENFORCE_EQ(output_mapping.empty(), false, + platform::errors::PreconditionNotMet( + "The output_mapping should not be empty.")); + PADDLE_ENFORCE_EQ( + !block_desc.Proto()->vars().empty(), true, + platform::errors::PreconditionNotMet("the block has no var-desc")); // Set attrs op_desc->SetType("tensorrt_engine"); diff --git a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc index d986811a827b6ed477b30bc43d26f52a71e8f178..34192965297a6b88c7905a2b1d7b1857d842f06a 100644 --- a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc @@ -13,6 +13,8 @@ // limitations under the License. #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h" +#include +#include #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/inference/analysis/ir_pass_manager.h" @@ -31,7 +33,10 @@ void IrAnalysisPass::RunImpl(Argument* argument) { // Apply passes. 
IRPassManager the_ir_manager(argument); graph = the_ir_manager.Apply(std::move(graph)); - PADDLE_ENFORCE_GT(graph->Nodes().size(), 0); + PADDLE_ENFORCE_GT( + graph->Nodes().size(), 0, + platform::errors::PreconditionNotMet( + "The graph nodes size should be greater than 0, but got 0")); argument->SetMainGraph(graph.release()); CollectFusionStatis(argument); } diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc index 970ecdbbeb0c4c12ce6ba928a74a14ca1ae183ca..188b2ff851d96fa76edd666c696d98ddb1dcb948 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc @@ -31,7 +31,9 @@ void IrGraphBuildPass::RunImpl(Argument *argument) { if (!argument->scope_valid()) { argument->SetScope(new framework::Scope); } - PADDLE_ENFORCE(argument->use_gpu_valid()); + PADDLE_ENFORCE_EQ(argument->use_gpu_valid(), true, + platform::errors::PreconditionNotMet( + "The use_gpu field should be valid")); // The load program should run on the same device with the inference program, // so that the parameters will on the same device, or they will keep copying @@ -51,14 +53,17 @@ void IrGraphBuildPass::RunImpl(Argument *argument) { argument->model_from_memory_valid() && argument->model_from_memory()); argument->SetMainProgram(program.release()); } else { - PADDLE_THROW( - "either model_dir or (program path and parameter path) should be set."); + PADDLE_THROW(platform::errors::PreconditionNotMet( + "either model_dir or (program path and parameter path) should be " + "set.")); } auto graph = std::unique_ptr(new Graph(argument->main_program())); argument->SetMainGraph(graph.release()); auto *scope_ptr = argument->scope_ptr(); - PADDLE_ENFORCE(scope_ptr); + PADDLE_ENFORCE_NOT_NULL(scope_ptr, + platform::errors::PreconditionNotMet( + "The scope ptr should not be nullptr.")); argument->main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr); } diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.cc index 1f888a28da0416b41a87b551208fbe109f54d844..c30aa2a1629c3638b1e7714f7d646c924e7156d7 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.cc @@ -31,7 +31,8 @@ void IrInferCleanGraphPass::RunImpl(Argument* argument) { std::unordered_set invalid_nodes; int valid_op = 0; for (auto* node : graph.Nodes()) { - PADDLE_ENFORCE_NOT_NULL(node); + PADDLE_ENFORCE_NOT_NULL(node, platform::errors::PreconditionNotMet( + "The node should not be nullptr.")); if (is_valid_node(node)) { invalid_nodes.insert(node); } else if (node->IsOp()) { diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc index fedee3ff95f0ffe7af730c7113dbe6ea33c118e5..f127478b5f2bf4bbc3157c3d825d9b042275d957 100644 --- a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc @@ -23,8 +23,12 @@ namespace inference { namespace analysis { void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) { - PADDLE_ENFORCE(argument->scope_valid()); - PADDLE_ENFORCE(argument->use_gpu_valid()); + PADDLE_ENFORCE_EQ( + argument->scope_valid(), true, + platform::errors::PreconditionNotMet("The scope field should be valid")); + 
PADDLE_ENFORCE_EQ(argument->use_gpu_valid(), true, + platform::errors::PreconditionNotMet( + "The use_gpu field should be valid")); platform::Place place; @@ -40,7 +44,9 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) { LOG(INFO) << "Sync params from CPU to GPU"; - PADDLE_ENFORCE(argument->gpu_device_id_valid()); + PADDLE_ENFORCE_EQ(argument->gpu_device_id_valid(), true, + platform::errors::PreconditionNotMet( + "The gpu_device_id field should be valid")); place = platform::CUDAPlace(argument->gpu_device_id()); auto *scope = argument->scope_ptr(); @@ -56,7 +62,8 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) { continue; } auto *var = scope->FindLocalVar(var_name); - PADDLE_ENFORCE(var != nullptr); + PADDLE_ENFORCE_NOT_NULL(var, platform::errors::PreconditionNotMet( + "The var should not be nullptr")); if (var->IsType() || var->IsType()) { auto *t = var->GetMutable(); diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc index 9eb8478515727cf04f9d16e9a38a8f4c3ec9c683..f432188131eddc402e696091ab3723697216aadf 100644 --- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc +++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc @@ -224,7 +224,9 @@ void UpdateOpDescsByReuse( // modify the graph for (auto input_node : node->inputs) { - PADDLE_ENFORCE(input_node->IsVar()); + PADDLE_ENFORCE_EQ(input_node->IsVar(), true, + platform::errors::PreconditionNotMet( + "The input node should be a variable.")); std::string input_node_name = input_node->Name(); if (reuse_table.count(input_node_name) && reuse_table.at(input_node_name) != input_node_name) { @@ -246,7 +248,9 @@ void UpdateOpDescsByReuse( // modify the graph for (auto out_node : node->outputs) { - PADDLE_ENFORCE(out_node->IsVar()); + PADDLE_ENFORCE_EQ(out_node->IsVar(), true, + platform::errors::PreconditionNotMet( + "The output node should be a variable.")); std::string out_node_name = out_node->Name(); if (reuse_table.count(out_node_name) && reuse_table.at(out_node_name) != out_node_name) { diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 9fbc97d55090345af3b3b12bcd138bfaecd346cc..2184574aa1fe3c66728b41f221c1b0bf5fd464e7 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -230,7 +230,8 @@ void AnalysisConfig::EnableMkldnnBfloat16() { MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const { PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_, - "MkldnnQuantizer was not enabled yet."); + platform::errors::PreconditionNotMet( + "MkldnnQuantizer was not enabled yet.")); return mkldnn_quantizer_config_.get(); } diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 64dfdda54aceefef1d89ccb2e3a917ad47c53966..ac914700643af2e7e8eca5dcf0bdf8de88e320d6 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -169,7 +169,8 @@ bool AnalysisPredictor::PrepareScope( if (parent_scope) { PADDLE_ENFORCE_NOT_NULL( parent_scope, - "Both program and parent_scope should be set in Clone mode."); + platform::errors::PreconditionNotMet( + "Both program and parent_scope should be set in Clone mode.")); scope_ = parent_scope; status_is_cloned_ = true; } else { @@ -235,7 +236,9 @@ bool AnalysisPredictor::PrepareExecutor() { executor_->Prepare(sub_scope_, 
*inference_program_, 0, config_.use_feed_fetch_ops_); - PADDLE_ENFORCE_NOT_NULL(sub_scope_); + PADDLE_ENFORCE_NOT_NULL(sub_scope_, + platform::errors::PreconditionNotMet( + "The sub_scope should not be nullptr.")); return true; } @@ -297,7 +300,8 @@ bool AnalysisPredictor::Run(const std::vector &inputs, timer.tic(); // set feed variable framework::Scope *scope = sub_scope_ ? sub_scope_ : scope_.get(); - PADDLE_ENFORCE_NOT_NULL(scope, "The scope should not be nullptr."); + PADDLE_ENFORCE_NOT_NULL(scope, platform::errors::PreconditionNotMet( + "The scope should not be nullptr.")); if (!SetFeed(inputs, scope)) { LOG(ERROR) << "fail to set feed"; return false; @@ -399,7 +403,11 @@ bool AnalysisPredictor::GetFetch(std::vector *outputs, outputs->resize(fetches_.size()); for (size_t i = 0; i < fetches_.size(); ++i) { int idx = BOOST_GET_CONST(int, fetches_[i]->GetAttr("col")); - PADDLE_ENFORCE((size_t)idx == i); + PADDLE_ENFORCE_EQ( + static_cast(idx), i, + platform::errors::InvalidArgument( + "Fetch op's col attr(%d) should be equal to the index(%d)", idx, + i)); framework::FetchType &fetch_var = framework::GetFetchVariable(*scope, "fetch", idx); auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var); @@ -435,10 +443,12 @@ void AnalysisPredictor::PrepareArgument() { if (!config_.model_dir().empty()) { argument_.SetModelDir(config_.model_dir()); } else { - PADDLE_ENFORCE( - !config_.params_file().empty(), - "Either model_dir or (param_file, prog_file) should be set."); - PADDLE_ENFORCE(!config_.prog_file().empty()); + PADDLE_ENFORCE_EQ(config_.params_file().empty(), false, + platform::errors::PreconditionNotMet( + "Either model_dir or param_file should be set.")); + PADDLE_ENFORCE_EQ(config_.prog_file().empty(), false, + platform::errors::PreconditionNotMet( + "Either model_dir or prog_file should be set.")); std::string dir = inference::analysis::GetDirRoot(config_.prog_file()); argument_.SetModelProgramPath(config_.prog_file()); @@ -503,7 +513,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() { PrepareArgument(); Analyzer().Run(&argument_); - PADDLE_ENFORCE(argument_.scope_valid()); + PADDLE_ENFORCE_EQ( + argument_.scope_valid(), true, + platform::errors::InvalidArgument("The argument scope should be valid.")); VLOG(5) << "to prepare executor"; ARGUMENT_CHECK_FIELD((&argument_), ir_analyzed_program); inference_program_.reset( @@ -525,8 +537,10 @@ std::unique_ptr CreatePaddlePredictor< FLAGS_minloglevel = 2; // GLOG_ERROR } VLOG(3) << "create AnalysisConfig"; - PADDLE_ENFORCE(config.is_valid(), - "Note: Each config can only be used for one predictor."); + PADDLE_ENFORCE_EQ( + config.is_valid(), true, + platform::errors::InvalidArgument( + "Note: Each config can only be used for one predictor.")); if (config.use_gpu()) { static std::once_flag gflags_initialized; @@ -623,7 +637,9 @@ bool AnalysisPredictor::MkldnnQuantize() { } void AnalysisPredictor::PrepareFeedFetch() { - PADDLE_ENFORCE_NOT_NULL(sub_scope_); + PADDLE_ENFORCE_NOT_NULL(sub_scope_, + platform::errors::InvalidArgument( + "The sub_scope should not be nullptr.")); CreateFeedFetchVar(sub_scope_); for (auto *op : inference_program_->Block(0).AllOps()) { if (op->Type() == "feed") { @@ -646,7 +662,8 @@ void AnalysisPredictor::PrepareFeedFetch() { } void AnalysisPredictor::CreateFeedFetchVar(framework::Scope *scope) { - PADDLE_ENFORCE_NOT_NULL(scope); + PADDLE_ENFORCE_NOT_NULL(scope, platform::errors::InvalidArgument( + "The scope should not be nullptr.")); auto *var = scope->Var("feed"); var->GetMutable(); var = 
scope->Var("fetch"); @@ -667,7 +684,8 @@ AnalysisPredictor::GetInputTensorShape() { std::vector names = GetInputNames(); for (std::string name : names) { auto *var = inference_program_->Block(0).FindVar(name); - PADDLE_ENFORCE_NOT_NULL(var, "input %s does not exist.", name); + PADDLE_ENFORCE_NOT_NULL(var, platform::errors::PreconditionNotMet( + "Input %s does not exist.", name)); input_shapes[name] = var->GetShape(); } return input_shapes; @@ -683,7 +701,11 @@ std::vector AnalysisPredictor::GetOutputNames() { std::unique_ptr AnalysisPredictor::GetInputTensor( const std::string &name) { - PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name); + PADDLE_ENFORCE_NOT_NULL( + executor_->scope()->FindVar(name), + platform::errors::PreconditionNotMet( + "The variable named %s is not found in the scope of the exector.", + name)); std::unique_ptr res( new ZeroCopyTensor(static_cast(executor_->scope()))); res->input_or_output_ = true; @@ -700,7 +722,11 @@ std::unique_ptr AnalysisPredictor::GetInputTensor( std::unique_ptr AnalysisPredictor::GetOutputTensor( const std::string &name) { - PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name); + PADDLE_ENFORCE_NOT_NULL( + executor_->scope()->FindVar(name), + platform::errors::PreconditionNotMet( + "he variable named %s is not found in the scope of the exector.", + name)); std::unique_ptr res( new ZeroCopyTensor(static_cast(executor_->scope()))); res->input_or_output_ = false; @@ -761,8 +787,11 @@ bool AnalysisPredictor::LoadProgramDesc() { std::string pb_content; // Read binary std::ifstream fin(filename, std::ios::in | std::ios::binary); - PADDLE_ENFORCE(static_cast(fin.is_open()), "Cannot open file %s", - filename); + PADDLE_ENFORCE_EQ( + static_cast(fin.is_open()), true, + platform::errors::NotFound( + "Cannot open file %s, please confirm whether the file is normal.", + filename)); fin.seekg(0, std::ios::end); pb_content.resize(fin.tellg()); fin.seekg(0, std::ios::beg); @@ -779,7 +808,8 @@ bool AnalysisPredictor::LoadProgramDesc() { bool AnalysisPredictor::LoadParameters() { PADDLE_ENFORCE_NOT_NULL(inference_program_.get(), - "The inference program should be loaded first."); + platform::errors::PreconditionNotMet( + "The inference program should be loaded first.")); const auto &global_block = inference_program_->MutableBlock(0); @@ -855,8 +885,9 @@ void AnalysisPredictor::ClearIntermediateTensor() { #if PADDLE_WITH_TENSORRT bool AnalysisPredictor::SaveTrtCalibToDisk() { - PADDLE_ENFORCE(config_.tensorrt_engine_enabled(), - "This func can be invoked only in trt mode"); + PADDLE_ENFORCE_EQ(config_.tensorrt_engine_enabled(), true, + platform::errors::PreconditionNotMet( + "This func can be invoked only in trt mode")); auto &block = inference_program_->Block(0); for (auto &op_desc : block.AllOps()) { if (op_desc->Type() == "tensorrt_engine") { diff --git a/paddle/fluid/inference/api/api.cc b/paddle/fluid/inference/api/api.cc index 2f608da531f25e1a5665744f7e9a2968cc9d0d64..840541246aff4d6f5dec1d8b3f8e5892bdcb6e9d 100644 --- a/paddle/fluid/inference/api/api.cc +++ b/paddle/fluid/inference/api/api.cc @@ -62,9 +62,9 @@ PaddleBuf &PaddleBuf::operator=(const PaddleBuf &other) { if (other.length() && other.data()) memcpy(data_, other.data(), other.length()); else if (other.length()) - PADDLE_THROW( + PADDLE_THROW(platform::errors::InvalidArgument( "Invalid argument, null pointer data with length %u is passed", - other.length()); + other.length())); length_ = other.length(); memory_owned_ = true; @@ -92,7 +92,8 @@ void 
PaddleBuf::Resize(size_t length) { length_ = length; memory_owned_ = true; } else { - PADDLE_THROW("The memory is allocated externally, can not Resized"); + PADDLE_THROW(platform::errors::PreconditionNotMet( + "The memory is allocated externally, can not be resized.")); } } @@ -105,7 +106,11 @@ void PaddleBuf::Reset(void *data, size_t length) { void PaddleBuf::Free() { if (memory_owned_ && data_) { - PADDLE_ENFORCE_GT(length_, 0UL); + PADDLE_ENFORCE_GT( + length_, 0UL, + platform::errors::PreconditionNotMet( + "The memory used in PaddleBuf %d should be greater than 0", + length_)); delete[] static_cast(data_); data_ = nullptr; length_ = 0; diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index 07d6dcf86e9814e5bfc932d8320b549d55fe88ae..ca0a5148f0622a8c848cb18afb94f600a547bbfe 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -87,7 +87,9 @@ bool NativePaddlePredictor::Init( if (parent_scope) { scope_ = parent_scope; sub_scope_ = &(parent_scope->NewScope()); - PADDLE_ENFORCE_NOT_NULL(sub_scope_, "create sub scope fail"); + PADDLE_ENFORCE_NOT_NULL(sub_scope_, + platform::errors::PreconditionNotMet( + "The sub_scope should not be nullptr.")); } else { paddle::framework::InitDevices(false); scope_.reset(new paddle::framework::Scope()); @@ -182,7 +184,10 @@ std::unique_ptr NativePaddlePredictor::Clone() { std::unique_ptr cls(new NativePaddlePredictor(config_)); // Hot fix the bug that result diff in multi-thread. // TODO(Superjomn) re-implement a real clone here. - PADDLE_ENFORCE_NOT_NULL(dynamic_cast(cls.get())); + PADDLE_ENFORCE_NOT_NULL( + dynamic_cast(cls.get()), + platform::errors::PreconditionNotMet( + "Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed")); if (!dynamic_cast(cls.get())->Init(nullptr)) { LOG(ERROR) << "fail to call Init"; return nullptr; } @@ -224,8 +229,13 @@ bool NativePaddlePredictor::SetFeed(const std::vector &inputs, return false; } - PADDLE_ENFORCE_NOT_NULL(input_ptr); - PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data()); + PADDLE_ENFORCE_NOT_NULL(input_ptr, + platform::errors::InvalidArgument( + "The input_ptr should not be nullptr.")); + PADDLE_ENFORCE_NOT_NULL( + inputs[i].data.data(), + platform::errors::InvalidArgument( + "The data of input tensor should not be null.")); if (platform::is_cpu_place(place_)) { // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy. std::memcpy(static_cast(input_ptr), inputs[i].data.data(), @@ -241,7 +251,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector &inputs, platform::CPUPlace(), inputs[i].data.data(), inputs[i].data.length(), dev_ctx->stream()); #else - PADDLE_THROW("Not compile with CUDA, should not reach here."); + PADDLE_THROW(platform::errors::Unavailable( + "Not compiled with CUDA, should not reach here.")); #endif } @@ -287,7 +298,11 @@ bool NativePaddlePredictor::GetFetch(std::vector *outputs, outputs->resize(fetchs_.size()); for (size_t i = 0; i < fetchs_.size(); ++i) { int idx = BOOST_GET_CONST(int, fetchs_[i]->GetAttr("col")); - PADDLE_ENFORCE((size_t)idx == i); + PADDLE_ENFORCE_EQ( + static_cast(idx), i, + platform::errors::InvalidArgument( + "Fetch op's col attr(%d) should be equal to the index(%d)", idx, + i)); framework::FetchType &fetch_var = framework::GetFetchVariable(*scope, "fetch", idx); auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var); @@ -318,10 +333,15 @@ std::unique_ptr CreatePaddlePredictor< VLOG(3) << "create NativePaddlePredictor"; if (config.use_gpu) { // 1. 
GPU memory - PADDLE_ENFORCE_GE( - config.fraction_of_gpu_memory, 0.f, - "fraction_of_gpu_memory in the config should be set to range (0., 1.]"); - PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device); + PADDLE_ENFORCE_GE(config.fraction_of_gpu_memory, 0.f, + platform::errors::InvalidArgument( + "fraction_of_gpu_memory in the config should be set " + "to range (0., 1.]")); + PADDLE_ENFORCE_GE(config.device, 0, + platform::errors::PreconditionNotMet( + "Invalid device id %d, the device id should be " + "greater than or equal to 0.", + config.device)); std::vector flags; if (config.fraction_of_gpu_memory >= 0.0f || config.fraction_of_gpu_memory <= 0.95f) { @@ -336,7 +356,9 @@ std::unique_ptr CreatePaddlePredictor< std::unique_ptr predictor(new NativePaddlePredictor(config)); PADDLE_ENFORCE_NOT_NULL( - dynamic_cast(predictor.get())); + dynamic_cast(predictor.get()), + platform::errors::PreconditionNotMet( + "Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed")); if (!dynamic_cast(predictor.get())->Init(nullptr)) { return nullptr; } diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h index cddb0c8daf97b2b8142fcc3b207be2c56a43988a..014985661fd927debb48c699a157c0e05265842c 100644 --- a/paddle/fluid/inference/api/helper.h +++ b/paddle/fluid/inference/api/helper.h @@ -112,16 +112,19 @@ static T convert(const std::string &item, std::string message = "invalid_argument exception when try to convert : " + item; LOG(ERROR) << message; - PADDLE_THROW(message); + PADDLE_THROW(platform::errors::InvalidArgument( + "invalid_argument exception when trying to convert %s.", item)); } catch (std::out_of_range &e) { std::string message = "out_of_range exception when try to convert : " + item; LOG(ERROR) << message; - PADDLE_THROW(message); + PADDLE_THROW(platform::errors::InvalidArgument( + "out_of_range exception when trying to convert %s.", item)); } catch (...) 
{ std::string message = "unexpected exception when try to convert " + item; LOG(ERROR) << message; - PADDLE_THROW(message); + PADDLE_THROW(platform::errors::InvalidArgument( + "unexpected exception when trying to convert %s.", item)); } return res; } @@ -353,7 +356,8 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid, double batch_latency, int epoch = 1, const framework::proto::VarType::Type data_type = framework::proto::VarType::FP32) { - PADDLE_ENFORCE_GT(batch_size, 0, "Non-positive batch size."); + PADDLE_ENFORCE_GT(batch_size, 0, platform::errors::InvalidArgument( + "Non-positive batch size.")); double sample_latency = batch_latency / batch_size; LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid << " ======"; diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.cc b/paddle/fluid/inference/api/mkldnn_quantizer.cc index 9be12ff309acff681da75f7f13e317a408a9552a..793fc53d90b768050572a3dd0a080a5d30e959a2 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer.cc +++ b/paddle/fluid/inference/api/mkldnn_quantizer.cc @@ -62,9 +62,12 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() { if (scales_.find(var_name) != scales_.end()) continue; auto* var = predictor_.sub_scope_->FindVar(var_name); - PADDLE_ENFORCE(var, "%s is not in the scope", var_name); - PADDLE_ENFORCE(var->IsType(), - "Only support lod tensor now."); + PADDLE_ENFORCE_NOT_NULL(var, + platform::errors::PreconditionNotMet( + "%s is not in the scope", var_name)); + PADDLE_ENFORCE_EQ(var->IsType(), true, + platform::errors::PreconditionNotMet( + "Only support lod tensor now.")); LoDTensor* var_tensor = var->GetMutable(); // force unsigned type if already know it @@ -82,9 +85,11 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() { } else if (op->Type() == "transpose2" || op->Type() == "reshape2" || op->Type() == "pool2d") { auto input_var_name = op->Input("X")[0]; - PADDLE_ENFORCE(scales_.find(input_var_name) != scales_.end(), - "Input scales must be calculated before the " - "output scales to infer if output is unsigned."); + PADDLE_ENFORCE_NE( + scales_.find(input_var_name), scales_.end(), + platform::errors::PreconditionNotMet( + "Input scales must be calculated before the " + "output scales to infer if output is unsigned.")); if (scales_.find(input_var_name) != scales_.end()) { scales_[var_name] = scales_[input_var_name]; } @@ -94,10 +99,11 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() { is_unsigned = true; double min_scale = std::numeric_limits::max(); for (auto input_var_name : op->Input("X")) { - PADDLE_ENFORCE( - scales_.find(input_var_name) != scales_.end(), - "Input scales must be calculated before the " - "output scales to infer if output is unsigned."); + PADDLE_ENFORCE_NE( + scales_.find(input_var_name), scales_.end(), + platform::errors::PreconditionNotMet( + "Input scales must be calculated before the " + "output scales to infer if output is unsigned.")); is_unsigned = is_unsigned && scales_[input_var_name].first; min_scale = std::min( min_scale, @@ -132,11 +138,12 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale( auto rule = qconfig_->scale_algo(op_type_name, conn_name); if (rule == ScaleAlgo::NONE) return; - PADDLE_ENFORCE( - var_tensor.numel() > 0, - "MkldnnQuantizer: LoDTensor of variable %s for quantization of op " - "%s of connection %s should not be empty.", - var_name, op_type_name, conn_name); + PADDLE_ENFORCE_GT( + var_tensor.numel(), 0, + platform::errors::InvalidArgument( + "MkldnnQuantizer: LoDTensor
of variable %s for quantization of op " + "%s of connection %s should not be empty.", + var_name, op_type_name, conn_name)); switch (rule) { case ScaleAlgo::MAX: @@ -205,10 +212,11 @@ AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor( float min_val = eigen_tensor.minCoeff(); bool is_positive = min_val >= 0.0f; if (is_unsigned) - PADDLE_ENFORCE( - is_positive, - "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0", - min_val); + PADDLE_ENFORCE_EQ( + is_positive, true, + platform::errors::InvalidArgument( + "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0", + min_val)); int num_quantized_bins = 255; @@ -316,10 +324,11 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor( float max_abs = eigen_tensor.abs().maxCoeff(); float min_val = eigen_tensor.minCoeff(); if (is_unsigned) - PADDLE_ENFORCE( - min_val >= 0.0f, - "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0", - min_val); + PADDLE_ENFORCE_GE( + min_val, 0.0f, + platform::errors::InvalidArgument( + "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0", + min_val)); LoDTensor scale_tensor = CreateScaleTensor(); scale_tensor.data()[0] = 1.0 / max_abs; @@ -330,16 +339,19 @@ std::pair AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor( const LoDTensor& var_tensor, bool is_unsigned, bool is_transposed) const { - PADDLE_ENFORCE(var_tensor.dims().size() > 0, "Tensor dimension is empty."); + PADDLE_ENFORCE_GT( + var_tensor.dims().size(), 0, + platform::errors::InvalidArgument("Tensor dimension is empty.")); ConstEigenVectorArrayMap eigen_tensor{var_tensor.data(), var_tensor.numel(), 1}; float min_val = eigen_tensor.minCoeff(); if (is_unsigned) - PADDLE_ENFORCE( - min_val >= 0.0f, - "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0", - min_val); + PADDLE_ENFORCE_GE( + min_val, 0.0f, + platform::errors::InvalidArgument( + "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0", + min_val)); auto dims = var_tensor.dims(); constexpr int num_col_dims = 1; @@ -367,17 +379,19 @@ AnalysisPredictor::MkldnnQuantizer::Histogram( const framework::LoDTensor& var_tensor, float min_val, float max_val, size_t num_bins) const { PADDLE_ENFORCE_GT(num_bins, 0, - "MkldnnQuantizer: To calculate Histogram, num_bins (" + - std::to_string(num_bins) + ") must be positive."); - PADDLE_ENFORCE_GT( - var_tensor.numel(), 0, - "MkldnnQuantizer: To calculate Histogram, the tensor must not be empty."); - PADDLE_ENFORCE(max_val >= min_val, - "MkldnnQuantizer: To calculate Histogram, max_val (" + - std::to_string(max_val) + - ") must be greater or equal" - "to min_val (" + - std::to_string(min_val) + ")."); + platform::errors::InvalidArgument( + "MkldnnQuantizer: To calculate Histogram, num_bins (" + + std::to_string(num_bins) + ") must be positive.")); + PADDLE_ENFORCE_GT(var_tensor.numel(), 0, + platform::errors::InvalidArgument( + "MkldnnQuantizer: To calculate Histogram, the tensor " + "must not be empty.")); + PADDLE_ENFORCE_GE(max_val, min_val, + platform::errors::InvalidArgument( + "MkldnnQuantizer: To calculate Histogram, max_val (" + + std::to_string(max_val) + ") must be greater or equal " + "to min_val (" + + std::to_string(min_val) + ").")); ConstEigenVectorArrayMap eigen_tensor{var_tensor.data(), var_tensor.numel(), 1}; auto bin_width = std::abs(max_val - min_val) / num_bins; @@ -407,7 +421,8 @@ void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const { auto graph = std::unique_ptr(new
Graph(arg.main_program())); arg.SetMainGraph(graph.release()); auto* scope_ptr = arg.scope_ptr(); - PADDLE_ENFORCE(scope_ptr); + PADDLE_ENFORCE_NOT_NULL(scope_ptr, platform::errors::PreconditionNotMet( + "The scope should not be nullptr.")); arg.main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr); auto* builder = predictor_.config_.pass_builder(); @@ -441,7 +456,9 @@ bool AnalysisPredictor::MkldnnQuantizer::RunQuantizePasses() const { PrepareArgument(); auto& arg = predictor_.argument_; Analyzer().Run(&arg); - PADDLE_ENFORCE(arg.scope_valid()); + PADDLE_ENFORCE_EQ( + arg.scope_valid(), true, + platform::errors::PreconditionNotMet("The scope should be valid.")); VLOG(5) << "to prepare executor"; ARGUMENT_CHECK_FIELD((&arg), ir_analyzed_program); predictor_.inference_program_.reset( @@ -456,7 +473,8 @@ bool AnalysisPredictor::MkldnnQuantizer::RunWarmup() const { VLOG(3) << "Predictor: run a quantization warmup iteration"; auto warmup_data = qconfig_->warmup_data(); PADDLE_ENFORCE_NOT_NULL(warmup_data, - "Warmup data cannot be NULL in the config."); + platform::errors::PreconditionNotMet( + "Warmup data cannot be NULL in the config.")); PrettyLogH1("--- Running warmup iteration for quantization"); // Run the inference program @@ -469,7 +487,10 @@ bool AnalysisPredictor::MkldnnQuantizer::RunWarmup() const { float AnalysisPredictor::MkldnnQuantizer::SafeEntropy( std::vector reference_distr_P, int P_sum, std::vector candidate_distr_Q, int Q_sum) const { - PADDLE_ENFORCE_EQ(reference_distr_P.size(), candidate_distr_Q.size()); + PADDLE_ENFORCE_EQ(reference_distr_P.size(), candidate_distr_Q.size(), + platform::errors::InvalidArgument( + "The P size %d should be equal to Q size %d", + reference_distr_P.size(), candidate_distr_Q.size())); float tmp_sum1 = 0; float tmp_sum2 = 0; for (size_t idx = 0; idx < reference_distr_P.size(); idx++) { @@ -479,10 +500,11 @@ float AnalysisPredictor::MkldnnQuantizer::SafeEntropy( tmp_sum1 += 0; tmp_sum2 += 0; } else { - PADDLE_ENFORCE(q_idx != 0, "MkldnnQuantizer: Fatal error!, idx = " + - std::to_string(idx) + - " qindex = 0! p_idx = " + - std::to_string(p_idx)); + PADDLE_ENFORCE_NE( + q_idx, 0, + platform::errors::PreconditionNotMet( + "MkldnnQuantizer: Fatal error!, idx = " + std::to_string(idx) + + " qindex = 0! 
p_idx = " + std::to_string(p_idx))); } tmp_sum1 += p_idx * (log(Q_sum * p_idx)); tmp_sum2 += p_idx * (log(P_sum * q_idx)); diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 98a36a3308dc539ee5aecad9e71f50be310e584c..c19e77d2714bcfc18c2cf2a98511d31a97295daa 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -231,6 +231,10 @@ void CpuPassStrategy::EnableMkldnnQuantizer() { void CpuPassStrategy::EnableMkldnnBfloat16() { #ifdef PADDLE_WITH_MKLDNN + if (!use_mkldnn_bfloat16_) { + passes_.push_back("cpu_bfloat16_placement_pass"); + passes_.push_back("cpu_bfloat16_pass"); + } use_mkldnn_bfloat16_ = true; #else use_mkldnn_bfloat16_ = false; diff --git a/paddle/fluid/inference/tensorrt/convert/concat_op.cc b/paddle/fluid/inference/tensorrt/convert/concat_op.cc index 28afb87a891fb301b1b5108c9762bf6c88cefb96..5d63aa2ace86cb89917126f3a6fef9d0e9839e8c 100644 --- a/paddle/fluid/inference/tensorrt/convert/concat_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/concat_op.cc @@ -34,8 +34,11 @@ class ConcatOpConverter : public OpConverter { itensors.push_back(engine_->GetITensor(input_name)); } int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis")); - PADDLE_ENFORCE(axis > 0, - "The axis attr of Concat op should be large than 0 for trt"); + PADDLE_ENFORCE_GT(axis, 0, platform::errors::InvalidArgument( + "The axis attr of Concat" + " op should be larger than 0 for trt. " + "But received %d.", + axis)); auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(), itensors.size()); diff --git a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc index 10c212c0b4fa394e3c745bf524ef9d081c4bc3c1..aa03bc44bd629513d96cda541c0b7162629bfdc8 100644 --- a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc @@ -100,7 +100,9 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op, TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0}; auto* layer = fadd_layer(const_cast(X), n_output, n_input, nv_ksize, weight, bias); - PADDLE_ENFORCE(layer != nullptr); + PADDLE_ENFORCE_NOT_NULL(layer, + platform::errors::Fatal("TensorRT create conv2d" + " layer error.")); layer->setStride(nv_strides); layer->setPadding(nv_paddings); layer->setNbGroups(groups); diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc index c4f0855dbb1ca87b40c396692a812a3cbe06a7b8..dfadb28a6520f983986263b38be69fa48335d485 100644 --- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc @@ -43,13 +43,30 @@ class ElementwiseWeightOpConverter : public OpConverter { framework::OpDesc op_desc(op, nullptr); VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer"; - PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); - PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight - PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + PADDLE_ENFORCE_EQ( + op_desc.Input("X").size(), 1, + platform::errors::InvalidArgument( + "The input op's Input(\"X\").size() " + "should equal to 1, but received Input(\"X\").size() = %u.", + op_desc.Input("X").size())); + PADDLE_ENFORCE_EQ( + op_desc.Input("Y").size(), 1, + platform::errors::InvalidArgument( + "The input op's Input(\"Y\").size() " + "should equal to 
1, but received Input(\"Y\").size() = %u.", + op_desc.Input("Y").size())); // Y is a weight + PADDLE_ENFORCE_EQ( + op_desc.Output("Out").size(), 1, + platform::errors::InvalidArgument( + "The input op's Output(\"Out\").size() " + "should equal to 1, but received Output(\"Out\").size() = %u.", + op_desc.Output("Out").size())); auto* X = engine_->GetITensor(op_desc.Input("X").front()); auto* Y_v = scope.FindVar(op_desc.Input("Y").front()); - PADDLE_ENFORCE_NOT_NULL(Y_v); + PADDLE_ENFORCE_NOT_NULL( + Y_v, platform::errors::NotFound("Variable %s not found in scope.", + op_desc.Input("Y").front().c_str())); auto* Y_t = Y_v->GetMutable(); float* weight_data = nullptr; weight_data = @@ -176,9 +193,24 @@ class ElementwiseTensorOpConverter : public OpConverter { framework::OpDesc op_desc(op, nullptr); nvinfer1::ILayer* layer = nullptr; - PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); - PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight - PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + PADDLE_ENFORCE_EQ( + op_desc.Input("X").size(), 1, + platform::errors::InvalidArgument( + "The input op's Input(\"X\").size() " + "should equal to 1, but received Input(\"X\").size() = %u.", + op_desc.Input("X").size())); + PADDLE_ENFORCE_EQ( + op_desc.Input("Y").size(), 1, + platform::errors::InvalidArgument( + "The input op's Input(\"Y\").size() " + "should equal to 1, but received Input(\"Y\").size() = %u.", + op_desc.Input("Y").size())); // Y is a weight + PADDLE_ENFORCE_EQ( + op_desc.Output("Out").size(), 1, + platform::errors::InvalidArgument( + "The input op's Output(\"Out\").size() " + "should equal to 1, but received Output(\"Out\").size() = %u.", + op_desc.Output("Out").size())); auto* X = engine_->GetITensor(op_desc.Input("X").front()); auto* Y = engine_->GetITensor(op_desc.Input("Y").front()); diff --git a/paddle/fluid/inference/tensorrt/convert/io_converter.cc b/paddle/fluid/inference/tensorrt/convert/io_converter.cc index 854f434d93e81237dc85c5df62debcf3b3824b78..d9cf9e2e860018df594ac4d84a4d9fa9b9ba669f 100644 --- a/paddle/fluid/inference/tensorrt/convert/io_converter.cc +++ b/paddle/fluid/inference/tensorrt/convert/io_converter.cc @@ -29,38 +29,67 @@ class DefaultIOConverter : public EngineIOConverter { // NOTE out is GPU memory. virtual void operator()(const LoDTensor& in, void* out, size_t max_size) override { - PADDLE_ENFORCE(out != nullptr); - PADDLE_ENFORCE(stream_ != nullptr); + PADDLE_ENFORCE_NOT_NULL(out, + platform::errors::InvalidArgument( + "The input param 'out' must not be nullptr.")); + PADDLE_ENFORCE_NOT_NULL(stream_, + platform::errors::PreconditionNotMet( + "You should set up stream_ by SetStream() " + "before you call the operator().")); const auto& place = in.place(); size_t size = in.memory_size(); - PADDLE_ENFORCE_LE(size, max_size); + PADDLE_ENFORCE_LE( + size, max_size, + platform::errors::InvalidArgument( + "The input Tensor in's memory_size should be less than or equal to " + "the input max_size.
But in's memory_size = %u, max_size = %u.", + size, max_size)); if (is_cpu_place(place)) { - PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data(), size, - cudaMemcpyHostToDevice, *stream_)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemcpyAsync( + out, in.data(), size, cudaMemcpyHostToDevice, *stream_)); } else if (is_gpu_place(place)) { - PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data(), size, - cudaMemcpyDeviceToDevice, *stream_)); + PADDLE_ENFORCE_EQ( + 0, cudaMemcpyAsync(out, in.data(), size, + cudaMemcpyDeviceToDevice, *stream_), + platform::errors::External( + "cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error.")); } else { - PADDLE_THROW("Unknown device for converter"); + PADDLE_THROW(platform::errors::NotFound("Unknown device for converter")); } cudaStreamSynchronize(*stream_); } // NOTE in is GPU memory. virtual void operator()(const void* in, LoDTensor* out, size_t max_size) override { - PADDLE_ENFORCE(in != nullptr); - PADDLE_ENFORCE(stream_ != nullptr); + PADDLE_ENFORCE_NOT_NULL(in, + platform::errors::InvalidArgument( + "The input param 'in' must not be nullptr.")); + PADDLE_ENFORCE_NOT_NULL(stream_, + platform::errors::PreconditionNotMet( + "You should set up stream_ by SetStream() " + "before you call the operator().")); const auto& place = out->place(); size_t size = out->memory_size(); - PADDLE_ENFORCE_LE(size, max_size); + PADDLE_ENFORCE_LE( + size, max_size, + platform::errors::InvalidArgument( + "The input Tensor out's memory_size should be less than or equal " + "to the input max_size. " + "But out's memory_size = %u, max_size = %u.", + size, max_size)); if (is_cpu_place(place)) { PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data(), in, size, - cudaMemcpyDeviceToHost, *stream_)); + cudaMemcpyDeviceToHost, *stream_), + platform::errors::External( + "cudaMemcpyAsync(cudaMemcpyDeviceToHost) error.")); } else if (is_gpu_place(place)) { - PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data(), in, size, - cudaMemcpyDeviceToDevice, *stream_)); + PADDLE_ENFORCE_EQ( + 0, cudaMemcpyAsync(out->data(), in, size, + cudaMemcpyDeviceToDevice, *stream_), + platform::errors::External( + "cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error.")); } else { - PADDLE_THROW("Unknown device for converter"); + PADDLE_THROW(platform::errors::NotFound("Unknown device for converter")); } cudaStreamSynchronize(*stream_); } diff --git a/paddle/fluid/inference/tensorrt/convert/io_converter.h b/paddle/fluid/inference/tensorrt/convert/io_converter.h index 5daa242f6ab802a50fa6105f0102b817b700f461..58c178028b8b275b57f5c298534bd1d31aede234 100644 --- a/paddle/fluid/inference/tensorrt/convert/io_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/io_converter.h @@ -44,10 +44,14 @@ class EngineIOConverter { static void ConvertInput(const std::string& op_type, const LoDTensor& in, void* out, size_t max_size, cudaStream_t* stream) { - PADDLE_ENFORCE(stream != nullptr); + PADDLE_ENFORCE_NOT_NULL(stream, + platform::errors::InvalidArgument( + "The input stream must not be nullptr.")); auto* converter = Registry::Global().Lookup( op_type, "default" /* default_type */); - PADDLE_ENFORCE_NOT_NULL(converter); + PADDLE_ENFORCE_NOT_NULL( + converter, platform::errors::Unimplemented( + "The %s is not supported yet.", op_type.c_str())); converter->SetStream(stream); (*converter)(in, out, max_size); } @@ -55,10 +59,14 @@ class EngineIOConverter { static void ConvertOutput(const std::string& op_type, const void* in, LoDTensor* out, size_t max_size, cudaStream_t* stream) { - PADDLE_ENFORCE(stream != nullptr); +
PADDLE_ENFORCE_NOT_NULL(stream, + platform::errors::InvalidArgument( + "The input stream must not be nullptr.")); auto* converter = Registry::Global().Lookup( op_type, "default" /* default_type */); - PADDLE_ENFORCE_NOT_NULL(converter); + PADDLE_ENFORCE_NOT_NULL( + converter, platform::errors::Unimplemented( + "The %s is not supported yet.", op_type.c_str())); converter->SetStream(stream); (*converter)(in, out, max_size); } diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h index f4b0f5f23d8fda064c29534b56868beae79f65c0..ac0a04b9a116d907fd69c0ca58d3ae7e82921dab 100644 --- a/paddle/fluid/inference/tensorrt/convert/op_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h @@ -53,7 +53,12 @@ class OpConverter { OpConverter* it{nullptr}; if (op_desc.Type() == "mul") { - PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL); + PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL, + platform::errors::InvalidArgument( + "The input op mul's Input(\"Y\")." + "size() should equal to 1, but received " + "Input(\"Y\").size() = %u.", + op_desc.Input("Y").size())); std::string Y = op_desc.Input("Y")[0]; if (parameters.count(Y)) { it = Registry::Global().Lookup("fc"); @@ -66,38 +71,51 @@ class OpConverter { // static std::unordered_set add_weight_op_set {"add", "mul", // "sub", "div"}; static std::unordered_set add_weight_op_set{"add", "mul"}; - PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL); + PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL, + platform::errors::InvalidArgument( + "The input op's Input(\"Y\")." + "size() should equal to 1, but received " + "Input(\"Y\").size() = %u.", + op_desc.Input("Y").size())); int op_type_len = op_desc.Type().size(); std::string op_type = op_desc.Type().substr(op_type_len - 3, op_type_len); std::string Y = op_desc.Input("Y")[0]; if (parameters.count(Y)) { - PADDLE_ENFORCE(add_weight_op_set.count(op_type) > 0, - "Unsupported elementwise type" + op_type); + PADDLE_ENFORCE_GT( + add_weight_op_set.count(op_type), 0, + platform::errors::Unimplemented("Unsupported elementwise type %s", + op_type.c_str())); it = Registry::Global().Lookup("elementwise_" + op_type + "_weight"); - PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", - op_desc.Type()); + PADDLE_ENFORCE_NOT_NULL( + it, platform::errors::Unimplemented( + "no OpConverter for optype [%s]", op_desc.Type())); } else { - PADDLE_ENFORCE(add_tensor_op_set.count(op_type) > 0, - "Unsupported elementwise type" + op_type); + PADDLE_ENFORCE_GT( + add_tensor_op_set.count(op_type), 0, + platform::errors::Unimplemented("Unsupported elementwise type %s", + op_type.c_str())); it = Registry::Global().Lookup("elementwise_" + op_type + "_tensor"); } - PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", - op_desc.Type()); + PADDLE_ENFORCE_NOT_NULL( + it, platform::errors::Unimplemented("no OpConverter for optype [%s]", + op_desc.Type())); } if (op_desc.Type() == "depthwise_conv2d") { it = Registry::Global().Lookup("conv2d"); - PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", - op_desc.Type()); + PADDLE_ENFORCE_NOT_NULL( + it, platform::errors::Unimplemented("no OpConverter for optype [%s]", + op_desc.Type())); } if (!it) { it = Registry::Global().Lookup(op_desc.Type()); } - PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", - op_desc.Type()); + PADDLE_ENFORCE_NOT_NULL( + it, platform::errors::Unimplemented("no OpConverter for optype [%s]", + op_desc.Type())); it->SetEngine(engine); (*it)(op, scope,
test_mode); @@ -149,9 +167,13 @@ class OpConverter { for (auto& input : inputs) { if (parameters.count(input)) continue; auto* var = block_desc->FindVar(input); - PADDLE_ENFORCE(var, "no variable called %s", input); - PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR, - "TensorRT engine only takes LoDTensor as input"); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("no variable called %s in block.", + input.c_str())); + PADDLE_ENFORCE_EQ( + var->GetType(), FluidDT::VarType_Type_LOD_TENSOR, + platform::errors::InvalidArgument("TensorRT engine only takes " + "LoDTensor as input")); auto var_shape = var->GetShape(); if (engine->with_dynamic_shape()) { #if IS_TRT_VERSION_GE(6000) diff --git a/paddle/fluid/inference/tensorrt/convert/pad_op.cc b/paddle/fluid/inference/tensorrt/convert/pad_op.cc index a1b0f3b4310a020d4bbf8d7c04c9447d3e0e72f7..dd594404d3316ada6e20624c074368f241ca5cdd 100644 --- a/paddle/fluid/inference/tensorrt/convert/pad_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/pad_op.cc @@ -39,9 +39,22 @@ class PadOpConverter : public OpConverter { nvinfer1::Dims input_shape = input->getDimensions(); int nbDims = input_shape.nbDims; int pad_size = static_cast(paddings.size()); - PADDLE_ENFORCE_GE(nbDims, 2); - PADDLE_ENFORCE_EQ((nbDims + 1) * 2, pad_size); - PADDLE_ENFORCE(pad_value == 0.0, "The pad layer of TRT only support zero."); + PADDLE_ENFORCE_GE( + nbDims, 2, + platform::errors::InvalidArgument( + "Input X[0]'s dimension should be greater than or equal to 2. " + "But received %d.", + nbDims)); + PADDLE_ENFORCE_EQ( + (nbDims + 1) * 2, pad_size, + platform::errors::InvalidArgument("Input X[0]'s dimension(nbDims for " + "short) should meet the condition: " + "(nbDims + 1) * 2 == pad_size. But " + "received nbDims:%d, pad_size:%d.", + nbDims, pad_size)); + PADDLE_ENFORCE_EQ(pad_value, 0.0, + platform::errors::InvalidArgument( + "The pad layer of TRT only supports zero.")); nvinfer1::DimsHW pre_pad(paddings[pad_size - 4], paddings[pad_size - 2]); nvinfer1::DimsHW post_pad(paddings[pad_size - 3], paddings[pad_size - 1]); @@ -50,7 +63,9 @@ class PadOpConverter : public OpConverter { *const_cast(input), pre_pad, post_pad); - PADDLE_ENFORCE(layer != nullptr); + PADDLE_ENFORCE_NOT_NULL(layer, + platform::errors::External( + "Failed to add padding layer to the TensorRT engine.")); auto output_name = op_desc.Output("Out")[0]; RreplenishLayerAndOutput(layer, "pad", {output_name}, test_mode); } diff --git a/paddle/fluid/inference/tensorrt/convert/slice_op.cc b/paddle/fluid/inference/tensorrt/convert/slice_op.cc index 2a76317eea1b78d13b2ff9d49cc86020ae3cfe96..3c3fead3d361bdb87d8a52dc9a5e986da3975df3 100644 --- a/paddle/fluid/inference/tensorrt/convert/slice_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/slice_op.cc @@ -23,9 +23,8 @@ class SliceOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { -// This OP is implemented by trt dynamic shpae plugin. -// Dynamic shape plugin requires TRT version greater than 6.0. -#if IS_TRT_VERSION_GE(6000) + // This OP is implemented by trt dynamic shape plugin. + // Dynamic shape plugin requires TRT version greater than 6.0.
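The static-shape branch this patch adds to the slice converter normalizes negative starts/ends against the input dimensions before any plugin is built. A minimal standalone sketch of that normalization (hypothetical helper name, std-only, mirroring the arithmetic in the hunk below):

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

// Hypothetical helper mirroring the bounds normalization in the slice
// converter: negative indices count back from the end of the axis, and
// ends are clamped to the axis extent.
void NormalizeSliceBounds(const std::vector<int>& dims,
                          const std::vector<int>& axes,
                          std::vector<int>* starts, std::vector<int>* ends) {
  for (size_t i = 0; i < axes.size(); ++i) {
    int dim = dims[axes[i]];
    if ((*starts)[i] < 0) (*starts)[i] = std::max((*starts)[i] + dim, 0);
    if ((*ends)[i] < 0) (*ends)[i] = std::max((*ends)[i] + dim, 0);
    (*ends)[i] = std::min((*ends)[i], dim);
    assert((*ends)[i] > (*starts)[i]);  // mirrors the PADDLE_ENFORCE_GT check
  }
}
// e.g. dims = {1, 3, 224, 224}, axes = {2}, starts = {-100}, ends = {-1}
// yields starts = {124}, ends = {223}.
```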
VLOG(4) << "convert slice op to tensorrt layer"; framework::OpDesc op_desc(op, nullptr); // Declare inputs @@ -38,27 +37,65 @@ class SliceOpConverter : public OpConverter { std::vector ends = BOOST_GET_CONST(std::vector, op_desc.GetAttr("ends")); + PADDLE_ENFORCE_EQ( + starts.size(), axes.size(), + platform::errors::InvalidArgument( + "The size of starts must be equal to the size of axes.")); + PADDLE_ENFORCE_EQ( + ends.size(), axes.size(), + platform::errors::InvalidArgument( + "The size of ends must be equal to the size of axes.")); + + auto input_dims = input->getDimensions(); + if (!engine_->with_dynamic_shape()) { + // notice that input shape is [CHW] without batch axis when input has + // static shape + for (size_t i = input_dims.nbDims; i > 0; i--) { + input_dims.d[i] = input_dims.d[i - 1]; + } + input_dims.d[0] = 1; // fake batchsize, not useful here + for (size_t i = 0; i < axes.size(); i++) { + // split on batch is not supported in TensorRT + PADDLE_ENFORCE_NE(axes[i], 0, platform::errors::InvalidArgument( + "Invalid slice axis. Slice on batch " + "axis is not supported in TensorRT")); + if (starts[i] < 0) { + starts[i] = std::max(starts[i] + input_dims.d[axes[i]], 0); + } + if (ends[i] < 0) { + ends[i] = std::max(ends[i] + input_dims.d[axes[i]], 0); + } + ends[i] = std::min(ends[i], input_dims.d[axes[i]]); + PADDLE_ENFORCE_GT( + ends[i], starts[i], + platform::errors::InvalidArgument( + "Attr(ends) should be greater than attr(starts) in " + "slice op. But received ends = %d, starts = %d.", + ends[i], starts[i])); + } + } + nvinfer1::ILayer* layer = nullptr; if (engine_->with_dynamic_shape()) { +#if IS_TRT_VERSION_GE(6000) bool ban_fp16 = engine_->disable_trt_plugin_fp16(); plugin::SlicePluginDynamic* plugin = - new plugin::SlicePluginDynamic(starts, ends, ends, ban_fp16); + new plugin::SlicePluginDynamic(starts, ends, axes, ban_fp16); layer = engine_->AddPluginV2(&input, 1, plugin); - } else { +#else PADDLE_THROW(platform::errors::Fatal( - "You are running the Ernie(Bert) model in static" - "shape mode, which is not supported for the time being.\n" - "You can use the config.SetTRTDynamicShapeInfo(...) 
interface" - " to set the shape information to run the dynamic shape mode.")); + "You are running the TRT Dynamic Shape mode, need to confirm that " + "your TRT version is no less than 6.0")); +#endif + } else { + bool ban_fp16 = engine_->disable_trt_plugin_fp16(); + plugin::SlicePlugin* plugin = + new plugin::SlicePlugin(starts, ends, axes, ban_fp16); + layer = engine_->AddPlugin(&input, 1, plugin); } auto output_name = op_desc.Output("Out")[0]; - RreplenishLayerAndOutput(layer, "skip_layernorm", {output_name}, test_mode); -#else - PADDLE_THROW(platform::errors::Fatal( - "You are running the TRT Dynamic Shape mode, need to confirm that " - "your TRT version is no less than 6.0")); -#endif + RreplenishLayerAndOutput(layer, "slice", {output_name}, test_mode); } }; diff --git a/paddle/fluid/inference/tensorrt/convert/swish_op.cc b/paddle/fluid/inference/tensorrt/convert/swish_op.cc index 4b3e1c9e70a4a94808c94c81fcc773482f0574e4..e220d80f0d79da5eab98aa7a18a5093f9f4a55c4 100644 --- a/paddle/fluid/inference/tensorrt/convert/swish_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/swish_op.cc @@ -28,11 +28,20 @@ class SwishOpConverter : public OpConverter { framework::OpDesc op_desc(op, nullptr); // Declare inputs int input_num = op_desc.Input("X").size(); - PADDLE_ENFORCE(input_num == 1); + PADDLE_ENFORCE_EQ(input_num, 1, + platform::errors::InvalidArgument( + "The input X's size must equal to 1 in TRT swish op." + " But received X's size %d.", + input_num)); auto* input = engine_->GetITensor(op_desc.Input("X")[0]); // Get output size_t output_num = op_desc.Output("Out").size(); - PADDLE_ENFORCE(output_num == 1); + PADDLE_ENFORCE_EQ( + output_num, 1UL, + platform::errors::InvalidArgument( + "The ouput Out's size must equal to 1 in TRT swish op. " + "But received Out's size %u.", + output_num)); // Get attrs float beta = BOOST_GET_CONST(float, op_desc.GetAttr("beta")); diff --git a/paddle/fluid/inference/tensorrt/convert/ut_helper.h b/paddle/fluid/inference/tensorrt/convert/ut_helper.h index 3c48c8192f6b06e5a0ba005738383b46bc550ecb..cfb25eb2ba82763950babda5385649d31d2e9185 100644 --- a/paddle/fluid/inference/tensorrt/convert/ut_helper.h +++ b/paddle/fluid/inference/tensorrt/convert/ut_helper.h @@ -49,7 +49,10 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place, const platform::DeviceContext& ctx) { auto dims = tensor->dims(); size_t num_elements = analysis::AccuDims(dims, dims.size()); - PADDLE_ENFORCE_GT(num_elements, 0); + PADDLE_ENFORCE_GT( + num_elements, 0UL, + platform::errors::PermissionDenied("RandomizeTensor only can be used for " + "tensor which dims is not zero.")); platform::CPUPlace cpu_place; framework::LoDTensor temp_tensor; @@ -79,7 +82,8 @@ class TRTConvertValidation { scope_(scope), if_add_batch_(if_add_batch), max_batch_size_(max_batch_size) { - PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0); + PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0, + platform::errors::External("cudaStreamCreate error.")); engine_.reset(new TensorRTEngine(max_batch_size, workspace_size)); engine_->InitNetwork(); } @@ -154,7 +158,12 @@ class TRTConvertValidation { void Execute(int batch_size, std::unordered_set neglected_output = {}) { // Execute Fluid Op - PADDLE_ENFORCE_LE(batch_size, max_batch_size_); + PADDLE_ENFORCE_LE(batch_size, max_batch_size_, + platform::errors::InvalidArgument( + "Runtime batch_size should be less than or equal to " + "max_batch_size_. 
" + "But received batch_size:%d, max_batch_size_:%d", + batch_size, max_batch_size_)); platform::CUDADeviceContext ctx(place_); op_->Run(scope_, place_); cudaStreamSynchronize(stream_); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index a5b71356d0eca43555f4190b8cac2055a3eb679c..31128ba8c5d42acac0dff321adbc40dbb0ce0c19 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -31,6 +31,7 @@ struct SimpleOpTypeSetTeller : public Teller { teller_set.insert("fused_embedding_eltwise_layernorm"); teller_set.insert("multihead_matmul"); teller_set.insert("skip_layernorm"); + teller_set.insert("slice"); #endif } diff --git a/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu index 4fb1d8241084d7af787c32949b63819cddbfcb82..5c56270627a6fcb49eb0713d2282c224719fc38d 100644 --- a/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu @@ -26,8 +26,10 @@ namespace inference { namespace tensorrt { namespace plugin { -// Dynamic Plugin below. -#if IS_TRT_VERSION_GE(6000) +SlicePlugin *CreateSlicePluginDeserialize(const void *buffer, size_t length) { + return new SlicePlugin(buffer, length); +} +REGISTER_TRT_PLUGIN("slice_plugin", CreateSlicePluginDeserialize); template __global__ void SliceKernel(int num, int dims, const T *input, @@ -56,11 +58,196 @@ __global__ void SliceKernel(int num, int dims, const T *input, } } +SlicePlugin::SlicePlugin(std::vector starts, std::vector ends, + std::vector axes, bool ban_fp16) + : starts_(starts), ends_(ends), axes_(axes), ban_fp16_(ban_fp16) { + cudaEventCreate(©_event_); + cudaStreamCreate(©_stream_); +} + +SlicePlugin::SlicePlugin(void const *serial_data, size_t serial_length) { + deserializeBase(serial_data, serial_length); + DeserializeValue(&serial_data, &serial_length, &starts_); + DeserializeValue(&serial_data, &serial_length, &ends_); + DeserializeValue(&serial_data, &serial_length, &axes_); + DeserializeValue(&serial_data, &serial_length, &ban_fp16_); + cudaEventCreate(©_event_); + cudaStreamCreate(©_stream_); +} + +SlicePlugin::~SlicePlugin() { + cudaStreamDestroy(copy_stream_); + cudaEventDestroy(copy_event_); + cudaFree(offset_temp_data_); +} + +SlicePlugin *SlicePlugin::clone() const { + return new SlicePlugin(starts_, ends_, axes_, ban_fp16_); +} + +bool SlicePlugin::supportsFormat(nvinfer1::DataType type, + nvinfer1::PluginFormat format) const { +#ifdef SUPPORTS_CUDA_FP16 + return ((type == nvinfer1::DataType::kFLOAT || + type == nvinfer1::DataType::kHALF) && + (format == nvinfer1::PluginFormat::kNCHW)); +#else + return ((type == nvinfer1::DataType::kFLOAT) && + (format == nvinfer1::PluginFormat::kNCHW)); +#endif +} + +nvinfer1::Dims SlicePlugin::getOutputDimensions(int index, + const nvinfer1::Dims *inputs, + int nb_input_dims) { + auto in_dims = inputs[0]; + nvinfer1::Dims out_dims = in_dims; + for (size_t i = 0; i < axes_.size(); i++) { + int start = starts_[i]; + int end = ends_[i]; + out_dims.d[axes_[i] - 1] = end - start; + } + return out_dims; +} + +int SlicePlugin::enqueue(int batch_size, const void *const *inputs, + void **outputs, void *workspace, cudaStream_t stream) { + auto input_dims = getInputDims(0); + + // notice input dims is [C, H, W], add input batch dim here + auto out_dims = getOutputDimensions(0, &input_dims, 1); + input_dims.nbDims += 1; + out_dims.nbDims += 1; + for (auto i = 
input_dims.nbDims; i > 0; --i) { + input_dims.d[i] = input_dims.d[i - 1]; + out_dims.d[i] = out_dims.d[i - 1]; + } + input_dims.d[0] = batch_size; + out_dims.d[0] = batch_size; + + auto num_dims = input_dims.nbDims; + size_t out_num = ProductDim(out_dims); + + std::vector seg_offsets; + std::vector offsets; + std::vector extends; + + offsets.resize(num_dims); + extends.resize(num_dims); + seg_offsets.resize(num_dims); + + seg_offsets[num_dims - 1] = 1; + for (int i = num_dims - 2; i >= 0; i--) { + seg_offsets[i] = input_dims.d[i + 1] * seg_offsets[i + 1]; + } + for (size_t i = 0; i < num_dims; ++i) { + offsets[i] = 0; + extends[i] = out_dims.d[i]; + } + for (size_t i = 0; i < axes_.size(); ++i) { + offsets[axes_[i]] = starts_[i]; + } + + std::vector offset_info; + for (size_t i = 0; i < num_dims; ++i) { + offset_info.push_back(offsets[i]); + offset_info.push_back(extends[i]); + offset_info.push_back(seg_offsets[i]); + } + + if (offset_temp_data_ == nullptr) { + cudaMalloc(&offset_temp_data_, 3 * num_dims * sizeof(int)); + } + + cudaMemcpyAsync(offset_temp_data_, offset_info.data(), + sizeof(int) * 3 * num_dims, cudaMemcpyHostToDevice, + copy_stream_); + + cudaEventRecord(copy_event_, copy_stream_); + cudaStreamWaitEvent(stream, copy_event_, 0); + + int threads = 256; + int blocks = (out_num + threads - 1) / threads; + auto input_type = getDataType(); + if (input_type == nvinfer1::DataType::kFLOAT) { + const float *input1 = static_cast(inputs[0]); + float *output = static_cast(outputs[0]); + SliceKernel<<>>( + out_num, num_dims, input1, offset_temp_data_, output); + } else if (input_type == nvinfer1::DataType::kHALF) { +#ifdef SUPPORTS_CUDA_FP16 + const half *input1 = static_cast(inputs[0]); + half *output = static_cast(outputs[0]); + SliceKernel<<>>( + out_num, num_dims, input1, offset_temp_data_, output); +#else + PADDLE_THROW(platform::errors::Fatal( + "The cuda archs you specify should be greater than 600.")); +#endif + } else { + PADDLE_THROW(platform::errors::Fatal( + "The Slice TRT Plugin's input type should be float or half.")); + } + return cudaGetLastError() != cudaSuccess; +} + +size_t SlicePlugin::getSerializationSize() { + return getBaseSerializationSize() + SerializedSize(getPluginType()) + + SerializedSize(starts_) + SerializedSize(ends_) + + SerializedSize(axes_) + SerializedSize(ban_fp16_); +} + +void SlicePlugin::serialize(void *buffer) { + SerializeValue(&buffer, getPluginType()); + serializeBase(buffer); + SerializeValue(&buffer, starts_); + SerializeValue(&buffer, ends_); + SerializeValue(&buffer, axes_); + SerializeValue(&buffer, ban_fp16_); +} + +// Dynamic Plugin below.
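The new enqueue path above packs one (offset, extent, seg_offset) triple per dimension into offset_info before copying it to the device for SliceKernel. A standalone sketch of that packing under a row-major layout assumption (hypothetical names, std-only):

```cpp
#include <vector>

// Build the per-dimension (offset, extent, stride) triples consumed by the
// slice kernel, assuming row-major layout: the innermost stride is 1 and
// each outer stride is the product of the inner extents.
std::vector<int> BuildOffsetInfo(const std::vector<int>& in_dims,
                                 const std::vector<int>& out_dims,
                                 const std::vector<int>& offsets) {
  int num_dims = static_cast<int>(in_dims.size());
  std::vector<int> seg_offsets(num_dims);
  seg_offsets[num_dims - 1] = 1;
  for (int i = num_dims - 2; i >= 0; --i)
    seg_offsets[i] = in_dims[i + 1] * seg_offsets[i + 1];
  std::vector<int> offset_info;
  for (int i = 0; i < num_dims; ++i) {
    offset_info.push_back(offsets[i]);      // where the slice starts
    offset_info.push_back(out_dims[i]);     // how many elements to keep
    offset_info.push_back(seg_offsets[i]);  // input stride for this dim
  }
  return offset_info;
}
// e.g. in_dims = {2, 3, 4} gives strides {12, 4, 1}.
```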
+#if IS_TRT_VERSION_GE(6000) +SlicePluginDynamic::SlicePluginDynamic(std::vector starts, + std::vector ends, + std::vector axes, bool ban_fp16) + : starts_(starts), ends_(ends), axes_(axes), ban_fp16_(ban_fp16) { + cudaEventCreate(©_event_); + cudaStreamCreate(©_stream_); +} + +SlicePluginDynamic::SlicePluginDynamic(void const *serialData, + size_t serialLength) { + DeserializeValue(&serialData, &serialLength, &starts_); + DeserializeValue(&serialData, &serialLength, &ends_); + DeserializeValue(&serialData, &serialLength, &axes_); + DeserializeValue(&serialData, &serialLength, &ban_fp16_); + cudaEventCreate(©_event_); + cudaStreamCreate(©_stream_); +} + +void SlicePluginDynamic::destroy() { + cudaStreamDestroy(copy_stream_); + cudaEventDestroy(copy_event_); + cudaFree(offset_temp_data_); + delete this; +} + int SlicePluginDynamic::initialize() { return 0; } -size_t SlicePluginDynamic::getSerializationSize() const { return 0; } +size_t SlicePluginDynamic::getSerializationSize() const { + size_t size = SerializedSize(starts_) + SerializedSize(ends_) + + SerializedSize(axes_) + SerializedSize(ban_fp16_); -void SlicePluginDynamic::serialize(void *buffer) const {} + return size; +} + +void SlicePluginDynamic::serialize(void *buffer) const { + SerializeValue(&buffer, starts_); + SerializeValue(&buffer, ends_); + SerializeValue(&buffer, axes_); + SerializeValue(&buffer, ban_fp16_); +} nvinfer1::DimsExprs SlicePluginDynamic::getOutputDimensions( int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs, @@ -136,9 +323,9 @@ int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc, std::vector offsets; std::vector extends; - offsets.reserve(num_dims); - extends.reserve(num_dims); - seg_offsets.reserve(num_dims); + offsets.resize(num_dims); + extends.resize(num_dims); + seg_offsets.resize(num_dims); seg_offsets[num_dims - 1] = 1; for (int i = num_dims - 2; i >= 0; i--) { @@ -160,16 +347,16 @@ int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc, offset_info.push_back(seg_offsets[i]); } - framework::Tensor offset_temp_tensor; + if (offset_temp_data_ == nullptr) { + cudaMalloc(&offset_temp_data_, 3 * num_dims * sizeof(int)); + } - int device_id; - cudaGetDevice(&device_id); - offset_temp_tensor.Resize({3 * num_dims}); - auto *offset_temp_data = - offset_temp_tensor.mutable_data(platform::CUDAPlace(device_id)); + cudaMemcpyAsync(offset_temp_data_, offset_info.data(), + sizeof(int) * 3 * num_dims, cudaMemcpyHostToDevice, + copy_stream_); - cudaMemcpyAsync(offset_temp_data, offset_info.data(), - sizeof(int) * 3 * num_dims, cudaMemcpyHostToDevice, stream); + cudaEventRecord(copy_event_, copy_stream_); + cudaStreamWaitEvent(stream, copy_event_, 0); int threads = 256; int blocks = (out_num + threads - 1) / threads; @@ -178,13 +365,13 @@ int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc, const float *input1 = static_cast(inputs[0]); float *output = static_cast(outputs[0]); SliceKernel<<>>( - out_num, num_dims, input1, offset_temp_data, output); + out_num, num_dims, input1, offset_temp_data_, output); } else if (input_type == nvinfer1::DataType::kHALF) { #ifdef SUPPORTS_CUDA_FP16 const half *input1 = static_cast(inputs[0]); half *output = static_cast(outputs[0]); SliceKernel<<>>( - out_num, num_dims, input1, offset_temp_data, output); + out_num, num_dims, input1, offset_temp_data_, output); #else PADDLE_THROW(platform::errors::Fatal( "The cuda archs you specific should greater than 600.")); diff --git 
a/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.h index 13d86df131f6fff58dc896d802c8f3ad959b30bc..e36a270f05d9fee497fa1a033ed16faf08c08225 100644 --- a/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.h @@ -26,17 +26,56 @@ namespace inference { namespace tensorrt { namespace plugin { +class SlicePlugin : public PluginTensorRT { + public: + explicit SlicePlugin(std::vector starts, std::vector ends, + std::vector axes, bool ban_fp16); + + // It was used for tensorrt deserialization. + // It should not be called by users. + SlicePlugin(void const* serial_data, size_t serial_length); + ~SlicePlugin(); + SlicePlugin* clone() const override; + + const char* getPluginType() const override { return "slice_plugin"; } + int getNbOutputs() const override { return 1; } + int initialize() override { return 0; } + bool supportsFormat(nvinfer1::DataType type, + nvinfer1::PluginFormat format) const override; + nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, + int nb_input_dims) override; + int enqueue(int batch_size, const void* const* inputs, void** outputs, + void* workspace, cudaStream_t stream) override; + + protected: + size_t getSerializationSize() override; + + // TRT will call this func to serialize the configuration of TRT + // It should not be called by users. + void serialize(void* buffer) override; + + private: + std::vector starts_; + std::vector ends_; + std::vector axes_; + bool ban_fp16_{false}; + int* offset_temp_data_{nullptr}; + cudaEvent_t copy_event_; + cudaStream_t copy_stream_; +}; + #if IS_TRT_VERSION_GE(6000) class SlicePluginDynamic : public DynamicPluginTensorRT { public: explicit SlicePluginDynamic(std::vector starts, std::vector ends, - std::vector axes, bool ban_fp16) - : starts_(starts), ends_(ends), axes_(axes), ban_fp16_(ban_fp16) {} - SlicePluginDynamic(void const* serialData, size_t serialLength) {} + std::vector axes, bool ban_fp16); + nvinfer1::IPluginV2DynamicExt* clone() const override { return new SlicePluginDynamic(starts_, ends_, axes_, ban_fp16_); } + SlicePluginDynamic(void const* serialData, size_t serialLength); + const char* getPluginType() const override { return "slice_plugin"; } int getNbOutputs() const override { return 1; } int initialize() override; @@ -72,15 +111,54 @@ class SlicePluginDynamic : public DynamicPluginTensorRT { const nvinfer1::DataType* inputTypes, int nbInputs) const override; - void destroy() override { delete this; } + void destroy() override; private: std::vector starts_; std::vector ends_; std::vector axes_; - bool ban_fp16_{false}; + int* offset_temp_data_{nullptr}; + cudaEvent_t copy_event_; + cudaStream_t copy_stream_; }; + +class SlicePluginV2Creator : public nvinfer1::IPluginCreator { + public: + SlicePluginV2Creator() {} + const char* getPluginName() const override { return "slice_plugin"; } + + const char* getPluginVersion() const override { return "1"; } + + const nvinfer1::PluginFieldCollection* getFieldNames() override { + return &field_collection_; + } + + nvinfer1::IPluginV2* createPlugin( + const char* name, const nvinfer1::PluginFieldCollection* fc) override { + return nullptr; + } + + nvinfer1::IPluginV2* deserializePlugin(const char* name, + const void* serialData, + size_t serialLength) override { + auto plugin = new SlicePluginDynamic(serialData, serialLength); + return plugin; + } + + void setPluginNamespace(const char* libNamespace) override { + 
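// Record the namespace TensorRT assigns to this creator so that + // getPluginNamespace() below can report it back during deserialization. +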
namespace_ = libNamespace; + } + + const char* getPluginNamespace() const override { return namespace_.c_str(); } + + private: + std::string namespace_; + nvinfer1::PluginFieldCollection field_collection_; +}; + +REGISTER_TRT_PLUGIN_V2(SlicePluginV2Creator); + #endif } // namespace plugin diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index b3ec4b5714eb17032039eb234e148cdbd38c7877..d7d4f7969fa70a9a3f7c98f6cf15453c9b3eb251 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -9,6 +9,7 @@ if(WITH_GPU AND TENSORRT_FOUND) endif() function(download_data install_dir data_file) + string(REGEX MATCH "[^/\\]+$" data_file ${data_file}) if (NOT EXISTS ${install_dir}/${data_file}) inference_download_and_uncompress(${install_dir} ${INFERENCE_URL} ${data_file}) endif() @@ -480,10 +481,9 @@ if(WITH_GPU AND TENSORRT_FOUND) inference_download_and_uncompress(${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test "ernie_model_4_unserialized.tgz") endif() - # disable test_trt_dynamic_shape_ernie_ser_deser temporary - #inference_analysis_test(test_trt_dynamic_shape_ernie_ser_deser SRCS trt_dynamic_shape_ernie_deserialize_test.cc - # EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - # ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_unserialized) + inference_analysis_test(test_trt_dynamic_shape_ernie_ser_deser SRCS trt_dynamic_shape_ernie_deserialize_test.cc + EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} + ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_unserialized) endif() diff --git a/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc index f956c34f23ac7cc6ca06b9fcf411d0f2e9b29c54..2570325c24abcbb4bd459944480d3279f24fab1f 100644 --- a/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc @@ -245,8 +245,14 @@ TEST(Analyzer_bert, transfer_scope_cache) { // Since paddle::framework::global_transfer_scope_cache() and // paddle::framework::global_transfer_data_cache() are thread_local, // their pointer should be different among different thread id. 
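The rewritten checks below lean on the fact that a thread_local object is a distinct instance per thread, so collecting one cache pointer per thread yields exactly threads_num entries. A self-contained illustration of that property (std-only, hypothetical names, not part of the patch):

```cpp
#include <mutex>
#include <set>
#include <thread>
#include <vector>

thread_local int tls_cache;  // stands in for the thread_local scope cache

int main() {
  std::set<const int*> addresses;
  std::mutex mu;
  std::vector<std::thread> threads;
  const size_t threads_num = 4;
  for (size_t i = 0; i < threads_num; ++i) {
    threads.emplace_back([&] {
      std::lock_guard<std::mutex> guard(mu);
      addresses.insert(&tls_cache);  // one distinct address per thread
    });
  }
  for (auto& t : threads) t.join();
  return addresses.size() == threads_num ? 0 : 1;
}
```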
- PADDLE_ENFORCE(global_transfer_scope_cache.size(), threads_num); - PADDLE_ENFORCE(global_transfer_data_cache.size(), threads_num); + PADDLE_ENFORCE_EQ( + global_transfer_scope_cache.size(), threads_num, + paddle::platform::errors::Fatal( + "The size of scope cache is not equal to thread number.")); + PADDLE_ENFORCE_EQ( + global_transfer_data_cache.size(), threads_num, + paddle::platform::errors::Fatal( + "The size of data cache is not equal to thread number.")); } } // namespace inference diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_pd_tensor_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_pd_tensor_tester.cc index 0bc67aff7af1be9f34ffa2bb71c25d2964a62521..a9c24c4503f9f1b803c0d9fcde21199ef4089c41 100644 --- a/paddle/fluid/inference/tests/api/analyzer_capi_pd_tensor_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_capi_pd_tensor_tester.cc @@ -69,11 +69,13 @@ void PD_run() { PD_DeletePaddleTensor(input); int size; const int* out_shape = PD_GetPaddleTensorShape(out_data, &size); - CHECK(size == 2) << "The Output shape's size is NOT match."; + PADDLE_ENFORCE_EQ(size, 2, paddle::platform::errors::InvalidArgument( + "The Output shape's size does NOT match.")); std::vector ref_outshape_size({9, 6}); for (int i = 0; i < 2; ++i) { - CHECK(out_shape[i] == ref_outshape_size[i]) - << "The Output's shape is NOT match."; + PADDLE_ENFORCE_EQ(out_shape[i], ref_outshape_size[i], + paddle::platform::errors::InvalidArgument( + "The Output's shape does NOT match.")); } PD_DeletePaddleBuf(buf); } diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc index d76799a679cbf27700c6d9af4f2e2e50c5e33e35..fd20581123c10f8c569e7765c7a0bf17ddd1d0b9 100644 --- a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc @@ -36,9 +36,9 @@ void zero_copy_run() { PD_SwitchIrDebug(config, true); PD_SetModel(config, prog_file.c_str(), params_file.c_str()); bool use_feed_fetch = PD_UseFeedFetchOpsEnabled(config); - CHECK(!use_feed_fetch) << "NO"; + EXPECT_FALSE(use_feed_fetch); bool specify_input_names = PD_SpecifyInputName(config); - CHECK(specify_input_names) << "NO"; + EXPECT_TRUE(specify_input_names); const int batch_size = 1; const int channels = 3; @@ -85,13 +85,13 @@ TEST(PD_AnalysisConfig, profile_mkldnn) { PD_SwitchIrDebug(config, true); PD_EnableMKLDNN(config); bool mkldnn_enable = PD_MkldnnEnabled(config); - CHECK(mkldnn_enable) << "NO"; + EXPECT_TRUE(mkldnn_enable); PD_EnableMkldnnQuantizer(config); bool quantizer_enable = PD_MkldnnQuantizerEnabled(config); - CHECK(quantizer_enable) << "NO"; + EXPECT_TRUE(quantizer_enable); PD_EnableMkldnnBfloat16(config); bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config); - CHECK(bfloat16_enable) << "NO"; + EXPECT_TRUE(bfloat16_enable); PD_SetMkldnnCacheCapacity(config, 0); PD_SetModel(config, prog_file.c_str(), params_file.c_str()); PD_DeleteAnalysisConfig(config); diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc index 00a475b6047e8215264c664dd3c775b9687eb0ff..d61c28c30d203acf4dd48e1461a881d61f8ec263 100644 --- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc @@ -126,7 +126,9 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, std::string turn_mask_pre = "turn_mask_"; auto one_batch = data->NextBatch(); -
PADDLE_ENFORCE(!one_batch.response.empty()); + PADDLE_ENFORCE( + !one_batch.response.empty(), + paddle::platform::errors::Fatal("The response of one batch is empty.")); int size = one_batch.response[0].size(); CHECK_EQ(size, kMaxTurnLen); // turn tensor assignment @@ -214,11 +216,17 @@ void profile(bool use_mkldnn = false) { input_slots_all, &outputs, FLAGS_num_threads); if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { - PADDLE_ENFORCE_GT(outputs.size(), 0); + PADDLE_ENFORCE_GT(outputs.size(), 0, + paddle::platform::errors::Fatal( + "The size of outputs should be greater than 0.")); auto output = outputs.back(); - PADDLE_ENFORCE_GT(output.size(), 0); + PADDLE_ENFORCE_GT(output.size(), 0, + paddle::platform::errors::Fatal( + "The size of output should be greater than 0.")); size_t size = GetSize(output[0]); - PADDLE_ENFORCE_GT(size, 0); + PADDLE_ENFORCE_GT(size, 0, + paddle::platform::errors::Fatal( + "The size of output should be greater than 0.")); float *result = static_cast(output[0].data.data()); for (size_t i = 0; i < size; i++) { EXPECT_NEAR(result[i], result_data[i], 1e-3); diff --git a/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc b/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc index 7f06a3b9023ba3e907c9731d576f014a3e451113..91a3233b9851f1def7717d04c4c9df5275a805ee 100644 --- a/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc @@ -146,8 +146,9 @@ std::shared_ptr> GetWarmupData( auto iterations = test_data.size(); PADDLE_ENFORCE_LE( static_cast(num_images), iterations * test_data_batch_size, - "The requested quantization warmup data size " + - std::to_string(num_images) + " is bigger than all test data size."); + paddle::platform::errors::Fatal( + "The requested quantization warmup data size " + + std::to_string(num_images) + " is bigger than all test data size.")); PaddleTensor images; images.name = "image"; @@ -237,8 +238,9 @@ std::shared_ptr> GetWarmupData( } PADDLE_ENFORCE_EQ( static_cast(num_objects), static_cast(objects_accum), - "The requested num of objects " + std::to_string(num_objects) + - " is the same as objects_accum."); + paddle::platform::errors::Fatal("The requested num of objects " + + std::to_string(num_objects) + + " is the same as objects_accum.")); auto warmup_data = std::make_shared>(4); (*warmup_data)[0] = std::move(images); diff --git a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc index 142905dcd8d9964d93d0c5f7444823eef2b84900..bd3a1d737afb1ba230015fbd602c493f33952ffb 100644 --- a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc @@ -98,7 +98,9 @@ void GetOneBatch(std::vector *input_slots, DataRecord *data, input_tensor.name = "word"; input_tensor.dtype = PaddleDType::INT64; TensorAssignData(&input_tensor, {one_batch.data}, one_batch.lod); - PADDLE_ENFORCE_EQ(batch_size, static_cast(one_batch.lod.size() - 1)); + PADDLE_ENFORCE_EQ( + batch_size, static_cast(one_batch.lod.size() - 1), + paddle::platform::errors::Fatal("The lod size of one batch is invaild.")); input_slots->assign({input_tensor}); } @@ -137,12 +139,17 @@ TEST(Analyzer_LAC, profile) { 24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25, 25, 25, 25, 25, 44, 24, 25, 25, 25, 36, 42, 43, 44, 14, 15, 44, 14, 15, 44, 14, 15, 44, 38, 39, 14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23}; - 
PADDLE_ENFORCE_GT(outputs.size(), 0); + PADDLE_ENFORCE_GT(outputs.size(), 0, + paddle::platform::errors::Fatal( + "The size of output should be greater than 0.")); auto output = outputs.back(); - PADDLE_ENFORCE_EQ(output.size(), 1UL); + PADDLE_ENFORCE_EQ(output.size(), 1UL, + paddle::platform::errors::Fatal( + "The size of output should be equal to 1.")); size_t size = GetSize(output[0]); size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t); - PADDLE_ENFORCE_GE(size, batch1_size); + PADDLE_ENFORCE_GE(size, batch1_size, paddle::platform::errors::Fatal( + "The size of batch is invalid.")); int64_t *pdata = static_cast(output[0].data.data()); for (size_t i = 0; i < batch1_size; ++i) { EXPECT_EQ(pdata[i], lac_ref_data[i]); diff --git a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc index 2a862b1395c222cf6d23216c9d4cf9196ffb519c..50a68361d536f5aab3ed2a6bafe60f2438a9c129 100644 --- a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc @@ -117,11 +117,17 @@ void profile(bool memory_load = false) { // the first inference result const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26, 48, 39, 38, 16, 25}; - PADDLE_ENFORCE_GT(outputs.size(), 0); + PADDLE_ENFORCE_GT(outputs.size(), 0, + paddle::platform::errors::Fatal( + "The size of output should be greater than 0.")); auto output = outputs.back(); - PADDLE_ENFORCE_EQ(output.size(), 1UL); + PADDLE_ENFORCE_EQ(output.size(), 1UL, + paddle::platform::errors::Fatal( + "The size of output should be equal to 1.")); size_t size = GetSize(output[0]); - PADDLE_ENFORCE_GT(size, 0); + PADDLE_ENFORCE_GT(size, 0, + paddle::platform::errors::Fatal( + "The size of output should be greater than 0.")); int64_t *result = static_cast(output[0].data.data()); for (size_t i = 0; i < std::min(11, size); i++) { EXPECT_EQ(result[i], chinese_ner_result_data[i]); diff --git a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc index 06a8e01b10c6eb70fe2cbac19725d96281863c29..bb1f0e8cd6334bab83973fb7d314f7017edd9e90 100644 --- a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc @@ -136,11 +136,17 @@ TEST(Analyzer_Pyramid_DNN, profile) { input_slots_all, &outputs, FLAGS_num_threads); if (FLAGS_num_threads == 1 && !FLAGS_test_all_data && !FLAGS_zero_copy) { - PADDLE_ENFORCE_GT(outputs.size(), 0); + PADDLE_ENFORCE_GT(outputs.size(), 0, + paddle::platform::errors::Fatal( + "The size of output should be greater than 0.")); auto output = outputs.back(); - PADDLE_ENFORCE_EQ(output.size(), 1UL); + PADDLE_ENFORCE_EQ(output.size(), 1UL, + paddle::platform::errors::Fatal( + "The size of output should be equal to 1.")); size_t size = GetSize(output[0]); - PADDLE_ENFORCE_GT(size, 0); + PADDLE_ENFORCE_GT(size, 0, + paddle::platform::errors::Fatal( + "The size of output should be greater than 0.")); float *result = static_cast(output[0].data.data()); // output is probability, which is in (0, 1).
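As the comment above notes, these testers treat the fetched floats as probabilities. A small sketch of the range check implied for the loop that follows (gtest-style, hypothetical helper, not part of the patch):

```cpp
#include <gtest/gtest.h>

// Hypothetical helper mirroring the assertion pattern: every fetched
// element must be a probability in the open interval (0, 1).
void CheckProbabilityOutput(const float* result, size_t size) {
  for (size_t i = 0; i < size; ++i) {
    EXPECT_GT(result[i], 0.f);
    EXPECT_LT(result[i], 1.f);
  }
}
```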
for (size_t i = 0; i < size; i++) { diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc index 9ccbf58cbd2bbaab9b1a132c27e50356e1a5df37..34a0a5f398d7fee0f8e44f0ad59ff9711263b575 100644 --- a/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc @@ -135,11 +135,17 @@ TEST(Analyzer_rnn2, profile) { if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { // the first inference result - PADDLE_ENFORCE_GT(outputs.size(), 0); + PADDLE_ENFORCE_GT(outputs.size(), 0, + paddle::platform::errors::Fatal( + "The size of output should be greater than 0.")); auto output = outputs.back(); - PADDLE_ENFORCE_GT(output.size(), 0); + PADDLE_ENFORCE_GT(output.size(), 0, + paddle::platform::errors::Fatal( + "The size of output should be greater than 0.")); size_t size = GetSize(output[0]); - PADDLE_ENFORCE_GT(size, 0); + PADDLE_ENFORCE_GT(size, 0, + paddle::platform::errors::Fatal( + "The size of output should be greater than 0.")); float *result = static_cast(output[0].data.data()); for (size_t i = 0; i < size; i++) { EXPECT_NEAR(result[i], result_data[i], 1e-3); diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc index e3f8b835f78371170aaf107e1b2d1ca41b300e56..978aaf1c6a32d5b4ec8f2d06b8873af892705da5 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc @@ -47,7 +47,8 @@ struct DataRecord { num_lines++; std::vector data; split(line, '\t', &data); - PADDLE_ENFORCE(data.size() >= 4); + PADDLE_ENFORCE_GE(data.size(), 4, paddle::platform::errors::Fatal( + "The size of data is invalid.")); // load title1 data std::vector title1_data; split_to_int64(data[0], ' ', &title1_data); @@ -120,11 +121,17 @@ TEST(Analyzer_seq_conv1, profile) { if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { // the first inference result - PADDLE_ENFORCE_GT(outputs.size(), 0); + PADDLE_ENFORCE_GT(outputs.size(), 0, + paddle::platform::errors::Fatal( + "The size of output should be greater than 0.")); auto output = outputs.back(); - PADDLE_ENFORCE_EQ(output.size(), 1UL); + PADDLE_ENFORCE_EQ(output.size(), 1UL, + paddle::platform::errors::Fatal( + "The size of output should be equal to 1.")); size_t size = GetSize(output[0]); - PADDLE_ENFORCE_GT(size, 0); + PADDLE_ENFORCE_GT(size, 0, + paddle::platform::errors::Fatal( + "The size of output should be greater than 0.")); float *result = static_cast(output[0].data.data()); // output is probability, which is in (0, 1).
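// A small sketch of the printf-style variant of the same pattern (used by
// the seq_pool1 tester below): the platform::errors builders accept a format
// string plus arguments, so the offending values appear in the message.
// Illustrative only; assumes paddle/fluid/platform/enforce.h.
#include <string>
#include "paddle/fluid/platform/enforce.h"

void CheckSlotDivisible(size_t slot_size, int num_lines,
                        const std::string &name) {
  PADDLE_ENFORCE_EQ(slot_size % 11, 0UL,
                    paddle::platform::errors::Fatal(
                        "line %d, %s should be divisible", num_lines, name));
}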
for (size_t i = 0; i < size; i++) { diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc index 56f706ae56bda8b06eba5dd9e080552aa9785c6e..9f1556cdb871aa3e5bbe613aa98299c162661c42 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc @@ -56,20 +56,26 @@ struct DataRecord { std::vector slot_data; split_to_float(data[1], ' ', &slot_data); std::string name = data[0]; - PADDLE_ENFORCE_EQ(slot_data.size() % 11, 0UL, - "line %d, %s should be divisible", num_lines, name); + PADDLE_ENFORCE_EQ( + slot_data.size() % 11, 0UL, + paddle::platform::errors::Fatal("line %d, %s should be divisible", + num_lines, name)); datasets[name].emplace_back(std::move(slot_data)); } num_samples = num_lines / num_slots; - PADDLE_ENFORCE_EQ(num_samples * num_slots, static_cast(num_lines), - "num samples should be divisible"); - PADDLE_ENFORCE_GT(num_samples, 0UL); + PADDLE_ENFORCE_EQ( + num_samples * num_slots, static_cast(num_lines), + paddle::platform::errors::Fatal("num samples should be divisible")); + PADDLE_ENFORCE_GT(num_samples, 0UL, + paddle::platform::errors::Fatal( + "The num of samples should be greater than 0.")); } void Prepare(int bs) { for (auto it = datasets.begin(); it != datasets.end(); ++it) { - PADDLE_ENFORCE_EQ(it->second.size(), num_samples, - "size of each slot should be equal"); + PADDLE_ENFORCE_EQ( + it->second.size(), num_samples, + paddle::platform::errors::Fatal("size of each slot should be equal")); } size_t num_batches = num_samples / bs; EXPECT_GT(num_batches, 0UL); @@ -90,8 +96,10 @@ struct DataRecord { std::copy(datas[id].begin(), datas[id].end(), std::back_inserter(slot.data[k])); size_t len = datas[id].size() / 11; - PADDLE_ENFORCE_EQ(len * 11, datas[id].size(), - "%s %d size should be divisible", slot.name, id); + PADDLE_ENFORCE_EQ( + len * 11, datas[id].size(), + paddle::platform::errors::Fatal("%s %d size should be divisible", + slot.name, id)); lod[k + 1] = lod[k] + len; } slot.shape.assign({static_cast(lod[bs]), 11}); diff --git a/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc index 78e500b2ed530d5a1dce8a7927538fdd0bbb6907..ae38bcbc20a9f44eb6ef5c313b318dec38a30550 100644 --- a/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc @@ -22,7 +22,9 @@ struct DataReader { : file(new std::ifstream(path)) {} bool NextBatch(std::vector *input, int batch_size) { - PADDLE_ENFORCE_EQ(batch_size, 1); + PADDLE_ENFORCE_EQ(batch_size, 1, + paddle::platform::errors::Fatal( + "The size of batch should be equal to 1.")); std::string line; PaddleTensor tensor; tensor.dtype = PaddleDType::INT64; @@ -81,7 +83,9 @@ TEST(Analyzer_Text_Classification, profile) { if (FLAGS_num_threads == 1) { // Get output - PADDLE_ENFORCE_GT(outputs.size(), 0); + PADDLE_ENFORCE_GT(outputs.size(), 0, + paddle::platform::errors::Fatal( + "The size of output should be greater than 0.")); LOG(INFO) << "get outputs " << outputs.back().size(); for (auto &output : outputs.back()) { LOG(INFO) << "output.shape: " << to_string(output.shape); diff --git a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc index 65755b7b15ad54e38e398a82db41a0b9d8fc59e3..a2ced21a9ac9ad10c2b067a60597eee9fdff9eeb 100644 --- 
a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc @@ -59,7 +59,9 @@ void SetConfig(AnalysisConfig *cfg) { } void SetInput(std::vector> *inputs) { - PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data."); + PADDLE_ENFORCE_EQ( + FLAGS_test_all_data, 0, + paddle::platform::errors::Fatal("Only have single batch of data.")); std::string line; std::ifstream file(FLAGS_infer_data); std::getline(file, line); @@ -99,7 +101,9 @@ void profile(bool use_mkldnn = false) { auto refer = ProcessALine(line); file.close(); - PADDLE_ENFORCE_GT(outputs.size(), 0); + PADDLE_ENFORCE_GT(outputs.size(), 0, + paddle::platform::errors::Fatal( + "The size of output should be greater than 0.")); auto &output = outputs.back().front(); size_t numel = output.data.length() / PaddleDtypeSize(output.dtype); CHECK_EQ(numel, refer.data.size()); diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc index 524e08891f4e90d8a322822e26d75689526d30f5..685f7b6600e4d73731860135469a3072d8ce7f9a 100644 --- a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc +++ b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc @@ -12,15 +12,34 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include #include #include +#include #include "paddle/fluid/inference/tests/api/trt_test_helper.h" namespace paddle { namespace inference { +int DeleteCache(std::string path) { + DIR* dir = opendir(path.c_str()); + if (dir == NULL) return 0; + struct dirent* ptr; + while ((ptr = readdir(dir)) != NULL) { + if (std::strcmp(ptr->d_name, ".") == 0 || + std::strcmp(ptr->d_name, "..") == 0) { + continue; + } else if (ptr->d_type == DT_REG) { + std::string file_rm = path + "/" + ptr->d_name; + remove(file_rm.c_str()); + } + } + closedir(dir); + return 0; +} + void run(const AnalysisConfig& config, std::vector* out_data) { auto predictor = CreatePaddlePredictor(config); auto input_names = predictor->GetInputNames(); @@ -86,6 +105,11 @@ void run(const AnalysisConfig& config, std::vector* out_data) { void trt_ernie(bool with_fp16, std::vector result) { AnalysisConfig config; std::string model_dir = FLAGS_infer_model; + // Delete serialization cache to perform serialization first rather than + // deserialization. + std::string opt_cache_dir = FLAGS_infer_model + "/_opt_cache"; + DeleteCache(opt_cache_dir); + SetConfig(&config, model_dir, true /* use_gpu */); config.SwitchUseFeedFetchOps(false); diff --git a/paddle/fluid/inference/tests/test_helper.h b/paddle/fluid/inference/tests/test_helper.h index 7183cbac71562bfe4092bf78270096996b74c525..1457f5337e3ed05b74d247d65e2f6b2f7f6735d3 100644 --- a/paddle/fluid/inference/tests/test_helper.h +++ b/paddle/fluid/inference/tests/test_helper.h @@ -21,6 +21,7 @@ limitations under the License. */
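// A standalone sketch of the cache cleanup added in the TensorRT test above:
// walk the _opt_cache directory with POSIX dirent and unlink every regular
// file, so the test always exercises serialization before deserialization.
// Note that d_type is a glibc/BSD extension; illustrative only.
#include <dirent.h>
#include <cstdio>
#include <cstring>
#include <string>

void RemoveRegularFiles(const std::string &path) {
  DIR *dir = opendir(path.c_str());
  if (dir == nullptr) return;  // nothing to clean
  struct dirent *entry;
  while ((entry = readdir(dir)) != nullptr) {
    if (std::strcmp(entry->d_name, ".") == 0 ||
        std::strcmp(entry->d_name, "..") == 0) continue;
    if (entry->d_type == DT_REG) {  // regular files only
      std::string file = path + "/" + entry->d_name;
      std::remove(file.c_str());
    }
  }
  closedir(dir);
}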
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/inference/io.h" +#include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/port.h" #include "paddle/fluid/platform/profiler.h" @@ -162,7 +163,8 @@ void TestInference(const std::string& dirname, // int device_id = place.GetDeviceId(); paddle::platform::SetDeviceId(0); #else - PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); + PADDLE_THROW(paddle::platform::errors::Unavailable( + "'CUDAPlace' is not supported in CPU only device.")); #endif } diff --git a/paddle/fluid/memory/allocation/best_fit_allocator_test.cu b/paddle/fluid/memory/allocation/best_fit_allocator_test.cu index eb24ba84c886e3393cf36b6f764d7b33e76defeb..59c14103ca67dbf325928a9aee73d903d7d9e9e3 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator_test.cu +++ b/paddle/fluid/memory/allocation/best_fit_allocator_test.cu @@ -16,6 +16,7 @@ #include #include // NOLINT #include + #include "gtest/gtest.h" #include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/cuda_allocator.h" @@ -41,12 +42,14 @@ TEST(BestFitAllocator, concurrent_cuda) { LockedAllocator concurrent_allocator( std::unique_ptr(new BestFitAllocator(cuda_allocation.get()))); + platform::CUDAPlace gpu(0); + platform::CUDADeviceContext dev_ctx(gpu); + auto th_main = [&](std::random_device::result_type seed) { std::default_random_engine engine(seed); std::uniform_int_distribution dist(1U, 1024U); - platform::CUDAPlace gpu(0); - platform::CUDADeviceContext dev_ctx(gpu); std::array buf; + for (size_t i = 0; i < 128; ++i) { size_t allocate_size = dist(engine); diff --git a/paddle/fluid/operators/arg_min_max_op_base.h b/paddle/fluid/operators/arg_min_max_op_base.h index c296ddcfbef703e8484b6ea0b7f96f037e415186..57e1c06f73c56334fc93dee7a16d6899f5a6f12a 100644 --- a/paddle/fluid/operators/arg_min_max_op_base.h +++ b/paddle/fluid/operators/arg_min_max_op_base.h @@ -110,10 +110,12 @@ struct VisitDataArgMinMaxFunctor { CALL_ARG_MINMAX_FUNCTOR(6); break; default: - PADDLE_THROW( - "%s operator doesn't supports tensors whose ranks are greater " - "than 6.", - (EnumArgMinMaxValue == kArgMin ? "argmin" : "argmax"));
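// A condensed sketch of the replacement that follows: the unconditional
// throw in the default branch becomes an explicit bound check, so the rank
// limit (<= 6) is stated in the check itself. Illustrative only; assumes
// paddle/fluid/platform/enforce.h.
#include "paddle/fluid/platform/enforce.h"

void GuardArgMinMaxRank(int rank, bool is_argmin) {
  PADDLE_ENFORCE_LE(rank, 6,
                    paddle::platform::errors::InvalidArgument(
                        "%s operator doesn't support tensors whose ranks "
                        "are greater than 6.",
                        (is_argmin ? "argmin" : "argmax")));
}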
"argmin" : "argmax"))); break; #undef CALL_ARG_MINMAX_FUNCTOR } @@ -164,7 +166,8 @@ class ArgMinMaxOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( axis, x_dims.size(), platform::errors::InvalidArgument( - "'axis'(%d) must be less than Rank(X)(%d).", axis, x_dims.size())); + "'axis'(%d) must be less than Rank(X)(%d) of Input(X).", axis, + x_dims.size())); const int& dtype = ctx->Attrs().Get("dtype"); PADDLE_ENFORCE_EQ( @@ -192,10 +195,11 @@ class ArgMinMaxOp : public framework::OperatorWithKernel { } PADDLE_ENFORCE_LE( all_element_num, INT_MAX, - "The element num of the argmin/argmax input at axis is " - "%d, is larger than int32 maximum value:%d, you must " - "set the dtype of argmin/argmax to 'int64'.", - all_element_num, INT_MAX); + platform::errors::InvalidArgument( + "The element num of the argmin/argmax input at axis is " + "%d, is larger than int32 maximum value:%d, you must " + "set the dtype of argmin/argmax to 'int64'.", + all_element_num, INT_MAX)); } } std::vector vec; diff --git a/paddle/fluid/operators/assign_op.h b/paddle/fluid/operators/assign_op.h index 6ce04d19fc4376e4263712e2904e480e26590553..c2154f78bbe97418f2c7388a000dc833134d0c84 100644 --- a/paddle/fluid/operators/assign_op.h +++ b/paddle/fluid/operators/assign_op.h @@ -52,7 +52,10 @@ class AssignFunctor { template void operator()(const T &v) const { - PADDLE_THROW("Not support type for assign op %s", typeid(T).name()); + PADDLE_ENFORCE_EQ( + true, false, + platform::errors::PermissionDenied( + "Not support type for assign op with type %s", typeid(T).name())); } private: diff --git a/paddle/fluid/operators/cudnn_lstm_cache.h b/paddle/fluid/operators/cudnn_lstm_cache.h new file mode 100644 index 0000000000000000000000000000000000000000..4b46e2b475e8bd59c59744bdfde7bfb1248bc99a --- /dev/null +++ b/paddle/fluid/operators/cudnn_lstm_cache.h @@ -0,0 +1,166 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/platform/cudnn_helper.h" +#include "paddle/fluid/platform/dynload/cudnn.h" + +namespace paddle { +namespace operators { + +class ScopedRNNBase { + public: + ScopedRNNBase(int seq_length, int batch_size, int input_size, int hidden_size, + int num_layers, float dropout_prob, int seed, int weight_numel, + bool initialized, bool is_bidirec) + : seq_length_(seq_length), + batch_size_(batch_size), + input_size_(input_size), + hidden_size_(hidden_size), + num_layers_(num_layers), + dropout_prob_(dropout_prob), + seed_(seed), + weight_numel_(weight_numel), + initialized_(initialized), + is_bidirec_(is_bidirec) {} + + template + void Create(const cudnnHandle_t& handle, const platform::Place& place, + const std::vector& sequence_length, size_t* workspace_size, + size_t* reserve_size, framework::Tensor* dropout_state) { + int numDirections = is_bidirec_ ? 
2 : 1; + cudnnDataType_t cudnn_type = platform::CudnnDataType::type; + + // ------------------- cudnn x, y descriptors --------------------- + std::vector dims_x = {batch_size_, input_size_, 1}; + std::vector strides_x = {input_size_, 1, 1}; + std::vector dims_y = {batch_size_, hidden_size_ * numDirections, 1}; + std::vector strides_y = {hidden_size_ * numDirections, 1, 1}; + for (int i = 0; i < seq_length_; ++i) { + x_descs_.emplace_back(x_desc_.descriptor(dims_x, strides_x)); + y_descs_.emplace_back(y_desc_.descriptor(dims_y, strides_y)); + } + if (!sequence_length.empty()) { + x_seq_desc_.descriptor(seq_length_, batch_size_, input_size_, true, + sequence_length); + y_seq_desc_.descriptor(seq_length_, batch_size_, + hidden_size_ * numDirections, true, + sequence_length); + } + + // ------------------- cudnn hx, hy, cx, cy descriptors---------- + std::vector dims_hx = {num_layers_ * numDirections, batch_size_, + hidden_size_}; + std::vector strides_hx = {hidden_size_ * batch_size_, hidden_size_, 1}; + init_h_desc_.descriptor(dims_hx, strides_hx); + init_c_desc_.descriptor(dims_hx, strides_hx); + last_h_desc_.descriptor(dims_hx, strides_hx); + last_c_desc_.descriptor(dims_hx, strides_hx); + + // ------------------- cudnn dropout descriptors --------------------- + size_t state_size; + if (!initialized_) { + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDropoutGetStatesSize(handle, &state_size)); + dropout_state->mutable_data({static_cast(state_size)}, + place); + } + dropout_desc_.descriptor(handle, place, initialized_, dropout_prob_, + dropout_state, seed_, state_size); + +// ------------------- cudnn rnn descriptors --------------------- +#if CUDNN_VERSION >= 6000 + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor_v6( + handle, rnn_desc_.desc(), hidden_size_, num_layers_, + dropout_desc_.desc(), CUDNN_LINEAR_INPUT, + is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM, + CUDNN_RNN_ALGO_STANDARD, cudnn_type)); +#else + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor( + rnn_desc_.desc(), hidden_size_, num_layers_, dropout_desc_.desc(), + CUDNN_LINEAR_INPUT, + is_bidirec_ ? 
CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM, + cudnn_type)); +#endif + if (!sequence_length.empty()) { + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNPaddingMode( + rnn_desc_.desc(), CUDNN_RNN_PADDED_IO_ENABLED)); + } + + // ------------------- cudnn weights_size --------------------- + size_t weights_size_; + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNParamsSize( + handle, rnn_desc_.desc(), x_descs_[0], &weights_size_, cudnn_type)); + PADDLE_ENFORCE_EQ( + weights_size_, sizeof(T) * weight_numel_, + platform::errors::InvalidArgument( + "The cudnn lstm weight size and the setting weight size should be the same.")); + // ------------------- cudnn weight descriptors --------------------- + platform::DataLayout layout = platform::DataLayout::kNCHW; + int dim_tmp = weights_size_ / sizeof(T); + std::vector dim_w = {dim_tmp, 1, 1}; + weight_desc_.descriptor(layout, dim_w); + // ------------------- cudnn workspace, reserve size --------------------- + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNWorkspaceSize( + handle, rnn_desc_.desc(), seq_length_, x_descs_.data(), + workspace_size)); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnGetRNNTrainingReserveSize( + handle, rnn_desc_.desc(), seq_length_, x_descs_.data(), + reserve_size)); + } + cudnnTensorDescriptor_t* x_descs() { return x_descs_.data(); } + cudnnTensorDescriptor_t* y_descs() { return y_descs_.data(); } + cudnnRNNDataDescriptor_t x_seq_desc() { return x_seq_desc_.desc(); } + cudnnRNNDataDescriptor_t y_seq_desc() { return y_seq_desc_.desc(); } + cudnnTensorDescriptor_t init_h_desc() { return init_h_desc_.desc(); } + cudnnTensorDescriptor_t init_c_desc() { return init_c_desc_.desc(); } + cudnnTensorDescriptor_t last_h_desc() { return last_h_desc_.desc(); } + cudnnTensorDescriptor_t last_c_desc() { return last_c_desc_.desc(); } + cudnnRNNDescriptor_t rnn_desc() { return rnn_desc_.desc(); } + cudnnDropoutDescriptor_t dropout_desc() { return dropout_desc_.desc(); } + cudnnFilterDescriptor_t weight_desc() { return weight_desc_.desc(); } + + private: + int seq_length_; + int batch_size_; + int input_size_; + int hidden_size_; + int num_layers_; + float dropout_prob_; + int seed_; + int weight_numel_; + bool initialized_; + bool is_bidirec_; + std::vector x_descs_; + std::vector y_descs_; + + platform::ScopedTensorDescriptor x_desc_; + platform::ScopedTensorDescriptor y_desc_; + platform::ScopedRNNTensorDescriptor x_seq_desc_; + platform::ScopedRNNTensorDescriptor y_seq_desc_; + platform::ScopedTensorDescriptor init_h_desc_; + platform::ScopedTensorDescriptor init_c_desc_; + platform::ScopedTensorDescriptor last_h_desc_; + platform::ScopedTensorDescriptor last_c_desc_; + platform::ScopedDropoutDescriptor dropout_desc_; + platform::ScopedFilterDescriptor weight_desc_; + platform::ScopedRNNDescriptor rnn_desc_; +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/cudnn_lstm_op.cc b/paddle/fluid/operators/cudnn_lstm_op.cc index cc807f193ed835cfbf04dfcefad7ffb24e8ab286..82954bc109a740c0fe31ab889eb07bbbe3f52417 100644 --- a/paddle/fluid/operators/cudnn_lstm_op.cc +++ b/paddle/fluid/operators/cudnn_lstm_op.cc @@ -51,6 +51,16 @@ class CudnnLSTMOp : public framework::OperatorWithKernel { "received InitH's rank is %d.", init_h_dims.size())); + if (ctx->HasInput("SequenceLength")) { + auto seq_dims = ctx->GetInputDim("SequenceLength"); + PADDLE_ENFORCE_EQ( + in_dims[1], seq_dims[0], + platform::errors::InvalidArgument( + "The size of SequenceLength has to equal the
batch_size. But " + "received batch_size is %d and the size of SequenceLength is %d.", + in_dims[1], seq_dims[0])); + } + PADDLE_ENFORCE_EQ( in_dims[1], init_h_dims[1], platform::errors::InvalidArgument( @@ -113,6 +123,12 @@ class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker { "(Tensor) the learnable hidden-hidden weights." " The shape is (N), where N is total weight size of the LSTM. " " cudnn concatenate all the weight to one Tensor"); + AddInput("SequenceLength", + "(Tensor) When the input data is padding, " + "set this parameter. This parameter represents " + "the variable sequence lengths in a batch. " + "The size of the vector has to equal the batch_size.") + .AsDispensable(); AddOutput("Reserve", "(Tensor, a temporary output Tensor to store the reserve_data " "of cudnn kernel.") @@ -155,13 +171,6 @@ class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault(1); AddAttr("is_test", "True if in test phase.").SetDefault(false); AddAttr("seed", "seed to used if fix_seed is True").SetDefault(0); - AddAttr>("sequence_length", - "(vector) When the input data is padding, " - "set this parameter. This parameter represents " - "the variable sequence" - "lengths in a batch. The size of the vector has " - "to equal the batch_size.") - .SetDefault({}); AddComment(R"DOC( CUDNN LSTM implementation @@ -243,6 +252,9 @@ class CudnnLSTMGradOpMaker : public framework::SingleGradOpMaker { op->SetInput("InitH", this->Input("InitH")); op->SetInput("InitC", this->Input("InitC")); op->SetInput("W", this->Input("W")); + if (this->HasInput("SequenceLength")) { + op->SetInput("SequenceLength", this->Input("SequenceLength")); + } op->SetInput("Reserve", this->Output("Reserve")); op->SetInput("StateOut", this->Output("StateOut")); op->SetInput("Out", this->Output("Out")); diff --git a/paddle/fluid/operators/cudnn_lstm_op.cu.cc b/paddle/fluid/operators/cudnn_lstm_op.cu.cc index f60cd41d9a218c444254d268eb43abfb97db43e6..6457d9295dcbfa99d18f63fbda3dae048d7713cd 100644 --- a/paddle/fluid/operators/cudnn_lstm_op.cu.cc +++ b/paddle/fluid/operators/cudnn_lstm_op.cu.cc @@ -13,8 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/cudnn_rnn_cache.h" +#include "paddle/fluid/operators/cudnn_lstm_cache.h" #include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/operators/utils.h" #include "paddle/fluid/platform/cudnn_desc.h" #include "paddle/fluid/platform/cudnn_helper.h" @@ -24,6 +25,43 @@ namespace operators { using LoDTensor = framework::LoDTensor; using Tensor = framework::Tensor; +template +void LSTMInferece(const bool &has_seq_length, const cudnnHandle_t &handle, + const int &seq_length, ScopedRNNBase *rnn, const T *x_data, + const T *init_h_data, const T *init_c_data, const T *w_data, + T *out_data, T *last_h_data, T *last_c_data, + framework::Tensor *workspace_data, + const size_t &workspace_size) { + if (!has_seq_length) { + // for inference + // This interface is used when the input/output is unpadded. 
+ PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardInference( + handle, rnn->rnn_desc(), seq_length, rnn->x_descs(), x_data, + rnn->init_h_desc(), init_h_data, rnn->init_c_desc(), init_c_data, + rnn->weight_desc(), w_data, rnn->y_descs(), out_data, + rnn->last_h_desc(), last_h_data, rnn->last_c_desc(), last_c_data, + workspace_data->data(), workspace_size)); + } else { +#if CUDNN_VERSION >= 7201 + // for inference + // This interface is used when the input/output is padded. + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardInferenceEx( + handle, rnn->rnn_desc(), rnn->x_seq_desc(), x_data, rnn->init_h_desc(), + init_h_data, rnn->init_c_desc(), init_c_data, rnn->weight_desc(), + w_data, rnn->y_seq_desc(), out_data, rnn->last_h_desc(), last_h_data, + rnn->last_c_desc(), last_c_data, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, workspace_data->data(), + workspace_size)); +#else + // CUDNN VERSION has to be >= 7.2.1 + PADDLE_THROW(platform::errors::Unavailable( + "The padded input is supported by " + "cudnnRNNForwardInferenceEx, but it only works when " + "the version of cudnn is larger than 7.2.1")); +#endif + } +} + template class CudnnLSTMGPUKernel : public framework::OpKernel { public: @@ -56,7 +94,13 @@ class CudnnLSTMGPUKernel : public framework::OpKernel { int num_layers = ctx.Attr("num_layers"); bool is_test = ctx.Attr("is_test"); int seed = ctx.Attr("seed"); - auto sequence_length = ctx.Attr>("sequence_length"); + + bool has_seq_length = ctx.HasInput("SequenceLength"); + std::vector SequenceLength; + if (has_seq_length) { + auto *sequence_length = ctx.Input("SequenceLength"); + SequenceLength = operators::GetDataFromTensor(sequence_length); + } auto &dev_ctx = ctx.template device_context(); auto handle = dev_ctx.cudnn_handle(); @@ -70,58 +114,32 @@ class CudnnLSTMGPUKernel : public framework::OpKernel { size_t workspace_size; size_t reserve_size; - platform::ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size, - num_layers, dropout_prob, seed, weight_numel, - state_initialized, is_bidirec); - rnn.Create(handle, ctx.GetPlace(), sequence_length, &workspace_size, + ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size, + num_layers, dropout_prob, seed, weight_numel, + state_initialized, is_bidirec); + rnn.Create(handle, ctx.GetPlace(), SequenceLength, &workspace_size, &reserve_size, state_out); framework::Tensor workspace_data_; - workspace_data_.Resize({static_cast(workspace_size)}); - workspace_data_.mutable_data(ctx.GetPlace()); + workspace_data_.mutable_data( + {static_cast(workspace_size)}, ctx.GetPlace()); auto *reserve_data = reserve->mutable_data( {static_cast(reserve_size)}, ctx.GetPlace()); if (is_test) { - if (sequence_length.empty()) { - // for inference - // This interface is used when the input/output is unpadded. - PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardInference( - handle, rnn.rnn_desc(), seq_length, rnn.x_desc(), x_data, - rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data, - rnn.w_desc(), w_data, rnn.y_desc(), out_data, rnn.hy_desc(), - last_h_data, rnn.cy_desc(), last_c_data, - workspace_data_.data(), workspace_size)); - } else { -#if CUDNN_VERSION >= 7201 - // for inference - // This interface is used when the input/output is padded.
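// A condensed sketch of the version guard used on both the inference and
// training paths of this kernel: the padded-I/O cuDNN entry points (the *Ex
// variants) only exist from cuDNN 7.2.1 on, so older builds must fail loudly
// instead of calling a missing symbol. Illustrative; assumes the cudnn
// headers are available.
#include "paddle/fluid/platform/cudnn_helper.h"
#include "paddle/fluid/platform/enforce.h"

void RequirePaddedIOSupport() {
#if CUDNN_VERSION >= 7201
  // New enough: cudnnRNNForward{Inference,Training}Ex may be called.
#else
  PADDLE_THROW(paddle::platform::errors::Unavailable(
      "The padded input is supported by cudnnRNNForwardInferenceEx, but it "
      "only works when the version of cudnn is larger than 7.2.1"));
#endif
}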
- PADDLE_ENFORCE_CUDA_SUCCESS( - platform::dynload::cudnnRNNForwardInferenceEx( - handle, rnn.rnn_desc(), rnn.x_seq_desc(), x_data, rnn.hx_desc(), - init_h_data, rnn.cx_desc(), init_c_data, rnn.w_desc(), w_data, - rnn.y_seq_desc(), out_data, rnn.hy_desc(), last_h_data, - rnn.cy_desc(), last_c_data, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, nullptr, nullptr, - workspace_data_.data(), workspace_size)); -#else - PADDLE_ENFORCE_NOT_NULL( - nullptr, platform::errors::Unavailable( - "The padded input is supported by " - "cudnnRNNForwardInferenceEx, but it only works when " - "the version of cudnn is larger than 7.2.1")); -#endif - } + LSTMInferece(has_seq_length, handle, seq_length, &rnn, x_data, + init_h_data, init_c_data, w_data, out_data, last_h_data, + last_c_data, &workspace_data_, workspace_size); } else { - if (sequence_length.empty()) { + if (!has_seq_length) { // for train // This interface is used when the input/output is unpadded. PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardTraining( - handle, rnn.rnn_desc(), seq_length, rnn.x_desc(), x_data, - rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data, - rnn.w_desc(), w_data, rnn.y_desc(), out_data, rnn.hy_desc(), - last_h_data, rnn.cy_desc(), last_c_data, + handle, rnn.rnn_desc(), seq_length, rnn.x_descs(), x_data, + rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data, + rnn.weight_desc(), w_data, rnn.y_descs(), out_data, + rnn.last_h_desc(), last_h_data, rnn.last_c_desc(), last_c_data, workspace_data_.data(), workspace_size, reserve_data, reserve_size)); } else { @@ -130,19 +148,18 @@ class CudnnLSTMGPUKernel : public framework::OpKernel { // This interface is used when the input/output is padded. PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnRNNForwardTrainingEx( - handle, rnn.rnn_desc(), rnn.x_seq_desc(), x_data, rnn.hx_desc(), - init_h_data, rnn.cx_desc(), init_c_data, rnn.w_desc(), w_data, - rnn.y_seq_desc(), out_data, rnn.hy_desc(), last_h_data, - rnn.cy_desc(), last_c_data, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, nullptr, nullptr, - workspace_data_.data(), workspace_size, reserve_data, - reserve_size)); + handle, rnn.rnn_desc(), rnn.x_seq_desc(), x_data, + rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data, + rnn.weight_desc(), w_data, rnn.y_seq_desc(), out_data, + rnn.last_h_desc(), last_h_data, rnn.last_c_desc(), last_c_data, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, workspace_data_.data(), workspace_size, + reserve_data, reserve_size)); #else - PADDLE_ENFORCE_NOT_NULL( - nullptr, platform::errors::Unavailable( - "The padded input is supported by " - "cudnnRNNForwardTrainingEx, but it only works when " - "the version of cudnn is larger than 7.2.1")); + PADDLE_THROW(platform::errors::Unavailable( + "The padded input is supported by " + "cudnnRNNForwardTrainingEx, but it only works when " + "the version of cudnn is larger than 7.2.1")); #endif } } @@ -203,7 +220,13 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel { int hidden_size = ctx.Attr("hidden_size"); int num_layers = ctx.Attr("num_layers"); int seed = ctx.Attr("seed"); - auto sequence_length = ctx.Attr>("sequence_length"); + + bool has_seq_length = ctx.HasInput("SequenceLength"); + std::vector SequenceLength; + if (has_seq_length) { + auto *sequence_length = ctx.Input("SequenceLength"); + SequenceLength = operators::GetDataFromTensor(sequence_length); + } int seq_length = input_dims[0]; int batch_size = input->dims()[1]; @@ -213,33 +236,33 @@ class 
CudnnLSTMGPUGradKernel : public framework::OpKernel { size_t workspace_size; size_t reserve_size; - platform::ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size, - num_layers, dropout_prob, seed, weight_numel, - true, is_bidirec); + ScopedRNNBase rnn(seq_length, batch_size, input_size, hidden_size, + num_layers, dropout_prob, seed, weight_numel, true, + is_bidirec); - rnn.Create(handle, ctx.GetPlace(), sequence_length, &workspace_size, + rnn.Create(handle, ctx.GetPlace(), SequenceLength, &workspace_size, &reserve_size, const_cast(state_out)); framework::Tensor workspace_data_; - workspace_data_.Resize({static_cast(workspace_size)}); - workspace_data_.mutable_data(ctx.GetPlace()); + workspace_data_.mutable_data( + {static_cast(workspace_size)}, ctx.GetPlace()); const uint8_t *reserve_data = reserve->data(); - if (sequence_length.empty()) { + if (!has_seq_length) { // This interface is used when the input/output is unpadded. PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardData( - handle, rnn.rnn_desc(), seq_length, rnn.y_desc(), out_data, - rnn.y_desc(), out_grad_data, rnn.hy_desc(), last_h_grad_data, - rnn.cy_desc(), last_c_grad_data, rnn.w_desc(), weight_data, - rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data, rnn.x_desc(), - in_grad_data, rnn.hx_desc(), init_h_grad_data, rnn.cx_desc(), - init_c_grad_data, workspace_data_.data(), workspace_size, - const_cast(reserve_data), reserve_size)); + handle, rnn.rnn_desc(), seq_length, rnn.y_descs(), out_data, + rnn.y_descs(), out_grad_data, rnn.last_h_desc(), last_h_grad_data, + rnn.last_c_desc(), last_c_grad_data, rnn.weight_desc(), weight_data, + rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data, + rnn.x_descs(), in_grad_data, rnn.init_h_desc(), init_h_grad_data, + rnn.init_c_desc(), init_c_grad_data, workspace_data_.data(), + workspace_size, const_cast(reserve_data), reserve_size)); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardWeights( - handle, rnn.rnn_desc(), seq_length, rnn.x_desc(), input->data(), - rnn.hx_desc(), init_h->data(), rnn.y_desc(), out->data(), - workspace_data_.data(), workspace_size, rnn.w_desc(), + handle, rnn.rnn_desc(), seq_length, rnn.x_descs(), input->data(), + rnn.init_h_desc(), init_h->data(), rnn.y_descs(), out->data(), + workspace_data_.data(), workspace_size, rnn.weight_desc(), weight_grad->data(), const_cast(reserve_data), reserve_size)); } else { @@ -248,27 +271,25 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel { // This interface is used when the input/output is padded. 
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardDataEx( handle, rnn.rnn_desc(), rnn.y_seq_desc(), out_data, rnn.y_seq_desc(), - out_grad_data, nullptr, nullptr, rnn.hy_desc(), last_h_grad_data, - rnn.cy_desc(), last_c_grad_data, rnn.w_desc(), weight_data, - rnn.hx_desc(), init_h_data, rnn.cx_desc(), init_c_data, - rnn.x_seq_desc(), in_grad_data, rnn.hx_desc(), init_h_grad_data, - rnn.cx_desc(), init_c_grad_data, nullptr, nullptr, + out_grad_data, nullptr, nullptr, rnn.last_h_desc(), last_h_grad_data, + rnn.last_c_desc(), last_c_grad_data, rnn.weight_desc(), weight_data, + rnn.init_h_desc(), init_h_data, rnn.init_c_desc(), init_c_data, + rnn.x_seq_desc(), in_grad_data, rnn.init_h_desc(), init_h_grad_data, + rnn.init_c_desc(), init_c_grad_data, nullptr, nullptr, workspace_data_.data(), workspace_size, const_cast(reserve_data), reserve_size)); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardWeightsEx( handle, rnn.rnn_desc(), rnn.x_seq_desc(), input->data(), - rnn.hx_desc(), init_h->data(), rnn.y_seq_desc(), out->data(), - workspace_data_.data(), workspace_size, rnn.w_desc(), - weight_grad->data(), const_cast(reserve_data), - reserve_size)); + rnn.init_h_desc(), init_h->data(), rnn.y_seq_desc(), + out->data(), workspace_data_.data(), workspace_size, + rnn.weight_desc(), weight_grad->data(), + const_cast(reserve_data), reserve_size)); #else - PADDLE_ENFORCE_NOT_NULL( - nullptr, - platform::errors::Unavailable( - "The padded input of rnn is supported by cudnnRNNBackwardDataEx, " - "cudnnRNNBackwardWeightsEx, but it only works when the version " - "of cudnn is larger than 7.2.1")); + PADDLE_THROW(platform::errors::Unavailable( + "The padded input of rnn is supported by cudnnRNNBackwardDataEx, " + "cudnnRNNBackwardWeightsEx, but it only works when the version " + "of cudnn is larger than 7.2.1")); #endif } } diff --git a/paddle/fluid/operators/distributed_ops/allreduce_op.h b/paddle/fluid/operators/distributed_ops/allreduce_op.h index c77113ad405e991db20c035371550a1eccaa1971..e486faa575847311c2d668ada5519fe9c047f053 100644 --- a/paddle/fluid/operators/distributed_ops/allreduce_op.h +++ b/paddle/fluid/operators/distributed_ops/allreduce_op.h @@ -76,7 +76,8 @@ class AllReduceOpKernel : public framework::OpKernel { PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream)); } #else - PADDLE_THROW("PaddlePaddle should compile with GPU."); + PADDLE_THROW(platform::errors::PreconditionNotMet( + "PaddlePaddle should be compiled with GPU.")); #endif } }; diff --git a/paddle/fluid/operators/distributed_ops/broadcast_op.cc b/paddle/fluid/operators/distributed_ops/broadcast_op.cc index 535cf7014419292863a684eaaebbf15d367671ab..61e27887b68c75f3d5c5cc48b4f1fac11d5f4eae 100644 --- a/paddle/fluid/operators/distributed_ops/broadcast_op.cc +++ b/paddle/fluid/operators/distributed_ops/broadcast_op.cc @@ -58,7 +58,8 @@ template class BroadcastOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_THROW("Broadcast op can run on gpu place only for now."); + PADDLE_THROW(platform::errors::PreconditionNotMet( + "Broadcast op can run on gpu place only for now.")); } }; diff --git a/paddle/fluid/operators/distributed_ops/broadcast_op.cu.cc b/paddle/fluid/operators/distributed_ops/broadcast_op.cu.cc index f067840e539ac046b53be7d3bc83c783f7c8cf9c..337422f0bd643f131d5044e802851a09d6171c13 100644 --- a/paddle/fluid/operators/distributed_ops/broadcast_op.cu.cc +++ b/paddle/fluid/operators/distributed_ops/broadcast_op.cu.cc
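// A minimal sketch of the CUDA status check used in the next hunk:
// PADDLE_ENFORCE_CUDA_SUCCESS consumes the cudaError_t and raises a paddle
// exception on failure, replacing the untyped PADDLE_ENFORCE(...) wrapper.
// Illustrative only; assumes a CUDA build with platform/enforce.h.
#include <cuda_runtime.h>
#include "paddle/fluid/platform/enforce.h"

void SyncStream(cudaStream_t stream) {
  // Old style: PADDLE_ENFORCE(cudaStreamSynchronize(stream));
  PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream));
}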
@@ -68,10 +68,11 @@ class NCCLBroadcastOpKernel : public framework::OpKernel { << " From " << root_dev_id << " to " << dev_id; if (ctx.Attr("sync_mode")) { - PADDLE_ENFORCE(cudaStreamSynchronize(stream)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream)); } #else - PADDLE_THROW("PaddlePaddle should compile with GPU."); + PADDLE_THROW(platform::errors::PreconditionNotMet( + "PaddlePaddle should be compiled with GPU.")); #endif } }; diff --git a/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc b/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc index b8163169734bd2c64412bab7286aca9cc5e1b830..6ec8f2c2355ee098aed4a6b92410bcc60bca4736 100644 --- a/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc +++ b/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc @@ -33,9 +33,12 @@ namespace operators { static void Memcpy(void *dst, const void *src, size_t n, bool copy_to_gpu) { if (copy_to_gpu) { #ifdef PADDLE_WITH_CUDA - PADDLE_ENFORCE(cudaMemcpy(dst, src, n, cudaMemcpyHostToDevice)); + PADDLE_ENFORCE_CUDA_SUCCESS( + cudaMemcpy(dst, src, n, cudaMemcpyHostToDevice)); #else - PADDLE_THROW("Not compiled with cuda"); + PADDLE_THROW( + platform::errors::InvalidArgument("Check your paddle version, current " + "version is not compiled with cuda")); #endif } else { std::memcpy(dst, src, n); @@ -88,11 +91,22 @@ bool TestMain(const platform::Place &place, const framework::DDim &dims, framework::LoDTensor cpu_out; auto &out_tensor = scope.FindVar(out_name)->Get(); - PADDLE_ENFORCE(scope.kids().empty()); + PADDLE_ENFORCE_EQ(scope.kids().empty(), true, + platform::errors::InvalidArgument( + "The scope can not have the child scopes, " + "please check your code.")); if (inplace) { - PADDLE_ENFORCE_EQ(&out_tensor, x); + PADDLE_ENFORCE_EQ( + &out_tensor, x, + platform::errors::InvalidArgument( + "The output tensor should be same as input x in inplace mode," + " but now is not same.")); } else { - PADDLE_ENFORCE_EQ(&out_tensor, z); + PADDLE_ENFORCE_EQ( + &out_tensor, z, + platform::errors::InvalidArgument( + "The output tensor should be same as output z in normal mode," + " but now is not same.")); } if (is_gpu_place) { diff --git a/paddle/fluid/operators/elementwise/test_elementwise_op_grad_grad.h b/paddle/fluid/operators/elementwise/test_elementwise_op_grad_grad.h index 89849ef92cd19ff5f83f2b57c65c78610d7c2c69..54e7c7d1b6aa9776f5637359b334e6304d7906ce 100644 --- a/paddle/fluid/operators/elementwise/test_elementwise_op_grad_grad.h +++ b/paddle/fluid/operators/elementwise/test_elementwise_op_grad_grad.h @@ -92,7 +92,9 @@ class TestElementwiseOpGradGrad { auto dst_place = BOOST_GET_CONST(platform::CUDAPlace, place_); memory::Copy(dst_place, dst, src_place, src, bytes, nullptr); #else - PADDLE_THROW("Not compiled with cuda"); + PADDLE_THROW(platform::errors::InvalidArgument( + "Check your paddle version, current version is not compiled with " + "cuda")); #endif } } @@ -107,7 +109,10 @@ class TestElementwiseOpGradGrad { op->Run(scope_, place_); platform::DeviceContextPool::Instance().Get(place_)->Wait(); framework::LoDTensor cpu_out; - PADDLE_ENFORCE_EQ(scope_.kids().empty(), true, "scope has child scopes"); + PADDLE_ENFORCE_EQ(scope_.kids().empty(), true, + platform::errors::InvalidArgument( + "The scope can not have the child scopes, " + "please check your code.")); // get outputs from scope and compare them with expected_outs bool all_equal = true; diff --git a/paddle/fluid/operators/gather_op.cc b/paddle/fluid/operators/gather_op.cc index
28afeb6f541c68fe7e0719a782fd8c9147b15163..a99879316d684ca95e73ce8db43e988efcbab4c4 100644 --- a/paddle/fluid/operators/gather_op.cc +++ b/paddle/fluid/operators/gather_op.cc @@ -37,8 +37,21 @@ class GatherOp : public framework::OperatorWithKernel { "Output(Out) of GatherOp should not be null.")); auto index_dims = ctx->GetInputDim("Index"); - PADDLE_ENFORCE(index_dims.size() == 1 || - (index_dims.size() == 2 && index_dims[1] == 1)); + + if (index_dims.size() == 2) { + PADDLE_ENFORCE_EQ( + index_dims[1], 1, + platform::errors::InvalidArgument( + "The last dim of index should be 1 when it is 2D, but we get %d", + index_dims[1])); + } else { + PADDLE_ENFORCE_EQ( + index_dims.size(), 1, + platform::errors::InvalidArgument( + "The index should be 1D, when it is not 2D, but we get %d", + index_dims.size())); + } + int batch_size = ctx->GetInputDim("Index")[0]; framework::DDim output_dims(ctx->GetInputDim("X")); output_dims[0] = batch_size; diff --git a/paddle/fluid/operators/isfinite_op.cc b/paddle/fluid/operators/isfinite_op.cc index af737ec42f631c534bb26ad38901e03d804d07b3..9b92ce3e538aa660dedda67de0cabaa4adbdc8c7 100644 --- a/paddle/fluid/operators/isfinite_op.cc +++ b/paddle/fluid/operators/isfinite_op.cc @@ -43,7 +43,11 @@ class OverflowOp : public framework::OperatorWithKernel { } else if (x_var->IsType()) { dtype = x_var->Get().value().type(); } else { - PADDLE_THROW("Cannot find the input data type by all input data"); + PADDLE_ENFORCE_EQ( + true, false, + platform::errors::InvalidArgument( + "The input type mismatch, the type of Input(X) must be Tensor or " + "SelectedRows, please check your input.")); } return framework::OpKernelType(framework::proto::VarType::Type(dtype), ctx.GetPlace()); diff --git a/paddle/fluid/operators/isfinite_op.h b/paddle/fluid/operators/isfinite_op.h index 83b080856366ac3332c5856a19b721893bb80eb3..2fc0d58669bae428d811c7200e025f36f087b905 100644 --- a/paddle/fluid/operators/isfinite_op.h +++ b/paddle/fluid/operators/isfinite_op.h @@ -57,7 +57,11 @@ class OverflowKernel : public framework::OpKernel { auto& in = ctx.Input("X")->value(); functor(in, out); } else { - PADDLE_THROW("Unsupported input type."); + PADDLE_ENFORCE_EQ( + true, false, + platform::errors::InvalidArgument( + "The input type mismatch, the type of Input(X) must be Tensor or " + "SelectedRows, please check your input.")); } } }; diff --git a/paddle/fluid/operators/linspace_op.cc b/paddle/fluid/operators/linspace_op.cc index 2c3172d2a1112e2c79a3c1215ccd0d3f08d59451..667c6e892956e29478f1401c3cb2622713433037 100644 --- a/paddle/fluid/operators/linspace_op.cc +++ b/paddle/fluid/operators/linspace_op.cc @@ -22,8 +22,6 @@ class LinspaceOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Start"), - "Input(Start) of LinspaceOp should not be null."); OP_INOUT_CHECK(ctx->HasInput("Start"), "Input", "Start", "linspace"); OP_INOUT_CHECK(ctx->HasInput("Stop"), "Input", "Stop", "linspace"); OP_INOUT_CHECK(ctx->HasInput("Num"), "Input", "Num", "linspace"); diff --git a/paddle/fluid/operators/linspace_op.cu b/paddle/fluid/operators/linspace_op.cu index 793253b6b8894de8d89b301921383ebfd53d66fc..c51e8785263b5de7a897f3865ed2dabdf93adfaa 100644 --- a/paddle/fluid/operators/linspace_op.cu +++ b/paddle/fluid/operators/linspace_op.cu @@ -63,7 +63,10 @@ class CUDALinspaceKernel : public framework::OpKernel { framework::TensorCopy(*num_t, platform::CPUPlace(), &n); int32_t 
num = n.data()[0]; - PADDLE_ENFORCE(num > 0, "The num of linspace op should be larger than 0."); + PADDLE_ENFORCE_GT(num, 0, platform::errors::InvalidArgument( + "The num of linspace op should be larger " + "than 0, but received num is %d", + num)); out->Resize(framework::make_ddim({num})); T* out_data = out->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/linspace_op.h b/paddle/fluid/operators/linspace_op.h index 898f611f864dc8bfac2ba7e41b91f5f5bbe524ab..2c30a66ef8e937127fb69a459a901164934b5b13 100644 --- a/paddle/fluid/operators/linspace_op.h +++ b/paddle/fluid/operators/linspace_op.h @@ -46,7 +46,10 @@ class CPULinspaceKernel : public framework::OpKernel { T start = start_t.data()[0]; T stop = stop_t.data()[0]; - PADDLE_ENFORCE(num > 0, "The num of linspace op should be larger than 0."); + PADDLE_ENFORCE_GT(num, 0, platform::errors::InvalidArgument( + "The num of linspace op should be larger " + "than 0, but received num is %d", + num)); out->Resize(framework::make_ddim({num})); diff --git a/paddle/fluid/operators/math/concat_test.cc b/paddle/fluid/operators/math/concat_test.cc index 411dbca25bb48c99dfd16779f54e46a3e80d0d4e..270a9d3f80a80d5ea2c8b97d4a69125355ddef61 100644 --- a/paddle/fluid/operators/math/concat_test.cc +++ b/paddle/fluid/operators/math/concat_test.cc @@ -79,8 +79,16 @@ void ConcatCase1(DeviceContext* context) { concat_functor(*context, input, 0, &out); // check the dim of input_a, input_b - PADDLE_ENFORCE_EQ(input_a.dims(), dim_a); - PADDLE_ENFORCE_EQ(input_b.dims(), dim_b); + PADDLE_ENFORCE_EQ(input_a.dims(), dim_a, + paddle::platform::errors::InvalidArgument( + "The dims of Input tensor should be the same as the " + "declared dims. Tensor dims: [%s], declared dims: [%s]", + input_a.dims(), dim_a)); + PADDLE_ENFORCE_EQ(input_b.dims(), dim_b, + paddle::platform::errors::InvalidArgument( + "The dims of Input tensor should be the same as the " + "declared dims. Tensor dims: [%s], declared dims: [%s]", + input_b.dims(), dim_b)); int* out_ptr = nullptr; if (paddle::platform::is_gpu_place(Place())) { @@ -95,10 +103,14 @@ void ConcatCase1(DeviceContext* context) { int idx_a = 0, idx_b = 0; for (int j = 0; j < 5 * 3 * 4; ++j) { if (j >= cols) { - PADDLE_ENFORCE_EQ(out_ptr[j], b_ptr[idx_b]); + PADDLE_ENFORCE_EQ(out_ptr[j], b_ptr[idx_b], + paddle::platform::errors::InvalidArgument( + "Concat test failed, the result should be equal.")); ++idx_b; } else { - PADDLE_ENFORCE_EQ(out_ptr[j], a_ptr[idx_a]); + PADDLE_ENFORCE_EQ(out_ptr[j], a_ptr[idx_a], + paddle::platform::errors::InvalidArgument( + "Concat test failed, the result should be equal.")); ++idx_a; } } @@ -166,8 +178,16 @@ void ConcatCase2(DeviceContext* context) { concat_functor(*context, input, 1, &out); // check the dim of input_a, input_b - PADDLE_ENFORCE_EQ(input_a.dims(), dim_a); - PADDLE_ENFORCE_EQ(input_b.dims(), dim_b); + PADDLE_ENFORCE_EQ(input_a.dims(), dim_a, + paddle::platform::errors::InvalidArgument( + "The dims of Input tensor should be the same as the " + "declared dims. Tensor dims: [%s], declared dims: [%s]", + input_a.dims(), dim_a)); + PADDLE_ENFORCE_EQ(input_b.dims(), dim_b, + paddle::platform::errors::InvalidArgument( + "The dims of Input tensor should be the same as the " + "declared dims. 
Tensor dims: [%s], declared dims: [%s]", + input_b.dims(), dim_b)); int* out_ptr = nullptr; if (paddle::platform::is_gpu_place(Place())) { @@ -183,10 +203,16 @@ void ConcatCase2(DeviceContext* context) { for (int i = 0; i < 2; ++i) { for (int j = 0; j < 28; ++j) { if (j >= cols) { - PADDLE_ENFORCE_EQ(out_ptr[i * 28 + j], b_ptr[idx_b]); + PADDLE_ENFORCE_EQ( + out_ptr[i * 28 + j], b_ptr[idx_b], + paddle::platform::errors::InvalidArgument( + "Concat test failed, the result should be equal.")); ++idx_b; } else { - PADDLE_ENFORCE_EQ(out_ptr[i * 28 + j], a_ptr[idx_a]); + PADDLE_ENFORCE_EQ( + out_ptr[i * 28 + j], a_ptr[idx_a], + paddle::platform::errors::InvalidArgument( + "Concat test failed, the result should be equal.")); ++idx_a; } } @@ -255,8 +281,16 @@ void ConcatCase3(DeviceContext* context) { concat_functor(*context, input, 2, &out); // check the dim of input_a, input_b - PADDLE_ENFORCE_EQ(input_a.dims(), dim_a); - PADDLE_ENFORCE_EQ(input_b.dims(), dim_b); + PADDLE_ENFORCE_EQ(input_a.dims(), dim_a, + paddle::platform::errors::InvalidArgument( + "The dims of Input tensor should be the same as the " + "declared dims. Tensor dims: [%s], declared dims: [%s]", + input_a.dims(), dim_a)); + PADDLE_ENFORCE_EQ(input_b.dims(), dim_b, + paddle::platform::errors::InvalidArgument( + "The dims of Input tensor should be the same as the " + "declared dims. Tensor dims: [%s], declared dims: [%s]", + input_b.dims(), dim_b)); int* out_ptr = nullptr; if (paddle::platform::is_gpu_place(Place())) { @@ -273,10 +307,16 @@ void ConcatCase3(DeviceContext* context) { for (int i = 0; i < 6; ++i) { for (int j = 0; j < 9; ++j) { if (j >= cols) { - PADDLE_ENFORCE_EQ(out_ptr[i * 9 + j], b_ptr[idx_b]); + PADDLE_ENFORCE_EQ( + out_ptr[i * 9 + j], b_ptr[idx_b], + paddle::platform::errors::InvalidArgument( + "Concat test failed, the result should be equal.")); ++idx_b; } else { - PADDLE_ENFORCE_EQ(out_ptr[i * 9 + j], a_ptr[idx_a]); + PADDLE_ENFORCE_EQ( + out_ptr[i * 9 + j], a_ptr[idx_a], + paddle::platform::errors::InvalidArgument( + "Concat test failed, the result should be equal.")); ++idx_a; } } @@ -347,8 +387,16 @@ void ConcatCase4(DeviceContext* context) { context->Wait(); // check the dim of input_a, input_b - PADDLE_ENFORCE_EQ(input_a.dims(), dim_a); - PADDLE_ENFORCE_EQ(input_b.dims(), dim_b); + PADDLE_ENFORCE_EQ(input_a.dims(), dim_a, + paddle::platform::errors::InvalidArgument( + "The dims of Input tensor should be the same as the " + "declared dims. Tensor dims: [%s], declared dims: [%s]", + input_a.dims(), dim_a)); + PADDLE_ENFORCE_EQ(input_b.dims(), dim_b, + paddle::platform::errors::InvalidArgument( + "The dims of Input tensor should be the same as the " + "declared dims. 
Tensor dims: [%s], declared dims: [%s]", + input_b.dims(), dim_b)); int* out_ptr = nullptr; if (paddle::platform::is_gpu_place(Place())) { @@ -365,10 +413,16 @@ void ConcatCase4(DeviceContext* context) { for (int i = 0; i < 2; ++i) { for (int j = 0; j < 24; ++j) { if (j >= cols) { - PADDLE_ENFORCE_EQ(out_ptr[i * 24 + j], b_ptr[idx_b]); + PADDLE_ENFORCE_EQ( + out_ptr[i * 24 + j], b_ptr[idx_b], + paddle::platform::errors::InvalidArgument( + "Concat test failed, the result should be equal.")); ++idx_b; } else { - PADDLE_ENFORCE_EQ(out_ptr[i * 24 + j], a_ptr[idx_a]); + PADDLE_ENFORCE_EQ( + out_ptr[i * 24 + j], a_ptr[idx_a], + paddle::platform::errors::InvalidArgument( + "Concat test failed, the result should be equal.")); ++idx_a; } } diff --git a/paddle/fluid/operators/math/context_project.h b/paddle/fluid/operators/math/context_project.h index e9019c6d2fe6890ee92cb5a3b047666e3c2a7e04..051c6019d74f7d2820dc0ba668da3cafe8864346 100644 --- a/paddle/fluid/operators/math/context_project.h +++ b/paddle/fluid/operators/math/context_project.h @@ -134,7 +134,10 @@ class ContextProjectFunctor { } } if (padding_trainable) { - PADDLE_ENFORCE_NOT_NULL(padding_data); + PADDLE_ENFORCE_NOT_NULL( + padding_data, + platform::errors::InvalidArgument( + "The input tensor 'padding_data' should not be NULL.")); for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { if (lod_level_0[i] == lod_level_0[i + 1]) continue; diff --git a/paddle/fluid/operators/math/cpu_vec.h b/paddle/fluid/operators/math/cpu_vec.h index 8940a41424b01c975f1264ca309cc09fc3c7ae85..925f3b6161ae8506107f917196e77ecb2d9c5593 100644 --- a/paddle/fluid/operators/math/cpu_vec.h +++ b/paddle/fluid/operators/math/cpu_vec.h @@ -621,7 +621,10 @@ class VecActivations { } else if (type == "identity" || type == "") { return vec_identity; } - PADDLE_THROW("Not support type: %s", type); + PADDLE_THROW(platform::errors::InvalidArgument( + "Expected type should be one of sigmoid, relu, tanh, identity. But got " + "unsupported type: %s.", + type)); } }; diff --git a/paddle/fluid/operators/math/cross_entropy.cu b/paddle/fluid/operators/math/cross_entropy.cu index c7fac60dd3e663088813f795352e4d751059de39..84fa0d6af990e22083ec1a0e3993893cefad1ab5 100644 --- a/paddle/fluid/operators/math/cross_entropy.cu +++ b/paddle/fluid/operators/math/cross_entropy.cu @@ -27,8 +27,8 @@ __global__ void CrossEntropyKernel(T* Y, const T* X, const int64_t* label, const int ignore_index) { CUDA_KERNEL_LOOP(i, N) { PADDLE_ENFORCE(label[i] >= 0 && label[i] < D || label[i] == ignore_index, - "label[%d] expected >= 0 and < %ld, or == %ld, but got " - "%ld. Please check input value.", + "The value of label[%d] expected >= 0 and < %ld, or == %ld, " + "but got %ld. Please check input value.", i, D, ignore_index, label[i]); Y[i] = ignore_index == label[i] ? static_cast(0) diff --git a/paddle/fluid/operators/math/im2col.cc b/paddle/fluid/operators/math/im2col.cc index 094a7237826610af574061263e5b0df5eafdf239..6fb393d791cc2a077dbcd0a912bcf31b5d59ad65 100644 --- a/paddle/fluid/operators/math/im2col.cc +++ b/paddle/fluid/operators/math/im2col.cc @@ -34,9 +34,16 @@ class Im2ColFunctor& stride, const std::vector& padding, framework::Tensor* col, const DataLayout data_layout) { - PADDLE_ENFORCE_EQ(im.dims().size(), 3, "The dimension of im should be 3."); + PADDLE_ENFORCE_EQ(im.dims().size(), 3, + platform::errors::InvalidArgument( + "The dimension of tensor 'im' should be 3.
But got " + "the dims of tensor 'im' is [%s].", + im.dims())); PADDLE_ENFORCE_EQ(col->dims().size(), 5, - "The dimension of col should be 5."); + platform::errors::InvalidArgument( + "The dimension of tensor 'col' should be 5. But got " + "the dims of tensor 'col' is [%s].", + col->dims())); if (stride[0] == 1 && stride[1] == 1 && dilation[0] == 1 && dilation[1] == 1) { @@ -70,9 +77,16 @@ class Col2ImFunctor& stride, const std::vector& padding, framework::Tensor* im, const DataLayout data_layout) { - PADDLE_ENFORCE_EQ(im->dims().size(), 3, "The dimension of im should be 3."); + PADDLE_ENFORCE_EQ(im->dims().size(), 3, + platform::errors::InvalidArgument( + "The dimension of tensor 'im' should be 3. But got " + "the dims of tensor 'im' is [%s].", + im->dims())); PADDLE_ENFORCE_EQ(col.dims().size(), 5, - "The dimension of col should be 5."); + platform::errors::InvalidArgument( + "The dimension of tensor 'col' should be 5. But got " + "the dims of tensor 'col' is [%s].", + col.dims())); int im_channels = (data_layout != DataLayout::kNHWC ? im->dims()[0] : im->dims()[2]); int im_height = @@ -88,16 +102,16 @@ class Col2ImFunctor& stride, const std::vector& padding, framework::Tensor* col, const DataLayout data_layout) { - PADDLE_ENFORCE_EQ(im.dims().size(), 3, "The dimension of im should be 3."); + PADDLE_ENFORCE_EQ(im.dims().size(), 3, + platform::errors::InvalidArgument( + "The dimension of tensor 'im' should be 3. But got " + "the dims of tensor 'im' is [%s].", + im.dims())); PADDLE_ENFORCE_EQ(col->dims().size(), 5, - "The dimension of col should be 5."); + platform::errors::InvalidArgument( + "The dimension of tensor 'col' should be 5. But got " + "the dims of tensor 'col' is [%s].", + col->dims())); int im_channels = im.dims()[0]; int im_height = im.dims()[1]; int im_width = im.dims()[2]; @@ -218,9 +239,16 @@ class Col2ImFunctor& stride, const std::vector& padding, framework::Tensor* im, const DataLayout data_layout) { - PADDLE_ENFORCE_EQ(im->dims().size(), 3, "The dimension of im should be 3."); + PADDLE_ENFORCE_EQ(im->dims().size(), 3, + platform::errors::InvalidArgument( + "The dimension of tensor 'im' should be 3. But got " + "the dims of tensor 'im' is [%s].", + im->dims())); PADDLE_ENFORCE_EQ(col.dims().size(), 5, - "The dimension of col should be 5."); + platform::errors::InvalidArgument( + "The dimension of tensor 'col' should be 5. But got " + "the dims of tensor 'col' is [%s].", + col.dims())); int im_channels = im->dims()[0]; int im_height = im->dims()[1]; int im_width = im->dims()[2]; @@ -231,14 +259,14 @@ class Col2ImFunctordata(); const T* col_data = col.data(); diff --git a/paddle/fluid/operators/math/im2col.cu b/paddle/fluid/operators/math/im2col.cu index 97719300daed9c02a716f31d853e3a381312961c..f2a2148ba6954f50cf59ae30f4f4be6aa070739f 100644 --- a/paddle/fluid/operators/math/im2col.cu +++ b/paddle/fluid/operators/math/im2col.cu @@ -81,9 +81,16 @@ class Im2ColFunctor& stride, const std::vector& padding, framework::Tensor* col, const DataLayout data_layout) { - PADDLE_ENFORCE_EQ(im.dims().size(), 3, "The dimension of im should be 3."); + PADDLE_ENFORCE_EQ(im.dims().size(), 3, + platform::errors::InvalidArgument( + "The dimension of tensor 'im' should be 3. But got " + "the dims of tensor 'im' is [%s].", + im.dims())); PADDLE_ENFORCE_EQ(col->dims().size(), 5, - "The dimension of col should be 5."); + platform::errors::InvalidArgument( + "The dimension of tensor 'col' should be 5. 
But got " + "the dims of tensor 'col' is [%s].", + col->dims())); int im_channels = (data_layout != DataLayout::kNHWC ? im.dims()[0] : im.dims()[2]); @@ -182,9 +189,16 @@ class Col2ImFunctor& stride, const std::vector& padding, framework::Tensor* im, const DataLayout data_layout) { - PADDLE_ENFORCE_EQ(im->dims().size(), 3, "The dimension of im should be 3."); + PADDLE_ENFORCE_EQ(im->dims().size(), 3, + platform::errors::InvalidArgument( + "The dimension of tensor 'im' should be 3. But got " + "the dims of tensor 'im' is [%s].", + im->dims())); PADDLE_ENFORCE_EQ(col.dims().size(), 5, - "The dimension of col should be 5."); + platform::errors::InvalidArgument( + "The dimension of tensor 'col' should be 5. But got " + "the dims of tensor 'col' is [%s].", + col.dims())); int im_channels = (data_layout != DataLayout::kNHWC ? im->dims()[0] : im->dims()[2]); @@ -201,16 +215,16 @@ class Col2ImFunctor& stride, const std::vector& padding, framework::Tensor* col, const DataLayout data_layout) { - PADDLE_ENFORCE_EQ(im.dims().size(), 3, "The dimension of im should be 3."); + PADDLE_ENFORCE_EQ(im.dims().size(), 3, + platform::errors::InvalidArgument( + "The dimension of tensor 'im' should be 3. But got " + "the dims of tensor 'im' is [%s].", + im.dims())); PADDLE_ENFORCE_EQ(col->dims().size(), 5, - "The dimension of col should be 5."); + platform::errors::InvalidArgument( + "The dimension of tensor 'col' should be 5. But got " + "the dims of tensor 'col' is [%s].", + col->dims())); int im_channels = im.dims()[0]; int im_height = im.dims()[1]; @@ -370,9 +391,16 @@ class Col2ImFunctor& stride, const std::vector& padding, framework::Tensor* im, const DataLayout data_layout) { - PADDLE_ENFORCE_EQ(im->dims().size(), 3, "The dimension of im should be 3."); + PADDLE_ENFORCE_EQ(im->dims().size(), 3, + platform::errors::InvalidArgument( + "The dimension of tensor 'im' should be 3. But got " + "the dims of tensor 'im' is [%s].", + im->dims())); PADDLE_ENFORCE_EQ(col.dims().size(), 5, - "The dimension of col should be 5."); + platform::errors::InvalidArgument( + "The dimension of tensor 'col' should be 5. 
But got " + "the dims of tensor 'col' is [%s].", + col.dims())); int im_channels = im->dims()[0]; int im_height = im->dims()[1]; @@ -386,16 +414,16 @@ class Col2ImFunctor()[i], paddle::platform::errors::InvalidArgument( - "Each value of input" - "tensor should be 10, but received %d.", + "Each value of input tensor should be 10, " + "but received %d.", t.data()[i])); } delete ctx; diff --git a/paddle/fluid/operators/math/sampler.h b/paddle/fluid/operators/math/sampler.h index de9113f2bb616b489747d8d960154f55bb988847..b90e7e1980335f1facd38738671e7986187d1ceb 100644 --- a/paddle/fluid/operators/math/sampler.h +++ b/paddle/fluid/operators/math/sampler.h @@ -33,10 +33,10 @@ namespace math { class Sampler { public: explicit Sampler(int64_t range, unsigned int seed = 0UL) : range_(range) { - PADDLE_ENFORCE_GT(range, 0, platform::errors::InvalidArgument( - "Range should be" - " greater than 0, but recevied %d.", - range)); + PADDLE_ENFORCE_GT( + range, 0, + platform::errors::InvalidArgument( + "Range should be greater than 0, but recevied %d.", range)); if (seed == 0) { std::random_device r; seed_ = r(); diff --git a/paddle/fluid/operators/math/vol2col.cc b/paddle/fluid/operators/math/vol2col.cc index c05da0062f2bab66746feb9d8ebedeca0c0f9688..794fc647172b040d4e926144a87b84eb4e5216b0 100644 --- a/paddle/fluid/operators/math/vol2col.cc +++ b/paddle/fluid/operators/math/vol2col.cc @@ -34,16 +34,15 @@ class Vol2ColFunctor { const std::vector& strides, const std::vector& paddings, framework::Tensor* col, const DataLayout data_layout) const { - PADDLE_ENFORCE_EQ( - vol.dims().size(), 4, - platform::errors::InvalidArgument("The dimension of" - " vol should be 4, but received %d.", - vol.dims().size())); - PADDLE_ENFORCE_EQ( - col->dims().size(), 7, - platform::errors::InvalidArgument("The dimension of" - "col should be 7, but received %d.", - col->dims().size())); + PADDLE_ENFORCE_EQ(vol.dims().size(), 4, + platform::errors::InvalidArgument( + "The dimension of vol should be 4, but received %d.", + vol.dims().size())); + + PADDLE_ENFORCE_EQ(col->dims().size(), 7, + platform::errors::InvalidArgument( + "The dimension of col should be 7, but received %d.", + col->dims().size())); int input_channels = (data_layout != DataLayout::kNHWC ? vol.dims()[0] : vol.dims()[3]); @@ -152,16 +151,15 @@ class Col2VolFunctor { const std::vector& strides, const std::vector& paddings, framework::Tensor* vol, const DataLayout data_layout) const { - PADDLE_ENFORCE_EQ( - vol->dims().size(), 4, - platform::errors::InvalidArgument("The dimension of vol" - " should be 4, but received %d.", - vol->dims().size())); - PADDLE_ENFORCE_EQ( - col.dims().size(), 7, - platform::errors::InvalidArgument("The dimension of col" - " should be 7, but received %d.", - col.dims().size())); + PADDLE_ENFORCE_EQ(vol->dims().size(), 4, + platform::errors::InvalidArgument( + "The dimension of vol should be 4, but received %d.", + vol->dims().size())); + + PADDLE_ENFORCE_EQ(col.dims().size(), 7, + platform::errors::InvalidArgument( + "The dimension of col should be 7, but received %d.", + col.dims().size())); int input_channels = (data_layout != DataLayout::kNHWC ? 
vol->dims()[0] : vol->dims()[3]); @@ -192,29 +190,29 @@ class Col2VolFunctor { ((dilations[0] * (filter_depth - 1) + 1))) / strides[0] + 1; - PADDLE_ENFORCE_EQ(input_depth_tmp, output_depth, - platform::errors::InvalidArgument( - "input_depth(%d)" - " and output_depth(%d) are mismatching.", - input_depth_tmp, output_depth)); + PADDLE_ENFORCE_EQ( + input_depth_tmp, output_depth, + platform::errors::InvalidArgument( + "input_depth(%d) and output_depth(%d) are mismatching.", + input_depth_tmp, output_depth)); auto input_height_tmp = (input_height + pad_h_up + pad_h_down - ((dilations[1] * (filter_height - 1) + 1))) / strides[1] + 1; - PADDLE_ENFORCE_EQ(input_height_tmp, output_height, - platform::errors::InvalidArgument( - "input_height(%d)" - " and output_height(%d) are mismatching.", - input_height_tmp, output_height)); + PADDLE_ENFORCE_EQ( + input_height_tmp, output_height, + platform::errors::InvalidArgument( + "input_height(%d) and output_height(%d) are mismatching.", + input_height_tmp, output_height)); auto input_width_tmp = (input_width + pad_w_left + pad_w_right - ((dilations[2] * (filter_width - 1) + 1))) / strides[2] + 1; - PADDLE_ENFORCE_EQ(input_width_tmp, output_width, - platform::errors::InvalidArgument( - "input_width(%d)" - " and output_width(%d) are mismatching.", - input_width_tmp, output_width)); + PADDLE_ENFORCE_EQ( + input_width_tmp, output_width, + platform::errors::InvalidArgument( + "input_width(%d) and output_width(%d) are mismatching.", + input_width_tmp, output_width)); T* vol_data = vol->data(); const T* col_data = col.data(); diff --git a/paddle/fluid/operators/math/vol2col.cu b/paddle/fluid/operators/math/vol2col.cu index fe5a600909893b8313d470923ef4d43eae155e76..eca39e919737210267d7af1856903d3e1fc697d1 100644 --- a/paddle/fluid/operators/math/vol2col.cu +++ b/paddle/fluid/operators/math/vol2col.cu @@ -90,16 +90,14 @@ class Vol2ColFunctor { const std::vector& strides, const std::vector& paddings, framework::Tensor* col, const DataLayout data_layout) const { - PADDLE_ENFORCE_EQ( - vol.dims().size(), 4, - platform::errors::InvalidArgument("The dimension of" - " vol should be 4, but received %d.", - vol.dims().size())); - PADDLE_ENFORCE_EQ( - col->dims().size(), 7, - platform::errors::InvalidArgument("The dimension of" - "col should be 7, but received %d.", - col->dims().size())); + PADDLE_ENFORCE_EQ(vol.dims().size(), 4, + platform::errors::InvalidArgument( + "The dimension of vol should be 4, but received %d.", + vol.dims().size())); + PADDLE_ENFORCE_EQ(col->dims().size(), 7, + platform::errors::InvalidArgument( + "The dimension of col should be 7, but received %d.", + col->dims().size())); int input_channels = (data_layout != DataLayout::kNHWC ? 
vol.dims()[0] : vol.dims()[3]); @@ -253,16 +251,14 @@ class Col2VolFunctor { const std::vector& strides, const std::vector& paddings, framework::Tensor* vol, const DataLayout data_layout) const { - PADDLE_ENFORCE_EQ( - vol->dims().size(), 4, - platform::errors::InvalidArgument("The dimension of vol" - " should be 4, but received %d.", - vol->dims().size())); - PADDLE_ENFORCE_EQ( - col.dims().size(), 7, - platform::errors::InvalidArgument("The dimension of col" - " should be 7, but received %d.", - col.dims().size())); + PADDLE_ENFORCE_EQ(vol->dims().size(), 4, + platform::errors::InvalidArgument( + "The dimension of vol should be 4, but received %d.", + vol->dims().size())); + PADDLE_ENFORCE_EQ(col.dims().size(), 7, + platform::errors::InvalidArgument( + "The dimension of col should be 7, but received %d.", + col.dims().size())); int input_channels = (data_layout != DataLayout::kNHWC ? vol->dims()[0] : vol->dims()[3]); @@ -291,29 +287,29 @@ class Col2VolFunctor { ((dilations[0] * (filter_depth - 1) + 1))) / strides[0] + 1; - PADDLE_ENFORCE_EQ(input_depth_tmp, output_depth, - platform::errors::InvalidArgument( - "input_depth(%d)" - " and output_depth(%d) are mismatching.", - input_depth_tmp, output_depth)); + PADDLE_ENFORCE_EQ( + input_depth_tmp, output_depth, + platform::errors::InvalidArgument( + "input_depth(%d) and output_depth(%d) are mismatching.", + input_depth_tmp, output_depth)); auto input_height_tmp = (input_height + pad_h_up + pad_h_down - ((dilations[1] * (filter_height - 1) + 1))) / strides[1] + 1; - PADDLE_ENFORCE_EQ(input_height_tmp, output_height, - platform::errors::InvalidArgument( - "input_height(%d)" - " and output_height(%d) are mismatching.", - input_height_tmp, output_height)); + PADDLE_ENFORCE_EQ( + input_height_tmp, output_height, + platform::errors::InvalidArgument( + "input_height(%d) and output_height(%d) are mismatching.", + input_height_tmp, output_height)); auto input_width_tmp = (input_width + pad_w_left + pad_w_right - ((dilations[2] * (filter_width - 1) + 1))) / strides[2] + 1; - PADDLE_ENFORCE_EQ(input_width_tmp, output_width, - platform::errors::InvalidArgument( - "input_width(%d)" - " and output_width(%d) are mismatching.", - input_width_tmp, output_width)); + PADDLE_ENFORCE_EQ( + input_width_tmp, output_width, + platform::errors::InvalidArgument( + "input_width(%d) and output_width(%d) are mismatching.", + input_width_tmp, output_width)); int num_kernels = input_channels * input_depth * input_height * input_width; diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index 3cafb0e9fc6147626f066bbeba1b10d074a37b87..b2815cbdc65b53beba9cdb1864d10875d5db5e62 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -86,8 +86,10 @@ class ConcatPrimitiveFactory { concat CreateConcatPrimitive(const concat::primitive_desc& concat_pd, Tensor* output, platform::CPUPlace place, const mkldnn::engine& mkldnn_engine) { - dst_mem = mkldnn::memory(concat_pd.dst_desc(), mkldnn_engine, - output->mutable_data(place)); + dst_mem = mkldnn::memory( + concat_pd.dst_desc(), mkldnn_engine, + output->mutable_data(place, concat_pd.dst_desc().get_size())); + return concat(concat_pd); } @@ -193,7 +195,9 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel { prim_creator.SetSrcDataHandleByIndex( *srcs, i, to_void_cast(multi_input[i]->data())); } - prim_creator.SetDstDataHandle(*dst_mem, output->mutable_data(place)); + 
prim_creator.SetDstDataHandle( + *dst_mem, + output->mutable_data(place, concat_pd->dst_desc().get_size())); } mkldnn::stream astream(mkldnn_engine); diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc index 29a86a35d7b26f41745907fb6bacf30506c027a0..a6c8f8656a4e252f1a1eedb6d67ca322f0747a66 100644 --- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc @@ -48,6 +48,7 @@ class QuantOpKernel : public framework::OpKernel { const T* input_data = input->data(); bool is_negative = ctx.Attr("is_negative_input"); + bool bfloat16 = ctx.Attr("bfloat16"); std::string key = platform::CreateKey(platform::ThreadIDasStr(), src_tz, scale_data, is_negative, ctx.OutputName("Output")); @@ -74,7 +75,10 @@ class QuantOpKernel : public framework::OpKernel { src_md, engine, to_void_cast(input_data)); std::shared_ptr dst_md; - if (is_negative) { + if (bfloat16) { + platform::SetDstMemoryQuantized( + ctx, output, dst_tz, engine, dst_md, dst_memory, out_format); + } else if (is_negative) { platform::SetDstMemoryQuantized(ctx, output, dst_tz, engine, dst_md, dst_memory, out_format); } else { @@ -96,7 +100,11 @@ class QuantOpKernel : public framework::OpKernel { dst_memory = std::static_pointer_cast( dev_ctx.GetBlob(key_dst_mem)); auto place = ctx.GetPlace(); - if (is_negative) { + + if (bfloat16) { + dst_memory->set_data_handle( + output->mutable_data(place)); + } else if (is_negative) { dst_memory->set_data_handle(output->mutable_data(place)); } else { dst_memory->set_data_handle(output->mutable_data(place)); diff --git a/paddle/fluid/operators/quantize_op.cc b/paddle/fluid/operators/quantize_op.cc index 8924e21b46f49b0fd0ec72e6acc7463d7d574d6f..602fdc6ff67787ace488379a2730dad4b8ffe1b1 100644 --- a/paddle/fluid/operators/quantize_op.cc +++ b/paddle/fluid/operators/quantize_op.cc @@ -40,6 +40,8 @@ void QuantOpMaker::Make() { AddAttr("output_format", "Convert format to NHWC or NCHW during quantization.") .SetDefault("NHWC"); + AddAttr("bfloat16", "(bool, default false) Convert to bfloat16") + .SetDefault(false); AddComment(R"DOC(This op will quantize data from FP32 to INT8)DOC"); } diff --git a/paddle/fluid/operators/scale_op.h b/paddle/fluid/operators/scale_op.h index 64ee868fb6d8b1cf55f6400a28c10038efc7884e..11c81d23b2ed271ce89e6a27b1179e7d06dd0ebd 100644 --- a/paddle/fluid/operators/scale_op.h +++ b/paddle/fluid/operators/scale_op.h @@ -60,7 +60,10 @@ class ScaleKernel : public framework::OpKernel { out->mutable_data(in->place()); PADDLE_ENFORCE_EQ(in->dims(), out->dims(), - "in and out should have the same dim"); + paddle::platform::errors::InvalidArgument( + "The input and output should have the same dim, " + "but input dim is %s, output dim is %s.", + in->dims(), out->dims())); auto eigen_out = framework::EigenVector::Flatten(*out); auto eigen_in = framework::EigenVector::Flatten(*in); diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h index 013170199a6bbe0246406b9c35a326bd063875a9..1186ed891e8c080c023aae5076cf1cb086fbc231 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h @@ -92,9 +92,11 @@ class SequenceExpandKernel : public framework::OpKernel { auto& x_lod = x->lod(); auto& y_lod = y->lod(); - PADDLE_ENFORCE_EQ(y_lod.empty(), false, - "Input(Y) Tensor of SequenceExpandOp does not contain " - "LoD information."); + 
PADDLE_ENFORCE_EQ( + y_lod.empty(), false, + platform::errors::InvalidArgument( + "Input(Y) Tensor of SequenceExpandOp does not contain " + "LoD information.")); if (ref_level == -1) ref_level = y_lod.size() - 1; diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index b06e8202cc79f017e26e3c8339ad05951a5a2bf7..52c4c63b473c443bb97fb7962179ce27e06fb16c 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -186,10 +186,17 @@ class SumOp : public framework::OperatorWithKernel { } } } - PADDLE_THROW("Cannot find the input data type by all input data"); + PADDLE_THROW(platform::errors::InvalidArgument( + "Expected each tensor in Input(x) in sum op to be initialized, but " + "some tensor in Input(x) is not initialized, please check your " + "code.")); } - PADDLE_THROW("Unexpected branch. Input type is %s", - framework::ToTypeName(x_vars[0]->Type())); + PADDLE_THROW(platform::errors::InvalidArgument( + "Expected type of Input(X) must be Tensor, SelectedRows or " + "LodTensorArray. But got " + "unsupported type: %s.", + framework::ToTypeName(x_vars[0]->Type()))); } }; diff --git a/paddle/fluid/operators/sum_op.cu b/paddle/fluid/operators/sum_op.cu index d0bf3a0abf58c47720216bd839eb84260ac207d8..6034cda50c32a857d8a501bf243a91df2f966eea 100644 --- a/paddle/fluid/operators/sum_op.cu +++ b/paddle/fluid/operators/sum_op.cu @@ -169,8 +169,18 @@ void SumToLoDTensor(const framework::ExecutionContext &context) { auto row_numel = sr_value.numel() / sr_rows.size(); auto out_dims = out->dims(); - PADDLE_ENFORCE_EQ(sr.height(), out_dims[0]); - PADDLE_ENFORCE_EQ(row_numel, out->numel() / sr.height()); + PADDLE_ENFORCE_EQ(sr.height(), out_dims[0], + platform::errors::InvalidArgument( + "The table height of input must be the same as output, " + "but received input height is %d" + ", output height is %d", + sr.height(), out_dims[0])); + PADDLE_ENFORCE_EQ(row_numel, out->numel() / sr.height(), + platform::errors::InvalidArgument( + "The table width of input must be the same as output, " + "but received input width is %d" + ", output width is %d", + row_numel, out->numel() / sr.height())); auto *sr_data = sr_value.data(); auto *sr_out_data = out->data(); @@ -231,8 +241,11 @@ class SumKernel } else if (out_var->IsType()) { LodTensorArrayCompute(context); } else { - PADDLE_THROW("Unexpected branch, output variable type is %s", - framework::ToTypeName(out_var->Type())); + PADDLE_THROW(platform::errors::InvalidArgument( + "Expected type of Output(out) must be Tensor, SelectedRows or " + "LodTensorArray. But got " + "unsupported type: %s.", + framework::ToTypeName(out_var->Type()))); } } }; diff --git a/paddle/fluid/operators/sum_op.h b/paddle/fluid/operators/sum_op.h index 6847a81377979ab05aec03f43ba08fbec646d974..4c8f7be6ea26394bd3143058260c1fc94ce1e7e1 100644 --- a/paddle/fluid/operators/sum_op.h +++ b/paddle/fluid/operators/sum_op.h @@ -182,7 +182,11 @@ class SumKernel : public framework::OpKernel { auto &in_t = in_vars[i]->Get(); functor(context.template device_context(), in_t, out); } else { - PADDLE_THROW("Variable type must be LoDTensor/SelectedRows."); + PADDLE_THROW(platform::errors::InvalidArgument( + "Expected type of Input(X) of %d-th must be Tensor, " + "SelectedRows. 
But got " + "unsupport type: %s.", + framework::ToTypeName(in_vars[i]->Type()))); } } } else if (out_var->IsType()) { @@ -190,8 +194,11 @@ class SumKernel : public framework::OpKernel { } else if (out_var->IsType()) { LodTensorArrayCompute(context); } else { - PADDLE_THROW("Unexpected branch, output variable type is %s", - framework::ToTypeName(out_var->Type())); + PADDLE_THROW(platform::errors::InvalidArgument( + "Expected type of Output(out) must be Tensor, SelectedRows, " + "LoDTensorArray. But got " + "unsupport type: %s.", + framework::ToTypeName(out_var->Type()))); } } }; diff --git a/paddle/fluid/operators/uniform_random_op.cc b/paddle/fluid/operators/uniform_random_op.cc index 9cffe09a33abf29308072d6b3c8bfb8a636048da..6efada4343ca54c0d56f98cae20963bf0182f47b 100644 --- a/paddle/fluid/operators/uniform_random_op.cc +++ b/paddle/fluid/operators/uniform_random_op.cc @@ -54,9 +54,11 @@ class CPUUniformRandomKernel : public framework::OpKernel { tensor = out_var->GetMutable(); if (!new_shape.empty()) tensor->Resize(framework::make_ddim(new_shape)); } else { - PADDLE_THROW( - "uniform_random_op's output only" - "supports SelectedRows and LoDTensor"); + PADDLE_THROW(platform::errors::InvalidArgument( + "Expected type of Output(out) in uniform_random_op must be Tensor, " + "SelectedRows. But got " + "unsupport type: %s.", + framework::ToTypeName(out_var->Type()))); } T *data = tensor->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/uniform_random_op.cu b/paddle/fluid/operators/uniform_random_op.cu index 6237137cccbc6840b345c9e26dda1ccdc8df43b0..563a6c165b748543516eabbcdb0e1c8b9be8a44d 100644 --- a/paddle/fluid/operators/uniform_random_op.cu +++ b/paddle/fluid/operators/uniform_random_op.cu @@ -116,9 +116,11 @@ class GPUUniformRandomKernel : public framework::OpKernel { tensor = out_var->GetMutable(); if (!new_shape.empty()) tensor->Resize(framework::make_ddim(new_shape)); } else { - PADDLE_THROW( - "uniform_random_op's output only" - "supports SelectedRows and LoDTensor"); + PADDLE_THROW(platform::errors::InvalidArgument( + "Expected type of Output(out) in uniform_random_op must be Tensor, " + "SelectedRows. But got " + "unsupport type: %s.", + framework::ToTypeName(out_var->Type()))); } T* data = tensor->mutable_data(context.GetPlace()); unsigned int seed = static_cast(context.Attr("seed")); diff --git a/paddle/fluid/operators/uniform_random_op.h b/paddle/fluid/operators/uniform_random_op.h index d263dd03dd0de0d1b12925d0c3ec428b6730ef2e..6052e533643f3c4e5be977a87fceafa932892862 100644 --- a/paddle/fluid/operators/uniform_random_op.h +++ b/paddle/fluid/operators/uniform_random_op.h @@ -50,7 +50,10 @@ inline std::vector GetNewDataFromShapeTensor( } return vec_new_data; } else { - PADDLE_THROW("The dtype of shape tensor must be int32 or int64."); + PADDLE_THROW(platform::errors::InvalidArgument( + "Expected dtype of ShapeTensor must be int32, int64. But got " + "unsupport dtype: %s.", + paddle::framework::DataTypeToString(new_data_tensor->type()))); } } @@ -84,7 +87,11 @@ inline std::vector GetNewDataFromShapeTensorList( vec_new_shape.push_back(*tensor->data()); } } else { - PADDLE_THROW("The dtype of shape tensor must be int32 or int64."); + PADDLE_THROW(platform::errors::InvalidArgument( + "Expected dtype of ShapeTensorList of %d-th must be int32, int64. 
" + "But got " + "unsupport dtype: %s.", + i, paddle::framework::DataTypeToString(tensor->type()))); } } diff --git a/paddle/fluid/platform/cudnn_helper.h b/paddle/fluid/platform/cudnn_helper.h index bbe847e7190d6f9812dcc814d4b4fe74a0cc7ef6..bb4c2a89f6fa5e531aa322b69218cf58d3e94285 100644 --- a/paddle/fluid/platform/cudnn_helper.h +++ b/paddle/fluid/platform/cudnn_helper.h @@ -287,6 +287,8 @@ class ScopedTensorDescriptor { return descriptor(CudnnDataType::type, dim, stride); } + inline cudnnTensorDescriptor_t desc() { return desc_; } + private: cudnnTensorDescriptor_t desc_; DISABLE_COPY_AND_ASSIGN(ScopedTensorDescriptor); @@ -329,6 +331,8 @@ class ScopedRNNTensorDescriptor { input_size, time_major, seq_length); } + inline cudnnRNNDataDescriptor_t desc() { return desc_; } + private: cudnnRNNDataDescriptor_t desc_; DISABLE_COPY_AND_ASSIGN(ScopedRNNTensorDescriptor); @@ -361,6 +365,7 @@ class ScopedDropoutDescriptor { } return desc_; } + inline cudnnDropoutDescriptor_t desc() { return desc_; } private: cudnnDropoutDescriptor_t desc_; @@ -376,7 +381,7 @@ class ScopedRNNDescriptor { PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyRNNDescriptor(desc_)); } - inline cudnnRNNDescriptor_t descriptor() { return desc_; } + inline cudnnRNNDescriptor_t desc() { return desc_; } private: cudnnRNNDescriptor_t desc_; @@ -419,172 +424,13 @@ class ScopedFilterDescriptor { kernel, groups); } + inline cudnnFilterDescriptor_t desc() { return desc_; } + private: cudnnFilterDescriptor_t desc_; DISABLE_COPY_AND_ASSIGN(ScopedFilterDescriptor); }; -class ScopedRNNBase { - public: - ScopedRNNBase(int seq_length, int batch_size, int input_size, int hidden_size, - int num_layers, float dropout_prob, int seed, int weight_numel, - bool initialized, bool is_bidirec) - : seq_length_(seq_length), - batch_size_(batch_size), - input_size_(input_size), - hidden_size_(hidden_size), - num_layers_(num_layers), - dropout_prob_(dropout_prob), - seed_(seed), - weight_numel_(weight_numel), - initialized_(initialized), - is_bidirec_(is_bidirec) {} - - template - void Create(const cudnnHandle_t& handle, const platform::Place& place, - std::vector sequence_length, size_t* workspace_size, - size_t* reserve_size, framework::Tensor* dropout_state) { - int numDirections = is_bidirec_ ? 
2 : 1; - cudnnDataType_t cudnn_type = platform::CudnnDataType::type; - - // ------------------- cudnn x, y descriptors --------------------- - std::vector dims_x = {batch_size_, input_size_, 1}; - std::vector strides_x = {input_size_, 1, 1}; - - std::vector dims_y = {batch_size_, hidden_size_ * numDirections, 1}; - std::vector strides_y = {hidden_size_ * numDirections, 1, 1}; - - for (int i = 0; i < seq_length_; ++i) { - x_desc_.emplace_back(x_d.descriptor(dims_x, strides_x)); - y_desc_.emplace_back(y_d.descriptor(dims_y, strides_y)); - } - - if (!sequence_length.empty()) { - x_seq_desc_ = x_seq_d.descriptor(seq_length_, batch_size_, input_size_, - true, sequence_length); - y_seq_desc_ = y_seq_d.descriptor(seq_length_, batch_size_, - hidden_size_ * numDirections, true, - sequence_length); - } - - // ------------------- cudnn hx, hy, cx, cy descriptors---------- - std::vector dims_hx = {num_layers_ * numDirections, batch_size_, - hidden_size_}; - std::vector strides_hx = {hidden_size_ * batch_size_, hidden_size_, 1}; - - hx_desc_ = hx_d.descriptor(dims_hx, strides_hx); - cx_desc_ = cx_d.descriptor(dims_hx, strides_hx); - hy_desc_ = hy_d.descriptor(dims_hx, strides_hx); - cy_desc_ = cy_d.descriptor(dims_hx, strides_hx); - - // ------------------- cudnn dropout descriptors --------------------- - size_t state_size; - if (!initialized_) { - PADDLE_ENFORCE_CUDA_SUCCESS( - dynload::cudnnDropoutGetStatesSize(handle, &state_size)); - dropout_state->mutable_data({static_cast(state_size)}, - place); - } - dropout_desc_ = - dropout_d.descriptor(handle, place, initialized_, dropout_prob_, - dropout_state, seed_, state_size); - - // ------------------- cudnn rnn descriptors --------------------- - rnn_desc_ = rnn_d.descriptor(); - -#if CUDNN_VERSION >= 6000 - PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor_v6( - handle, rnn_desc_, hidden_size_, num_layers_, dropout_desc_, - CUDNN_LINEAR_INPUT, - is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM, - CUDNN_RNN_ALGO_STANDARD, cudnn_type)); -#else - PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor( - rnn_desc_, hidden_size_, num_layers_, dropout_desc_, CUDNN_LINEAR_INPUT, - is_bidirec_ ? 
CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM, - cudnn_type)); -#endif - if (!sequence_length.empty()) { - PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNPaddingMode( - rnn_desc_, CUDNN_RNN_PADDED_IO_ENABLED)); - } - // ------------------- cudnn weights_size --------------------- - size_t weights_size_; - PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNParamsSize( - handle, rnn_desc_, x_desc_[0], &weights_size_, cudnn_type)); - - PADDLE_ENFORCE_EQ( - weights_size_, sizeof(T) * weight_numel_, - platform::errors::InvalidArgument( - "The cudnn lstm and setting weight size should be same.")); - - // ------------------- cudnn weight descriptors --------------------- - platform::DataLayout layout = platform::DataLayout::kNCHW; - int dim_tmp = weights_size_ / sizeof(T); - std::vector dim_w = {dim_tmp, 1, 1}; - w_desc_ = w_d.descriptor(layout, dim_w); - - // ------------------- cudnn workspace, reserve size --------------------- - PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNWorkspaceSize( - handle, rnn_desc_, seq_length_, x_desc_.data(), workspace_size)); - PADDLE_ENFORCE_CUDA_SUCCESS( - platform::dynload::cudnnGetRNNTrainingReserveSize( - handle, rnn_desc_, seq_length_, x_desc_.data(), reserve_size)); - } - - cudnnTensorDescriptor_t* x_desc() { return x_desc_.data(); } - cudnnTensorDescriptor_t* y_desc() { return y_desc_.data(); } - cudnnRNNDataDescriptor_t x_seq_desc() { return x_seq_desc_; } - cudnnRNNDataDescriptor_t y_seq_desc() { return y_seq_desc_; } - cudnnTensorDescriptor_t hx_desc() { return hx_desc_; } - cudnnTensorDescriptor_t cx_desc() { return cx_desc_; } - cudnnTensorDescriptor_t hy_desc() { return hy_desc_; } - cudnnTensorDescriptor_t cy_desc() { return cy_desc_; } - cudnnRNNDescriptor_t rnn_desc() { return rnn_desc_; } - cudnnDropoutDescriptor_t dropout_desc() { return dropout_desc_; } - cudnnFilterDescriptor_t w_desc() { return w_desc_; } - - private: - int seq_length_; - int batch_size_; - int input_size_; - int hidden_size_; - int num_layers_; - float dropout_prob_; - int seed_; - int weight_numel_; - bool initialized_; - bool is_bidirec_; - - std::vector x_desc_; - std::vector y_desc_; - cudnnRNNDataDescriptor_t x_seq_desc_; - cudnnRNNDataDescriptor_t y_seq_desc_; - // A tensor descriptor describing the initial hidden state of the RNN. - cudnnTensorDescriptor_t hx_desc_; - // A tensor descriptor describing the initial cell state for LSTM networks. - cudnnTensorDescriptor_t cx_desc_; - // A tensor descriptor describing the final hidden state of the RNN. - cudnnTensorDescriptor_t hy_desc_; - // A tensor descriptor describing the final cell state for LSTM networks. 
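Note: the PADDLE_ENFORCE_* rewrites throughout the operator hunks above all converge on one message convention: name the expected value, then the received one, wrapped in platform::errors::InvalidArgument. A minimal Python sketch of that convention for reference (illustrative only, not Paddle API):

def enforce_eq(actual, expected, name):
    # Mirrors the "expected ..., but received ..." wording adopted by the
    # PADDLE_ENFORCE_EQ(..., platform::errors::InvalidArgument(...)) calls above.
    if actual != expected:
        raise ValueError("The %s should be %s, but received %s."
                         % (name, expected, actual))

enforce_eq(len((1, 2, 3)), 3, "dimension of tensor 'im'")  # passes silently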
- cudnnTensorDescriptor_t cy_desc_; - cudnnDropoutDescriptor_t dropout_desc_; - cudnnFilterDescriptor_t w_desc_; - cudnnRNNDescriptor_t rnn_desc_; - - ScopedTensorDescriptor x_d; - ScopedTensorDescriptor y_d; - ScopedRNNTensorDescriptor x_seq_d; - ScopedRNNTensorDescriptor y_seq_d; - ScopedTensorDescriptor hx_d; - ScopedTensorDescriptor cx_d; - ScopedTensorDescriptor hy_d; - ScopedTensorDescriptor cy_d; - ScopedDropoutDescriptor dropout_d; - ScopedFilterDescriptor w_d; - ScopedRNNDescriptor rnn_d; -}; - class ScopedConvolutionDescriptor { public: ScopedConvolutionDescriptor() { diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index 8fb66c6f34bd8453f1aceb731bb1cd94b8e75a69..b012a103ea3031efb381d7039b15e82b2af52bf7 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -443,6 +443,13 @@ inline bool HasOpINT8DataType(const paddle::framework::OpDesc* op) { op->GetAttrIfExists("use_quantizer")); } +inline bool HasOpBFLOAT16DataType(const paddle::framework::OpDesc* op) { + return op->GetAttrIfExists("mkldnn_data_type") == "bfloat16"; +} + +inline bool HasOpFLOAT32DataType(const paddle::framework::OpDesc* op) { + return op->GetAttrIfExists("mkldnn_data_type") == "float32"; +} enum class RNNReorderType { PP_NTC, PP_TNC, NTC_PP, TNC_PP }; } // namespace platform diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index d733cf26ed209bcb86eaf2d366e45cfa0e7f9a90..92d9473141009216e3c7e64ccb793884dc67aadc 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -38,6 +38,7 @@ set(PYBIND_SRCS imperative.cc ir.cc inference_api.cc + compatible.cc generator_py.cc) if(WITH_GLOO) diff --git a/paddle/fluid/pybind/compatible.cc b/paddle/fluid/pybind/compatible.cc new file mode 100644 index 0000000000000000000000000000000000000000..971d230458db4bc2196ca529e01b0586da79567c --- /dev/null +++ b/paddle/fluid/pybind/compatible.cc @@ -0,0 +1,38 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/pybind/compatible.h" + +#include +#include + +#include "paddle/fluid/framework/op_version_registry.h" + +namespace py = pybind11; + +using paddle::framework::compatible::PassVersionCheckerRegistrar; + +namespace paddle { +namespace pybind { + +void BindCompatible(py::module* m) { + py::class_(*m, "PassVersionChecker") + .def_static("IsCompatible", [](const std::string& name) -> bool { + auto instance = PassVersionCheckerRegistrar::GetInstance(); + return instance.IsPassCompatible(name); + }); +} + +} // namespace pybind +} // namespace paddle diff --git a/paddle/fluid/pybind/compatible.h b/paddle/fluid/pybind/compatible.h new file mode 100644 index 0000000000000000000000000000000000000000..f9d4cf5888fee8f62ce2e64636da6b98542b1a75 --- /dev/null +++ b/paddle/fluid/pybind/compatible.h @@ -0,0 +1,23 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +namespace paddle { +namespace pybind { +void BindCompatible(pybind11::module *m); +} // namespace pybind +} // namespace paddle diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc index 9950eb9adc241ca5c82b4b0289dd57da4195e558..97056eca411f29e9a2c379cbcb2f88775242f692 100644 --- a/paddle/fluid/pybind/protobuf.cc +++ b/paddle/fluid/pybind/protobuf.cc @@ -184,6 +184,7 @@ void BindVarDsec(pybind11::module *m) { .value("FP16", pd::proto::VarType::FP16) .value("FP32", pd::proto::VarType::FP32) .value("FP64", pd::proto::VarType::FP64) + .value("BF16", pd::proto::VarType::BF16) .value("LOD_TENSOR", pd::proto::VarType::LOD_TENSOR) .value("SELECTED_ROWS", pd::proto::VarType::SELECTED_ROWS) .value("FEED_MINIBATCH", pd::proto::VarType::FEED_MINIBATCH) diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 4b8f7c853ceaf2148722a9c65f38e0ec3d9f4df5..330254ecaafd29c00e8942765956ea065d2bb7cf 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -60,6 +60,7 @@ limitations under the License. */ #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/pybind/box_helper_py.h" +#include "paddle/fluid/pybind/compatible.h" #include "paddle/fluid/pybind/const_value.h" #include "paddle/fluid/pybind/data_set_py.h" #include "paddle/fluid/pybind/exception.h" @@ -2619,6 +2620,7 @@ All parameter, weight, gradient are variables in Paddle. BindGraph(&m); BindNode(&m); BindInferenceApi(&m); + BindCompatible(&m); BindDataset(&m); BindGenerator(&m); #ifdef PADDLE_WITH_CRYPTO diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index 15610abef0f2d07eeb02e37bb0d4cbf394c94d90..9e150763dbb30ec6196ce2e62d28f737f42185fb 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -51,6 +51,17 @@ if %ERRORLEVEL% NEQ 0 ( exit /b 7 ) +rem ------pre install clcache and init config---------- +pip install clcache +:: set USE_CLCACHE to enable clcache +set USE_CLCACHE=1 +:: In some scenarios, CLCACHE_HARDLINK can save one file copy. +set CLCACHE_HARDLINK=1 +:: If it takes more than 1000s to obtain the right to use the cache, an error will be reported +set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000 +:: set maximum cache size to 20G +clcache.exe -M 21474836480 + rem ------initialize common variable------ if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0" if not defined BRANCH set BRANCH=develop @@ -173,7 +184,7 @@ echo Build third_party successfully! 
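Note: the 21474836480 passed to "clcache.exe -M" in the build-script hunk above is exactly 20 GiB, matching the "set maximum cache size to 20G" comment; a one-line sanity check in Python:

# 20 GiB expressed in bytes, as configured by "clcache.exe -M 21474836480" above.
assert 20 * 1024 ** 3 == 21474836480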
set build_times=1 :build_paddle echo Build Paddle the %build_times% time: -msbuild /m:%PARALLEL_PROJECT_COUNT% /p:Configuration=Release /verbosity:minimal paddle.sln +msbuild /m:%PARALLEL_PROJECT_COUNT% /p:TrackFileAccess=false /p:CLToolExe=clcache.exe /p:CLToolPath=%PYTHON_ROOT%\Scripts /p:Configuration=Release /verbosity:minimal paddle.sln if %ERRORLEVEL% NEQ 0 ( set /a build_times=%build_times%+1 if %build_times% GTR 2 ( diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index ed0b415d0bfd86b5160d339a286cfddac37cf4df..016726633ea355ed20149e94833ca7e1657c3f7d 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -49,6 +49,7 @@ import paddle.optimizer import paddle.metric import paddle.device import paddle.incubate.complex as complex +import paddle.regularizer # TODO: define alias in tensor and framework directory diff --git a/python/paddle/distributed/__init__.py b/python/paddle/distributed/__init__.py index b7357eef7ad9a3abae7f9c1c09fdc00b409061ad..27c82227316309b370aefe5e0550230c3f703c8c 100644 --- a/python/paddle/distributed/__init__.py +++ b/python/paddle/distributed/__init__.py @@ -21,6 +21,7 @@ from .parallel import get_rank from .parallel import get_world_size from paddle.fluid.dygraph.parallel import prepare_context #DEFINE_ALIAS from paddle.fluid.dygraph.parallel import ParallelEnv #DEFINE_ALIAS +from paddle.distributed.fleet.dataset import * from . import collective from .collective import * @@ -30,11 +31,8 @@ __all__ = ["spawn"] # dygraph parallel apis __all__ += [ - "init_parallel_env", - "get_rank", - "get_world_size", - "prepare_context", - "ParallelEnv", + "init_parallel_env", "get_rank", "get_world_size", "prepare_context", + "ParallelEnv", "InMemoryDataset", "QueueDataset" ] # collective apis diff --git a/python/paddle/distributed/cloud_utils.py b/python/paddle/distributed/cloud_utils.py index 345b783d60bb79e99c98c4e9d212aa11cbe91dcc..5b7268e4b64fe34e6376819a7ac5659d1a5f5959 100644 --- a/python/paddle/distributed/cloud_utils.py +++ b/python/paddle/distributed/cloud_utils.py @@ -19,7 +19,7 @@ from paddle.distributed.utils import get_cluster, logger def get_cloud_cluster(args_node_ips, args_node_ip, args_port, selected_gpus): """ - args_node_ips, args_node_ip:string + args_node_ips:string, args_node_ip:string, args_port: int, selected_gpus:list """ #you can automatically get ip info while using paddlecloud multi nodes mode. 
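Note: the get_cloud_cluster rewrite that follows (and its twin in fleet/cloud_utils.py further down) now prefers the DISTRIBUTED_TRAINER_ENDPOINTS environment variable and groups it into one endpoint list per node. A standalone sketch of that grouping, assuming the comma-separated "ip:port" format shown in the new comments:

def group_endpoints(trainer_endpoints, num_nodes, paddle_ports_num):
    # "ip1:p1,ip1:p2,ip2:p3,ip2:p4" -> one sublist of endpoints per node,
    # matching the slicing done in the hunk below.
    endpoints = trainer_endpoints.split(",")
    assert num_nodes * paddle_ports_num == len(endpoints)
    return [endpoints[i * paddle_ports_num:(i + 1) * paddle_ports_num]
            for i in range(num_nodes)]

print(group_endpoints("ip1:6170,ip1:6171,ip2:6170,ip2:6171", 2, 2))
# [['ip1:6170', 'ip1:6171'], ['ip2:6170', 'ip2:6171']]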
node_ips = os.getenv("PADDLE_TRAINERS") @@ -31,6 +31,9 @@ def get_cloud_cluster(args_node_ips, args_node_ip, args_port, selected_gpus): node_rank = os.getenv("PADDLE_TRAINER_ID") assert node_rank is not None, "PADDLE_TRAINER_ID should not be None" + paddle_ports_num = int(os.getenv("TRAINER_PORTS_NUM")) + assert paddle_ports_num is not None, "TRAINER_PORTS_NUM should not be None" + node_ips = node_ips.split(",") num_nodes = len(node_ips) node_rank = int(node_rank) @@ -47,32 +50,47 @@ automatically got from PADDLE_TRAINERS(multi nodes) or POD_IP(single node).\ Your input cluster_node_ips: {} doesn't equals to IPs: {} from \ paddlecloud environment.".format(args_node_ips, node_ips)) - started_port = args_port - print("num_nodes:", num_nodes) - if num_nodes > 1: - try: - paddle_port = int(os.getenv("PADDLE_PORT", "")) - paddle_port_num = int(os.getenv("TRAINER_PORTS_NUM", "")) - - if paddle_port_num >= len( - selected_gpus) and paddle_port != args_port: - logger.warning("Use Cloud specified port:{}.".format( - paddle_port)) - started_port = paddle_port - - except Exception as e: - print(e) - pass - - if started_port is None: - started_port = 6170 - - logger.debug("parsed from args:node_ips:{} \ - node_ip:{} node_rank:{} started_port:{}" - .format(node_ips, node_ip, node_rank, started_port)) - - ports = [x for x in range(started_port, started_port + len(selected_gpus))] - cluster, pod = get_cluster(node_ips, node_ip, ports, selected_gpus) + # DISTRIBUTED_TRAINER_ENDPOINTS: new environment since paddlecloud 1.8.4 + # e.g: DISTRIBUTED_TRAINER_ENDPOINTS="ip1:port1,ip1:port2,ip1:port3,ip1:port4,ip2:port5,ip2:port6,ip2:port7,ip2:port8" + trainer_endpoints = os.getenv("DISTRIBUTED_TRAINER_ENDPOINTS") + if trainer_endpoints is None: + started_port = args_port + if num_nodes > 1: + try: + paddle_port = int(os.getenv("PADDLE_PORT", "")) + + if paddle_ports_num >= len( + selected_gpus) and paddle_port != args_port: + logger.warning("Use Cloud specified port:{}.".format( + paddle_port)) + started_port = paddle_port + + except Exception as e: + print(e) + pass + + if started_port is None: + started_port = 6170 + ports = [ + x for x in range(started_port, started_port + len(selected_gpus)) + ] + trainer_endpoints = [] + for ip in node_ips: + trainer_endpoints.append(["%s:%d" % (ip, port) for port in ports]) + else: + trainer_endpoints_ori = trainer_endpoints.split(",") + trainer_endpoints = [] + assert num_nodes * paddle_ports_num == len(trainer_endpoints_ori) + for i in range(num_nodes): + trainer_endpoints.append(trainer_endpoints_ori[ + i * paddle_ports_num:(i + 1) * paddle_ports_num]) + + logger.debug("parsed from args: node_ips:{} \ + node_ip:{} node_rank:{} trainer_endpoints:{}" + .format(node_ips, node_ip, node_rank, trainer_endpoints)) + + cluster, pod = get_cluster(node_ips, node_ip, trainer_endpoints, + selected_gpus) return cluster, cluster.pods[node_rank] diff --git a/python/paddle/distributed/fleet/__init__.py b/python/paddle/distributed/fleet/__init__.py index 5f0cf9f93d62eba9b81e8a834b52f84122f2702d..2539fa57a34b1fe6fdea6b6b847d52f765df3fa3 100644 --- a/python/paddle/distributed/fleet/__init__.py +++ b/python/paddle/distributed/fleet/__init__.py @@ -23,7 +23,6 @@ from .dataset import * __all__ = [ "DistributedStrategy", "UtilBase", - "DatasetFactory", "UserDefinedRoleMaker", "PaddleCloudRoleMaker", "Fleet", diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index 
1b86056c00443be4170757cee3cc60bbafd0f40b..f1c836468daf36db753c67a3e09757be728d37a7 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -728,6 +728,63 @@ class DistributedStrategy(object): "localsgd_configs") assign_configs_value(self.strategy.localsgd_configs, configs) + @property + def adaptive_localsgd(self): + """ + Indicating whether we are using Adaptive Local SGD training. Default Value: False + For more details, please refer to `Adaptive Communication Strategies to Achieve + the Best Error-Runtime Trade-off in Local-Update SGD `_. + + + Examples: + .. code-block:: python + + import paddle.distributed.fleet as fleet + strategy = fleet.DistributedStrategy() + strategy.adaptive_localsgd = True # by default this is false + + """ + return self.strategy.adaptive_localsgd + + @adaptive_localsgd.setter + @is_strict_auto + def adaptive_localsgd(self, flag): + if isinstance(flag, bool): + self.strategy.adaptive_localsgd = flag + else: + print("WARNING: adaptive_localsgd should have value of bool type") + + @property + def adaptive_localsgd_configs(self): + """ + Set AdaptiveLocalSGD training configurations. AdaptiveLocalSGD has configurable + settings that can be set through a dict. + + **Notes**: + init_k_steps(int) The initial steps for training before adaptive localsgd. + Then, the adaptive localsgd method will modify init_k_steps automatically. + Default 1. + begin_step(int) The step of beginning training by adaptive localsgd. Default 1. + + Examples: + .. code-block:: python + + import paddle.distributed.fleet as fleet + strategy = fleet.DistributedStrategy() + strategy.adaptive_localsgd = True + strategy.adaptive_localsgd_configs = {"init_k_steps": 1, + "begin_step": 30} + """ + + return get_msg_dict(self.strategy.adaptive_localsgd_configs) + + @adaptive_localsgd_configs.setter + @is_strict_auto + def adaptive_localsgd_configs(self, configs): + check_configs_key(self.strategy.adaptive_localsgd_configs, configs, + "adaptive_localsgd_configs") + assign_configs_value(self.strategy.adaptive_localsgd_configs, configs) + @property def dgc(self): """ diff --git a/python/paddle/distributed/fleet/base/strategy_compiler.py b/python/paddle/distributed/fleet/base/strategy_compiler.py index 4097fc1237f8d7616101810f994c243dffb2cd67..29e10661888f8a7fd6e3c40ee356aad326c193a9 100644 --- a/python/paddle/distributed/fleet/base/strategy_compiler.py +++ b/python/paddle/distributed/fleet/base/strategy_compiler.py @@ -60,7 +60,7 @@ class StrategyCompiler(StrategyCompilerBase): def _get_valid_strategy(self, dist_strategy, can_not_apply_optimizer_list): import copy - valid_strategy = copy.copy(dist_strategy) + valid_strategy = copy.deepcopy(dist_strategy) invalid_optimizers = [] for candidate in self._meta_optimizer_candidates: is_valid = False diff --git a/python/paddle/distributed/fleet/cloud_utils.py b/python/paddle/distributed/fleet/cloud_utils.py index 49d66118d902e43f7ee0c4003c516081092b2a97..a1203bed85cadd859132ad67159b604c7b78916b 100644 --- a/python/paddle/distributed/fleet/cloud_utils.py +++ b/python/paddle/distributed/fleet/cloud_utils.py @@ -19,7 +19,7 @@ from paddle.distributed.fleet.launch_utils import get_cluster, logger def get_cloud_cluster(args_node_ips, selected_gpus, args_port=6170): """ - args_node_ips, args_node_ip:string + args_node_ips:string, selected_gpus:list, args_port: int """ #you can automatically get ip info while using paddlecloud multi nodes mode. 
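Note on the strategy_compiler change above (copy.copy -> copy.deepcopy): a shallow copy shares the nested strategy state, so pruning an invalid optimizer on the "valid" copy would silently mutate the user's original strategy. A minimal sketch of the failure mode, with hypothetical names:

import copy

class Strategy(object):
    def __init__(self):
        self.configs = {"localsgd": True}  # stands in for the nested proto state

original = Strategy()
shallow = copy.copy(original)
shallow.configs["localsgd"] = False   # shallow copy shares the dict: leaks into the original
assert original.configs["localsgd"] is False

original.configs["localsgd"] = True
deep = copy.deepcopy(original)
deep.configs["localsgd"] = False      # deep copy is isolated: original stays untouched
assert original.configs["localsgd"] is True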
node_ips = os.getenv("PADDLE_TRAINERS") @@ -31,6 +31,9 @@ def get_cloud_cluster(args_node_ips, selected_gpus, args_port=6170): node_rank = os.getenv("PADDLE_TRAINER_ID") assert node_rank is not None, "PADDLE_TRAINER_ID should not be None" + paddle_ports_num = int(os.getenv("TRAINER_PORTS_NUM")) + assert paddle_ports_num is not None, "TRAINER_PORTS_NUM should not be None" + node_ips = node_ips.split(",") num_nodes = len(node_ips) node_rank = int(node_rank) @@ -42,32 +45,47 @@ automatically got from PADDLE_TRAINERS(multi nodes) or POD_IP(single node).\ Your input cluster_node_ips: {} doesn't equals to IPs: {} from \ paddlecloud environment.".format(args_node_ips, node_ips)) - started_port = args_port - print("num_nodes:", num_nodes) - if num_nodes > 1: - try: - paddle_port = int(os.getenv("PADDLE_PORT", "")) - paddle_port_num = int(os.getenv("TRAINER_PORTS_NUM", "")) - - if paddle_port_num >= len( - selected_gpus) and paddle_port != args_port: - logger.warning("Use Cloud specified port:{}.".format( - paddle_port)) - started_port = paddle_port - - except Exception as e: - print(e) - pass - - if started_port is None: - started_port = 6170 - - logger.debug("parsed from args:node_ips:{} \ - node_ip:{} node_rank:{} started_port:{}" - .format(node_ips, node_ip, node_rank, started_port)) - - ports = [x for x in range(started_port, started_port + len(selected_gpus))] - cluster, pod = get_cluster(node_ips, node_ip, ports, selected_gpus) + # DISTRIBUTED_TRAINER_ENDPOINTS: new environment since paddlecloud 1.8.4 + # e.g: DISTRIBUTED_TRAINER_ENDPOINTS="ip1:port1,ip1:port2,ip1:port3,ip1:port4,ip2:port5,ip2:port6,ip2:port7,ip2:port8" + trainer_endpoints = os.getenv("DISTRIBUTED_TRAINER_ENDPOINTS") + if trainer_endpoints is None: + started_port = args_port + if num_nodes > 1: + try: + paddle_port = int(os.getenv("PADDLE_PORT", "")) + + if paddle_ports_num >= len( + selected_gpus) and paddle_port != args_port: + logger.warning("Use Cloud specified port:{}.".format( + paddle_port)) + started_port = paddle_port + + except Exception as e: + print(e) + pass + + if started_port is None: + started_port = 6170 + ports = [ + x for x in range(started_port, started_port + len(selected_gpus)) + ] + trainer_endpoints = [] + for ip in node_ips: + trainer_endpoints.append(["%s:%d" % (ip, port) for port in ports]) + else: + trainer_endpoints_ori = trainer_endpoints.split(",") + trainer_endpoints = [] + assert num_nodes * paddle_ports_num == len(trainer_endpoints_ori) + for i in range(num_nodes): + trainer_endpoints.append(trainer_endpoints_ori[ + i * paddle_ports_num:(i + 1) * paddle_ports_num]) + + logger.debug("parsed from args: node_ips:{} \ + node_ip:{} node_rank:{} trainer_endpoints:{}" + .format(node_ips, node_ip, node_rank, trainer_endpoints)) + + cluster, pod = get_cluster(node_ips, node_ip, trainer_endpoints, + selected_gpus) return cluster, cluster.pods[node_rank] @@ -75,7 +93,8 @@ def use_paddlecloud(): node_ips = os.getenv("PADDLE_TRAINERS") node_ip = os.getenv("POD_IP") node_rank = os.getenv("PADDLE_TRAINER_ID") - if node_ips is None or node_ip is None or node_rank is None: + paddle_ports_num = os.getenv("TRAINER_PORTS_NUM") + if node_ips is None or node_ip is None or node_rank is None or paddle_ports_num is None: return False else: return True diff --git a/python/paddle/distributed/fleet/dataset/dataset.py b/python/paddle/distributed/fleet/dataset/dataset.py index f6504cacd9680806a13b4bb815247124b7e6a23c..5bd971181ed34e53ec90a31eb7371071372d443a 100644 --- a/python/paddle/distributed/fleet/dataset/dataset.py 
+++ b/python/paddle/distributed/fleet/dataset/dataset.py @@ -14,54 +14,11 @@ """This is definition of dataset class, which is high performance IO.""" import paddle -import paddle.fluid as fluid from paddle.fluid.proto import data_feed_pb2 from google.protobuf import text_format import paddle.fluid.core as core -class DatasetFactory(object): - """ - DatasetFactory is a factory which create dataset by its name, - you can create "QueueDataset" or "InMemoryDataset", or "FileInstantDataset", - the default is "QueueDataset". - - Example: - .. code-block:: python - - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - - """ - - def __init__(self): - """ Init. """ - pass - - def create_dataset(self, datafeed_class="QueueDataset"): - """ - Create "QueueDataset" or "InMemoryDataset", or "FileInstantDataset", - the default is "QueueDataset". - - Args: - datafeed_class(str): datafeed class name, QueueDataset or InMemoryDataset. - Default is QueueDataset. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset() - - """ - try: - dataset = globals()[datafeed_class]() - return dataset - except: - raise ValueError("datafeed class %s does not exist" % - datafeed_class) - - class DatasetBase(object): """ Base dataset class. """ @@ -75,96 +32,67 @@ class DatasetBase(object): self.thread_num = 1 self.filelist = [] - def set_pipe_command(self, pipe_command): + def init(self, + batch_size=1, + thread_num=1, + use_var=[], + pipe_command="cat", + input_type=0, + fs_name="", + fs_ugi="", + download_cmd="cat"): """ - Set pipe command of current dataset - A pipe command is a UNIX pipeline command that can be used only - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset() - dataset.set_pipe_command("python my_script.py") + should be called only once in user's python scripts to initialize settings of the dataset instance. + Normally, it is called by InMemoryDataset or QueueDataset. Args: - pipe_command(str): pipe command + batch_size(int): batch size. It will be effective during training. default is 1. + thread_num(int): thread num, it is the num of readers. default is 1. + use_var(list): list of variables. Variables which you will use. default is []. + pipe_command(str): pipe command of current dataset. A pipe command is a UNIX pipeline command. default is "cat" + input_type(int): the input type of generated input. 0 is for one sample, 1 is for one batch. default is 0. + fs_name(str): fs name. default is "". + fs_ugi(str): fs ugi. default is "". + download_cmd(str): customized download command. default is "cat" - """ - self.proto_desc.pipe_command = pipe_command - def set_rank_offset(self, rank_offset): """ - Set rank_offset for merge_pv. It set the message of Pv. - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset() - dataset.set_rank_offset("rank_offset") - - Args: - rank_offset(str): rank_offset's name + self._set_batch_size(batch_size) + self._set_thread(thread_num) + self._set_use_var(use_var) + self._set_pipe_command(pipe_command) + self._set_input_type(input_type) + self._set_hdfs_config(fs_name, fs_ugi) + self._set_download_cmd(download_cmd) + def _set_pipe_command(self, pipe_command): """ - self.proto_desc.rank_offset = rank_offset + Set pipe command of current dataset + A pipe command is a UNIX pipeline command that can be used only - def set_fea_eval(self, record_candidate_size, fea_eval=True): - """ - set fea eval mode for slots shuffle to debug the importance level of - slots(features), fea_eval need to be set True for slots shuffle. - - Args: - record_candidate_size(int): size of instances candidate to shuffle - one slot - fea_eval(bool): whether enable fea eval mode to enable slots shuffle. - default is True. - Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_fea_eval(1000000, True) + import paddle + dataset = paddle.distributed.fleet.dataset.DatasetBase() + dataset._set_pipe_command("python my_script.py") - """ - if fea_eval: - self.dataset.set_fea_eval(fea_eval, record_candidate_size) - self.fea_eval = fea_eval - - def slots_shuffle(self, slots): - """ - Slots Shuffle - Slots Shuffle is a shuffle method in slots level, which is usually used - in sparse feature with large scale of instances. To compare the metric, i.e. - auc while doing slots shuffle on one or several slots with baseline to - evaluate the importance level of slots(features). - Args: - slots(list[string]): the set of slots(string) to do slots shuffle. + pipe_command(str): pipe command - Examples: - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_merge_by_lineid() - #suppose there is a slot 0 - dataset.slots_shuffle(['0']) """ - if self.fea_eval: - slots_set = set(slots) - self.dataset.slots_shuffle(slots_set) + self.proto_desc.pipe_command = pipe_command - def set_batch_size(self, batch_size): + def _set_batch_size(self, batch_size): """ Set batch size. Will be effective during training Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset() - dataset.set_batch_size(128) + import paddle + dataset = paddle.distributed.fleet.DatasetBase() + dataset._set_batch_size(128) Args: batch_size(int): batch size @@ -172,32 +100,16 @@ class DatasetBase(object): """ self.proto_desc.batch_size = batch_size - def set_pv_batch_size(self, pv_batch_size): - """ - Set pv batch size. It will be effective during enable_pv_merge - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset() - dataset.set_pv_batch(128) - Args: - pv_batch_size(int): pv batch size - - """ - self.proto_desc.pv_batch_size = pv_batch_size - - def set_thread(self, thread_num): + def _set_thread(self, thread_num): """ Set thread num, it is the num of readers. Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset() - dataset.set_thread(12) + import paddle + dataset = paddle.distributed.fleet.DatasetBase() + dataset._set_thread(12) Args: thread_num(int): thread num @@ -212,8 +124,8 @@ class DatasetBase(object): Examples: .. 
code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset() + import paddle + dataset = paddle.distributed.fleet.DatasetBase() dataset.set_filelist(['a.txt', 'b.txt']) Args: @@ -222,19 +134,19 @@ class DatasetBase(object): self.dataset.set_filelist(filelist) self.filelist = filelist - def set_input_type(self, input_type): + def _set_input_type(self, input_type): self.proto_desc.input_type = input_type - def set_use_var(self, var_list): + def _set_use_var(self, var_list): """ Set Variables which you will use. Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset() - dataset.set_use_var([data, label]) + import paddle + dataset = paddle.distributed.fleet.DatasetBase() + dataset._set_use_var([data, label]) Args: var_list(list): variable list @@ -253,19 +165,19 @@ class DatasetBase(object): slot_var.type = "uint64" else: raise ValueError( - "Currently, fluid.dataset only supports dtype=float32 and dtype=int64" + "Currently, paddle.distributed.fleet.dataset only supports dtype=float32 and dtype=int64" ) - def set_hdfs_config(self, fs_name, fs_ugi): + def _set_hdfs_config(self, fs_name, fs_ugi): """ Set hdfs config: fs name ad ugi Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset() - dataset.set_hdfs_config("my_fs_name", "my_fs_ugi") + import paddle + dataset = paddle.distributed.fleet.DatasetBase() + dataset._set_hdfs_config("my_fs_name", "my_fs_ugi") Args: fs_name(str): fs name @@ -273,16 +185,16 @@ class DatasetBase(object): """ self.dataset.set_hdfs_config(fs_name, fs_ugi) - def set_download_cmd(self, download_cmd): + def _set_download_cmd(self, download_cmd): """ Set customized download cmd: download_cmd Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset() - dataset.set_download_cmd("./read_from_afs") + import paddle + dataset = paddle.distributed.fleet.DatasetBase() + dataset._set_download_cmd("./read_from_afs") Args: download_cmd(str): customized download command @@ -297,22 +209,22 @@ class DatasetBase(object): if self.thread_num > len(self.filelist): self.thread_num = len(self.filelist) self.dataset.set_thread_num(self.thread_num) - self.dataset.set_data_feed_desc(self.desc()) + self.dataset.set_data_feed_desc(self._desc()) self.dataset.create_readers() def _finish_to_run(self): self.dataset.destroy_readers() - def desc(self): + def _desc(self): """ Returns a protobuf message for this DataFeedDesc Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset() - print(dataset.desc()) + import paddle + dataset = paddle.distributed.fleet.DatasetBase() + print(dataset._desc()) Returns: A string message @@ -330,10 +242,10 @@ class InMemoryDataset(DatasetBase): """ InMemoryDataset, it will load data into memory and shuffle data before training. 
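Note: with the per-field setters made private above, the keyword-style init() shown in the surrounding hunks becomes the supported entry point. A hedged usage sketch (filenames and the variable list are illustrative):

import paddle

dataset = paddle.distributed.InMemoryDataset()
dataset.init(
    batch_size=32,
    thread_num=2,
    pipe_command="cat",
    use_var=[])                           # pass the feed variables you use
dataset.set_filelist(["a.txt", "b.txt"])  # set_filelist stays public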
- This class should be created by DatasetFactory Example: - dataset = paddle.fluid.DatasetFactory().create_dataset("InMemoryDataset") + import paddle + dataset = paddle.distributed.InMemoryDataset() """ def __init__(self): @@ -351,7 +263,229 @@ class InMemoryDataset(DatasetBase): self.merge_by_lineid = False self.fleet_send_sleep_seconds = None - def set_feed_type(self, data_feed_type): + def _init_distributed_settings(self, **kwargs): + """ + should be called only once in user's python scripts to initialize distributed-related settings of dataset instance + Args: + kwargs: Keyword arguments. Currently, we support the following keys in **kwargs: + + merge_size(int): ins size to merge, if merge_size > 0, set merge by line id, + instances of same line id will be merged after shuffle, + you should parse line id in data generator. default is -1. + parse_ins_id(bool): Set if Dataset need to parse ins_id. default is False. + parse_content(bool): Set if Dataset need to parse content. default is False. + fleet_send_batch_size(int): Set fleet send batch size in one rpc, default is 1024 + fleet_send_sleep_seconds(int): Set fleet send sleep time, default is 0 + fea_eval(bool): Set if Dataset need to do feature importance evaluation using slots shuffle. + default is False. + candidate_size(int): if fea_eval is set True, set the candidate size used in slots shuffle. + + Examples: + .. code-block:: python + + import paddle + dataset = paddle.distributed.InMemoryDataset() + dataset.init( + batch_size=1, + thread_num=2, + input_type=1, + pipe_command="cat", + use_var=[]) + dataset._init_distributed_settings( + parse_ins_id=True, + parse_content=True, + fea_eval=True, + candidate_size=10000) + + """ + merge_size = kwargs.get("merge_size", -1) + if merge_size > 0: + self._set_merge_by_lineid(merge_size) + + parse_ins_id = kwargs.get("parse_ins_id", False) + self._set_parse_ins_id(parse_ins_id) + + parse_content = kwargs.get("parse_content", False) + self._set_parse_content(parse_content) + + fleet_send_batch_size = kwargs.get("fleet_send_batch_size", None) + if fleet_send_batch_size: + self._set_fleet_send_batch_size(fleet_send_batch_size) + + fleet_send_sleep_seconds = kwargs.get("fleet_send_sleep_seconds", None) + if fleet_send_sleep_seconds: + self._set_fleet_send_sleep_seconds(fleet_send_sleep_seconds) + + fea_eval = kwargs.get("fea_eval", False) + if fea_eval: + candidate_size = kwargs.get("candidate_size", 10000) + self._set_fea_eval(candidate_size, True) +
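To make the new split between init() and _init_distributed_settings() concrete, here is a minimal usage sketch; the kwargs are the ones dispatched above, while a configured fleet environment and real input files are assumed:

.. code-block:: python

    import paddle

    dataset = paddle.distributed.InMemoryDataset()
    # single-node settings go through init()
    dataset.init(batch_size=32, thread_num=4, pipe_command="cat", use_var=[])
    # distributed-only switches go through _init_distributed_settings()
    dataset._init_distributed_settings(
        merge_size=2,        # merge instances that share a line id after shuffle
        parse_ins_id=True,   # the data generator must emit ins_id for merging
        fleet_send_batch_size=1024)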
+ def update_settings(self, **kwargs): + """ + should be called in user's python scripts to update settings of dataset instance + Args: + kwargs: Keyword arguments. Currently, we support the following keys in **kwargs, + including single node settings and advanced distributed related settings: + + batch_size(int): batch size. It will be effective during training. default is 1. + thread_num(int): thread num, it is the num of readers. default is 1. + use_var(list): list of variables. Variables which you will use. default is []. + input_type(int): the input type of generated input. 0 is for one sample, 1 is for one batch. default is 0. + fs_name(str): fs name. default is "". + fs_ugi(str): fs ugi. default is "". + pipe_command(str): pipe command of current dataset. A pipe command is a UNIX pipeline command used to read and preprocess the input data. default is "cat" + download_cmd(str): customized download command. default is "cat" + data_feed_type(str): data feed type used in c++ code. default is "MultiSlotInMemoryDataFeed". + queue_num(int): Dataset output queue num, training threads get data from queues. default is -1, which is set same as thread number in c++. + + merge_size(int): ins size to merge, if merge_size > 0, set merge by line id, + instances of same line id will be merged after shuffle, + you should parse line id in data generator. default is -1. + parse_ins_id(bool): Set if Dataset need to parse ins_id. default is False. + parse_content(bool): Set if Dataset need to parse content. default is False. + fleet_send_batch_size(int): Set fleet send batch size in one rpc, default is 1024 + fleet_send_sleep_seconds(int): Set fleet send sleep time, default is 0 + fea_eval(bool): Set if Dataset need to do feature importance evaluation using slots shuffle. + default is False. + candidate_size(int): if fea_eval is set True, set the candidate size used in slots shuffle. + + Examples: + .. code-block:: python + + import paddle + dataset = paddle.distributed.InMemoryDataset() + dataset.init( + batch_size=1, + thread_num=2, + input_type=1, + pipe_command="cat", + use_var=[]) + dataset._init_distributed_settings( + parse_ins_id=True, + parse_content=True, + fea_eval=True, + candidate_size=10000) + dataset.update_settings(batch_size=2) + + """ + for key in kwargs: + if key == "pipe_command": + self._set_pipe_command(kwargs[key]) + elif key == "batch_size": + self._set_batch_size(kwargs[key]) + elif key == "thread_num": + self._set_thread(kwargs[key]) + elif key == "use_var": + self._set_use_var(kwargs[key]) + elif key == "input_type": + self._set_input_type(kwargs[key]) + elif key == "fs_name" and "fs_ugi" in kwargs: + self._set_hdfs_config(kwargs[key], kwargs["fs_ugi"]) + elif key == "download_cmd": + self._set_download_cmd(kwargs[key]) + elif key == "merge_size" and kwargs.get("merge_size", -1) > 0: + self._set_merge_by_lineid(kwargs[key]) + elif key == "parse_ins_id": + self._set_parse_ins_id(kwargs[key]) + elif key == "parse_content": + self._set_parse_content(kwargs[key]) + elif key == "fleet_send_batch_size": + self._set_fleet_send_batch_size(kwargs[key]) + elif key == "fleet_send_sleep_seconds": + self._set_fleet_send_sleep_seconds(kwargs[key]) + elif key == "fea_eval" and kwargs[key] == True: + candidate_size = kwargs.get("candidate_size", 10000) + self._set_fea_eval(candidate_size, True) +
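As a design note, the elif chain in update_settings() is equivalent to a key-to-setter mapping; a hypothetical condensed form, illustrative only and not part of the patch:

.. code-block:: python

    def update_settings_sketch(dataset, **kwargs):
        # maps a subset of the supported keys to the private setters used above
        setters = {
            "pipe_command": dataset._set_pipe_command,
            "batch_size": dataset._set_batch_size,
            "thread_num": dataset._set_thread,
            "input_type": dataset._set_input_type,
            "download_cmd": dataset._set_download_cmd,
        }
        for key, value in kwargs.items():
            if key in setters:
                setters[key](value)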
+ def init(self, **kwargs): + """ + should be called only once in user's python scripts to initialize settings of dataset instance + Args: + kwargs: Keyword arguments. Currently, we support the following keys in **kwargs: + + batch_size(int): batch size. It will be effective during training. default is 1. + thread_num(int): thread num, it is the num of readers. default is 1. + use_var(list): list of variables. Variables which you will use. default is []. + input_type(int): the input type of generated input. 0 is for one sample, 1 is for one batch. default is 0. + fs_name(str): fs name. default is "". + fs_ugi(str): fs ugi. default is "". + pipe_command(str): pipe command of current dataset. A pipe command is a UNIX pipeline command used to read and preprocess the input data. default is "cat" + download_cmd(str): customized download command. default is "cat" + data_feed_type(str): data feed type used in c++ code. default is "MultiSlotInMemoryDataFeed". + queue_num(int): Dataset output queue num, training threads get data from queues. default is -1, which is set same as thread number in c++. + + Examples: + .. code-block:: python + + import os + import paddle + import paddle.fluid as fluid + from paddle.fluid import core + with open("test_queue_dataset_run_a.txt", "w") as f: + data = "2 1 2 2 5 4 2 2 7 2 1 3\n" + data += "2 6 2 2 1 4 2 2 4 2 2 3\n" + data += "2 5 2 2 9 9 2 2 7 2 1 3\n" + data += "2 7 2 2 1 9 2 3 7 2 5 3\n" + f.write(data) + with open("test_queue_dataset_run_b.txt", "w") as f: + data = "2 1 2 2 5 4 2 2 7 2 1 3\n" + data += "2 6 2 2 1 4 2 2 4 2 2 3\n" + data += "2 5 2 2 9 9 2 2 7 2 1 3\n" + data += "2 7 2 2 1 9 2 3 7 2 5 3\n" + f.write(data) + + slots = ["slot1", "slot2", "slot3", "slot4"] + slots_vars = [] + for slot in slots: + var = fluid.data( + name=slot, shape=[None, 1], dtype="int64", lod_level=1) + slots_vars.append(var) + + dataset = paddle.distributed.InMemoryDataset() + dataset.init( + batch_size=1, + thread_num=2, + input_type=1, + pipe_command="cat", + use_var=slots_vars) + dataset.set_filelist( + ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"]) + dataset.load_into_memory() + + exe = fluid.Executor(fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0)) + exe.run(fluid.default_startup_program()) + exe.train_from_dataset(fluid.default_main_program(), + dataset) + os.remove("./test_queue_dataset_run_a.txt") + os.remove("./test_queue_dataset_run_b.txt") + """ + batch_size = kwargs.get("batch_size", 1) + thread_num = kwargs.get("thread_num", 1) + use_var = kwargs.get("use_var", []) + input_type = kwargs.get("input_type", 0) + fs_name = kwargs.get("fs_name", "") + fs_ugi = kwargs.get("fs_ugi", "") + pipe_command = kwargs.get("pipe_command", "cat") + download_cmd = kwargs.get("download_cmd", "cat") + + super(InMemoryDataset, self).init( + batch_size=batch_size, + thread_num=thread_num, + use_var=use_var, + pipe_command=pipe_command, + input_type=input_type, + fs_name=fs_name, + fs_ugi=fs_ugi, + download_cmd=download_cmd) + + data_feed_type = kwargs.get("data_feed_type", + "MultiSlotInMemoryDataFeed") + self._set_feed_type(data_feed_type) + + if kwargs.get("queue_num", -1) > 0: + queue_num = kwargs.get("queue_num", -1) + self._set_queue_num(queue_num) + + def _set_feed_type(self, data_feed_type): """ Set data_feed_desc """ @@ -373,7 +507,7 @@ class InMemoryDataset(DatasetBase): self.dataset.set_parse_logkey(self.parse_logkey) self.dataset.set_merge_by_sid(self.merge_by_sid) self.dataset.set_enable_pv_merge(self.enable_pv_merge) - self.dataset.set_data_feed_desc(self.desc()) + self.dataset.set_data_feed_desc(self._desc()) self.dataset.create_channel() self.dataset.create_readers() @@ -387,7 +521,7 @@ class InMemoryDataset(DatasetBase): self.dataset.dynamic_adjust_channel_num(self.thread_num, False) self.dataset.dynamic_adjust_readers_num(self.thread_num) - def set_queue_num(self, queue_num): + def _set_queue_num(self, queue_num): """ Set Dataset output queue num, training threads get data from queues @@ -397,17 +531,17 @@ Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_queue_num(12) + import paddle + dataset = paddle.distributed.InMemoryDataset() + dataset._set_queue_num(12) """ self.is_user_set_queue_num = True self.queue_num = queue_num - def set_parse_ins_id(self, parse_ins_id): + def _set_parse_ins_id(self, parse_ins_id): """ - Set id Dataset need to parse insid + Set if Dataset need to parse ins_id Args: parse_ins_id(bool): if parse ins_id or not @@ -415,14 +549,14 @@ Examples: .. 
code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_parse_ins_id(True) + import paddle + dataset = paddle.distributed.InMemoryDataset() + dataset._set_parse_ins_id(True) """ self.parse_ins_id = parse_ins_id - def set_parse_content(self, parse_content): + def _set_parse_content(self, parse_content): """ Set if Dataset need to parse content @@ -432,120 +566,14 @@ class InMemoryDataset(DatasetBase): Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_parse_content(True) + import paddle + dataset = paddle.distributed.InMemoryDataset() + dataset._set_parse_content(True) """ self.parse_content = parse_content - def set_parse_logkey(self, parse_logkey): - """ - Set if Dataset need to parse logkey - - Args: - parse_content(bool): if parse logkey or not - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_parse_logkey(True) - - """ - self.parse_logkey = parse_logkey - - def set_merge_by_sid(self, merge_by_sid): - """ - Set if Dataset need to merge sid. If not, one ins means one Pv. - - Args: - merge_by_sid(bool): if merge sid or not - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_merge_by_sid(True) - - """ - self.merge_by_sid = merge_by_sid - - def set_enable_pv_merge(self, enable_pv_merge): - """ - Set if Dataset need to merge pv. - - Args: - enable_pv_merge(bool): if enable_pv_merge or not - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_enable_pv_merge(True) - - """ - self.enable_pv_merge = enable_pv_merge - - def preprocess_instance(self): - """ - Merge pv instance and convey it from input_channel to input_pv_channel. - It will be effective when enable_pv_merge_ is True. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.load_into_memory() - dataset.preprocess_instance() - - """ - self.dataset.preprocess_instance() - - def set_current_phase(self, current_phase): - """ - Set current phase in train. It is useful for untest. - current_phase : 1 for join, 0 for update. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.load_into_memory() - dataset.set_current_phase(1) - - """ - self.dataset.set_current_phase(current_phase) - - def postprocess_instance(self): - """ - Divide pv instance and convey it to input_channel. - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.load_into_memory() - dataset.preprocess_instance() - exe.train_from_dataset(dataset) - dataset.postprocess_instance() - - """ - self.dataset.postprocess_instance() - - def set_fleet_send_batch_size(self, fleet_send_batch_size=1024): + def _set_fleet_send_batch_size(self, fleet_send_batch_size=1024): """ Set fleet send batch size, default is 1024 @@ -555,14 +583,14 @@ class InMemoryDataset(DatasetBase): Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_fleet_send_batch_size(800) + import paddle + dataset = paddle.distributed.InMemoryDataset() + dataset._set_fleet_send_batch_size(800) """ self.fleet_send_batch_size = fleet_send_batch_size - def set_fleet_send_sleep_seconds(self, fleet_send_sleep_seconds=0): + def _set_fleet_send_sleep_seconds(self, fleet_send_sleep_seconds=0): """ Set fleet send sleep time, default is 0 @@ -572,14 +600,14 @@ class InMemoryDataset(DatasetBase): Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_fleet_send_sleep_seconds(2) + import paddle + dataset = paddle.distributed.InMemoryDataset() + dataset._set_fleet_send_sleep_seconds(2) """ self.fleet_send_sleep_seconds = fleet_send_sleep_seconds - def set_merge_by_lineid(self, merge_size=2): + def _set_merge_by_lineid(self, merge_size=2): """ Set merge by line id, instances of same line id will be merged after shuffle, you should parse line id in data generator. @@ -590,22 +618,22 @@ class InMemoryDataset(DatasetBase): Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_merge_by_lineid() + import paddle + dataset = paddle.distributed.InMemoryDataset() + dataset._set_merge_by_lineid() """ self.dataset.set_merge_by_lineid(merge_size) self.merge_by_lineid = True self.parse_ins_id = True - def set_generate_unique_feasigns(self, generate_uni_feasigns, shard_num): + def _set_generate_unique_feasigns(self, generate_uni_feasigns, shard_num): self.dataset.set_generate_unique_feasigns(generate_uni_feasigns) self.gen_uni_feasigns = generate_uni_feasigns self.local_shard_num = shard_num - def generate_local_tables_unlock(self, table_id, fea_dim, read_thread_num, - consume_thread_num, shard_num): + def _generate_local_tables_unlock(self, table_id, fea_dim, read_thread_num, + consume_thread_num, shard_num): self.dataset.generate_local_tables_unlock( table_id, fea_dim, read_thread_num, consume_thread_num, shard_num) @@ -616,8 +644,8 @@ class InMemoryDataset(DatasetBase): Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") + import paddle + dataset = paddle.distributed.InMemoryDataset() filelist = ["a.txt", "b.txt"] dataset.set_filelist(filelist) dataset.load_into_memory() @@ -635,8 +663,8 @@ class InMemoryDataset(DatasetBase): Examples: .. 
code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") + import paddle + dataset = paddle.distributed.InMemoryDataset() filelist = ["a.txt", "b.txt"] dataset.set_filelist(filelist) dataset.preload_into_memory() @@ -656,8 +684,8 @@ class InMemoryDataset(DatasetBase): Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") + import paddle + dataset = paddle.distributed.InMemoryDataset() filelist = ["a.txt", "b.txt"] dataset.set_filelist(filelist) dataset.preload_into_memory() @@ -673,8 +701,8 @@ class InMemoryDataset(DatasetBase): Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") + import paddle + dataset = paddle.distributed.InMemoryDataset() filelist = ["a.txt", "b.txt"] dataset.set_filelist(filelist) dataset.load_into_memory() @@ -692,9 +720,9 @@ class InMemoryDataset(DatasetBase): Examples: .. code-block:: python - import paddle.fluid as fluid + import paddle from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") + dataset = paddle.distributed.InMemoryDataset() filelist = ["a.txt", "b.txt"] dataset.set_filelist(filelist) dataset.load_into_memory() @@ -736,9 +764,9 @@ class InMemoryDataset(DatasetBase): Examples: .. code-block:: python - import paddle.fluid as fluid + import paddle from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") + dataset = paddle.distributed.InMemoryDataset() filelist = ["a.txt", "b.txt"] dataset.set_filelist(filelist) dataset.load_into_memory() @@ -751,30 +779,6 @@ class InMemoryDataset(DatasetBase): """ self.dataset.release_memory() - def get_pv_data_size(self): - """ - Get memory data size of Pv, user can call this function to know the pv num - of ins in all workers after load into memory. - - Note: - This function may cause bad performance, because it has barrier - - Returns: - The size of memory pv data. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") - filelist = ["a.txt", "b.txt"] - dataset.set_filelist(filelist) - dataset.load_into_memory() - print dataset.get_pv_data_size() - - """ - return self.dataset.get_pv_data_size() - def get_memory_data_size(self, fleet=None): """ Get memory data size, user can call this function to know the num @@ -792,9 +796,9 @@ class InMemoryDataset(DatasetBase): Examples: .. code-block:: python - import paddle.fluid as fluid + import paddle from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") + dataset = paddle.distributed.InMemoryDataset() filelist = ["a.txt", "b.txt"] dataset.set_filelist(filelist) dataset.load_into_memory() @@ -829,9 +833,9 @@ class InMemoryDataset(DatasetBase): Examples: .. 
code-block:: python - import paddle.fluid as fluid + import paddle from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") + dataset = paddle.distributed.InMemoryDataset() filelist = ["a.txt", "b.txt"] dataset.set_filelist(filelist) dataset.load_into_memory() @@ -849,6 +853,51 @@ return global_data_size[0] return local_data_size[0] + def _set_fea_eval(self, record_candidate_size, fea_eval=True): + """ + set fea eval mode for slots shuffle to debug the importance level of + slots(features), fea_eval needs to be set True for slots shuffle. + + Args: + record_candidate_size(int): size of instances candidate to shuffle + one slot + fea_eval(bool): whether enable fea eval mode to enable slots shuffle. + default is True. + + Examples: + .. code-block:: python + + import paddle + dataset = paddle.distributed.InMemoryDataset() + dataset._set_fea_eval(1000000, True) + + """ + if fea_eval: + self.dataset.set_fea_eval(fea_eval, record_candidate_size) + self.fea_eval = fea_eval + + def slots_shuffle(self, slots): + """ + Slots Shuffle + Slots Shuffle is a shuffle method at the slot level, which is usually used + for sparse features with a large scale of instances. Shuffle one or several + slots and compare the metric, e.g. auc, against the baseline to + evaluate the importance level of the slots(features). + + Args: + slots(list[string]): the set of slots(string) to do slots shuffle. + + Examples: + .. code-block:: python + + import paddle + dataset = paddle.distributed.InMemoryDataset() + dataset._set_merge_by_lineid() + # suppose there is a slot 0 + dataset.slots_shuffle(['0']) + """ + if self.fea_eval: + slots_set = set(slots) + self.dataset.slots_shuffle(slots_set) +
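The slots-shuffle API above is meant for feature-importance analysis; a hypothetical end-to-end sketch follows, where evaluate_auc is a placeholder for the user's own evaluation routine:

.. code-block:: python

    import paddle

    dataset = paddle.distributed.InMemoryDataset()
    dataset.init(batch_size=32, thread_num=2, pipe_command="cat", use_var=[])
    dataset._init_distributed_settings(fea_eval=True, candidate_size=10000)
    dataset.set_filelist(["a.txt", "b.txt"])
    dataset.load_into_memory()

    baseline_auc = evaluate_auc(dataset)   # placeholder: user's own evaluation
    dataset.slots_shuffle(["0"])           # destroy the signal of slot "0"
    shuffled_auc = evaluate_auc(dataset)   # placeholder
    # a large drop (baseline_auc - shuffled_auc) suggests slot "0" is important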
class QueueDataset(DatasetBase): """ @@ -857,19 +906,24 @@ Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("QueueDataset") + import paddle + dataset = paddle.distributed.QueueDataset() """ def __init__(self): """ Initialize QueueDataset - This class should be created by DatasetFactory """ super(QueueDataset, self).__init__() self.proto_desc.name = "MultiSlotDataFeed" + def init(self, **kwargs): + """ + should be called only once in user's python scripts to initialize settings of dataset instance + """ + super(QueueDataset, self).init(**kwargs) + def _prepare_to_run(self): """ Set data_feed_desc/thread num/filelist before run, @@ -881,57 +935,9 @@ self.thread_num = 1 self.dataset.set_thread_num(self.thread_num) self.dataset.set_filelist(self.filelist) - self.dataset.set_data_feed_desc(self.desc()) + self.dataset.set_data_feed_desc(self._desc()) self.dataset.create_readers() - def local_shuffle(self): - """ - Local shuffle data. - - Local shuffle is not supported in QueueDataset - NotImplementedError will be raised - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("QueueDataset") - dataset.local_shuffle() - - Raises: - NotImplementedError: QueueDataset does not support local shuffle - - """ - raise NotImplementedError( - "QueueDataset does not support local shuffle, " - "please use InMemoryDataset for local_shuffle") - - def global_shuffle(self, fleet=None): - """ - Global shuffle data. - - Global shuffle is not supported in QueueDataset - NotImplementedError will be raised - - Args: - fleet(Fleet): fleet singleton. Default None. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet - dataset = fluid.DatasetFactory().create_dataset("QueueDataset") - dataset.global_shuffle(fleet) - - Raises: - NotImplementedError: QueueDataset does not support global shuffle - - """ - raise NotImplementedError( - "QueueDataset does not support global shuffle, " - "please use InMemoryDataset for global_shuffle") - class FileInstantDataset(DatasetBase): """ @@ -940,35 +946,22 @@ Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory.create_dataset("FileInstantDataset") + import paddle + dataset = paddle.distributed.fleet.FileInstantDataset() """ def __init__(self): """ Initialize FileInstantDataset - This class should be created by DatasetFactory """ super(FileInstantDataset, self).__init__() self.proto_desc.name = "MultiSlotFileInstantDataFeed" - def local_shuffle(self): + def init(self, **kwargs): """ - Local shuffle - FileInstantDataset does not support local shuffle + should be called only once in user's python scripts to initialize settings of dataset instance """ - raise NotImplementedError( - "FileInstantDataset does not support local shuffle, " - "please use InMemoryDataset for local_shuffle") - - def global_shuffle(self, fleet=None): - """ - Global shuffle - FileInstantDataset does not support global shuffle - """ - raise NotImplementedError( - "FileInstantDataset does not support global shuffle, " - "please use InMemoryDataset for global_shuffle") + super(FileInstantDataset, self).init(**kwargs) class BoxPSDataset(InMemoryDataset): """ @@ -978,19 +971,119 @@ Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset") + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() """ def __init__(self): """ Initialize BoxPSDataset - This class should be created by DatasetFactory """ super(BoxPSDataset, self).__init__() self.boxps = core.BoxPS(self.dataset) self.proto_desc.name = "PaddleBoxDataFeed" + def init(self, **kwargs): + """ + should be called only once in user's python scripts to initialize settings of dataset instance + """ + super(BoxPSDataset, self).init(**kwargs) + + rank_offset = kwargs.get("rank_offset", "") + self._set_rank_offset(rank_offset) + pv_batch_size = kwargs.get("pv_batch_size", 1) + self._set_pv_batch_size(pv_batch_size) + parse_logkey = kwargs.get("parse_logkey", False) + self._set_parse_logkey(parse_logkey) + merge_by_sid = kwargs.get("merge_by_sid", False) + self._set_merge_by_sid(merge_by_sid) + enable_pv_merge = kwargs.get("enable_pv_merge", False) + self._set_enable_pv_merge(enable_pv_merge) +
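Since BoxPSDataset.init() now routes the PV-related knobs through kwargs, a minimal sketch of a full initialization might look like this (all values are illustrative):

.. code-block:: python

    import paddle

    dataset = paddle.distributed.fleet.BoxPSDataset()
    dataset.init(
        batch_size=32,
        thread_num=2,
        pipe_command="cat",
        use_var=[],
        rank_offset="rank_offset",
        pv_batch_size=128,      # only effective when enable_pv_merge is True
        parse_logkey=True,
        merge_by_sid=True,
        enable_pv_merge=True)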
+ def _set_rank_offset(self, rank_offset): + """ + Set rank_offset for merge_pv. It sets the message of Pv. + + Examples: + .. code-block:: python + + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() + dataset._set_rank_offset("rank_offset") + + Args: + rank_offset(str): rank_offset's name + + """ + self.proto_desc.rank_offset = rank_offset + + def _set_pv_batch_size(self, pv_batch_size): + """ + Set pv batch size. It will be effective during enable_pv_merge + + Examples: + .. code-block:: python + + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() + dataset._set_pv_batch_size(128) + Args: + pv_batch_size(int): pv batch size + + """ + self.proto_desc.pv_batch_size = pv_batch_size + + def _set_parse_logkey(self, parse_logkey): + """ + Set if Dataset need to parse logkey + + Args: + parse_logkey(bool): if parse logkey or not + + Examples: + .. code-block:: python + + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() + dataset._set_parse_logkey(True) + + """ + self.parse_logkey = parse_logkey + + def _set_merge_by_sid(self, merge_by_sid): + """ + Set if Dataset need to merge sid. If not, one ins means one Pv. + + Args: + merge_by_sid(bool): if merge sid or not + + Examples: + .. code-block:: python + + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() + dataset._set_merge_by_sid(True) + + """ + self.merge_by_sid = merge_by_sid + + def _set_enable_pv_merge(self, enable_pv_merge): + """ + Set if Dataset need to merge pv. + + Args: + enable_pv_merge(bool): if enable_pv_merge or not + + Examples: + .. code-block:: python + + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() + dataset._set_enable_pv_merge(True) + + """ + self.enable_pv_merge = enable_pv_merge + def set_date(self, date): """ Workaround for date @@ -1008,8 +1101,8 @@ Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset") + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() dataset.begin_pass() """ self.boxps.begin_pass() @@ -1021,8 +1114,8 @@ Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset") + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() dataset.end_pass(True) """ self.boxps.end_pass(need_save_delta) @@ -1034,8 +1127,8 @@ Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset") + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() filelist = ["a.txt", "b.txt"] dataset.set_filelist(filelist) dataset.preload_into_memory() @@ -1049,8 +1142,8 @@ Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset") + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() filelist = ["a.txt", "b.txt"] dataset.set_filelist(filelist) dataset.load_into_memory() @@ -1064,8 +1157,8 @@ Examples: .. code-block:: python - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset") + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() filelist = ["a.txt", "b.txt"] dataset.set_filelist(filelist) dataset.preload_into_memory() @@ -1093,11 +1186,90 @@ slots(list[string]): the set of slots(string) to do slots shuffle. Examples: - import paddle.fluid as fluid - dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() dataset.set_merge_by_lineid() #suppose there is a slot 0 dataset.slots_shuffle(['0']) """ slots_set = set(slots) self.boxps.slots_shuffle(slots_set) +
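Taken together, the pass and PV-merge methods of this class compose into a training loop roughly like the following sketch; the executor, program, dataset variables, and filelist are assumed to be set up elsewhere, and preprocess_instance/postprocess_instance are the methods added just below:

.. code-block:: python

    dataset.begin_pass()
    dataset.set_filelist(["a.txt", "b.txt"])
    dataset.load_into_memory()
    dataset.preprocess_instance()       # merge ins into Pv before training
    exe.train_from_dataset(paddle.static.default_main_program(), dataset)
    dataset.postprocess_instance()      # split Pv back into ins afterwards
    dataset.end_pass(True)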
+ def set_current_phase(self, current_phase): + """ + Set current phase in train. It is useful for unit tests. + current_phase : 1 for join, 0 for update. + + Examples: + .. code-block:: python + + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() + filelist = ["a.txt", "b.txt"] + dataset.set_filelist(filelist) + dataset.load_into_memory() + dataset.set_current_phase(1) + + """ + self.dataset.set_current_phase(current_phase) + + def get_pv_data_size(self): + """ + Get memory data size of Pv. Users can call this function to know the pv num + of ins in all workers after loading into memory. + + Note: + This function may cause bad performance, because it has a barrier + + Returns: + The size of memory pv data. + + Examples: + .. code-block:: python + + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() + filelist = ["a.txt", "b.txt"] + dataset.set_filelist(filelist) + dataset.load_into_memory() + print(dataset.get_pv_data_size()) + + """ + return self.dataset.get_pv_data_size() + + def preprocess_instance(self): + """ + Merge pv instance and convey it from input_channel to input_pv_channel. + It will be effective when enable_pv_merge_ is True. + + Examples: + .. code-block:: python + + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() + filelist = ["a.txt", "b.txt"] + dataset.set_filelist(filelist) + dataset.load_into_memory() + dataset.preprocess_instance() + + """ + self.dataset.preprocess_instance() + + def postprocess_instance(self): + """ + Divide pv instance and convey it to input_channel. + + Examples: + .. code-block:: python + + import paddle + dataset = paddle.distributed.fleet.BoxPSDataset() + filelist = ["a.txt", "b.txt"] + dataset.set_filelist(filelist) + dataset.load_into_memory() + dataset.preprocess_instance() + exe.train_from_dataset(dataset) + dataset.postprocess_instance() + + """ + self.dataset.postprocess_instance() diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index 7778acaf83b310cfa9a04059ce6d3be2d5326089..6dba385c569be75b5b83e0a63e560ffa8ab73696 100644 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -157,17 +157,20 @@ def get_cluster_from_args(args, gpus): free_ports = [x for x in range(start_port, start_port + len(gpus))] - return get_cluster(node_ips, node_ip, free_ports, gpus) + trainer_endpoints = [] + for ip in node_ips: + trainer_endpoints.append(["%s:%d" % (ip, port) for port in free_ports]) + return get_cluster(node_ips, node_ip, trainer_endpoints, gpus) def get_gpus(gpus): if gpus is None: gpus_num = fluid.core.get_cuda_device_count() - gpus = [str(x) for x in range(0, gpus_num)] + res_gpus = [str(x) for x in range(0, gpus_num)] else: cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES") if cuda_visible_devices is None or cuda_visible_devices == "": - gpus = [x.strip() for x in gpus.split(',')] + res_gpus = [x.strip() for x in gpus.split(',')] else: # change gpus into relative values # e.g. CUDA_VISIBLE_DEVICES=4,5,6,7; args.gpus=4,5,6,7; @@ -177,12 +180,16 @@ def get_gpus(gpus): assert x in cuda_visible_devices_list, "Can't find "\ "your gpus %s in CUDA_VISIBLE_DEVICES[%s]."\ % (x, cuda_visible_devices) - gpus = [ + res_gpus = [ cuda_visible_devices_list.index(x.strip()) for x in gpus.split(',') ] + logger.info("Change selected_gpus into relative values. 
--ips:{} " + "will change into relative_ips:{} according to your " + "CUDA_VISIBLE_DEVICES:{}".format( + gpus, res_gpus, cuda_visible_devices_list)) - return gpus + return res_gpus def launch_collective(args): diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index 0e995200dde035842d89d9c503566b7b70ee67b7..b6f4c75a276920f966a6b324a9bea16148bf337c 100644 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -227,18 +227,23 @@ def get_logger(log_level=20, name="root"): return logger -def get_cluster(node_ips, node_ip, paddle_ports, selected_gpus): - assert type(paddle_ports) is list, "paddle_ports must be list" +def get_cluster(node_ips, node_ip, trainer_endpoints, selected_gpus): + assert type(trainer_endpoints) is list, "trainer_endpoints must be list" cluster = Cluster(hdfs=None) trainer_rank = 0 for node_rank, ip in enumerate(node_ips): pod = Pod() pod.rank = node_rank pod.addr = ip + cur_node_endpoints = trainer_endpoints[node_rank] + # when use paddlecloud, endpoints may > selected_gpus(user_defined) + assert len(cur_node_endpoints) >= len( + selected_gpus + ), "current trainer_endpoints size should be greater equal than selected_gpus size." for i in range(len(selected_gpus)): trainer = Trainer() trainer.gpus.append(selected_gpus[i]) - trainer.endpoint = "%s:%d" % (ip, paddle_ports[i]) + trainer.endpoint = "%s" % (cur_node_endpoints[i]) trainer.rank = trainer_rank trainer_rank += 1 @@ -424,10 +429,6 @@ def start_local_trainers(cluster, len(pod.trainers), pretty_print_envs(proc_env, ("Distributed Envs", "Value")))) - logger.info( - "More details for debug about commands and environments are written in {}/run.sh". - format(log_dir)) - fn = None if log_dir is not None: os.system("mkdir -p {}".format(log_dir)) diff --git a/python/paddle/distributed/fleet/meta_optimizers/__init__.py b/python/paddle/distributed/fleet/meta_optimizers/__init__.py index d98b2ef3e2a083861647b2847bafad3b08c86cfd..a3a2dee70387d69b9e8e09cd86d69a76890d7a1f 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/__init__.py +++ b/python/paddle/distributed/fleet/meta_optimizers/__init__.py @@ -18,6 +18,7 @@ from .graph_execution_optimizer import GraphExecutionOptimizer from .parameter_server_optimizer import ParameterServerOptimizer from .pipeline_optimizer import PipelineOptimizer from .localsgd_optimizer import LocalSGDOptimizer +from .localsgd_optimizer import AdaptiveLocalSGDOptimizer from .lars_optimizer import LarsOptimizer from .parameter_server_graph_optimizer import ParameterServerGraphOptimizer from .dgc_optimizer import DGCOptimizer diff --git a/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py index 31a9913701c3e08f5268d578d09c15f5bf8a86f8..3e89d3821110752d2f526f2af89a426648c05ce1 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py @@ -24,7 +24,7 @@ class AMPOptimizer(MetaOptimizerBase): self.meta_optimizers_white_list = [ "LarsOptimizer", "LambOptimizer", "RecomputeOptimizer", "LocalSGDOptimizer", "GradientMergeOptimizer", - "GraphExecutionOptimizer" + "GraphExecutionOptimizer", "AdaptiveLocalSGDOptimizer" ] self.meta_optimizers_black_list = ["DGCOptimizer"] diff --git a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py 
b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py index 6fa34d8d28a907d936500907db3e4c65ab4f4da8..75271968fca6853ad8d26cb2237a4d6fa7c7dffc 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py @@ -25,7 +25,9 @@ class LocalSGDOptimizer(MetaOptimizerBase): super(LocalSGDOptimizer, self).__init__(optimizer) self.inner_opt = optimizer self.meta_optimizers_white_list = [] - self.meta_optimizers_black_list = ["GraphExecutionOptimizer"] + self.meta_optimizers_black_list = [ + "GraphExecutionOptimizer", "AdaptiveLocalSGDOptimizer" + ] self.snapshot_key = '@SNAPSHOT' def _can_apply(self): @@ -186,3 +188,252 @@ class LocalSGDOptimizer(MetaOptimizerBase): layers.cond(step > begin_step, begin_localsgd, communicate) return minimized + + +class AdaptiveLocalSGDOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): + super(AdaptiveLocalSGDOptimizer, self).__init__(optimizer) + self.inner_opt = optimizer + self.meta_optimizers_white_list = [] + self.meta_optimizers_black_list = [ + "GraphExecutionOptimizer", "LocalSGDOptimizer" + ] + self.snapshot_key = '@SNAPSHOT' + + def _can_apply(self): + if not self.role_maker._is_collective: + return False + + if not self.user_defined_strategy.adaptive_localsgd: + return False + + if self.role_maker.worker_num() <= 1: + return False + + return isinstance(self.inner_opt, paddle.optimizer.momentum.Momentum) \ + or isinstance(self.inner_opt, paddle.fluid.optimizer.Momentum) \ + or isinstance(self.inner_opt, paddle.optimizer.sgd.SGD) \ + or isinstance(self.inner_opt, paddle.fluid.optimizer.SGD) + + def _disable_strategy(self, dist_strategy): + dist_strategy.adaptive_localsgd = False + dist_strategy.adaptive_localsgd_configs = {} + + def _enable_strategy(self, dist_strategy, context): + dist_strategy.adaptive_localsgd = True + dist_strategy.adaptive_localsgd_configs = { + "init_k_steps": 1, + "begin_step": 1 + } + + def snapshot_name(self, param_name): + return param_name + self.snapshot_key + + def create_snapshot_vars(self, program): + block = program.global_block() + + non_dist_params = [] + for param in block.iter_parameters(): + if not param.is_distributed: + non_dist_params.append(param) + + p2s = [] + for param in non_dist_params: + snapshot = block.create_var( + name=self.snapshot_name(param.name), + shape=param.shape, + persistable=True, + stop_gradient=True, + dtype=param.dtype) + p2s.append([param, snapshot]) + return p2s + + def init_snapshot_vars(self, startup_program, param2snapshot): + with program_guard(startup_program): + for param, snapshot in param2snapshot: + layers.assign(param, snapshot) + + def _generate_avg_loss(self, program_block, loss, avg_loss): + program_block.append_op( + type='c_allreduce_sum', + inputs={'X': [loss]}, + outputs={'Out': [avg_loss]}, + attrs={ + 'ring_id': 0, + OP_ROLE_KEY: OpRole.Optimize, + 'use_calc_stream': True + }) + program_block.append_op( + type='c_sync_calc_stream', + inputs={'X': [avg_loss]}, + outputs={'Out': [avg_loss]}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) + + program_block.append_op( + type='scale', + inputs={'X': [avg_loss]}, + outputs={'Out': [avg_loss]}, + attrs={ + 'scale': 1.0 / self.role_maker.worker_num(), + OP_ROLE_KEY: OpRole.Optimize + }) + + def minimize_impl(self, + loss, + startup_program=None, + parameter_list=None, + no_grad_set=None): + minimized = self.inner_opt.minimize( + loss, startup_program=startup_program) + + init_k_steps = 
self.user_defined_strategy.adaptive_localsgd_configs[ + 'init_k_steps'] + begin_step_value = self.user_defined_strategy.adaptive_localsgd_configs[ + 'begin_step'] + + if startup_program is None: + startup_program = default_startup_program() + main_block = loss.block + + self.nrings = 2 + collective_helper = CollectiveHelper(self.role_maker, self.nrings) + collective_helper.update_startup_program(startup_program) + p2s = self.create_snapshot_vars(startup_program) + self.init_snapshot_vars(startup_program, p2s) + + p2s = self.create_snapshot_vars(main_block.program) + with program_guard(main_block.program, startup_program): + step = layers.autoincreased_step_counter(begin=1) + + k_steps = layers.create_global_var( + name="k_steps", + shape=[1], + value=int(init_k_steps), + dtype='int64', + persistable=True) + + begin_step = layers.create_global_var( + name="begin_step", + shape=[1], + value=int(begin_step_value), + dtype='int64', + persistable=True) + + last_step = layers.create_global_var( + name="last_step", + shape=[1], + value=int(0), + dtype='int64', + persistable=True) + + avg_loss = layers.create_global_var( + name="avg_loss", + shape=[1], + value=float(0), + dtype=loss.dtype, + persistable=True) + + lr_0 = layers.create_global_var( + name="lr_0", + shape=[1], + value=float(0), + dtype='float32', + persistable=True) + + loss_0 = layers.create_global_var( + name="loss_0", + shape=[1], + value=float(0), + dtype='float32', + persistable=True) + + global_lr = self.inner_opt._global_learning_rate() + + def initialize(): + self._generate_avg_loss(main_block, loss, avg_loss) + layers.assign(avg_loss, loss_0) + layers.assign(global_lr, lr_0) + + layers.cond(step == 1, initialize) + + def communicate(): + sub_block = default_main_program().current_block() + ring_id = -1 + for param, snapshot in p2s: + sub_block.append_op( + type='elementwise_sub', + inputs={'X': [snapshot], + 'Y': [param]}, + outputs={'Out': [param]}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) + sub_block.append_op( + type='c_sync_calc_stream', + inputs={'X': param}, + outputs={'Out': param}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) + ring_id = (ring_id + 1) % self.nrings + sub_block.append_op( + type='c_allreduce_sum', + inputs={'X': [param]}, + outputs={'Out': [param]}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Optimize + }) + + for ring_id in range(self.nrings): + sub_block.append_op( + type='c_sync_comm_stream', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Optimize + }) + + for param, snapshot in p2s: + sub_block.append_op( + type='scale', + inputs={'X': [param]}, + outputs={'Out': [param]}, + attrs={ + 'scale': 1.0 / self.role_maker.worker_num(), + OP_ROLE_KEY: OpRole.Optimize + }) + sub_block.append_op( + type='elementwise_sub', + inputs={'X': [snapshot], + 'Y': [param]}, + outputs={'Out': [param]}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) + sub_block.append_op( + type='assign', + inputs={'X': [param]}, + outputs={'Out': [snapshot]}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) + layers.assign(step, last_step) + + def communicate_avg_loss(): + communicate() + self._generate_avg_loss(main_block, loss, avg_loss) + next_local_steps = layers.cast( + layers.ceil( + layers.sqrt(lr_0 * avg_loss / (global_lr * loss_0) * + float(init_k_steps))), + dtype='int64') + max_local_steps = layers.fill_constant( + shape=[1], dtype='int64', value=16) + min_local_steps = layers.fill_constant( + shape=[1], dtype='int64', value=1) + next_local_steps = 
layers.elementwise_min(next_local_steps, + max_local_steps) + next_local_steps = layers.elementwise_max(next_local_steps, + min_local_steps) + layers.assign(next_local_steps, k_steps) + + def begin_localsgd(): + layers.cond(step - last_step == k_steps, communicate_avg_loss) + + layers.cond(step > begin_step, begin_localsgd, communicate) + + return minimized diff --git a/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py index 8f9595486922a37cff02d1ac96c1c4c2bbf4b0d5..59ca7e633099e8688a57fa9024575e29008c0341 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py @@ -38,7 +38,7 @@ class RecomputeOptimizer(MetaOptimizerBase): list(user_defined_strategy.recompute_configs["checkpoints"])) def _can_apply(self): - if self.role_maker._is_collective: + if not self.role_maker._is_collective: return False if self.user_defined_strategy.recompute == True: diff --git a/python/paddle/distributed/fleet/utils/fs.py b/python/paddle/distributed/fleet/utils/fs.py index 2dbe5cefbb4944e219989358ebeb0c321f942551..966b7219d609d5da71a466d3bda86b13408be281 100644 --- a/python/paddle/distributed/fleet/utils/fs.py +++ b/python/paddle/distributed/fleet/utils/fs.py @@ -200,26 +200,6 @@ class LocalFS(FS): return dirs -"""HDFS Utils.""" - - -def _handle_errors(f): - def handler(*args, **kwargs): - start = time.time() - while True: - try: - return f(*args, **kwargs) - except ExecuteError as e: - o = args[0] - time_out = float(o._time_out) / 1000.0 - inter = float(o._sleep_inter) / 1000.0 - if time.time() - start >= time_out: - raise FSTimeOut - time.sleep(inter) - - return functools.wraps(f)(handler) - - def _handle_errors(max_time_out=None): def decorator(f): @functools.wraps(f) diff --git a/python/paddle/distributed/launch.py b/python/paddle/distributed/launch.py index e2ab321f9aebddd437c92ded9e6005495f760096..9b969cf3002379058b9cff0d604d2db750573028 100644 --- a/python/paddle/distributed/launch.py +++ b/python/paddle/distributed/launch.py @@ -160,18 +160,21 @@ def get_cluster_from_args(args, selected_gpus): x for x in range(started_port, started_port + len(selected_gpus)) ] - return get_cluster(node_ips, node_ip, free_ports, selected_gpus) + trainer_endpoints = [] + for ip in node_ips: + trainer_endpoints.append(["%s:%d" % (ip, port) for port in free_ports]) + return get_cluster(node_ips, node_ip, trainer_endpoints, selected_gpus) def get_gpus(selected_gpus): if selected_gpus is None: from paddle.fluid import core gpus_num = core.get_cuda_device_count() - selected_gpus = [str(x) for x in range(0, gpus_num)] + gpus = [str(x) for x in range(0, gpus_num)] else: cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES") if cuda_visible_devices is None or cuda_visible_devices == "": - selected_gpus = [x.strip() for x in selected_gpus.split(',')] + gpus = [x.strip() for x in selected_gpus.split(',')] else: # change selected_gpus into relative values # e.g. CUDA_VISIBLE_DEVICES=4,5,6,7; args.selected_gpus=4,5,6,7; @@ -181,12 +184,16 @@ def get_gpus(selected_gpus): assert x in cuda_visible_devices_list, "Can't find "\ "your selected_gpus %s in CUDA_VISIBLE_DEVICES[%s]."\ % (x, cuda_visible_devices) - selected_gpus = [ + gpus = [ cuda_visible_devices_list.index(x.strip()) for x in selected_gpus.split(',') ] + logger.info("Change selected_gpus into relative values. --ips:{} " + "will change into relative_ips:{} according to your " + "CUDA_VISIBLE_DEVICES:{}".format( + selected_gpus, gpus, cuda_visible_devices_list)) - return selected_gpus + return gpus
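For clarity, the relative-index mapping that both launch modules now log can be illustrated standalone: with CUDA_VISIBLE_DEVICES=4,5,6,7 the visible devices are renumbered 0..3, so requesting physical GPUs "4,5" yields relative ids [0, 1]. A self-contained sketch of that mapping:

.. code-block:: python

    def to_relative(selected_gpus, cuda_visible_devices):
        # mirrors the logic above: the index of each selected gpu within
        # the CUDA_VISIBLE_DEVICES list is its relative id
        visible = cuda_visible_devices.split(',')
        return [visible.index(x.strip()) for x in selected_gpus.split(',')]

    assert to_relative("4,5", "4,5,6,7") == [0, 1]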
def get_cluster_and_pod(args): diff --git a/python/paddle/distributed/utils.py b/python/paddle/distributed/utils.py index 1fa307c4d1b89d4033a8f8346b254177053e9dc0..be144a55b86200042f4d03b112071a374612b3a5 100644 --- a/python/paddle/distributed/utils.py +++ b/python/paddle/distributed/utils.py @@ -227,18 +227,23 @@ def get_logger(log_level, name="root"): return logger -def get_cluster(node_ips, node_ip, paddle_ports, selected_gpus): - assert type(paddle_ports) is list, "paddle_ports must be list" +def get_cluster(node_ips, node_ip, trainer_endpoints, selected_gpus): + assert type(trainer_endpoints) is list, "trainer_endpoints must be list" cluster = Cluster(hdfs=None) trainer_rank = 0 for node_rank, ip in enumerate(node_ips): pod = Pod() pod.rank = node_rank pod.addr = ip + cur_node_endpoints = trainer_endpoints[node_rank] + # when using paddlecloud, endpoints may > selected_gpus(user_defined) + assert len(cur_node_endpoints) >= len( + selected_gpus + ), "current trainer_endpoints size should be greater than or equal to selected_gpus size." for i in range(len(selected_gpus)): trainer = Trainer() trainer.gpus.append(selected_gpus[i]) - trainer.endpoint = "%s:%d" % (ip, paddle_ports[i]) + trainer.endpoint = "%s" % (cur_node_endpoints[i]) trainer.rank = trainer_rank trainer_rank += 1 @@ -253,7 +258,8 @@ def terminate_local_procs(procs): for p in procs: if p.proc.poll() is None: p.proc.terminate() - p.log_fn.close() + if p.log_fn: + p.log_fn.close() logger.debug("terminate process id:{}".format(p.proc.pid)) #wait for all processes to terminate diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py index 244a621611060b87805846f1ea748615bcdde19a..ddbd99e16cebdfc839a8e96e44d4f96f02e70c55 100644 --- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py +++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py @@ -143,7 +143,7 @@ class PostTrainingQuantization(object): weight_quantize_type='channel_wise_abs_max', optimize_model=False, is_use_cache_file=False, - cache_dir="./temp_post_training"): + cache_dir=None): ''' Constructor. @@ -206,13 +206,8 @@ class PostTrainingQuantization(object): `conv2d/depthwise_conv2d + bn`, the weights scale for all channel will be different. To address this problem, fuse the pattern before quantization. Default False. - is_use_cache_file(bool, optional): If set is_use_cache_file as False, - all temp data will be saved in memory. If set is_use_cache_file as True, - it will save temp data to disk. When the fp32 model is complex or - the number of calibrate data is large, we should set is_use_cache_file - as True. Defalut is False. - cache_dir(str, optional): When is_use_cache_file is True, set cache_dir as - the directory for saving temp data. Default is ./temp_post_training. + is_use_cache_file(bool, optional): This param is deprecated. + cache_dir(str, optional): This param is deprecated. Returns: None
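The deprecation above pairs with the rewritten KL path further below: instead of caching raw activation tensors in memory or on disk, the quantizer now makes one pass to find each activation's absolute range and a second pass to accumulate a fixed 2048-bin histogram, from which the KL threshold is computed. A simplified numpy sketch of that idea, where batches is a placeholder for the calibration data:

.. code-block:: python

    import numpy as np

    # stage 1: find the absolute value range of the activation
    abs_max = 0.0
    for batch in batches:                # placeholder iterable of np.ndarray
        abs_max = max(abs_max, float(np.max(np.abs(batch))))

    # stage 2: accumulate a histogram over that fixed range
    hist = np.zeros(2048, dtype=np.int64)
    edges = np.linspace(0.0, abs_max, 2049)
    for batch in batches:
        h, _ = np.histogram(np.abs(batch), bins=edges)
        hist += h
    # the KL scaling factor is then derived from `hist` alone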
@@ -302,10 +297,6 @@ class PostTrainingQuantization(object): assert op_type in self._support_quantize_op_type, \ op_type + " is not supported for quantization." self._optimize_model = optimize_model - self._is_use_cache_file = is_use_cache_file - self._cache_dir = cache_dir - if self._is_use_cache_file and not os.path.exists(self._cache_dir): - os.mkdir(self._cache_dir) # Define variables self._place = self._executor.place @@ -317,11 +308,17 @@ self._out_scale_op_list = _out_scale_op_list self._quantized_weight_var_name = set() self._quantized_act_var_name = set() - self.weight_op_pairs = {} + self._weight_op_pairs = {} + # The vars for algo = KL + self._sampling_act_abs_min_max = {} + self._sampling_act_histogram = {} self._sampling_data = {} self._quantized_var_kl_threshold = {} + self._histogram_bins = 2048 + # The vars for algo = min_max self._quantized_var_min = {} self._quantized_var_max = {} + # The vars for algo = abs_max self._quantized_var_abs_max = {} def quantize(self): @@ -339,6 +336,25 @@ self._collect_target_varnames() self._set_activation_persistable() + if self._algo == "KL": + _logger.info("Preparation stage ...") + batch_id = 0 + for data in self._data_loader(): + self._executor.run(program=self._program, + feed=data, + fetch_list=self._fetch_list, + return_numpy=False, + scope=self._scope) + self._collect_activation_abs_min_max() + if batch_id % 5 == 0: + _logger.info("Run batch: " + str(batch_id)) + batch_id += 1 + if self._batch_nums and batch_id >= self._batch_nums: + break + _logger.info("Finish preparation stage, all batch: " + str(batch_id)) + self._init_sampling_act_histogram() + + _logger.info("Sampling stage ...") batch_id = 0 for data in self._data_loader(): self._executor.run(program=self._program, @@ -346,17 +362,13 @@ fetch_list=self._fetch_list, return_numpy=False, scope=self._scope) - if self._algo == "KL": - self._sample_data(batch_id) - else: - self._sample_threshold() - + self._sampling() if batch_id % 5 == 0: _logger.info("Run batch: " + str(batch_id)) batch_id += 1 if self._batch_nums and batch_id >= self._batch_nums: break - _logger.info("Finish all batch: " + str(batch_id)) + _logger.info("Finish sampling stage, all batch: " + str(batch_id)) self._reset_activation_persistable() @@ -397,6 +409,7 @@ target_vars=self._fetch_list, executor=self._executor, main_program=self._program) + _logger.info("The quantized model is saved in " + save_model_path) def _load_model_data(self): ''' @@ -454,7 +467,7 @@ for var_name in var_name_list: if var_name in persistable_var_names: self._quantized_weight_var_name.add(var_name) - self.weight_op_pairs[var_name] = op_type + self._weight_op_pairs[var_name] = op_type else: self._quantized_act_var_name.add(var_name) @@ -494,20 +507,18 @@ if var.name in self._quantized_act_var_name: var.persistable = False - def _sample_threshold(self): + def _sampling(self): ''' - Sample the input threshold(min, max, or abs_max) in every iterations. + Sample the min/max, abs_max or histogram in every iteration. ''' - assert self._algo in ["abs_max", "min_max"], \ - "The algo should be abs_max or min_max for _sample_threshold." 
if self._algo == "abs_max": - self._sample_threshold_abs_max() + self._sample_abs_max() elif self._algo == "min_max": - self._sample_threshold_min_max() + self._sample_min_max() + elif self._algo == "KL": + self._sample_histogram() - def _sample_threshold_abs_max(self): - assert self._algo == "abs_max", \ - "The algo should be abs_max for _sample_threshold_abs_max." + def _sample_abs_max(self): # Only calculate abs_max value for weight for once if self._quantized_var_abs_max == {}: for var_name in self._quantized_weight_var_name: @@ -516,7 +527,7 @@ class PostTrainingQuantization(object): abs_max_value = float(np.max(np.abs(var_tensor))) elif self._weight_quantize_type == "channel_wise_abs_max": abs_max_value = [] - if self.weight_op_pairs[ + if self._weight_op_pairs[ var_name] in _channelwise_quant_axis1_ops: for i in range(var_tensor.shape[1]): abs_max_value.append( @@ -534,9 +545,7 @@ class PostTrainingQuantization(object): (abs_max_value > self._quantized_var_abs_max[var_name]): self._quantized_var_abs_max[var_name] = abs_max_value - def _sample_threshold_min_max(self): - assert self._algo == "min_max", \ - "The algo should be min_max for _sample_threshold_min_max." + def _sample_min_max(self): if self._quantized_var_min == {} and self._quantized_var_max == {}: for var_name in self._quantized_weight_var_name: var_tensor = _load_variable_data(self._scope, var_name) @@ -546,7 +555,7 @@ class PostTrainingQuantization(object): elif self._weight_quantize_type == "channel_wise_abs_max": min_value = [] max_value = [] - if self.weight_op_pairs[ + if self._weight_op_pairs[ var_name] in _channelwise_quant_axis1_ops: for i in range(var_tensor.shape[1]): min_value.append(float(np.min(var_tensor[:, i]))) @@ -569,6 +578,14 @@ class PostTrainingQuantization(object): (max_value > self._quantized_var_max[var_name]): self._quantized_var_max[var_name] = max_value + def _sample_histogram(self): + for var_name in self._quantized_act_var_name: + var_tensor = _load_variable_data(self._scope, var_name) + var_tensor_abs = np.abs(var_tensor) + bins = self._sampling_act_histogram[var_name][1] + hist, _ = np.histogram(var_tensor_abs, bins=bins) + self._sampling_act_histogram[var_name][0] += hist + def _save_input_threhold(self): ''' Save input threshold to the quantized op. @@ -585,27 +602,36 @@ class PostTrainingQuantization(object): op._set_attr(var_name + ".max", self._quantized_var_max[var_name]) - def _sample_data(self, iter): + def _collect_activation_abs_min_max(self): ''' - Sample the tensor data of quantized variables, - applied in every iteration. + Collect the abs_min and abs_max for all activation. When algo = KL, + get the min and max value, and then calculate the threshold. ''' - assert self._algo == "KL", "The algo should be KL to sample data." 
- if self._is_use_cache_file: - for var_name in self._quantized_act_var_name: - var_tensor = _load_variable_data(self._scope, var_name) - var_tensor = var_tensor.ravel() - save_path = os.path.join( - self._cache_dir, - var_name.replace("/", ".") + "_" + str(iter) + ".npy") - np.save(save_path, var_tensor) - else: - for var_name in self._quantized_act_var_name: - if var_name not in self._sampling_data: - self._sampling_data[var_name] = [] - var_tensor = _load_variable_data(self._scope, var_name) - var_tensor = var_tensor.ravel() - self._sampling_data[var_name].append(var_tensor) + for var_name in self._quantized_act_var_name: + var_tensor = _load_variable_data(self._scope, var_name) + var_tensor = np.abs(var_tensor) + min_value = float(np.min(var_tensor)) + max_value = float(np.max(var_tensor)) + if var_name not in self._sampling_act_abs_min_max: + self._sampling_act_abs_min_max[ + var_name] = [min_value, max_value] + else: + if min_value < self._sampling_act_abs_min_max[var_name][0]: + self._sampling_act_abs_min_max[var_name][0] = min_value + if max_value > self._sampling_act_abs_min_max[var_name][1]: + self._sampling_act_abs_min_max[var_name][1] = max_value + + def _init_sampling_act_histogram(self): + ''' + Based on the min/max value, init the sampling_act_histogram. + ''' + for var_name in self._quantized_act_var_name: + if var_name not in self._sampling_act_histogram: + min_val = self._sampling_act_abs_min_max[var_name][0] + max_val = self._sampling_act_abs_min_max[var_name][1] + hist, hist_edeges = np.histogram( + [], bins=self._histogram_bins, range=(min_val, max_val)) + self._sampling_act_histogram[var_name] = [hist, hist_edeges] def _calculate_kl_threshold(self): ''' @@ -621,7 +647,7 @@ class PostTrainingQuantization(object): weight_threshold = float(np.max(np.abs(weight_data))) elif self._weight_quantize_type == "channel_wise_abs_max": weight_threshold = [] - if self.weight_op_pairs[ + if self._weight_op_pairs[ var_name] in _channelwise_quant_axis1_ops: for i in range(weight_data.shape[1]): weight_threshold.append( @@ -632,25 +658,10 @@ class PostTrainingQuantization(object): float(np.max(np.abs(weight_data[i])))) self._quantized_var_kl_threshold[var_name] = weight_threshold - # KL threshold for activations - if self._is_use_cache_file: - for var_name in self._quantized_act_var_name: - sampling_data = [] - filenames = [f for f in os.listdir(self._cache_dir) \ - if re.match(var_name.replace("/", ".") + '_[0-9]+.npy', f)] - for filename in filenames: - file_path = os.path.join(self._cache_dir, filename) - sampling_data.append(np.load(file_path)) - os.remove(file_path) - sampling_data = np.concatenate(sampling_data) - self._quantized_var_kl_threshold[var_name] = \ - self._get_kl_scaling_factor(np.abs(sampling_data)) - else: - for var_name in self._quantized_act_var_name: - self._sampling_data[var_name] = np.concatenate( - self._sampling_data[var_name]) - self._quantized_var_kl_threshold[var_name] = \ - self._get_kl_scaling_factor(np.abs(self._sampling_data[var_name])) + for var_name in self._quantized_act_var_name: + hist, hist_edeges = self._sampling_act_histogram[var_name] + self._quantized_var_kl_threshold[var_name] = \ + self._get_kl_scaling_factor(hist, hist_edeges) def _update_program(self): ''' @@ -765,22 +776,15 @@ class PostTrainingQuantization(object): for var_name in out_var_names: analysis_and_save_info(op, var_name) - def _get_kl_scaling_factor(self, activation_blob, num_quantized_bins=255): + def _get_kl_scaling_factor(self, hist, hist_edeges, num_quantized_bins=255): 
''' Using the KL-divergence method to get a more precise scaling factor. ''' - max_val = np.max(activation_blob) - min_val = np.min(activation_blob) - if min_val >= 0: - hist, hist_edeges = np.histogram( - activation_blob, bins=2048, range=(min_val, max_val)) - ending_iter = 2047 - starting_iter = int(ending_iter * 0.7) - else: - _logger.error("Please first apply abs to activation_blob.") + ending_iter = self._histogram_bins - 1 + starting_iter = int(ending_iter * 0.7) bin_width = hist_edeges[1] - hist_edeges[0] - P_sum = len(np.array(activation_blob).ravel()) + P_sum = np.sum(np.array(hist).ravel()) min_kl_divergence = 0 min_kl_index = 0 kl_inited = False diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py index 2f95c2b9007a53483fda86dda8d77e9baff0d8d2..01c2f0fed496081400d363d9464360c69d924be8 100644 --- a/python/paddle/fluid/dygraph/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -23,7 +23,6 @@ from paddle.fluid import framework from paddle.fluid.multiprocess_utils import CleanupFuncRegistrar from .tracer import Tracer import logging -import objgraph from ..data_feeder import convert_dtype import warnings @@ -368,24 +367,6 @@ def guard(place=None): yield -def _print_debug_msg(parameter_list, limit=5, is_test=False): - if not core._is_dygraph_debug_enabled(): - logging.warn( - 'Debug mode is not enabled. Please set FLAGS_dygraph_debug=1 to enable debug' - ) - return - unique_name_size = len(framework.unique_name.generator.ids) - tracer_var_size = len(parameter_list) - alive_cpp_var_size = len(core.VarBase._alive_vars()) - if not is_test: - logging.warn( - 'unique_name num: {}, tracer vars num: {}, alive cpp vars num: {}' - .format(unique_name_size, tracer_var_size, alive_cpp_var_size)) - objgraph.show_growth(limit=limit) - else: - return unique_name_size, tracer_var_size, alive_cpp_var_size - - @framework.dygraph_only def grad(outputs, inputs, diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py index 9876fc620b870f47b10e9f99e4de34f5cb81fde1..93cb0bafc847b897816636f92255bd06b7e67321 100644 --- a/python/paddle/fluid/dygraph/checkpoint.py +++ b/python/paddle/fluid/dygraph/checkpoint.py @@ -195,58 +195,11 @@ def load_dygraph(model_path, config=None): params_file_path = model_prefix + ".pdparams" opti_file_path = model_prefix + ".pdopt" - # deal with argument `configs` - configs = config - if configs is None: - configs = SaveLoadConfig() - - if not os.path.exists(params_file_path) and not os.path.exists( - opti_file_path): - # Load state dict by `jit.save/io.save_inference_model` save format - # NOTE(chenweihang): [ Compatibility of save_inference_model save format ] - # The model saved by `save_inference_model` does not completely correspond to - # the information required by the `state_dict` under the dygraph. - # `save_inference_model` not save structured name, we need to remind - # the user to configure the `use_structured_name` argument when `set_state_dict` - # NOTE(chenweihang): `jit.save` doesn't save optimizer state - - # 1. check model path - if not os.path.isdir(model_prefix): - raise ValueError("Model saved directory '%s' is not exists." % - model_prefix) + # deal with argument `config` + if config is None: + config = SaveLoadConfig() - # 2. load program desc & construct _ProgramHolder - programs = _construct_program_holders(model_path, - configs.model_filename) - - # 3.
load layer parameters & buffers - # NOTE: using fluid.dygraph.guard() here will cause import error in py2 - with guard(): - persistable_var_dict = _construct_params_and_buffers( - model_prefix, - programs, - configs.separate_params, - configs.params_filename, - append_suffix=False) - - # 4. construct state_dict - para_dict = dict() - for var_name in persistable_var_dict: - para_dict[var_name] = persistable_var_dict[var_name].numpy() - - # if __variables.info__ exists, we can recover structured_name - var_info_path = os.path.join(model_prefix, EXTRA_VAR_INFO_FILENAME) - if os.path.exists(var_info_path): - with open(var_info_path, 'rb') as f: - extra_var_info = pickle.load(f) - structured_para_dict = dict() - for var_name in para_dict: - structured_name = extra_var_info[var_name].get( - 'structured_name', None) - assert structured_name is not None, "Cannot find saved variable (%s)'s structured name in saved model." % var_name - structured_para_dict[structured_name] = para_dict[var_name] - para_dict = structured_para_dict - else: + if os.path.exists(params_file_path) or os.path.exists(opti_file_path): # Load state dict by `save_dygraph` save format para_dict = {} if os.path.exists(params_file_path): @@ -254,12 +207,103 @@ with open(params_file_path, 'rb') as f: para_dict = pickle.load(f) if six.PY2 else pickle.load( f, encoding='latin1') - if not configs.keep_name_table and "StructuredToParameterName@@" in para_dict: + if not config.keep_name_table and "StructuredToParameterName@@" in para_dict: del para_dict["StructuredToParameterName@@"] if os.path.exists(opti_file_path): with open(opti_file_path, 'rb') as f: opti_dict = pickle.load(f) if six.PY2 else pickle.load( f, encoding='latin1') + else: + # check model path + if not os.path.isdir(model_prefix): + raise ValueError("Model saved directory '%s' does not exist." % + model_prefix) + + # check whether model file exists + if config.model_filename is None: + model_filename = '__model__' + else: + model_filename = config.model_filename + model_file_path = os.path.join(model_path, model_filename) + + if os.path.exists(model_file_path): + # Load state dict by `jit.save/io.save_inference_model` save format + # NOTE(chenweihang): [ Compatibility of save_inference_model save format ] + # The model saved by `save_inference_model` does not completely correspond to + # the information required by the `state_dict` under the dygraph. + # `save_inference_model` does not save structured names, so we need to remind + # the user to configure the `use_structured_name` argument when `set_state_dict` + # NOTE(chenweihang): `jit.save` doesn't save optimizer state + + # 1. load program desc & construct _ProgramHolder + programs = _construct_program_holders(model_path, + config.model_filename) + + # 2. load layer parameters & buffers + # NOTE: using fluid.dygraph.guard() here will cause import error in py2 + with guard(): + persistable_var_dict = _construct_params_and_buffers( + model_prefix, + programs, + config.separate_params, + config.params_filename, + append_suffix=False) + + # 3.
construct state_dict + para_dict = dict() + for var_name in persistable_var_dict: + para_dict[var_name] = persistable_var_dict[var_name].numpy() + + # if __variables.info__ exists, we can recover structured_name + var_info_path = os.path.join(model_prefix, + EXTRA_VAR_INFO_FILENAME) + if os.path.exists(var_info_path): + with open(var_info_path, 'rb') as f: + extra_var_info = pickle.load(f) + structured_para_dict = dict() + for var_name in para_dict: + structured_name = extra_var_info[var_name].get( + 'structured_name', None) + assert structured_name is not None, "Cannot find saved variable (%s)'s structured name in saved model." % var_name + structured_para_dict[structured_name] = para_dict[ + var_name] + para_dict = structured_para_dict + else: + # load state dict by `io.save_params/persistables` save format + # TODO(chenweihang): [ Now only supports loading parameters separately ] + # If users save all parameters as one file, the [ variable.name -> variable ] + # mapping info will be lost, so users need to give a variable list, but building + # a variable list in dygraph mode is difficult, so we recommend using + # paddle.io.load_program_state in this case + + # Try to load all the files in the directory in VarBase format, + # the file name is used as the name of VarBase + load_var_list = [] + + # 1. load file names + var_name_list = [] + for root, _, files in os.walk(model_path): + for filename in files: + file_path = os.path.join(root, filename) + tmp_var_name = os.path.relpath(file_path, model_path) + var_name = tmp_var_name.replace("\\", "/") + var_name_list.append(var_name) + + # 2. create and load VarBase + with guard(): + for name in var_name_list: + new_var = _varbase_creator(name=name, persistable=True) + _dygraph_tracer().trace_op( + type='load', + inputs={}, + outputs={'Out': new_var}, + attrs={'file_path': os.path.join(model_path, name)}) + load_var_list.append(new_var) + + # 3.
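The directory walk added above is what turns a tree of `io.save_params`-style files back into variable names; stripped to its essence (a hedged sketch, with `model_path` standing for any saved-parameter directory):

```python
import os

def collect_var_names(model_path):
    # Relative file paths double as variable names; normalize Windows
    # separators so the names are stable across platforms.
    names = []
    for root, _, files in os.walk(model_path):
        for filename in files:
            rel = os.path.relpath(os.path.join(root, filename), model_path)
            names.append(rel.replace("\\", "/"))
    return names
```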
construct state_dict + para_dict = dict() + for var in load_var_list: + para_dict[var.name] = var.numpy() return para_dict, opti_dict diff --git a/python/paddle/fluid/dygraph/io.py b/python/paddle/fluid/dygraph/io.py index 335ac500c898085e4bf60aabdf8db95fa65db31f..4391843b0efb5636104973f0524131aa64751ffa 100644 --- a/python/paddle/fluid/dygraph/io.py +++ b/python/paddle/fluid/dygraph/io.py @@ -19,6 +19,7 @@ import six import pickle import numpy as np +import paddle from paddle import compat as cpt from paddle.fluid import core from paddle.fluid import framework @@ -182,9 +183,9 @@ class _ProgramHolder(object): super(_ProgramHolder, self).__init__() # input, output, persistable var info - self._input_names = [] - self._persistable_names = [] + self._input_descs = [] self._output_descs = [] + self._persistable_names = [] # execution scope self._inner_scope = core.Scope() @@ -207,11 +208,11 @@ class _ProgramHolder(object): return self._train_program_desc @property - def input_names(self): - return self._input_names + def input_descs(self): + return self._input_descs @property - def output_decs(self): + def output_descs(self): return self._output_descs @property @@ -233,7 +234,8 @@ class _ProgramHolder(object): ops_to_remove.append(i) feed_var_name = cpt.to_bytes(op.input('X')[0]) root_block._remove_var(feed_var_name) - self._input_names.append(cpt.to_bytes(op.output('Out')[0])) + self._input_descs.append( + root_block.find_var(cpt.to_bytes(op.output('Out')[0]))) elif op.type() == 'scale' and op.output('Out')[0].startswith( 'save_infer_model/scale_'): ops_to_remove.append(i) @@ -257,7 +259,7 @@ class _ProgramHolder(object): root_block._remove_op(op_idx, op_idx + 1) # 2. Input processing, reverse feed vars - self._input_names.reverse() + self._input_descs.reverse() # 3. Output processing, add scale for outputs tmp_program = _build_program_by_desc(program_desc) @@ -738,7 +740,7 @@ class TranslatedLayer(layers.Layer): if isinstance(value, np.ndarray): var = core.VarBase( value=value, - name=program_holder.input_names[i], + name=program_holder.input_descs[i].name(), persistable=False, place=framework._current_expected_place(), zero_copy=True) @@ -746,7 +748,7 @@ class TranslatedLayer(layers.Layer): var = value # NOTE: we changed var name here, # but it may be an important name set by user - var.name = program_holder.input_names[i] + var.name = program_holder.input_descs[i].name() input_vars.append(var) persistable_vars = [] @@ -762,7 +764,7 @@ class TranslatedLayer(layers.Layer): % var_name) output_vars = [] - for var_desc in program_holder.output_decs: + for var_desc in program_holder.output_descs: var = core.VarBase(var_desc.dtype(), var_desc.shape(), var_desc.name(), var_desc.type(), False) @@ -913,11 +915,7 @@ class TranslatedLayer(layers.Layer): program = translated_layer.program() """ # 1. get program holder - program_holder = self._program_holder_dict.get(method_name, None) - if program_holder is None: - raise ValueError( - "The method `%s` is not exists in loaded TranslatedLayer." % - method_name) + program_holder = self._get_program_holder(method_name) # 2. get inference program desc program_desc = program_holder.infer_program @@ -925,3 +923,44 @@ class TranslatedLayer(layers.Layer): # 3. 
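Taken together, the restructured `load_dygraph` now dispatches on what it finds on disk: `.pdparams`/`.pdopt` files, a `__model__`-style inference directory, or a bare directory of per-variable files. The call site is unchanged; a sketch with an illustrative prefix (the saved files must exist for the call to succeed):

```python
import paddle.fluid as fluid

# "my_model" is an illustrative prefix: load_dygraph tries my_model.pdparams
# first, then falls back to the jit.save / save_inference_model layouts.
para_state_dict, opti_state_dict = fluid.dygraph.load_dygraph("my_model")
```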
construct program program = _build_program_by_desc(program_desc) return program + + def _get_program_holder(self, method_name='forward'): + program_holder = self._program_holder_dict.get(method_name, None) + if program_holder is None: + raise ValueError( + "The method `%s` does not exist in loaded TranslatedLayer." % + method_name) + return program_holder + + def _input_spec(self, method_name='forward'): + # 1. get program holder + program_holder = self._get_program_holder(method_name) + + # 2. build input spec by input desc + input_spec = [] + for var_desc in program_holder.input_descs: + spec = paddle.static.InputSpec( + shape=var_desc.shape(), + dtype=var_desc.dtype(), + name=var_desc.name()) + input_spec.append(spec) + + return input_spec + + def _output_spec(self, method_name='forward'): + # 1. get program holder + program_holder = self._get_program_holder(method_name) + + # 2. build output spec by output desc + output_spec = [] + for var_desc in program_holder.output_descs: + # NOTE(chenweihang): InputSpec describes a tensor, not just input. + # Maybe the name is not good enough. Here we use InputSpec to + # construct the description of Output tensor + spec = paddle.static.InputSpec( + shape=var_desc.shape(), + dtype=var_desc.dtype(), + name=var_desc.name()) + output_spec.append(spec) + + return output_spec diff --git a/python/paddle/fluid/dygraph/math_op_patch.py b/python/paddle/fluid/dygraph/math_op_patch.py index f9fe4198fec3a0a2237c0bcac6e20f4269160589..3aa7b9dfc262810686319819f717f3cfd06b5e50 100644 --- a/python/paddle/fluid/dygraph/math_op_patch.py +++ b/python/paddle/fluid/dygraph/math_op_patch.py @@ -285,7 +285,7 @@ def monkey_patch_math_varbase(): ('__ge__', _binary_creator_('__ge__', 'greater_equal', False, None)), ('__array_ufunc__', None), ('sigmoid', _method_creator_('sigmoid', 'name=None')), - ('logsigmoid', _method_creator_('logsigmoid', 'name=None')), + ('log_sigmoid', _method_creator_('logsigmoid', 'name=None')), ('exp', _method_creator_('exp', 'name=None')), ('tanh', _method_creator_('tanh', 'name=None')), ('atan', _method_creator_('atan', 'name=None')), diff --git a/python/paddle/fluid/incubate/fleet/utils/fleet_util.py b/python/paddle/fluid/incubate/fleet/utils/fleet_util.py index cb1a54ef19899059d1a46d0807ce58bf3b5ab8b5..58313c46c3cf0d42d6e14e10d0ca91f361ce787a 100644 --- a/python/paddle/fluid/incubate/fleet/utils/fleet_util.py +++ b/python/paddle/fluid/incubate/fleet/utils/fleet_util.py @@ -26,8 +26,7 @@ import paddle.fluid as fluid from paddle.fluid.log_helper import get_logger from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet as fleet_pslib from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet as fleet_transpiler -from . import hdfs -from .hdfs import * +from paddle.distributed.fleet.utils.fs import LocalFS, HDFSClient from . import utils __all__ = ["FleetUtil"] diff --git a/python/paddle/fluid/incubate/fleet/utils/fs.py b/python/paddle/fluid/incubate/fleet/utils/fs.py deleted file mode 100644 index 0ba06ef934a525d3801e233c6e2f124fb0a6df52..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/incubate/fleet/utils/fs.py +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
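Stepping back to the io.py hunk above: the new `_input_spec`/`_output_spec` helpers reduce to building `paddle.static.InputSpec` objects from (shape, dtype, name) triples read off the var descs. A minimal illustration with made-up descs:

```python
import paddle

# Made-up stand-ins for the (shape, dtype, name) data a VarDesc provides.
descs = [([None, 1, 28, 28], "float32", "image"), ([None, 1], "int64", "label")]
specs = [paddle.static.InputSpec(shape=s, dtype=d, name=n) for s, d, n in descs]
```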
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys -import subprocess -import multiprocessing -from datetime import datetime - -import re -import copy -import errno -import time -import logging -import abc -from pathlib import PurePosixPath, Path -import shutil - -__all__ = ['FS', 'LocalFS'] - - -class ExecuteError(Exception): - pass - - -class FSFileExistsError(Exception): - pass - - -class FSFileNotExistsError(Exception): - pass - - -class FSTimeOut(Exception): - pass - - -class FSShellCmdAborted(ExecuteError): - pass - - -class FS(object): - @abc.abstractmethod - def ls_dir(self, fs_path): - raise NotImplementedError - - @abc.abstractmethod - def is_file(self, fs_path): - raise NotImplementedError - - @abc.abstractmethod - def is_dir(self, fs_path): - raise NotImplementedError - - @abc.abstractmethod - def is_exist(self, fs_path): - raise NotImplementedError - - @abc.abstractmethod - def upload(self, local_path, fs_path): - raise NotImplementedError - - @abc.abstractmethod - def download(self, fs_path, local_path): - raise NotImplementedError - - @abc.abstractmethod - def mkdirs(self, fs_path): - raise NotImplementedError - - @abc.abstractmethod - def delete(self, fs_path): - raise NotImplementedError - - @abc.abstractmethod - def need_upload_download(self): - raise NotImplementedError - - @abc.abstractmethod - def rename(self, fs_src_path, fs_dst_path): - raise NotImplementedError - - @abc.abstractmethod - def mv(self, fs_src_path, fs_dst_path, overwrite=False, test_exists=False): - raise NotImplementedError - - @abc.abstractmethod - def upload_dir(self, local_dir, dest_dir): - raise NotImplementedError - - @abc.abstractmethod - def list_dirs(self, fs_path): - raise NotImplementedError - - @abc.abstractmethod - def touch(self, fs_path, exist_ok=True): - raise NotImplementedError - - -class LocalFS(FS): - def ls_dir(self, fs_path): - return [f for f in os.listdir(fs_path)] - - def mkdirs(self, fs_path): - assert not os.path.isfile(fs_path), "{} is already a file".format( - fs_path) - os.system("mkdir -p {}".format(fs_path)) - - def rename(self, fs_src_path, fs_dst_path): - os.rename(fs_src_path, fs_dst_path) - - def _rmr(self, fs_path): - shutil.rmtree(fs_path) - - def _rm(self, fs_path): - os.remove(fs_path) - - def delete(self, fs_path): - if not self.is_exist(fs_path): - return - - if os.path.isfile(fs_path): - return self._rm(fs_path) - - return self._rmr(fs_path) - - def need_upload_download(self): - return False - - def is_file(self, fs_path): - return os.path.isfile(fs_path) - - def is_dir(self, fs_path): - return os.path.isdir(fs_path) - - def is_exist(self, fs_path): - return os.path.exists(fs_path) - - def touch(self, fs_path, exist_ok=True): - if self.is_exist(fs_path): - if exist_ok: - return - raise FSFileExistsError - - return Path(fs_path).touch(exist_ok=True) - - def mv(self, src_path, dst_path, overwrite=False, test_exists=False): - if not self.is_exist(src_path): - raise FSFileNotExistsError - - if overwrite and self.is_exist(dst_path): - self.delete(dst_path) - - if self.is_exist(dst_path): - raise FSFileExistsError - - return self.rename(src_path, dst_path) - - def list_dirs(self, 
fs_path): - """ - list directory under fs_path, and only give the pure name, not include the fs_path - """ - if not self.is_exist(fs_path): - return [] - - dirs = [ - f for f in os.listdir(fs_path) if os.path.isdir(fs_path + "/" + f) - ] - - return dirs diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index 1efae3ddf1f3422a53f69c4b5b8eeec6183fae96..6cdc617a0dc17ae9f0893083285c404ca73712f7 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -20,7 +20,10 @@ from ..framework import convert_np_dtype_to_dtype_, Variable from ..data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype from paddle.utils import deprecated -__deprecated_func_name__ = {'tanh_shrink': 'tanhshrink', } +__deprecated_func_name__ = { + 'tanh_shrink': 'tanhshrink', + 'logsigmoid': 'log_sigmoid' +} __activations_noattr__ = [ 'sigmoid', @@ -106,7 +109,7 @@ Examples: paddle.disable_static() x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) - out = F.logsigmoid(x) + out = F.log_sigmoid(x) print(out.numpy()) # [-0.91301525 -0.79813887 -0.64439666 -0.55435524] diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 192effd2e42dc937fbf47efdd1d772a4c078f888..1e7915ed781a6441f32fb86c3c92e6f68ca66b93 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -3570,8 +3570,10 @@ class ExponentialMovingAverage(object): # bias correction with layers.control_flow.Switch() as switch: with switch.case(global_step > 0): - layers.assign(output=ema, input=ema / (1.0 - decay_pow)) - layers.assign(input=ema, output=param) + layers.assign( + output=param, input=ema / (1.0 - decay_pow)) + with switch.default(): + layers.assign(output=param, input=ema) self.restore_program = Program() block = self.restore_program.global_block() diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py index f2bb567b95b01eaf9a820359acef74e1c360c7f2..533222531f98b188f9fe5b47184ff39736488bd6 100644 --- a/python/paddle/fluid/reader.py +++ b/python/paddle/fluid/reader.py @@ -1726,13 +1726,13 @@ class DatasetLoader(DataLoaderBase): logging.warn('thread_num {} which is set in Dataset is ignored'. format(dataset.thread_num)) - dataset.set_thread(thread_num) + dataset._set_thread(thread_num) if isinstance(dataset, paddle.distributed.fleet.dataset. InMemoryDataset) and dataset.queue_num > thread_num: logging.warn("queue_num {} which is set in Dataset is ignored". 
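On the file-system side, the module deleted above survives as `paddle.distributed.fleet.utils.fs`, which the updated imports in this patch now target. A quick smoke test of the relocated `LocalFS` (method names are taken from the deleted class and assumed to carry over unchanged):

```python
from paddle.distributed.fleet.utils.fs import LocalFS

fs = LocalFS()
fs.mkdirs("/tmp/fs_demo")            # no-op if the directory exists
fs.touch("/tmp/fs_demo/flag")        # exist_ok defaults to True
assert fs.is_file("/tmp/fs_demo/flag") and fs.is_dir("/tmp/fs_demo")
fs.delete("/tmp/fs_demo")            # removes files or whole trees
```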
format(dataset.queue_num)) - dataset.set_queue_num(thread_num) + dataset._set_queue_num(thread_num) self._dataset = dataset use_slots = [ diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 102bacff9639d9f0076ac4900a89a58bdf508494..c0fff5b5c81075ec981390ce0191dec81ce39a7f 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -102,6 +102,7 @@ if(WIN32) endif() +LIST(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_new) LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint) LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint1) LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint2) @@ -325,7 +326,6 @@ list(REMOVE_ITEM TEST_OPS test_basic_gru_api) list(REMOVE_ITEM TEST_OPS test_basic_gru_unit_op) list(REMOVE_ITEM TEST_OPS test_basic_lstm_api) list(REMOVE_ITEM TEST_OPS test_basic_lstm_unit_op) -list(REMOVE_ITEM TEST_OPS test_imperative_debug_string) list(REMOVE_ITEM TEST_OPS test_fuse_bn_act_pass) list(REMOVE_ITEM TEST_OPS test_imperative_static_runner_mnist) list(REMOVE_ITEM TEST_OPS test_imperative_static_runner_while) @@ -399,23 +399,22 @@ py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op ENVS ${G py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op ENVS ${GC_ENVS}) py_test_modules(test_imperative_resnet MODULES test_imperative_resnet ENVS FLAGS_cudnn_deterministic=1 SERIAL) -set_tests_properties(test_imperative_resnet PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") +set_tests_properties(test_imperative_resnet PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") py_test_modules(test_imperative_resnet_sorted_gradient MODULES test_imperative_resnet_sorted_gradient ENVS FLAGS_cudnn_deterministic=1 SERIAL) -set_tests_properties(test_imperative_resnet_sorted_gradient PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") +set_tests_properties(test_imperative_resnet_sorted_gradient PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") py_test_modules(test_imperative_mnist MODULES test_imperative_mnist ENVS FLAGS_cudnn_deterministic=1) py_test_modules(test_imperative_mnist_sorted_gradient MODULES test_imperative_mnist_sorted_gradient ENVS FLAGS_cudnn_deterministic=1) py_test_modules(test_imperative_se_resnext MODULES test_imperative_se_resnext ENVS FLAGS_cudnn_deterministic=1 SERIAL) -set_tests_properties(test_imperative_se_resnext PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") +set_tests_properties(test_imperative_se_resnext PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") py_test_modules(test_imperative_ocr_attention_model MODULES test_imperative_ocr_attention_model ENVS FLAGS_cudnn_deterministic=1 SERIAL) py_test_modules(test_install_check MODULES test_install_check ENVS FLAGS_cudnn_deterministic=1 SERIAL) set_tests_properties(test_install_check PROPERTIES LABELS "RUN_TYPE=DIST") -py_test_modules(test_imperative_debug_string MODULES test_imperative_debug_string ENVS FLAGS_dygraph_debug=1) py_test_modules(test_imperative_static_runner_mnist MODULES test_imperative_static_runner_mnist ENVS FLAGS_cudnn_deterministic=1) py_test_modules(test_imperative_static_runner_while MODULES test_imperative_static_runner_while ENVS @@ -461,11 +460,11 @@ if(WITH_DISTRIBUTE) py_test_modules(test_fleet_private_function MODULES test_fleet_private_function ENVS ${dist_ENVS}) py_test_modules(test_fleet_meta_optimizer_base MODULES test_fleet_meta_optimizer_base ENVS ${dist_ENVS}) py_test_modules(test_fleet_distributed_strategy MODULES test_fleet_distributed_strategy) - py_test_modules(test_fleet_auto MODULES 
test_fleet_auto ENVS ${dist_ENVS}) + #py_test_modules(test_fleet_auto MODULES test_fleet_auto ENVS ${dist_ENVS}) if(NOT WIN32) py_test_modules(test_fleet_localsgd_meta_optimizer MODULES test_fleet_localsgd_meta_optimizer ENVS ${dist_ENVS}) - py_test_modules(test_fleet_lars_meta_optimizer MODULES test_fleet_lars_meta_optimizer ENVS ${dist_ENVS}) - py_test_modules(test_fleet_lamb_meta_optimizer MODULES test_fleet_lamb_meta_optimizer ENVS ${dist_ENVS}) + #py_test_modules(test_fleet_lars_meta_optimizer MODULES test_fleet_lars_meta_optimizer ENVS ${dist_ENVS}) + #py_test_modules(test_fleet_lamb_meta_optimizer MODULES test_fleet_lamb_meta_optimizer ENVS ${dist_ENVS}) endif(NOT WIN32) endif(NOT APPLE) if(WITH_DGC) diff --git a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py index 529ff4ec45d1fdc6d1d8e765e38cff53d36aade7..2464882d617effb838c1f40d40ec2d89c13e73d2 100644 --- a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py +++ b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py @@ -20,8 +20,7 @@ from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer, fleet import os import sys -from paddle.fluid.incubate.fleet.utils.fs import LocalFS -from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient +from paddle.distributed.fleet.utils.fs import LocalFS, HDFSClient import paddle.fluid.incubate.checkpoint.auto_checkpoint as acp from paddle.fluid.incubate.checkpoint.checkpoint_saver import PaddleModel from paddle.fluid.framework import program_guard diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py index 187b7d7df26f7ce09e1c089c1e463cd5d3ea945f..4c90ffdf4e26e3ba0f72d9c3f424125b8aa08465 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py @@ -210,14 +210,16 @@ class TestDistCTR2x2(FleetDistRunnerBase): filelist = train_file_list # config dataset - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset() - dataset.set_batch_size(batch_size) - dataset.set_use_var(self.feeds) + dataset = paddle.distributed.QueueDataset() pipe_command = 'python ctr_dataset_reader.py' - dataset.set_pipe_command(pipe_command) + + dataset.init( + batch_size=batch_size, + use_var=self.feeds, + pipe_command=pipe_command, + thread_num=thread_num) dataset.set_filelist(filelist) - dataset.set_thread(thread_num) for epoch_id in range(1): pass_start = time.time() diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py index 887693be3d5cc46c23f21780bed77b13e881da47..3852b225234ffacc2be749245fb1341331868272 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py @@ -115,14 +115,14 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2): filelist.append(train_file_path) # config dataset - dataset = paddle.fleet.DatasetFactory().create_dataset() - dataset.set_batch_size(batch_size) - dataset.set_use_var(self.feeds) + dataset = paddle.distributed.QueueDataset() + dataset._set_batch_size(batch_size) + dataset._set_use_var(self.feeds) pipe_command = 'python ctr_dataset_reader.py' - dataset.set_pipe_command(pipe_command) + dataset._set_pipe_command(pipe_command) dataset.set_filelist(filelist) - dataset.set_thread(thread_num) + dataset._set_thread(thread_num) for epoch_id in range(1): pass_start = time.time() diff --git 
a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py index b0bf203da664ae2a886ce90c56013300e25372c5..f82ee4a613b12a7d011c6dd90c9b7ca94501e014 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py @@ -184,14 +184,14 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase): print("filelist: {}".format(filelist)) # config dataset - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset() - dataset.set_batch_size(batch_size) - dataset.set_use_var(self.feeds) + dataset = paddle.distributed.QueueDataset() + dataset._set_batch_size(batch_size) + dataset._set_use_var(self.feeds) pipe_command = 'python ctr_dataset_reader.py' - dataset.set_pipe_command(pipe_command) + dataset._set_pipe_command(pipe_command) dataset.set_filelist(filelist) - dataset.set_thread(thread_num) + dataset._set_thread(thread_num) for epoch_id in range(1): pass_start = time.time() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py index dfcd1758db2b22b211f84be528739aa71132ab8a..34a52e7aed342ac8db471ad94b277efd0faf9d27 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py @@ -17,6 +17,7 @@ import numpy as np from inference_pass_test import InferencePassTest import paddle.fluid as fluid import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker class TransposeFlattenConcatFusePassTest(InferencePassTest): @@ -45,6 +46,37 @@ class TransposeFlattenConcatFusePassTest(InferencePassTest): use_gpu = True self.check_output_with_option(use_gpu) + PassVersionChecker.IsCompatible('transpose_flatten_concat_fuse_pass') + + +class TransposeFlattenConcatFusePassWithAxisTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data1 = fluid.data(name="data1", shape=[5, 5, 5], dtype="float32") + data2 = fluid.data(name="data2", shape=[5, 5, 5], dtype="float32") + trans1 = fluid.layers.transpose(data1, perm=[2, 1, 0]) + trans2 = fluid.layers.transpose(data2, perm=[2, 1, 0]) + flatt1 = fluid.layers.flatten(trans1, axis=2) + flatt2 = fluid.layers.flatten(trans2, axis=2) + concat_out = fluid.layers.concat([flatt1, flatt2], axis=1) + # There is no parameters for above structure. + # Hence, append a batch_norm to avoid failure caused by load_combined. 
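The CTR runners above migrate from `DatasetFactory` plus one `set_*` call per option to a single `init(...)` (with the remaining setters privatized as `_set_*`); condensed, the new-style setup looks like this (kwargs mirror the tests, variables and file names are illustrative):

```python
import paddle

feeds = []  # normally the feed variables built by the model network
dataset = paddle.distributed.QueueDataset()
dataset.init(batch_size=32, thread_num=3, pipe_command="cat", use_var=feeds)
dataset.set_filelist(["train_a.txt", "train_b.txt"])  # illustrative names
```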
+ out = fluid.layers.batch_norm(concat_out, is_test=True) + + self.feeds = { + "data1": np.random.random([5, 5, 5]).astype("float32"), + "data2": np.random.random([5, 5, 5]).astype("float32") + } + self.fetch_list = [out] + + def test_check_output(self): + # There is no cpu pass for transpose_flatten_concat_fuse + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu) + + PassVersionChecker.IsCompatible('transpose_flatten_concat_fuse_pass') + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pad_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pad_op.py new file mode 100644 index 0000000000000000000000000000000000000000..060f6c6c5f0446661e886390637714ad7dfc300d --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pad_op.py @@ -0,0 +1,53 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig + + +class PadOpTRTTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[1, 3, 128, 128], dtype="float32") + pad_out = fluid.layers.pad(x=data, + paddings=[0, 0, 0, 0, 0, 1, 1, 2], + pad_value=0.0) + out = fluid.layers.batch_norm(pad_out, is_test=True) + + self.feeds = { + "data": np.random.random((1, 3, 128, 128)).astype("float32") + } + self.enable_trt = True + self.trt_parameters = PadOpTRTTest.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_plugin.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..660a9c93e66715f41e4a972ff571c0c00f31316f --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_plugin.py @@ -0,0 +1,150 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig + + +#normal starts && ends +class SlicePluginTRTTest1(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[3, 3, 3, 3], dtype="float32") + axes = [1, 3] + starts = [0, 1] + ends = [2, 3] + slice_out = fluid.layers.slice( + data, axes=axes, starts=starts, ends=ends) + out = fluid.layers.batch_norm(slice_out, is_test=True) + + self.feeds = { + "data": np.random.random((3, 3, 3, 3)).astype("float32"), + } + # Diff occurred between GPU and TRT. + # In order to provide TRT CI ASAP, this test for trt part + # is disabled temporarily. + self.enable_trt = True + self.trt_parameters = SlicePluginTRTTest1.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +#negative starts && ends +class SlicePluginTRTTest2(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[3, 3, 3, 3], dtype="float32") + axes = [2, 3] + starts = [-3, -2] + ends = [-1, 3] + slice_out = fluid.layers.slice( + data, axes=axes, starts=starts, ends=ends) + out = fluid.layers.batch_norm(slice_out, is_test=True) + + self.feeds = { + "data": np.random.random((3, 3, 3, 3)).astype("float32"), + } + # Diff occurred between GPU and TRT. + # In order to provide TRT CI ASAP, this test for trt part + # is disabled temporarily. + self.enable_trt = True + self.trt_parameters = SlicePluginTRTTest2.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +#exceeded bound starts && ends +class SlicePluginTRTTest3(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[3, 3, 3, 3], dtype="float32") + axes = [2, 3] + starts = [-5, -2] + ends = [-1, 8] + slice_out = fluid.layers.slice( + data, axes=axes, starts=starts, ends=ends) + out = fluid.layers.batch_norm(slice_out, is_test=True) + + self.feeds = { + "data": np.random.random((3, 3, 3, 3)).astype("float32"), + } + # Diff occurred between GPU and TRT. + # In order to provide TRT CI ASAP, this test for trt part + # is disabled temporarily. 
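These cases pin down the slice semantics the TensorRT plugin must reproduce; in plain numpy terms, with the same shapes as the tests (incidentally, `SlicePluginTRTTest4` reuses `SlicePluginTRTTest3.TensorRTParam`, which is harmless since the nested class is shared, but worth tidying):

```python
import numpy as np

data = np.random.random((3, 3, 3, 3)).astype("float32")
# negative starts/ends count from the end of the axis (SlicePluginTRTTest2)
neg = data[:, :, -3:-1, -2:3]
# out-of-range bounds are clamped to the axis (SlicePluginTRTTest3/4)
clamped = data[:, :, -5:-1, -2:8]
assert neg.shape == (3, 3, 2, 2) and clamped.shape == (3, 3, 2, 2)
```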
+ self.enable_trt = True + self.trt_parameters = SlicePluginTRTTest3.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +#fp16 +class SlicePluginTRTTest4(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[3, 3, 3, 3], dtype="float32") + axes = [2, 3] + starts = [-5, -2] + ends = [-1, 8] + slice_out = fluid.layers.slice( + data, axes=axes, starts=starts, ends=ends) + out = fluid.layers.batch_norm(slice_out, is_test=True) + + self.feeds = { + "data": np.random.random((3, 3, 3, 3)).astype("float32"), + } + # Diff occurred between GPU and TRT. + # In order to provide TRT CI ASAP, this test for trt part + # is disabled temporarily. + self.enable_trt = True + self.trt_parameters = SlicePluginTRTTest3.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Half, False, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index ab61a5b3cfccb0e885debe9786ae91a9754e9345..f6ba03194aa909279aa2cd884fc575041b01a4cd 100755 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -128,7 +128,7 @@ class TestLogSigmoid(TestActivation): class TestLogSigmoidAPI(unittest.TestCase): - # test paddle.nn.LogSigmoid, paddle.nn.functional.logsigmoid + # test paddle.nn.LogSigmoid, paddle.nn.functional.log_sigmoid def setUp(self): self.x_np = np.random.uniform(-1, 1, [11, 17]).astype('float32') self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ @@ -137,36 +137,45 @@ class TestLogSigmoidAPI(unittest.TestCase): def test_static_api(self): with paddle.static.program_guard(paddle.static.Program()): x = paddle.data('X', [11, 17]) - out1 = F.logsigmoid(x) + out1 = F.log_sigmoid(x) m = paddle.nn.LogSigmoid() out2 = m(x) exe = paddle.static.Executor(self.place) res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) out_ref = np.log(1 / (1 + np.exp(-self.x_np))) for r in res: - self.assertEqual(np.allclose(out_ref, r), True) + self.assertTrue(np.allclose(out_ref, r)) def test_dygraph_api(self): paddle.disable_static(self.place) x = paddle.to_tensor(self.x_np) - out1 = F.logsigmoid(x) + out1 = F.log_sigmoid(x) m = paddle.nn.LogSigmoid() out2 = m(x) out_ref = np.log(1 / (1 + np.exp(-self.x_np))) for r in [out1, out2]: - self.assertEqual(np.allclose(out_ref, r.numpy()), True) + self.assertTrue(np.allclose(out_ref, r.numpy())) paddle.enable_static() + def test_fluid_api(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', [11, 17]) + out = paddle.fluid.layers.logsigmoid(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out]) + out_ref = np.log(1 / (1 + np.exp(-self.x_np))) + self.assertTrue(np.allclose(out_ref, res[0])) + def test_errors(self): with paddle.static.program_guard(paddle.static.Program()): # The input type must be Variable. 
- self.assertRaises(TypeError, F.logsigmoid, 1) + self.assertRaises(TypeError, F.log_sigmoid, 1) # The input dtype must be float16, float32, float64. x_int32 = paddle.data(name='x_int32', shape=[11, 17], dtype='int32') - self.assertRaises(TypeError, F.logsigmoid, x_int32) + self.assertRaises(TypeError, F.log_sigmoid, x_int32) # support the input dtype is float16 x_fp16 = paddle.data(name='x_fp16', shape=[11, 17], dtype='float16') - F.logsigmoid(x_fp16) + F.log_sigmoid(x_fp16) class TestTanh(TestActivation, TestParameter): diff --git a/python/paddle/fluid/tests/unittests/test_broadcast_error.py b/python/paddle/fluid/tests/unittests/test_broadcast_error.py new file mode 100644 index 0000000000000000000000000000000000000000..517de67fd6dddf1d0a74df6ffed659720862b20c --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_broadcast_error.py @@ -0,0 +1,38 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +from op_test import OpTest +import paddle.fluid.core as core + + +class TestBroadcastOpCpu(OpTest): + def setUp(self): + self.op_type = "broadcast" + input = np.random.random((100, 2)).astype("float32") + np_out = input[:] + self.inputs = {"X": input} + self.attrs = {"sync_mode": False, "root": 0} + self.outputs = {"Out": np_out} + + def test_check_output_cpu(self): + try: + self.check_output_with_place(place=core.CPUPlace()) + except: + print("do not support cpu test, skip") + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py b/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py index ad75f2aa8bc06d2af24f5d22eb126a3558cd6f74..4c1b1e0f0bf9034263d2c36747cdae301da24215 100644 --- a/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py +++ b/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py @@ -21,8 +21,7 @@ from paddle.fluid.incubate.checkpoint.checkpoint_saver import CheckpointSaver import os import sys -from paddle.fluid.incubate.fleet.utils.fs import LocalFS -from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient +from paddle.distributed.fleet.utils.fs import LocalFS, HDFSClient from paddle.fluid.incubate.checkpoint.checkpoint_saver import CheckpointSaver diff --git a/python/paddle/fluid/tests/unittests/test_dataset.py b/python/paddle/fluid/tests/unittests/test_dataset.py index 582bb3dcc681921cdbf2111dcd26b299f06a3058..208956b825ed1d78aeacf85fc052210e42d247ce 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset.py +++ b/python/paddle/fluid/tests/unittests/test_dataset.py @@ -38,26 +38,22 @@ class TestDataset(unittest.TestCase): def test_dataset_create(self): """ Testcase for dataset create. 
""" try: - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "InMemoryDataset") + dataset = paddle.distributed.InMemoryDataset() except: self.assertTrue(False) try: - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "QueueDataset") + dataset = paddle.distributed.QueueDataset() except: self.assertTrue(False) try: - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "FileInstantDataset") + dataset = paddle.distributed.fleet.dataset.FileInstantDataset() except: self.assertTrue(False) try: - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "MyOwnDataset") + dataset = paddle.distributed.fleet.dataset.MyOwnDataset() self.assertTrue(False) except: self.assertTrue(True) @@ -95,18 +91,18 @@ class TestDataset(unittest.TestCase): name=slot, shape=[1], dtype="int64", lod_level=1) slots_vars.append(var) - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "InMemoryDataset") - dataset.set_batch_size(32) - dataset.set_thread(3) + dataset = paddle.distributed.InMemoryDataset() + dataset.init( + batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars) + dataset.update_settings(pipe_command="cat1") + dataset._init_distributed_settings( + parse_ins_id=True, + parse_content=True, + fea_eval=True, + candidate_size=10000) dataset.set_filelist( ["test_run_with_dump_a.txt", "test_run_with_dump_b.txt"]) - dataset.set_parse_ins_id(True) - dataset.set_parse_content(True) - dataset.set_pipe_command("cat") - dataset.set_use_var(slots_vars) dataset.load_into_memory() - dataset.set_fea_eval(10000, True) dataset.local_shuffle() exe = fluid.Executor(fluid.CPUPlace()) @@ -176,14 +172,14 @@ class TestDataset(unittest.TestCase): name=slot, shape=[1], dtype="int64", lod_level=1) slots_vars.append(var) - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "InMemoryDataset") - dataset.set_batch_size(32) - dataset.set_thread(3) + dataset = paddle.distributed.InMemoryDataset() + dataset.init( + batch_size=32, + thread_num=3, + pipe_command="cat", + download_cmd="cat", + use_var=slots_vars) dataset.set_filelist([filename1, filename2]) - dataset.set_pipe_command("cat") - dataset.set_download_cmd("cat") - dataset.set_use_var(slots_vars) dataset.load_into_memory() exe = fluid.Executor(fluid.CPUPlace()) exe.run(fluid.default_startup_program()) @@ -228,22 +224,19 @@ class TestDataset(unittest.TestCase): name=slot, shape=[1], dtype="int64", lod_level=1) slots_vars.append(var) - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "InMemoryDataset") - dataset.set_batch_size(32) - dataset.set_thread(3) + dataset = paddle.distributed.InMemoryDataset() + dataset.init( + batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars) + dataset._init_distributed_settings(fea_eval=True, candidate_size=1) dataset.set_filelist([ "test_in_memory_dataset_run_a.txt", "test_in_memory_dataset_run_b.txt" ]) - dataset.set_pipe_command("cat") - dataset.set_use_var(slots_vars) dataset.load_into_memory() - dataset.set_fea_eval(1, True) dataset.slots_shuffle(["slot1"]) dataset.local_shuffle() - dataset.set_generate_unique_feasigns(True, 15) - dataset.generate_local_tables_unlock(0, 11, 1, 25, 15) + dataset._set_generate_unique_feasigns(True, 15) + dataset._generate_local_tables_unlock(0, 11, 1, 25, 15) exe = fluid.Executor(fluid.CPUPlace()) exe.run(fluid.default_startup_program()) if self.use_data_loader: @@ -300,17 +293,14 @@ class TestDataset(unittest.TestCase): name=slot, shape=[1], dtype="float32", 
lod_level=1) slots_vars.append(var) - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "InMemoryDataset") - dataset.set_batch_size(32) - dataset.set_thread(1) - dataset.set_parse_ins_id(True) + dataset = paddle.distributed.InMemoryDataset() + dataset.init( + batch_size=32, thread_num=1, pipe_command="cat", use_var=slots_vars) + dataset._init_distributed_settings(parse_ins_id=True) dataset.set_filelist([ "test_in_memory_dataset_masterpatch_a.txt", "test_in_memory_dataset_masterpatch_b.txt" ]) - dataset.set_pipe_command("cat") - dataset.set_use_var(slots_vars) dataset.load_into_memory() dataset.local_shuffle() @@ -325,7 +315,8 @@ class TestDataset(unittest.TestCase): except Exception as e: self.assertTrue(False) - dataset.set_merge_by_lineid(2) + #dataset._set_merge_by_lineid(2) + dataset.update_settings(merge_size=2) dataset.dataset.merge_by_lineid() os.remove("./test_in_memory_dataset_masterpatch_a.txt") @@ -367,17 +358,14 @@ class TestDataset(unittest.TestCase): name="slot4", shape=[1], dtype="float32", lod_level=0) slots_vars = [var1, var2, var3, var4] - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "InMemoryDataset") - dataset.set_batch_size(32) - dataset.set_thread(1) - dataset.set_parse_ins_id(True) + dataset = paddle.distributed.InMemoryDataset() + dataset.init( + batch_size=32, thread_num=1, pipe_command="cat", use_var=slots_vars) + dataset._init_distributed_settings(parse_ins_id=True) dataset.set_filelist([ "test_in_memory_dataset_masterpatch1_a.txt", "test_in_memory_dataset_masterpatch1_b.txt" ]) - dataset.set_pipe_command("cat") - dataset.set_use_var(slots_vars) dataset.load_into_memory() dataset.local_shuffle() @@ -392,7 +380,7 @@ class TestDataset(unittest.TestCase): except Exception as e: self.assertTrue(False) - dataset.set_merge_by_lineid(2) + dataset._set_merge_by_lineid(2) dataset.dataset.merge_by_lineid() os.remove("./test_in_memory_dataset_masterpatch1_a.txt") @@ -423,16 +411,13 @@ class TestDataset(unittest.TestCase): name=slot, shape=[1], dtype="float32", lod_level=1) slots_vars.append(var) - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "InMemoryDataset") - dataset.set_batch_size(32) - dataset.set_thread(3) + dataset = paddle.distributed.InMemoryDataset() + dataset.init( + batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars) dataset.set_filelist([ "test_in_memory_dataset_run_a.txt", "test_in_memory_dataset_run_b.txt" ]) - dataset.set_pipe_command("cat") - dataset.set_use_var(slots_vars) dataset.load_into_memory() dataset.local_shuffle() @@ -473,9 +458,9 @@ class TestDataset(unittest.TestCase): except Exception as e: self.assertTrue(False) - dataset.set_merge_by_lineid(2) - dataset.set_parse_ins_id(False) - dataset.set_fleet_send_sleep_seconds(2) + dataset._set_merge_by_lineid(2) + dataset._set_parse_ins_id(False) + dataset._set_fleet_send_sleep_seconds(2) dataset.preload_into_memory() dataset.wait_preload_done() dataset.release_memory() @@ -483,10 +468,25 @@ class TestDataset(unittest.TestCase): dataset.wait_preload_done() dataset.dataset.merge_by_lineid() dataset.release_memory() - dataset.set_merge_by_lineid(30) - dataset.set_parse_ins_id(False) + dataset._set_merge_by_lineid(30) + dataset._set_parse_ins_id(False) dataset.load_into_memory() dataset.dataset.merge_by_lineid() + dataset.update_settings( + batch_size=1, + thread_num=2, + input_type=1, + pipe_command="cat", + use_var=[], + fs_name="", + fs_ugi="", + download_cmd="cat", + merge_size=-1, + parse_ins_id=False, + 
parse_content=False, + fleet_send_batch_size=2, + fleet_send_sleep_seconds=2, + fea_eval=True) fleet_ptr = fluid.core.Fleet() fleet_ptr.set_client2client_config(1, 1, 1) fleet_ptr.get_cache_threshold(0) @@ -517,14 +517,11 @@ class TestDataset(unittest.TestCase): name=slot, shape=[1], dtype="int64", lod_level=1) slots_vars.append(var) - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "QueueDataset") - dataset.set_batch_size(32) - dataset.set_thread(3) + dataset = paddle.distributed.QueueDataset() + dataset.init( + batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars) dataset.set_filelist( ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"]) - dataset.set_pipe_command("cat") - dataset.set_use_var(slots_vars) exe = fluid.Executor(fluid.CPUPlace()) exe.run(fluid.default_startup_program()) @@ -543,12 +540,9 @@ class TestDataset(unittest.TestCase): except Exception as e: self.assertTrue(False) - dataset2 = paddle.distributed.fleet.DatasetFactory().create_dataset( - "QueueDataset") - dataset2.set_use_var(slots_vars) - dataset2.set_batch_size(32) - dataset2.set_thread(3) - dataset2.set_pipe_command("cat") + dataset2 = paddle.distributed.QueueDataset() + dataset2.init( + batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars) dataset.set_filelist([]) try: exe.train_from_dataset(fluid.default_main_program(), dataset2) @@ -585,14 +579,11 @@ class TestDataset(unittest.TestCase): name=slot, shape=[1], dtype="float32", lod_level=1) slots_vars.append(var) - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "QueueDataset") - dataset.set_batch_size(32) - dataset.set_thread(3) + dataset = paddle.distributed.QueueDataset() + dataset.init( + batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars) dataset.set_filelist( ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"]) - dataset.set_pipe_command("cat") - dataset.set_use_var(slots_vars) exe = fluid.Executor(fluid.CPUPlace() if not core.is_compiled_with_cuda( ) else fluid.CUDAPlace(0)) @@ -641,15 +632,15 @@ class TestDataset(unittest.TestCase): name=slot, shape=[None, 1], dtype="int64", lod_level=1) slots_vars.append(var) - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "InMemoryDataset") - dataset.set_input_type(1) - dataset.set_batch_size(1) - dataset.set_thread(2) + dataset = paddle.distributed.InMemoryDataset() + dataset.init( + batch_size=1, + thread_num=2, + input_type=1, + pipe_command="cat", + use_var=slots_vars) dataset.set_filelist( ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"]) - dataset.set_pipe_command("cat") - dataset.set_use_var(slots_vars) dataset.load_into_memory() exe = fluid.Executor(fluid.CPUPlace() if not core.is_compiled_with_cuda( @@ -721,13 +712,10 @@ class TestDatasetWithFetchHandler(unittest.TestCase): inputs(list): inputs of get_dataset files(list): files of get_dataset """ - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "QueueDataset") - dataset.set_batch_size(32) - dataset.set_thread(3) + dataset = paddle.distributed.QueueDataset() + dataset.init( + batch_size=32, thread_num=3, pipe_command="cat", use_var=inputs) dataset.set_filelist(files) - dataset.set_pipe_command("cat") - dataset.set_use_var(inputs) return dataset def setUp(self): @@ -879,16 +867,17 @@ class TestDataset2(unittest.TestCase): except ImportError as e: print("warning: no mpi4py") exe.run(startup_program) - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "InMemoryDataset") 
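The `test_dataset` migration above settles on a three-level API: public `init(...)`/`update_settings(...)` for common options, `_init_distributed_settings(...)` for distributed-only ones, and `_set_*` helpers for internals. A condensed sketch of the intended call order (kwargs mirror the tests; `slots_vars` is illustrative):

```python
import paddle

slots_vars = []  # normally the slot variables of the program
dataset = paddle.distributed.InMemoryDataset()
dataset.init(batch_size=32, thread_num=3, pipe_command="cat",
             use_var=slots_vars)
dataset._init_distributed_settings(parse_ins_id=True, fea_eval=True,
                                   candidate_size=10000)
dataset.update_settings(pipe_command="cat1")  # later tweaks go here
```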
- dataset.set_batch_size(32) - dataset.set_thread(3) + dataset = paddle.distributed.InMemoryDataset() + + dataset.init( + batch_size=32, + thread_num=3, + pipe_command="cat", + use_var=slots_vars) dataset.set_filelist([ "test_in_memory_dataset2_run_a.txt", "test_in_memory_dataset2_run_b.txt" ]) - dataset.set_pipe_command("cat") - dataset.set_use_var(slots_vars) dataset.load_into_memory() fleet._opt_info = None fleet._fleet_ptr = None @@ -949,16 +938,16 @@ class TestDataset2(unittest.TestCase): except ImportError as e: print("warning: no mpi4py") exe.run(startup_program) - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "InMemoryDataset") - dataset.set_batch_size(32) - dataset.set_thread(3) + dataset = paddle.distributed.InMemoryDataset() + dataset.init( + batch_size=32, + thread_num=3, + pipe_command="cat", + use_var=slots_vars) dataset.set_filelist([ "test_in_memory_dataset2_run2_a.txt", "test_in_memory_dataset2_run2_b.txt" ]) - dataset.set_pipe_command("cat") - dataset.set_use_var(slots_vars) dataset.load_into_memory() try: dataset.global_shuffle(fleet) @@ -966,14 +955,11 @@ class TestDataset2(unittest.TestCase): print("warning: catch expected error") fleet._opt_info = None fleet._fleet_ptr = None - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "InMemoryDataset") - dataset.set_rank_offset("") - dataset.set_pv_batch_size(1) - dataset.set_hdfs_config("", "") + dataset = paddle.distributed.InMemoryDataset() + dataset.init(fs_name="", fs_ugi="") d = paddle.distributed.fleet.DatasetBase() try: - dataset.set_feed_type("MultiSlotInMemoryDataFeed") + dataset._set_feed_type("MultiSlotInMemoryDataFeed") except: print("warning: catch expected error") dataset.thread_num = 0 @@ -981,9 +967,6 @@ class TestDataset2(unittest.TestCase): dataset._prepare_to_run() except: print("warning: catch expected error") - dataset.set_parse_logkey(True) - dataset.set_merge_by_sid(True) - dataset.set_enable_pv_merge(True) try: dataset.preprocess_instance() except: @@ -996,16 +979,15 @@ class TestDataset2(unittest.TestCase): dataset.postprocess_instance() except: print("warning: catch expected error") - dataset.set_fleet_send_batch_size(1024) + dataset._set_fleet_send_batch_size(1024) try: dataset.global_shuffle() except: print("warning: catch expected error") - dataset.get_pv_data_size() + #dataset.get_pv_data_size() dataset.get_memory_data_size() dataset.get_shuffle_data_size() - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "QueueDataset") + dataset = paddle.distributed.QueueDataset() try: dataset.local_shuffle() except: @@ -1027,6 +1009,120 @@ class TestDataset2(unittest.TestCase): os.remove("./test_in_memory_dataset2_run2_a.txt") os.remove("./test_in_memory_dataset2_run2_b.txt") + def test_bosps_dataset_fleet2(self): + """ + Testcase for InMemoryDataset from create to run. 
+ """ + with open("test_in_memory_dataset2_run2_a.txt", "w") as f: + data = "1 1 2 3 3 4 5 5 5 5 1 1\n" + data += "1 2 2 3 4 4 6 6 6 6 1 2\n" + data += "1 3 2 3 5 4 7 7 7 7 1 3\n" + f.write(data) + with open("test_in_memory_dataset2_run2_b.txt", "w") as f: + data = "1 4 2 3 3 4 5 5 5 5 1 4\n" + data += "1 5 2 3 4 4 6 6 6 6 1 5\n" + data += "1 6 2 3 5 4 7 7 7 7 1 6\n" + data += "1 7 2 3 6 4 8 8 8 8 1 7\n" + f.write(data) + + train_program = fluid.Program() + startup_program = fluid.Program() + scope = fluid.Scope() + from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet + with fluid.program_guard(train_program, startup_program): + slots = ["slot1_ff", "slot2_ff", "slot3_ff", "slot4_ff"] + slots_vars = [] + for slot in slots: + var = fluid.layers.data(\ + name=slot, shape=[1], dtype="float32", lod_level=1) + slots_vars.append(var) + fake_cost = \ + fluid.layers.elementwise_sub(slots_vars[0], slots_vars[-1]) + fake_cost = fluid.layers.mean(fake_cost) + with fluid.scope_guard(scope): + place = fluid.CPUPlace() + exe = fluid.Executor(place) + try: + fleet.init() + except ImportError as e: + print("warning: no mpi4py") + adam = fluid.optimizer.Adam(learning_rate=0.000005) + try: + adam = fleet.distributed_optimizer( + adam, + strategy={ + "fs_uri": "fs_uri_xxx", + "fs_user": "fs_user_xxx", + "fs_passwd": "fs_passwd_xxx", + "fs_hadoop_bin": "fs_hadoop_bin_xxx" + }) + adam.minimize([fake_cost], [scope]) + except AttributeError as e: + print("warning: no mpi") + except ImportError as e: + print("warning: no mpi4py") + exe.run(startup_program) + dataset = paddle.distributed.fleet.BoxPSDataset() + dataset.init( + batch_size=32, + thread_num=3, + pipe_command="cat", + use_var=slots_vars) + dataset.set_filelist([ + "test_in_memory_dataset2_run2_a.txt", + "test_in_memory_dataset2_run2_b.txt" + ]) + dataset.load_into_memory() + try: + dataset.global_shuffle(fleet) + except: + print("warning: catch expected error") + fleet._opt_info = None + fleet._fleet_ptr = None + dataset = paddle.distributed.fleet.BoxPSDataset() + dataset.init( + rank_offset="", + pv_batch_size=1, + fs_name="", + fs_ugi="", + data_feed_type="MultiSlotInMemoryDataFeed", + parse_logkey=True, + merge_by_sid=True, + enable_pv_merge=True) + d = paddle.distributed.fleet.DatasetBase() + try: + dataset._set_feed_type("MultiSlotInMemoryDataFeed") + except: + print("warning: catch expected error") + dataset.thread_num = 0 + try: + dataset._prepare_to_run() + except: + print("warning: catch expected error") + dataset._set_parse_logkey(True) + dataset._set_merge_by_sid(True) + dataset._set_enable_pv_merge(True) + try: + dataset.preprocess_instance() + except: + print("warning: catch expected error") + try: + dataset.set_current_phase(1) + except: + print("warning: catch expected error") + try: + dataset.postprocess_instance() + except: + print("warning: catch expected error") + dataset._set_fleet_send_batch_size(1024) + try: + dataset.global_shuffle() + except: + print("warning: catch expected error") + #dataset.get_pv_data_size() + dataset.get_memory_data_size() + dataset.get_shuffle_data_size() + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py b/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py index c13c33f209f0f7d0fff95bdfb5b4e551a145b87e..9195ac277b93ade31b50682a4c3553c3664093f3 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py +++ b/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py @@ -97,9 +97,11 @@ 
class DatasetLoaderTestBase(unittest.TestCase): def check_batch_number(self, place, randomize_batch_num=False): main_prog, startup_prog, feeds = self.build_network() - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - self.dataset_name) - dataset.set_batch_size(BATCH_SIZE) + if self.dataset_name == "QueueDataset": + dataset = paddle.distributed.QueueDataset() + else: + dataset = paddle.distributed.InMemoryDataset() + dataset._set_batch_size(BATCH_SIZE) if isinstance(place, fluid.CPUPlace): file_num = 10 @@ -128,8 +130,8 @@ class DatasetLoaderTestBase(unittest.TestCase): fake_reader(batch_num=BATCH_NUM + random_delta_batch_size[i])) dataset.set_filelist(filelist) - dataset.set_use_var(feeds) - dataset.set_pipe_command("cat") + dataset._set_use_var(feeds) + dataset._set_pipe_command("cat") if self.dataset_name == 'InMemoryDataset': dataset.load_into_memory() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_checkpoint.py b/python/paddle/fluid/tests/unittests/test_fleet_checkpoint.py index 66baf8faac51ebd19689a5b22b87f3a454842fac..fc57602b445ddd6aa615b47ecce6bf993703d858 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_checkpoint.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_checkpoint.py @@ -21,8 +21,7 @@ from paddle.fluid.incubate.checkpoint.checkpoint_saver import CheckpointSaver import os import sys -from paddle.fluid.incubate.fleet.utils.fs import LocalFS -from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient +from paddle.distributed.fleet.utils.fs import LocalFS, HDFSClient from paddle.fluid.incubate.checkpoint.checkpoint_saver import CheckpointSaver diff --git a/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py b/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py index 6f8af3017efcb9010b129131a01c5ee071b5bc36..b20f33e11b656f1296510df653309a3569d45043 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py @@ -86,6 +86,13 @@ class TestStrategyConfig(unittest.TestCase): self.assertEqual(strategy.localsgd_configs["k_steps"], 4) self.assertEqual(strategy.localsgd_configs["begin_step"], 120) + def test_adaptive_localsgd_configs(self): + strategy = paddle.distributed.fleet.DistributedStrategy() + configs = {"init_k_steps": 1, "begin_step": 120} + strategy.adaptive_localsgd_configs = configs + self.assertEqual(strategy.adaptive_localsgd_configs["init_k_steps"], 1) + self.assertEqual(strategy.adaptive_localsgd_configs["begin_step"], 120) + def test_dgc(self): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.dgc = True diff --git a/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py index ff305fb95231b96b6d8f951b2943a0ab47060ce0..ec055178d90c529080489218f3aca1a71311beea 100755 --- a/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py @@ -141,7 +141,7 @@ class TestFleetLambMetaOptimizer(unittest.TestCase): ops = [op.type for op in avg_cost.block.ops] self.assertIn('lamb', ops) self.assertIn('cast', ops) - self.assertIn('isfinite', ops) + self.assertIn('check_finite_and_unscale', ops) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py index 
34ab423e064eebb9c93010fbc869adedb42bd6fa..0a70710b4590e253463640634615c2d11ff36e9f 100755 --- a/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py @@ -145,7 +145,7 @@ class TestFleetLarsMetaOptimizer(unittest.TestCase): ops = [op.type for op in avg_cost.block.ops] self.assertIn('lars_momentum', ops) self.assertIn('cast', ops) - self.assertIn('isfinite', ops) + self.assertIn('check_finite_and_unscale', ops) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_fleet_launch.sh b/python/paddle/fluid/tests/unittests/test_fleet_launch.sh index c5edc96963408bf1fad793f7271d75159934f019..e717962ead2e2da30092b12379bf36f368e8a735 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_launch.sh +++ b/python/paddle/fluid/tests/unittests/test_fleet_launch.sh @@ -79,9 +79,9 @@ if [ -f $file_1 ]; then rm $file_1 fi - +# test use DISTRIBUTED_TRAINER_ENDPOINTS env in paddlecloud unset PADDLE_PORT -unset TRAINER_PORTS_NUM +export DISTRIBUTED_TRAINER_ENDPOINTS=127.0.0.1:6170,127.0.0.1:6171,127.0.0.2:6170,127.0.0.2:6171 echo "" echo "paddle.distributed.launch async poll process test" diff --git a/python/paddle/fluid/tests/unittests/test_fleet_localsgd_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_localsgd_meta_optimizer.py index 945f5ae57454b2c4a509badb93574a6e03b607e8..f5347b0c665e2a162f7f8210171ec415afee4599 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_localsgd_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_localsgd_meta_optimizer.py @@ -52,5 +52,36 @@ class TestFleetLocalSGDMetaOptimizer(unittest.TestCase): optimizer.minimize(avg_cost) +class TestFleetAdaptiveLocalSGDMetaOptimizer(unittest.TestCase): + def setUp(self): + os.environ["PADDLE_TRAINER_ID"] = "1" + os.environ[ + "PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001,127.0.0.1:36002" + + def test_adaptive_localsgd_optimizer(self): + role = role_maker.PaddleCloudRoleMaker(is_collective=True) + fleet.init(role) + input_x = paddle.fluid.layers.data( + name="x", shape=[32], dtype='float32') + input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + + fc = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') + prediction = paddle.fluid.layers.fc(input=[fc], size=2, act='softmax') + cost = paddle.fluid.layers.cross_entropy( + input=prediction, label=input_y) + avg_cost = paddle.fluid.layers.mean(x=cost) + + strategy = paddle.distributed.fleet.DistributedStrategy() + strategy.adaptive_localsgd = True + config = strategy.adaptive_localsgd_configs + config['init_k_steps'] = 1 + config['begin_step'] = 1 + strategy.adaptive_localsgd_configs = config + + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) + optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) + optimizer.minimize(avg_cost) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py index eb5d9eb66608dd397dad773158c337fc67be2dbb..a831f6e838e950f9955c762544c312ed2d8766a9 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py @@ -163,10 +163,9 @@ class TestCloudRoleMaker2(unittest.TestCase): data = "1 1 1 1\n" f.write(data) - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "InMemoryDataset") + dataset = paddle.distributed.InMemoryDataset() 
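# ---- editor's note (not part of the patch) ----
# Alongside the init() migration, this patch demotes the old per-field
# setters to underscore-prefixed internals, which is why the rewritten
# tests now call them as private helpers, e.g.:
#     dataset.set_use_var(vars)      ->  dataset._set_use_var(vars)
#     dataset.set_batch_size(32)     ->  dataset._set_batch_size(32)
#     dataset.set_fea_eval(1, True)  ->  dataset._set_fea_eval(1, True)
# Public configuration is expected to flow through Dataset.init() instead.
# ---- end note ----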
dataset.set_filelist(["test_fleet_gloo_role_maker_1.txt"]) - dataset.set_use_var([show, label]) + dataset._set_use_var([show, label]) dataset.load_into_memory() dataset.get_memory_data_size(fleet) dataset.get_shuffle_data_size(fleet) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_debug_string.py b/python/paddle/fluid/tests/unittests/test_imperative_debug_string.py deleted file mode 100644 index 171687283bc5db709501ae33d131470582f4d106..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/unittests/test_imperative_debug_string.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import unittest -import paddle.fluid as fluid -import numpy as np - - -class MLP(fluid.Layer): - def __init__(self, input_size): - super(MLP, self).__init__() - self._linear1 = fluid.dygraph.Linear( - input_size, - 3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1))) - self._linear2 = fluid.dygraph.Linear( - 3, - 4, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1))) - - def forward(self, inputs): - x = self._linear1(inputs) - x = self._linear2(x) - x = fluid.layers.reduce_sum(x) - return x - - -class TestDygraphDebugString(unittest.TestCase): - def test_dygraph_debug_string(self): - np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) - unique_name = 0 - trace_var = 0 - alive_var = 0 - with fluid.dygraph.guard(): - mlp = MLP(input_size=2) - for i in range(10): - var_inp = fluid.dygraph.base.to_variable(np_inp) - out = mlp(var_inp) - out.backward() - mlp.clear_gradients() - unique_name_tmp, trace_var_tmp, alive_var_tmp = fluid.dygraph.base._print_debug_msg( - mlp.parameters(), is_test=True) - if i > 0: - self.assertGreaterEqual(unique_name, unique_name_tmp) - self.assertGreaterEqual(trace_var, trace_var_tmp) - self.assertGreaterEqual(alive_var, alive_var_tmp) - else: - unique_name = unique_name_tmp - trace_var = trace_var_tmp - alive_var = alive_var_tmp - try: - fluid.dygraph.base._print_debug_msg(mlp.parameters()) - except Exception as e: - raise RuntimeError( - "No Exception is accepted in _print_debug_msg, but we got: {}". 
- format(e)) diff --git a/python/paddle/fluid/tests/unittests/test_launch.sh b/python/paddle/fluid/tests/unittests/test_launch.sh index 98c907a551965331f79d1635362213b43d867002..958d78246627d4cd2f826f74aeccff5ffe254034 100644 --- a/python/paddle/fluid/tests/unittests/test_launch.sh +++ b/python/paddle/fluid/tests/unittests/test_launch.sh @@ -48,9 +48,9 @@ if [ -f $file_1 ]; then rm $file_1 fi - +# test use DISTRIBUTED_TRAINER_ENDPOINTS env in paddlecloud unset PADDLE_PORT -unset TRAINER_PORTS_NUM +export DISTRIBUTED_TRAINER_ENDPOINTS=127.0.0.1:6170,127.0.0.1:6171,127.0.0.2:6170,127.0.0.2:6171 echo "" echo "paddle.distributed.launch async poll process test" diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 89e9f7aad8581228411b1983580ced5566e65765..26073f49bdd3d494da7b39346c5bafb2aefba56a 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -2677,13 +2677,6 @@ class TestBook(LayerTest): out = layers.sigmoid(input, name='sigmoid') return (out) - def make_logsigmoid(self): - with program_guard(fluid.default_main_program(), - fluid.default_startup_program()): - input = self._get_data(name="input", shape=[16], dtype="float32") - out = layers.logsigmoid(input, name='logsigmoid') - return (out) - def make_exp(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): diff --git a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py index ed1939dbe279f28883d9e33178f1cfa256140e33..a1a9b3f444fa411f90e869f5265fa0933393ff56 100644 --- a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py +++ b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py @@ -64,7 +64,7 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): self.batch_size = 128 self.batch_num = 10 - def train_and_save_model(self): + def train_and_save_model(self, only_params=False): with new_program_scope(): startup_program = fluid.default_startup_program() main_program = fluid.default_main_program() @@ -102,11 +102,15 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): static_param_dict[param.name] = fluid.executor._fetch_var( param.name) - fluid.io.save_inference_model( - self.save_dirname, ["img"], [prediction], - exe, - model_filename=self.model_filename, - params_filename=self.params_filename) + if only_params: + fluid.io.save_params( + exe, self.save_dirname, filename=self.params_filename) + else: + fluid.io.save_inference_model( + self.save_dirname, ["img"], [prediction], + exe, + model_filename=self.model_filename, + params_filename=self.params_filename) return static_param_dict @@ -120,9 +124,7 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): self.params_filename = None orig_param_dict = self.train_and_save_model() - configs = paddle.SaveLoadConfig() - configs.separate_params = True - load_param_dict, _ = paddle.load(self.save_dirname, configs) + load_param_dict, _ = paddle.load(self.save_dirname) self.check_load_state_dict(orig_param_dict, load_param_dict) def test_load_with_model_filename(self): @@ -160,6 +162,14 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): load_param_dict, _ = paddle.load(self.save_dirname, configs) self.check_load_state_dict(orig_param_dict, load_param_dict) + def test_load_state_dict_from_save_params(self): + 
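# ---- editor's note (not part of the patch) ----
# This new case exercises the only_params=True branch added to
# train_and_save_model() above: parameters written by
# fluid.io.save_params(exe, dirname) can now be read back with a plain
# paddle.load(dirname). The old test built a SaveLoadConfig with
# separate_params=True for this; the rewritten tests above drop it.
# ---- end note ----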
self.save_dirname = "static_mnist.load_state_dict.save_params" + self.params_filename = None + orig_param_dict = self.train_and_save_model(True) + + load_param_dict, _ = paddle.load(self.save_dirname) + self.check_load_state_dict(orig_param_dict, load_param_dict) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py index 1f3dab67f2afe4e2b0a655634bb808ad0951ae9e..29a0fa55f7729bc39b2e9202397563a5cb10747c 100644 --- a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py @@ -400,7 +400,8 @@ class TestCUDNNLstmOp(OpTest): 'Input': input, 'W': flat_w, 'InitH': init_h, - 'InitC': init_c + 'InitC': init_c, + 'SequenceLength': self.sequence_length } self.attrs = { 'dropout_prob': 0.0, @@ -408,7 +409,6 @@ class TestCUDNNLstmOp(OpTest): 'input_size': input_size, 'hidden_size': hidden_size, 'num_layers': 1, - 'sequence_length': self.sequence_length.tolist() } self.outputs = { 'Out': output, @@ -436,13 +436,6 @@ class TestCUDNNLstmOp(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNLstmOp2(TestCUDNNLstmOp): - def set_attrs(self): - self.sequence_length = np.array([], dtype=np.int32) - - -@unittest.skipIf(not core.is_compiled_with_cuda(), - "core is not compiled with CUDA") -class TestCUDNNLstmOp3(TestCUDNNLstmOp): def set_attrs(self): self.num_layers = 2 diff --git a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py index 9bb12d546550a821e8a133dd9c91d5d41a50b1b2..a70862f40197c513a0cd04753553264708ee2a1c 100644 --- a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py @@ -307,7 +307,7 @@ class TestMathOpPatchesVarBase(unittest.TestCase): np.array_equal(x.sigmoid().numpy(), fluid.layers.sigmoid(x).numpy( ))) self.assertTrue( - np.array_equal(x.logsigmoid().numpy(), + np.array_equal(x.log_sigmoid().numpy(), fluid.layers.logsigmoid(x).numpy())) self.assertTrue(np.array_equal(x.exp().numpy(), paddle.exp(x).numpy())) self.assertTrue( diff --git a/python/paddle/fluid/tests/unittests/test_monitor.py b/python/paddle/fluid/tests/unittests/test_monitor.py index f6207edb41c190ac51dfe67dad22bb0191a67a07..cf273876b1f2f8a9b4828375ca6e20e591feb306 100644 --- a/python/paddle/fluid/tests/unittests/test_monitor.py +++ b/python/paddle/fluid/tests/unittests/test_monitor.py @@ -52,18 +52,17 @@ class TestDatasetWithStat(unittest.TestCase): name=slot, shape=[1], dtype="int64", lod_level=1) slots_vars.append(var) - dataset = paddle.distributed.fleet.DatasetFactory().create_dataset( - "InMemoryDataset") - dataset.set_batch_size(32) - dataset.set_thread(3) + dataset = paddle.distributed.InMemoryDataset() + dataset._set_batch_size(32) + dataset._set_thread(3) dataset.set_filelist([ "test_in_memory_dataset_run_a.txt", "test_in_memory_dataset_run_b.txt" ]) - dataset.set_pipe_command("cat") - dataset.set_use_var(slots_vars) + dataset._set_pipe_command("cat") + dataset._set_use_var(slots_vars) dataset.load_into_memory() - dataset.set_fea_eval(1, True) + dataset._set_fea_eval(1, True) dataset.slots_shuffle(["slot1"]) exe = fluid.Executor(fluid.CPUPlace()) diff --git a/python/paddle/fluid/tests/unittests/test_regularizer_api.py b/python/paddle/fluid/tests/unittests/test_regularizer_api.py new file mode 100644 index 
0000000000000000000000000000000000000000..76186d2e39feafe772fce6cc7f9099e97d833232 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_regularizer_api.py @@ -0,0 +1,204 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +from functools import partial +import contextlib +import numpy as np +import paddle +import paddle.fluid.core as core +import paddle.fluid as fluid +import paddle.fluid.framework as framework +import paddle.fluid.optimizer as optimizer +import paddle.regularizer as regularizer +from paddle.fluid.backward import append_backward + + +def bow_net(data, + label, + dict_dim, + is_sparse=False, + emb_dim=8, + hid_dim=8, + hid_dim2=6, + class_dim=2): + """ + BOW net + This model is from https://github.com/PaddlePaddle/models: + fluid/PaddleNLP/text_classification/nets.py + """ + emb = fluid.layers.embedding( + input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim]) + bow = fluid.layers.sequence_pool(input=emb, pool_type='sum') + bow_tanh = fluid.layers.tanh(bow) + fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh") + fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh") + prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + + return avg_cost + + +class TestRegularizer(unittest.TestCase): + def setUp(self): + self.word_dict = paddle.dataset.imdb.word_dict() + reader = paddle.batch( + paddle.dataset.imdb.train(self.word_dict), batch_size=1)() + self.train_data = [next(reader) for _ in range(1)] + + def get_places(self): + places = [core.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(core.CUDAPlace(0)) + return places + + @contextlib.contextmanager + def scope_prog_guard(self, main_prog, startup_prog): + scope = fluid.core.Scope() + with fluid.unique_name.guard(): + with fluid.scope_guard(scope): + with fluid.program_guard(main_prog, startup_prog): + yield + + def run_program(self, place, feed_list): + exe = fluid.Executor(place) + feeder = fluid.DataFeeder(feed_list=feed_list, place=place) + exe.run(fluid.default_startup_program()) + + main_prog = fluid.default_main_program() + param_list = [var.name for var in main_prog.block(0).all_parameters()] + + param_sum = [] + for data in self.train_data: + out = exe.run(main_prog, + feed=feeder.feed(data), + fetch_list=param_list) + p_sum = 0 + for v in out: + p_sum += np.sum(np.abs(v)) + param_sum.append(p_sum) + return param_sum + + def check_l2decay_regularizer(self, place, model): + paddle.manual_seed(1) + paddle.framework.random._manual_program_seed(1) + main_prog = fluid.framework.Program() + startup_prog = fluid.framework.Program() + with self.scope_prog_guard( + main_prog=main_prog, startup_prog=startup_prog): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + label = fluid.layers.data(name="label", 
shape=[1], dtype="int64") + + avg_cost = model(data, label, len(self.word_dict)) + + optimizer = fluid.optimizer.Adagrad( + learning_rate=0.1, + regularization=paddle.regularizer.L2Decay(1.0)) + optimizer.minimize(avg_cost) + param_sum = self.run_program(place, [data, label]) + return param_sum + + def check_l2decay(self, place, model): + paddle.manual_seed(1) + paddle.framework.random._manual_program_seed(1) + main_prog = fluid.framework.Program() + startup_prog = fluid.framework.Program() + + with self.scope_prog_guard( + main_prog=main_prog, startup_prog=startup_prog): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + + avg_cost_l2 = model(data, label, len(self.word_dict)) + + param_list = fluid.default_main_program().block(0).all_parameters() + para_sum = [] + for para in param_list: + para_mul = fluid.layers.square(x=para) + para_sum.append(fluid.layers.reduce_sum(input=para_mul)) + avg_cost_l2 += fluid.layers.sums(para_sum) * .5 + + optimizer = fluid.optimizer.Adagrad(learning_rate=0.1) + optimizer.minimize(avg_cost_l2) + param_sum = self.run_program(place, [data, label]) + return param_sum + + def test_l2(self): + for place in self.get_places(): + dense_sparse_p_sum = [] + for sparse in [True, False]: + model = partial(bow_net, is_sparse=sparse) + framework_l2 = self.check_l2decay_regularizer(place, model) + l2 = self.check_l2decay(place, model) + assert len(l2) == len(framework_l2) + for i in range(len(l2)): + assert np.isclose(a=framework_l2[i], b=l2[i], rtol=5e-5) + dense_sparse_p_sum.append(framework_l2) + + assert len(dense_sparse_p_sum[0]) == len(dense_sparse_p_sum[1]) + for i in range(len(dense_sparse_p_sum[0])): + assert np.isclose( + a=dense_sparse_p_sum[0][i], + b=dense_sparse_p_sum[1][i], + rtol=5e-5) + + def test_repeated_regularization(self): + l1 = paddle.regularizer.L1Decay(0.1) + l2 = paddle.regularizer.L2Decay(0.01) + fc_param_attr = fluid.ParamAttr(regularizer=l1) + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.layers.uniform_random([2, 2, 3]) + out = fluid.layers.fc(x, 5, param_attr=fc_param_attr) + loss = fluid.layers.reduce_sum(out) + sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2) + sgd.minimize(loss) + with fluid.dygraph.guard(): + input = fluid.dygraph.to_variable( + np.random.randn(3, 2).astype('float32')) + paddle.manual_seed(1) + paddle.framework.random._manual_program_seed(1) + + linear1 = fluid.dygraph.Linear( + 2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr) + linear2 = fluid.dygraph.Linear( + 2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr) + + loss1 = linear1(input) + loss1.backward() + # set l2 regularizer in optimizer, but l1 in fluid.ParamAttr + + fluid.optimizer.SGD(parameter_list=linear1.parameters(), + learning_rate=1e-2, + regularization=l2).minimize(loss1) + # only set l1 in fluid.ParamAttr + loss2 = linear2(input) + loss2.backward() + fluid.optimizer.SGD(parameter_list=linear2.parameters(), + learning_rate=1e-2).minimize(loss2) + # they should both be applied by l1, and keep the same + self.assertTrue( + np.allclose(linear1.weight.numpy(), linear2.weight.numpy()), + "weight should use the regularization in fluid.ParamAttr!") + self.assertTrue( + np.allclose(linear1.bias.numpy(), linear2.bias.numpy()), + "bias should use the regularization in fluid.ParamAttr!") + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_translated_layer.py 
b/python/paddle/fluid/tests/unittests/test_translated_layer.py index 20c51b9afbafac9ba1fa032aea446383bc2b9796..e5dc279750d3d9605aeba1d27dbb84a35cf31921 100644 --- a/python/paddle/fluid/tests/unittests/test_translated_layer.py +++ b/python/paddle/fluid/tests/unittests/test_translated_layer.py @@ -49,7 +49,10 @@ class LinearNet(nn.Layer): super(LinearNet, self).__init__() self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) - @paddle.jit.to_static + @paddle.jit.to_static(input_spec=[ + paddle.static.InputSpec( + shape=[None, IMAGE_SIZE], dtype='float32', name='x') + ]) def forward(self, x): return self._linear(x) @@ -152,6 +155,34 @@ class TestTranslatedLayer(unittest.TestCase): with self.assertRaises(ValueError): program = translated_layer.program('not_exists') + def test_get_input_spec(self): + # load + translated_layer = paddle.jit.load(self.model_path) + + expect_spec = [ + paddle.static.InputSpec( + shape=[None, IMAGE_SIZE], dtype='float32', name='x') + ] + actual_spec = translated_layer._input_spec() + + for spec_x, spec_y in zip(expect_spec, actual_spec): + self.assertEqual(spec_x, spec_y) + + def test_get_output_spec(self): + # load + translated_layer = paddle.jit.load(self.model_path) + + expect_spec = [ + paddle.static.InputSpec( + shape=[None, CLASS_NUM], + dtype='float32', + name='translated_layer/scale_0.tmp_1') + ] + actual_spec = translated_layer._output_spec() + + for spec_x, spec_y in zip(expect_spec, actual_spec): + self.assertEqual(spec_x, spec_y) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/white_list/check_shape_white_list.py b/python/paddle/fluid/tests/unittests/white_list/check_shape_white_list.py index 227e6cc28fb4a6d05d73cbf2c3c92bda623b7d58..e19641e710dda6cd2614a75a3ca4b2f7ec1c0b58 100644 --- a/python/paddle/fluid/tests/unittests/white_list/check_shape_white_list.py +++ b/python/paddle/fluid/tests/unittests/white_list/check_shape_white_list.py @@ -26,4 +26,5 @@ NEED_TO_FIX_OP_LIST = [ 'squared_l2_distance', 'tree_conv', 'cvm', + 'cudnn_lstm', ] diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py index f3cc8c610ff4da16b6333931913396d84cc05981..163c249ab37457d7d4566553c71e3231f384a8b1 100644 --- a/python/paddle/nn/functional/__init__.py +++ b/python/paddle/nn/functional/__init__.py @@ -39,7 +39,7 @@ from .activation import hard_sigmoid #DEFINE_ALIAS from .activation import hard_swish #DEFINE_ALIAS from .activation import hsigmoid #DEFINE_ALIAS from .activation import leaky_relu #DEFINE_ALIAS -from .activation import logsigmoid #DEFINE_ALIAS +from .activation import log_sigmoid #DEFINE_ALIAS from .activation import maxout #DEFINE_ALIAS from .activation import prelu #DEFINE_ALIAS from .activation import relu #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py index ffedb027330bda94db86dc0943a5c4a7281f254f..f7bbe0c94e03dc48ebfb21a62aeded9f446afc63 100644 --- a/python/paddle/nn/functional/activation.py +++ b/python/paddle/nn/functional/activation.py @@ -35,7 +35,7 @@ __all__ = [ 'hard_swish', 'hsigmoid', 'leaky_relu', - 'logsigmoid', + 'log_sigmoid', 'maxout', 'prelu', 'relu', @@ -552,13 +552,13 @@ def relu(x, name=None): return out -def logsigmoid(x, name=None): +def log_sigmoid(x, name=None): """ - logsigmoid activation. + log_sigmoid activation. .. math:: - logsigmoid(x) = log \\frac{1}{1 + e^{-x}} + log\\_sigmoid(x) = log \\frac{1}{1 + e^{-x}} Parameters: x (Tensor): The input Tensor with data type float32, float64. 
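# ---- editor's note (sketch, not part of the patch) ----
# The rename is API-surface only: the underlying C++ op keeps its
# "logsigmoid" name, while the public Python entry points become
# F.log_sigmoid and paddle.nn.LogSigmoid. Usage after this patch,
# mirroring the updated docstring example:
import paddle
import paddle.nn.functional as F

paddle.disable_static()
x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
out = F.log_sigmoid(x)  # [-0.313262, -0.126928, -0.0485874, -0.0181499]
# ---- end note ----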
@@ -573,20 +573,19 @@ def logsigmoid(x, name=None): import paddle import paddle.nn.functional as F - import numpy as np paddle.disable_static() - x = paddle.to_tensor(np.array([1.0, 2.0, 3.0, 4.0])) - out = F.logsigmoid(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499] + x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0]) + out = F.log_sigmoid(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499] """ if in_dygraph_mode(): return core.ops.logsigmoid(x) check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], - 'logsigmoid') - helper = LayerHelper("logsigmoid", **locals()) + 'log_sigmoid') + helper = LayerHelper("log_sigmoid", **locals()) out = helper.create_variable_for_type_inference(x.dtype) helper.append_op(type='logsigmoid', inputs={'X': x}, outputs={'Out': out}) return out diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 3c1482e69c3c36232ee5d70f2156a8d16c2d212a..5cf4953933242292c6a732513dbee2164811dd35 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -267,8 +267,8 @@ def conv1d(x, dilation = utils.convert_to_list(dilation, 1, 'dilation') + [1] l_type = "conv2d" - if (num_channels == groups and num_filters % num_channels == 0 and - not use_cudnn): + if (num_channels == groups and num_channels != 1 and + num_filters % num_channels == 0 and not use_cudnn): l_type = 'depthwise_conv2d' use_cudnn = False @@ -491,7 +491,8 @@ def conv2d(x, dilation = utils.convert_to_list(dilation, 2, 'dilation') l_type = "conv2d" - if (num_channels == groups and num_filters % num_channels == 0): + if (num_channels == groups and num_channels != 1 and + num_filters % num_channels == 0): l_type = 'depthwise_conv2d' use_cudnn = False @@ -761,7 +762,8 @@ def conv_transpose1d(x, op_type = 'conv2d_transpose' num_filters = weight.shape[1] - if (num_channels == groups and num_filters == 1 and not use_cudnn): + if (num_channels == groups and num_channels != 1 and num_filters == 1 and + not use_cudnn): op_type = 'depthwise_conv2d_transpose' use_cudnn = False @@ -1010,7 +1012,7 @@ def conv_transpose2d(x, op_type = 'conv2d_transpose' num_filters = weight.shape[1] - if (num_channels == groups and num_filters == 1): + if (num_channels == groups and num_channels != 1 and num_filters == 1): op_type = 'depthwise_conv2d_transpose' use_cudnn = False diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py index c38d6018a2500111280a482aa60d072e65e27742..585d369c607e5b6eb6a2a3bcb28bd8999a2e0dca 100644 --- a/python/paddle/nn/layer/activation.py +++ b/python/paddle/nn/layer/activation.py @@ -860,11 +860,10 @@ class LogSigmoid(layers.Layer): .. code-block:: python import paddle - import numpy as np paddle.disable_static() - x = paddle.to_tensor(np.array([1.0, 2.0, 3.0, 4.0])) + x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0]) m = paddle.nn.LogSigmoid() out = m(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499] """ @@ -874,7 +873,7 @@ class LogSigmoid(layers.Layer): self._name = name def forward(self, x): - return F.logsigmoid(x, self._name) + return F.log_sigmoid(x, self._name) class Softmax(layers.Layer): diff --git a/python/paddle/regularizer.py b/python/paddle/regularizer.py index 2b20bb41970f0b1bd829585cd3767c6c08421f1e..b3f483fd89197c9bd0a447b4272e958824331942 100644 --- a/python/paddle/regularizer.py +++ b/python/paddle/regularizer.py @@ -12,8 +12,134 @@ # See the License for the specific language governing permissions and # limitations under the License. 
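# ---- editor's note (sketch, not part of the patch) ----
# The rewrite below turns the TODO stub into a public paddle.regularizer
# module whose L1Decay/L2Decay are thin wrappers over the fluid
# regularizers, so weight decay can be requested at the optimizer level:
import paddle
from paddle.regularizer import L2Decay

paddle.disable_static()
linear = paddle.nn.Linear(10, 10)
momentum = paddle.optimizer.Momentum(
    learning_rate=0.1,
    parameters=linear.parameters(),
    weight_decay=L2Decay(0.0001))  # coeff taken from the docstring example
# ---- end note ----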
-# TODO: define the regularizer functions -# __all__ = ['L1Decay', -# 'L1DecayRegularizer', -# 'L2Decay', -# 'L2DecayRegularizer'] +__all__ = ['L1Decay', 'L2Decay'] + +import paddle.fluid as fluid + + +class L1Decay(fluid.regularizer.L1Decay): + """ + Implement the L1 Weight Decay Regularization, which encourages the weights to be sparse. + + It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_paddle_optimizer_Momentum` ). + When set in ``ParamAttr`` , it only takes effect for trainable parameters in this layer. When set in + ``optimizer`` , it takes effect for all trainable parameters. When set together, ``ParamAttr`` has + higher priority than ``optimizer`` , which means that for a trainable parameter, if the regularizer is defined + in its ParamAttr, the regularizer in Optimizer will be ignored. Otherwise the regularizer + in Optimizer will be used. + + In the implementation, the formula of L1 Weight Decay Regularization is as follows: + + .. math:: + + L1WeightDecay = reg\_coeff * sign(parameter) + + Args: + coeff(float, optional): regularization coeff. Default: 0.0. + + Examples: + .. code-block:: python + + # Example1: set Regularizer in optimizer + import paddle + from paddle.regularizer import L1Decay + import numpy as np + paddle.disable_static() + inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + inp = paddle.to_tensor(inp) + out = linear(inp) + loss = paddle.mean(out) + beta1 = paddle.to_tensor([0.9], dtype="float32") + beta2 = paddle.to_tensor([0.99], dtype="float32") + momentum = paddle.optimizer.Momentum( + learning_rate=0.1, + parameters=linear.parameters(), + weight_decay=L1Decay(0.0001)) + back = out.backward() + momentum.step() + momentum.clear_grad() + + # Example2: set Regularizer in parameters + # Set L1 regularization in parameters. + # Global regularizer does not take effect on my_conv2d for this case. + from paddle.nn import Conv2d + from paddle import ParamAttr + from paddle.regularizer import L1Decay + + my_conv2d = Conv2d( + in_channels=10, + out_channels=10, + kernel_size=1, + stride=1, + padding=0, + weight_attr=ParamAttr(regularizer=L1Decay(coeff=0.01)), + bias_attr=False) + """ + + def __init__(self, coeff=0.0): + super(L1Decay, self).__init__(coeff) + + +class L2Decay(fluid.regularizer.L2Decay): + """ + Implement the L2 Weight Decay Regularization, which helps to prevent the model over-fitting. + + It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_paddle_optimizer_Momentum` ). + When set in ``ParamAttr`` , it only takes effect for trainable parameters in this layer. When set in + ``optimizer`` , it takes effect for all trainable parameters. When set together, ``ParamAttr`` has + higher priority than ``optimizer`` , which means that for a trainable parameter, if the regularizer is defined + in its ParamAttr, the regularizer in Optimizer will be ignored. Otherwise the regularizer + in Optimizer will be used. + + In the implementation, the formula of L2 Weight Decay Regularization is as follows: + + .. math:: + + L2WeightDecay = reg\_coeff * parameter + + Args: + coeff(float, optional): regularization coeff. Default: 0.0 + + Examples: + ..
code-block:: python + + # Example1: set Regularizer in optimizer + import paddle + from paddle.regularizer import L2Decay + import numpy as np + paddle.disable_static() + inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + inp = paddle.to_tensor(inp) + out = linear(inp) + loss = paddle.mean(out) + beta1 = paddle.to_tensor([0.9], dtype="float32") + beta2 = paddle.to_tensor([0.99], dtype="float32") + momentum = paddle.optimizer.Momentum( + learning_rate=0.1, + parameters=linear.parameters(), + weight_decay=L2Decay(0.0001)) + back = out.backward() + momentum.step() + momentum.clear_grad() + + # Example2: set Regularizer in parameters + # Set L2 regularization in parameters. + # Global regularizer does not take effect on my_conv2d for this case. + from paddle.nn import Conv2d + from paddle import ParamAttr + from paddle.regularizer import L2Decay + + my_conv2d = Conv2d( + in_channels=10, + out_channels=10, + kernel_size=1, + stride=1, + padding=0, + weight_attr=ParamAttr(regularizer=L2Decay(coeff=0.01)), + bias_attr=False) + """ + + def __init__(self, coeff=0.0): + super(L2Decay, self).__init__(coeff) diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index db1222fa421ef61e6f68f0d69ad0fe7f5d80f6d5..9de407841fb461713d00f997afdf33a38a531245 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -21,6 +21,7 @@ from ..fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_t from ..fluid.layers.tensor import fill_constant from ..fluid.layers import utils import numpy as np +import six # TODO: define functions to manipulate a tensor from ..fluid.layers import cast #DEFINE_ALIAS from ..fluid.layers import slice #DEFINE_ALIAS @@ -1056,10 +1057,25 @@ def tile(x, repeat_times, name=None): """ if in_dygraph_mode(): return core.ops.tile(x, 'repeat_times', repeat_times) + check_type(repeat_times, 'repeat_times', (list, tuple, Variable), 'tile') + if isinstance(repeat_times, Variable): + assert len(repeat_times.shape) == 1, ( + 'repeat_times must be an 1-D Tensor.') + else: + for elem in repeat_times: + if isinstance(elem, Variable): + assert len(elem.shape) == 1, ( + 'Elements in repeat_times must be 1-D Tensors or integers.') + else: + if six.PY3: + type_tuple = (int, np.int32, np.int64) + elif six.PY2: + type_tuple = (int, long, np.int32, np.int64) + assert isinstance(elem, type_tuple), ( + 'Elements in repeat_times must be 1-D Tensors or integers.') check_variable_and_dtype( x, 'x', ['bool', 'float32', 'float64', 'int32', 'int64'], 'tile') - check_type(repeat_times, 'repeat_times', (list, tuple, Variable), 'tile') if convert_dtype(x.dtype) == 'bool' and x.stop_gradient == False: raise ValueError( "When the date type is bool for the input 'x' of tile op, you " @@ -1181,18 +1197,33 @@ def expand(x, shape, name=None): if in_dygraph_mode(): return core.ops.expand_v2(x, 'shape', shape) + if isinstance(shape, Variable): + assert len(shape.shape) == 1, ('shape must be an 1-D Tensor.') + else: + for elem in shape: + if isinstance(elem, Variable): + assert len(elem.shape) == 1, ( + 'Elements in shape must be 1-D Tensors or integers.') + else: + if six.PY3: + type_tuple = (int, np.int32, np.int64) + elif six.PY2: + type_tuple = (int, long, np.int32, np.int64) + assert isinstance(elem, type_tuple), ( + 'Elements in shape must be 1-D Tensors or integers.') + check_variable_and_dtype( x, 'x', ['bool', 'float32', 'float64', 'int32', 'int64'], 'expand') check_type(shape, 
'shape', (list, tuple, Variable), 'expand') - - inputs = {"X": [x]} - attrs = {} if convert_dtype(x.dtype) == 'bool' and x.stop_gradient == False: raise ValueError("When the data type of input 'x' for expand is bool, " "you must set its stop_gradient to be False by " "some_var.stop_gradient = True, supporting " "some_var as the input.") + inputs = {"X": [x]} + attrs = {} + helper = LayerHelper('expand', **locals()) def get_attr_expand_shape(list_expand_shape): diff --git a/python/paddle/tests/test_dist_hapi_model.py b/python/paddle/tests/test_dist_hapi_model.py index e75e08e3749e6ce629e88c486e4f87d9109dc709..db5b63c5ae0e29fa6f1274befd277c4e46c3a1b1 100644 --- a/python/paddle/tests/test_dist_hapi_model.py +++ b/python/paddle/tests/test_dist_hapi_model.py @@ -37,7 +37,11 @@ def get_cluster_from_args(selected_gpus): free_ports = find_free_ports(len(selected_gpus)) if free_ports is not None: free_ports = list(free_ports) - return get_cluster(node_ips, node_ip, free_ports, selected_gpus) + + trainer_endpoints = [] + for ip in node_ips: + trainer_endpoints.append(["%s:%d" % (ip, port) for port in free_ports]) + return get_cluster(node_ips, node_ip, trainer_endpoints, selected_gpus) def get_gpus(selected_gpus): diff --git a/python/paddle/utils/__init__.py b/python/paddle/utils/__init__.py index 2a649c776b4103b1d3d8648957bbff7a32007410..4a786679727fb1b42c216146685e0e6524e858c9 100644 --- a/python/paddle/utils/__init__.py +++ b/python/paddle/utils/__init__.py @@ -16,12 +16,13 @@ from .profiler import ProfilerOptions from .profiler import Profiler from .profiler import get_profiler from .deprecated import deprecated +from ..fluid.framework import unique_name +from ..fluid.framework import load_op_library +from ..fluid.framework import require_version from . import download __all__ = ['dump_config', 'deprecated', 'download'] #TODO: define new api under this directory -# __all__ = ['unique_name', -# 'load_op_library', -# 'require_version'] +__all__ += ['unique_name', 'load_op_library', 'require_version'] diff --git a/python/requirements.txt b/python/requirements.txt index c8d3b2af1794bb0858b187d6a4c641322f50cdd1..ddd1e943df78eed3dd36d36571954665b267019d 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -13,11 +13,9 @@ scipy ; python_version>"3.5" nltk ; python_version>="3.5" rarfile Pillow -graphviz six decorator prettytable -objgraph astor pathlib netifaces diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..af558c2ef0b42b68e47fe98ebd626c9b9034bef9 --- /dev/null +++ b/setup.py @@ -0,0 +1,577 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
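# ---- editor's note (not part of the patch) ----
# The new setup.py is a template rather than a directly runnable script:
# tokens such as @PADDLE_SOURCE_DIR@, @PADDLE_VERSION@ and
# ${PADDLE_BINARY_DIR} are placeholders that the CMake build substitutes
# (presumably via configure_file) before pip ever sees the file, which is
# why the string literals below mix Python with @...@ and ${...} markers.
# ---- end note ----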
+ +import subprocess +import os +import os.path +import errno +import re +import shutil +import sys +import fnmatch +import platform + +from contextlib import contextmanager +from setuptools import Command +from setuptools import setup, Distribution, Extension +from setuptools.command.install import install as InstallCommandBase + + +class BinaryDistribution(Distribution): + def has_ext_modules(self): + return True + + +RC = 0 + +ext_name = '.dll' if os.name == 'nt' else ('.dylib' if sys.platform == 'darwin' + else '.so') + + +def git_commit(): + try: + cmd = ['git', 'rev-parse', 'HEAD'] + git_commit = subprocess.Popen( + cmd, stdout=subprocess.PIPE, + cwd="@PADDLE_SOURCE_DIR@").communicate()[0].strip() + except: + git_commit = 'Unknown' + git_commit = git_commit.decode() + return str(git_commit) + + +def _get_version_detail(idx): + assert idx < 3, "version info consists of %(major)d.%(minor)d.%(patch)d, \ + so the detail index must be less than 3" + + if re.match('@TAG_VERSION_REGEX@', '@PADDLE_VERSION@'): + version_details = '@PADDLE_VERSION@'.split('.') + + if len(version_details) >= 3: + return version_details[idx] + + return 0 + + +def get_major(): + return int(_get_version_detail(0)) + + +def get_minor(): + return int(_get_version_detail(1)) + + +def get_patch(): + return str(_get_version_detail(2)) + + +def is_tagged(): + try: + cmd = [ + 'git', 'describe', '--exact-match', '--tags', 'HEAD', '2>/dev/null' + ] + git_tag = subprocess.Popen( + cmd, stdout=subprocess.PIPE, + cwd="@PADDLE_SOURCE_DIR@").communicate()[0].strip() + git_tag = git_tag.decode() + except: + return False + + if str(git_tag).replace('v', '') == '@PADDLE_VERSION@': + return True + else: + return False + + +def write_version_py(filename='paddle/version.py'): + cnt = '''# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY +# +full_version = '%(major)d.%(minor)d.%(patch)s' +major = '%(major)d' +minor = '%(minor)d' +patch = '%(patch)s' +rc = '%(rc)d' +istaged = %(istaged)s +commit = '%(commit)s' +with_mkl = '%(with_mkl)s' + +def show(): + if istaged: + print('full_version:', full_version) + print('major:', major) + print('minor:', minor) + print('patch:', patch) + print('rc:', rc) + else: + print('commit:', commit) + +def mkl(): + return with_mkl +''' + commit = git_commit() + with open(filename, 'w') as f: + f.write(cnt % { + 'major': get_major(), + 'minor': get_minor(), + 'patch': get_patch(), + 'rc': RC, + 'version': '${PADDLE_VERSION}', + 'commit': commit, + 'istaged': is_tagged(), + 'with_mkl': '@WITH_MKL@' + }) + + +write_version_py(filename='@PADDLE_BINARY_DIR@/python/paddle/version.py') + + +def write_distributed_training_mode_py( + filename='paddle/fluid/incubate/fleet/parameter_server/version.py'): + cnt = '''from __future__ import print_function + +# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY + +from paddle.fluid.incubate.fleet.base.mode import Mode + +BUILD_MODE=Mode.%(mode)s + +def is_transpiler(): + return Mode.TRANSPILER == BUILD_MODE + +''' + + dirname = os.path.dirname(filename) + + try: + os.makedirs(dirname) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + with open(filename, 'w') as f: + f.write(cnt % + {'mode': 'PSLIB' if '${WITH_PSLIB}' == 'ON' else 'TRANSPILER'}) + + +write_distributed_training_mode_py( + filename='@PADDLE_BINARY_DIR@/python/paddle/fluid/incubate/fleet/parameter_server/version.py' +) + +packages = [ + 'paddle', + 'paddle.libs', + 'paddle.utils', + 'paddle.dataset', + 'paddle.reader', + 'paddle.distributed', +
'paddle.incubate.complex', + 'paddle.incubate.complex.tensor', + 'paddle.distributed.fleet', + 'paddle.distributed.fleet.base', + 'paddle.distributed.fleet.meta_optimizers', + 'paddle.distributed.fleet.runtime', + 'paddle.distributed.fleet.dataset', + 'paddle.distributed.fleet.metrics', + 'paddle.distributed.fleet.proto', + 'paddle.distributed.fleet.utils', + 'paddle.framework', + 'paddle.jit', + 'paddle.fluid', + 'paddle.fluid.inference', + 'paddle.fluid.dygraph', + 'paddle.fluid.dygraph.dygraph_to_static', + 'paddle.fluid.dygraph.amp', + 'paddle.fluid.proto', + 'paddle.fluid.proto.profiler', + 'paddle.fluid.distributed', + 'paddle.fluid.layers', + 'paddle.fluid.dataloader', + 'paddle.fluid.contrib', + 'paddle.fluid.contrib.decoder', + 'paddle.fluid.contrib.quantize', + 'paddle.fluid.contrib.reader', + 'paddle.fluid.contrib.slim', + 'paddle.fluid.contrib.slim.quantization', + 'paddle.fluid.contrib.slim.quantization.imperative', + 'paddle.fluid.contrib.utils', + 'paddle.fluid.contrib.extend_optimizer', + 'paddle.fluid.contrib.mixed_precision', + 'paddle.fluid.contrib.layers', + 'paddle.fluid.transpiler', + 'paddle.fluid.transpiler.details', + 'paddle.fluid.incubate', + 'paddle.fluid.incubate.data_generator', + 'paddle.fluid.incubate.fleet', + 'paddle.fluid.incubate.checkpoint', + 'paddle.fluid.incubate.fleet.base', + 'paddle.fluid.incubate.fleet.parameter_server', + 'paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler', + 'paddle.fluid.incubate.fleet.parameter_server.pslib', + 'paddle.fluid.incubate.fleet.parameter_server.ir', + 'paddle.fluid.incubate.fleet.collective', + 'paddle.fluid.incubate.fleet.utils', + 'paddle.hapi', + 'paddle.vision', + 'paddle.vision.models', + 'paddle.vision.transforms', + 'paddle.vision.datasets', + 'paddle.text', + 'paddle.text.datasets', + 'paddle.incubate', + 'paddle.io', + 'paddle.optimizer', + 'paddle.nn', + 'paddle.nn.functional', + 'paddle.nn.layer', + 'paddle.nn.initializer', + 'paddle.nn.utils', + 'paddle.metric', + 'paddle.static', + 'paddle.static.nn', + 'paddle.tensor', +] + +with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f: + setup_requires = f.read().splitlines() + +# Note(wangzhongpu): +# When compiling paddle under python36, the dependencies belonging to python2.7 will be imported, resulting in errors when installing paddle +if sys.version_info >= (3, 6) and sys.version_info < (3, 7): + setup_requires_tmp = [] + for setup_requires_i in setup_requires: + if "<\"3.6\"" in setup_requires_i or "<\"3.5\"" in setup_requires_i or "<=\"3.5\"" in setup_requires_i: + continue + setup_requires_tmp += [setup_requires_i] + setup_requires = setup_requires_tmp +if sys.version_info >= (3, 5) and sys.version_info < (3, 6): + setup_requires_tmp = [] + for setup_requires_i in setup_requires: + if "<\"3.5\"" in setup_requires_i: + continue + setup_requires_tmp += [setup_requires_i] + setup_requires = setup_requires_tmp +if sys.version_info >= (3, 7): + setup_requires_tmp = [] + for setup_requires_i in setup_requires: + if "<\"3.6\"" in setup_requires_i or "<=\"3.6\"" in setup_requires_i or "<\"3.5\"" in setup_requires_i or "<=\"3.5\"" in setup_requires_i or "<\"3.7\"" in setup_requires_i: + continue + setup_requires_tmp += [setup_requires_i] + setup_requires = setup_requires_tmp + +if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: + setup_requires += ['opencv-python'] + +# the prefix is sys.prefix which should always be usr +paddle_bins = '' + +if not '${WIN32}': + paddle_bins = 
['${PADDLE_BINARY_DIR}/paddle/scripts/paddle'] +package_data = { + 'paddle.fluid': + ['${FLUID_CORE_NAME}' + ('.so' if os.name != 'nt' else '.pyd')] +} +if '${HAS_NOAVX_CORE}' == 'ON': + package_data['paddle.fluid'] += [ + 'core_noavx' + ('.so' if os.name != 'nt' else '.pyd') + ] + +package_dir = { + '': '${PADDLE_BINARY_DIR}/python', + # The paddle.fluid.proto will be generated while compiling. + # So that package points to other directory. + 'paddle.fluid.proto.profiler': '${PADDLE_BINARY_DIR}/paddle/fluid/platform', + 'paddle.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework', + 'paddle.fluid': '${PADDLE_BINARY_DIR}/python/paddle/fluid', +} + +# put all thirdparty libraries in paddle.libs +libs_path = '${PADDLE_BINARY_DIR}/python/paddle/libs' + +package_data['paddle.libs'] = [] +package_data['paddle.libs'] = [('libwarpctc' + if os.name != 'nt' else 'warpctc') + ext_name] +shutil.copy('${WARPCTC_LIBRARIES}', libs_path) + +if '${WITH_MKL}' == 'ON': + shutil.copy('${MKLML_SHARED_LIB}', libs_path) + shutil.copy('${MKLML_SHARED_IOMP_LIB}', libs_path) + package_data['paddle.libs'] += [ + ('libmklml_intel' if os.name != 'nt' else 'mklml') + ext_name, + ('libiomp5' if os.name != 'nt' else 'libiomp5md') + ext_name + ] +else: + if os.name == 'nt': + # copy the openblas.dll + shutil.copy('${OPENBLAS_SHARED_LIB}', libs_path) + package_data['paddle.libs'] += ['openblas' + ext_name] + +if '${WITH_LITE}' == 'ON': + shutil.copy('${LITE_SHARED_LIB}', libs_path) + package_data['paddle.libs'] += ['libpaddle_full_api_shared' + ext_name] + +if '${WITH_PSLIB}' == 'ON': + shutil.copy('${PSLIB_LIB}', libs_path) + if os.path.exists('${PSLIB_VERSION_PY}'): + shutil.copy( + '${PSLIB_VERSION_PY}', + '${PADDLE_BINARY_DIR}/python/paddle/fluid/incubate/fleet/parameter_server/pslib/' + ) + package_data['paddle.libs'] += ['libps' + ext_name] + +if '${WITH_MKLDNN}' == 'ON': + if '${CMAKE_BUILD_TYPE}' == 'Release' and os.name != 'nt': + # only change rpath in Release mode. + # TODO(typhoonzero): use install_name_tool to patch mkl libs once + # we can support mkl on mac. + # + # change rpath of libdnnl.so.1, add $ORIGIN/ to it. + # The reason is that all thirdparty libraries in the same directory, + # thus, libdnnl.so.1 will find libmklml_intel.so and libiomp5.so. 
+ command = "patchelf --set-rpath '$ORIGIN/' ${MKLDNN_SHARED_LIB}" + if os.system(command) != 0: + raise Exception("patch libdnnl.so failed, command: %s" % command) + shutil.copy('${MKLDNN_SHARED_LIB}', libs_path) + if os.name != 'nt': + shutil.copy('${MKLDNN_SHARED_LIB_1}', libs_path) + package_data['paddle.libs'] += ['libmkldnn.so.0', 'libdnnl.so.1'] + else: + package_data['paddle.libs'] += ['mkldnn.dll'] + +if '${WITH_XPU}' == 'ON': + # only change rpath in Release mode, + if '${CMAKE_BUILD_TYPE}' == 'Release': + if os.name != 'nt': + if "@APPLE@" == "1": + command = "install_name_tool -id \"@loader_path/\" ${XPU_API_LIB}" + else: + command = "patchelf --set-rpath '$ORIGIN/' ${XPU_API_LIB}" + if os.system(command) != 0: + raise Exception("patch ${XPU_API_LIB} failed, command: %s" % + command) + shutil.copy('${XPU_API_LIB}', libs_path) + shutil.copy('${XPU_RT_LIB}', libs_path) + shutil.copy('${XPU_SIM_LIB}', libs_path) + package_data['paddle.libs'] += [ + '${XPU_API_LIB_NAME}', '${XPU_RT_LIB_NAME}', '${XPU_SIM_LIB_NAME}' + ] + +# copy libfuild_framework.so to libs +if os.name != 'nt' and sys.platform != 'darwin': + paddle_framework_lib = '${FLUID_FRAMEWORK_SHARED_LIB}' + shutil.copy(paddle_framework_lib, libs_path) + package_data['paddle.libs'] += [ + ('libpaddle_framework' + if os.name != 'nt' else 'paddle_framework') + ext_name + ] + +# remove unused paddle/libs/__init__.py +if os.path.isfile(libs_path + '/__init__.py'): + os.remove(libs_path + '/__init__.py') +package_dir['paddle.libs'] = libs_path + +# change rpath of ${FLUID_CORE_NAME}.ext, add $ORIGIN/../libs/ to it. +# The reason is that libwarpctc.ext, libiomp5.ext etc are in paddle.libs, and +# ${FLUID_CORE_NAME}.ext is in paddle.fluid, thus paddle/fluid/../libs will pointer to above libraries. +# This operation will fix https://github.com/PaddlePaddle/Paddle/issues/3213 +if '${CMAKE_BUILD_TYPE}' == 'Release': + if os.name != 'nt': + # only change rpath in Release mode, since in Debug mode, ${FLUID_CORE_NAME}.xx is too large to be changed. + if "@APPLE@" == "1": + command = "install_name_tool -id \"@loader_path/../libs/\" ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so' + else: + command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so' + # The dynamic library compiled under aarch64 is greater than 64M, + # and an oversize error will be reported when using patchelf. 
+ if platform.machine() != 'aarch64': + if os.system(command) != 0: + raise Exception( + "patch ${FLUID_CORE_NAME}.%s failed, command: %s" % + (ext_name, command)) + +ext_modules = [Extension('_foo', ['stub.cc'])] +if os.name == 'nt': + # fix the path separator under windows + fix_package_dir = {} + for k, v in package_dir.items(): + fix_package_dir[k] = v.replace('/', '\\') + package_dir = fix_package_dir + ext_modules = [] +elif sys.platform == 'darwin': + ext_modules = [] + + +def find_files(pattern, root): + for dirpath, _, files in os.walk(root): + for filename in fnmatch.filter(files, pattern): + yield os.path.join(dirpath, filename) + + +headers = ( + list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/framework')) + + list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/imperative')) + + list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/memory')) + + list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/platform')) + + list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/string')) + + list(find_files('*.pb.h', '${PADDLE_BINARY_DIR}/paddle/fluid/platform')) + + list(find_files('*.pb.h', '${PADDLE_BINARY_DIR}/paddle/fluid/framework')) + + list(find_files('*.pb', '${cudaerror_INCLUDE_DIR}')) + + # errorMessage.pb for errormessage + ['${EIGEN_INCLUDE_DIR}/Eigen/Core'] + # eigen + list(find_files('*', '${EIGEN_INCLUDE_DIR}/Eigen/src')) + # eigen + list(find_files('*', '${EIGEN_INCLUDE_DIR}/unsupported/Eigen')) + # eigen + list(find_files('*', '${GFLAGS_INSTALL_DIR}/include')) + # gflags + list(find_files('*', '${GLOG_INSTALL_DIR}/include')) + # glog + list(find_files('*', '${BOOST_INCLUDE_DIR}/boost')) + # boost + list(find_files('*', '${XXHASH_INSTALL_DIR}/include')) + # xxhash + list(find_files('*', '${PROTOBUF_INCLUDE_DIR}')) + # protobuf + list(find_files('*', '${DLPACK_INCLUDE_DIR}')) + # dlpack + list(find_files('*.h', '${THREADPOOL_INCLUDE_DIR}'))) # threadpool + +if '${WITH_MKLDNN}' == 'ON': + headers += list(find_files('*', '${MKLDNN_INSTALL_DIR}/include')) # mkldnn + +if '${WITH_GPU}' == 'ON': + headers += list(find_files( + '*.pb', '${cudaerror_INCLUDE_DIR}')) # errorMessage.pb for errormessage + + +class InstallCommand(InstallCommandBase): + def finalize_options(self): + ret = InstallCommandBase.finalize_options(self) + self.install_headers = os.path.join(self.install_purelib, 'paddle', + 'include') + self.install_lib = self.install_platlib + return ret + + +class InstallHeaders(Command): + """Override how headers are copied. 
+    """
+    description = 'install C/C++ header files'
+
+    user_options = [
+        ('install-dir=', 'd', 'directory to install header files to'),
+        ('force', 'f', 'force installation (overwrite existing files)'),
+    ]
+
+    boolean_options = ['force']
+
+    def initialize_options(self):
+        self.install_dir = None
+        self.force = 0
+        self.outfiles = []
+
+    def finalize_options(self):
+        self.set_undefined_options(
+            'install', ('install_headers', 'install_dir'), ('force', 'force'))
+
+    def mkdir_and_copy_file(self, header):
+        if 'pb.h' in header:
+            install_dir = re.sub('${PADDLE_BINARY_DIR}/', '', header)
+        elif 'third_party' not in header:
+            # framework
+            install_dir = re.sub('@PADDLE_SOURCE_DIR@/', '', header)
+        else:
+            # third_party
+            install_dir = re.sub('${THIRD_PARTY_PATH}', 'third_party', header)
+            patterns = [
+                'eigen3/src/extern_eigen3', 'boost/src/extern_boost',
+                'dlpack/src/extern_dlpack/include', 'install/protobuf/include',
+                'install/gflags/include', 'install/glog/include',
+                'install/xxhash/include', 'install/mkldnn/include',
+                'threadpool/src/extern_threadpool'
+            ]
+            for pattern in patterns:
+                install_dir = re.sub(pattern, '', install_dir)
+        install_dir = os.path.join(self.install_dir,
+                                   os.path.dirname(install_dir))
+        if not os.path.exists(install_dir):
+            self.mkpath(install_dir)
+        return self.copy_file(header, install_dir)
+
+    def run(self):
+        # On macOS and Windows, only copy third_party/cudaErrorMessage.pb for cudaErrorMessage
+        if os.name == 'nt' or sys.platform == 'darwin':
+            if '${WITH_GPU}' == 'ON':
+                self.mkdir_and_copy_file(
+                    '${cudaerror_INCLUDE_DIR}/cudaErrorMessage.pb')
+            return
+        hdrs = self.distribution.headers
+        if not hdrs:
+            return
+        self.mkpath(self.install_dir)
+        for header in hdrs:
+            (out, _) = self.mkdir_and_copy_file(header)
+            self.outfiles.append(out)
+
+    def get_inputs(self):
+        return self.distribution.headers or []
+
+    def get_outputs(self):
+        return self.outfiles
+
+
+# On non-Windows platforms, redirect the setuptools log to a file.
+if sys.platform != 'win32':
+
+    @contextmanager
+    def redirect_stdout():
+        f_log = open('${SETUP_LOG_FILE}', 'w')
+        origin_stdout = sys.stdout
+        sys.stdout = f_log
+        yield
+        # pick up the (possibly rebound) log file before restoring stdout
+        f_log = sys.stdout
+        sys.stdout = origin_stdout
+        f_log.close()
+else:
+
+    @contextmanager
+    def redirect_stdout():
+        yield
+
+
+if '${WITH_GPU}' == 'ON':
+    os.environ['PACKAGE_NAME'] = "paddlepaddle-gpu"
+else:
+    os.environ['PACKAGE_NAME'] = "paddlepaddle"
+
+with redirect_stdout():
+    setup(
+        name='${PACKAGE_NAME}',
+        version='${PADDLE_VERSION}',
+        description='Parallel Distributed Deep Learning',
+        install_requires=setup_requires,
+        packages=packages,
+        ext_modules=ext_modules,
+        package_data=package_data,
+        package_dir=package_dir,
+        scripts=paddle_bins,
+        distclass=BinaryDistribution,
+        headers=headers,
+        cmdclass={
+            'install_headers': InstallHeaders,
+            'install': InstallCommand,
+        },
+        entry_points={
+            'console_scripts':
+            ['fleetrun = paddle.distributed.fleet.launch:launch']
+        })
+
+# purelib contains a huge number of files, which makes the log very noisy,
+# so those lines are not printed on the screen; open `setup.py.log`
+# for the full log.
+if os.path.exists('${SETUP_LOG_FILE}'):
+    os.system('grep -v "purelib" ${SETUP_LOG_FILE}')
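A minimal verification sketch, not part of the patch, for the rpath rewriting done by the patchelf calls in the setup.py hunk above. It assumes patchelf is on PATH; the library path is a hypothetical example (the real name is substituted from ${FLUID_CORE_NAME} at build time).

```python
# Minimal rpath check (sketch, not part of the patch). Assumes patchelf is
# installed; the .so path is a hypothetical example of the built core library.
import subprocess

lib = 'python/paddle/fluid/core_avx.so'  # hypothetical path
rpath = subprocess.check_output(
    ['patchelf', '--print-rpath', lib]).decode().strip()
print(rpath)  # expected: $ORIGIN/../libs/ after the Release-mode patch
```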
diff --git a/tools/windows/build_compile_environment.bat b/tools/windows/build_compile_environment.bat
new file mode 100644
index 0000000000000000000000000000000000000000..16665ac4aafddca323c2f453f5fcdd78aa0949ed
--- /dev/null
+++ b/tools/windows/build_compile_environment.bat
@@ -0,0 +1,190 @@
+:: Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+::
+:: Licensed under the Apache License, Version 2.0 (the "License");
+:: you may not use this file except in compliance with the License.
+:: You may obtain a copy of the License at
+::
+::     http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing, software
+:: distributed under the License is distributed on an "AS IS" BASIS,
+:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+:: See the License for the specific language governing permissions and
+:: limitations under the License.
+::
+:: ================================
+:: Build Paddle compile environment
+:: ================================
+:: Description:
+::
+::   Install the compile environment for the xly CI.
+::
+::   Include:
+::   1. CMake 3.17.0
+::   2. Git 2.28.0
+::   3. Python 3.7.8
+::   4. Visual Studio 2015 with update 3
+::   5. CUDA 10 [cudnn missing]
+::   6. java jre [not complete]
+::   7. xly agent [not complete]
+
+:: Turn off command echoing.
+@echo off
+
+:: ===== start step 0: wget tool =====
+:: Download wget for Windows if no wget tool is present.
+echo ">>>>>>>> step [0/7]: wget tool"
+wget --help > nul 2> nul || call :install_wget
+goto cmake
+
+:install_wget
+echo wget is not installed on this PC, will download wget 1.20.
+echo Download package from https://eternallybored.org/misc/wget/1.20/64/wget.exe ...
+certutil -urlcache -split -f https://eternallybored.org/misc/wget/1.20/64/wget.exe > nul 2> nul
+if %errorlevel% == 0 (
+    echo Download wget tool into %cd% success.
+) else (
+    echo Error***** Download wget tool failed, please download it manually before rerunning.
+    exit /b 1
+)
+goto :eof
+:: ===== end step 0: wget tool =====
+
+:: ===== start step 1: cmake =====
+:: Download CMake-3.17.0 and add it to PATH if it is not installed.
+:: TODO: limit version >= 3.17.0
+:cmake
+echo ">>>>>>>> step [1/7]: CMake 3.17.0"
+cmake --help > nul 2> nul || call :install_cmake
+goto git
+
+:install_cmake
+echo cmake is not installed on this PC, will install cmake-3.17.0.
+echo Download package from https://cmake.org/files/v3.17/cmake-3.17.0-win64-x64.msi ...
+wget -O cmake-3.17.0-win64-x64.msi https://cmake.org/files/v3.17/cmake-3.17.0-win64-x64.msi
+echo Install cmake-3.17.0 ...
+:: /passive [silent installation]
+:: /norestart [do not restart]
+:: ADD_CMAKE_TO_PATH=System [add CMake to the system PATH for all users]
+start /wait cmake-3.17.0-win64-x64.msi /passive /norestart ADD_CMAKE_TO_PATH=System
+if %errorlevel% == 0 (
+    echo Install CMake-3.17.0 success!
+) else (
+    echo Error***** Install CMake-3.17.0 failed, please re-install it manually.
+)
+del cmake-3.17.0-win64-x64.msi
+goto :eof
+:: ===== end step 1: cmake =====
+
+:: ===== start step 2: Git =====
+:: Download Git-2.28.0 and add it to PATH if it is not installed.
+:: TODO: limit version >= 2.28.0
+:git
+echo ">>>>>>>> step [2/7]: Git 2.28.0"
+git --help > nul 2> nul || call :install_git
+goto python
+
+:install_git
+echo git is not installed on this PC, will install Git-2.28.0.
+echo Download package from https://github.com/git-for-windows/git/releases/download/v2.28.0.windows.1/Git-2.28.0-64-bit.exe ...
+wget -O Git-2.28.0-64-bit.exe https://github.com/git-for-windows/git/releases/download/v2.28.0.windows.1/Git-2.28.0-64-bit.exe
+echo Install Git-2.28.0 ...
+:: /SILENT [silent install]
+:: /ALLUSERS [add path for all users]
+:: /NORESTART [do not restart]
+start /wait Git-2.28.0-64-bit.exe /SILENT /ALLUSERS /NORESTART
+if %errorlevel% == 0 (
+    echo Install Git-2.28.0 success!
+) else (
+    echo Error***** Install Git-2.28.0 failed, please re-install it manually.
+)
+del Git-2.28.0-64-bit.exe
+goto :eof
+:: ===== end step 2: Git =====
+
+:: ===== start step 3: Python =====
+:: Download Python-3.7.8 and add it to PATH if it is not installed.
+:: TODO: limit version >= 3.7.8
+:python
+echo ">>>>>>>> step [3/7]: Python 3.7.8"
+python -V 2>&1 | findstr /C:"Python 3.7.8" > nul 2> nul || call :install_python
+goto vs2015
+
+:install_python
+echo Python is not installed on this PC, will install Python-3.7.8.
+echo Download package from https://npm.taobao.org/mirrors/python/3.7.8/python-3.7.8-amd64.exe ...
+wget -O python-3.7.8-amd64.exe https://npm.taobao.org/mirrors/python/3.7.8/python-3.7.8-amd64.exe
+echo Install Python-3.7.8 ...
+:: /passive [silent install]
+:: InstallAllUsers [add path for all users]
+:: PrependPath [add script/install into PATH]
+:: TargetDir [install directory]
+start /wait python-3.7.8-amd64.exe /passive InstallAllUsers=1 PrependPath=1 TargetDir=C:\Python37
+if %errorlevel% == 0 (
+    echo Install python-3.7.8 success!
+) else (
+    echo Error***** Install python-3.7.8 failed, please re-install it manually.
+)
+del python-3.7.8-amd64.exe
+goto :eof
+:: ===== end step 3: Python =====
+
+:: ===== start step 4: Visual Studio 2015 =====
+:: Download Visual Studio 2015 if it is not installed.
+:vs2015
+echo ">>>>>>>> step [4/7]: Visual Studio 2015"
+cmd /C "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 > nul 2> nul || call :install_visual_studio
+goto cuda10
+
+:install_visual_studio
+echo Visual Studio is not installed on this PC, will install VS2015.
+echo Download package from "https://download.my.visualstudio.com/pr/en_visual_studio_professional_2015_with_update_3_x86_x64_web_installer_8922978.exe"
+wget -O vs_installer.exe "https://download.my.visualstudio.com/pr/en_visual_studio_professional_2015_with_update_3_x86_x64_web_installer_8922978.exe?t=9ee7a96d-ca80-4b84-af2c-7dd86996a0aa&e=1600103404&h=3cdea1e81c04aa4e846f5314972c46eb&su=1"
+echo Install Visual Studio 2015 ...
+:: /passive [silent install]
+:: /norestart [do not restart]
+:: /NoRefresh [no refresh]
+:: /InstallSelectableItems NativeLanguageSupport_Group [select Visual C++ for installation]
+start /wait vs_installer.exe /passive /norestart /NoRefresh /InstallSelectableItems NativeLanguageSupport_Group
+if %errorlevel% == 0 (
+    echo Install Visual Studio 2015 success!
+) else (
+    echo Error***** Install Visual Studio 2015 failed, please re-install it manually.
+)
+del vs_installer.exe
+goto :eof
+:: ===== end step 4: Visual Studio 2015 =====
+
+:: ===== start step 5: CUDA 10 =====
+:cuda10
+echo ">>>>>>>> step [5/7]: CUDA 10.0"
+nvcc --version > nul 2> nul || call :install_cuda
+goto java-jre
+
+:install_cuda
+echo CUDA is not installed on this PC, will install CUDA-10.0.
+echo Download package from "https://developer.download.nvidia.cn/compute/cuda/10.0/secure/Prod/network_installers/cuda_10.0.130_win10_network.exe"
+wget -O cuda_installer.exe "https://developer.download.nvidia.cn/compute/cuda/10.0/secure/Prod/network_installers/cuda_10.0.130_win10_network.exe?hG7oBtA2CnxZG7d39onmBdtzrIa2cOukrmW8I0qk3h36vb2Sj0yYGjMElJlxlNhjx8Xu5RlbmdBhCWvP2QcEqMjCoKCXe5lOgr5uIIso_7LqrotgQHbZRZSVBYRT4bIAHPVSPrr4_4KczKvI9Nf3mbO9RJ2Vj6ECD5QphRMJBus0KKNVxO1gsplVL5qaCnE"
+echo Install CUDA-10.0 ...
+:: -s [silent install]
+start /wait cuda_installer.exe -s
+if %errorlevel% == 0 (
+    echo Install CUDA-10.0 success!
+) else (
+    echo Error***** Install CUDA-10.0 failed, please re-install it manually.
+)
+del cuda_installer.exe
+goto :eof
+:: ===== end step 5: CUDA 10 =====
+
+:: ===== start step 6: java jre =====
+:java-jre
+echo ">>>>>>>> step [6/7]: java jre"
+goto xly-agent
+:: ===== end step 6: java jre =====
+
+:: ===== start step 7: xly agent =====
+:xly-agent
+echo ">>>>>>>> step [7/7]: xly agent"
+goto :eof
+:: ===== end step 7: xly agent =====
\ No newline at end of file
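A quick post-run check, sketched under the assumption that the script above completed and that Python itself was installed in step 3; it only confirms that the provisioned tools are discoverable on PATH.

```python
# Post-run sanity check (sketch, not part of the patch): verify that the
# tools installed by build_compile_environment.bat are discoverable on PATH.
import shutil

for tool in ('wget', 'cmake', 'git', 'python', 'nvcc'):
    location = shutil.which(tool)
    print('%-6s -> %s' % (tool, location or 'NOT FOUND'))
```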