diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index c76489d183a5f1ccbccb4d36ec40ab200b6072c9..9b111e09e07682b6be0c72a4a0d5a3e86e271e62 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -56,7 +56,7 @@ paddle.fluid.io.save_persistables (ArgSpec(args=['executor', 'dirname', 'main_pr paddle.fluid.io.load_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '0a5308f496632ab1ec3ba1f1377e6f95')) paddle.fluid.io.load_params (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '41779819cef32f2246e83aebc5a002e2')) paddle.fluid.io.load_persistables (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '28df5bfe26ca7a077f91156abb0fe6d2')) -paddle.fluid.io.save_inference_model (ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True)), ('document', '582d87b8df75a5a639a107db8ff86f9c')) +paddle.fluid.io.save_inference_model (ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True)), ('document', '70f4f53f13572436ac72d1c8b5efeb9d')) paddle.fluid.io.load_inference_model (ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '7a5255386075dac3c75b7058254fcdcb')) paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.initializer.UniformInitializer.__init__ (ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) @@ -109,7 +109,7 @@ paddle.fluid.layers.reduce_prod (ArgSpec(args=['input', 'dim', 'keep_dim', 'name paddle.fluid.layers.sequence_first_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '2b290d3d77882bfe9bb8d331cac8cdd3')) paddle.fluid.layers.sequence_last_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', 'c16a892f44f7fe71bfa5afc32d3f34ce')) paddle.fluid.layers.sequence_slice (ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'fdcea0e8b5bc7d8d4b1b072c521014e6')) -paddle.fluid.layers.dropout (ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 'downgrade_in_infer')), ('document', 'dc7042734c6d8b8ce97321f017f01d6f')) +paddle.fluid.layers.dropout (ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 'downgrade_in_infer')), ('document', 'f1dd22f7351f7f9853212958e0d8aa7a')) paddle.fluid.layers.split (ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '652625345c2acb900029c78cc75f8aa6')) paddle.fluid.layers.ctc_greedy_decoder (ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ebbf2adbd79683dc93db03454dfa18c2')) paddle.fluid.layers.edit_distance (ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None)), ('document', '97f0262f97602644c83142789d784571')) @@ -205,7 +205,7 @@ paddle.fluid.layers.maxout (ArgSpec(args=['x', 'groups', 'name'], varargs=None, paddle.fluid.layers.space_to_depth (ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5f207ae10589ebe38a63575ef6ff8e1e')) paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '51def402b8910e163cbace9d0c0526ed')) paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '77a6d80aa5551ca70324fc975c44507f')) -paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None)), ('document', '2f46f1ff39a13ab00857e7b9f44b2fa7')) +paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name', 'act'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None, None)), ('document', 'ab84fdc6dc60f3ad9aa397e6007e3bf9')) paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '70e3b5182a18b40b47ecabd7c8490a35')) paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', '9bb77f8dc002dd2ce75d4769eaaf5007')) paddle.fluid.layers.grid_sampler (ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd256cba1c41a5ed92ce3f31e24a2ca6d')) diff --git a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc index b90655184bcf31bbfc3100f5993acee655fedc55..644cd4e15083519d6c685ae3e6a0737692018a07 100644 --- a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc @@ -112,19 +112,20 @@ void FusedAllReduceOpHandle::RunImpl() { }); for (size_t k = 1; k < g_tensor.size(); ++k) { - const void *pre_address = g_tensor.at(k - 1).second->data(); + const void *cur_address = g_tensor.at(k - 1).second->data(); int64_t len = g_tensor.at(k - 1).second->numel(); auto offset = len * framework::SizeOfType(dtype); - void *next_address = reinterpret_cast( - reinterpret_cast(pre_address) + offset); - const void *cur_address = g_tensor.at(k).second->data(); - VLOG(10) << k << ", " - << " pre_address(" << g_tensor.at(k - 1).first - << "): " << pre_address << ", cur_address(" - << g_tensor.at(k).first << "): " << cur_address - << ", offset:" << offset << ", " << next_address << ", " - << cur_address; - PADDLE_ENFORCE_EQ(next_address, cur_address); + void *infer_next_address = reinterpret_cast( + reinterpret_cast(cur_address) + offset); + const void *next_address = g_tensor.at(k).second->data(); + + VLOG(10) << string::Sprintf( + "Input[%d](%s) address: 0X%02x, Input[%d](%s) address: 0X%02x, Infer " + "input[%d] address: 0X%02x. The offset: %d", + k - 1, g_tensor.at(k - 1).first, cur_address, g_tensor.at(k).first, k, + next_address, k, infer_next_address, offset); + PADDLE_ENFORCE_EQ(infer_next_address, next_address, + "The address is not consistent."); } } diff --git a/paddle/fluid/framework/ir/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/cpu_quantize_pass.cc index edfaf47f018a61d72aa3764185f2c185722b553f..ed80f9cae347cfb2bf23859daea2f1f47dba599b 100644 --- a/paddle/fluid/framework/ir/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/cpu_quantize_pass.cc @@ -224,8 +224,8 @@ std::unique_ptr CPUQuantizePass::ApplyImpl( PADDLE_ENFORCE(param_scope()); + QuantizeConv(graph.get(), false /* with_residual_data */); QuantizeConv(graph.get(), true /* with_residual_data */); - QuantizeConv(graph.get()); QuantizePool(graph.get()); return graph; diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 31e259c51d1996bbed33f978013ce5f591436704..555fdc7b7a03ebc99fcc77a26341d291dac2c308 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -599,10 +599,19 @@ bool VarLinksToOp(Node *node, const std::string &op_type) { bool IsNthInput(Node *var, Node *op, const std::string &argument, size_t nth) { PADDLE_ENFORCE(var->IsVar()); PADDLE_ENFORCE(op->IsOp()); - if (op->Op()->Input(argument).size() <= nth) return false; + if (!HasInput(op, argument) || op->Op()->Input(argument).size() <= nth) + return false; return var->Name() == op->Op()->Input(argument)[nth]; } +bool HasInput(Node *op, const std::string &argument) { + PADDLE_ENFORCE(op->IsOp()); + auto const &names = op->Op()->InputNames(); + if (std::find(names.begin(), names.end(), argument) == names.end()) + return false; + return true; +} + bool IsNthOutput(Node *var, Node *op, const std::string &argument, size_t nth) { PADDLE_ENFORCE(var->IsVar()); PADDLE_ENFORCE(op->IsOp()); @@ -1082,8 +1091,15 @@ PDNode *patterns::Conv::operator()() { PDNode *patterns::ConvResidual::operator()(bool with_residual_data) { auto conv_op = pattern->NewNode(conv_op_repr())->assert_is_op("conv2d"); - if (!with_residual_data) - conv_op->assert_op_attr("fuse_residual_connection", false); + if (!with_residual_data) { + conv_op->assert_more([&](Node *x) { + auto node_names = x->Op()->InputNames(); + if (!HasInput(x, "ResidualData") || + x->Op()->Input("ResidualData").size() == 0) + return true; + return false; + }); + } auto input_var = pattern->NewNode(conv_input_repr()) ->AsInput() diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index 16cb6fb7aee2c8ac2c59a758aa63001106c816d2..130ddeac4cd1a38516540d175e17d46f877bd909 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -305,6 +305,9 @@ bool VarLinksFromOp(Node* node, const std::string& op_type); // Check whether a var node is a op node's nth input. bool IsNthInput(Node* var, Node* op, const std::string& argument, size_t nth); +// Check whether the op node has input of given name. +bool HasInput(Node* op, const std::string& argument); + // Tell whether a var node is a op node's nth output. bool IsNthOutput(Node* var, Node* op, const std::string& argument, size_t nth); diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h index 3d4dc9e2b6ecccddea4d63e45710c80d55ef2772..c071d9aed20bd40f5c1076d2dc5d3098a4e65495 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h @@ -14,12 +14,16 @@ limitations under the License. */ #pragma once +#include #include "paddle/fluid/framework/ir/pass.h" namespace paddle { namespace framework { namespace ir { +/* + * Specifies which operators should use MKLDNN. + */ class MKLDNNPlacementPass : public Pass { protected: std::unique_ptr ApplyImpl( diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index 992c779711a9b331534b43ac3b6af2df75e88c75..555b57810e3f13357b7f5ebcb2541d763d5cec66 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -136,6 +136,15 @@ struct Argument { // Pass a set of op types to enable its mkldnn kernel DECL_ARGUMENT_FIELD(mkldnn_enabled_op_types, MKLDNNEnabledOpTypes, std::unordered_set); + + // A set of op types to enable their quantized kernels + DECL_ARGUMENT_FIELD(quantize_enabled_op_types, QuantizeEnabledOpTypes, + std::unordered_set); + + // A set of op IDs to exclude from enabling their quantized kernels + DECL_ARGUMENT_FIELD(quantize_excluded_op_ids, QuantizeExcludedOpIds, + std::unordered_set); + // Scales for variables to be quantized DECL_ARGUMENT_FIELD(quant_var_scales, QuantVarScales, VarQuantScale); diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index 882e1d93b96aebbeddd469ac2ceff29a568f151b..86ff06c6ae454b6d830a605f97272671e26c3bb6 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -64,6 +64,13 @@ void IRPassManager::CreatePasses(Argument *argument, pass->Set("mkldnn_enabled_op_types", new std::unordered_set( argument->mkldnn_enabled_op_types())); + } else if (pass_name == "cpu_quantize_placement_pass") { + pass->Set("quantize_enabled_op_types", + new std::unordered_set( + argument->quantize_enabled_op_types())); + pass->Set( + "quantize_excluded_op_ids", + new std::unordered_set(argument->quantize_excluded_op_ids())); } else if (pass_name == "cpu_quantize_pass") { pass->Set("quant_var_scales", new VarQuantScale(argument->quant_var_scales())); diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 28d02e8fea562f3ca9fbe37cfaab47380888b5fb..94e14228df84f2c462881e9a8ce24578c74ffeaa 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -107,6 +107,10 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { use_gpu_ = true; } +void GpuPassStrategy::EnableQuantizer() { + LOG(ERROR) << "GPU not support quantization yet"; +} + void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) { analysis_passes_.push_back(pass); } diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h index c93aec1a2253a45a2d0599b7b866e20ae025a99d..65403e790e93b1a4f99803472d77b0e7a02a4fec 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.h +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -85,6 +85,10 @@ class PassStrategy : public PaddlePassBuilder { */ virtual void EnableMKLDNN() {} + /** Enable quantize optimization + */ + virtual void EnableQuantizer() {} + bool use_gpu() const { return use_gpu_; } virtual ~PassStrategy() = default; @@ -125,6 +129,16 @@ class CpuPassStrategy : public PassStrategy { use_mkldnn_ = false; #endif } + + void EnableQuantizer() override { + if (!use_quantizer_) { + passes_.push_back("cpu_quantize_placement_pass"); + } + use_quantizer_ = true; + } + + protected: + bool use_quantizer_{false}; }; /** The GPU passes strategy, it is used in AnalysisPredictor with GPU mode. @@ -139,6 +153,7 @@ class GpuPassStrategy : public PassStrategy { } void EnableMKLDNN() override; + void EnableQuantizer() override; virtual ~GpuPassStrategy() = default; }; diff --git a/paddle/fluid/inference/utils/CMakeLists.txt b/paddle/fluid/inference/utils/CMakeLists.txt index c43eaf7f9849ee4a88ed95bdb8b6966da8760435..2104e4ac7222258ee025bd5acd60b1db251df654 100644 --- a/paddle/fluid/inference/utils/CMakeLists.txt +++ b/paddle/fluid/inference/utils/CMakeLists.txt @@ -1,4 +1,2 @@ cc_library(benchmark SRCS benchmark.cc DEPS enforce) cc_test(test_benchmark SRCS benchmark_tester.cc DEPS benchmark) -cc_binary(visualizer SRCS visualizer.cc DEPS analysis - paddle_pass_builder ir_pass_manager pass graph_viz_pass analysis_passes) diff --git a/paddle/fluid/inference/utils/visualizer.cc b/paddle/fluid/inference/utils/visualizer.cc deleted file mode 100644 index 7c0dd64dea88e51b24c4bc04818d633ee0d2f722..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/utils/visualizer.cc +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/inference/utils/visualizer.h" -#include -#include -#include -#include -#include "paddle/fluid/framework/ir/graph_viz_pass.h" -#include "paddle/fluid/inference/analysis/analyzer.h" -#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h" -#include "paddle/fluid/platform/init.h" - -DEFINE_string(model_dir, "", "model directory"); -DEFINE_string(model_program_path, "", "model program path"); -DEFINE_string(model_params_path, "", "model params path"); - -using paddle::inference::analysis::Argument; - -namespace paddle { -namespace inference { -namespace utils { - -void Visualizer::SetArgument(Argument *argument) { argument_ = argument; } - -bool Visualizer::Run() { - paddle::framework::InitDevices(false); - paddle::inference::analysis::Analyzer().Run(argument_); - return true; -} - -} // namespace utils -} // namespace inference -} // namespace paddle - -// Generate a dot file describing the structure of graph. -// To use this tool, run command: ./visualizer [options...] -// Options: -// --model_dir: the directory of model -// --model_program_path: the path of program -// --model_params_path: the path of params -int main(int argc, char *argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, true); - google::InitGoogleLogging(argv[0]); - - paddle::inference::analysis::Argument argument; - argument.SetUseGPU(false); - argument.SetUseTensorRT(false); - - if (FLAGS_model_dir.empty()) { - if (FLAGS_model_program_path.empty() || FLAGS_model_params_path.empty()) { - LOG(ERROR) << "Please set model_dir" - " or model_program_path and model_params_path"; - return -1; - } else { - argument.SetModelProgramPath(FLAGS_model_program_path); - argument.SetModelParamsPath(FLAGS_model_params_path); - } - } else { - argument.SetModelDir(FLAGS_model_dir); - } - - // Only 1 pass, default filename is 0_ir_origin.dot - // For more details, looking for paddle::inference::analysis::IRPassManager - argument.SetIrAnalysisPasses({"infer_clean_graph_pass", "graph_viz_pass"}); - - std::unique_ptr scope{ - new paddle::framework::Scope()}; - argument.SetScopeNotOwned( - const_cast(scope.get())); - - paddle::inference::utils::Visualizer visualizer; - visualizer.SetArgument(&argument); - visualizer.Run(); - - return 0; -} - -USE_PASS(infer_clean_graph_pass); -USE_PASS(graph_viz_pass); -USE_PASS(graph_to_program_pass); diff --git a/paddle/fluid/inference/utils/visualizer.h b/paddle/fluid/inference/utils/visualizer.h deleted file mode 100644 index be532f92cf60e06094bfcf8cc2be85085795fcf4..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/utils/visualizer.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include "paddle/fluid/inference/analysis/argument.h" - -namespace paddle { -namespace inference { -namespace utils { - -using paddle::inference::analysis::Argument; - -class Visualizer final { - public: - Visualizer() = default; - ~Visualizer() = default; - Visualizer(const Visualizer &) = delete; - Visualizer &operator=(const Visualizer &) = delete; - - void SetArgument(Argument *); - bool Run(); - - private: - Argument *argument_; -}; - -} // namespace utils -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index c3479b47def13e52f5775d31714aeaca21ec08c2..c87e4b22b37027efd1293e74f72598283946e62d 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -76,12 +76,16 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx, const std::string& name) { framework::LibraryType library{framework::LibraryType::kPlain}; framework::DataLayout layout = framework::DataLayout::kAnyLayout; -#ifdef PADDLE_WITH_CUDA - auto it1 = oper.Attrs().find("use_cudnn"); - if (it1 != oper.Attrs().end() && platform::CanCUDNNBeUsed(ctx)) { - library = framework::LibraryType::kCUDNN; - } -#endif +// FIXME(liuwei1031) temporarily disable the code to unblock users +// TODO(liuwei1031) figure out the reason behind +// https://github.com/PaddlePaddle/Paddle/issues/16096 +// and re-enable this in the future +// #ifdef PADDLE_WITH_CUDA +// auto it1 = oper.Attrs().find("use_cudnn"); +// if (it1 != oper.Attrs().end() && platform::CanCUDNNBeUsed(ctx)) { +// library = framework::LibraryType::kCUDNN; +// } +// #endif #ifdef PADDLE_WITH_MKLDNN auto it = oper.Attrs().find("use_mkldnn"); if (library == framework::LibraryType::kPlain && it != oper.Attrs().end() && diff --git a/paddle/fluid/operators/affine_channel_op.cc b/paddle/fluid/operators/affine_channel_op.cc index 8944a749674c3ba6c83526e4d66f449075716f43..268a5b894a95df8e27730879473b457a31e18cd6 100644 --- a/paddle/fluid/operators/affine_channel_op.cc +++ b/paddle/fluid/operators/affine_channel_op.cc @@ -67,6 +67,22 @@ class AffineChannelOp : public framework::OperatorWithKernel { "Input(Bias) of AffineChannelOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) of AffineChannelOp should not be null."); + + auto x_dims = ctx->GetInputDim("X"); + auto scale_dims = ctx->GetInputDim("Scale"); + auto b_dims = ctx->GetInputDim("Bias"); + const framework::DataLayout data_layout = framework::StringToDataLayout( + ctx->Attrs().Get("data_layout")); + + const int64_t C = (data_layout == framework::DataLayout::kNCHW + ? x_dims[1] + : x_dims[x_dims.size() - 1]); + + PADDLE_ENFORCE_EQ(scale_dims.size(), 1UL); + PADDLE_ENFORCE_EQ(scale_dims[0], C); + PADDLE_ENFORCE_EQ(b_dims.size(), 1UL); + PADDLE_ENFORCE_EQ(b_dims[0], C); + ctx->SetOutputDim("Out", ctx->GetInputDim("X")); ctx->ShareLoD("X", "Out"); } @@ -97,6 +113,27 @@ class AffineChannelOpGrad : public framework::OperatorWithKernel { } }; +class AffineChannelGradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + std::unique_ptr Apply() const override { + auto* op = new framework::OpDesc(); + op->SetType("affine_channel_grad"); + op->SetInput("X", Input("X")); + op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); + op->SetInput("Scale", Input("Scale")); + + op->SetAttrMap(Attrs()); + + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + op->SetOutput(framework::GradVarName("Scale"), InputGrad("Scale")); + op->SetOutput(framework::GradVarName("Bias"), InputGrad("Bias")); + + return std::unique_ptr(op); + } +}; + template using EigenArrayMap = Eigen::Map>; @@ -244,8 +281,7 @@ namespace ops = paddle::operators; using CPU = paddle::platform::CPUDeviceContext; REGISTER_OPERATOR(affine_channel, ops::AffineChannelOp, - ops::AffineChannelOpMaker, - paddle::framework::DefaultGradOpDescMaker); + ops::AffineChannelOpMaker, ops::AffineChannelGradMaker); REGISTER_OPERATOR(affine_channel_grad, ops::AffineChannelOpGrad); REGISTER_OP_CPU_KERNEL(affine_channel, ops::AffineChannelKernel, diff --git a/paddle/fluid/operators/dropout_op.cc b/paddle/fluid/operators/dropout_op.cc index 7c64eadffffbafe0d7f92ec345d75a92c4fef58e..65c2ff6415c1d51fdc05d6014da589678761b676 100644 --- a/paddle/fluid/operators/dropout_op.cc +++ b/paddle/fluid/operators/dropout_op.cc @@ -71,7 +71,7 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker { "1. downgrade_in_infer(default), downgrade the outcome at inference " "time" " train: out = input * mask" - " inference: out = input * dropout_prob" + " inference: out = input * (1.0 - dropout_prob)" "2. upscale_in_train, upscale the outcome at training time, do nothing " "in inference" " train: out = input * mask / ( 1.0 - dropout_prob )" diff --git a/paddle/fluid/operators/lod_reset_op.cc b/paddle/fluid/operators/lod_reset_op.cc index 7c8fe5fbd7629b2d82552135bc1b052dfbabeba0..a814c365d70ae91490e7fb50a0baf8fec05d97ef 100644 --- a/paddle/fluid/operators/lod_reset_op.cc +++ b/paddle/fluid/operators/lod_reset_op.cc @@ -32,7 +32,10 @@ class LoDResetOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT(level0.size(), 1, "If Input(Y) not provided, the target lod should be " "specified by attribute `target_lod`."); + } else { + ctx->ShareLoD("Y", "Out"); } + ctx->SetOutputDim("Out", ctx->GetInputDim("X")); } diff --git a/paddle/fluid/operators/math/sequence_padding.cu b/paddle/fluid/operators/math/sequence_padding.cu index 035e10dcbe4e2083723e47d7dda75ce267a9f141..1b433067900af71bb8a6833cef019d41f9c76858 100644 --- a/paddle/fluid/operators/math/sequence_padding.cu +++ b/paddle/fluid/operators/math/sequence_padding.cu @@ -78,12 +78,6 @@ class PaddingLoDTensorFunctor { "The numel of 'pad_value' can only be 1 or be equal to the " "'step_width'."); - if (!norm_by_times && seq_num == 1UL && pad_seq_len == max_seq_len) { - TensorCopy(seq_tensor, context.GetPlace(), context, pad_tensor); - pad_tensor->Resize(pad_tensor_dims); - return; - } - const int kBlockSize = 512; /* At least use 32 threads to copy sequence_width elements, @@ -129,12 +123,13 @@ class UnpaddingLoDTensorFunctor { CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len, step_width, layout); - + /* if (!norm_by_times && seq_num == 1UL && pad_seq_len == max_seq_len) { TensorCopy(pad_tensor, context.GetPlace(), context, seq_tensor); seq_tensor->Resize(seq_tensor_dims); return; } + */ const int kBlockSize = 512; diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index 242a1b9ae92ade0caf1b0f1fcb5458b8b7070d84..f18282745200cc8ef9460e60728d777112f2b798 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -290,8 +290,10 @@ class MatMulOp : public framework::OperatorWithKernel { context->Attrs().Get("transpose_Y")); PADDLE_ENFORCE_EQ(mat_dim_x.width_, mat_dim_y.height_); - PADDLE_ENFORCE(mat_dim_x.batch_size_ == mat_dim_y.batch_size_ || - mat_dim_x.batch_size_ == 0 || mat_dim_y.batch_size_ == 0); + if (context->IsRuntime()) { + PADDLE_ENFORCE(mat_dim_x.batch_size_ == mat_dim_y.batch_size_ || + mat_dim_x.batch_size_ == 0 || mat_dim_y.batch_size_ == 0); + } std::vector dim_out; if (mat_dim_x.batch_size_ != 0) { dim_out = framework::vectorize(dim_x); diff --git a/paddle/fluid/operators/squeeze_op.cc b/paddle/fluid/operators/squeeze_op.cc index ecfb4e89566f3d72b3c262946c370bf34ce7515a..dc15df2c3c1b8a2964312d983be8ce362d3ab95d 100644 --- a/paddle/fluid/operators/squeeze_op.cc +++ b/paddle/fluid/operators/squeeze_op.cc @@ -40,7 +40,7 @@ class SqueezeOpInferShape : public framework::InferShapeBase { "tensor's rank."); } - auto out_dims = GetOutputShape(axes, x_dims); + auto out_dims = GetOutputShape(axes, x_dims, false); ctx->SetOutputDim("Out", out_dims); if (x_dims[0] == out_dims[0]) { // Only pass LoD when the first dimension of output and Input(X) @@ -50,7 +50,8 @@ class SqueezeOpInferShape : public framework::InferShapeBase { } static framework::DDim GetOutputShape(const std::vector squeeze_dims, - const framework::DDim &in_dims) { + const framework::DDim &in_dims, + bool is_runtime) { size_t num_squeeze_dims = squeeze_dims.size(); int cnt_squeezed_dims = 0; bool should_squeeze[9] = {false}; @@ -71,9 +72,12 @@ class SqueezeOpInferShape : public framework::InferShapeBase { // Check current index, the upper limit has beed checked in line 36. PADDLE_ENFORCE(current >= 0, "Invalid axis, the negative axis is out of range."); - PADDLE_ENFORCE(in_dims[current] == 1, - "Invalid axis index, the axis that will be squeezed " - "should be equal to 1."); + + if (is_runtime) { + PADDLE_ENFORCE(in_dims[current] == 1, + "Invalid axis index, the axis that will be squeezed " + "should be equal to 1."); + } if (!(should_squeeze[current])) { ++cnt_squeezed_dims; @@ -104,7 +108,7 @@ class SqueezeOp : public framework::OperatorBase { const platform::Place &place) const override { auto &axes = Attr>("axes"); auto x_dims = scope.FindVar(Input("X"))->Get().dims(); - auto out_dims = SqueezeOpInferShape::GetOutputShape(axes, x_dims); + auto out_dims = SqueezeOpInferShape::GetOutputShape(axes, x_dims, true); framework::AttributeMap attrs; attrs["shape"] = framework::vectorize2int(out_dims); @@ -224,7 +228,7 @@ class Squeeze2Op : public framework::OperatorBase { const platform::Place &place) const override { auto &axes = Attr>("axes"); auto x_dims = scope.FindVar(Input("X"))->Get().dims(); - auto out_dims = Squeeze2OpInferShape::GetOutputShape(axes, x_dims); + auto out_dims = Squeeze2OpInferShape::GetOutputShape(axes, x_dims, true); framework::AttributeMap attrs; attrs["shape"] = framework::vectorize2int(out_dims); diff --git a/paddle/fluid/operators/top_k_op.cc b/paddle/fluid/operators/top_k_op.cc index 9e77f7252de1545e04bd2feaff27374c189dfc48..db763a051d1e08b962a40913d290c69e7c61ec32 100644 --- a/paddle/fluid/operators/top_k_op.cc +++ b/paddle/fluid/operators/top_k_op.cc @@ -34,8 +34,11 @@ class TopkOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE(k, 1, "k must >= 1"); PADDLE_ENFORCE_GE(input_dims.size(), 1, "input must have >= 1d shape"); - PADDLE_ENFORCE_GE(input_dims[input_dims.size() - 1], k, - "input must have >= k columns"); + + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_GE(input_dims[input_dims.size() - 1], k, + "input must have >= k columns"); + } framework::DDim dims = input_dims; dims[dims.size() - 1] = k; diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 162c64552e428e71fc003f90c6cd4a485f72f32f..296ecfc6f07e005a8db1ef81732548b004461bee 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -23,7 +23,9 @@ limitations under the License. */ #include "paddle/fluid/platform/cuda_helper.h" #include "paddle/fluid/platform/dynload/cublas.h" #include "paddle/fluid/platform/dynload/cudnn.h" +#if !defined(__APPLE__) && !defined(_WIN32) #include "paddle/fluid/platform/dynload/nccl.h" +#endif #include "paddle/fluid/platform/gpu_info.h" #endif diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index dc0a6dcdedaf39c0c489bb6f6e3eb28e6b58a21d..025528e85c4bf4da63b588dd91681d7bf7bb78fe 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -453,6 +453,7 @@ function assert_api_spec_approvals() { echo "checking ${API_FILE} change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}" if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then # NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable. + # approval_user_list: velconia 1979255,panyx0718 2887803,XiaoguangHu01 46782768,chengduoZH 30176695,Xreki 12538138,luotao1 6836917,sneaxiy 32832641,tensor-tang 21351065,jacquesqiao 3048612,typhoonzero 13348433,shanyi15 35982308. if [ "$API_FILE" == "paddle/fluid/API.spec" ];then APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 2887803 35982308 46782768 30176695` @@ -462,14 +463,14 @@ function assert_api_spec_approvals() { fi else APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ - python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803` + python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803 1979255 21351065 3048612 13348433 46782768 30176695 12538138 6836917 32832641` fi echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}" if [ "${APPROVALS}" == "FALSE" ]; then if [ "$API_FILE" == "paddle/fluid/API.spec" ];then echo "You must have one RD (panyx0718 or chengduoZH or XiaoguangHu01) and one PM (shanyi15) approval for the api change! ${API_FILE}" else - echo "You must have panyx0718 approval for the api change! ${API_FILE}" + echo "You must have one RD (velconia,panyx0718,XiaoguangHu01,chengduoZH,Xreki,luotao1,sneaxiy,tensor-tang,jacquesqiao,typhoonzero) approval for the api change! ${API_FILE}" fi exit 1 fi @@ -479,10 +480,10 @@ function assert_api_spec_approvals() { HAS_CONST_CAST=`git diff -U0 upstream/$BRANCH |grep -o -m 1 "const_cast" || true` if [ ${HAS_CONST_CAST} ] && [ "${GIT_PR_ID}" != "" ]; then APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ - python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803` + python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803 1979255 21351065 3048612 13348433 46782768 30176695 12538138 6836917 32832641` echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}" if [ "${APPROVALS}" == "FALSE" ]; then - echo "You must have panyx0718 approval for the const_cast" + echo "You must have one RD (velconia,panyx0718,XiaoguangHu01,chengduoZH,Xreki,luotao1,sneaxiy,tensor-tang,jacquesqiao,typhoonzero) approval for the api change! ${API_FILE}" exit 1 fi fi diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 1775159798414a98bede4a3db5b577fb5e47e611..326a84d82b5718dad898620a6d9e0490f7519448 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -896,7 +896,7 @@ def save_inference_model(dirname, True is supported. Returns: - None + target_var_name_list(list): The fetch variables' name list Raises: ValueError: If `feed_var_names` is not a list of basestring. @@ -949,11 +949,13 @@ def save_inference_model(dirname, # TODO(Superjomn) add an IR pass to remove 1-scale op. with program_guard(main_program): uniq_target_vars = [] - for var in target_vars: + for i, var in enumerate(target_vars): if isinstance(var, Variable): - var1 = layers.scale(var, 1.) - uniq_target_vars.append(var1) + var = layers.scale( + var, 1., name="save_infer_model/scale_{}".format(i)) + uniq_target_vars.append(var) target_vars = uniq_target_vars + target_var_name_list = [var.name for var in target_vars] # when a pserver and a trainer running on the same machine, mkdir may conflict try: @@ -1010,6 +1012,7 @@ def save_inference_model(dirname, params_filename = os.path.basename(params_filename) save_persistables(executor, dirname, main_program, params_filename) + return target_var_name_list def load_inference_model(dirname, diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index dbe495b75c8876de3c3f0fb0abe0357089413254..31e12f9b27feb134a027d8e150348c1d1da058b3 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1348,7 +1348,7 @@ def dropout(x, 1. downgrade_in_infer(default), downgrade the outcome at inference - train: out = input * mask - - inference: out = input * dropout_prob + - inference: out = input * (1.0 - dropout_prob) (mask is a tensor same shape with input, value is 0 or 1 ratio of 0 is dropout_prob) @@ -4901,6 +4901,9 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None): if len(y_shape) > 2 and len(x_shape) > 2: for i, dim_x in enumerate(x_shape[:-2]): + # don't check neg shape + if dim_x < 0 or y_shape[i] < 0: + continue if dim_x != y_shape[i]: raise ValueError("Invalid inputs for matmul. x(%s), y(%s)" % (x.shape, y.shape)) @@ -9706,7 +9709,12 @@ def sequence_reverse(x, name=None): return out -def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None): +def affine_channel(x, + scale=None, + bias=None, + data_layout='NCHW', + name=None, + act=None): """ Applies a separate affine transformation to each channel of the input. Useful for replacing spatial batch norm with its equivalent fixed @@ -9725,6 +9733,7 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None): data_layout (string, default NCHW): NCHW or NHWC. If input is 2D tensor, you can ignore data_layout. name (str, default None): The name of this layer. + act (str, default None): Activation to be applied to the output of this layer. Returns: out (Variable): A tensor of the same shape and data layout with x. @@ -9744,7 +9753,7 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None): 'Bias': bias}, attrs={"data_layout": data_layout}, outputs={"Out": out}) - return out + return helper.append_activation(pre_activation) def similarity_focus(input, axis, indexes, name=None): diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index a1cf5fad138f068c9eac5fe8d681c9f08b192270..b61ef706ba2460a12c4fe659984917b627e20906 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -70,6 +70,7 @@ list(REMOVE_ITEM TEST_OPS test_dist_transpiler) list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf) list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed) list(REMOVE_ITEM TEST_OPS test_dist_se_resnext) +list(REMOVE_ITEM TEST_OPS test_dist_se_resnext_nccl) list(REMOVE_ITEM TEST_OPS test_dist_transformer) list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer) list(REMOVE_ITEM TEST_OPS test_image_classification_resnet) @@ -95,11 +96,13 @@ if(WITH_DISTRIBUTE) if(NOT APPLE) set_tests_properties(test_dist_mnist PROPERTIES TIMEOUT 200) set_tests_properties(test_dist_word2vec PROPERTIES TIMEOUT 200) - py_test_modules(test_dist_se_resnext MODULES test_dist_se_resnext) - set_tests_properties(test_dist_se_resnext PROPERTIES TIMEOUT 1000) + py_test_modules(test_dist_se_resnext MODULES test_dist_se_resnext) + set_tests_properties(test_dist_se_resnext PROPERTIES TIMEOUT 1000) + py_test_modules(test_dist_se_resnext_nccl MODULES test_dist_se_resnext_nccl) + set_tests_properties(test_dist_se_resnext_nccl PROPERTIES TIMEOUT 1000) # FIXME(typhoonzero): add these tests back - # py_test_modules(test_dist_transformer MODULES test_dist_transformer) - # set_tests_properties(test_dist_transformer PROPERTIES TIMEOUT 1000) + # py_test_modules(test_dist_transformer MODULES test_dist_transformer) + # set_tests_properties(test_dist_transformer PROPERTIES TIMEOUT 1000) set_tests_properties(test_dist_ctr test_dist_mnist test_dist_mnist_batch_merge test_dist_save_load test_dist_se_resnext test_dist_simnet_bow test_dist_text_classification test_dist_train test_dist_word2vec PROPERTIES RUN_SERIAL TRUE) endif(NOT APPLE) py_test_modules(test_dist_transpiler MODULES test_dist_transpiler) diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py index d98b839e9b3fe10cb5b79c672284f8dfb6fbf141..969f5cb63c9dd2a773be9530abd2a49714202cd1 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_base.py @@ -110,7 +110,8 @@ class TestDistRunnerBase(object): trainer_prog = fluid.default_main_program() if args.use_cuda: - place = fluid.CUDAPlace(0) + device_id = int(os.getenv("FLAGS_selected_gpus", "0")) + place = fluid.CUDAPlace(device_id) else: place = fluid.CPUPlace() @@ -256,6 +257,7 @@ class TestDistBase(unittest.TestCase): self._dc_asgd = False # must use with async mode self._use_reader_alloc = True self._nccl2_mode = False + self._mp_mode = False # FIXME(typhoonzero): I added this stupid argument to enable # testing allreduce layers, which users can call layers.allreduce # to accumulate tensors at anywhere. Find a better way to do this @@ -504,6 +506,10 @@ class TestDistBase(unittest.TestCase): env0 = {'CPU_NUM': '1'} env1 = {'CPU_NUM': '1'} + if self._mp_mode: + env0 = {"FLAGS_selected_gpus": "0"} + env1 = {"FLAGS_selected_gpus": "1"} + env0.update(envs) env1.update(envs) diff --git a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py new file mode 100644 index 0000000000000000000000000000000000000000..38f7bb80d2f9144800ef8f8fb1402dcf86925067 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py @@ -0,0 +1,63 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +from test_dist_base import TestDistBase +import os + + +def skip_ci(func): + on_ci = bool(int(os.environ.get("SKIP_UNSTABLE_CI", '0'))) + + def __func__(*args, **kwargs): + if on_ci: + return + return func(*args, **kwargs) + + return __func__ + + +class TestDistSeResneXtNCCL(TestDistBase): + def _setup_config(self): + self._sync_mode = True + self._use_reader_alloc = False + self._nccl2_mode = True + + @skip_ci + def test_dist_train(self): + import paddle.fluid as fluid + if fluid.core.is_compiled_with_cuda(): + self.check_with_place("dist_se_resnext.py", delta=1e-5) + + +class TestDistSeResneXtNCCLMP(TestDistBase): + def _setup_config(self): + self._sync_mode = True + self._use_reader_alloc = False + self._nccl2_mode = True + self._mp_mode = True + + @skip_ci + def test_dist_train(self): + import paddle.fluid as fluid + if fluid.core.is_compiled_with_cuda(): + self.check_with_place( + "dist_se_resnext.py", + delta=1e-5, + need_envs={"NCCL_P2P_DISABLE": "1"}) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py index 9548598d75367ed1f1863d1f6ae50b83d58f8c7f..1f23fae92c9d8148efb25facb602cdc4d485865b 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py @@ -13,6 +13,9 @@ # limitations under the License. from __future__ import print_function +import os +os.environ['FLAGS_fuse_parameter_memory_size'] = "131072" +os.environ['FLAGS_fuse_parameter_groups_size'] = "3" import paddle.fluid as fluid import paddle.fluid.layers.ops as ops @@ -22,7 +25,6 @@ import paddle.fluid.core as core from parallel_executor_test_base import TestParallelExecutorBase import unittest import math -import os import numpy as np # FIXME(zcd): If the neural net has dropout_op, the output of ParallelExecutor @@ -312,17 +314,59 @@ class TestResnet(TestParallelExecutorBase): self.assertAlmostEquals( np.mean(parallel_last_loss), single_last_loss[0], delta=delta2) + def _compare_with_fused_all_reduce(self, + model, + use_cuda, + iter=20, + delta2=1e-5): + if use_cuda and not core.is_compiled_with_cuda(): + return + + global remove_bn + remove_bn = True + + img, label = self._init_data(batch_size=batch_size) + all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence( + model, + feed_dict={"image": img, + "label": label}, + iter=iter, + batch_size=batch_size, + use_cuda=use_cuda, + fuse_all_reduce_ops=False, + optimizer=optimizer) + reduce_first_loss, reduce_last_loss = self.check_network_convergence( + model, + feed_dict={"image": img, + "label": label}, + iter=iter, + batch_size=batch_size, + use_cuda=use_cuda, + fuse_all_reduce_ops=True, + optimizer=optimizer) + + for loss in zip(all_reduce_first_loss, reduce_first_loss): + self.assertAlmostEquals(loss[0], loss[1], delta=1e-5) + for loss in zip(all_reduce_last_loss, reduce_last_loss): + self.assertAlmostEquals(loss[0], loss[1], delta=delta2) + def test_seresnext_with_learning_rate_decay(self): self._check_resnet_convergence(model=SE_ResNeXt50Small, use_cuda=True) self._check_resnet_convergence( model=SE_ResNeXt50Small, use_cuda=False, iter=2, delta2=1e-3) - def test_seresnext_with_new_strategy(self): + def test_seresnext_with_reduce(self): self._compare_reduce_and_allreduce( model=SE_ResNeXt50Small, use_cuda=True, delta2=1e-2) self._compare_reduce_and_allreduce( model=SE_ResNeXt50Small, use_cuda=False, iter=5) + def test_seresnext_with_fused_all_reduce(self): + self._compare_with_fused_all_reduce( + model=SE_ResNeXt50Small, use_cuda=True, delta2=1e-3) + self._compare_with_fused_all_reduce( + model=SE_ResNeXt50Small, use_cuda=False, iter=2, delta2=1e-3) + if __name__ == '__main__': unittest.main()