From abaf87be2b9f6510a1992253c8d393262909b305 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Tue, 3 Sep 2019 10:09:50 +0800 Subject: [PATCH] Change backward_guard to optimize_guard to maximize the allreduce overlap. (#19506) Change backward_guard to optimize_guard to maximize the allreduce overlap --- .../fluid/framework/operator_kernel_configs.h | 2 ++ paddle/fluid/operators/conv_cudnn_helper.h | 23 +++++++++++++++++++ .../contrib/mixed_precision/fp16_utils.py | 5 ++-- python/paddle/fluid/data_feeder.py | 4 ++-- 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/operator_kernel_configs.h b/paddle/fluid/framework/operator_kernel_configs.h index a350b8957d..5c5a742383 100644 --- a/paddle/fluid/framework/operator_kernel_configs.h +++ b/paddle/fluid/framework/operator_kernel_configs.h @@ -81,6 +81,8 @@ TAlgorithm framework::AlgorithmsCache<TAlgorithm>::GetAlgorithm( seed ^= hashFn(static_cast<int64_t>(algorithmFlags)) + 0x9e3779b9 + (seed << 6) + (seed >> 2) + 5; + VLOG(10) << "seed:" << seed << ", hash_.size:" << hash_.size(); + if (seed == 0) return gen_func(); if (hash_.find(seed) == hash_.end()) { diff --git a/paddle/fluid/operators/conv_cudnn_helper.h b/paddle/fluid/operators/conv_cudnn_helper.h index 4a5cd32622..5f52042419 100644 --- a/paddle/fluid/operators/conv_cudnn_helper.h +++ b/paddle/fluid/operators/conv_cudnn_helper.h @@ -22,6 +22,14 @@ limitations under the License. 
*/ namespace paddle { namespace operators { +template <typename T> +std::ostream& operator<<(std::ostream& out, const std::vector<T>& v) { + out << "["; + for (auto const& tmp : v) out << tmp << ","; + out << "]"; + return out; +} + using framework::AlgorithmsCache; struct ConvArgs { @@ -119,6 +127,11 @@ struct SearchAlgorithm { auto x_dims = framework::vectorize(args.x->dims()); auto w_dims = framework::vectorize(args.w->dims()); + VLOG(10) << "cudnnConvolutionFwdAlgoPerf_t algo_cache_id:" + << algo_cache_id << ", x_dims:" << x_dims + << ", w_dims:" << w_dims << ", args.s" << args.s << ", args.p" + << args.p << ", args.d" << args.d; + algo = algo_cache.GetAlgorithm( x_dims, w_dims, args.s, args.p, args.d, 0, [&]() { int returned_algo_count; @@ -247,6 +260,11 @@ struct SearchAlgorithm { auto x_dims = framework::vectorize(args.x->dims()); auto w_dims = framework::vectorize(args.w->dims()); + VLOG(10) << "cudnnConvolutionFwdAlgoPerf_t algo_cache_id:" + << algo_cache_id << ", x_dims:" << x_dims + << ", w_dims:" << w_dims << ", args.s" << args.s << ", args.p" + << args.p << ", args.d" << args.d; + algo = algo_cache.GetAlgorithm( x_dims, w_dims, args.s, args.p, args.d, 0, [&]() { int returned_algo_count; @@ -368,6 +386,11 @@ struct SearchAlgorithm { auto x_dims = framework::vectorize(args.x->dims()); auto w_dims = framework::vectorize(args.w->dims()); + VLOG(10) << "cudnnConvolutionFwdAlgoPerf_t algo_cache_id:" + << algo_cache_id << ", x_dims:" << x_dims + << ", w_dims:" << w_dims << ", args.s" << args.s << ", args.p" + << args.p << ", args.d" << args.d; + algo = algo_cache.GetAlgorithm( x_dims, w_dims, args.s, args.p, args.d, 0, [&]() { int returned_algo_count; diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py index 51c67cf001..8d9abf0762 100644 --- a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py +++ b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py @@ -80,8 +80,9 @@ def 
create_master_params_grads(params_grads, main_prog, startup_prog, A list of master parameters and gradients. """ master_params_grads = [] - with main_prog._backward_role_guard(): - for p, g in params_grads: + for p, g in params_grads: + # create master parameters + with main_prog._optimized_guard([p, g]): # create master parameters master_param = copy_to_master_param(p, main_prog.global_block()) startup_master_param = startup_prog.global_block()._clone_variable( diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py index da2f1678e0..3f9c69f120 100644 --- a/python/paddle/fluid/data_feeder.py +++ b/python/paddle/fluid/data_feeder.py @@ -278,8 +278,8 @@ class DataFeeder(object): for each_sample in iterable: assert len(each_sample) == len(converter), ( - "The number of fields in data (%s) does not match " + - "len(feed_list) (%s)") % (len(each_sample), len(converter)) + "The number of fields in data (%d) does not match " + + "len(feed_list) (%d)") % (len(each_sample), len(converter)) for each_converter, each_slot in six.moves.zip(converter, each_sample): each_converter.feed(each_slot) -- GitLab