diff --git a/paddle/fluid/framework/operator_kernel_configs.h b/paddle/fluid/framework/operator_kernel_configs.h
index a350b8957d91ea21375e1942af2968277b10833e..5c5a7423832ae3c0b16df8a98aa3faa8b2983f84 100644
--- a/paddle/fluid/framework/operator_kernel_configs.h
+++ b/paddle/fluid/framework/operator_kernel_configs.h
@@ -81,6 +81,8 @@ TAlgorithm framework::AlgorithmsCache<TAlgorithm>::GetAlgorithm(
   seed ^= hashFn(static_cast<int64_t>(algorithmFlags)) + 0x9e3779b9 +
           (seed << 6) + (seed >> 2) + 5;
 
+  VLOG(10) << "seed:" << seed << ", hash_.size:" << hash_.size();
+
   if (seed == 0) return gen_func();
 
   if (hash_.find(seed) == hash_.end()) {
diff --git a/paddle/fluid/operators/conv_cudnn_helper.h b/paddle/fluid/operators/conv_cudnn_helper.h
index 4a5cd3262217941461f1e950056d64e29834eddb..5f52042419d43f1b3c15762b33bc9f90c2fb1f45 100644
--- a/paddle/fluid/operators/conv_cudnn_helper.h
+++ b/paddle/fluid/operators/conv_cudnn_helper.h
@@ -22,6 +22,14 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
+template <typename T>
+std::ostream& operator<<(std::ostream& out, const std::vector<T>& v) {
+  out << "[";
+  for (auto const& tmp : v) out << tmp << ",";
+  out << "]";
+  return out;
+}
+
 using framework::AlgorithmsCache;
 
 struct ConvArgs {
@@ -119,6 +127,11 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
     auto x_dims = framework::vectorize(args.x->dims());
     auto w_dims = framework::vectorize(args.w->dims());
 
+    VLOG(10) << "cudnnConvolutionFwdAlgoPerf_t algo_cache_id:"
+             << algo_cache_id << ", x_dims:" << x_dims
+             << ", w_dims:" << w_dims << ", args.s" << args.s << ", args.p"
+             << args.p << ", args.d" << args.d;
+
     algo = algo_cache.GetAlgorithm(
         x_dims, w_dims, args.s, args.p, args.d, 0, [&]() {
           int returned_algo_count;
@@ -247,6 +260,11 @@ struct SearchAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t> {
     auto x_dims = framework::vectorize(args.x->dims());
     auto w_dims = framework::vectorize(args.w->dims());
 
+    VLOG(10) << "cudnnConvolutionFwdAlgoPerf_t algo_cache_id:"
+             << algo_cache_id << ", x_dims:" << x_dims
+             << ", w_dims:" << w_dims << ", args.s" << args.s << ", args.p"
+             << args.p << ", args.d" << args.d;
+
     algo = algo_cache.GetAlgorithm(
         x_dims, w_dims, args.s, args.p, args.d, 0, [&]() {
           int returned_algo_count;
@@ -368,6 +386,11 @@ struct SearchAlgorithm<cudnnConvolutionBwdFilterAlgoPerf_t> {
     auto x_dims = framework::vectorize(args.x->dims());
     auto w_dims = framework::vectorize(args.w->dims());
 
+    VLOG(10) << "cudnnConvolutionFwdAlgoPerf_t algo_cache_id:"
+             << algo_cache_id << ", x_dims:" << x_dims
+             << ", w_dims:" << w_dims << ", args.s" << args.s << ", args.p"
+             << args.p << ", args.d" << args.d;
+
     algo = algo_cache.GetAlgorithm(
         x_dims, w_dims, args.s, args.p, args.d, 0, [&]() {
           int returned_algo_count;
diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py
index 51c67cf0017fb54d21f6402bcec64b07f75c1025..8d9abf0762feec7cadd5c81bfd4e2a010d0a7c5e 100644
--- a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py
+++ b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py
@@ -80,8 +80,9 @@ def create_master_params_grads(params_grads, main_prog, startup_prog,
         A list of master parameters and gradients.
     """
     master_params_grads = []
-    with main_prog._backward_role_guard():
-        for p, g in params_grads:
+    for p, g in params_grads:
+        # create master parameters
+        with main_prog._optimized_guard([p, g]):
             # create master parameters
             master_param = copy_to_master_param(p, main_prog.global_block())
             startup_master_param = startup_prog.global_block()._clone_variable(
diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py
index da2f1678e04a81fd2b20b43d176e084833ae7ec3..3f9c69f120e4f7cfaf1350d78f5283349d37bc2a 100644
--- a/python/paddle/fluid/data_feeder.py
+++ b/python/paddle/fluid/data_feeder.py
@@ -278,8 +278,8 @@ class DataFeeder(object):
 
         for each_sample in iterable:
             assert len(each_sample) == len(converter), (
-                "The number of fields in data (%s) does not match " +
-                "len(feed_list) (%s)") % (len(each_sample), len(converter))
+                "The number of fields in data (%d) does not match " +
+                "len(feed_list) (%d)") % (len(each_sample), len(converter))
             for each_converter, each_slot in six.moves.zip(converter,
                                                            each_sample):
                 each_converter.feed(each_slot)