diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
index 5132b3b5e72ca24c04e53d0157d33676d45b2a2a..13c349c2a06dc18d40a78a64258bfbe0b2b1b23c 100644
--- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
@@ -105,6 +105,7 @@ void MemoryOptimizePass::CollectVarMemorySize(
                                         "merge_lod_tensor",
                                         "equal",
                                         "sequence_pool",
+                                        "recurrent",
                                         "lod_reset"};
     for (auto* tmp : node->inputs) {
       CHECK(tmp->IsOp());
diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc
index 231fb38da272a81ad65efa6d86a51e7182076807..9c158904ea804b3c855370959dcf2507c430b7aa 100644
--- a/paddle/fluid/operators/recurrent_op.cc
+++ b/paddle/fluid/operators/recurrent_op.cc
@@ -211,9 +211,10 @@ void RecurrentOp::RunImpl(const framework::Scope &scope,
   auto *block = Attr<framework::BlockDesc *>(kStepBlock);
   auto *program = block->Program();
 
-  auto ctx = executor.Prepare(
-      *program, block->ID(), Attr<std::vector<std::string>>(
-                                 kSkipEagerDeletionVars) /*skip_ref_cnt_vars*/);
+  auto ctx = executor.Prepare(*program, block->ID(),
+                              Attr<std::vector<std::string>>(
+                                  kSkipEagerDeletionVars), /*skip_ref_cnt_vars*/
+                              true);
 
   static std::mutex mutex;
   std::lock_guard<std::mutex> lock(mutex);
@@ -256,16 +257,6 @@ void RecurrentOp::RunImpl(const framework::Scope &scope,
     // Link inside::output -> outside::output
     //   outside::output[seq_offset: seq_offset + 1] = inside::output
     executor.CreateVariables(ctx->prog_, &cur_scope, ctx->block_id_);
-    if (i > 0) {
-      LinkTensorWithCallback(scope, Outputs(kOutputs), cur_scope,
-                             Outputs(kOutputs),
-                             [&](const framework::LoDTensor &src_tensor,
-                                 framework::LoDTensor *dst_tensor) {
-                               framework::Tensor src_slice =
-                                   src_tensor.Slice(seq_offset, seq_offset + 1);
-                               dst_tensor->ShareDataWith(src_slice);
-                             });
-    }
 
     // Linked now, execute!
     executor.RunPreparedContext(ctx.get(), &cur_scope,
@@ -285,6 +276,14 @@ void RecurrentOp::RunImpl(const framework::Scope &scope,
             // early.
            framework::TensorCopy(src_tensor, place, dev_ctx, &dst_out);
          });
+    } else {
+      LinkTensorWithCallback(
+          cur_scope, Outputs(kOutputs), scope, Outputs(kOutputs),
+          [&](const framework::LoDTensor &src_tensor,
+              framework::LoDTensor *dst_tensor) {
+            auto dst_out = dst_tensor->Slice(seq_offset, seq_offset + 1);
+            framework::TensorCopy(src_tensor, place, dev_ctx, &dst_out);
+          });
     }
 
     scopes.ForwardNext();
diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py
index 7ba42f880cd83623b94874832f19477cfb2138c0..4ae528d6654ecb4744dcc09d809ee6047faa0bea 100644
--- a/python/paddle/nn/functional/norm.py
+++ b/python/paddle/nn/functional/norm.py
@@ -189,10 +189,10 @@ def batch_norm(x,
 
     if in_dygraph_mode():
         # for dygraph need tuple
-        attrs = ("momentum", momentum, "epsilon", epsilon, "data_layout",
-                 data_format, "use_mkldnn", False, "fuse_with_relu", False,
-                 "use_global_stats", use_global_stats, "trainable_statistics",
-                 trainable_statistics)
+        attrs = ("momentum", momentum, "epsilon", epsilon, "is_test",
+                 not training, "data_layout", data_format, "use_mkldnn", False,
+                 "fuse_with_relu", False, "use_global_stats", use_global_stats,
+                 "trainable_statistics", trainable_statistics)
         batch_norm_out, _, _, _, _, _ = core.ops.batch_norm(
             x, weight, bias, running_mean, running_var, mean_out, variance_out,
             *attrs)
@@ -207,6 +207,7 @@ def batch_norm(x,
     attrs = {
         "momentum": momentum,
         "epsilon": epsilon,
+        "is_test": not training,
         "data_layout": data_format,
         "use_mkldnn": False,
         "fuse_with_relu": False,