diff --git a/paddle/fluid/framework/details/execution_strategy.h b/paddle/fluid/framework/details/execution_strategy.h index 37b07e5736312b3050debe745f2d3c108469c5d6..2edb50e0d89044ce9ab6789c416fa8a6a9c1673e 100644 --- a/paddle/fluid/framework/details/execution_strategy.h +++ b/paddle/fluid/framework/details/execution_strategy.h @@ -25,7 +25,10 @@ struct ExecutionStrategy { size_t num_threads_{0}; bool use_cuda_{true}; bool allow_op_delay_{false}; - size_t num_iteration_per_drop_scope_{1}; + // If we set this to 1, we will delete all variables when finish a batch. and + // this will loss 15%+ performance. + // Please be aware about this parameters. + size_t num_iteration_per_drop_scope_{100}; ExecutorType type_{kDefault}; bool dry_run_{false}; }; diff --git a/paddle/fluid/operators/distributed/variable_response.cc b/paddle/fluid/operators/distributed/variable_response.cc index 47ff568a1135f2f0a146faa4d5d6fc422a344f51..7825b4fc82b1f7580fea8ab4961facaf7fd64397 100644 --- a/paddle/fluid/operators/distributed/variable_response.cc +++ b/paddle/fluid/operators/distributed/variable_response.cc @@ -117,8 +117,9 @@ bool VariableResponse::CopyLodTensorData( tensor->mutable_data(ctx.GetPlace(), ToVarType(meta_.data_type())); VLOG(6) << "Tensor.memory_size = " << tensor->memory_size() - << ", Buffer Size = " << length; - PADDLE_ENFORCE_EQ(tensor->memory_size(), static_cast(length)); + << ", Buffer Size = " << length << ", dims:" << dims + << ", numel:" << tensor->numel(); + PADDLE_ENFORCE_GE(tensor->memory_size(), static_cast(length)); return ReadRaw(input, ctx, tensor->place(), tensor_data, length); }