Commit ff77dea9 authored by Leo Zhao, committed by Tao Luo

not use transferscope cache in cpu case (#18578)

* not use transferscope cache in cpu case

test=develop

* adjust variable name and add comments

test=develop

* use correct format for class member in operator.h

* use correct format for class member in operator.cc

test=develop
Parent b414645a
operator.cc:

```diff
@@ -885,12 +885,12 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
                                  const platform::Place& place) const {
   // To reduce the elapsed time of HasAttr, we use a bool variable to record
   // the result of HasAttr.
-  if (!enable_cache_runtime_context && HasAttr(kEnableCacheRuntimeContext))
-    enable_cache_runtime_context = true;
-  if (!all_kernels_must_compute_runtime_shape &&
+  if (!enable_cache_runtime_context_ && HasAttr(kEnableCacheRuntimeContext))
+    enable_cache_runtime_context_ = true;
+  if (!all_kernels_must_compute_runtime_shape_ &&
       HasAttr(kAllKernelsMustComputeRuntimeShape))
-    all_kernels_must_compute_runtime_shape = true;
-  if (!enable_cache_runtime_context) {
+    all_kernels_must_compute_runtime_shape_ = true;
+  if (!enable_cache_runtime_context_) {
     RuntimeContext ctx(Inputs(), Outputs(), scope);
     RunImpl(scope, place, &ctx);
   } else {
```
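The hunk above only renames the flags to the class-member style (trailing underscore), but the comment it carries describes a real idiom: memoizing a repeated HasAttr lookup in a mutable member so a const Run() path pays for the lookup once. Below is a minimal standalone sketch of that idiom; the Op class, its Run(), and the attribute handling are simplified stand-ins, not the Paddle API.

```cpp
#include <iostream>
#include <set>
#include <string>
#include <utility>

// Simplified stand-in for an operator whose const Run() path repeatedly
// checks an attribute; not the real Paddle OperatorWithKernel.
class Op {
 public:
  explicit Op(std::set<std::string> attrs) : attrs_(std::move(attrs)) {}

  void Run() const {
    // HasAttr pays for a lookup on every call; once it has returned true,
    // remember the answer in a mutable member so later Run() calls skip it.
    if (!enable_cache_runtime_context_ &&
        HasAttr("enable_cache_runtime_context"))
      enable_cache_runtime_context_ = true;
    std::cout << (enable_cache_runtime_context_ ? "cached" : "uncached")
              << "\n";
  }

 private:
  bool HasAttr(const std::string& name) const {
    return attrs_.count(name) > 0;
  }

  std::set<std::string> attrs_;
  // mutable so the memoized result can be updated from the const Run().
  mutable bool enable_cache_runtime_context_ = false;
};

int main() {
  Op op({"enable_cache_runtime_context"});
  op.Run();  // does the lookup once and memoizes the positive result
  op.Run();  // short-circuits on the cached flag
}
```

The mutable qualifier is what lets the const-qualified Run() update the cache, matching the member declarations in operator.h.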
```diff
@@ -931,7 +931,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
     dev_ctx = pool.Get(kernel_type_->place_);
   }
-  if (!all_kernels_must_compute_runtime_shape) {
+  if (!all_kernels_must_compute_runtime_shape_) {
     RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, *runtime_ctx);
     this->InferShape(&infer_shape_ctx);
   }
```
```diff
@@ -981,6 +981,13 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
       }
     }
   }
+
+  // To solve issue #15032 (per a discussion with @Luotao): for cpu
+  // inference we do not cache the transfer scope, so in this case delete
+  // the transfer scope after the run to avoid a memory leak.
+  if (transfer_scope && !run_by_executor_ && !enable_cache_transfer_scope_) {
+    scope.DeleteScope(transfer_scope);
+  }
 }
 
 void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx,
```
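This is the core of the fix: when the transfer scope was not cached, RunImpl now deletes it as soon as the kernel has run, instead of letting scopes pile up across inference calls (issue #15032). Here is a self-contained sketch of that delete-after-run ownership pattern; the toy Scope borrows the NewScope()/DeleteScope() names but is otherwise an assumption, not the real framework class.

```cpp
#include <cstddef>
#include <iostream>
#include <list>
#include <memory>

// Toy scope tree loosely modeled on the framework's Scope; NewScope() and
// DeleteScope() mimic the real method names, the rest is simplified.
class Scope {
 public:
  Scope& NewScope() {
    kids_.push_back(std::make_unique<Scope>());
    return *kids_.back();
  }
  void DeleteScope(Scope* s) {
    kids_.remove_if(
        [s](const std::unique_ptr<Scope>& k) { return k.get() == s; });
  }
  std::size_t KidCount() const { return kids_.size(); }

 private:
  std::list<std::unique_ptr<Scope>> kids_;
};

// Sketch of the tail of RunImpl: when the transfer scope was created only
// for this run (caching disabled), release it immediately so repeated
// inference runs do not accumulate scopes (the leak from issue #15032).
void RunOnce(Scope& scope, bool run_by_executor,
             bool enable_cache_transfer_scope) {
  Scope* transfer_scope = &scope.NewScope();  // stand-in for data transfer
  // ... the kernel would execute against transfer_scope here ...
  if (transfer_scope && !run_by_executor && !enable_cache_transfer_scope) {
    scope.DeleteScope(transfer_scope);  // uncached: free it right away
  }
}

int main() {
  Scope root;
  for (int i = 0; i < 3; ++i)
    RunOnce(root, /*run_by_executor=*/false,
            /*enable_cache_transfer_scope=*/false);
  // Without the DeleteScope call this would print 3, one leak per run.
  std::cout << "live child scopes after 3 runs: " << root.KidCount() << "\n";
}
```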
```diff
@@ -1114,9 +1121,18 @@ Scope* OperatorWithKernel::PrepareData(
       // If this op is not called by an Executor or ParallelExecutor, it
       // should be called by a NaiveExecutor; the NaiveExecutor caches the
       // scopes and variables, and that behavior differs a lot.
-      if (!run_by_executor_) {
+      //
+      // To solve issue #15032 (per a discussion with @Luotao): for cpu
+      // inference, when only cpu kernels run and no GPU participates, do
+      // not cache the transfer scope here; tests show cpu inference
+      // performance is not impacted.
+      enable_cache_transfer_scope_ = false;
+      if (!run_by_executor_ &&
+          (platform::is_gpu_place(kernel_type_for_var.place_) ||
+           platform::is_gpu_place(expected_kernel_key.place_))) {
         new_scope = TryCreateTransferScope(kernel_type_for_var,
                                            expected_kernel_key, &scope);
+        enable_cache_transfer_scope_ = true;
       }
       if (!new_scope) {
         new_scope = &scope.NewScope();
```
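The rewritten condition creates (and caches) a reusable transfer scope only when a GPU placement is involved on either side of the transfer; a pure-cpu run takes the uncached path and is cleaned up by the deletion shown earlier. A minimal sketch of the gating predicate follows, with Place and is_gpu_place as simplified stand-ins for platform::Place and platform::is_gpu_place.

```cpp
#include <iostream>

// Simplified stand-ins for platform::Place and platform::is_gpu_place.
enum class Place { kCPU, kGPU };
bool is_gpu_place(Place p) { return p == Place::kGPU; }

// Cache a transfer scope only when caching can pay off: the op runs outside
// an Executor (e.g. NaiveExecutor inference) AND a GPU appears on either
// side of the transfer. Pure-cpu inference returns false and the scope is
// deleted after each run instead.
bool ShouldCacheTransferScope(bool run_by_executor,
                              Place kernel_for_var_place,
                              Place expected_kernel_place) {
  return !run_by_executor && (is_gpu_place(kernel_for_var_place) ||
                              is_gpu_place(expected_kernel_place));
}

int main() {
  std::cout << std::boolalpha
            // cpu -> cpu: no caching, matches the fix for issue #15032
            << ShouldCacheTransferScope(false, Place::kCPU, Place::kCPU)
            << "\n"
            // cpu -> gpu: caching still enabled
            << ShouldCacheTransferScope(false, Place::kCPU, Place::kGPU)
            << "\n";
}
```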
```diff
@@ -1125,11 +1141,11 @@ Scope* OperatorWithKernel::PrepareData(
       // each result of different input will be the same as the first one.
       // The reason is that if a gpu tensor is the input of a cpu kernel,
       // we will create a new cpu tensor in the new scope.
-      // However, if enable_cache_runtime_context, we get the cpu tensor each
+      // However, if enable_cache_runtime_context_, we get the cpu tensor each
       // time, not the gpu tensor.
       // Thus, we set pre_scope_ = nullptr to trigger `new RuntimeContext()` in
       // RunImpl().
-      if (enable_cache_runtime_context) {
+      if (enable_cache_runtime_context_) {
         pre_scope_ = nullptr;
       }
```
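The pre_scope_ = nullptr trick deserves a closer look: the cached RuntimeContext is reused only while the incoming scope still matches pre_scope_, so clearing the pointer forces a rebuild on the next run and keeps stale cpu tensors from being served. A small model of that invalidation handshake, assuming a simplified Op; only the member names mirror operator.h.

```cpp
#include <iostream>
#include <memory>

// Minimal model of the RuntimeContext cache: the context is reused only
// while the incoming scope pointer still equals pre_scope_, so setting
// pre_scope_ to nullptr forces a rebuild on the next run. Member names
// mirror operator.h; everything else is simplified.
struct RuntimeContext { int id; };

class Op {
 public:
  void Run(const void* scope) const {
    if (scope != pre_scope_ || runtime_ctx_ == nullptr) {
      runtime_ctx_.reset(new RuntimeContext{++builds_});
      pre_scope_ = scope;
    }
    std::cout << "using RuntimeContext #" << runtime_ctx_->id << "\n";
  }

  // What PrepareData does after it moves inputs into a fresh scope: the
  // cached context would keep serving stale tensors, so drop the key.
  void InvalidateCachedContext() const { pre_scope_ = nullptr; }

 private:
  mutable const void* pre_scope_ = nullptr;
  mutable std::unique_ptr<RuntimeContext> runtime_ctx_;
  mutable int builds_ = 0;
};

int main() {
  Op op;
  int scope = 0;
  op.Run(&scope);                // builds context #1
  op.Run(&scope);                // reuses #1
  op.InvalidateCachedContext();  // pre_scope_ = nullptr, as in PrepareData
  op.Run(&scope);                // rebuilds: context #2
}
```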
operator.h:

```diff
@@ -499,9 +499,10 @@ class OperatorWithKernel : public OperatorBase {
   mutable std::unique_ptr<OpKernelFunc> kernel_func_;
   mutable std::unique_ptr<RuntimeContext> runtime_ctx_;
   mutable const Scope* pre_scope_ = nullptr;
-  mutable bool enable_cache_runtime_context = false;
-  mutable bool all_kernels_must_compute_runtime_shape = false;
+  mutable bool enable_cache_runtime_context_ = false;
+  mutable bool all_kernels_must_compute_runtime_shape_ = false;
   mutable std::mutex cache_update_mutex_;
+  mutable bool enable_cache_transfer_scope_ = false;
 };
 
 extern bool OpSupportGPU(const std::string& op_type);
```