未验证 提交 ba653e7b 编写于 作者: W WangZhen 提交者: GitHub

Construct exec and ctx only once in cond op to speed up (#45794)

* Construct exec and ctx only once in cond op to speed up

* Fix constructor error
上级 4bbbed9a
......@@ -21,6 +21,8 @@ limitations under the License. */
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
DECLARE_bool(use_mkldnn);
namespace paddle {
namespace operators {
......@@ -30,6 +32,9 @@ const char ConditionalOp::kCondition[] = "Cond";
const char ConditionalOp::kScope[] = "Scope";
const char ConditionalOp::kSkipEagerDeletionVars[] = "skip_eager_deletion_vars";
// Short aliases for the framework executor and its prepared-context type,
// used below for the executor/context objects held by the conditional ops.
using Executor = framework::Executor;
using ExecutorPrepareContext = framework::ExecutorPrepareContext;
class ConditionalBlockOp : public ConditionalOp {
public:
ConditionalBlockOp(const std::string &type,
......@@ -76,22 +81,28 @@ class ConditionalBlockOp : public ConditionalOp {
// Executors (executors declared inside control ops)
platform::DontClearMKLDNNCache(dev_place);
#endif
framework::Executor exec(dev_place);
auto *block = Attr<framework::BlockDesc *>("sub_block");
VLOG(3) << "Conditional block.idx = " << block->ID()
<< ", scope = " << &cur_scope;
auto &skip_vars =
Attr<std::vector<std::string>>(ConditionalOp::kSkipEagerDeletionVars);
exec.Run(*block->Program(),
&cur_scope,
block->ID(),
false,
true,
skip_vars,
/* force_disable_gc */ false,
/* keep_kid_scopes */ true);
if (!exec || !platform::is_same_place(exec->GetPlace(), dev_place)) {
auto &pdesc = *block->Program();
exec.reset(new Executor(dev_place));
if (FLAGS_use_mkldnn) exec->EnableMKLDNN(pdesc);
ctx = exec->Prepare(pdesc, block->ID(), skip_vars, false);
#ifdef PADDLE_WITH_MKLDNN
platform::AttachPointerHashToMKLDNNKey(exec.get(), dev_place);
platform::RegisterModelLayout(ctx->ops_, dev_place);
#endif
}
exec->RunPreparedContext(ctx.get(), &cur_scope, false, true, true);
}
}
private:
// Executor kept across invocations so it is not rebuilt on every run. The run
// path recreates it only when it is null or its place differs from the
// current device place.
// NOTE(review): declared `mutable`, presumably so a const run method can
// assign it; no synchronization is visible here, so confirm that one op
// instance is never run concurrently.
mutable std::shared_ptr<Executor> exec{nullptr};
// Prepared context obtained from exec->Prepare() for the sub-block; it is
// rebuilt together with `exec` and passed to exec->RunPreparedContext().
mutable std::unique_ptr<ExecutorPrepareContext> ctx{nullptr};
};
class ConditionalBlockInferShape : public framework::InferShapeBase {
......@@ -152,19 +163,21 @@ class ConditionalBlockGradOp : public ConditionalOp {
scopes.size()));
framework::Scope &cur_scope = *scopes[0];
framework::Executor exec(dev_place);
auto *block = Attr<framework::BlockDesc *>("sub_block");
VLOG(3) << "Conditional Grad block.idx = " << block->ID()
<< ", scope = " << &cur_scope;
exec.Run(*block->Program(),
&cur_scope,
block->ID(),
false,
true,
inside_grads,
/* force_disable_gc */ false,
/* keep_kid_scopes */ false);
if (!exec || !platform::is_same_place(exec->GetPlace(), dev_place)) {
auto &pdesc = *block->Program();
exec.reset(new Executor(dev_place));
if (FLAGS_use_mkldnn) exec->EnableMKLDNN(pdesc);
ctx = exec->Prepare(pdesc, block->ID(), inside_grads, false);
#ifdef PADDLE_WITH_MKLDNN
platform::AttachPointerHashToMKLDNNKey(exec.get(), dev_place);
platform::RegisterModelLayout(ctx->ops_, dev_place);
#endif
}
exec->RunPreparedContext(ctx.get(), &cur_scope, false, true, false);
AssignLocalGradientToParentScope(
dev_place, cur_scope, scope, inside_grads, outside_grads, inputs);
......@@ -174,6 +187,10 @@ class ConditionalBlockGradOp : public ConditionalOp {
AssignZeroToParentScope(dev_place, scope, inputs, outside_grads);
}
private:
// Executor kept across invocations of the grad op so it is not rebuilt on
// every run. The run path recreates it only when it is null or its place
// differs from the current device place.
// NOTE(review): declared `mutable`, presumably so a const run method can
// assign it; no synchronization is visible here, so confirm that one op
// instance is never run concurrently.
mutable std::shared_ptr<Executor> exec{nullptr};
// Prepared context obtained from exec->Prepare() for the grad sub-block; it
// is rebuilt together with `exec` and passed to exec->RunPreparedContext().
mutable std::unique_ptr<ExecutorPrepareContext> ctx{nullptr};
private:
void AssignLocalGradientToParentScope(
const platform::Place &place,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册