提交 d4674dab 编写于 作者: L Liu Yiqun

Cache the chosen kernel of operators.

test=develop
上级 31d830de
......@@ -921,12 +921,15 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(place);
if (!kernel_type_) {
// LOG(INFO) << "1, kernel_type is not set.";
// check if op[type] has kernel registered.
auto& all_op_kernels = AllOpKernels();
auto kernels_iter = all_op_kernels.find(type_);
if (kernels_iter == all_op_kernels.end()) {
PADDLE_THROW(
"There are no kernels which are registered in the %s operator.", type_);
"There are no kernels which are registered in the %s operator.",
type_);
}
OpKernelMap& kernels = kernels_iter->second;
......@@ -951,27 +954,41 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
KernelTypeToString(expected_kernel_key));
}
kernel_type_.reset(new OpKernelType(expected_kernel_key));
kernel_func_.reset(new OpKernelFunc(kernel_iter->second));
}
// std::shared_ptr<OpKernelType> kernel_type = kernel_type_;
// std::shared_ptr<OpKernelFunc> kernel_func = kernel_func_;
std::vector<KernelConfig>* kernel_configs =
GetKernelConfig(expected_kernel_key);
// GetKernelConfig(expected_kernel_key);
GetKernelConfig(*kernel_type_);
// do data transformScope &transfer_scope;
std::vector<std::string> transfered_inplace_vars;
auto* transfer_scope =
PrepareData(scope, expected_kernel_key, &transfered_inplace_vars, &ctx);
// PrepareData(scope, expected_kernel_key, &transfered_inplace_vars,
// &ctx);
PrepareData(scope, *kernel_type_, &transfered_inplace_vars, &ctx);
// exec scope is the scope that kernel actually executed on.
const Scope& exec_scope =
(transfer_scope == nullptr ? scope : *transfer_scope);
if (!(expected_kernel_key.place_ == dev_ctx->GetPlace())) {
dev_ctx = pool.Get(expected_kernel_key.place_);
// if (!(expected_kernel_key.place_ == dev_ctx->GetPlace())) {
// dev_ctx = pool.Get(expected_kernel_key.place_);
if (!(kernel_type_->place_ == dev_ctx->GetPlace())) {
dev_ctx = pool.Get(kernel_type_->place_);
}
RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, ctx);
this->InferShape(&infer_shape_ctx);
// TODO(panyx0718): ExecutionContext should only depend on RuntimeContext
// not Scope. Imperative mode only pass inputs and get outputs.
kernel_iter->second(
// kernel_iter->second(
// ExecutionContext(*this, exec_scope, *dev_ctx, ctx, kernel_configs));
(*kernel_func_)(
ExecutionContext(*this, exec_scope, *dev_ctx, ctx, kernel_configs));
if (!transfered_inplace_vars.empty()) {
......
......@@ -541,6 +541,8 @@ class OperatorWithKernel : public OperatorBase {
protected:
mutable OpKernelConfigsMap kernel_configs_map_;
mutable std::shared_ptr<OpKernelType> kernel_type_;
mutable std::shared_ptr<OpKernelFunc> kernel_func_;
};
extern bool OpSupportGPU(const std::string& op_type);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册