diff --git a/doc/fluid/design/multi_devices/kernel_selection.md b/doc/fluid/design/multi_devices/kernel_selection.md index 967317d5d2eeb818ab14faabca342cc8c4ed717e..c8391787f79ad915cb9f392be54c80da97f28ed0 100644 --- a/doc/fluid/design/multi_devices/kernel_selection.md +++ b/doc/fluid/design/multi_devices/kernel_selection.md @@ -74,10 +74,10 @@ void OperatorWithKernel::Run( auto kernel_type_for_var = this->GetKernelTypeForVar(...); if (kernel_type_for_var.place_ != expected_kernel_key.place_) { auto* trans_var = new_scope.Var(var_name); - auto* out = DataTransform(expected_kernel_key, + auto* out = TransferData(expected_kernel_key, kernel_type_for_var, *tensor_in); - CopyVariableWithTensor(...); + SetTensorToVariable(...); } } diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc index 5f15e20c78fd5a333523fe9e73542c037a161cae..f52350ecb8ab980694ea9589bf2c7e88cf9fb1f0 100644 --- a/paddle/fluid/framework/data_transform.cc +++ b/paddle/fluid/framework/data_transform.cc @@ -21,14 +21,14 @@ limitations under the License. 
*/ namespace paddle { namespace framework { -static void PassTensorData(Tensor* from, Tensor* to) { +static void PassTensorData(Tensor *from, Tensor *to) { to->ShareDataWith(*from); *from = Tensor(); } -void DataTransform(const OpKernelType& expected_kernel_type, - const OpKernelType& kernel_type_for_var, - const Tensor& input_tensor, Tensor* output_tensor) { +void TransferData(const OpKernelType &expected_kernel_type, + const OpKernelType &kernel_type_for_var, + const Tensor &input_tensor, Tensor *output_tensor) { bool transformed = false; Tensor in; in.ShareDataWith(input_tensor); @@ -89,17 +89,17 @@ void DataTransform(const OpKernelType& expected_kernel_type, output_tensor->ShareDataWith(in); } -void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor, - Variable* out_var) { +void SetTensorToVariable(const Variable &in_var, const Tensor &tensor, + Variable *out_var) { if (in_var.IsType()) { - auto& in_lod_tensor = in_var.Get(); - auto* tran_lod_tensor = out_var->GetMutable(); + auto &in_lod_tensor = in_var.Get(); + auto *tran_lod_tensor = out_var->GetMutable(); tran_lod_tensor->set_lod(in_lod_tensor.lod()); tran_lod_tensor->set_layout(in_lod_tensor.layout()); tran_lod_tensor->ShareDataWith(tensor); } else if (in_var.IsType()) { - auto& in_selected_rows = in_var.Get(); - auto* trans_selected_rows = out_var->GetMutable(); + auto &in_selected_rows = in_var.Get(); + auto *trans_selected_rows = out_var->GetMutable(); trans_selected_rows->set_height(in_selected_rows.height()); trans_selected_rows->set_rows(in_selected_rows.rows()); trans_selected_rows->mutable_value()->ShareDataWith(tensor); diff --git a/paddle/fluid/framework/data_transform.h b/paddle/fluid/framework/data_transform.h index dee5d8c7c1126013742460df1d94bb364220ad09..161f1023e339d03a4523048e59b57a6eb0ae27f9 100644 --- a/paddle/fluid/framework/data_transform.h +++ b/paddle/fluid/framework/data_transform.h @@ -30,12 +30,15 @@ limitations under the License. 
*/ namespace paddle { namespace framework { -void DataTransform(const OpKernelType& expected_kernel_type, - const OpKernelType& kernel_type_for_var, - const Tensor& input_tensor, Tensor* out); - -void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor, - Variable* out_var); +void TransferData(const OpKernelType &expected_kernel_type, + const OpKernelType &kernel_type_for_var, + const Tensor &input_tensor, Tensor *out); + +/** + * Set OutVar from InVar, except the tensor is shared with `tensor` + */ +void SetTensorToVariable(const Variable &in_var, const Tensor &tensor, + Variable *out_var); } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/op_kernel_type.h b/paddle/fluid/framework/op_kernel_type.h index f51a184e7bae2283f335fe9462a77b9c5fb831a5..c59b232191c49ccb47bb9f51dcaf2fd9280fae19 100644 --- a/paddle/fluid/framework/op_kernel_type.h +++ b/paddle/fluid/framework/op_kernel_type.h @@ -97,7 +97,7 @@ inline bool NeedTransformLayout(const DataLayout& l, const DataLayout& r) { return ret; } -inline bool TransFromNeeded(const OpKernelType& l, const OpKernelType& r) { +inline bool NeedTransform(const OpKernelType& l, const OpKernelType& r) { return (!platform::places_are_same_class(l.place_, r.place_)) || (l.data_type_ != r.data_type_) || NeedTransformLayout(l.data_layout_, r.data_layout_); diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 122ee1dab35b8c7d42392a983b5b15b7c1be7869..b364ee2c3db553c29d55dde69e79444f16a65649 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -620,8 +620,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope, "There are no kernels which are registered in the %s operator.", type_); } - ExecutionContext ctx(*this, scope, *dev_ctx); - OpKernelMap& kernels = kernels_iter->second; // TODO(dzhwinter) : kernel fallback mechanism will be added when all the @@ -631,7 +629,8 @@ void OperatorWithKernel::RunImpl(const 
Scope& scope, // Do selection // } - auto expected_kernel_key = this->GetExpectedKernelType(ctx); + auto expected_kernel_key = + this->GetExpectedKernelType(ExecutionContext(*this, scope, *dev_ctx)); VLOG(3) << "expected_kernel_key:" << expected_kernel_key; auto kernel_iter = kernels.find(expected_kernel_key); @@ -640,56 +639,34 @@ void OperatorWithKernel::RunImpl(const Scope& scope, KernelTypeToString(expected_kernel_key)); } - // do data transform - Scope& new_scope = scope.NewScope(); + // do data transform + std::vector transfered_inplace_vars; + auto* transfer_scope = + TryTransferData(scope, expected_kernel_key, &transfered_inplace_vars); - std::vector inplace_vars; - for (auto& var_name_item : this->Inputs()) { - for (auto& var_name : var_name_item.second) { - auto* var = scope.FindVar(var_name); - if (var && VarIsTensor(var)) { - auto* tensor_in = GetTensorFromVar(var); - if (tensor_in->IsInitialized()) { - auto kernel_type_for_var = this->GetKernelTypeForVar( - var_name_item.first, *tensor_in, expected_kernel_key); - if (TransFromNeeded(kernel_type_for_var, expected_kernel_key)) { - auto out_var_names = OutputVars(true); - if (std::find(out_var_names.begin(), out_var_names.end(), - var_name) != out_var_names.end()) { - inplace_vars.push_back(var_name); - } - VLOG(3) << "Transform Variable " << var_name << " from " - << kernel_type_for_var << " to " << expected_kernel_key; - auto* trans_var = new_scope.Var(var_name); - std::shared_ptr out(new Tensor); - DataTransform(expected_kernel_key, kernel_type_for_var, *tensor_in, - out.get()); - CopyVariableWithTensor(*var, *(out.get()), trans_var); - } - } - } - } + // exec scope is the scope that the kernel is actually executed on. + const Scope& exec_scope = + (transfer_scope == nullptr ? 
scope : *transfer_scope); + + if (!(expected_kernel_key.place_ == dev_ctx->GetPlace())) { + dev_ctx = pool.Get(expected_kernel_key.place_); } - auto* new_dev_ctx = pool.Get(expected_kernel_key.place_); - kernel_iter->second->Compute( - ExecutionContext(*this, new_scope, *new_dev_ctx)); + kernel_iter->second->Compute(ExecutionContext(*this, exec_scope, *dev_ctx)); - for (auto& var_name : inplace_vars) { - VLOG(3) << "share inplace var " + var_name + " back to it's original scope"; - auto* original_tensor = GetMutableTensorFromVar(scope.FindVar(var_name)); - auto* transformed_tensor = GetTensorFromVar(new_scope.FindVar(var_name)); - original_tensor->ShareDataWith(*transformed_tensor); + if (!transfered_inplace_vars.empty()) { + // at least one inplace variable has been transferred. + TransferInplaceVarsBack(scope, transfered_inplace_vars, *transfer_scope); } /*For profiling/benchmark only*/ if (FLAGS_benchmark) { - new_dev_ctx->Wait(); + dev_ctx->Wait(); } if (FLAGS_check_nan_inf) { for (auto& vname : OutputVars(true)) { - auto* var = new_scope.FindVar(vname); + auto* var = exec_scope.FindVar(vname); if (var == nullptr) continue; if (var->IsType()) { CheckTensorNANOrInf(vname, var->Get()); @@ -697,6 +674,64 @@ void OperatorWithKernel::RunImpl(const Scope& scope, } } } +void OperatorWithKernel::TransferInplaceVarsBack( + const Scope& scope, const std::vector& inplace_vars, + const Scope& transfer_scope) const { + for (auto& var_name : inplace_vars) { + VLOG(3) << "share inplace var " + var_name + " back to it's original scope"; + auto* original_tensor = GetMutableTensorFromVar(scope.FindVar(var_name)); + auto* transformed_tensor = + GetTensorFromVar(transfer_scope.FindVar(var_name)); + original_tensor->ShareDataWith(*transformed_tensor); + } +} + +Scope* OperatorWithKernel::TryTransferData( + const Scope& scope, const OpKernelType& expected_kernel_key, + std::vector* transfered_inplace_vars) const { + Scope* new_scope = nullptr; + for (auto& var_name_item : Inputs()) { + 
for (auto& var_name : var_name_item.second) { + auto* var = scope.FindVar(var_name); + // Only tensors can be transferred to another device. + if (var == nullptr || !VarIsTensor(var)) { + continue; + } + + auto* tensor_in = GetTensorFromVar(var); + if (!tensor_in->IsInitialized()) { + continue; + } + + auto kernel_type_for_var = GetKernelTypeForVar( + var_name_item.first, *tensor_in, expected_kernel_key); + + if (!NeedTransform(kernel_type_for_var, expected_kernel_key)) { + continue; + } + + auto out_var_names = OutputVars(true); + if (std::find(out_var_names.begin(), out_var_names.end(), var_name) != + out_var_names.end()) { + transfered_inplace_vars->emplace_back(var_name); + } + + VLOG(3) << "Transform Variable " << var_name << " from " + << kernel_type_for_var << " to " << expected_kernel_key; + + if (new_scope == nullptr) { + new_scope = &scope.NewScope(); + } + + auto* trans_var = new_scope->Var(var_name); + Tensor out; + TransferData(expected_kernel_key, kernel_type_for_var, *tensor_in, &out); + SetTensorToVariable(*var, out, trans_var); + } + } + + return new_scope; +} proto::VarType::Type OperatorWithKernel::IndicateDataType( const ExecutionContext& ctx) const { diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index b1d75d0d0ff3dccc67a1e833ccfe03a4cad8df39..1550d5df172f0599e1b42e7f1ccf51ac4dd1e0c3 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -384,6 +384,20 @@ class OperatorWithKernel : public OperatorBase { // same. proto::VarType::Type IndicateDataType(const ExecutionContext& ctx) const; void RunImpl(const Scope& scope, const platform::Place& place) const final; + + /** + * Transfer data from scope to a transferred scope. If no data needs to + * be transferred, it returns nullptr. + * + * * transfered_inplace_vars is an output vector. 
+ */ + Scope* TryTransferData( + const Scope& scope, const OpKernelType& expected_kernel_key, + std::vector* transfered_inplace_vars) const; + + void TransferInplaceVarsBack(const Scope& scope, + const std::vector& inplace_vars, + const Scope& exec_scope) const; }; extern bool OpSupportGPU(const std::string& op_type);