// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/imperative/layer.h" #include #include #include #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/imperative/execution_context.h" #include "paddle/fluid/imperative/infer_shape_context.h" #include "paddle/fluid/imperative/infer_var_type_context.h" #include "paddle/fluid/imperative/op_base.h" #include "paddle/fluid/imperative/prepared_operator.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { namespace imperative { using framework::Variable; void ThreadSafeNameSet::Insert(const std::string& name) { std::lock_guard guard(mtx_); set_.insert(name); } void ThreadSafeNameSet::Remove(const std::string& name) { std::lock_guard guard(mtx_); auto iter = set_.find(name); PADDLE_ENFORCE_EQ(iter != set_.end(), true, "%s does not exist", name); set_.erase(iter); } std::vector ThreadSafeNameSet::Names() const { std::lock_guard guard(mtx_); return std::vector(set_.begin(), set_.end()); } ThreadSafeNameSet VarBase::name_set_; std::vector VarBase::AliveVarNames() { return name_set_.Names(); } static framework::VariableNameMap CreateVarNameMap( const framework::OpInfo& op_info, const std::string& op_type, const NameVarBaseMap& varbase_map, bool is_input) { if (op_info.proto_ == nullptr) { framework::VariableNameMap result; for (auto& it : varbase_map) { auto& var_vector = it.second; std::vector args; args.reserve(var_vector.size()); for (auto& var_base : var_vector) { args.emplace_back(var_base->Name()); } result[it.first] = std::move(args); } return result; } framework::VariableNameMap result; for (auto& var : is_input ? op_info.Proto().inputs() : op_info.Proto().outputs()) { auto it = varbase_map.find(var.name()); if (it == varbase_map.end()) { PADDLE_ENFORCE_EQ( var.dispensable(), true, "Var: %s not dispensable and there are no such var in inputs", var.name()); result[var.name()] = {}; } else { auto& var_vector = it->second; std::vector args; args.reserve(var_vector.size()); for (auto& var_base : var_vector) { args.emplace_back(var_base->Name()); } result[var.name()] = std::move(args); } } return result; } static framework::RuntimeContext PrepareRuntimeContext( const NameVarBaseMap& ins, const NameVarBaseMap& outs) { framework::VariableValueMap inputs, outputs; for (auto& in_pair : ins) { auto& in_ctx = inputs[in_pair.first]; in_ctx.reserve(in_pair.second.size()); for (auto& in_var : in_pair.second) { in_ctx.emplace_back(in_var->MutableVar()); } } for (auto& out_pair : outs) { auto& out_ctx = outputs[out_pair.first]; out_ctx.reserve(out_pair.second.size()); for (auto& out_var : out_pair.second) { out_ctx.emplace_back(out_var->MutableVar()); } } return framework::RuntimeContext(std::move(inputs), std::move(outputs)); } template static std::string DebugString( const std::string& name, const std::vector>& vars) { std::stringstream ss; ss << name << "{"; for (size_t i = 0; i < vars.size(); ++i) { if (i > 0) ss << ", "; if (vars[i] == nullptr) { ss << "NULL"; continue; } ss << vars[i]->Name() << "["; const framework::Variable& var = vars[i]->Var(); if (!var.IsInitialized()) { ss << "NOT_INITED_VAR"; } else if (var.IsType()) { auto& tensor = var.Get(); ss << "LoDTensor<"; if (tensor.IsInitialized()) { ss << framework::DataTypeToString(tensor.type()) << ", "; ss << tensor.place() << ", "; ss << "(" << tensor.dims() << ")"; } else { ss << "NOT_INITED"; } ss << ">"; } else if (var.IsType()) { ss << "SelectedRows<"; auto& selected_rows = var.Get(); auto& tensor = selected_rows.value(); auto& rows = selected_rows.rows(); if (tensor.IsInitialized()) { ss << framework::DataTypeToString(tensor.type()) << ", "; ss << tensor.place() << ", "; ss << "height(" << selected_rows.height() << "), rows("; std::for_each(rows.cbegin(), rows.cend(), [&ss](const int64_t r) { ss << r << " "; }); ss << "), dims(" << tensor.dims() << ")"; } else { ss << "NOT_INITED"; } ss << ">"; } else { ss << "UNRESOLVED_TYPE"; } ss << "]"; } ss << "}"; return ss.str(); } template static std::string LayerDebugStringImpl(const std::string& op_type, const NameVarMap& ins, const NameVarMap& outs) { std::stringstream ss; ss << "Op(" << op_type << "): "; ss << "Inputs: "; size_t i = 0; for (auto& pair : ins) { if (i > 0) ss << ", "; ss << DebugString(pair.first, pair.second); ++i; } ss << ", Outputs: "; i = 0; for (auto& pair : outs) { if (i > 0) ss << ", "; ss << DebugString(pair.first, pair.second); ++i; } return ss.str(); } std::string LayerDebugString(const std::string& op_type, const NameVarMap& ins, const NameVarMap& outs) { return LayerDebugStringImpl(op_type, ins, outs); } std::string LayerDebugString(const std::string& op_type, const NameVarMap& ins, const NameVarMap& outs) { return LayerDebugStringImpl(op_type, ins, outs); } VarBase::VarBase(bool has_grad, const std::shared_ptr& var) : var_(var), grad_node_(var->GetGradNode()) { if (has_grad) { if (auto grad_var = var_->GetGradVar()) { grad_var_ = std::make_shared(false, grad_var); } else { grad_var_ = std::make_shared(false, GradVarName()); var_->SetGradVar(grad_var_->var_); } } if (IsDebugEnabled()) { VLOG(10) << "Construct VarBase: " << Name(); name_set_.Insert(Name()); } } size_t VarBase::GradOpNum() const { return grad_node_ ? grad_node_->size() : 0; } void VarBase::ClearGradient() { if (grad_var_) { if (grad_var_->Var().IsType()) { auto* grad_t = grad_var_->MutableVar()->GetMutable(); if (grad_t->mutable_value()->IsInitialized()) { grad_t->mutable_rows()->clear(); grad_t->mutable_value()->clear(); } } else { auto* grad_t = grad_var_->MutableVar()->GetMutable(); if (grad_t->IsInitialized()) { auto* dev_ctx = platform::DeviceContextPool::Instance().Get(grad_t->place()); operators::math::set_constant(*dev_ctx, grad_t, 0.0); } } } } std::shared_ptr VarBase::NewVarBase(const platform::Place& dst_place, const bool blocking) const { PADDLE_ENFORCE_EQ( Var().IsInitialized() && (Var().IsType() || Var().IsType()), true, platform::errors::InvalidArgument( "Variable is not initialized or Variable's type is not " "LoDTensor or SelectedRows when getting numpy tensor")); if (Var().IsType()) { auto& src_tensor = Var().Get(); // TODO(Jiabin): change this after move unique_name generator to CXX auto new_var = std::make_shared( true, Name() + std::to_string(copied_counter_++)); auto* dst_tensor = new_var->MutableVar()->GetMutable(); dst_tensor->set_lod(src_tensor.lod()); new_var->SetPersistable(Persistable()); new_var->SetDataType(DataType()); new_var->SetType(Type()); framework::TensorCopy(src_tensor, dst_place, dst_tensor); if (blocking) { platform::DeviceContextPool::Instance().Get(dst_place)->Wait(); auto src_place = src_tensor.place(); if (!(src_place == dst_place)) { platform::DeviceContextPool::Instance().Get(src_place)->Wait(); } } if (platform::is_gpu_place(dst_place)) { VLOG(3) << "copy tensor " << Name() << " from gpu"; } return new_var; } else { auto& src_selected_rows = Var().Get(); auto new_var = std::make_shared( false, "Itmp" + std::to_string(copied_counter_++)); new_var->SetType(framework::proto::VarType::SELECTED_ROWS); auto* dst_selected_rows = new_var->MutableVar()->GetMutable(); framework::TensorCopy(src_selected_rows.value(), dst_place, dst_selected_rows->mutable_value()); if (blocking) { platform::DeviceContextPool::Instance().Get(dst_place)->Wait(); auto src_place = src_selected_rows.place(); if (!(src_place == dst_place)) { platform::DeviceContextPool::Instance().Get(src_place)->Wait(); } } dst_selected_rows->set_height(src_selected_rows.height()); dst_selected_rows->set_rows(src_selected_rows.rows()); if (platform::is_gpu_place(dst_place)) { VLOG(3) << "copy selected rows " << Name() << " from gpu"; } return new_var; } } void OpBase::SetType(const std::string& type) { op_ = framework::OpRegistry::CreateOp(type, {}, {}, {}, false); } void OpBase::ClearBackwardTrace() { ins_.clear(); outs_.clear(); } template static void OpBaseRunImpl(const framework::OperatorBase& op, const NameVarMap& ins, const NameVarMap& outs, const framework::AttributeMap& attrs, const platform::Place& place) { auto* op_kernel = dynamic_cast(&op); PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel"); auto& info = op.Info(); if (info.infer_var_type_) { RuntimeInferVarTypeContext infer_var_type_ctx(ins, outs, attrs); info.infer_var_type_(&infer_var_type_ctx); } // Initialize output var type for (auto& var_pair : outs) { for (auto& var : var_pair.second) { if (var) { InitializeVariable(var->MutableVar(), var->Type()); } } } // VLOG(3) << "Running Op " << op.Type(); VLOG(5) << LayerDebugString(op.Type(), ins, outs); auto prepared_op = PreparedOp::Prepare(ins, outs, *op_kernel, place, attrs); prepared_op.Run(ins, outs, attrs); VLOG(4) << LayerDebugString(op.Type(), ins, outs); } void OpBase::Run(const framework::OperatorBase& op, const NameVarMap& ins, const NameVarMap& outs, const framework::AttributeMap& attrs, const platform::Place& place) { OpBaseRunImpl(op, ins, outs, attrs, place); } void OpBase::Run(const framework::OperatorBase& op, const NameVarMap& ins, const NameVarMap& outs, const framework::AttributeMap& attrs, const platform::Place& place) { OpBaseRunImpl(op, ins, outs, attrs, place); } static void ClearNoNeedBufferInputs(OpBase* op) { auto& inferer = op->Info().NoNeedBufferVarsInferer(); if (!inferer) return; auto* ins = op->GetMutableInsMap(); const auto& no_need_buffer_slots = inferer(*ins, op->GetOutsMap(), op->Attrs()); if (no_need_buffer_slots.empty()) return; for (auto& slot : no_need_buffer_slots) { auto iter = ins->find(slot); if (iter == ins->end()) continue; VLOG(2) << "Clear data buffer of " << slot << " in " << op->Type(); PADDLE_ENFORCE_EQ( iter->second.IsGrad(), false, platform::errors::InvalidArgument( "Only forward variable buffers can be clear, this may be a bug")); for (auto& each_var : *(iter->second.MutableVarList())) { if (!each_var) continue; auto& var = each_var->Var(); PADDLE_ENFORCE_EQ(var.IsType(), true, platform::errors::PermissionDenied( "NoNeedBufferVars only support LoDTensor")); // TODO(zjl): support higher order derivatives auto new_var = new VariableWrapper(each_var->Name()); auto* new_tensor = new_var->MutableVar()->GetMutable(); auto& old_tensor = var.Get(); new_tensor->Resize(old_tensor.dims()); new_tensor->set_lod(old_tensor.lod()); each_var.reset(new_var); } } } std::shared_ptr CreateGradOpNode( const framework::OperatorBase& op, const NameVarBaseMap& ins, const NameVarBaseMap& outs, const framework::AttributeMap& attrs, const platform::Place& place) { const auto& info = op.Info(); if (!info.dygraph_grad_op_maker_) { return nullptr; } auto grad_node = info.dygraph_grad_op_maker_(op.Type(), ins, outs, attrs); if (grad_node && !grad_node->empty()) { for (auto& op : *grad_node) { op.SetId(OpBase::GenerateUniqueId()); op.SetPlace(place); ClearNoNeedBufferInputs(&op); } return grad_node; } else { return nullptr; } } } // namespace imperative } // namespace paddle