// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #include #include #include #include "paddle/fluid/framework/details/build_strategy.h" #include "paddle/fluid/framework/details/ssa_graph_builder.h" namespace paddle { namespace platform { class NCCLContextMap; } namespace framework { class Scope; namespace details { class MultiDevSSAGraphBuilder : public SSAGraphBuilder { public: #ifdef PADDLE_WITH_CUDA MultiDevSSAGraphBuilder(const std::vector &places, const std::string &loss_var_name, const std::unordered_set ¶ms, const std::vector &local_scopes, platform::NCCLContextMap *nccl_ctxs, const BuildStrategy &strategy); #else MultiDevSSAGraphBuilder(const std::vector &places, const std::string &loss_var_name, const std::unordered_set ¶ms, const std::vector &local_scopes, const BuildStrategy &strategy); #endif std::unique_ptr Build(const ProgramDesc &program) const override; int GetRemoteVarDeviceId(const std::string &var_name) const override { auto got = remote_vars_devices_.find(var_name); if (got != remote_vars_devices_.end()) { return got->second; } return -1; } private: void CreateOpHandleIOs(SSAGraph *result, const OpDesc &op, size_t place_id) const; private: std::string loss_var_name_; const std::vector &places_; const std::vector &local_scopes_; std::unordered_set grad_names_; #ifdef PADDLE_WITH_CUDA platform::NCCLContextMap *nccl_ctxs_; #endif bool IsScaleLossOp(const OpDesc &op) const; void CreateRPCOp(SSAGraph *result, const OpDesc &op, int place_id) const; void CreateDistTrainOp(SSAGraph *result, const OpDesc &op, int place_id) const; /** * Is this operator as the end-point operator before/after send operator. */ bool IsDistTrainOp(const OpDesc &op, const std::vector &send_vars, const std::vector &recv_vars) const; std::vector FindDistTrainSendVars( const ProgramDesc &program) const; std::vector FindDistTrainRecvVars( const ProgramDesc &program) const; void ConnectOp(SSAGraph *result, OpHandleBase *op, const std::string &prev_op_name) const; void CreateComputationalOps(SSAGraph *result, const OpDesc &op, size_t num_places) const; void CreateScaleLossGradOp(SSAGraph *result) const; VarHandle *CreateReduceOp(SSAGraph *result, const std::string &og, int dst_dev_id) const; void CreateComputationalOp(SSAGraph *result, const OpDesc &op, int dev_id) const; bool IsParameterGradientOnce( const std::string &og, std::unordered_set *og_has_been_broadcast) const; int GetOpDeviceID( const std::vector> &var_name_on_devices, const OpDesc &op) const; void InsertNCCLAllReduceOp(SSAGraph *result, const std::string &og) const; void CreateBroadcastOp(SSAGraph *result, const std::string &p_name, size_t src_dev_id) const; bool IsSparseGradient( const std::unordered_map &all_vars, const std::string &og) const; private: BuildStrategy strategy_; mutable std::unordered_map remote_vars_devices_; }; } // namespace details } // namespace framework } // namespace paddle