diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc index 3560fabb424375a770432586fe7c8e51210b3d0c..d80bdcf15d798925c137460125964d3d7e65f67e 100644 --- a/paddle/fluid/framework/details/op_handle_base.cc +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -124,16 +124,10 @@ void OpHandleBase::RunAndRecordEvent(const std::function &callback) { #ifdef PADDLE_WITH_CUDA if (!events_.empty()) { // Use event std::function method = callback; - // NOTE(zcd): device context must be ordered here because RecordEvent - // will use a mutex to ensure the safe of multi-threads. - std::map ordered_ctxes; for (auto &p : dev_ctxes_) { - ordered_ctxes.emplace(p.second, p.first); - } - for (auto &p : ordered_ctxes) { method = [method, p, this]() { - static_cast(p.first)->RecordEvent( - events_.at(boost::get(p.second).device), + static_cast(p.second)->RecordEvent( + events_.at(boost::get(p.first).device), method); }; } diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h index fbd90a3296bca92b097cab925b218b91e7f4752f..6aec178831161f8ac1306fc3ed72e3267ca3c7e5 100644 --- a/paddle/fluid/framework/details/op_handle_base.h +++ b/paddle/fluid/framework/details/op_handle_base.h @@ -13,9 +13,9 @@ // limitations under the License. #pragma once +#include #include #include - #include "paddle/fluid/framework/details/var_handle.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/macros.h" @@ -92,9 +92,7 @@ class OpHandleBase { std::vector inputs_; std::vector outputs_; - std::unordered_map - dev_ctxes_; + std::map dev_ctxes_; #ifdef PADDLE_WITH_CUDA std::unordered_map events_; diff --git a/paddle/fluid/framework/details/reduce_and_gather.h b/paddle/fluid/framework/details/reduce_and_gather.h index a6ffb37313a88120bc9e8d5ce326f60aeebdff69..c0cd873a1d83fa8c2c7b7cd5acfaad9949bcff7d 100644 --- a/paddle/fluid/framework/details/reduce_and_gather.h +++ b/paddle/fluid/framework/details/reduce_and_gather.h @@ -54,8 +54,7 @@ struct ReduceLoDTensor { inline void GatherSelectedRows( const std::vector &src_selecte_rows_, const std::vector &in_places, - const std::unordered_map &dev_ctxes, + const std::map &dev_ctxes, const platform::Place &out_place, SelectedRows *dst_selecte_rows) { PADDLE_ENFORCE(!src_selecte_rows_.empty()); diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 6c50ab2685c56bafe146c67fe2ef081ee4c55628..2cc26da013f59f5b7ee1747d57baca9c1c0efe2c 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/device_context.h" +#include #include #include #include @@ -35,7 +36,7 @@ DeviceContextPool::DeviceContextPool( const std::vector& places) { PADDLE_ENFORCE_GT(places.size(), 0); using PtrType = std::unique_ptr; - std::unordered_set set; + std::set set; for (auto& p : places) { set.insert(p); } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 292ffef1aef12732812b8c5b0020cad73b1d06fc..88e0383146c1adf2752a362091996bad9cfcce5e 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -27,12 +27,12 @@ limitations under the License. */ #include #endif +#include +#include "glog/logging.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" #include "unsupported/Eigen/CXX11/Tensor" -#include "glog/logging.h" - namespace paddle { namespace platform { @@ -201,9 +201,7 @@ class DeviceContextPool { private: static DeviceContextPool* pool; - std::unordered_map, PlaceHash> - device_contexts_; + std::map> device_contexts_; DISABLE_COPY_AND_ASSIGN(DeviceContextPool); }; diff --git a/paddle/fluid/platform/place.h b/paddle/fluid/platform/place.h index ad54a878996bd36f2d714f6554b44c89dae3fd0c..e3ee504f3d042d6a99036e34507c4c8bee306750 100644 --- a/paddle/fluid/platform/place.h +++ b/paddle/fluid/platform/place.h @@ -30,6 +30,7 @@ struct CPUPlace { // needed for variant equality comparison inline bool operator==(const CPUPlace &) const { return true; } inline bool operator!=(const CPUPlace &) const { return false; } + inline bool operator<(const CPUPlace &) const { return false; } }; struct CUDAPlace { @@ -42,6 +43,7 @@ struct CUDAPlace { return device == o.device; } inline bool operator!=(const CUDAPlace &o) const { return !(*this == o); } + inline bool operator<(const CUDAPlace &o) const { return device < o.device; } int device; }; @@ -52,6 +54,7 @@ struct CUDAPinnedPlace { // needed for variant equality comparison inline bool operator==(const CUDAPinnedPlace &) const { return true; } inline bool operator!=(const CUDAPinnedPlace &) const { return false; } + inline bool operator<(const CUDAPinnedPlace &) const { return false; } }; struct IsCUDAPlace : public boost::static_visitor { @@ -89,18 +92,6 @@ bool is_cuda_pinned_place(const Place &); bool places_are_same_class(const Place &, const Place &); bool is_same_place(const Place &, const Place &); -struct PlaceHash { - std::size_t operator()(const Place &p) const { - constexpr size_t num_dev_bits = 4; - std::hash ihash; - size_t dev_id = 0; - if (is_gpu_place(p)) { - dev_id = boost::get(p).device; - } - return ihash(dev_id << num_dev_bits | p.which()); - } -}; - std::ostream &operator<<(std::ostream &, const Place &); template