diff --git a/paddle/fluid/framework/fleet/heter_ps/heter_resource.cc b/paddle/fluid/framework/fleet/heter_ps/heter_resource.cc index ccdb6c5cdd64e478bea611e3ca23345b1fced23d..cad7559af5742f9accf640cf7aa6a95fb0f17d96 100644 --- a/paddle/fluid/framework/fleet/heter_ps/heter_resource.cc +++ b/paddle/fluid/framework/fleet/heter_ps/heter_resource.cc @@ -104,6 +104,13 @@ int HeterPsResource::get_index_by_devid(int devid) { int HeterPsResource::total_gpu() { return dev_ids_.size(); } +void HeterPsResource::set_multi_mf(int multi_mf_dim, int max_mf_dim) { + multi_mf_dim_ = multi_mf_dim; + max_mf_dim_ = max_mf_dim; + VLOG(3) << "heter resource set mf dim: " << multi_mf_dim_ + << " max_mf_dim_: " << max_mf_dim_; +} + } // end namespace framework } // end namespace paddle #endif diff --git a/paddle/fluid/framework/fleet/heter_ps/heter_resource.h b/paddle/fluid/framework/fleet/heter_ps/heter_resource.h index 7bc52e52e6887d6fb8f00e908e19a42b4e38efd3..19df8cc70f50efd0130ea68390ce9fd374cfef46 100644 --- a/paddle/fluid/framework/fleet/heter_ps/heter_resource.h +++ b/paddle/fluid/framework/fleet/heter_ps/heter_resource.h @@ -56,6 +56,7 @@ class HeterPsResource { int total_gpu(); int get_index_by_devid(int devid); int dev_id(int num); + void set_multi_mf(int multi_mf_dim, int max_mf_dim); gpuStream_t local_stream(int gpu_num, int stream_num); gpuStream_t remote_stream(int gpu_num, int stream_num); gpuStream_t comm_stream(int gpu_num, int stream_num); @@ -63,6 +64,8 @@ class HeterPsResource { std::vector> resources_; std::vector dev_ids_; std::map devid_2_index_; + int multi_mf_dim_{0}; + int max_mf_dim_{0}; }; } // end namespace framework diff --git a/paddle/fluid/framework/fleet/heter_ps/mem_pool.h b/paddle/fluid/framework/fleet/heter_ps/mem_pool.h index cfaf96ebbe88d942709cfa56071449121e525c7f..9189902c28ffb4796b970baab858d2c99918540d 100644 --- a/paddle/fluid/framework/fleet/heter_ps/mem_pool.h +++ b/paddle/fluid/framework/fleet/heter_ps/mem_pool.h @@ -87,7 +87,7 @@ class 
HBMMemoryPool : public managed { out << "show: " << x->show << " clk: " << x->clk << " slot: " << x->slot << " lr: " << x->lr << " mf_dim: " << x->mf_size << " mf_size: " << x->mf_size << " mf:"; - for (int i = 0; i < x->mf_dim + 1; ++i) { + for (int i = 0; i < x->mf_size + 1; ++i) { out << " " << x->mf[i]; } out << "\n"; diff --git a/paddle/fluid/framework/fleet/ps_gpu_wrapper.h b/paddle/fluid/framework/fleet/ps_gpu_wrapper.h index c163c2de1101919d7793996cf4b9df4d74258cbe..49fc61b418c147e918ffea1d9495412bf5012504 100644 --- a/paddle/fluid/framework/fleet/ps_gpu_wrapper.h +++ b/paddle/fluid/framework/fleet/ps_gpu_wrapper.h @@ -34,6 +34,7 @@ limitations under the License. */ #include "paddle/fluid/framework/fleet/heter_context.h" #include "paddle/fluid/framework/fleet/heter_ps/heter_ps_base.h" #include "paddle/fluid/framework/fleet/heter_ps/heter_resource.h" +#include "paddle/fluid/framework/fleet/heter_ps/mem_pool.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/variable_helper.h" @@ -48,6 +49,9 @@ limitations under the License. 
*/ namespace paddle { namespace framework { +#define TYPEALIGN(ALIGNVAL, LEN) \ + (((uint64_t)(LEN) + ((ALIGNVAL)-1)) & ~((uint64_t)((ALIGNVAL)-1))) + class PSGPUWrapper { public: virtual ~PSGPUWrapper() { delete HeterPs_; } @@ -261,6 +265,44 @@ class PSGPUWrapper { slot_vector_ = slot_vector; } + void SetSlotOffsetVector(const std::vector& slot_offset_vector) { + slot_offset_vector_ = slot_offset_vector; + } + + void SetSlotDimVector(const std::vector& slot_mf_dim_vector) { + slot_mf_dim_vector_ = slot_mf_dim_vector; + assert(slot_mf_dim_vector_.size() == slot_vector_.size()); + for (size_t i = 0; i < slot_mf_dim_vector.size(); i++) { + slot_dim_map_[slot_vector_[i]] = slot_mf_dim_vector_[i]; + } + + std::unordered_set dims_set; + for (auto& it : slot_dim_map_) { + dims_set.insert(it.second); + } + size_t num_of_dim = dims_set.size(); + index_dim_vec_.resize(num_of_dim); + index_dim_vec_.assign(dims_set.begin(), dims_set.end()); + std::sort(index_dim_vec_.begin(), index_dim_vec_.end()); + std::unordered_map dim_index_map; + for (size_t i = 0; i < num_of_dim; i++) { + dim_index_map[index_dim_vec_[i]] = i; + } + hbm_pools_.resize(resource_->total_gpu() * num_of_dim); + mem_pools_.resize(resource_->total_gpu() * num_of_dim); + max_mf_dim_ = index_dim_vec_.back(); + multi_mf_dim_ = (dim_index_map.size() >= 1) ? 
dim_index_map.size() : 0; + resource_->set_multi_mf(multi_mf_dim_, max_mf_dim_); + slot_index_vec_.resize(slot_mf_dim_vector_.size()); + for (size_t i = 0; i < slot_index_vec_.size(); i++) { + slot_index_vec_[i] = dim_index_map[slot_mf_dim_vector_[i]]; + } + val_type_size_ = + TYPEALIGN(8, sizeof(FeatureValue) + sizeof(float) * (max_mf_dim_ + 1)); + grad_type_size_ = + TYPEALIGN(8, sizeof(FeaturePushValue) + (max_mf_dim_ * sizeof(float))); + } + void ShowOneTable(int index) { HeterPs_->show_one_table(index); } private: @@ -274,6 +316,15 @@ class PSGPUWrapper { std::shared_ptr resource_; int32_t sleep_seconds_before_fail_exit_; std::vector slot_vector_; + std::vector slot_offset_vector_; + std::vector slot_mf_dim_vector_; + std::unordered_map slot_dim_map_; + std::vector slot_index_vec_; + std::vector index_dim_vec_; + int multi_mf_dim_{0}; + int max_mf_dim_{0}; + size_t val_type_size_{0}; + size_t grad_type_size_{0}; int multi_node_{0}; int node_size_; uint64_t table_id_; @@ -291,6 +342,10 @@ class PSGPUWrapper { int month_; int day_; + std::vector mem_pools_; + std::vector hbm_pools_; // in multi mfdim, one table needs hbm + // pools of total dims number + std::shared_ptr< paddle::framework::ChannelObject>> data_ready_channel_ = diff --git a/paddle/fluid/pybind/ps_gpu_wrapper_py.cc b/paddle/fluid/pybind/ps_gpu_wrapper_py.cc index 6e98a9479fa26a1d0ace170785685e4a27d2ca0b..629dc2c4037e73b3dfd76126e14bb34c985e38ce 100644 --- a/paddle/fluid/pybind/ps_gpu_wrapper_py.cc +++ b/paddle/fluid/pybind/ps_gpu_wrapper_py.cc @@ -39,6 +39,11 @@ void BindPSGPUWrapper(py::module* m) { .def(py::init([]() { return framework::PSGPUWrapper::GetInstance(); })) .def("set_slot_vector", &framework::PSGPUWrapper::SetSlotVector, py::call_guard()) + .def("set_slot_dim_vector", &framework::PSGPUWrapper::SetSlotDimVector, + py::call_guard()) + .def("set_slot_offset_vector", + &framework::PSGPUWrapper::SetSlotOffsetVector, + py::call_guard()) .def("init_GPU_server", 
&framework::PSGPUWrapper::InitializeGPUServer, py::call_guard()) .def("set_date", &framework::PSGPUWrapper::SetDate,