From 5ada0329743e035e9c07a909595d7b488a5d1bda Mon Sep 17 00:00:00 2001 From: 123malin Date: Fri, 30 Apr 2021 12:06:22 +0800 Subject: [PATCH] test=develop, optimize index_sampler (#32663) --- .../index_dataset/index_sampler.cc | 27 +++---------------- .../distributed/index_dataset/index_sampler.h | 20 ++++++++++++++ 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/paddle/fluid/distributed/index_dataset/index_sampler.cc b/paddle/fluid/distributed/index_dataset/index_sampler.cc index 58f85d98fb0..3e573bbdd2d 100644 --- a/paddle/fluid/distributed/index_dataset/index_sampler.cc +++ b/paddle/fluid/distributed/index_dataset/index_sampler.cc @@ -13,13 +13,10 @@ // limitations under the License. #include "paddle/fluid/distributed/index_dataset/index_sampler.h" -#include "paddle/fluid/operators/math/sampler.h" namespace paddle { namespace distributed { -using Sampler = paddle::operators::math::Sampler; - std::vector> LayerWiseSampler::sample( const std::vector>& user_inputs, const std::vector& target_ids, bool with_hierarchy) { @@ -30,22 +27,7 @@ std::vector> LayerWiseSampler::sample( std::vector(user_feature_num + 2)); auto max_layer = tree_->Height(); - std::vector sampler_vec(max_layer - start_sample_layer_); - std::vector> layer_ids(max_layer - - start_sample_layer_); - - auto layer_index = max_layer - 1; size_t idx = 0; - while (layer_index >= start_sample_layer_) { - auto layer_codes = tree_->GetLayerCodes(layer_index); - layer_ids[idx] = tree_->GetNodes(layer_codes); - sampler_vec[idx] = new paddle::operators::math::UniformSampler( - layer_ids[idx].size() - 1, seed_); - layer_index--; - idx++; - } - - idx = 0; for (size_t i = 0; i < input_num; i++) { auto travel_codes = tree_->GetTravelCodes(target_ids[i], start_sample_layer_); @@ -76,18 +58,15 @@ std::vector> LayerWiseSampler::sample( for (int idx_offset = 0; idx_offset < layer_counts_[j]; idx_offset++) { int sample_res = 0; do { - sample_res = sampler_vec[j]->Sample(); - } while (layer_ids[j][sample_res].id() == travel_path[j].id()); + sample_res = sampler_vec_[j]->Sample(); + } while (layer_ids_[j][sample_res].id() == travel_path[j].id()); outputs[idx + idx_offset][user_feature_num] = - layer_ids[j][sample_res].id(); + layer_ids_[j][sample_res].id(); outputs[idx + idx_offset][user_feature_num + 1] = 0; } idx += layer_counts_[j]; } } - for (size_t i = 0; i < sampler_vec.size(); i++) { - delete sampler_vec[i]; - } return outputs; } diff --git a/paddle/fluid/distributed/index_dataset/index_sampler.h b/paddle/fluid/distributed/index_dataset/index_sampler.h index 66882bedc9b..8813421446a 100644 --- a/paddle/fluid/distributed/index_dataset/index_sampler.h +++ b/paddle/fluid/distributed/index_dataset/index_sampler.h @@ -16,6 +16,7 @@ #include #include "paddle/fluid/distributed/index_dataset/index_wrapper.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/math/sampler.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -83,6 +84,23 @@ class LayerWiseSampler : public IndexSampler { } reverse(layer_counts_.begin(), layer_counts_.end()); VLOG(3) << "sample counts sum: " << layer_counts_sum_; + + auto max_layer = tree_->Height(); + sampler_vec_.clear(); + layer_ids_.clear(); + + auto layer_index = max_layer - 1; + size_t idx = 0; + while (layer_index >= start_sample_layer_) { + auto layer_codes = tree_->GetLayerCodes(layer_index); + layer_ids_.push_back(tree_->GetNodes(layer_codes)); + auto sampler_temp = + std::make_shared( + layer_ids_[idx].size() - 1, seed_); + sampler_vec_.push_back(sampler_temp); + layer_index--; + idx++; + } } std::vector> sample( const std::vector>& user_inputs, @@ -94,6 +112,8 @@ class LayerWiseSampler : public IndexSampler { std::shared_ptr tree_{nullptr}; int seed_{0}; int start_sample_layer_{1}; + std::vector> sampler_vec_; + std::vector> layer_ids_; }; } // end namespace distributed -- GitLab