From 5cfc40dea862d963dd0d2625d780cae4d33cda60 Mon Sep 17 00:00:00 2001 From: tangwei12 Date: Thu, 24 Jan 2019 14:11:00 +0800 Subject: [PATCH] nce add check sample labels, test=develop (#15463) * nce add check sample labels, test=develop --- paddle/fluid/operators/math/sampler.cc | 10 +++++++++- paddle/fluid/operators/math/sampler.h | 1 + paddle/fluid/operators/nce_op.h | 5 +++++ python/paddle/fluid/layers/nn.py | 8 ++++---- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/operators/math/sampler.cc b/paddle/fluid/operators/math/sampler.cc index 2708f3bcd8f..238d9f29050 100644 --- a/paddle/fluid/operators/math/sampler.cc +++ b/paddle/fluid/operators/math/sampler.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/sampler.h" +#include #include #include #include @@ -77,7 +78,14 @@ int64_t CustomSampler::Sample() const { auto index = (*int_dist_)(*random_engine_); auto p = (*real_dist_)(*random_engine_); if (p > alias_probs_[index]) { - return alias_[index]; + int alias = alias_[index]; + + if (alias == exceptional_val) { + LOG(WARNING) << "WARNING: CustomSampler get alias " << exceptional_val; + return index; + } + + return alias; } else { return index; } diff --git a/paddle/fluid/operators/math/sampler.h b/paddle/fluid/operators/math/sampler.h index 98e0b898a50..3fa5a7ae336 100644 --- a/paddle/fluid/operators/math/sampler.h +++ b/paddle/fluid/operators/math/sampler.h @@ -116,6 +116,7 @@ class CustomSampler : public Sampler { const float* alias_probs_; const int* alias_; const float* probs_; + const int exceptional_val = -1; std::shared_ptr random_engine_; std::shared_ptr> real_dist_; std::shared_ptr> int_dist_; diff --git a/paddle/fluid/operators/nce_op.h b/paddle/fluid/operators/nce_op.h index 2c97eef096e..3e48b67a570 100644 --- a/paddle/fluid/operators/nce_op.h +++ b/paddle/fluid/operators/nce_op.h @@ -119,6 +119,11 @@ class 
NCEKernel : public framework::OpKernel { PrepareSamples(context, sampler); auto sample_labels = context.Output("SampleLabels"); const int64_t *sample_labels_data = sample_labels->data(); + + for (int x = 0; x < sample_labels->numel(); x++) { + PADDLE_ENFORCE_GE(sample_labels_data[x], 0, "nce sample label %d", x); + } + auto sample_out = context.Output("SampleLogits"); T *sample_out_data = sample_out->mutable_data(context.GetPlace()); auto label = context.Input("Label"); diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index e2a4c059263..0116eb10d4a 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -5146,9 +5146,9 @@ def nce(input, littles = [] for i in range(custom_dist_len): normal_prob = custom_dist[i] * custom_dist_len - if normal_prob - 1.0 > 1e-4: + if normal_prob - 1.0 > 0: bigs.append((i, normal_prob)) - elif 1.0 - normal_prob > 1e-4: + elif 1.0 - normal_prob > 0: littles.append((i, normal_prob)) else: alias_probs_[i] = normal_prob @@ -5164,9 +5164,9 @@ def nce(input, alias_probs_[little[0]] = little[1] alias_[little[0]] = big_idx big_left = big[1] + little[1] - 1 - if big_left - 1.0 > 1e-4: + if big_left - 1.0 > 0: bigs.append((big_idx, big_left)) - elif 1.0 - big_left > 1e-4: + elif 1.0 - big_left > 0: littles.append((big_idx, big_left)) else: alias_probs_[big_idx] = big_left -- GitLab