提交 7eb29f26 编写于 作者: Y Yu Yang 提交者: emailweixu

Try to fix MultinomialSampler (#102)

* Also refine the unit test to run multiple iterations, so that it cannot pass merely because of a lucky random number.
上级 8e957df4
......@@ -19,7 +19,7 @@ namespace paddle {
MultinomialSampler::MultinomialSampler(const real* prob, int size)
: rand_(0.0, size) {
intervals_.reserve(size + 1);
intervals_.resize(size + 1);
double sum = 0;
for (int i = 0; i < size; ++i) {
sum += prob[i];
......@@ -50,12 +50,13 @@ MultinomialSampler::MultinomialSampler(const real* prob, int size)
int bigPos = nextBigPos(0);
auto fillIntervals = [&]() {
while (bigPos < size && smallPos < size) {
while (bigPos < size) {
while (intervals_[bigPos].thresh > 1 && smallPos < size) {
intervals_[smallPos].otherId = bigPos;
intervals_[bigPos].thresh -= 1 - intervals_[smallPos].thresh;
smallPos = nextSmallPos(smallPos + 1);
}
if (smallPos >= size) break;
bigPos = nextBigPos(bigPos + 1);
// If intervals_[bigPos].thresh < 1, it becomes a small interval
}
......
......@@ -41,39 +41,42 @@ public:
TEST(MultinomialSampler, gen) {
int numGrids = 1024 * 1024;
int size = 1024 * 4;
default_random_engine reng;
uniform_int_distribution<int> rand(1, numGrids / size * 1.8);
vector<real> prob;
int sum = 0;
for (int i = 0; i < size; ++i) {
prob.push_back(rand(reng));
sum += prob.back();
}
CHECK_LE(sum, numGrids);
prob.back() += numGrids - sum;
vector<int> counts(size);
MultinomialSamplerTester sampler(&prob[0], size);
counts.assign(size, 0);
{
double s = (double)size / (double)numGrids;
REGISTER_TIMER("MultinomialSampler");
for (double i = 0; i < numGrids; ++i) {
int ret = sampler.testGen([i, s]() { return s * i; });
if (ret < 0 || ret >= size) {
EXPECT_GE(ret, 0);
EXPECT_LT(ret, size);
break;
for (size_t iter=0; iter < 256; ++iter) {
uniform_int_distribution<int> rand(1, numGrids / size * 1.8);
vector<real> prob;
int sum = 0;
for (int i = 0; i < size; ++i) {
prob.push_back(rand(reng));
sum += prob.back();
}
CHECK_LE(sum, numGrids);
prob.back() += numGrids - sum;
vector<int> counts(size);
MultinomialSamplerTester sampler(&prob[0], size);
counts.assign(size, 0);
{
double s = (double)size / (double)numGrids;
REGISTER_TIMER("MultinomialSampler");
for (double i = 0; i < numGrids; ++i) {
int ret = sampler.testGen([i, s]() { return s * i; });
if (ret < 0 || ret >= size) {
EXPECT_GE(ret, 0);
EXPECT_LT(ret, size);
break;
}
++counts[ret];
}
++counts[ret];
}
}
for (int i = 0; i < size; ++i) {
if (prob[i] != counts[i]) {
EXPECT_EQ(prob[i], counts[i]);
LOG(INFO) << "i=" << i;
break;
for (int i = 0; i < size; ++i) {
if (prob[i] != counts[i]) {
EXPECT_EQ(prob[i], counts[i]);
LOG(INFO) << iter;
break;
}
}
}
}
......@@ -135,6 +138,7 @@ void benchmarkRandom() {
LOG(INFO) << "sum1=" << sum1;
}
int main(int argc, char** argv) {
initMain(argc, argv);
testing::InitGoogleTest(&argc, argv);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册