diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index efdd3b3cabbdbdec7d311c803ff72339a00a7dd2..9e0e4f0b8cbe0fd1abdc5080308a662f6e7fcd12 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.71 2002/12/15 16:17:46 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.72 2002/12/29 22:28:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,6 +20,7 @@
  */
 #include "postgres.h"
 
+#include <limits.h>
 #include <math.h>
 
 #include "access/hash.h"
@@ -344,7 +345,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 {
 	int			tupsize;
 	double		inner_rel_bytes;
-	double		hash_table_bytes;
+	long		hash_table_bytes;
+	double		dtmp;
 	int			nbatch;
 	int			nbuckets;
 	int			totalbuckets;
@@ -362,20 +364,22 @@
 	inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
 
 	/*
-	 * Target hashtable size is SortMem kilobytes, but not less than
-	 * sqrt(estimated inner rel size), so as to avoid horrible
-	 * performance.
+	 * Target in-memory hashtable size is SortMem kilobytes.
 	 */
-	hash_table_bytes = sqrt(inner_rel_bytes);
-	if (hash_table_bytes < (SortMem * 1024L))
-		hash_table_bytes = SortMem * 1024L;
+	hash_table_bytes = SortMem * 1024L;
 
 	/*
 	 * Count the number of hash buckets we want for the whole relation,
 	 * for an average bucket load of NTUP_PER_BUCKET (per virtual
-	 * bucket!).
+	 * bucket!).  It has to fit in an int, however.
 	 */
-	totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+	dtmp = ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+	if (dtmp < INT_MAX)
+		totalbuckets = (int) dtmp;
+	else
+		totalbuckets = INT_MAX;
+	if (totalbuckets <= 0)
+		totalbuckets = 1;
 
 	/*
 	 * Count the number of buckets we think will actually fit in the
@@ -409,10 +413,16 @@
 		 * that nbatch doesn't have to have anything to do with the ratio
 		 * totalbuckets/nbuckets; in fact, it is the number of groups we
 		 * will use for the part of the data that doesn't fall into the
-		 * first nbuckets hash buckets.
+		 * first nbuckets hash buckets.  We try to set it to make all the
+		 * batches the same size.  But we have to keep nbatch small
+		 * enough to avoid integer overflow in ExecHashJoinGetBatch().
 		 */
-		nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
-							hash_table_bytes);
+		dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
+					hash_table_bytes);
+		if (dtmp < INT_MAX / totalbuckets)
+			nbatch = (int) dtmp;
+		else
+			nbatch = INT_MAX / totalbuckets;
 		if (nbatch <= 0)
 			nbatch = 1;
 	}
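
For illustration only, not part of the patch: a minimal standalone sketch of the clamping idiom the patch applies in ExecChooseHashTableSize. The estimate is computed in double arithmetic, compared against the allowed ceiling (INT_MAX for totalbuckets, INT_MAX / totalbuckets for nbatch so that products involving totalbuckets cannot overflow an int) before the cast, and forced to be at least 1. The helper name clamp_to_int, main(), and the demo numbers are assumptions made up for this sketch; NTUP_PER_BUCKET and FUDGE_FAC are redefined locally with the values used in nodeHash.c of this era (10 and 2.0).

#include <stdio.h>
#include <limits.h>
#include <math.h>

#define NTUP_PER_BUCKET 10		/* target tuples per bucket, as in nodeHash.c */
#define FUDGE_FAC		2.0		/* size-estimate fudge factor, as in nodeHash.c */

/*
 * Clamp a double-valued estimate into [1, upper_bound] before casting to
 * int, mirroring the pattern the patch uses for totalbuckets and nbatch.
 */
static int
clamp_to_int(double dtmp, int upper_bound)
{
	int			result;

	if (dtmp < (double) upper_bound)
		result = (int) dtmp;
	else
		result = upper_bound;
	if (result <= 0)
		result = 1;
	return result;
}

int
main(void)
{
	double		ntuples = 1e12;	/* deliberately huge row-count estimate */
	int			totalbuckets;
	int			nbatch;

	/* Virtual bucket count: must fit in an int. */
	totalbuckets = clamp_to_int(ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET),
								INT_MAX);

	/*
	 * Batch count: kept at or below INT_MAX / totalbuckets so that
	 * totalbuckets * nbatch stays representable in an int.  The estimate
	 * fed in here (ntuples / 1e6) is an arbitrary stand-in for the
	 * planner's real batch computation.
	 */
	nbatch = clamp_to_int(ceil(ntuples / 1e6), INT_MAX / totalbuckets);

	printf("totalbuckets = %d, nbatch = %d\n", totalbuckets, nbatch);
	return 0;
}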