diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 204bd8e2693351457505478f4d7e43b4f2f0b839..12d2bc9bf99b856d2308f842562dcb61bb07adfc 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.115 1999/07/19 07:07:20 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.116 1999/08/01 04:54:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -78,7 +78,7 @@ static void vc_vacpage(Page page, VPageDescr vpd); static void vc_vaconeind(VPageList vpl, Relation indrel, int num_tuples, int keep_tuples); static void vc_scanoneind(Relation indrel, int num_tuples); static void vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple); -static void vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int16 *bucket_len); +static void vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len); static void vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats); static void vc_delhilowstats(Oid relid, int attcnt, int *attnums); static VPageDescr vc_tidreapped(ItemPointer itemptr, VPageList vpl); @@ -473,9 +473,13 @@ vc_vacone(Oid relid, bool analyze, List *va_cols) { pgopform = (Form_pg_operator) GETSTRUCT(func_operator); fmgr_info(pgopform->oprcode, &(stats->f_cmplt)); + stats->op_cmplt = oprid(func_operator); } else + { stats->f_cmplt.fn_addr = NULL; + stats->op_cmplt = InvalidOid; + } func_operator = oper(">", stats->attr->atttypid, stats->attr->atttypid, true); if (func_operator != NULL) @@ -2200,8 +2204,8 @@ vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple) { swapDatum(stats->guess1, stats->guess2); swapInt(stats->guess1_len, stats->guess2_len); - stats->guess1_cnt = stats->guess2_hits; swapLong(stats->guess1_hits, stats->guess2_hits); + stats->guess1_cnt = stats->guess1_hits; } if (stats->guess1_cnt > stats->best_cnt) { @@ -2227,7 +2231,7 @@ vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple) * */ static void -vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int16 *bucket_len) +vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len) { if (attr->attbyval && attr->attlen != -1) *bucket = value; @@ -2340,13 +2344,14 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats * selratio = 0; else if (VacAttrStatsLtGtValid(stats) && stats->min_cnt + stats->max_cnt == stats->nonnull_cnt) { + /* exact result when there are just 1 or 2 values... */ double min_cnt_d = stats->min_cnt, max_cnt_d = stats->max_cnt, null_cnt_d = stats->null_cnt, - nonnullcnt_d = stats->nonnull_cnt; /* prevent overflow */ + nonnull_cnt_d = stats->nonnull_cnt; /* prevent overflow */ selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / - (nonnullcnt_d + null_cnt_d) / (nonnullcnt_d + null_cnt_d); + (nonnull_cnt_d + null_cnt_d) / (nonnull_cnt_d + null_cnt_d); } else { @@ -2359,7 +2364,9 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats * */ selratio = (most * most + 0.20 * most * (total - most)) / total / total; } - if (selratio > 1.0) + if (selratio < 0.0) + selratio = 0.0; + else if (selratio > 1.0) selratio = 1.0; attp->attdisbursion = selratio; @@ -2375,13 +2382,22 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats * * doing system relations, especially pg_statistic is a * problem */ - if (VacAttrStatsLtGtValid(stats) && stats->initialized /* && - * !IsSystemRelationName( - * - pgcform->relname.data) */ ) + if (VacAttrStatsLtGtValid(stats) && stats->initialized + /* && !IsSystemRelationName(pgcform->relname.data) + */ ) { + float32data nullratio; + float32data bestratio; FmgrInfo out_function; char *out_string; + double best_cnt_d = stats->best_cnt, + null_cnt_d = stats->null_cnt, + nonnull_cnt_d = stats->nonnull_cnt; /* prevent overflow */ + + nullratio = null_cnt_d / (nonnull_cnt_d + null_cnt_d); + bestratio = best_cnt_d / (nonnull_cnt_d + null_cnt_d); + + fmgr_info(stats->outfunc, &out_function); for (i = 0; i < Natts_pg_statistic; ++i) nulls[i] = ' '; @@ -2391,26 +2407,34 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats * * ---------------- */ i = 0; - values[i++] = (Datum) relid; /* 1 */ - values[i++] = (Datum) attp->attnum; /* 2 */ - values[i++] = (Datum) InvalidOid; /* 3 */ - fmgr_info(stats->outfunc, &out_function); - out_string = (*fmgr_faddr(&out_function)) (stats->min, stats->attr->atttypid); - values[i++] = (Datum) fmgr(F_TEXTIN, out_string); + values[i++] = (Datum) relid; /* starelid */ + values[i++] = (Datum) attp->attnum; /* staattnum */ + values[i++] = (Datum) stats->op_cmplt; /* staop */ + /* hack: this code knows float4 is pass-by-ref */ + values[i++] = PointerGetDatum(&nullratio); /* stanullfrac */ + values[i++] = PointerGetDatum(&bestratio); /* stacommonfrac */ + out_string = (*fmgr_faddr(&out_function)) (stats->best, stats->attr->atttypid, stats->attr->atttypmod); + values[i++] = PointerGetDatum(textin(out_string)); /* stacommonval */ pfree(out_string); - out_string = (char *) (*fmgr_faddr(&out_function)) (stats->max, stats->attr->atttypid); - values[i++] = (Datum) fmgr(F_TEXTIN, out_string); + out_string = (*fmgr_faddr(&out_function)) (stats->min, stats->attr->atttypid, stats->attr->atttypmod); + values[i++] = PointerGetDatum(textin(out_string)); /* staloval */ + pfree(out_string); + out_string = (char *) (*fmgr_faddr(&out_function)) (stats->max, stats->attr->atttypid, stats->attr->atttypmod); + values[i++] = PointerGetDatum(textin(out_string)); /* stahival */ pfree(out_string); stup = heap_formtuple(sd->rd_att, values, nulls); /* ---------------- - * insert the tuple in the relation and get the tuple's oid. + * insert the tuple in the relation. * ---------------- */ heap_insert(sd, stup); - pfree(DatumGetPointer(values[3])); - pfree(DatumGetPointer(values[4])); + + /* release allocated space */ + pfree(DatumGetPointer(values[Anum_pg_statistic_stacommonval-1])); + pfree(DatumGetPointer(values[Anum_pg_statistic_staloval-1])); + pfree(DatumGetPointer(values[Anum_pg_statistic_stahival-1])); pfree(stup); } } diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index db78c48525649537e49dea428040a276a08c8182..0b6afc814b6ed849811f48cf10309e6dedd6ecd6 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -6,13 +6,11 @@ * These routines are registered in the operator catalog in the * "oprrest" and "oprjoin" attributes. * - * XXX check all the functions--I suspect them to be 1-based. - * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.35 1999/07/17 20:17:59 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.36 1999/08/01 04:54:22 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,7 +19,10 @@ #include "access/heapam.h" #include "catalog/catname.h" +#include "catalog/pg_operator.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_type.h" +#include "parser/parse_oper.h" #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/syscache.h" @@ -29,24 +30,35 @@ /* N is not a valid var/constant or relation id */ #define NONVALUE(N) ((N) == -1) -/* - * generalize the test for functional index selectivity request - */ -#define FunctionalSelectivity(nIndKeys,attNum) (attNum==InvalidAttrNumber) +/* are we looking at a functional index selectivity request? */ +#define FunctionalSelectivity(nIndKeys,attNum) ((attNum)==InvalidAttrNumber) -static float32data getattdisbursion(Oid relid, AttrNumber attnum); -static void gethilokey(Oid relid, AttrNumber attnum, Oid opid, - char **high, char **low); +/* default selectivity estimate for inequalities such as "A < b" */ +#define DEFAULT_INEQ_SEL (1.0 / 3.0) + +static void getattproperties(Oid relid, AttrNumber attnum, + Oid *typid, + int *typlen, + bool *typbyval, + int32 *typmod); +static bool getattstatistics(Oid relid, AttrNumber attnum, + Oid typid, int32 typmod, + double *nullfrac, + double *commonfrac, + Datum *commonval, + Datum *loval, + Datum *hival); +static double getattdisbursion(Oid relid, AttrNumber attnum); /* - * eqsel - Selectivity of "=" for any data type. + * eqsel - Selectivity of "=" for any data types. */ float64 eqsel(Oid opid, Oid relid, AttrNumber attno, - char *value, + Datum value, int32 flag) { float64 result; @@ -55,18 +67,124 @@ eqsel(Oid opid, if (NONVALUE(attno) || NONVALUE(relid)) *result = 0.1; else - *result = (float64data) getattdisbursion(relid, (int) attno); + { + Oid typid; + int typlen; + bool typbyval; + int32 typmod; + double nullfrac; + double commonfrac; + Datum commonval; + double selec; + + /* get info about the attribute */ + getattproperties(relid, attno, + &typid, &typlen, &typbyval, &typmod); + + if (getattstatistics(relid, attno, typid, typmod, + &nullfrac, &commonfrac, &commonval, + NULL, NULL)) + { + if (flag & SEL_CONSTANT) + { + /* Is the constant the same as the most common value? */ + HeapTuple oprtuple; + Oid ltype, + rtype; + Operator func_operator; + bool mostcommon = false; + + /* get left and right datatypes of the operator */ + oprtuple = get_operator_tuple(opid); + if (! HeapTupleIsValid(oprtuple)) + elog(ERROR, "eqsel: no tuple for operator %u", opid); + ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft; + rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright; + + /* and find appropriate equality operator (no, it ain't + * necessarily opid itself...) + */ + func_operator = oper("=", ltype, rtype, true); + + if (func_operator != NULL) + { + RegProcedure eqproc = ((Form_pg_operator) GETSTRUCT(func_operator))->oprcode; + if (flag & SEL_RIGHT) /* given value on the right? */ + mostcommon = (bool) + DatumGetUInt8(fmgr(eqproc, commonval, value)); + else + mostcommon = (bool) + DatumGetUInt8(fmgr(eqproc, value, commonval)); + } + + if (mostcommon) + { + /* Search is for the most common value. We know the + * selectivity exactly (or as exactly as VACUUM could + * calculate it, anyway). + */ + selec = commonfrac; + } + else + { + /* Comparison is against a constant that is neither the + * most common value nor null. Its selectivity cannot + * be more than this: + */ + selec = 1.0 - commonfrac - nullfrac; + if (selec > commonfrac) + selec = commonfrac; + /* and in fact it's probably less, so apply a fudge + * factor. + */ + selec *= 0.5; + } + } + else + { + /* Search is for a value that we do not know a priori, + * but we will assume it is not NULL. Selectivity + * cannot be more than this: + */ + selec = 1.0 - nullfrac; + if (selec > commonfrac) + selec = commonfrac; + /* and in fact it's probably less, so apply a fudge + * factor. + */ + selec *= 0.5; + } + + /* result should be in range, but make sure... */ + if (selec < 0.0) + selec = 0.0; + else if (selec > 1.0) + selec = 1.0; + + if (! typbyval) + pfree(DatumGetPointer(commonval)); + } + else + { + /* No VACUUM ANALYZE stats available, so make a guess using + * the disbursion stat (if we have that, which is unlikely...) + */ + selec = getattdisbursion(relid, attno); + } + + *result = (float64data) selec; + } return result; } /* - * neqsel - Selectivity of "!=" for any data type. + * neqsel - Selectivity of "!=" for any data types. */ float64 neqsel(Oid opid, Oid relid, AttrNumber attno, - char *value, + Datum value, int32 flag) { float64 result; @@ -77,96 +195,164 @@ neqsel(Oid opid, } /* - * intltsel - Selectivity of "<" for integers. + * intltsel - Selectivity of "<" (also "<=") for integers. * Should work for both longs and shorts. */ float64 intltsel(Oid opid, Oid relid, AttrNumber attno, - int32 value, + Datum value, int32 flag) { float64 result; - char *highchar, - *lowchar; - long val, - high, - low, - top, - bottom; result = (float64) palloc(sizeof(float64data)); - if (NONVALUE(attno) || NONVALUE(relid)) - *result = 1.0 / 3; + if (! (flag & SEL_CONSTANT) || NONVALUE(attno) || NONVALUE(relid)) + *result = DEFAULT_INEQ_SEL; else { - /* XXX val = atol(value); */ - val = value; - gethilokey(relid, (int) attno, opid, &highchar, &lowchar); - if (*highchar == 'n' || *lowchar == 'n') + HeapTuple oprtuple; + Oid ltype, + rtype; + Oid typid; + int typlen; + bool typbyval; + int32 typmod; + Datum hival, + loval; + long val, + high, + low, + numerator, + denominator; + + /* get left and right datatypes of the operator */ + oprtuple = get_operator_tuple(opid); + if (! HeapTupleIsValid(oprtuple)) + elog(ERROR, "intltsel: no tuple for operator %u", opid); + ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft; + rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright; + + /* + * TEMPORARY HACK: this code is currently getting called for + * a bunch of non-integral types. Give a default estimate if + * either side is not pass-by-val. Need better solution. + */ + if (! get_typbyval(ltype) || ! get_typbyval(rtype)) { - *result = 1.0 / 3.0; + *result = DEFAULT_INEQ_SEL; return result; } - high = atol(highchar); - low = atol(lowchar); - if ((flag & SEL_RIGHT && val < low) || - (!(flag & SEL_RIGHT) && val > high)) + + /* Deduce type of the constant, and convert to uniform "long" format. + * Note that constant might well be a different type than attribute. + * XXX this ought to use a type-specific "convert to double" op. + */ + typid = (flag & SEL_RIGHT) ? rtype : ltype; + switch (get_typlen(typid)) { - float32data nvals; + case 1: + val = (long) DatumGetUInt8(value); + break; + case 2: + val = (long) DatumGetInt16(value); + break; + case 4: + val = (long) DatumGetInt32(value); + break; + default: + elog(ERROR, "intltsel: unsupported type %u", typid); + *result = DEFAULT_INEQ_SEL; + return result; + } - nvals = getattdisbursion(relid, (int) attno); - if (nvals == 0) - *result = 1.0 / 3.0; - else - { - *result = 3.0 * (float64data) nvals; - if (*result > 1.0) - *result = 1; - } + /* Now get info about the attribute */ + getattproperties(relid, attno, + &typid, &typlen, &typbyval, &typmod); + + if (! getattstatistics(relid, attno, typid, typmod, + NULL, NULL, NULL, + &loval, &hival)) + { + *result = DEFAULT_INEQ_SEL; + return result; + } + /* + * Convert loval/hival to common "long int" representation. + */ + switch (typlen) + { + case 1: + low = (long) DatumGetUInt8(loval); + high = (long) DatumGetUInt8(hival); + break; + case 2: + low = (long) DatumGetInt16(loval); + high = (long) DatumGetInt16(hival); + break; + case 4: + low = (long) DatumGetInt32(loval); + high = (long) DatumGetInt32(hival); + break; + default: + elog(ERROR, "intltsel: unsupported type %u", typid); + *result = DEFAULT_INEQ_SEL; + return result; + } + if (val < low || val > high) + { + /* If given value is outside the statistical range, + * assume we have out-of-date stats and return a default guess. + * We could return a small or large value if we trusted the stats + * more. XXX change this eventually. + */ + *result = DEFAULT_INEQ_SEL; } else { - bottom = high - low; - if (bottom == 0) - ++bottom; + denominator = high - low; + if (denominator <= 0) + denominator = 1; if (flag & SEL_RIGHT) - top = val - low; + numerator = val - low; else - top = high - val; - if (top > bottom) + numerator = high - val; + if (numerator <= 0) /* never return a zero estimate! */ + numerator = 1; + if (numerator >= denominator) *result = 1.0; else - { - if (top == 0) - ++top; - *result = ((1.0 * top) / bottom); - } + *result = (double) numerator / (double) denominator; + } + if (! typbyval) + { + pfree(DatumGetPointer(hival)); + pfree(DatumGetPointer(loval)); } } return result; } /* - * intgtsel - Selectivity of ">" for integers. + * intgtsel - Selectivity of ">" (also ">=") for integers. * Should work for both longs and shorts. */ float64 intgtsel(Oid opid, Oid relid, AttrNumber attno, - int32 value, + Datum value, int32 flag) { float64 result; - int notflag; - if (flag & 0) - notflag = flag & ~SEL_RIGHT; - else - notflag = flag | SEL_RIGHT; - result = intltsel(opid, relid, attno, value, (int32) notflag); + /* Compute selectivity of "<", then invert --- but only if we + * were able to produce a non-default estimate. + */ + result = intltsel(opid, relid, attno, value, flag); + if (*result != DEFAULT_INEQ_SEL) + *result = 1.0 - *result; return result; } @@ -181,7 +367,7 @@ eqjoinsel(Oid opid, AttrNumber attno2) { float64 result; - float32data num1, + float64data num1, num2, max; @@ -191,13 +377,13 @@ eqjoinsel(Oid opid, *result = 0.1; else { - num1 = getattdisbursion(relid1, (int) attno1); - num2 = getattdisbursion(relid2, (int) attno2); + num1 = getattdisbursion(relid1, attno1); + num2 = getattdisbursion(relid2, attno2); max = (num1 > num2) ? num1 : num2; - if (max == 0) + if (max <= 0) *result = 1.0; else - *result = (float64data) max; + *result = max; } return result; } @@ -220,7 +406,7 @@ neqjoinsel(Oid opid, } /* - * intltjoinsel - Join selectivity of "<" + * intltjoinsel - Join selectivity of "<" and "<=" */ float64 intltjoinsel(Oid opid, @@ -232,12 +418,12 @@ intltjoinsel(Oid opid, float64 result; result = (float64) palloc(sizeof(float64data)); - *result = 1.0 / 3.0; + *result = DEFAULT_INEQ_SEL; return result; } /* - * intgtjoinsel - Join selectivity of ">" + * intgtjoinsel - Join selectivity of ">" and ">=" */ float64 intgtjoinsel(Oid opid, @@ -249,129 +435,230 @@ intgtjoinsel(Oid opid, float64 result; result = (float64) palloc(sizeof(float64data)); - *result = 1.0 / 3.0; + *result = DEFAULT_INEQ_SEL; return result; } /* - * getattdisbursion - Retrieves the number of values within an attribute. - * - * Note: - * getattdisbursion and gethilokey both currently use keyed - * relation scans and amgetattr. Alternatively, - * the relation scan could be non-keyed and the tuple - * returned could be cast (struct X *) tuple + tuple->t_hoff. - * The first method is good for testing the implementation, - * but the second may ultimately be faster?!? In any case, - * using the cast instead of amgetattr would be - * more efficient. However, the cast will not work - * for gethilokey which accesses stahikey in struct statistic. + * getattproperties + * Retrieve pg_attribute properties for an attribute, + * including type OID, type len, type byval flag, typmod. */ -static float32data -getattdisbursion(Oid relid, AttrNumber attnum) +static void +getattproperties(Oid relid, AttrNumber attnum, + Oid *typid, int *typlen, bool *typbyval, int32 *typmod) { HeapTuple atp; - float32data nvals; - int32 ntuples; + Form_pg_attribute att_tup; atp = SearchSysCacheTuple(ATTNUM, ObjectIdGetDatum(relid), Int16GetDatum(attnum), 0, 0); - if (!HeapTupleIsValid(atp)) - { - elog(ERROR, "getattdisbursion: no attribute tuple %u %d", - relid, attnum); - return 0; - } - nvals = ((Form_pg_attribute) GETSTRUCT(atp))->attdisbursion; - if (nvals > 0) - return nvals; - - atp = SearchSysCacheTuple(RELOID, - ObjectIdGetDatum(relid), - 0, 0, 0); - - /* - * XXX -- use number of tuples as number of distinctive values just - * for now, in case number of distinctive values is not cached - */ - if (!HeapTupleIsValid(atp)) - { - elog(ERROR, "getattdisbursion: no relation tuple %u", relid); - return 0; - } - ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples; - /* Look above how nvals is used. - vadim 04/09/97 */ - if (ntuples > 0) - nvals = 1.0 / ntuples; - - return nvals; + if (! HeapTupleIsValid(atp)) + elog(ERROR, "getattproperties: no attribute tuple %u %d", + relid, (int) attnum); + att_tup = (Form_pg_attribute) GETSTRUCT(atp); + + *typid = att_tup->atttypid; + *typlen = att_tup->attlen; + *typbyval = att_tup->attbyval; + *typmod = att_tup->atttypmod; } /* - * gethilokey - Returns a pointer to strings containing - * the high and low keys within an attribute. + * getattstatistics + * Retrieve the pg_statistic data for an attribute. + * Returns 'false' if no stats are available. + * + * Inputs: + * 'relid' and 'attnum' are the relation and attribute number. + * 'typid' and 'typmod' are the type and typmod of the column, + * which the caller must already have looked up. * - * Currently returns "0", and "0" in high and low if the statistic - * catalog does not contain the proper tuple. Eventually, the - * statistic demon should have the tuple maintained, and it should - * elog() if the tuple is missing. + * Outputs: + * The available stats are nullfrac, commonfrac, commonval, loval, hival. + * The caller need not retrieve all five --- pass NULL pointers for the + * unwanted values. * - * XXX Question: is this worth sticking in the catalog caches, - * or will this get invalidated too often? + * commonval, loval, hival are returned as Datums holding the internal + * representation of the values. (Note that these should be pfree'd + * after use if the data type is not by-value.) + * + * XXX currently, this does a linear search of pg_statistic because there + * is no index nor syscache for pg_statistic. FIX THIS! */ -static void -gethilokey(Oid relid, - AttrNumber attnum, - Oid opid, - char **high, - char **low) +static bool +getattstatistics(Oid relid, AttrNumber attnum, Oid typid, int32 typmod, + double *nullfrac, + double *commonfrac, + Datum *commonval, + Datum *loval, + Datum *hival) { Relation rel; HeapScanDesc scan; - static ScanKeyData key[3] = { + static ScanKeyData key[2] = { {0, Anum_pg_statistic_starelid, F_OIDEQ, {0, 0, F_OIDEQ}}, - {0, Anum_pg_statistic_staattnum, F_INT2EQ, {0, 0, F_INT2EQ}}, - {0, Anum_pg_statistic_staop, F_OIDEQ, {0, 0, F_OIDEQ}} + {0, Anum_pg_statistic_staattnum, F_INT2EQ, {0, 0, F_INT2EQ}} }; bool isnull; HeapTuple tuple; + HeapTuple typeTuple; + FmgrInfo inputproc; rel = heap_openr(StatisticRelationName); key[0].sk_argument = ObjectIdGetDatum(relid); key[1].sk_argument = Int16GetDatum((int16) attnum); - key[2].sk_argument = ObjectIdGetDatum(opid); - scan = heap_beginscan(rel, 0, SnapshotNow, 3, key); + + scan = heap_beginscan(rel, 0, SnapshotNow, 2, key); tuple = heap_getnext(scan, 0); if (!HeapTupleIsValid(tuple)) { - *high = "n"; - *low = "n"; + /* no such stats entry */ + heap_endscan(scan); + heap_close(rel); + return false; + } - /* - * XXX elog(ERROR, "gethilokey: statistic tuple not - * found"); - */ - return; + /* We assume that there will only be one entry in pg_statistic + * for the given rel/att. Someday, VACUUM might store more than one... + */ + if (nullfrac) + *nullfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stanullfrac; + if (commonfrac) + *commonfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stacommonfrac; + + /* Get the type input proc for the column datatype */ + typeTuple = SearchSysCacheTuple(TYPOID, + ObjectIdGetDatum(typid), + 0, 0, 0); + if (! HeapTupleIsValid(typeTuple)) + elog(ERROR, "getattstatistics: Cache lookup failed for type %u", + typid); + fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc); + + /* Values are variable-length fields, so cannot access as struct fields. + * Must do it the hard way with heap_getattr. + */ + if (commonval) + { + text *val = (text *) heap_getattr(tuple, + Anum_pg_statistic_stacommonval, + RelationGetDescr(rel), + &isnull); + if (isnull) + { + elog(DEBUG, "getattstatistics: stacommonval is null"); + *commonval = PointerGetDatum(NULL); + } + else + { + char *strval = textout(val); + *commonval = (Datum) + (*fmgr_faddr(&inputproc)) (strval, typid, typmod); + pfree(strval); + } } - *high = textout((struct varlena *) - heap_getattr(tuple, - Anum_pg_statistic_stahikey, - RelationGetDescr(rel), - &isnull)); - if (isnull) - elog(DEBUG, "gethilokey: high key is null"); - *low = textout((struct varlena *) - heap_getattr(tuple, - Anum_pg_statistic_stalokey, - RelationGetDescr(rel), - &isnull)); - if (isnull) - elog(DEBUG, "gethilokey: low key is null"); + + if (loval) + { + text *val = (text *) heap_getattr(tuple, + Anum_pg_statistic_staloval, + RelationGetDescr(rel), + &isnull); + if (isnull) + { + elog(DEBUG, "getattstatistics: staloval is null"); + *loval = PointerGetDatum(NULL); + } + else + { + char *strval = textout(val); + *loval = (Datum) + (*fmgr_faddr(&inputproc)) (strval, typid, typmod); + pfree(strval); + } + } + + if (hival) + { + text *val = (text *) heap_getattr(tuple, + Anum_pg_statistic_stahival, + RelationGetDescr(rel), + &isnull); + if (isnull) + { + elog(DEBUG, "getattstatistics: stahival is null"); + *hival = PointerGetDatum(NULL); + } + else + { + char *strval = textout(val); + *hival = (Datum) + (*fmgr_faddr(&inputproc)) (strval, typid, typmod); + pfree(strval); + } + } + heap_endscan(scan); heap_close(rel); + return true; +} + +/* + * getattdisbursion + * Retrieve the disbursion statistic for an attribute, + * or produce an estimate if no info is available. + */ +static double +getattdisbursion(Oid relid, AttrNumber attnum) +{ + HeapTuple atp; + double disbursion; + int32 ntuples; + + atp = SearchSysCacheTuple(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum), + 0, 0); + if (!HeapTupleIsValid(atp)) + { + /* this should not happen */ + elog(ERROR, "getattdisbursion: no attribute tuple %u %d", + relid, attnum); + return 0.1; + } + + disbursion = ((Form_pg_attribute) GETSTRUCT(atp))->attdisbursion; + if (disbursion > 0.0) + return disbursion; + + /* VACUUM ANALYZE has not stored a disbursion statistic for us. + * Produce an estimate = 1/numtuples. This may produce + * unreasonably small estimates for large tables, so limit + * the estimate to no less than 0.01. + */ + atp = SearchSysCacheTuple(RELOID, + ObjectIdGetDatum(relid), + 0, 0, 0); + if (!HeapTupleIsValid(atp)) + { + /* this should not happen */ + elog(ERROR, "getattdisbursion: no relation tuple %u", relid); + return 0.1; + } + + ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples; + + if (ntuples > 0) + disbursion = 1.0 / (double) ntuples; + + if (disbursion < 0.01) + disbursion = 0.01; + + return disbursion; } float64 diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h index 19b87b68b1b09c37d5fa8fe876308bdb239fd3ae..1c719443282f433c295918d23d9c91695e745f2b 100644 --- a/src/include/catalog/pg_statistic.h +++ b/src/include/catalog/pg_statistic.h @@ -7,7 +7,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: pg_statistic.h,v 1.6 1999/02/13 23:21:15 momjian Exp $ + * $Id: pg_statistic.h,v 1.7 1999/08/01 04:54:21 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -32,11 +32,32 @@ */ CATALOG(pg_statistic) { - Oid starelid; - int2 staattnum; - Oid staop; - text stalokey; /* VARIABLE LENGTH FIELD */ - text stahikey; /* VARIABLE LENGTH FIELD */ + /* These fields form the unique key for the entry: */ + Oid starelid; /* relation containing attribute */ + int2 staattnum; /* attribute (column) stats are for */ + Oid staop; /* '<' comparison op used for lo/hi vals */ + /* Note: the current VACUUM code will never produce more than one entry + * per column, but in theory there could be multiple entries if a datatype + * has more than one useful ordering operator. Also, the current code + * will not write an entry unless it found at least one non-NULL value + * in the column; so the remaining fields will never be NULL. + */ + + /* These fields contain the stats about the column indicated by the key */ + float4 stanullfrac; /* the fraction of the entries that are NULL */ + float4 stacommonfrac; /* the fraction that are the most common val */ + + /* THE REST OF THESE ARE VARIABLE LENGTH FIELDS. + * They cannot be accessed as C struct entries; you have to use the + * full field access machinery (heap_getattr) for them. + * + * All three of these are text representations of data values of the + * column's data type. To re-create the actual Datum, do + * datatypein(textout(givenvalue)). + */ + text stacommonval; /* most common non-null value in column */ + text staloval; /* smallest non-null value in column */ + text stahival; /* largest non-null value in column */ } FormData_pg_statistic; /* ---------------- @@ -50,11 +71,14 @@ typedef FormData_pg_statistic *Form_pg_statistic; * compiler constants for pg_statistic * ---------------- */ -#define Natts_pg_statistic 5 +#define Natts_pg_statistic 8 #define Anum_pg_statistic_starelid 1 #define Anum_pg_statistic_staattnum 2 #define Anum_pg_statistic_staop 3 -#define Anum_pg_statistic_stalokey 4 -#define Anum_pg_statistic_stahikey 5 +#define Anum_pg_statistic_stanullfrac 4 +#define Anum_pg_statistic_stacommonfrac 5 +#define Anum_pg_statistic_stacommonval 6 +#define Anum_pg_statistic_staloval 7 +#define Anum_pg_statistic_stahival 8 #endif /* PG_STATISTIC_H */ diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 59a72bbb15b949228ed7b059964f61f419a44027..59a7fe4a5063e4e82e7a0ee8c854b4f4238b5cc1 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: vacuum.h,v 1.22 1999/07/15 15:21:03 momjian Exp $ + * $Id: vacuum.h,v 1.23 1999/08/01 04:54:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -67,22 +67,23 @@ typedef struct guess2, max, min; - int16 best_len, + int best_len, guess1_len, guess2_len, max_len, min_len; - int32 best_cnt, + long best_cnt, guess1_cnt, guess1_hits, guess2_hits, null_cnt, - nonnull_cnt; - int32 max_cnt, + nonnull_cnt, + max_cnt, min_cnt; FmgrInfo f_cmpeq, f_cmplt, f_cmpgt; + Oid op_cmplt; regproc outfunc; bool initialized; } VacAttrStats; diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index e6a0b4157d5ea5ce013ed614513925f0975bbd8b..dfe1897cbe22e7d364972c0c4cbcafafd86ec260 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: builtins.h,v 1.84 1999/07/16 17:07:39 momjian Exp $ + * $Id: builtins.h,v 1.85 1999/08/01 04:54:20 tgl Exp $ * * NOTES * This should normally only be included by fmgr.h. @@ -372,10 +372,10 @@ extern Oid regproctooid(RegProcedure rp); #define RegprocToOid(rp) regproctooid(rp) /* selfuncs.c */ -extern float64 eqsel(Oid opid, Oid relid, AttrNumber attno, char *value, int32 flag); -extern float64 neqsel(Oid opid, Oid relid, AttrNumber attno, char *value, int32 flag); -extern float64 intltsel(Oid opid, Oid relid, AttrNumber attno, int32 value, int32 flag); -extern float64 intgtsel(Oid opid, Oid relid, AttrNumber attno, int32 value, int32 flag); +extern float64 eqsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag); +extern float64 neqsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag); +extern float64 intltsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag); +extern float64 intgtsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag); extern float64 eqjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2); extern float64 neqjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2); extern float64 intltjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);