diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index cf99ba2c85f505c0e845f3ea910b7a384020d8f9..86ccac9448cf2d58039ba94b53e4e1e298af49db 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1,4 +1,4 @@ - + Server Configuration @@ -2133,7 +2133,7 @@ archive_command = 'copy "%p" "C:\\server\\archivedir\\%f"' # Windows not had a column-specific target set via ALTER TABLE SET STATISTICS. Larger values increase the time needed to do ANALYZE, but might improve the quality of the - planner's estimates. The default is 10. For more information + planner's estimates. The default is 100. For more information on the use of statistics by the PostgreSQL query planner, refer to . diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml index be5ec9f2c2ea43595367f88537f4c6005fc9aec7..aea529552c6aebbf2aed5b917e1dcafa24ba64ce 100644 --- a/doc/src/sgml/perform.sgml +++ b/doc/src/sgml/perform.sgml @@ -1,4 +1,4 @@ - + Performance Tips @@ -562,7 +562,7 @@ SELECT attname, n_distinct, most_common_vals FROM pg_stats WHERE tablename = 'ro column-by-column basis using the ALTER TABLE SET STATISTICS command, or globally by setting the configuration variable. - The default limit is presently 10 entries. Raising the limit + The default limit is presently 100 entries. Raising the limit might allow more accurate planner estimates to be made, particularly for columns with irregular data distributions, at the price of consuming more space in pg_statistic and slightly more diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index 61cd403fcfd6b9de6e39eedd355ef3476d0ef6db..b19c89f421141098900ffb212368a227fffb7547 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -1,5 +1,5 @@ @@ -142,7 +142,7 @@ where action is one of: This form sets the per-column statistics-gathering target for subsequent operations. - The target can be set in the range 0 to 1000; alternatively, set it + The target can be set in the range 0 to 10000; alternatively, set it to -1 to revert to using the system default statistics target (). For more information on the use of statistics by the diff --git a/doc/src/sgml/ref/analyze.sgml b/doc/src/sgml/ref/analyze.sgml index ad8514058560d70f2036d8d24bfac18f48bd1540..418082e6d931b4f056652bffaf09a703812ba5c5 100644 --- a/doc/src/sgml/ref/analyze.sgml +++ b/doc/src/sgml/ref/analyze.sgml @@ -1,5 +1,5 @@ @@ -132,10 +132,10 @@ ANALYZE [ VERBOSE ] [ table [ ( ANALYZE is run, even if the actual table contents did not change. This might result in small changes in the planner's estimated costs shown by - . In rare situations, this - non-determinism will cause the query optimizer to choose a - different query plan between runs of ANALYZE. To - avoid this, raise the amount of statistics collected by + . + In rare situations, this non-determinism will cause the planner's + choices of query plans to change after ANALYZE is run. + To avoid this, raise the amount of statistics collected by ANALYZE, as described below. @@ -148,7 +148,7 @@ ANALYZE [ VERBOSE ] [ table [ ( ). The target value sets the maximum number of entries in the most-common-value list and the maximum number of bins in the histogram. The default target value - is 10, but this can be adjusted up or down to trade off accuracy of + is 100, but this can be adjusted up or down to trade off accuracy of planner estimates against the time taken for ANALYZE and the amount of space occupied in pg_statistic. In particular, setting the diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 6b95075be1f01de38e5d6e18c681e7dd0fe5953c..2b3af54ff054e5aea99de00ce2b4bd24977f931a 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.128 2008/11/10 00:49:37 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.129 2008/12/13 19:13:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -67,7 +67,7 @@ typedef struct AnlIndexData /* Default statistics target (GUC parameter) */ -int default_statistics_target = 10; +int default_statistics_target = 100; /* A few variables that don't seem worth passing around as parameters */ static int elevel = -1; @@ -1531,10 +1531,10 @@ std_typanalyze(VacAttrStats *stats) * error in bin size f, and error probability gamma, the minimum * random sample size is * r = 4 * k * ln(2*n/gamma) / f^2 - * Taking f = 0.5, gamma = 0.01, n = 1 million rows, we obtain + * Taking f = 0.5, gamma = 0.01, n = 10^6 rows, we obtain * r = 305.82 * k * Note that because of the log function, the dependence on n is - * quite weak; even at n = 1 billion, a 300*k sample gives <= 0.59 + * quite weak; even at n = 10^12, a 300*k sample gives <= 0.66 * bin size error with probability 0.99. So there's no real need to * scale for n, which is a good thing because we don't necessarily * know it at this point. diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 8e12a77d11c397d65eb56602963600d40e2380bf..6c60ddd5c104f79909116f844a56df99bc7747bb 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.272 2008/12/06 23:22:46 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.273 2008/12/13 19:13:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3942,9 +3942,9 @@ ATExecSetStatistics(Relation rel, const char *colName, Node *newValue) errmsg("statistics target %d is too low", newtarget))); } - else if (newtarget > 1000) + else if (newtarget > 10000) { - newtarget = 1000; + newtarget = 10000; ereport(WARNING, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("lowering statistics target to %d", diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c index 199432097bc969b53e327357a841d316253b26ea..d30afe352c5de182768bd554aa49596fa80e0851 100644 --- a/src/backend/tsearch/ts_typanalyze.c +++ b/src/backend/tsearch/ts_typanalyze.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.3 2008/11/27 21:17:39 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.4 2008/12/13 19:13:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -63,7 +63,7 @@ ts_typanalyze(PG_FUNCTION_ARGS) attr->attstattarget = default_statistics_target; stats->compute_stats = compute_tsvector_stats; - /* see comment about the choice of minrows from analyze.c */ + /* see comment about the choice of minrows in commands/analyze.c */ stats->minrows = 300 * attr->attstattarget; PG_RETURN_BOOL(true); @@ -105,8 +105,8 @@ ts_typanalyze(PG_FUNCTION_ARGS) * is no more than a few times w. * * We use a hashtable for the D structure and a bucket width of - * statistic_target * 100, where 100 is an arbitrarily chosen constant, meant - * to approximate the number of lexemes in a single tsvector. + * statistics_target * 100, where 100 is an arbitrarily chosen constant, + * meant to approximate the number of lexemes in a single tsvector. */ static void compute_tsvector_stats(VacAttrStats *stats, @@ -130,7 +130,7 @@ compute_tsvector_stats(VacAttrStats *stats, LexemeHashKey hash_key; TrackItem *item; - /* We want statistic_target * 100 lexemes in the MCELEM array */ + /* We want statistics_target * 100 lexemes in the MCELEM array */ num_mcelem = stats->attr->attstattarget * 100; /* diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 00dac28c27e14f32d89ad0790694cc2bd0140e25..8e3be6fe371537ccc0cc071f7081b99f89d02b93 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut . * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.482 2008/12/02 02:00:32 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.483 2008/12/13 19:13:44 tgl Exp $ * *-------------------------------------------------------------------- */ @@ -1245,7 +1245,7 @@ static struct config_int ConfigureNamesInt[] = "column-specific target set via ALTER TABLE SET STATISTICS.") }, &default_statistics_target, - 10, 1, 1000, NULL, NULL + 100, 1, 10000, NULL, NULL }, { {"from_collapse_limit", PGC_USERSET, QUERY_TUNING_OTHER, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index f886ef74b218cce575364de8e51dfe552866500b..bf85ae083c6b03268c22250e1540408300367eb1 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -210,7 +210,7 @@ # - Other Planner Options - -#default_statistics_target = 10 # range 1-1000 +#default_statistics_target = 100 # range 1-10000 #constraint_exclusion = off #cursor_tuple_fraction = 0.1 # range 0.0-1.0 #from_collapse_limit = 8