From d29b66882a5de0878c496bd71d0c69af17197ab7 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 11 Jul 2006 21:05:57 +0000 Subject: [PATCH] Tweak fillfactor code as per my recent proposal. Fix nbtsort.c so that it can handle small fillfactors for ordinary-sized index entries without failing on large ones; fix nbtinsert.c to distinguish leaf and nonleaf pages; change the minimum fillfactor to 10% for all index types. --- doc/src/sgml/ref/create_index.sgml | 8 ++++---- src/backend/access/gin/ginutil.c | 4 ++-- src/backend/access/nbtree/nbtinsert.c | 11 ++++++++--- src/backend/access/nbtree/nbtsort.c | 26 +++++++++++++++----------- src/include/access/gist_private.h | 4 ++-- src/include/access/hash.h | 4 ++-- src/include/access/nbtree.h | 12 ++++++++---- 7 files changed, 41 insertions(+), 28 deletions(-) diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index d3e9b1dc66..20a89713bf 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -1,5 +1,5 @@ @@ -221,17 +221,17 @@ CREATE [ UNIQUE ] INDEX name ON The fillfactor for an index is a percentage that determines how full - the index method will try to pack index pages. For B-trees, pages + the index method will try to pack index pages. For B-trees, leaf pages are filled to this percentage during initial index build, and also when extending the index at the right (largest key values). If pages subsequently become completely full, they will be split, leading to gradual degradation in the index's efficiency. B-trees use a default - fillfactor of 90, but any value from 70 to 100 can be selected. + fillfactor of 90, but any value from 10 to 100 can be selected. If the table is static then fillfactor 100 is best to minimize the index's physical size, but for heavily updated tables a smaller fillfactor is better to minimize the need for page splits. The other index methods use fillfactor in different but roughly analogous - ways; the default fillfactor and allowed range varies. + ways; the default fillfactor varies between methods. diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 544956c84e..3c665a1722 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.3 2006/07/03 22:45:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.4 2006/07/11 21:05:57 tgl Exp $ *------------------------------------------------------------------------- */ @@ -213,7 +213,7 @@ ginoptions(PG_FUNCTION_ARGS) * It's not clear that fillfactor is useful for GIN, but for the moment * we'll accept it anyway. (It won't do anything...) */ -#define GIN_MIN_FILLFACTOR 50 +#define GIN_MIN_FILLFACTOR 10 #define GIN_DEFAULT_FILLFACTOR 100 result = default_reloptions(reloptions, validate, diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index a176888691..afe49ce682 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.139 2006/07/03 22:45:37 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.140 2006/07/11 21:05:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -991,7 +991,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, * inserting successively increasing keys (consider sequences, timestamps, * etc) we will end up with a tree whose pages are about fillfactor% full, * instead of the 50% full result that we'd get without this special case. - * This is the same as nbtsort.c produces for a newly-created tree. + * This is the same as nbtsort.c produces for a newly-created tree. Note + * that leaf and nonleaf pages use different fillfactors. * * We are passed the intended insert position of the new tuple, expressed as * the offsetnumber of the tuple it must go in front of. (This could be @@ -1025,10 +1026,14 @@ _bt_findsplitloc(Relation rel, /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */ newitemsz += sizeof(ItemIdData); state.newitemsz = newitemsz; - state.fillfactor = RelationGetFillFactor(rel, BTREE_DEFAULT_FILLFACTOR); state.is_leaf = P_ISLEAF(opaque); state.is_rightmost = P_RIGHTMOST(opaque); state.have_split = false; + if (state.is_leaf) + state.fillfactor = RelationGetFillFactor(rel, + BTREE_DEFAULT_FILLFACTOR); + else + state.fillfactor = BTREE_NONLEAF_FILLFACTOR; /* Total free space available on a btree page, after fixed overhead */ leftspace = rightspace = diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 1dd3be0a63..f2b1116b72 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -27,10 +27,10 @@ * insertion would cause a split (and not only of the leaf page; the need * for a split would cascade right up the tree). The steady-state load * factor for btrees is usually estimated at 70%. We choose to pack leaf - * pages to the user-controllable fill factor while upper pages are always - * packed to 70%. This gives us reasonable density (there aren't many upper - * pages if the keys are reasonable-size) without incurring a lot of cascading - * splits during early insertions. + * pages to the user-controllable fill factor (default 90%) while upper pages + * are always packed to 70%. This gives us reasonable density (there aren't + * many upper pages if the keys are reasonable-size) without risking a lot of + * cascading splits during early insertions. * * Formerly the index pages being built were kept in shared buffers, but * that is of no value (since other backends have no interest in them yet) @@ -57,7 +57,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.104 2006/07/03 22:45:37 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.105 2006/07/11 21:05:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -349,7 +349,7 @@ _bt_pagestate(BTWriteState *wstate, uint32 level) state->btps_level = level; /* set "full" threshold based on level. See notes at head of file. */ if (level > 0) - state->btps_full = (BLCKSZ * (100 - BTREE_MIN_FILLFACTOR) / 100); + state->btps_full = (BLCKSZ * (100 - BTREE_NONLEAF_FILLFACTOR) / 100); else state->btps_full = RelationGetTargetPageFreeSpace(wstate->index, BTREE_DEFAULT_FILLFACTOR); @@ -499,11 +499,16 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) "Consider a function index of an MD5 hash of the value, " "or use full text indexing."))); - if (pgspc < itupsz || pgspc < state->btps_full) + /* + * Check to see if page is "full". It's definitely full if the item + * won't fit. Otherwise, compare to the target freespace derived from + * the fillfactor. However, we must put at least two items on each + * page, so disregard fillfactor if we don't have that many. + */ + if (pgspc < itupsz || (pgspc < state->btps_full && last_off > P_FIRSTKEY)) { /* - * Item won't fit on this page, or we feel the page is full enough - * already. Finish off the page and write it out. + * Finish off the page and write it out. */ Page opage = npage; BlockNumber oblkno = nblkno; @@ -522,8 +527,7 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) * rearrange the old page so that the 'last item' becomes its high key * rather than a true data item. There had better be at least two * items on the page already, else the page would be empty of useful - * data. (Hence, we must allow pages to be packed at least 2/3rds - * full; the 70% figure used above is close to minimum.) + * data. */ Assert(last_off > P_FIRSTKEY); ii = PageGetItemId(opage, last_off); diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 1df82fe9fb..d6e5cf0603 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.21 2006/07/11 17:04:13 momjian Exp $ + * $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.22 2006/07/11 21:05:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -272,7 +272,7 @@ extern Datum gistgetmulti(PG_FUNCTION_ARGS); #define GiSTPageSize \ ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GISTPageOpaqueData)) ) -#define GIST_MIN_FILLFACTOR 50 +#define GIST_MIN_FILLFACTOR 10 #define GIST_DEFAULT_FILLFACTOR 90 extern Datum gistoptions(PG_FUNCTION_ARGS); diff --git a/src/include/access/hash.h b/src/include/access/hash.h index 10646522d9..a845809a5d 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.71 2006/07/03 22:45:39 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.72 2006/07/11 21:05:57 tgl Exp $ * * NOTES * modeled after Margo Seltzer's hash implementation for unix. @@ -167,7 +167,7 @@ typedef HashMetaPageData *HashMetaPage; MAXALIGN(sizeof(HashPageOpaqueData)) - \ sizeof(ItemIdData)) -#define HASH_MIN_FILLFACTOR 50 +#define HASH_MIN_FILLFACTOR 10 #define HASH_DEFAULT_FILLFACTOR 75 /* diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 43bd49a7bb..b22422ffd6 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.100 2006/07/03 22:45:39 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.101 2006/07/11 21:05:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -107,11 +107,15 @@ typedef struct BTMetaPageData MAXALIGN(sizeof(BTPageOpaqueData))) / 3 - sizeof(ItemIdData)) /* - * Because of above, min fillfactor can't be less than 2/3rds; see notes in - * nbtsort.c before you change these! + * The leaf-page fillfactor defaults to 90% but is user-adjustable. + * For pages above the leaf level, we use a fixed 70% fillfactor. + * The fillfactor is applied during index build and when splitting + * a rightmost page; when splitting non-rightmost pages we try to + * divide the data equally. */ -#define BTREE_MIN_FILLFACTOR 70 +#define BTREE_MIN_FILLFACTOR 10 #define BTREE_DEFAULT_FILLFACTOR 90 +#define BTREE_NONLEAF_FILLFACTOR 70 /* * Test whether two btree entries are "the same". -- GitLab