diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 0f643836a1ccb7ee68ff37cd9191a453320562bd..b9569e58af454853bf5e31acf61a3f5a714dcaaf 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.61 2006/11/19 21:33:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.62 2007/01/03 18:11:01 tgl Exp $ * * NOTES * Postgres hash pages look like ordinary relation pages. The opaque @@ -533,10 +533,8 @@ fail: * * This does not need to initialize the new bucket pages; we'll do that as * each one is used by _hash_expandtable(). But we have to extend the logical - * EOF to the end of the splitpoint; otherwise the first overflow page - * allocated beyond the splitpoint will represent a noncontiguous access, - * which can confuse md.c (and will probably be forbidden by future changes - * to md.c). + * EOF to the end of the splitpoint; this keeps smgr's idea of the EOF in + * sync with ours, so that overflow-page allocation works correctly. * * We do this by writing a page of zeroes at the end of the splitpoint range. * We expect that the filesystem will ensure that the intervening pages read @@ -559,7 +557,6 @@ _hash_alloc_buckets(Relation rel, uint32 nblocks) { BlockNumber firstblock; BlockNumber lastblock; - BlockNumber endblock; char zerobuf[BLCKSZ]; /* @@ -577,24 +574,9 @@ _hash_alloc_buckets(Relation rel, uint32 nblocks) if (lastblock < firstblock || lastblock == InvalidBlockNumber) return InvalidBlockNumber; - /* Note: we assume RelationGetNumberOfBlocks did RelationOpenSmgr for us */ - MemSet(zerobuf, 0, sizeof(zerobuf)); - /* - * XXX If the extension results in creation of new segment files, - * we have to make sure that each non-last file is correctly filled out to - * RELSEG_SIZE blocks. This ought to be done inside mdextend, but - * changing the smgr API seems best left for development cycle not late - * beta. Temporary fix for bug #2737. - */ -#ifndef LET_OS_MANAGE_FILESIZE - for (endblock = firstblock | (RELSEG_SIZE - 1); - endblock < lastblock; - endblock += RELSEG_SIZE) - smgrextend(rel->rd_smgr, endblock, zerobuf, rel->rd_istemp); -#endif - + /* Note: we assume RelationGetNumberOfBlocks did RelationOpenSmgr for us */ smgrextend(rel->rd_smgr, lastblock, zerobuf, rel->rd_istemp); return firstblock; diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 4951dca2182be1def65689c8a87f798dfae27e45..4f886e8b07e4d9e33b177fbe6e69a2bf28e6e0c0 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -36,9 +36,9 @@ * that is of no value (since other backends have no interest in them yet) * and it created locking problems for CHECKPOINT, because the upper-level * pages were held exclusive-locked for long periods. Now we just build - * the pages in local memory and smgrwrite() them as we finish them. They - * will need to be re-read into shared buffers on first use after the build - * finishes. + * the pages in local memory and smgrwrite or smgrextend them as we finish + * them. They will need to be re-read into shared buffers on first use after + * the build finishes. * * Since the index will never be used unless it is completely built, * from a crash-recovery point of view there is no need to WAL-log the @@ -57,7 +57,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.107 2006/10/04 00:29:49 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.108 2007/01/03 18:11:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -309,9 +309,9 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) { if (!wstate->btws_zeropage) wstate->btws_zeropage = (Page) palloc0(BLCKSZ); - smgrwrite(wstate->index->rd_smgr, wstate->btws_pages_written++, - (char *) wstate->btws_zeropage, - true); + smgrextend(wstate->index->rd_smgr, wstate->btws_pages_written++, + (char *) wstate->btws_zeropage, + true); } /* @@ -319,10 +319,17 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) * index, because there's no need for smgr to schedule an fsync for this * write; we'll do it ourselves before ending the build. */ - smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true); - if (blkno == wstate->btws_pages_written) + { + /* extending the file... */ + smgrextend(wstate->index->rd_smgr, blkno, (char *) page, true); wstate->btws_pages_written++; + } + else + { + /* overwriting a block we zero-filled before */ + smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true); + } pfree(page); } diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index c30aa69c5558f5b9542b0c745829188bede86983..5de8e96f5fda1569400b73df9bad42755faa9543 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.208 2006/12/30 21:21:53 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.209 2007/01/03 18:11:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -6083,7 +6083,7 @@ copy_relation_data(Relation rel, SMgrRelation dst) * rel, because there's no need for smgr to schedule an fsync for this * write; we'll do it ourselves below. */ - smgrwrite(dst, blkno, buf, true); + smgrextend(dst, blkno, buf, true); } /* diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index f58ab03ce426c5e3dba61fbc00f655c1ae32128d..e0899a546008a3e79ba534697b1bc165acfbd65b 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.123 2006/11/20 01:07:56 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.124 2007/01/03 18:11:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,6 +22,7 @@ #include "miscadmin.h" #include "postmaster/bgwriter.h" #include "storage/fd.h" +#include "storage/bufmgr.h" #include "storage/smgr.h" #include "utils/hsearch.h" #include "utils/memutils.h" @@ -108,9 +109,16 @@ typedef struct static HTAB *pendingOpsTable = NULL; +typedef enum /* behavior for mdopen & _mdfd_getseg */ +{ + EXTENSION_FAIL, /* ereport if segment not present */ + EXTENSION_RETURN_NULL, /* return NULL if not present */ + EXTENSION_CREATE /* create new segments as needed */ +} ExtensionBehavior; + /* local routines */ -static MdfdVec *mdopen(SMgrRelation reln, bool allowNotFound); -static bool register_dirty_segment(SMgrRelation reln, MdfdVec *seg); +static MdfdVec *mdopen(SMgrRelation reln, ExtensionBehavior behavior); +static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg); static MdfdVec *_fdvec_alloc(void); #ifndef LET_OS_MANAGE_FILESIZE @@ -118,14 +126,14 @@ static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags); #endif static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, - bool allowNotFound); -static BlockNumber _mdnblocks(File file, Size blcksz); + bool isTemp, ExtensionBehavior behavior); +static BlockNumber _mdnblocks(SMgrRelation reln, MdfdVec *seg); /* * mdinit() -- Initialize private state for magnetic disk storage manager. */ -bool +void mdinit(void) { MdCxt = AllocSetContextCreate(TopMemoryContext, @@ -154,8 +162,6 @@ mdinit(void) &hash_ctl, HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); } - - return true; } /* @@ -163,14 +169,14 @@ mdinit(void) * * If isRedo is true, it's okay for the relation to exist already. */ -bool +void mdcreate(SMgrRelation reln, bool isRedo) { char *path; File fd; if (isRedo && reln->md_fd != NULL) - return true; /* created and opened already... */ + return; /* created and opened already... */ Assert(reln->md_fd == NULL); @@ -193,11 +199,15 @@ mdcreate(SMgrRelation reln, bool isRedo) if (fd < 0) { pfree(path); - /* be sure to return the error reported by create, not open */ + /* be sure to report the error reported by create, not open */ errno = save_errno; - return false; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create relation %u/%u/%u: %m", + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode))); } - errno = 0; } pfree(path); @@ -209,8 +219,6 @@ mdcreate(SMgrRelation reln, bool isRedo) #ifndef LET_OS_MANAGE_FILESIZE reln->md_fd->mdfd_chain = NULL; #endif - - return true; } /* @@ -220,12 +228,12 @@ mdcreate(SMgrRelation reln, bool isRedo) * there won't be an SMgrRelation hashtable entry anymore. * * If isRedo is true, it's okay for the relation to be already gone. + * Also, any failure should be reported as WARNING not ERROR, because + * we are usually not in a transaction anymore when this is called. */ -bool +void mdunlink(RelFileNode rnode, bool isRedo) { - bool status = true; - int save_errno = 0; char *path; path = relpath(rnode); @@ -234,15 +242,17 @@ mdunlink(RelFileNode rnode, bool isRedo) if (unlink(path) < 0) { if (!isRedo || errno != ENOENT) - { - status = false; - save_errno = errno; - } + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not remove relation %u/%u/%u: %m", + rnode.spcNode, + rnode.dbNode, + rnode.relNode))); } #ifndef LET_OS_MANAGE_FILESIZE /* Delete the additional segments, if any */ - if (status) + else { char *segpath = (char *) palloc(strlen(path) + 12); BlockNumber segno; @@ -258,10 +268,13 @@ mdunlink(RelFileNode rnode, bool isRedo) { /* ENOENT is expected after the last segment... */ if (errno != ENOENT) - { - status = false; - save_errno = errno; - } + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not remove segment %u of relation %u/%u/%u: %m", + segno, + rnode.spcNode, + rnode.dbNode, + rnode.relNode))); break; } } @@ -270,29 +283,44 @@ mdunlink(RelFileNode rnode, bool isRedo) #endif pfree(path); - - errno = save_errno; - return status; } /* * mdextend() -- Add a block to the specified relation. * - * The semantics are basically the same as mdwrite(): write at the - * specified position. However, we are expecting to extend the - * relation (ie, blocknum is >= the current EOF), and so in case of - * failure we clean up by truncating. - * - * This routine returns true or false, with errno set as appropriate. + * The semantics are nearly the same as mdwrite(): write at the + * specified position. However, this is to be used for the case of + * extending a relation (i.e., blocknum is at or beyond the current + * EOF). Note that we assume writing a block beyond current EOF + * causes intervening file space to become filled with zeroes. */ -bool +void mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) { long seekpos; int nbytes; MdfdVec *v; - v = _mdfd_getseg(reln, blocknum, false); + /* This assert is too expensive to have on normally ... */ +#ifdef CHECK_WRITE_VS_EXTEND + Assert(blocknum >= mdnblocks(reln)); +#endif + + /* + * If a relation manages to grow to 2^32-1 blocks, refuse to extend it + * any more --- we mustn't create a block whose number + * actually is InvalidBlockNumber. + */ + if (blocknum == InvalidBlockNumber) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("cannot extend relation %u/%u/%u beyond %u blocks", + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode, + InvalidBlockNumber))); + + v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_CREATE); #ifndef LET_OS_MANAGE_FILESIZE seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE))); @@ -302,52 +330,64 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) #endif /* - * Note: because caller obtained blocknum by calling _mdnblocks, which did - * a seek(SEEK_END), this seek is often redundant and will be optimized - * away by fd.c. It's not redundant, however, if there is a partial page - * at the end of the file. In that case we want to try to overwrite the - * partial page with a full page. It's also not redundant if bufmgr.c had - * to dump another buffer of the same file to make room for the new page's - * buffer. + * Note: because caller usually obtained blocknum by calling mdnblocks, + * which did a seek(SEEK_END), this seek is often redundant and will be + * optimized away by fd.c. It's not redundant, however, if there is a + * partial page at the end of the file. In that case we want to try to + * overwrite the partial page with a full page. It's also not redundant + * if bufmgr.c had to dump another buffer of the same file to make room + * for the new page's buffer. */ if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) - return false; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not seek to block %u of relation %u/%u/%u: %m", + blocknum, + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode))); if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) { - if (nbytes > 0) - { - int save_errno = errno; - - /* Remove the partially-written page */ - FileTruncate(v->mdfd_vfd, seekpos); - FileSeek(v->mdfd_vfd, seekpos, SEEK_SET); - errno = save_errno; - } - return false; + if (nbytes < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not extend relation %u/%u/%u: %m", + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode), + errhint("Check free disk space."))); + /* short write: complain appropriately */ + ereport(ERROR, + (errcode(ERRCODE_DISK_FULL), + errmsg("could not extend relation %u/%u/%u: wrote only %d of %d bytes at block %u", + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode, + nbytes, BLCKSZ, blocknum), + errhint("Check free disk space."))); } if (!isTemp) - { - if (!register_dirty_segment(reln, v)) - return false; - } + register_dirty_segment(reln, v); #ifndef LET_OS_MANAGE_FILESIZE - Assert(_mdnblocks(v->mdfd_vfd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE)); + Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE)); #endif - - return true; } /* - * mdopen() -- Open the specified relation. ereport's on failure. - * (Optionally, can return NULL instead of ereport for ENOENT.) + * mdopen() -- Open the specified relation. * * Note we only open the first segment, when there are multiple segments. + * + * If first segment is not present, either ereport or return NULL according + * to "behavior". We treat EXTENSION_CREATE the same as EXTENSION_FAIL; + * EXTENSION_CREATE means it's OK to extend an existing relation, not to + * invent one out of whole cloth. */ static MdfdVec * -mdopen(SMgrRelation reln, bool allowNotFound) +mdopen(SMgrRelation reln, ExtensionBehavior behavior) { MdfdVec *mdfd; char *path; @@ -374,7 +414,7 @@ mdopen(SMgrRelation reln, bool allowNotFound) if (fd < 0) { pfree(path); - if (allowNotFound && errno == ENOENT) + if (behavior == EXTENSION_RETURN_NULL && errno == ENOENT) return NULL; ereport(ERROR, (errcode_for_file_access(), @@ -393,7 +433,7 @@ mdopen(SMgrRelation reln, bool allowNotFound) mdfd->mdfd_segno = 0; #ifndef LET_OS_MANAGE_FILESIZE mdfd->mdfd_chain = NULL; - Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE)); + Assert(_mdnblocks(reln, mdfd) <= ((BlockNumber) RELSEG_SIZE)); #endif return mdfd; @@ -401,17 +441,15 @@ mdopen(SMgrRelation reln, bool allowNotFound) /* * mdclose() -- Close the specified relation, if it isn't closed already. - * - * Returns true or false with errno set as appropriate. */ -bool +void mdclose(SMgrRelation reln) { MdfdVec *v = reln->md_fd; /* No work if already closed */ if (v == NULL) - return true; + return; reln->md_fd = NULL; /* prevent dangling pointer after error */ @@ -432,22 +470,19 @@ mdclose(SMgrRelation reln) FileClose(v->mdfd_vfd); pfree(v); #endif - - return true; } /* * mdread() -- Read the specified block from a relation. */ -bool +void mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) { - bool status; long seekpos; int nbytes; MdfdVec *v; - v = _mdfd_getseg(reln, blocknum, false); + v = _mdfd_getseg(reln, blocknum, false, EXTENSION_FAIL); #ifndef LET_OS_MANAGE_FILESIZE seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE))); @@ -457,39 +492,66 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) #endif if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) - return false; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not seek to block %u of relation %u/%u/%u: %m", + blocknum, + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode))); - status = true; if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) { + if (nbytes < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read block %u of relation %u/%u/%u: %m", + blocknum, + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode))); /* - * If we are at or past EOF, return zeroes without complaining. Also - * substitute zeroes if we found a partial block at EOF. - * - * XXX this is really ugly, bad design. However the current - * implementation of hash indexes requires it, because hash index - * pages are initialized out-of-order. + * Short read: we are at or past EOF, or we read a partial block at + * EOF. Normally this is an error; upper levels should never try to + * read a nonexistent block. However, if zero_damaged_pages is ON + * or we are InRecovery, we should instead return zeroes without + * complaining. This allows, for example, the case of trying to + * update a block that was later truncated away. */ - if (nbytes == 0 || - (nbytes > 0 && mdnblocks(reln) == blocknum)) + if (zero_damaged_pages || InRecovery) MemSet(buffer, 0, BLCKSZ); else - status = false; + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("could not read block %u of relation %u/%u/%u: read only %d of %d bytes", + blocknum, + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode, + nbytes, BLCKSZ))); } - - return status; } /* * mdwrite() -- Write the supplied block at the appropriate location. + * + * This is to be used only for updating already-existing blocks of a + * relation (ie, those before the current EOF). To extend a relation, + * use mdextend(). */ -bool +void mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) { long seekpos; + int nbytes; MdfdVec *v; - v = _mdfd_getseg(reln, blocknum, false); + /* This assert is too expensive to have on normally ... */ +#ifdef CHECK_WRITE_VS_EXTEND + Assert(blocknum < mdnblocks(reln)); +#endif + + v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_FAIL); #ifndef LET_OS_MANAGE_FILESIZE seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE))); @@ -499,18 +561,38 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) #endif if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) - return false; - - if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ) - return false; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not seek to block %u of relation %u/%u/%u: %m", + blocknum, + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode))); - if (!isTemp) + if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) { - if (!register_dirty_segment(reln, v)) - return false; + if (nbytes < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write block %u of relation %u/%u/%u: %m", + blocknum, + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode))); + /* short write: complain appropriately */ + ereport(ERROR, + (errcode(ERRCODE_DISK_FULL), + errmsg("could not write block %u of relation %u/%u/%u: wrote only %d of %d bytes", + blocknum, + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode, + nbytes, BLCKSZ), + errhint("Check free disk space."))); } - return true; + if (!isTemp) + register_dirty_segment(reln, v); } /* @@ -520,13 +602,11 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) * and added to the mdfd_chain list. If this routine has not been * called, then only segments up to the last one actually touched * are present in the chain. - * - * Returns # of blocks, or InvalidBlockNumber on error. */ BlockNumber mdnblocks(SMgrRelation reln) { - MdfdVec *v = mdopen(reln, false); + MdfdVec *v = mdopen(reln, EXTENSION_FAIL); #ifndef LET_OS_MANAGE_FILESIZE BlockNumber nblocks; @@ -552,7 +632,7 @@ mdnblocks(SMgrRelation reln) for (;;) { - nblocks = _mdnblocks(v->mdfd_vfd, BLCKSZ); + nblocks = _mdnblocks(reln, v); if (nblocks > ((BlockNumber) RELSEG_SIZE)) elog(FATAL, "segment too big"); if (nblocks < ((BlockNumber) RELSEG_SIZE)) @@ -573,22 +653,26 @@ mdnblocks(SMgrRelation reln) */ v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT); if (v->mdfd_chain == NULL) - return InvalidBlockNumber; /* failed? */ + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open segment %u of relation %u/%u/%u: %m", + segno, + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode))); } v = v->mdfd_chain; } #else - return _mdnblocks(v->mdfd_vfd, BLCKSZ); + return _mdnblocks(reln, v); #endif } /* * mdtruncate() -- Truncate relation to specified number of blocks. - * - * Returns # of blocks or InvalidBlockNumber on error. */ -BlockNumber +void mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) { MdfdVec *v; @@ -603,14 +687,22 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) * that truncation loop will get them all! */ curnblk = mdnblocks(reln); - if (curnblk == InvalidBlockNumber) - return InvalidBlockNumber; /* mdnblocks failed */ if (nblocks > curnblk) - return InvalidBlockNumber; /* bogus request */ + { + /* Bogus request ... but no complaint if InRecovery */ + if (InRecovery) + return; + ereport(ERROR, + (errmsg("could not truncate relation %u/%u/%u to %u blocks: it's only %u blocks now", + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode, + nblocks, curnblk))); + } if (nblocks == curnblk) - return nblocks; /* no work */ + return; /* no work */ - v = mdopen(reln, false); + v = mdopen(reln, EXTENSION_FAIL); #ifndef LET_OS_MANAGE_FILESIZE priorblocks = 0; @@ -626,12 +718,15 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) * not delete it, for reasons explained in the header comments. */ if (FileTruncate(v->mdfd_vfd, 0) < 0) - return InvalidBlockNumber; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not truncate relation %u/%u/%u to %u blocks: %m", + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode, + nblocks))); if (!isTemp) - { - if (!register_dirty_segment(reln, v)) - return InvalidBlockNumber; - } + register_dirty_segment(reln, v); v = v->mdfd_chain; Assert(ov != reln->md_fd); /* we never drop the 1st segment */ pfree(ov); @@ -649,12 +744,15 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) BlockNumber lastsegblocks = nblocks - priorblocks; if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0) - return InvalidBlockNumber; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not truncate relation %u/%u/%u to %u blocks: %m", + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode, + nblocks))); if (!isTemp) - { - if (!register_dirty_segment(reln, v)) - return InvalidBlockNumber; - } + register_dirty_segment(reln, v); v = v->mdfd_chain; ov->mdfd_chain = NULL; } @@ -670,15 +768,16 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) } #else if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0) - return InvalidBlockNumber; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not truncate relation %u/%u/%u to %u blocks: %m", + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode, + nblocks))); if (!isTemp) - { - if (!register_dirty_segment(reln, v)) - return InvalidBlockNumber; - } + register_dirty_segment(reln, v); #endif - - return nblocks; } /* @@ -687,7 +786,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) * Note that only writes already issued are synced; this routine knows * nothing of dirty buffers that may exist inside the buffer manager. */ -bool +void mdimmedsync(SMgrRelation reln) { MdfdVec *v; @@ -698,24 +797,32 @@ mdimmedsync(SMgrRelation reln) * that fsync loop will get them all! */ curnblk = mdnblocks(reln); - if (curnblk == InvalidBlockNumber) - return false; /* mdnblocks failed */ - v = mdopen(reln, false); + v = mdopen(reln, EXTENSION_FAIL); #ifndef LET_OS_MANAGE_FILESIZE while (v != NULL) { if (FileSync(v->mdfd_vfd) < 0) - return false; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fsync segment %u of relation %u/%u/%u: %m", + v->mdfd_segno, + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode))); v = v->mdfd_chain; } #else if (FileSync(v->mdfd_vfd) < 0) - return false; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fsync segment %u of relation %u/%u/%u: %m", + v->mdfd_segno, + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode))); #endif - - return true; } /* @@ -724,7 +831,7 @@ mdimmedsync(SMgrRelation reln) * This is only called during checkpoints, and checkpoints should only * occur in processes that have created a pendingOpsTable. */ -bool +void mdsync(void) { HASH_SEQ_STATUS hstat; @@ -732,7 +839,7 @@ mdsync(void) int absorb_counter; if (!pendingOpsTable) - return false; + elog(ERROR, "cannot sync without a pendingOpsTable"); /* * If we are in the bgwriter, the sync had better include all fsync @@ -795,21 +902,18 @@ mdsync(void) */ seg = _mdfd_getseg(reln, entry->segno * ((BlockNumber) RELSEG_SIZE), - true); + false, EXTENSION_RETURN_NULL); if (seg) { if (FileSync(seg->mdfd_vfd) < 0 && errno != ENOENT) - { - ereport(LOG, + ereport(ERROR, (errcode_for_file_access(), errmsg("could not fsync segment %u of relation %u/%u/%u: %m", entry->segno, entry->rnode.spcNode, entry->rnode.dbNode, entry->rnode.relNode))); - return false; - } } } @@ -818,8 +922,6 @@ mdsync(void) HASH_REMOVE, NULL) == NULL) elog(ERROR, "pendingOpsTable corrupted"); } - - return true; } /* @@ -830,11 +932,8 @@ mdsync(void) * to the background writer process. If that fails, just do the fsync * locally before returning (we expect this will not happen often enough * to be a performance problem). - * - * A false result implies I/O failure during local fsync. errno will be - * valid for error reporting. */ -static bool +static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg) { if (pendingOpsTable) @@ -847,17 +946,21 @@ register_dirty_segment(SMgrRelation reln, MdfdVec *seg) entry.segno = seg->mdfd_segno; (void) hash_search(pendingOpsTable, &entry, HASH_ENTER, NULL); - return true; } else { if (ForwardFsyncRequest(reln->smgr_rnode, seg->mdfd_segno)) - return true; - } + return; /* passed it off successfully */ - if (FileSync(seg->mdfd_vfd) < 0) - return false; - return true; + if (FileSync(seg->mdfd_vfd) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fsync segment %u of relation %u/%u/%u: %m", + seg->mdfd_segno, + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode))); + } } /* @@ -931,7 +1034,7 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags) v->mdfd_vfd = fd; v->mdfd_segno = segno; v->mdfd_chain = NULL; - Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE)); + Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE)); /* all done */ return v; @@ -940,51 +1043,66 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags) /* * _mdfd_getseg() -- Find the segment of the relation holding the - * specified block. ereport's on failure. - * (Optionally, can return NULL instead of ereport for ENOENT.) + * specified block. + * + * If the segment doesn't exist, we ereport, return NULL, or create the + * segment, according to "behavior". Note: isTemp need only be correct + * in the EXTENSION_CREATE case. */ static MdfdVec * -_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool allowNotFound) +_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp, + ExtensionBehavior behavior) { - MdfdVec *v = mdopen(reln, allowNotFound); + MdfdVec *v = mdopen(reln, behavior); #ifndef LET_OS_MANAGE_FILESIZE - BlockNumber segstogo; + BlockNumber targetseg; BlockNumber nextsegno; if (!v) - return NULL; /* only possible if allowNotFound */ + return NULL; /* only possible if EXTENSION_RETURN_NULL */ - for (segstogo = blkno / ((BlockNumber) RELSEG_SIZE), nextsegno = 1; - segstogo > 0; - nextsegno++, segstogo--) + targetseg = blkno / ((BlockNumber) RELSEG_SIZE); + for (nextsegno = 1; nextsegno <= targetseg; nextsegno++) { + Assert(nextsegno == v->mdfd_segno + 1); + if (v->mdfd_chain == NULL) { /* - * We will create the next segment only if the target block is - * within it. This prevents Sorcerer's Apprentice syndrome if a - * bug at higher levels causes us to be handed a ridiculously - * large blkno --- otherwise we could create many thousands of - * empty segment files before reaching the "target" block. We - * should never need to create more than one new segment per call, - * so this restriction seems reasonable. + * Normally we will create new segments only if authorized by + * the caller (i.e., we are doing mdextend()). But when doing + * WAL recovery, create segments anyway; this allows cases such as + * replaying WAL data that has a write into a high-numbered + * segment of a relation that was later deleted. We want to go + * ahead and create the segments so we can finish out the replay. * - * BUT: when doing WAL recovery, disable this logic and create - * segments unconditionally. In this case it seems better to - * assume the given blkno is good (it presumably came from a - * CRC-checked WAL record); furthermore this lets us cope in the - * case where we are replaying WAL data that has a write into a - * high-numbered segment of a relation that was later deleted. We - * want to go ahead and create the segments so we can finish out - * the replay. + * We have to maintain the invariant that segments before the + * last active segment are of size RELSEG_SIZE; therefore, pad + * them out with zeroes if needed. (This only matters if caller + * is extending the relation discontiguously, but that can happen + * in hash indexes.) */ - v->mdfd_chain = _mdfd_openseg(reln, - nextsegno, - (segstogo == 1 || InRecovery) ? O_CREAT : 0); + if (behavior == EXTENSION_CREATE || InRecovery) + { + if (_mdnblocks(reln, v) < RELSEG_SIZE) + { + char *zerobuf = palloc0(BLCKSZ); + + mdextend(reln, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1, + zerobuf, isTemp); + pfree(zerobuf); + } + v->mdfd_chain = _mdfd_openseg(reln, nextsegno, O_CREAT); + } + else + { + /* We won't create segment if not existent */ + v->mdfd_chain = _mdfd_openseg(reln, nextsegno, 0); + } if (v->mdfd_chain == NULL) { - if (allowNotFound && errno == ENOENT) + if (behavior == EXTENSION_RETURN_NULL && errno == ENOENT) return NULL; ereport(ERROR, (errcode_for_file_access(), @@ -1007,12 +1125,19 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool allowNotFound) * Get number of blocks present in a single disk file */ static BlockNumber -_mdnblocks(File file, Size blcksz) +_mdnblocks(SMgrRelation reln, MdfdVec *seg) { long len; - len = FileSeek(file, 0L, SEEK_END); + len = FileSeek(seg->mdfd_vfd, 0L, SEEK_END); if (len < 0) - return 0; /* on failure, assume file is empty */ - return (BlockNumber) (len / blcksz); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not seek to end of segment %u of relation %u/%u/%u: %m", + seg->mdfd_segno, + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode))); + /* note that this calculation will ignore any partial block at EOF */ + return (BlockNumber) (len / BLCKSZ); } diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 0ceb800b363eae6329ddd809a1ab943f2e13739b..1a3a00f2951fc4f34d6b12635056fda642fab194 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.101 2006/10/04 00:29:58 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.102 2007/01/03 18:11:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,30 +31,33 @@ /* * This struct of function pointers defines the API between smgr.c and * any individual storage manager module. Note that smgr subfunctions are - * generally expected to return TRUE on success, FALSE on error. (For - * nblocks and truncate we instead say that returning InvalidBlockNumber - * indicates an error.) + * generally expected to report problems via elog(ERROR). An exception is + * that smgr_unlink should use elog(WARNING), rather than erroring out, + * because we normally unlink relations during post-commit/abort cleanup, + * and so it's too late to raise an error. Also, various conditions that + * would normally be errors should be allowed during bootstrap and/or WAL + * recovery --- see comments in md.c for details. */ typedef struct f_smgr { - bool (*smgr_init) (void); /* may be NULL */ - bool (*smgr_shutdown) (void); /* may be NULL */ - bool (*smgr_close) (SMgrRelation reln); - bool (*smgr_create) (SMgrRelation reln, bool isRedo); - bool (*smgr_unlink) (RelFileNode rnode, bool isRedo); - bool (*smgr_extend) (SMgrRelation reln, BlockNumber blocknum, + void (*smgr_init) (void); /* may be NULL */ + void (*smgr_shutdown) (void); /* may be NULL */ + void (*smgr_close) (SMgrRelation reln); + void (*smgr_create) (SMgrRelation reln, bool isRedo); + void (*smgr_unlink) (RelFileNode rnode, bool isRedo); + void (*smgr_extend) (SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp); - bool (*smgr_read) (SMgrRelation reln, BlockNumber blocknum, + void (*smgr_read) (SMgrRelation reln, BlockNumber blocknum, char *buffer); - bool (*smgr_write) (SMgrRelation reln, BlockNumber blocknum, + void (*smgr_write) (SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp); BlockNumber (*smgr_nblocks) (SMgrRelation reln); - BlockNumber (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks, - bool isTemp); - bool (*smgr_immedsync) (SMgrRelation reln); - bool (*smgr_commit) (void); /* may be NULL */ - bool (*smgr_abort) (void); /* may be NULL */ - bool (*smgr_sync) (void); /* may be NULL */ + void (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks, + bool isTemp); + void (*smgr_immedsync) (SMgrRelation reln); + void (*smgr_commit) (void); /* may be NULL */ + void (*smgr_abort) (void); /* may be NULL */ + void (*smgr_sync) (void); /* may be NULL */ } f_smgr; @@ -152,12 +155,7 @@ smgrinit(void) for (i = 0; i < NSmgr; i++) { if (smgrsw[i].smgr_init) - { - if (!(*(smgrsw[i].smgr_init)) ()) - elog(FATAL, "smgr initialization failed on %s: %m", - DatumGetCString(DirectFunctionCall1(smgrout, - Int16GetDatum(i)))); - } + (*(smgrsw[i].smgr_init)) (); } /* register the shutdown proc */ @@ -175,12 +173,7 @@ smgrshutdown(int code, Datum arg) for (i = 0; i < NSmgr; i++) { if (smgrsw[i].smgr_shutdown) - { - if (!(*(smgrsw[i].smgr_shutdown)) ()) - elog(FATAL, "smgr shutdown failed on %s: %m", - DatumGetCString(DirectFunctionCall1(smgrout, - Int16GetDatum(i)))); - } + (*(smgrsw[i].smgr_shutdown)) (); } } @@ -256,13 +249,7 @@ smgrclose(SMgrRelation reln) { SMgrRelation *owner; - if (!(*(smgrsw[reln->smgr_which].smgr_close)) (reln)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not close relation %u/%u/%u: %m", - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + (*(smgrsw[reln->smgr_which].smgr_close)) (reln); owner = reln->smgr_owner; @@ -354,13 +341,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) reln->smgr_rnode.dbNode, isRedo); - if (!(*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not create relation %u/%u/%u: %m", - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + (*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo); if (isRedo) return; @@ -482,38 +463,26 @@ smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo) /* * And delete the physical files. * - * Note: we treat deletion failure as a WARNING, not an error, because - * we've already decided to commit or abort the current xact. + * Note: smgr_unlink must treat deletion failure as a WARNING, not an + * ERROR, because we've already decided to commit or abort the current + * xact. */ - if (!(*(smgrsw[which].smgr_unlink)) (rnode, isRedo)) - ereport(WARNING, - (errcode_for_file_access(), - errmsg("could not remove relation %u/%u/%u: %m", - rnode.spcNode, - rnode.dbNode, - rnode.relNode))); + (*(smgrsw[which].smgr_unlink)) (rnode, isRedo); } /* * smgrextend() -- Add a new block to a file. * - * The semantics are basically the same as smgrwrite(): write at the - * specified position. However, we are expecting to extend the - * relation (ie, blocknum is the current EOF), and so in case of - * failure we clean up by truncating. + * The semantics are nearly the same as smgrwrite(): write at the + * specified position. However, this is to be used for the case of + * extending a relation (i.e., blocknum is at or beyond the current + * EOF). Note that we assume writing a block beyond current EOF + * causes intervening file space to become filled with zeroes. */ void smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) { - if (!(*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer, - isTemp)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not extend relation %u/%u/%u: %m", - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode), - errhint("Check free disk space."))); + (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer, isTemp); } /* @@ -527,19 +496,16 @@ smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) void smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer) { - if (!(*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not read block %u of relation %u/%u/%u: %m", - blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + (*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer); } /* * smgrwrite() -- Write the supplied buffer out. * + * This is to be used only for updating already-existing blocks of a + * relation (ie, those before the current EOF). To extend a relation, + * use smgrextend(). + * * This is not a synchronous write -- the block is not necessarily * on disk at return, only dumped out to the kernel. However, * provisions will be made to fsync the write before the next checkpoint. @@ -551,60 +517,26 @@ smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer) void smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) { - if (!(*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer, - isTemp)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write block %u of relation %u/%u/%u: %m", - blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + (*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer, isTemp); } /* * smgrnblocks() -- Calculate the number of blocks in the * supplied relation. - * - * Returns the number of blocks on success, aborts the current - * transaction on failure. */ BlockNumber smgrnblocks(SMgrRelation reln) { - BlockNumber nblocks; - - nblocks = (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln); - - /* - * NOTE: if a relation ever did grow to 2^32-1 blocks, this code would - * fail --- but that's a good thing, because it would stop us from - * extending the rel another block and having a block whose number - * actually is InvalidBlockNumber. - */ - if (nblocks == InvalidBlockNumber) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not count blocks of relation %u/%u/%u: %m", - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); - - return nblocks; + return (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln); } /* * smgrtruncate() -- Truncate supplied relation to the specified number * of blocks - * - * Returns the number of blocks on success, aborts the current - * transaction on failure. */ -BlockNumber +void smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) { - BlockNumber newblks; - /* * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will * just drop them without bothering to write the contents. @@ -619,16 +551,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) FreeSpaceMapTruncateRel(&reln->smgr_rnode, nblocks); /* Do the truncation */ - newblks = (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks, - isTemp); - if (newblks == InvalidBlockNumber) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not truncate relation %u/%u/%u to %u blocks: %m", - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode, - nblocks))); + (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks, isTemp); if (!isTemp) { @@ -642,7 +565,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) XLogRecData rdata; xl_smgr_truncate xlrec; - xlrec.blkno = newblks; + xlrec.blkno = nblocks; xlrec.rnode = reln->smgr_rnode; rdata.data = (char *) &xlrec; @@ -653,8 +576,6 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLOG_NO_TRAN, &rdata); } - - return newblks; } /* @@ -683,13 +604,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) void smgrimmedsync(SMgrRelation reln) { - if (!(*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not sync relation %u/%u/%u: %m", - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode))); + (*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln); } @@ -843,12 +758,7 @@ smgrcommit(void) for (i = 0; i < NSmgr; i++) { if (smgrsw[i].smgr_commit) - { - if (!(*(smgrsw[i].smgr_commit)) ()) - elog(ERROR, "transaction commit failed on %s: %m", - DatumGetCString(DirectFunctionCall1(smgrout, - Int16GetDatum(i)))); - } + (*(smgrsw[i].smgr_commit)) (); } } @@ -863,12 +773,7 @@ smgrabort(void) for (i = 0; i < NSmgr; i++) { if (smgrsw[i].smgr_abort) - { - if (!(*(smgrsw[i].smgr_abort)) ()) - elog(ERROR, "transaction abort failed on %s: %m", - DatumGetCString(DirectFunctionCall1(smgrout, - Int16GetDatum(i)))); - } + (*(smgrsw[i].smgr_abort)) (); } } @@ -883,12 +788,7 @@ smgrsync(void) for (i = 0; i < NSmgr; i++) { if (smgrsw[i].smgr_sync) - { - if (!(*(smgrsw[i].smgr_sync)) ()) - elog(ERROR, "storage sync failed on %s: %m", - DatumGetCString(DirectFunctionCall1(smgrout, - Int16GetDatum(i)))); - } + (*(smgrsw[i].smgr_sync)) (); } } @@ -910,7 +810,6 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) { xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record); SMgrRelation reln; - BlockNumber newblks; reln = smgropen(xlrec->rnode); @@ -931,17 +830,9 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) FreeSpaceMapTruncateRel(&reln->smgr_rnode, xlrec->blkno); /* Do the truncation */ - newblks = (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, - xlrec->blkno, - false); - if (newblks == InvalidBlockNumber) - ereport(WARNING, - (errcode_for_file_access(), - errmsg("could not truncate relation %u/%u/%u to %u blocks: %m", - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode, - xlrec->blkno))); + (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, + xlrec->blkno, + false); /* Also tell xlogutils.c about it */ XLogTruncateRelation(xlrec->rnode, xlrec->blkno); diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index b768a5b53044d766e7751cb6946cc849701f3e30..1c8963ec21208f2e351ed92406a539faad3c4aa2 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.55 2006/03/24 04:32:13 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.56 2007/01/03 18:11:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -72,7 +72,7 @@ extern void smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer); extern void smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp); extern BlockNumber smgrnblocks(SMgrRelation reln); -extern BlockNumber smgrtruncate(SMgrRelation reln, BlockNumber nblocks, +extern void smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp); extern void smgrimmedsync(SMgrRelation reln); extern void smgrDoPendingDeletes(bool isCommit); @@ -91,20 +91,19 @@ extern void smgr_desc(StringInfo buf, uint8 xl_info, char *rec); /* internals: move me elsewhere -- ay 7/94 */ /* in md.c */ -extern bool mdinit(void); -extern bool mdclose(SMgrRelation reln); -extern bool mdcreate(SMgrRelation reln, bool isRedo); -extern bool mdunlink(RelFileNode rnode, bool isRedo); -extern bool mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, +extern void mdinit(void); +extern void mdclose(SMgrRelation reln); +extern void mdcreate(SMgrRelation reln, bool isRedo); +extern void mdunlink(RelFileNode rnode, bool isRedo); +extern void mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp); -extern bool mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer); -extern bool mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, +extern void mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer); +extern void mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp); extern BlockNumber mdnblocks(SMgrRelation reln); -extern BlockNumber mdtruncate(SMgrRelation reln, BlockNumber nblocks, - bool isTemp); -extern bool mdimmedsync(SMgrRelation reln); -extern bool mdsync(void); +extern void mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp); +extern void mdimmedsync(SMgrRelation reln); +extern void mdsync(void); extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno);