/*-------------------------------------------------------------------------
 *
 * smgr.c
 *	  public interface routines to storage manager switch.
 *
 *	  All file system operations in POSTGRES dispatch through these
 *	  routines.
 *
 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.42 2000/10/28 16:20:57 vadim Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

M
Marc G. Fournier 已提交
20
#include "storage/smgr.h"
21

22
/* Forward declaration: smgrinit() registers this routine via on_proc_exit(). */
static void smgrshutdown(void);
23 24 25

typedef struct f_smgr
{
26 27
	int			(*smgr_init) (void);	/* may be NULL */
	int			(*smgr_shutdown) (void);		/* may be NULL */
28 29 30 31 32 33
	int			(*smgr_create) (Relation reln);
	int			(*smgr_unlink) (Relation reln);
	int			(*smgr_extend) (Relation reln, char *buffer);
	int			(*smgr_open) (Relation reln);
	int			(*smgr_close) (Relation reln);
	int			(*smgr_read) (Relation reln, BlockNumber blocknum,
34
										  char *buffer);
35
	int			(*smgr_write) (Relation reln, BlockNumber blocknum,
36
										   char *buffer);
37
	int			(*smgr_flush) (Relation reln, BlockNumber blocknum,
38
										   char *buffer);
39 40
	int			(*smgr_blindwrt) (RelFileNode rnode, BlockNumber blkno, 
										char *buffer, bool dofsync);
41
	int			(*smgr_markdirty) (Relation reln, BlockNumber blkno);
42
	int			(*smgr_blindmarkdirty) (RelFileNode, BlockNumber blkno);
43 44
	int			(*smgr_nblocks) (Relation reln);
	int			(*smgr_truncate) (Relation reln, int nblocks);
45 46
	int			(*smgr_commit) (void);	/* may be NULL */
	int			(*smgr_abort) (void);	/* may be NULL */
V
WAL  
Vadim B. Mikheev 已提交
47 48 49
#ifdef XLOG
	int			(*smgr_sync) (void);
#endif
50
} f_smgr;
51 52

/*
53 54
 *	The weird placement of commas in this init block is to keep the compiler
 *	happy, regardless of what storage managers we have (or don't have).
55 56
 */

57
static f_smgr smgrsw[] = {
58

59 60
	/* magnetic disk */
	{mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose,
61
		mdread, mdwrite, mdflush, mdblindwrt, mdmarkdirty, mdblindmarkdirty,
V
WAL  
Vadim B. Mikheev 已提交
62 63 64
#ifdef XLOG
	mdnblocks, mdtruncate, mdcommit, mdabort, mdsync},
#else
65
	mdnblocks, mdtruncate, mdcommit, mdabort},
V
WAL  
Vadim B. Mikheev 已提交
66
#endif
67

68
#ifdef STABLE_MEMORY_STORAGE
69 70
	/* main memory */
	{mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose,
71 72
		mmread, mmwrite, mmflush, mmblindwrt, mmmarkdirty, mmblindmarkdirty,
	mmnblocks, NULL, mmcommit, mmabort},
73

74
#endif
75 76 77
};

/*
 *	smgrwo[] says, for each storage manager, whether it is write-once
 *	('true') or supports overwrite ('false').  Ideally no storage manager
 *	would be write-once.  Entries parallel those of smgrsw[].
 *
 *	Compiled only when NOT_USED is defined.
 */

#ifdef NOT_USED
static bool smgrwo[] = {
	/* magnetic disk */
	false,
#ifdef STABLE_MEMORY_STORAGE
	/* main memory */
	false,
#endif
};

#endif

94
/* Number of storage managers, i.e. the number of entries in smgrsw[]. */
static int	NSmgr = lengthof(smgrsw);
95 96

/*
97 98
 *	smgrinit(), smgrshutdown() -- Initialize or shut down all storage
 *								  managers.
99 100 101 102 103
 *
 */
int
smgrinit()
{
104
	int			i;
105 106 107 108 109 110

	for (i = 0; i < NSmgr; i++)
	{
		if (smgrsw[i].smgr_init)
		{
			if ((*(smgrsw[i].smgr_init)) () == SM_FAIL)
111
				elog(FATAL, "initialization failed on %s: %m",
112 113
					 DatumGetCString(DirectFunctionCall1(smgrout,
														 Int16GetDatum(i))));
114
		}
115 116
	}

117
	/* register the shutdown proc */
118
	on_proc_exit(smgrshutdown, 0);
119

120
	return SM_SUCCESS;
121 122
}

123
static void
124
smgrshutdown(void)
125
{
126
	int			i;
127 128 129 130 131 132

	for (i = 0; i < NSmgr; i++)
	{
		if (smgrsw[i].smgr_shutdown)
		{
			if ((*(smgrsw[i].smgr_shutdown)) () == SM_FAIL)
133
				elog(FATAL, "shutdown failed on %s: %m",
134 135
					 DatumGetCString(DirectFunctionCall1(smgrout,
														 Int16GetDatum(i))));
136
		}
137 138 139 140
	}
}

/*
 *	smgrcreate() -- Create a new relation.
 *
 *		Hands the reldesc to the smgr_create routine of storage manager
 *		'which' and returns the file descriptor that routine yields.
 *		A negative result is reported with elog(ERROR).
 */
int
smgrcreate(int16 which, Relation reln)
{
	int			result = smgrsw[which].smgr_create(reln);

	if (result < 0)
		elog(ERROR, "cannot create %s: %m", RelationGetRelationName(reln));

	return result;
}

/*
 *	smgrunlink() -- Unlink a relation.
 *
 *		The relation is removed from the store by the smgr_unlink routine
 *		of storage manager 'which'.  Returns that routine's status; an
 *		SM_FAIL status is reported with elog(ERROR).
 */
int
smgrunlink(int16 which, Relation reln)
{
	int			rc = smgrsw[which].smgr_unlink(reln);

	if (rc == SM_FAIL)
		elog(ERROR, "cannot unlink %s: %m", RelationGetRelationName(reln));

	return rc;
}

/*
 *	smgrextend() -- Add a new block to a file.
 *
 *		The block contents come from 'buffer'.  Returns SM_SUCCESS on
 *		success; aborts the current transaction (elog(ERROR)) on failure.
 */
int
smgrextend(int16 which, Relation reln, char *buffer)
{
	int			rc = smgrsw[which].smgr_extend(reln, buffer);

	if (rc == SM_FAIL)
		elog(ERROR, "cannot extend %s: %m.\n\tCheck free disk space.",
			 RelationGetRelationName(reln));

	return rc;
}

/*
 *	smgropen() -- Open a relation using a particular storage manager.
 *
 *		Returns the fd for the open relation on success and aborts the
 *		transaction (elog(ERROR)) on failure -- except that a failure is
 *		passed back to the caller as a negative fd, without an error,
 *		when the relation is flagged rd_unlinked.
 */
int
smgropen(int16 which, Relation reln)
{
	int			result = smgrsw[which].smgr_open(reln);

	if (result < 0)
	{
		/* only complain if the relation is not marked as unlinked */
		if (!reln->rd_unlinked)
			elog(ERROR, "cannot open %s: %m", RelationGetRelationName(reln));
	}

	return result;
}

/*
 *	smgrclose() -- Close a relation.
 *
 *		NOTE: the underlying manager should tolerate a relation that is
 *		already closed; indeed, the relation may have been unlinked!
 *		Currently this is called only from RelationFlushRelation() when
 *		the relation cache entry is about to be dropped, which may be a
 *		simple relation cache clear or the tail end of DROP TABLE.
 *
 *		Returns SM_SUCCESS on success, aborts (elog(ERROR)) on failure.
 */
int
smgrclose(int16 which, Relation reln)
{
	int			rc = smgrsw[which].smgr_close(reln);

	if (rc == SM_FAIL)
		elog(ERROR, "cannot close %s: %m", RelationGetRelationName(reln));

	return SM_SUCCESS;
}

/*
 *	smgrread() -- read a particular block from a relation into the supplied
 *				  buffer.
 *
 *		This routine is called from the buffer manager in order to
 *		instantiate pages in the shared buffer cache.  All storage managers
 *		return pages in the format that POSTGRES expects.  This routine
 *		dispatches the read to storage manager 'which'.  On success, it
 *		returns SM_SUCCESS.  On failure, the current transaction is
 *		aborted via elog(ERROR).
 */
int
smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
{
	int			status;

	status = (*(smgrsw[which].smgr_read)) (reln, blocknum, buffer);

	/*
	 * BlockNumber is an unsigned type (uint32 in storage/block.h), so it
	 * is reported with %u; %d would print large block numbers as negative.
	 */
	if (status == SM_FAIL)
		elog(ERROR, "cannot read block %u of %s: %m",
			 blocknum, RelationGetRelationName(reln));

	return status;
}

/*
 *	smgrwrite() -- Write the supplied buffer out.
 *
 *		This is not a synchronous write -- the interface for that is
 *		smgrflush().  The buffer is written out via storage manager
 *		'which'.  This routine returns SM_SUCCESS or aborts the current
 *		transaction via elog(ERROR).
 */
int
smgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
{
	int			status;

	status = (*(smgrsw[which].smgr_write)) (reln, blocknum, buffer);

	/*
	 * BlockNumber is an unsigned type (uint32 in storage/block.h), so it
	 * is reported with %u; %d would print large block numbers as negative.
	 */
	if (status == SM_FAIL)
		elog(ERROR, "cannot write block %u of %s: %m",
			 blocknum, RelationGetRelationName(reln));

	return status;
}

/*
 *	smgrflush() -- A synchronous smgrwrite().
 *
 *		Dispatches to the smgr_flush routine of storage manager 'which'.
 *		Returns that routine's status; an SM_FAIL status is reported
 *		with elog(ERROR).
 */
int
smgrflush(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
{
	int			status;

	status = (*(smgrsw[which].smgr_flush)) (reln, blocknum, buffer);

	/*
	 * BlockNumber is an unsigned type (uint32 in storage/block.h), so it
	 * is reported with %u; %d would print large block numbers as negative.
	 */
	if (status == SM_FAIL)
		elog(ERROR, "cannot flush block %u of %s to stable store: %m",
			 blocknum, RelationGetRelationName(reln));

	return status;
}

/*
295
 *	smgrblindwrt() -- Write a page out blind.
296
 *
297 298 299 300 301 302 303
 *		In some cases, we may find a page in the buffer cache that we
 *		can't make a reldesc for.  This happens, for example, when we
 *		want to reuse a dirty page that was written by a transaction
 *		that has not yet committed, which created a new relation.  In
 *		this case, the buffer manager will call smgrblindwrt() with
 *		the name and OID of the database and the relation to which the
 *		buffer belongs.  Every storage manager must be able to force
304
 *		this page down to stable storage in this circumstance.	The
305
 *		write should be synchronous if dofsync is true.
306
 */
307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
int
smgrblindwrt(int16 which,
			 RelFileNode rnode,
			 BlockNumber blkno,
			 char *buffer,
			 bool dofsync)
{
	int			status;

	status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer, dofsync);

	if (status == SM_FAIL)
		elog(ERROR, "cannot write block %d of %u/%u blind: %m",
			 blkno, rnode.tblNode, rnode.relNode);

	return status;
}

325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
/*
 *	smgrmarkdirty() -- Mark a page dirty (needs fsync).
 *
 *		Mark the specified page as needing to be fsync'd before commit.
 *		Ordinarily, the storage manager will do this implicitly during
 *		smgrwrite().  However, the buffer manager may discover that some
 *		other backend has written a buffer that we dirtied in the current
 *		transaction.  In that case, we still need to fsync the file to be
 *		sure the page is down to disk before we commit.
 *
 *		Returns the storage manager's status; an SM_FAIL status is
 *		reported with elog(ERROR).
 */
int
smgrmarkdirty(int16 which,
			  Relation reln,
			  BlockNumber blkno)
{
	int			status;

	status = (*(smgrsw[which].smgr_markdirty)) (reln, blkno);

	/*
	 * BlockNumber is an unsigned type (uint32 in storage/block.h), so it
	 * is reported with %u; %d would print large block numbers as negative.
	 */
	if (status == SM_FAIL)
		elog(ERROR, "cannot mark block %u of %s: %m",
			 blkno, RelationGetRelationName(reln));

	return status;
}

/*
 *	smgrblindmarkdirty() -- Mark a page dirty, "blind".
 *
 *		Just like smgrmarkdirty, except we don't have a reldesc.
 */
356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371
int
smgrblindmarkdirty(int16 which,
				   RelFileNode rnode,
				   BlockNumber blkno)
{
	int			status;

	status = (*(smgrsw[which].smgr_blindmarkdirty)) (rnode, blkno);

	if (status == SM_FAIL)
		elog(ERROR, "cannot mark block %d of %u/%u blind: %m",
			 blkno, rnode.tblNode, rnode.relNode);

	return status;
}

372
/*
 *	smgrnblocks() -- Calculate the number of POSTGRES blocks in the
 *					 supplied relation.
 *
 *		Returns the number of blocks on success, aborts the current
 *		transaction (elog(ERROR)) when the storage manager reports a
 *		negative count.
 */
int
smgrnblocks(int16 which, Relation reln)
{
	int			count = smgrsw[which].smgr_nblocks(reln);

	if (count < 0)
		elog(ERROR, "cannot count blocks for %s: %m",
			 RelationGetRelationName(reln));

	return count;
}

391
/*
 *	smgrtruncate() -- Truncate supplied relation to a specified number
 *						of blocks
 *
 *		Returns the number of blocks on success, aborts the current
 *		transaction (elog(ERROR)) on failure.  A storage manager that
 *		supplies no truncate routine is not called at all; 'nblocks' is
 *		simply returned unchanged.
 */
int
smgrtruncate(int16 which, Relation reln, int nblocks)
{
	int			result;

	/* nothing to dispatch to: hand the requested size straight back */
	if (!smgrsw[which].smgr_truncate)
		return nblocks;

	result = smgrsw[which].smgr_truncate(reln, nblocks);
	if (result < 0)
		elog(ERROR, "cannot truncate %s to %d blocks: %m",
			 RelationGetRelationName(reln), nblocks);

	return result;
}

414
/*
415 416
 *	smgrcommit(), smgrabort() -- Commit or abort changes made during the
 *								 current transaction.
417 418 419 420
 */
int
smgrcommit()
{
421
	int			i;
422 423 424 425 426 427

	for (i = 0; i < NSmgr; i++)
	{
		if (smgrsw[i].smgr_commit)
		{
			if ((*(smgrsw[i].smgr_commit)) () == SM_FAIL)
428
				elog(FATAL, "transaction commit failed on %s: %m",
429 430
					 DatumGetCString(DirectFunctionCall1(smgrout,
														 Int16GetDatum(i))));
431
		}
432 433
	}

434
	return SM_SUCCESS;
435 436 437 438 439
}

int
smgrabort()
{
440
	int			i;
441 442 443 444 445 446

	for (i = 0; i < NSmgr; i++)
	{
		if (smgrsw[i].smgr_abort)
		{
			if ((*(smgrsw[i].smgr_abort)) () == SM_FAIL)
447
				elog(FATAL, "transaction abort failed on %s: %m",
448 449
					 DatumGetCString(DirectFunctionCall1(smgrout,
														 Int16GetDatum(i))));
450
		}
451 452
	}

453
	return SM_SUCCESS;
454
}
455

V
WAL  
Vadim B. Mikheev 已提交
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476
#ifdef XLOG
/*
 *	smgrsync() -- Run the sync routine of every storage manager.
 *
 *		Calls the smgr_sync hook of every storage manager that provides
 *		one; a hook returning SM_FAIL is reported with elog(STOP).
 *		Returns SM_SUCCESS.  Compiled only when XLOG is defined.
 */
int
smgrsync()
{
	int			mgr;

	for (mgr = 0; mgr < NSmgr; mgr++)
	{
		/* managers without a sync hook are skipped */
		if (!smgrsw[mgr].smgr_sync)
			continue;

		if (smgrsw[mgr].smgr_sync() == SM_FAIL)
			elog(STOP, "storage sync failed on %s: %m",
				 DatumGetCString(DirectFunctionCall1(smgrout,
													 Int16GetDatum(mgr))));
	}

	return SM_SUCCESS;
}
#endif

477
#ifdef NOT_USED
/*
 *	smgriswo() -- Report whether a storage manager is write-once.
 *
 *		Looks 'smgrno' up in smgrwo[].  A number outside 0 .. NSmgr-1
 *		is reported with elog(ERROR).  Compiled only when NOT_USED is
 *		defined.
 */
bool
smgriswo(int16 smgrno)
{
	bool		valid = (smgrno >= 0 && smgrno < NSmgr);

	if (!valid)
		elog(ERROR, "illegal storage manager number %d", smgrno);

	return smgrwo[smgrno];
}

#endif
V
WAL  
Vadim B. Mikheev 已提交
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510

#ifdef XLOG
#include "access/xlog.h"

/*
 * Prototypes for the three XLOG routines defined below; none of them is
 * declared static.
 */
void smgr_redo(XLogRecPtr lsn, XLogRecord *record);
void smgr_undo(XLogRecPtr lsn, XLogRecord *record);
void smgr_desc(char *buf, uint8 xl_info, char* rec);

/*
 * smgr_redo() -- empty stub; ignores both arguments and does nothing.
 *
 * NOTE(review): presumably the XLOG redo callback for storage manager
 * records -- confirm against the caller before relying on that.
 */
void
smgr_redo(XLogRecPtr lsn, XLogRecord *record)
{
}

/*
 * smgr_undo() -- empty stub; ignores both arguments and does nothing.
 *
 * NOTE(review): presumably the XLOG undo callback for storage manager
 * records -- confirm against the caller before relying on that.
 */
void
smgr_undo(XLogRecPtr lsn, XLogRecord *record)
{
}
 
/*
 * smgr_desc() -- empty stub; leaves 'buf' untouched and does nothing.
 *
 * NOTE(review): presumably meant to write a description of a storage
 * manager XLOG record into 'buf' -- confirm against the caller.
 */
void
smgr_desc(char *buf, uint8 xl_info, char* rec)
{
}
#endif