From 6b9d44c96c809c2e77be83baec2ab8cb0426041a Mon Sep 17 00:00:00 2001 From: "Ashwin Agrawal, Asim R P and Xin Zhang" Date: Thu, 16 Feb 2017 15:13:25 -0800 Subject: [PATCH] Do not initialize relcache before xlog replay. The need for heap access methods before xlog replay is removed by commit e2d6aa1481f6cdbd846d4b17b68eb4387dae9211. This commit simply moves the relcache initialization to pass4, where it is really needed. Do not bother to remove the relcache init file at the end of crash recovery pass2. Error out if the relation cache is initialized at the wrong time. --- src/backend/access/transam/xlog.c | 91 ++++++++++--------- src/backend/utils/cache/relcache.c | 7 ++ .../crashrecovery/test_reindex_pg_class.py | 3 - 3 files changed, 54 insertions(+), 47 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index e9bcbc819f..0f573019c5 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7725,6 +7725,35 @@ void StartupXLOG_Pass4(void) { bool doPTCatVerification = false; + char *fullpath; + + /* + * In order to access the catalog, we need a database, and a + * tablespace; our access to the heap is going to be slightly + * limited, so we'll just use some defaults. + */ + if (!XLogStartup_DoNextPTCatVerificationIteration()) + { + MyDatabaseId = TemplateDbOid; + MyDatabaseTableSpace = DEFAULTTABLESPACE_OID; + } + else + { + MyDatabaseId = XLogCtl->currentDatabaseToVerify; + MyDatabaseTableSpace = XLogCtl->tablespaceOfCurrentDatabaseToVerify; + } + + /* + * Now we can mark our PGPROC entry with the database ID + * (We assume this is an atomic store so no lock is needed) + */ + MyProc->databaseId = MyDatabaseId; + + fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); + + SetDatabasePath(fullpath); + + RelationCacheInitializePhase3(); /* * Start with the basic Pass 4 integrity checks. 
If requested (GUC & No In-Doubt @@ -11176,8 +11205,6 @@ HandleCrash(SIGNAL_ARGS) void StartupProcessMain(int passNum) { - char *fullpath; - am_startup = true; /* * If possible, make this process a group leader, so that the postmaster @@ -11294,7 +11321,24 @@ StartupProcessMain(int passNum) */ InitBufferPoolBackend(); - /* heap access requires the rel-cache */ + /* heap access requires the rel-cache. + * + * Pass 2 uses heap API to insert/update/delete from persistent + * tables. In order to use the heap API, RelationDescriptor is + * required. In pass 2, persistent tables are accessed using + * DirectOpen API to obtain the RelationDescriptor. Hence, we + * don't need to load full relcache as in + * RelationCacheInitializePhase3(). + * + * However, the cache invalidation logic within the heap API + * needs basic data structures for the catalog cache to be + * initialized. Hence, we need to do RelationCacheInitialize(), + * InitCatalogCache(), and RelationCacheInitializePhase2() + * before StartupXLOG_Pass2(). + * + * Pass 4 needs RelationCacheInitializePhase3() to do catalog + * validation, after xlog replay is complete. + */ RelationCacheInitialize(); InitCatalogCache(); @@ -11306,50 +11350,9 @@ StartupProcessMain(int passNum) */ RelationCacheInitializePhase2(); - /* - * In order to access the catalog, we need a database, and a - * tablespace; our access to the heap is going to be slightly - * limited, so we'll just use some defaults. 
- */ - if (!XLogStartup_DoNextPTCatVerificationIteration()) - { - MyDatabaseId = TemplateDbOid; - MyDatabaseTableSpace = DEFAULTTABLESPACE_OID; - } - else - { - MyDatabaseId = XLogCtl->currentDatabaseToVerify; - MyDatabaseTableSpace = XLogCtl->tablespaceOfCurrentDatabaseToVerify; - } - - /* - * Now we can mark our PGPROC entry with the database ID */ - /* (We assume this is an atomic store so no lock is needed) - */ - MyProc->databaseId = MyDatabaseId; - - fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); - - SetDatabasePath(fullpath); - - RelationCacheInitializePhase3(); - if (passNum == 2) { StartupXLOG_Pass2(); - /* - * The cache init file created by - * RelationCacheInitializePhase3() contains critical - * relations and index entries from template1 database. - * XLOG replay in pass 3 may invalidate these entries, - * e.g. redo records for reindex operation on a system - * table in template1. Therefore, delete the file now - * and let pass 4 rebuild it. Note that pass 3 does not - * need relcache to operate as it uses resource manager - * redo (rm_redo()) methods to replay xlog rather than - * regular access methods. - */ - RelationCacheInitFileRemove(); } else { diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 16b5764d6d..41f28b0df7 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -3349,6 +3349,13 @@ RelationCacheInitializePhase3(void) MemoryContext oldcxt; bool needNewCacheFile = !criticalSharedRelcachesBuilt; + /* + * Relation cache initialization or any sort of heap access is + * dangerous before recovery is finished. 
+ */ + if (!IsBootstrapProcessingMode() && RecoveryInProgress()) + elog(ERROR, "relation cache initialization during recovery or non-bootstrap processes."); + /* * switch to cache memory context */ diff --git a/src/test/tinc/tincrepo/mpp/gpdb/tests/storage/crashrecovery/test_reindex_pg_class.py b/src/test/tinc/tincrepo/mpp/gpdb/tests/storage/crashrecovery/test_reindex_pg_class.py index 0a0523300b..62df632d3c 100644 --- a/src/test/tinc/tincrepo/mpp/gpdb/tests/storage/crashrecovery/test_reindex_pg_class.py +++ b/src/test/tinc/tincrepo/mpp/gpdb/tests/storage/crashrecovery/test_reindex_pg_class.py @@ -72,9 +72,6 @@ class ReindexPgClass(TINCTestCase): def test_reindex_pg_class_template1(self): """Test that relcache does not contain stale refilenodes after crash recovery. - - This used to happen before we started deleting relcache init - file at the end of crash recovery pass 2. """ self.dbname = "template1" self.test_reindex_pg_class() -- GitLab