提交 73434db2 编写于 作者: D Daniel Gustafsson

Ensure preassigned Oids aren't reused during upgrade

During an upgrade, there are numerous new objects created in the new
cluster which either don't have Oids preallocated from the old cluster,
or which never existed in the old cluster to begin with. These objects
are assigned Oids in the new cluster which may collide with Oids which
are preassigned from the old cluster, but which haven't yet been used
in the restore process. This is because the restore process is doing
the preallocation just before the object creation.

To avoid collisions, the Oids which will be preassigned are tracked
during schema dump, and are injected into the Oid dispatch machinery
before any object is restored such that the list can be queried. This
requires a new mode of dumping in pg_dump where an object is recorded
first in the dependency chain but is dumped last. To support this, a
new API for amending ArchiveEntry objects has been added. Currently
it only supports changing the definition but it can easily be extended
to cover future usecases.

The performance of this patch is a TODO: passing all the Oids in an
array in a single call is unlikely to scale to real-world scenarios,
but it's better to subject this to a wider audience sooner rather
than later.

Also fix up since-long outdated documentation comments in oid_dispatch.c
and some minor style nits while in there.
Reviewed-by: Jacob Champion <pchampion@pivotal.io>
上级 ab3dc0ff
......@@ -97,6 +97,12 @@ install_support_functions_in_new_db(const char *db_name)
"RETURNS VOID "
"AS '$libdir/pg_upgrade_support' "
"LANGUAGE C STRICT;"));
PQclear(executeQueryOrDie(conn,
"CREATE OR REPLACE FUNCTION "
"binary_upgrade.set_preassigned_oids(oid[]) "
"RETURNS VOID "
"AS '$libdir/pg_upgrade_support' "
"LANGUAGE C STRICT;"));
PQfinish(conn);
}
......
......@@ -11,6 +11,7 @@
#include "postgres.h"
#include "fmgr.h"
#include "access/transam.h"
#include "catalog/binary_upgrade.h"
#include "catalog/namespace.h"
#include "catalog/oid_dispatch.h"
......@@ -51,6 +52,8 @@ PG_FUNCTION_INFO_V1(create_empty_extension);
PG_FUNCTION_INFO_V1(set_next_pg_namespace_oid);
PG_FUNCTION_INFO_V1(set_preassigned_oids);
Datum
set_next_pg_type_oid(PG_FUNCTION_ARGS)
{
......@@ -229,3 +232,24 @@ set_next_pg_namespace_oid(PG_FUNCTION_ARGS)
PG_RETURN_VOID();
}
Datum
set_preassigned_oids(PG_FUNCTION_ARGS)
{
ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
Datum *oids;
int nelems;
int i;
deconstruct_array(array, OIDOID, sizeof(Oid), true, 'i',
&oids, NULL, &nelems);
for (i = 0; i < nelems; i++)
{
Datum oid = DatumGetObjectId(oids[i]);
MarkOidPreassignedFromBinaryUpgrade(oid);
}
PG_RETURN_VOID();
}
......@@ -45,24 +45,18 @@
* nodes when the DDL command is dispatched, and for the QE nodes to use the
* same, pre-assigned, OIDs for the objects.
*
* This same mechanism can be used to preserve OIDs during pg_upgrade. In
* PostgreSQL, pg_upgrade only needs to preserve the OIDs of a few objects,
* like types, but in GPDB we need to preserve most OIDs, because they need
* to be kept in sync between the nodes. (Strictly speaking, we only need to
* ensure that all the nodes use the same OIDs in the upgraded clusters, but
* they wouldn't need to be the same as before upgrade. However, the most
* straightforward way to achieve that is to use the same OIDs as before
* upgrade.)
* This same mechanism is used to preserve OIDs when upgrading a GPDB cluster
* using pg_upgrade. pg_upgrade in PostgreSQL is using a set of global vars to
* communicate the next OID for an object during upgrade, a strategy GPDB
* doesn't employ due to the need for multiple OIDs for auxiliary objects.
* pg_upgrade records the OIDs from the old cluster and inserts them into the
* same 'preassigned_oids' list to restore them, that we use to assign specific
* OIDs in a QE node at dispatch. Additionally, to ensure that object creation
* that isn't bound by preassigned OIDs isn't consuming an OID that will later
* in the restore process be preassigned, a separate list of all such OIDs is
* maintained and queried before assigning a new non-preassigned OID.
*
* pg_upgrade has its own mechanism to record the OIDs from the old cluster
* but when restoring the schema in the new cluster, it uses the same
* 'preassigned_oids' list to restore them, that we use to assign specific
* OIDs in a QE node at dispatch.
*
* (XXX: All the pg_upgrade code described above is to-be-done, as of
* this writing),
*
* Portions Copyright 2016 Pivotal Software, Inc.
* Portions Copyright 2016-Present Pivotal Software, Inc.
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
......@@ -148,6 +142,12 @@ static List *preassigned_oids = NIL;
*/
static List *dispatch_oids = NIL;
/*
* These will be used by the schema restoration process during binary upgrade,
* so any new object must not use any Oid on this list or else there will be
* collisions.
*/
static List *binary_upgrade_preassigned_oids = NIL;
/*
* Create an OidAssignment struct, for a catalog table tuple.
......@@ -449,7 +449,7 @@ CreateKeyFromCatalogTuple(Relation catalogrel, HeapTuple tuple,
*exempt = true;
break;
/* Event triggers are only stored and fired in the QD. */
/* Event triggers are only stored and fired in the QD. */
case EventTriggerRelationId:
*exempt = true;
break;
......@@ -465,12 +465,12 @@ CreateKeyFromCatalogTuple(Relation catalogrel, HeapTuple tuple,
break;
/*
* These objects need to have their OIDs synchronized, but there is bespoken
* code to deal with it.
* These objects need to have their OIDs synchronized, but there is
* bespoken code to deal with it.
*/
case TriggerRelationId:
*exempt = true;
break;
break;
default:
*recognized = false;
......@@ -632,10 +632,11 @@ GetPreassignedOidForTuple(Relation catalogrel, HeapTuple tuple)
if ((oid = GetPreassignedOid(&searchkey)) == InvalidOid)
{
/*
* When binary-upgrading the QD node, we must preserve the OIDs of
* types, relations and enums from the old cluster, so we should have
* pre-assigned OIDs for them. For now we don't enforce the OIDs for
* these objects here, consider that a future TODO.
* During normal operation, all OIDs are preassigned unless the object
* type is exempt (in which case we should never reach here). During
* upgrades we do however allow objects to be created with new OIDs
* since objects may be created in new cluster which didn't exist in
* the old cluster.
*/
if (!IsBinaryUpgrade)
elog(ERROR, "no pre-assigned OID for %s tuple \"%s\" (namespace:%u keyOid1:%u keyOid2:%u)",
......@@ -734,6 +735,36 @@ GetPreassignedOidForType(Oid namespaceOid, const char *typname,
* ----------------------------------------------------------------
*/
/*
 * MarkOidPreassignedFromBinaryUpgrade
 *
 * Record an Oid that the schema restore of a binary upgrade is going to use,
 * so that objects created without a preassigned Oid can be kept from
 * consuming it and causing a collision.
 *
 * Raises an ERROR when called outside of binary upgrade mode. InvalidOid is
 * ignored without being recorded.
 */
void
MarkOidPreassignedFromBinaryUpgrade(Oid oid)
{
	MemoryContext callerContext;

	if (!IsBinaryUpgrade)
		elog(ERROR, "MarkOidPreassignedFromBinaryUpgrade called, but not in binary upgrade mode");

	if (oid != InvalidOid)
	{
		/* The list is extended while TopMemoryContext is current */
		callerContext = MemoryContextSwitchTo(TopMemoryContext);

		/*
		 * TODO: a plain list is a poor fit here since a large schema can
		 * contribute a high number of Oids; a store with quick lookups
		 * should replace it.
		 */
		binary_upgrade_preassigned_oids =
			lappend_oid(binary_upgrade_preassigned_oids, oid);

		MemoryContextSwitchTo(callerContext);
	}
}
/*
* Remember an OID which is set from loading a database dump performed
* using the binary-upgrade flag.
......@@ -936,5 +967,5 @@ IsOidAcceptable(Oid oid)
return false;
}
return true;
return !(list_member_oid(binary_upgrade_preassigned_oids, oid));
}
......@@ -50,6 +50,7 @@ static DumpableObject **oprinfoindex;
static DumpableObject **collinfoindex;
static DumpableObject **nspinfoindex;
static DumpableObject **extinfoindex;
static DumpableObject **binaryupgradeinfoindex;
static int numTables;
static int numTypes;
static int numFuncs;
......@@ -80,7 +81,7 @@ static int strInArray(const char *pattern, char **arr, int arr_size);
* Collect information about all potentially dumpable objects
*/
TableInfo *
getSchemaData(Archive *fout, int *numTablesPtr)
getSchemaData(Archive *fout, int *numTablesPtr, int binary_upgrade)
{
TableInfo *tblinfo;
TypeInfo *typinfo;
......@@ -110,6 +111,17 @@ getSchemaData(Archive *fout, int *numTablesPtr)
/* GPDB specific variables */
int numExtProtocols;
if (binary_upgrade)
{
BinaryUpgradeInfo *binfo;
if (g_verbose)
write_msg(NULL, "identifying required binary upgrade calls\n");
binfo = getBinaryUpgradeObjects();
binaryupgradeinfoindex = buildIndexArray(binfo, 1, sizeof(BinaryUpgradeInfo));
}
/*
* We must read extensions and extension membership info first, because
* extension membership needs to be consultable during decisions about
......
......@@ -181,6 +181,8 @@ extern void ArchiveEntry(Archive *AHX,
const DumpId *deps, int nDeps,
DataDumperPtr dumpFn, void *dumpArg);
extern void AmendArchiveEntry(Archive *AHX, DumpId dumpId, const char *defn);
/* Called to write *data* to the archive */
extern void WriteData(Archive *AH, const void *data, size_t dLen);
......
......@@ -875,6 +875,37 @@ WriteData(Archive *AHX, const void *data, size_t dLen)
return;
}
/*
 * Amend an existing TOC entry by changing its definition. This can be used
 * in situations where the TOC entry must be restored first, but dumped last.
 * By first issuing the ArchiveEntry() to create a TOC with a placeholder
 * defn, the defn can be updated with the actual contents later using this.
 * The current usecase is binary upgrade Oid preassignment where we need to
 * restore the preassignments before any object that allocates Oids has been
 * created, but the definition of the preassignments can only be dumped last
 * when we've seen all the relevant Oids.
 *
 * AHX		archive whose TOC is searched
 * dumpId	dump ID of the entry to amend
 * defn		new definition; a private copy is stored in the entry
 *
 * If no entry carries the given dumpId, nothing is changed.
 */
void
AmendArchiveEntry(Archive *AHX, DumpId dumpId, const char *defn)
{
	ArchiveHandle *AH = (ArchiveHandle *) AHX;
	TocEntry   *te;

	/*
	 * NOTE(review): in upstream pg_backup_archiver.c the TOC is a circular
	 * doubly-linked list with AH->toc as a dummy header (see how _allocAH()
	 * and ArchiveEntry() link entries) -- confirm for this tree. Walking
	 * from the header until a NULL link would therefore never terminate for
	 * an unknown dumpId, and would also consider the header a real entry.
	 * Start after the header and stop once we are back at it; the NULL check
	 * is kept as a safety net.
	 */
	for (te = AH->toc->next; te != NULL && te != AH->toc; te = te->next)
	{
		if (te->dumpId != dumpId)
			continue;

		/* drop the previous (placeholder) definition before replacing it */
		if (te->defn)
			pg_free(te->defn);
		te->defn = pg_strdup(defn);
		return;
	}
}
/*
* Create a new TOC entry. The TOC was designed as a TOC, but is now the
* repository for all metadata. But the name has stuck.
......
......@@ -130,6 +130,8 @@ static SimpleStringList relid_string_list = {NULL, NULL};
static SimpleStringList funcid_string_list = {NULL, NULL};
static SimpleOidList function_include_oids = {NULL, NULL};
static SimpleOidList preassigned_oids = {NULL, NULL};
/* default, if no "inclusion" switches appear, is to dump everything */
static bool include_everything = true;
......@@ -160,6 +162,8 @@ static bool gp_partitioning_available = false;
/* flag indicating whether or not this GP database supports column encoding */
static bool gp_attribute_encoding_available = false;
static DumpId binary_upgrade_dumpid;
static void help(const char *progname);
static void setup_connection(Archive *AH, const char *dumpencoding,
char *use_role);
......@@ -267,6 +271,8 @@ static char *getFormattedTypeName(Archive *fout, Oid oid, OidOptions opts);
static void getBlobs(Archive *fout);
static void dumpBlob(Archive *fout, BlobInfo *binfo);
static int dumpBlobs(Archive *fout, void *arg);
static void dumpPreassignedOidArchiveEntry(Archive *fout, BinaryUpgradeInfo *binfo);
static void dumpPreassignedOidDefinition(Archive *fout, BinaryUpgradeInfo *binfo);
static void dumpDatabase(Archive *AH);
static void dumpEncoding(Archive *AH);
static void dumpStdStrings(Archive *AH);
......@@ -990,7 +996,7 @@ main(int argc, char **argv)
* Now scan the database and create DumpableObject structs for all the
* objects we intend to dump.
*/
tblinfo = getSchemaData(fout, &numTables);
tblinfo = getSchemaData(fout, &numTables, binary_upgrade);
if (fout->remoteVersion < 80400)
guessConstraintInheritance(tblinfo, numTables);
......@@ -1058,9 +1064,18 @@ main(int argc, char **argv)
if (include_everything && !dataOnly)
dumpDatabase(fout);
int binfo_index = -1;
/* Now the rearrangeable objects. */
for (i = 0; i < numObjs; i++)
{
dumpDumpableObject(fout, dobjs[i]);
if (dobjs[i]->objType == DO_BINARY_UPGRADE)
binfo_index = i;
}
/* Amend the Oid preassignment TOC with the actual Oids gathered */
if (binary_upgrade && binfo_index >= 0)
dumpPreassignedOidDefinition(fout, (BinaryUpgradeInfo *) dobjs[binfo_index]);
/*
* Set up options info to ensure we dump what we want.
......@@ -2558,6 +2573,65 @@ guessConstraintInheritance(TableInfo *tblinfo, int numTables)
}
}
/*
 * dumpPreassignedOidArchiveEntry
 *	  Create the TOC entry for the binary upgrade Oid preassignment object.
 *
 * Only a placeholder SQL comment is stored as the definition at this point;
 * dumpPreassignedOidDefinition() later amends the entry, identified by the
 * same dump ID, with the real statement.
 */
static void
dumpPreassignedOidArchiveEntry(Archive *fout, BinaryUpgradeInfo *binfo)
{
	PQExpBuffer placeholder = createPQExpBuffer();
	char	   *label = pg_strdup("binary_upgrade");
	CatalogId	zeroCatId;

	/* the entry is registered with an all-zero catalog ID */
	zeroCatId.tableoid = 0;
	zeroCatId.oid = 0;

	appendPQExpBufferStr(placeholder,
						 "-- Placeholder for binary_upgrade.set_preassigned_oids()\n\n");

	ArchiveEntry(fout,
				 zeroCatId,				/* catalog ID */
				 binfo->dobj.dumpId,	/* dump ID */
				 label,					/* Name */
				 NULL,					/* Namespace */
				 NULL,					/* Tablespace */
				 "",					/* Owner */
				 false,					/* with oids */
				 label,					/* Desc */
				 SECTION_PRE_DATA,		/* Section */
				 placeholder->data,		/* Create */
				 "",					/* Del */
				 NULL,					/* Copy */
				 NULL,					/* Deps */
				 0,						/* # Deps */
				 NULL,					/* Dumper */
				 NULL);					/* Dumper Arg */

	free(label);
	destroyPQExpBuffer(placeholder);
}
/*
 * dumpPreassignedOidDefinition
 *	  Build the binary_upgrade.set_preassigned_oids() call from every Oid
 *	  collected in preassigned_oids and store it as the definition of the
 *	  TOC entry belonging to binfo, via AmendArchiveEntry().
 */
static void
dumpPreassignedOidDefinition(Archive *fout, BinaryUpgradeInfo *binfo)
{
	PQExpBuffer sql = createPQExpBuffer();
	SimpleOidListCell *item = preassigned_oids.head;

	appendPQExpBufferStr(sql,
						 "SELECT binary_upgrade.set_preassigned_oids(ARRAY[");

	/* comma-separated list of Oids, without a trailing separator */
	while (item != NULL)
	{
		appendPQExpBuffer(sql, "%u", item->val);
		if (item->next != NULL)
			appendPQExpBufferStr(sql, ",");
		item = item->next;
	}

	appendPQExpBufferStr(sql, "]::pg_catalog.oid[]);\n\n");

	AmendArchiveEntry(fout, binfo->dobj.dumpId, sql->data);

	destroyPQExpBuffer(sql);
}
/*
* dumpDatabase:
......@@ -3127,6 +3201,7 @@ binary_upgrade_set_namespace_oid(Archive *fout, PQExpBuffer upgrade_buffer,
upgrade_res = ExecuteSqlQueryForSingleRow(fout, upgrade_query->data);
pg_nspname = PQgetvalue(upgrade_res, 0, PQfnumber(upgrade_res, "nspname"));
simple_oid_list_append(&preassigned_oids, pg_namespace_oid);
appendPQExpBuffer(upgrade_buffer, "\n-- For binary upgrade, must preserve pg_namespace oid\n");
appendPQExpBuffer(upgrade_buffer,
"SELECT binary_upgrade.set_next_pg_namespace_oid('%u'::pg_catalog.oid, "
......@@ -3148,6 +3223,7 @@ binary_upgrade_set_type_oids_by_type_oid(Archive *fout,
Oid pg_type_array_nsoid;
char *pg_type_array_name;
simple_oid_list_append(&preassigned_oids, pg_type_oid);
appendPQExpBufferStr(upgrade_buffer, "\n-- For binary upgrade, must preserve pg_type oid\n");
appendPQExpBuffer(upgrade_buffer,
"SELECT binary_upgrade.set_next_pg_type_oid('%u'::pg_catalog.oid, "
......@@ -3171,6 +3247,7 @@ binary_upgrade_set_type_oids_by_type_oid(Archive *fout,
if (OidIsValid(pg_type_array_oid))
{
simple_oid_list_append(&preassigned_oids, pg_type_array_oid);
appendPQExpBufferStr(upgrade_buffer,
"\n-- For binary upgrade, must preserve pg_type array oid\n");
appendPQExpBuffer(upgrade_buffer,
......@@ -3259,6 +3336,7 @@ binary_upgrade_set_type_oids_by_rel_oid_impl(Archive *fout,
* owner's OID, but the new cluster will be using the correct name, and
* it's the new cluster's name that we have to use in preassignment.
*/
simple_oid_list_append(&preassigned_oids, pg_type_toast_oid);
appendPQExpBufferStr(upgrade_buffer, "\n-- For binary upgrade, must preserve pg_type toast oid\n");
appendPQExpBuffer(upgrade_buffer,
"SELECT binary_upgrade.set_next_toast_pg_type_oid('%u'::pg_catalog.oid, "
......@@ -3411,6 +3489,7 @@ binary_upgrade_set_pg_class_oids_impl(Archive *fout,
if (!is_index)
{
simple_oid_list_append(&preassigned_oids, pg_class_oid);
appendPQExpBuffer(upgrade_buffer,
"SELECT binary_upgrade.set_next_heap_pg_class_oid('%u'::pg_catalog.oid, "
"'%u'::pg_catalog.oid, $$%s$$::text);\n",
......@@ -3434,11 +3513,13 @@ binary_upgrade_set_pg_class_oids_impl(Archive *fout,
* in preassignment.
*/
simple_oid_list_append(&preassigned_oids, pg_class_reltoastrelid);
appendPQExpBuffer(upgrade_buffer,
"SELECT binary_upgrade.set_next_toast_pg_class_oid('%u'::pg_catalog.oid, '%u'::pg_catalog.oid, $$pg_toast_%u$$::text);\n",
pg_class_reltoastrelid, pg_class_reltoastnamespace, pg_class_oid);
/* every toast table has an index */
simple_oid_list_append(&preassigned_oids, pg_index_indexrelid);
appendPQExpBuffer(upgrade_buffer,
"SELECT binary_upgrade.set_next_index_pg_class_oid('%u'::pg_catalog.oid , '%u'::pg_catalog.oid, $$pg_toast_%u_index$$::text);\n",
pg_index_indexrelid, pg_class_reltoastnamespace, pg_class_oid);
......@@ -3481,6 +3562,7 @@ binary_upgrade_set_pg_class_oids_impl(Archive *fout,
}
else
{
simple_oid_list_append(&preassigned_oids, pg_class_oid);
appendPQExpBuffer(upgrade_buffer,
"SELECT binary_upgrade.set_next_index_pg_class_oid('%u'::pg_catalog.oid, '%u'::pg_catalog.oid, $$%s$$::text);\n",
pg_class_oid, pg_class_relnamespace, pg_class_relname);
......@@ -3749,6 +3831,22 @@ getExtensions(Archive *fout, int *numExtensions)
return extinfo;
}
BinaryUpgradeInfo *
getBinaryUpgradeObjects(void)
{
BinaryUpgradeInfo *binfo;
binfo = (BinaryUpgradeInfo *) pg_malloc0(sizeof(BinaryUpgradeInfo));
binfo->dobj.objType = DO_BINARY_UPGRADE;
AssignDumpId(&binfo->dobj);
binfo->dobj.name = pg_strdup("__binary_upgrade");
binary_upgrade_dumpid = binfo->dobj.dumpId;
return binfo;
}
/*
* getTypes:
* read all types in the system catalogs and return them in the
......@@ -8197,6 +8295,10 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
case DO_POST_DATA_BOUNDARY:
/* never dumped, nothing to do */
break;
case DO_BINARY_UPGRADE:
dumpPreassignedOidArchiveEntry(fout, (BinaryUpgradeInfo *) dobj);
break;
}
}
......@@ -8244,7 +8346,7 @@ dumpNamespace(Archive *fout, NamespaceInfo *nspinfo)
nspinfo->rolname,
false, "SCHEMA", SECTION_PRE_DATA,
q->data, delq->data, NULL,
NULL, 0,
&(binary_upgrade_dumpid), 1,
NULL, NULL);
/* Dump Schema Comments and Security Labels */
......@@ -16570,6 +16672,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
case DO_BLOB:
case DO_EXTPROTOCOL:
case DO_TYPE_STORAGE_OPTIONS:
case DO_BINARY_UPGRADE:
/* Pre-data objects: must come before the pre-data boundary */
addObjectDependency(preDataBound, dobj->dumpId);
break;
......
......@@ -117,7 +117,9 @@ typedef enum
DO_PRE_DATA_BOUNDARY,
DO_POST_DATA_BOUNDARY,
DO_EVENT_TRIGGER,
DO_REFRESH_MATVIEW
DO_REFRESH_MATVIEW,
DO_BINARY_UPGRADE
} DumpableObjectType;
typedef struct _dumpableObject
......@@ -134,6 +136,11 @@ typedef struct _dumpableObject
int allocDeps; /* allocated size of dependencies[] */
} DumpableObject;
/*
 * Dumpable object of type DO_BINARY_UPGRADE, allocated by
 * getBinaryUpgradeObjects(). It carries nothing beyond the common
 * DumpableObject header.
 */
typedef struct _binaryupgradeinfo
{
/* common dumpable-object header (type, dump ID, name, dependencies) */
DumpableObject dobj;
} BinaryUpgradeInfo;
typedef struct _namespaceInfo
{
DumpableObject dobj;
......@@ -561,7 +568,7 @@ extern const char *EXT_PARTITION_NAME_POSTFIX;
struct Archive;
typedef struct Archive Archive;
extern TableInfo *getSchemaData(Archive *, int *numTablesPtr);
extern TableInfo *getSchemaData(Archive *, int *numTablesPtr, int binary_upgrade);
typedef enum _OidOptions
{
......@@ -649,6 +656,7 @@ extern EventTriggerInfo *getEventTriggers(Archive *fout, int *numEventTriggers);
/* START MPP ADDITION */
extern TypeStorageOptions *getTypeStorageOptions(Archive *fout, int *numTypes);
extern ExtProtInfo *getExtProtocols(Archive *fout, int *numExtProtocols);
extern BinaryUpgradeInfo *getBinaryUpgradeObjects(void);
extern bool testExtProtocolSupport(Archive *fout);
/* END MPP ADDITION */
......
......@@ -122,7 +122,8 @@ static const int newObjectTypePriority[] =
22, /* DO_PRE_DATA_BOUNDARY */
25, /* DO_POST_DATA_BOUNDARY */
32, /* DO_EVENT_TRIGGER */
33 /* DO_REFRESH_MATVIEW */
33, /* DO_REFRESH_MATVIEW */
1 /* DO_BINARY_UPGRADE */
};
static DumpId preDataBoundId;
......@@ -1426,6 +1427,11 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize)
"POST-DATA BOUNDARY (ID %d)",
obj->dumpId);
return;
case DO_BINARY_UPGRADE:
snprintf(buf, bufsize,
"BINARY UPGRADE (ID %d)",
obj->dumpId);
return;
}
/* shouldn't get here */
snprintf(buf, bufsize,
......
......@@ -32,6 +32,7 @@ extern Oid GetPreassignedOidForDatabase(const char *datname);
/* Functions used in binary upgrade */
extern bool IsOidAcceptable(Oid oid);
extern void MarkOidPreassignedFromBinaryUpgrade(Oid oid);
extern void AtEOXact_DispatchOids(bool isCommit);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册