提交 53210fb2 编写于 作者: H Heikki Linnakangas 提交者: Daniel Gustafsson

Disable write support for old-format AO table segments.

Segments that are still in an old format are treated as read-only. All new
data go to new segments, in new format. This allows us to eventually get
rid of the old format completely.

This is hypothetical until we have pg_upgrade working for GPDB 4.3 -> 5.0
upgrade, as you can't have old-format tables or segments at all in a
cluster that's initialized with 5.0. Stay tuned for pg_upgrade, but this
is preparatory work for that.
上级 53590b35
......@@ -65,11 +65,8 @@ NewAOCSFileSegInfo(int4 segno, int4 nvp)
seginfo->vpinfo.nEntry = nvp;
seginfo->state = AOSEG_STATE_DEFAULT;
/*
* New segments are created in the latest format. For testing purposes,
* though, you can force a different version, by settting this GUC.
*/
seginfo->formatversion = test_appendonly_version_default;
/* New segments are always created in the latest format */
seginfo->formatversion = AORelationVersion_GetLatest();
return seginfo;
}
......@@ -84,11 +81,8 @@ InsertInitialAOCSFileSegInfo(Relation prel, int4 segno, int4 nvp)
Relation segrel;
int16 formatVersion;
/*
* New segments are created in the latest format. For testing purposes,
* though, you can force a different version, by settting this GUC.
*/
formatVersion = test_appendonly_version_default;
/* New segments are always created in the latest format */
formatVersion = AORelationVersion_GetLatest();
segrel = heap_open(prel->rd_appendonly->segrelid, RowExclusiveLock);
......@@ -650,7 +644,7 @@ ClearAOCSFileSegInfo(Relation prel, int segno, FileSegInfoState newState)
repl[Anum_pg_aocs_varblockcount-1] = true;
/* When the segment is later recreated, it will be in new format */
d[Anum_pg_aocs_formatversion-1] = Int16GetDatum(test_appendonly_version_default);
d[Anum_pg_aocs_formatversion-1] = Int16GetDatum(AORelationVersion_GetLatest());
repl[Anum_pg_aocs_formatversion-1] = true;
/* We do not reset the modcount here */
......
......@@ -70,12 +70,8 @@ NewFileSegInfo(int segno)
fsinfo = (FileSegInfo *) palloc0(sizeof(FileSegInfo));
fsinfo->segno = segno;
fsinfo->state = AOSEG_STATE_DEFAULT;
/*
* New segments are created in the latest format. For testing purposes,
* though, you can force a different version, by settting this GUC.
*/
fsinfo->formatversion = test_appendonly_version_default;
/* New segments are always created in the latest format */
fsinfo->formatversion = AORelationVersion_GetLatest();
return fsinfo;
}
......@@ -100,11 +96,8 @@ InsertInitialSegnoEntry(Relation parentrel, int segno)
Datum *values;
int16 formatVersion;
/*
* New segments are created in the latest format. For testing purposes,
* though, you can force a different version, by settting this GUC.
*/
formatVersion = test_appendonly_version_default;
/* New segments are always created in the latest format */
formatVersion = AORelationVersion_GetLatest();
InsertFastSequenceEntry(parentrel->rd_appendonly->segrelid,
(int64)segno,
......@@ -601,7 +594,7 @@ ClearFileSegInfo(Relation parentrel,
new_record_repl[Anum_pg_aoseg_eofuncompressed - 1] = true;
/* When the segment is later recreated, it will be in new format */
new_record[Anum_pg_aoseg_formatversion - 1] = Int16GetDatum(test_appendonly_version_default);
new_record[Anum_pg_aoseg_formatversion - 1] = Int16GetDatum(AORelationVersion_GetLatest());
new_record_repl[Anum_pg_aoseg_formatversion - 1] = true;
/* We do not reset the modcount here */
......
......@@ -973,6 +973,73 @@ AppendOnlyExecutorReadBlock_ResetCounts(
executorReadBlock->totalRowsScannned = 0;
}
/*
 * Given a tuple in 'formatversion', convert it to a format that is
 * understood by the rest of the system.
 *
 * mtup          - the tuple as it was read from the segment file
 * pbind         - memtuple binding describing the tuple's layout
 * formatversion - on-disk format version of the segment 'mtup' came from
 * shouldFree    - output; set to false when 'mtup' itself is returned, and
 *                 to true when a newly formed tuple is returned (the caller
 *                 hands this flag on to the slot that stores the tuple)
 *
 * The 'values' and 'isnull' scratch arrays are static and allocated in
 * TopMemoryContext, so they survive across calls and are only ever grown.
 */
static MemTuple
upgrade_tuple(MemTuple mtup, MemTupleBinding *pbind, int formatversion, bool *shouldFree)
{
	TupleDesc	tupdesc = pbind->tupdesc;
	const int	natts = tupdesc->natts;
	MemTuple	newtuple;
	static Datum *values = NULL;
	static bool *isnull = NULL;
	static int	nallocated = 0;
	bool		convert_alignment = false;

	/*
	 * MPP-7372: If the AO table was created before the fix for this issue, it may
	 * contain tuples with misaligned bindings. Here we check if the stored memtuple
	 * is problematic and then create a clone of the tuple with properly aligned
	 * bindings to be used by the executor.
	 */
	if (formatversion < AORelationVersion_Aligned64bit &&
		memtuple_has_misaligned_attribute(mtup, pbind))
		convert_alignment = true;

	if (!convert_alignment)
	{
		/* No conversion required. Return the original tuple unmodified. */
		*shouldFree = false;
		return mtup;
	}

	/* Conversion is needed. */

	/* enlarge the arrays if needed */
	if (natts > nallocated)
	{
		/*
		 * Forget the old arrays before allocating the new ones. If one of
		 * the allocations below errors out, the static pointers must not be
		 * left pointing at freed memory with a stale 'nallocated'.
		 */
		nallocated = 0;
		if (values)
		{
			pfree(values);
			values = NULL;
		}
		if (isnull)
		{
			/* free 'isnull' itself; freeing 'values' here would be a double free */
			pfree(isnull);
			isnull = NULL;
		}
		values = (Datum *) MemoryContextAlloc(TopMemoryContext, natts * sizeof(Datum));
		isnull = (bool *) MemoryContextAlloc(TopMemoryContext, natts * sizeof(bool));
		nallocated = natts;
	}

	if (convert_alignment)
	{
		/* get attribute values from mis-aligned tuple */
		memtuple_deform_misaligned(mtup, pbind, values, isnull);

		/* Form a new, properly-aligned, tuple */
		newtuple = memtuple_form_to(pbind, values, isnull, NULL, NULL, true);
	}
	else
	{
		/*
		 * make a modifiable copy
		 *
		 * NOTE(review): not reachable today, because the early return above
		 * already handles !convert_alignment. Presumably kept as the base
		 * case for future format conversions -- confirm.
		 */
		newtuple = memtuple_copy_to(mtup, pbind, NULL, NULL);
	}

	*shouldFree = true;
	return newtuple;
}
static bool
AppendOnlyExecutorReadBlock_ProcessTuple(
AppendOnlyExecutorReadBlock *executorReadBlock,
......@@ -985,36 +1052,22 @@ AppendOnlyExecutorReadBlock_ProcessTuple(
{
bool valid = true; // Assume for HeapKeyTestUsingSlot define.
AOTupleId *aoTupleId = (AOTupleId*)&executorReadBlock->cdb_fake_ctid;
int formatVersion = executorReadBlock->storageRead->formatVersion;
AORelationVersion_CheckValid(formatVersion);
AOTupleIdInit_Init(aoTupleId);
AOTupleIdInit_segmentFileNum(aoTupleId, executorReadBlock->segmentFileNum);
AOTupleIdInit_rowNum(aoTupleId, rowNum);
if(slot)
if (slot)
{
/*
* MPP-7372: If the AO table was created before the fix for this issue, it may
* contain tuples with misaligned bindings. Here we check if the stored memtuple
* is problematic and then create a clone of the tuple with properly aligned
* bindings to be used by the executor.
*/
if (!IsAOBlockAndMemtupleAlignmentFixed(executorReadBlock->storageRead->formatVersion) &&
memtuple_has_misaligned_attribute(tuple, slot->tts_mt_bind))
{
/*
* Create a properly aligned clone of the memtuple.
* We p'alloc memory for the clone, so the slot is
* responsible for releasing the allocated memory.
*/
tuple = memtuple_aligned_clone(tuple, slot->tts_mt_bind, true /* upgrade */);
Assert(tuple);
ExecStoreMinimalTuple(tuple, slot, true /* shouldFree */);
}
else
{
ExecStoreMinimalTuple(tuple, slot, false);
}
bool shouldFree = false;
/* If the tuple is not in the latest format, convert it */
if (formatVersion < AORelationVersion_GetLatest())
tuple = upgrade_tuple(tuple, slot->tts_mt_bind, formatVersion, &shouldFree);
ExecStoreMinimalTuple(tuple, slot, shouldFree);
slot_set_ctid(slot, &(executorReadBlock->cdb_fake_ctid));
}
......@@ -2874,25 +2927,6 @@ appendonly_insert_init(Relation rel, int segno, bool update_mode)
else
tup = instup;
/*
* MPP-7372: If the AO table was created before the fix for this issue, it may contain
* tuples with misaligned bindings. Here we check if the memtuple to be stored is
* problematic and then create a clone of the tuple with the old (misaligned) bindings
* to preserve consistency.
*/
if (!IsAOBlockAndMemtupleAlignmentFixed(aoInsertDesc->storageWrite.formatVersion) &&
memtuple_has_misaligned_attribute(tup, aoInsertDesc->mt_bind))
{
/* Create a clone of the memtuple using misaligned bindings. */
MemTuple tuple = memtuple_aligned_clone(tup, aoInsertDesc->mt_bind, false /* downgrade */);
Assert(tuple);
if(tup != instup)
{
pfree(tup);
}
tup = tuple;
}
/*
* get space to insert our next item (tuple)
*/
......
......@@ -259,26 +259,30 @@ AORelCreateHashEntry(Oid relid)
aoHashEntry->relsegfiles[i].total_tupcount = 0;
aoHashEntry->relsegfiles[i].tupsadded = 0;
aoHashEntry->relsegfiles[i].aborted = false;
aoHashEntry->relsegfiles[i].formatversion = AORelationVersion_GetLatest();
}
/*
* update the tupcount of each 'segment' file in the append
* update the tupcount and formatVersion of each 'segment' file in the append
* only hash according to the information in the pg_aoseg table.
*/
for (i = 0 ; i < total_segfiles; i++)
{
int segno;
int64 total_tupcount;
int16 formatversion;
if (allfsinfo)
{
segno = allfsinfo[i]->segno;
total_tupcount = allfsinfo[i]->total_tupcount;
formatversion = allfsinfo[i]->formatversion;
}
else
{
Assert(aocsallfsinfo);
segno = aocsallfsinfo[i]->segno;
total_tupcount = aocsallfsinfo[i]->total_tupcount;
formatversion = aocsallfsinfo[i]->formatversion;
}
if (awaiting_drop[segno])
......@@ -289,6 +293,7 @@ AORelCreateHashEntry(Oid relid)
aoHashEntry->relsegfiles[segno].state = AWAITING_DROP_READY;
}
aoHashEntry->relsegfiles[segno].total_tupcount = total_tupcount;
aoHashEntry->relsegfiles[segno].formatversion = formatversion;
}
/* record the fact that another hash entry is now taken */
......@@ -1002,6 +1007,7 @@ SetSegnoForCompactionInsert(Relation rel,
if (segfilestat->total_tupcount < min_tupcount &&
segfilestat->state == AVAILABLE &&
segfilestat->formatversion == AORelationVersion_GetLatest() &&
!usedByConcurrentTransaction(segfilestat, i) &&
!in_compaction_list)
{
......@@ -1142,6 +1148,7 @@ SetSegnoForWrite(Relation rel, int existingsegno)
if(!segfilestat->isfull)
{
if (segfilestat->state == AVAILABLE &&
segfilestat->formatversion == AORelationVersion_GetLatest() &&
!segno_chosen &&
!usedByConcurrentTransaction(segfilestat, i))
{
......
......@@ -443,8 +443,7 @@ static uint32 compute_memtuple_size_using_bind(
int nullbit_extra,
uint32 *nullsaves,
MemTupleBindingCols *colbind,
TupleDesc tupdesc,
bool use_null_saves_aligned)
TupleDesc tupdesc)
{
uint32 data_length = colbind->var_start;
int i;
......@@ -466,14 +465,7 @@ static uint32 compute_memtuple_size_using_bind(
Assert(bind->len_aligned >= 0);
Assert(bind->len_aligned >= bind->len);
if (use_null_saves_aligned)
{
len = bind->len_aligned;
}
else
{
len = bind->len;
}
len = bind->len_aligned;
*nullsaves += len;
data_length -= len;
......@@ -510,15 +502,15 @@ static uint32 compute_memtuple_size_using_bind(
/* Compute the memtuple size.
* nullsave is an output param
*/
uint32 compute_memtuple_size(MemTupleBinding *pbind, Datum *values, bool *isnull, bool hasnull, uint32 *nullsaves, bool use_null_saves_aligned)
uint32 compute_memtuple_size(MemTupleBinding *pbind, Datum *values, bool *isnull, bool hasnull, uint32 *nullsaves)
{
uint32 ret_len = 0;
ret_len = compute_memtuple_size_using_bind(values, isnull, hasnull, pbind->null_bitmap_extra_size, nullsaves, &pbind->bind, pbind->tupdesc, use_null_saves_aligned);
ret_len = compute_memtuple_size_using_bind(values, isnull, hasnull, pbind->null_bitmap_extra_size, nullsaves, &pbind->bind, pbind->tupdesc);
if(ret_len <= MEMTUPLE_LEN_FITSHORT)
return ret_len;
ret_len = compute_memtuple_size_using_bind(values, isnull, hasnull, pbind->null_bitmap_extra_size, nullsaves, &pbind->large_bind, pbind->tupdesc, use_null_saves_aligned);
ret_len = compute_memtuple_size_using_bind(values, isnull, hasnull, pbind->null_bitmap_extra_size, nullsaves, &pbind->large_bind, pbind->tupdesc);
Assert(ret_len > MEMTUPLE_LEN_FITSHORT);
return ret_len;
......@@ -557,15 +549,13 @@ static inline int memtuple_get_nullp_len(MemTuple mtup __attribute__((unused)),
/* form a memtuple from values and isnull, to a prespecified buffer */
static
MemTuple memtuple_form_to_align(
MemTuple memtuple_form_to(
MemTupleBinding *pbind,
Datum *values,
bool *isnull,
MemTuple mtup,
uint32 *destlen,
bool inline_toast,
bool use_null_saves_aligned)
bool inline_toast)
{
bool hasnull = false;
bool hasext = false;
......@@ -641,7 +631,7 @@ MemTuple memtuple_form_to_align(
}
/* compute needed length */
len = compute_memtuple_size(pbind, values, isnull, hasnull, &null_save_len, use_null_saves_aligned);
len = compute_memtuple_size(pbind, values, isnull, hasnull, &null_save_len);
colbind = (len <= MEMTUPLE_LEN_FITSHORT) ? &pbind->bind : &pbind->large_bind;
if(!destlen)
......@@ -745,15 +735,7 @@ MemTuple memtuple_form_to_align(
Assert(bind->offset != 0);
short *null_saves = NULL;
if (use_null_saves_aligned)
{
null_saves = colbind->null_saves_aligned;
}
else
{
null_saves = colbind->null_saves;
}
short *null_saves = colbind->null_saves_aligned;
Assert(null_saves);
/* Not null */
......@@ -881,18 +863,6 @@ MemTuple memtuple_form_to_align(
return mtup;
}
/*
 * Form a memtuple from values and isnull, to a prespecified buffer.
 *
 * Thin wrapper: forwards every argument unchanged to
 * memtuple_form_to_align() and always passes true for its final
 * (use_null_saves_aligned) argument, so tuples formed through this entry
 * point use the aligned null-saves layout.
 */
MemTuple memtuple_form_to(
MemTupleBinding *pbind,
Datum *values,
bool *isnull,
MemTuple mtup,
uint32 *destlen,
bool inline_toast)
{
return memtuple_form_to_align(pbind, values, isnull, mtup, destlen, inline_toast, true /* aligned */);
}
bool memtuple_attisnull(MemTuple mtup, MemTupleBinding *pbind, int attnum)
{
MemTupleBindingCols *colbind = memtuple_get_islarge(mtup, pbind) ? &pbind->large_bind : &pbind->bind;
......@@ -994,6 +964,22 @@ void memtuple_deform(MemTuple mtup, MemTupleBinding *pbind, Datum *datum, bool *
memtuple_get_values(mtup, pbind, datum, isnull, true /* aligned */);
}
/*
 * Deform a memtuple with old binding alignment.
 *
 * We assume that the 'mtup' was created using null_saves, where the
 * binding length is not aligned to the following binding's alignment.
 * The attribute values and null flags are extracted into the caller's
 * 'datum' and 'isnull' arrays by calling memtuple_get_values() with its
 * 'aligned' argument set to false.
 *
 * This function only deforms; it does not build a new tuple. A caller
 * that wants an "upgraded" tuple using properly aligned binding lengths
 * can form one from the extracted values, e.g. with memtuple_form_to().
 */
void
memtuple_deform_misaligned(MemTuple mtup, MemTupleBinding *pbind,
Datum *datum, bool *isnull)
{
memtuple_get_values(mtup, pbind, datum, isnull, false /* aligned */);
}
/*
* Get the Oid assigned to this tuple (when WITH OIDS is used).
*
......@@ -1095,52 +1081,3 @@ bool memtuple_has_misaligned_attribute(MemTuple mtup, MemTupleBinding *pbind)
return false;
}
/*
 * Clone a memtuple, switching it to the other binding alignment.
 *
 * use_null_saves_aligned == true ("upgrade"): the input is taken to have
 * been built with null_saves, i.e. binding lengths that are not padded to
 * the next binding's alignment, and the clone is built with
 * null_saves_aligned.
 *
 * use_null_saves_aligned == false ("downgrade"): the reverse; the input is
 * read with the aligned bindings and the clone is built with the possibly
 * misaligned ones.
 *
 * Tuples with up to MAX_ATTR_COUNT_STATIC_ALLOC attributes are deformed
 * into on-stack scratch arrays; wider tuples use palloc'd arrays that are
 * released before returning.
 */
MemTuple memtuple_aligned_clone(MemTuple mtup, MemTupleBinding *pbind, bool use_null_saves_aligned)
{
	Assert(memtuple_has_misaligned_attribute(mtup, pbind));

	Datum		stack_values[MAX_ATTR_COUNT_STATIC_ALLOC];
	bool		stack_nulls[MAX_ATTR_COUNT_STATIC_ALLOC];
	const int	natts = pbind->tupdesc->natts;
	const bool	on_heap = (natts > MAX_ATTR_COUNT_STATIC_ALLOC);
	Datum	   *vals;
	bool	   *nulls;
	MemTuple	clone = NULL;

	/* pick the scratch buffers: heap for wide tuples, stack otherwise */
	if (on_heap)
	{
		vals = (Datum *) palloc(natts * sizeof(Datum));
		nulls = (bool *) palloc(natts * sizeof(bool));
	}
	else
	{
		vals = stack_values;
		nulls = stack_nulls;
	}

	Assert(vals);
	Assert(nulls);

	/* read the attributes with the alignment opposite to the target one */
	memtuple_get_values(mtup, pbind, vals, nulls, !use_null_saves_aligned);

	/* build the clone with the target alignment */
	clone = memtuple_form_to_align(pbind, vals, nulls, NULL, NULL, false, use_null_saves_aligned);

	if (on_heap)
	{
		pfree(vals);
		pfree(nulls);
	}

	return clone;
}
......@@ -541,7 +541,7 @@ static int compute_dest_tuplen(TupleDesc tupdesc, MemTupleBinding *pbind, bool h
if(pbind)
{
uint32 nullsave_dummy;
return (int) compute_memtuple_size(pbind, d, isnull, hasnull, &nullsave_dummy, true /* aligned */);
return (int) compute_memtuple_size(pbind, d, isnull, hasnull, &nullsave_dummy);
}
return heap_compute_data_size(tupdesc, d, isnull);
......
......@@ -347,9 +347,6 @@ char *gp_connectemc_mode;
EmcConnectModeType_t gp_emcconnect_transport;
#endif
/* The following GUC holds the default version for append-only tables */
int test_appendonly_version_default = AORelationVersion_GetLatest();
static char *gp_log_gang_str;
static char *gp_log_fts_str;
static char *gp_log_interconnect_str;
......@@ -3822,16 +3819,6 @@ struct config_int ConfigureNamesInt_gp[] =
2048, 0, INT_MAX, NULL, NULL
},
{
{"test_appendonly_version_default", PGC_USERSET, APPENDONLY_TABLES,
gettext_noop("Align append-only blocks to 64 bits."),
NULL,
GUC_GPDB_ADDOPT | GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL
},
&test_appendonly_version_default,
AORelationVersion_GetLatest(), 0, INT_MAX, NULL, NULL
},
{
{"gp_external_max_segs", PGC_USERSET, EXTERNAL_TABLES,
gettext_noop("Maximum number of segments that connect to a single gpfdist URL."),
......
......@@ -160,6 +160,8 @@ typedef struct AOSegfileStatus
AOSegfileState state;
int16 formatversion;
/* if true - never insert into this segno anymore */
bool isfull;
......
......@@ -171,11 +171,12 @@ extern MemTupleBinding* create_memtuple_binding(TupleDesc tupdesc);
extern Datum memtuple_getattr(MemTuple mtup, MemTupleBinding *pbind, int attnum, bool *isnull);
extern bool memtuple_attisnull(MemTuple mtup, MemTupleBinding *pbind, int attnum);
extern uint32 compute_memtuple_size(MemTupleBinding *pbind, Datum *values, bool *isnull, bool hasnull, uint32 *nullsaves, bool use_null_saves_aligned);
extern uint32 compute_memtuple_size(MemTupleBinding *pbind, Datum *values, bool *isnull, bool hasnull, uint32 *nullsaves);
extern MemTuple memtuple_copy_to(MemTuple mtup, MemTupleBinding *pbind, MemTuple dest, uint32 *destlen);
extern MemTuple memtuple_form_to(MemTupleBinding *pbind, Datum *values, bool *isnull, MemTuple dest, uint32 *destlen, bool inline_toast);
extern void memtuple_deform(MemTuple mtup, MemTupleBinding *pbind, Datum *datum, bool *isnull);
extern void memtuple_deform_misaligned(MemTuple mtup, MemTupleBinding *pbind, Datum *datum, bool *isnull);
extern Oid MemTupleGetOid(MemTuple mtup, MemTupleBinding *pbind);
extern void MemTupleSetOid(MemTuple mtup, MemTupleBinding *pbind, Oid oid);
......
......@@ -134,6 +134,4 @@ static inline void AORelationVersion_CheckValid(int version)
(version > AORelationVersion_Original) \
)
extern int test_appendonly_version_default;
#endif /* PG_APPENDONLY_H */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册