提交 930b2c38 编写于 作者: J Jimmy Yih

ADD COLUMN on an AOCO table should check and drop aocssegs in await drop

When doing an ADD COLUMN operation on an AOCO table, the vpinfo byte
array stored in the aocsseg auxiliary table for each column is updated
to add an extra vpinfo index to match the relnatts value in
pg_class. However, entries in the aocsseg auxiliary table with state 2
(AOSEG_STATE_AWAITING_DROP) will not have their vpinfo byte array
updated. Subsequent calls to the getAOCSVPEntry function will then fail
because the vpinfo size does not match the relnatts value in pg_class
(e.g. during a persistent table rebuild). Since we hold the AccessExclusiveLock
on the table during ADD COLUMN, we might as well schedule the drop
before the vpinfos are updated to prevent any inconsistency issues.
上级 05f55e86
......@@ -210,7 +210,7 @@ GetAOCSFileSegInfo(Relation prel,
struct varlena *v = (struct varlena *) DatumGetPointer(d[Anum_pg_aocs_vpinfo - 1]);
struct varlena *dv = pg_detoast_datum(v);
Assert(VARSIZE(dv) == aocs_vpinfo_size(nvp));
Assert(VARSIZE(dv) <= aocs_vpinfo_size(nvp));
memcpy(&seginfo->vpinfo, dv, aocs_vpinfo_size(nvp));
if(dv!=v)
pfree(dv);
......
......@@ -15,6 +15,7 @@
*/
#include "postgres.h"
#include "access/aocs_compaction.h"
#include "access/appendonlywriter.h"
#include "access/bitmap.h"
#include "access/genam.h"
......@@ -4561,25 +4562,44 @@ ATAocsWriteNewColumns(
/*
* Choose the column that has the smallest segfile size so as to
* minimize disk I/O in subsequent varblock header scan. natts
* includes only existing columns and not the ones being added.
* minimize disk I/O in subsequent varblock header scan. The natts arg
* includes only existing columns and not the ones being added. Once
* we find a segfile with nonzero tuplecount and find the column with
* the smallest eof to return, we continue the loop but skip over all
* segfiles except for those in AOSEG_STATE_AWAITING_DROP state which
* we need to append to our drop list.
*/
static int
column_to_scan(AOCSFileSegInfo **segInfos, int nseg, int natts)
column_to_scan(AOCSFileSegInfo **segInfos, int nseg, int natts, Relation aocsrel)
{
int scancol = -1;
int segi;
int i;
AOCSVPInfoEntry *vpe;
int64 min_eof = 0x7fffffffffffffff; /* largest value for int64 */
for (segi = 0; segi < nseg && scancol == -1; ++segi)
List *drop_segno_list = NIL;
for (segi = 0; segi < nseg; ++segi)
{
/*
* Skip over appendonly segments with no tuples (caused by
* VACUUM) or those left over by compaction process.
* Append to drop_segno_list and skip if state is in
* AOSEG_STATE_AWAITING_DROP. At the end of the loop, we will
* try to drop the segfiles since we currently have the
* AccessExclusiveLock. If we don't do this, aocssegfiles in
* this state will have vpinfo size containing info for less
* number of columns compared to the relation's relnatts in
* its pg_class entry (e.g. in calls to getAOCSVPEntry).
*/
if (segInfos[segi]->total_tupcount > 0 &&
(segInfos[segi]->state != AOSEG_STATE_AWAITING_DROP))
if (segInfos[segi]->state == AOSEG_STATE_AWAITING_DROP)
{
drop_segno_list = lappend_int(drop_segno_list, segInfos[segi]->segno);
continue;
}
/*
* Skip over appendonly segments with no tuples (caused by VACUUM)
*/
if (segInfos[segi]->total_tupcount > 0 && scancol == -1)
{
for (i = 0; i < natts; ++i)
{
......@@ -4593,6 +4613,10 @@ column_to_scan(AOCSFileSegInfo **segInfos, int nseg, int natts)
}
}
}
if (list_length(drop_segno_list) > 0 && Gp_role != GP_ROLE_DISPATCH)
AOCSDrop(aocsrel, drop_segno_list);
return scancol;
}
......@@ -4660,7 +4684,7 @@ ATAocsNoRewrite(AlteredTableInfo *tab)
list_length(tab->newvals));
}
scancol = column_to_scan(segInfos, nseg, tab->oldDesc->natts);
scancol = column_to_scan(segInfos, nseg, tab->oldDesc->natts, rel);
elogif(Debug_appendonly_print_storage_headers, LOG,
"using column %d of relation %s for alter table scan",
scancol, RelationGetRelationName(rel));
......@@ -4734,6 +4758,25 @@ ATAocsNoRewrite(AlteredTableInfo *tab)
aocs_addcol_finish(idesc);
ExecDropSingleTupleTableSlot(slot);
}
if (Gp_role == GP_ROLE_DISPATCH)
{
/*
* We remove the hash entry for this relation even though
* there is no rewrite because we may have dropped some
* segfiles that were in AOSEG_STATE_AWAITING_DROP state in
* column_to_scan(). The cost of recreating the entry later on
* is cheap so this should be fine. If we don't remove the
* hash entry and we had done any segfile drops, master will
* continue to see those segfiles as unavailable for use.
*
* Note that ALTER already took an exclusive lock on the
* relation so we are guaranteed to not drop the hash
* entry from under any concurrent operation.
*/
AORelRemoveHashEntry(RelationGetRelid(rel));
}
FreeExecutorState(estate);
heap_close(rel, NoLock);
return true;
......
......@@ -5,3 +5,6 @@ include $(top_builddir)/src/Makefile.global
TARGETS=tablecmds
include $(top_builddir)/src/backend/mock.mk
tablecmds.t: \
$(MOCK_DIR)/backend/access/aocs/aocs_compaction_mock.o
......@@ -5,6 +5,20 @@
#include "../tablecmds.c"
/*
 * Comparison callback handed to expect_check() for a mocked call's segno
 * list argument: reports whether two integer segno lists hold the same
 * values.
 *
 * Returns false when the lists differ in length. Otherwise returns nonzero
 * only if the integer union of the two lists is no longer than 'value'.
 *
 * NOTE(review): this relies on list_union_int() adding only those members
 * of 'check_value' that are not already present in 'value' -- confirm
 * against pg_list. Under that reading, lists containing duplicate segnos
 * could compare as equal even though they differ.
 */
static int check_segno_list(const List* value, const List* check_value)
{
	const int	nvalues = list_length(value);

	/* Lists of different sizes can never match. */
	if (list_length(check_value) != nvalues)
		return false;

	/* A union that did not grow means check_value added nothing new. */
	return list_length(list_union_int(value, check_value)) == nvalues;
}
/*
* Ensure that the column having the smallest on-disk segfile is
* chosen for headerscan during ALTER TABLE ADD COLUMN operation.
......@@ -58,6 +72,14 @@ test__column_to_scan(void **state)
segInfos[2]->vpinfo.entry[2].eof = 20;
segInfos[2]->vpinfo.entry[2].eof_uncompressed = 80;
List *drop_segno_list = NIL;
drop_segno_list = lappend_int(drop_segno_list, 3);
Gp_role = GP_ROLE_EXECUTE;
RelationData reldata;
expect_value(AOCSDrop, aorel, &reldata);
expect_check(AOCSDrop, compaction_segno, check_segno_list, drop_segno_list);
will_be_called(AOCSDrop);
/* Empty segment, should be skipped over */
segInfos[3] = (AOCSFileSegInfo *)
malloc(sizeof(AOCSFileSegInfo) + sizeof(AOCSVPInfoEntry)*numcols);
......@@ -73,7 +95,7 @@ test__column_to_scan(void **state)
segInfos[3]->vpinfo.entry[2].eof_uncompressed = 85;
/* Column 0 (vpe index 0) is the smallest (total eof = 120 + 200) */
col = column_to_scan(segInfos, 4, numcols);
col = column_to_scan(segInfos, 4, numcols, &reldata);
assert_int_equal(col, 0);
}
......
-- @Description Ensures that an ALTER TABLE ADD COLUMN will drop segfiles in
-- AOSEG_STATE_AWAITING_DROP state left over by a previous vacuum
--
CREATE TABLE aoco_add_column_after_vacuum_skip_drop (a INT, b INT) WITH (appendonly=true, orientation=column);
CREATE
INSERT INTO aoco_add_column_after_vacuum_skip_drop SELECT i as a, i as b FROM generate_series(1, 10) AS i;
INSERT 10
DELETE FROM aoco_add_column_after_vacuum_skip_drop;
DELETE 10
-- We should see all aocssegs in state 1
2U: SELECT segno, column_num, state FROM gp_toolkit.__gp_aocsseg_name('aoco_add_column_after_vacuum_skip_drop');
segno|column_num|state
-----+----------+-----
1 |0 |1
1 |1 |1
(2 rows)
-- VACUUM while another session holds lock
1: BEGIN;
BEGIN
1: SELECT COUNT(*) FROM aoco_add_column_after_vacuum_skip_drop;
count
-----
0
(1 row)
2: VACUUM aoco_add_column_after_vacuum_skip_drop;
VACUUM
1: END;
END
-- We should see an aocsseg in state 2 (AOSEG_STATE_AWAITING_DROP)
2U: SELECT segno, column_num, state FROM gp_toolkit.__gp_aocsseg_name('aoco_add_column_after_vacuum_skip_drop');
segno|column_num|state
-----+----------+-----
1 |0 |2
1 |1 |2
2 |0 |1
2 |1 |1
(4 rows)
-- The ADD COLUMN should clean up aocssegs in state 2 (AOSEG_STATE_AWAITING_DROP)
1: ALTER TABLE aoco_add_column_after_vacuum_skip_drop ADD COLUMN c INT DEFAULT 0;
ALTER
2U: SELECT segno, column_num, state FROM gp_toolkit.__gp_aocsseg_name('aoco_add_column_after_vacuum_skip_drop');
segno|column_num|state
-----+----------+-----
1 |0 |1
1 |1 |1
1 |2 |1
2 |0 |1
2 |1 |1
2 |2 |1
(6 rows)
-- Check if insert goes into segno 1 instead of segno 2
1: INSERT INTO aoco_add_column_after_vacuum_skip_drop SELECT i as a, i as b, i as c FROM generate_series(1, 100) AS i;
INSERT 100
2U: SELECT segno, tupcount > 0, state FROM gp_toolkit.__gp_aocsseg_name('aoco_add_column_after_vacuum_skip_drop');
segno|?column?|state
-----+--------+-----
1 |t |1
1 |t |1
1 |t |1
2 |f |1
2 |f |1
2 |f |1
(6 rows)
......@@ -95,3 +95,4 @@ test: uao/vacuum_self_serializable2_column
test: uao/vacuum_self_serializable3_column
test: uao/vacuum_while_insert_column
test: uao/vacuum_while_vacuum_column
test: add_column_after_vacuum_skip_drop_column
-- @Description Ensures that an ALTER TABLE ADD COLUMN will drop segfiles in
-- AOSEG_STATE_AWAITING_DROP state left over by a previous vacuum
--
CREATE TABLE aoco_add_column_after_vacuum_skip_drop (a INT, b INT) WITH (appendonly=true, orientation=column);
INSERT INTO aoco_add_column_after_vacuum_skip_drop SELECT i as a, i as b FROM generate_series(1, 10) AS i;
DELETE FROM aoco_add_column_after_vacuum_skip_drop;
-- We should see all aocssegs in state 1
2U: SELECT segno, column_num, state FROM gp_toolkit.__gp_aocsseg_name('aoco_add_column_after_vacuum_skip_drop');
-- VACUUM while another session holds lock
1: BEGIN;
1: SELECT COUNT(*) FROM aoco_add_column_after_vacuum_skip_drop;
2: VACUUM aoco_add_column_after_vacuum_skip_drop;
1: END;
-- We should see an aocsseg in state 2 (AOSEG_STATE_AWAITING_DROP)
2U: SELECT segno, column_num, state FROM gp_toolkit.__gp_aocsseg_name('aoco_add_column_after_vacuum_skip_drop');
-- The ADD COLUMN should clean up aocssegs in state 2 (AOSEG_STATE_AWAITING_DROP)
1: ALTER TABLE aoco_add_column_after_vacuum_skip_drop ADD COLUMN c INT DEFAULT 0;
2U: SELECT segno, column_num, state FROM gp_toolkit.__gp_aocsseg_name('aoco_add_column_after_vacuum_skip_drop');
-- Check if insert goes into segno 1 instead of segno 2
1: INSERT INTO aoco_add_column_after_vacuum_skip_drop SELECT i as a, i as b, i as c FROM generate_series(1, 100) AS i;
2U: SELECT segno, tupcount > 0, state FROM gp_toolkit.__gp_aocsseg_name('aoco_add_column_after_vacuum_skip_drop');
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册