提交 793d14c3 编写于 作者: A Ashwin Agrawal

Always use SnapshotNow for AO insert

SnapshotNow MUST be used during the insert flow to fetch the latest EOF value from
the aoseg table. The earlier use of ActiveSnapshot in the AO insert flow caused data
corruption, as it could potentially read an incorrect or stale EOF value from
aoseg. The specific scenario in which this happens is when an entry from the
AppendOnlyHash table gets evicted. Eviction invalidates (zeros out) the
latestWriteXid, and hence the check for usedByConcurrentTransaction cannot be
performed. So any transaction that later inserts data into the same AO table, if it
has acquired its snapshot and has latestWriteXid listed in its in-progress
distributed transaction list, gets the same segfile to write. Based on DTM
visibility rules, when using ActiveSnapshot the current transaction will not read
the EOF value from the previously inserting transaction, as it's listed in its
in-progress list, and hence will overwrite the data; SnapshotNow will see the
latest EOF and make sure to append.
上级 73dc0ecb
......@@ -666,8 +666,7 @@ AppendOnlyCompact(Relation aorel,
/* Get information about all the file segments we need to scan */
segfile_array = GetAllFileSegInfo(aorel, SnapshotNow, &total_segfiles);
insertDesc = appendonly_insert_init(aorel, SnapshotNow,
insert_segno, false);
insertDesc = appendonly_insert_init(aorel, insert_segno, false);
for(i = 0 ; i < total_segfiles ; i++)
{
......
......@@ -2540,7 +2540,7 @@ appendonly_update_init(Relation rel, Snapshot appendOnlyMetaDataSnapshot, int se
*/
AppendOnlyUpdateDesc aoUpdateDesc = (AppendOnlyUpdateDesc) palloc0(sizeof(AppendOnlyUpdateDescData));
aoUpdateDesc->aoInsertDesc = appendonly_insert_init(rel, appendOnlyMetaDataSnapshot, segno, true);
aoUpdateDesc->aoInsertDesc = appendonly_insert_init(rel, segno, true);
AppendOnlyVisimap_Init(&aoUpdateDesc->visibilityMap,
aoUpdateDesc->aoInsertDesc->aoi_rel->rd_appendonly->visimaprelid,
......@@ -2617,7 +2617,7 @@ HTSU_Result appendonly_update(AppendOnlyUpdateDesc aoUpdateDesc,
* append only tables.
*/
AppendOnlyInsertDesc
appendonly_insert_init(Relation rel, Snapshot appendOnlyMetaDataSnapshot, int segno, bool update_mode)
appendonly_insert_init(Relation rel, int segno, bool update_mode)
{
AppendOnlyInsertDesc aoInsertDesc;
int maxtupsize;
......@@ -2634,7 +2634,6 @@ appendonly_insert_init(Relation rel, Snapshot appendOnlyMetaDataSnapshot, int se
* Get the pg_appendonly information for this table
*/
Assert(rel->rd_appendonly->majorversion == 1 && rel->rd_appendonly->minorversion == 1);
/*
* allocate and initialize the insert descriptor
*/
......@@ -2645,7 +2644,7 @@ appendonly_insert_init(Relation rel, Snapshot appendOnlyMetaDataSnapshot, int se
* Writers uses this since they have exclusive access to the lock acquired with
* LockRelationAppendOnlySegmentFile for the segment-file.
*/
aoInsertDesc->appendOnlyMetaDataSnapshot = appendOnlyMetaDataSnapshot;
aoInsertDesc->appendOnlyMetaDataSnapshot = SnapshotNow;
aoInsertDesc->mt_bind = create_memtuple_binding(RelationGetDescr(rel));
......
......@@ -4081,7 +4081,6 @@ CopyFrom(CopyState cstate)
ResultRelInfoSetSegno(resultRelInfo, cstate->ao_segnos);
resultRelInfo->ri_aoInsertDesc =
appendonly_insert_init(resultRelInfo->ri_RelationDesc,
SnapshotNow,
resultRelInfo->ri_aosegno, false);
}
else if (relstorage == RELSTORAGE_AOCOLS &&
......
......@@ -5384,7 +5384,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap)
MemTupleBinding* mt_bind;
if(newrel)
aoInsertDesc = appendonly_insert_init(newrel, SnapshotNow, segno, false);
aoInsertDesc = appendonly_insert_init(newrel, segno, false);
mt_bind = (newrel ? aoInsertDesc->mt_bind : create_memtuple_binding(newTupDesc));
......@@ -14801,7 +14801,7 @@ split_rows(Relation intoa, Relation intob, Relation temprel)
if (!(*targetAODescPtr))
{
MemoryContextSwitchTo(oldCxt);
*targetAODescPtr = appendonly_insert_init(targetRelation, SnapshotNow,
*targetAODescPtr = appendonly_insert_init(targetRelation,
RESERVED_SEGNO, false);
MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
}
......
......@@ -3240,7 +3240,6 @@ ExecInsert(TupleTableSlot *slot,
resultRelInfo->ri_aoInsertDesc =
appendonly_insert_init(resultRelationDesc,
ActiveSnapshot,
resultRelInfo->ri_aosegno,
false);
......@@ -5244,8 +5243,7 @@ intorel_receive(TupleTableSlot *slot, DestReceiver *self)
AOTupleId aoTupleId;
if (myState->ao_insertDesc == NULL)
myState->ao_insertDesc = appendonly_insert_init(into_rel, SnapshotNow,
RESERVED_SEGNO, false);
myState->ao_insertDesc = appendonly_insert_init(into_rel, RESERVED_SEGNO, false);
appendonly_insert(myState->ao_insertDesc, tuple, &tupleOid, &aoTupleId);
pfree(tuple);
......
......@@ -343,7 +343,7 @@ extern void appendonly_fetch_detail(
AppendOnlyFetchDesc aoFetchDesc,
AppendOnlyFetchDetail *aoFetchDetail);
extern void appendonly_fetch_finish(AppendOnlyFetchDesc aoFetchDesc);
extern AppendOnlyInsertDesc appendonly_insert_init(Relation rel, Snapshot appendOnlyMetaDataSnapshot, int segno, bool update_mode);
extern AppendOnlyInsertDesc appendonly_insert_init(Relation rel, int segno, bool update_mode);
extern void appendonly_insert(
AppendOnlyInsertDesc aoInsertDesc,
MemTuple instup,
......
Parsed test spec with 2 sessions
starting permutation: s1begin s1setguc s1insert s2begin s2select s1commit s2insert s2commit s2select
step s1begin: BEGIN;
step s1setguc: SET test_AppendOnlyHash_eviction_vs_just_marking_not_inuse=1;
step s1insert: INSERT INTO appendonly_eof SELECT * FROM generate_series(1, 1000);
step s2begin: BEGIN ISOLATION LEVEL SERIALIZABLE;
step s2select: SELECT count(*) from appendonly_eof;
count
100
step s1commit: COMMIT;
step s2insert: INSERT INTO appendonly_eof SELECT * FROM generate_series(1, 10);
step s2commit: COMMIT;
step s2select: SELECT count(*) from appendonly_eof;
count
1110
......@@ -3,3 +3,4 @@
test: ao-serializable-read
test: ao-serializable-vacuum
test: ao-insert-eof
# Test validating concurrent insert to AO with appendonlywriter hashtable entry eviction.
#
# The scenario this test validates is a concurrent insert to an AO table
# - after the inserting transaction has acquired its snapshot and
# - the hashtable entry recording the concurrent insert transaction's id in
# latestWriteXid got evicted out
# - and then it performs the segment-file selection for inserting data
# A serializable transaction is used here only because it makes it easy to grab
# the snapshot before the AO segment-file selection logic kicks in. The same test
# could easily be written without serializable by using the fault injector to
# suspend the insert statement after acquiring the snapshot but before reaching
# the segment-file selection logic for AO.
# Without the fix of using SnapshotNow to read aoseg during inserts, the insert
# used to overwrite and corrupt the data in some cases, or error out on hitting
# the sanity checks.
setup
{
CREATE TABLE appendonly_eof (a int) WITH (appendonly=true);
INSERT INTO appendonly_eof SELECT * FROM generate_series(1, 100);
}
session "s1"
step "s1begin" { BEGIN; }
step "s1setguc" { SET test_AppendOnlyHash_eviction_vs_just_marking_not_inuse=1; }
step "s1insert" { INSERT INTO appendonly_eof SELECT * FROM generate_series(1, 1000); }
step "s1commit" { COMMIT; }
session "s2"
step "s2begin" { BEGIN ISOLATION LEVEL SERIALIZABLE; }
step "s2select" { SELECT count(*) from appendonly_eof; }
step "s2insert" { INSERT INTO appendonly_eof SELECT * FROM generate_series(1, 10); }
step "s2commit" { COMMIT; }
permutation "s1begin" "s1setguc" "s1insert" "s2begin" "s2select" "s1commit" "s2insert" "s2commit" "s2select"
1: BEGIN;
BEGIN
1: SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
SET
1: SELECT COUNT(*) FROM test_ao_mpp19912;
count
-----
1000
(1 row)
2: BEGIN;
BEGIN
2: INSERT INTO test_ao_mpp19912 SELECT i, i from generate_series(1, 1000) i;
INSERT 1000
2: COMMIT;
COMMIT
3: BEGIN;
BEGIN
3: INSERT INTO test_ao_mpp19912_supp1 SELECT i, i FROM generate_series(1, 1000) i;
INSERT 1000
4: BEGIN;
BEGIN
4: INSERT INTO test_ao_mpp19912_supp2 SELECT i, i FROM generate_series(1, 1000) i;
INSERT 1000
5: BEGIN;
BEGIN
5: INSERT INTO test_ao_mpp19912_supp3 SELECT i, i FROM generate_series(1, 1000) i;
INSERT 1000
6: INSERT INTO test_ao_mpp19912_supp4 SELECT i, i FROM generate_series(1, 1000) i;
ERROR: can't have more than 3 different append-only tables open for writing data at the same time. if tables are heavily partitioned or if your workload requires, increase the value of max_appendonly_tables and retry (appendonlywriter.c:432)
3: COMMIT;
COMMIT
4: COMMIT;
COMMIT
5: COMMIT;
COMMIT
1: INSERT INTO test_ao_mpp19912 SELECT i, i FROM generate_series(1, 200) i;
ERROR: Unexpected EOF for relation name test_ao_mpp19912, relfilenode 25150, segment file 1. EOF from gp_persistent_relation_node 36048 greater than current EOF 18024 (appendonlyam.c:591) (seg0 agrawa2-mac.local:4100 pid=14684) (cdbdisp.c:1526)
1: COMMIT;
COMMIT
DROP TABLE IF EXISTS test_ao_mpp19912;
DROP
CREATE TABLE test_ao_mpp19912 ( i int, j int) WITH (appendonly=true);
CREATE
INSERT INTO test_ao_mpp19912 SELECT i, i FROM generate_series(1, 1000) i;
INSERT 1000
DROP TABLE IF EXISTS test_ao_mpp19912_supp1;
DROP
DROP TABLE IF EXISTS test_ao_mpp19912_supp2;
DROP
DROP TABLE IF EXISTS test_ao_mpp19912_supp3;
DROP
DROP TABLE IF EXISTS test_ao_mpp19912_supp4;
DROP
CREATE TABLE test_ao_mpp19912_supp1 ( i int, j int) WITH (appendonly=true);
CREATE
CREATE TABLE test_ao_mpp19912_supp2 ( i int, j int) WITH (appendonly=true);
CREATE
CREATE TABLE test_ao_mpp19912_supp3 ( i int, j int) WITH (appendonly=true);
CREATE
CREATE TABLE test_ao_mpp19912_supp4 ( i int, j int) WITH (appendonly=true);
CREATE
1: BEGIN;
1: SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
1: SELECT COUNT(*) FROM test_ao_mpp19912;
2: BEGIN;
2: INSERT INTO test_ao_mpp19912 SELECT i, i from generate_series(1, 1000) i;
2: COMMIT;
3: BEGIN;
3: INSERT INTO test_ao_mpp19912_supp1 SELECT i, i FROM generate_series(1, 1000) i;
4: BEGIN;
4: INSERT INTO test_ao_mpp19912_supp2 SELECT i, i FROM generate_series(1, 1000) i;
5: BEGIN;
5: INSERT INTO test_ao_mpp19912_supp3 SELECT i, i FROM generate_series(1, 1000) i;
6: INSERT INTO test_ao_mpp19912_supp4 SELECT i, i FROM generate_series(1, 1000) i;
3: COMMIT;
4: COMMIT;
5: COMMIT;
1: INSERT INTO test_ao_mpp19912 SELECT i, i FROM generate_series(1, 200) i;
1: COMMIT;
-- start_ignore
SET gp_create_table_random_default_distribution=off;
-- end_ignore
DROP TABLE IF EXISTS test_ao_mpp19912;
CREATE TABLE test_ao_mpp19912 ( i int, j int) WITH (appendonly=true);
INSERT INTO test_ao_mpp19912 SELECT i, i FROM generate_series(1, 1000) i;
DROP TABLE IF EXISTS test_ao_mpp19912_supp1;
DROP TABLE IF EXISTS test_ao_mpp19912_supp2;
DROP TABLE IF EXISTS test_ao_mpp19912_supp3;
DROP TABLE IF EXISTS test_ao_mpp19912_supp4;
CREATE TABLE test_ao_mpp19912_supp1 ( i int, j int) WITH (appendonly=true);
CREATE TABLE test_ao_mpp19912_supp2 ( i int, j int) WITH (appendonly=true);
CREATE TABLE test_ao_mpp19912_supp3 ( i int, j int) WITH (appendonly=true);
CREATE TABLE test_ao_mpp19912_supp4 ( i int, j int) WITH (appendonly=true);
-- start_matchsubs
# Collapse "Unexpected EOF for relation name ..." errors to a fixed message, dropping the run-specific relfilenode, EOF values and host/pid details
m/.*ERROR:.*Unexpected EOF for relation name test_ao_mpp19912.*/
s/.*ERROR:.*Unexpected EOF for relation name test_ao_mpp19912.*/ERROR: Unexpected EOF for relation name test_ao_mpp19912/
m/.*ERROR:.*Unexpected EOF for relation name test_aoco_mpp19912.*/
s/.*ERROR:.*Unexpected EOF for relation name test_aoco_mpp19912.*/ERROR: Unexpected EOF for relation name test_aoco_mpp19912/
-- end_matchsubs
......@@ -16,64 +16,11 @@ limitations under the License.
"""
import tinctest
from mpp.gpdb.tests.storage.lib.sql_isolation_testcase import SQLIsolationTestCase
from mpp.models import SQLConcurrencyTestCase, GpfdistSQLTestCase
from mpp.lib.gpConfig import GpConfig
from mpp.lib.gpstop import GpStop
# Isolation test case that runs with max_appendonly_tables forced to '3' on
# both master and segments.  setUpClass records the original values, applies
# the override and restarts the cluster; tearDownClass restores the recorded
# values and restarts again.
class AppendOnlyEOFTests(SQLIsolationTestCase):
    """
    @product_version gpdb: [4.3.3.0-]
    @gucs gp_create_table_random_default_distribution=off
    """
    sql_dir = 'sql/'
    ans_dir = 'expected/'
    data_dir = 'data/'
    out_dir = 'output/'

    # Original max_appendonly_tables values, captured in setUpClass so that
    # tearDownClass can restore them.
    master_value = None
    segment_value = None

    @classmethod
    def setUpClass(cls):
        """Save the current max_appendonly_tables values, set them to '3' and restart.

        Raises:
            Exception: if the values read back after the restart are not '3'.
        """
        super(AppendOnlyEOFTests, cls).setUpClass()
        gpconfig = GpConfig()
        (cls.master_value, cls.segment_value) = gpconfig.getParameter('max_appendonly_tables')
        tinctest.logger.debug("Original max_appendonly_tables values - Master Value: %s Segment Value: %s" %(cls.master_value, cls.segment_value))
        gpconfig.setParameter('max_appendonly_tables', '3', '3')
        GpStop().run_gpstop_cmd(restart=True)
        # Read the values back after the restart to confirm the override took effect.
        (master_value, segment_value) = gpconfig.getParameter('max_appendonly_tables')
        tinctest.logger.debug("Set max_appendonly_tables to Master Value: %s Segment Value: %s " %(master_value, segment_value))
        if master_value != '3' or segment_value != '3':
            raise Exception("Failed to set max_appendonly_tables to the required values")

    @classmethod
    def tearDownClass(cls):
        """Restore the saved max_appendonly_tables values and restart.

        The parent class teardown always runs, even when restoring fails.

        Raises:
            Exception: if no original values were recorded, or if the values
                read back after the restart differ from the recorded ones.
        """
        # The try/finally spans the whole restore sequence (not only the final
        # verification) so that a failure while restoring the GUC or restarting
        # the cluster cannot skip the parent class teardown.
        try:
            gpconfig = GpConfig()
            if (not cls.master_value) or (not cls.segment_value):
                raise Exception("Original max_appendonly_tables value is None")
            gpconfig.setParameter('max_appendonly_tables', cls.master_value, cls.segment_value)
            GpStop().run_gpstop_cmd(restart=True)
            # Make sure the values are reset properly
            (master_value, segment_value) = gpconfig.getParameter('max_appendonly_tables')
            if master_value != cls.master_value or segment_value != cls.segment_value:
                raise Exception("Failed to reset max_appendonly_tables to the required values")
        finally:
            super(AppendOnlyEOFTests, cls).tearDownClass()
class AOCOEOFConcurrencyTests(GpfdistSQLTestCase, SQLConcurrencyTestCase):
"""
@gpdiff True
@product_version gpdb: [4.3.3.0-]
@gucs gp_create_table_random_default_distribution=off
"""
sql_dir = 'sql_concurrency/'
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册