未验证 提交 b76db711 编写于 作者: X xiong-gang 提交者: GitHub

gpcheckcat: add the check of vpinfo consistency

The 'vpinfo' column in pg_aoseg.pg_aocsseg_xxx records the 'eof' of each attribute
in the AOCS table. Add a new check, 'aoseg_table', to gpcheckcat; it checks that the
number of attributes in 'vpinfo' is the same as the number of attributes in
'pg_attribute'. This check is performed in parallel and independently on each
segment, and it reads the aoseg table and pg_attribute in different transactions,
so it should be run 'offline' to avoid false alarms.
上级 0799b771
......@@ -2424,6 +2424,89 @@ def checkTableMissingEntry(cat):
myprint(' Execution error: ' + str(e))
myprint(qry)
class checkAOSegVpinfoThread(execThread):
    """Worker thread that checks 'vpinfo' against pg_attribute on one connection.

    For every column-oriented append-only table listed in pg_appendonly, the
    thread compares the number of attributes implied by the length of the
    'vpinfo' values in the table's pg_aoseg relation with the number of
    user attributes (attnum > 0) recorded in pg_attribute.

    Inconsistencies are reported through the module logger, GV.checkStatus is
    cleared and the error state is set to ERROR_NOREPAIR.  An exception raised
    while querying is stored in self.error instead of being propagated.
    """

    # vpinfo is bytea type, the length of the first 3 fields is 12 bytes, and
    # the size of AOCSVPInfoEntry is 16
    # typedef struct AOCSVPInfo
    # {
    #     int32 _len;
    #     int32 version;
    #     int32 nEntry;
    #
    #     AOCSVPInfoEntry entry[1];
    # } AOCSVPInfo;
    VPINFO_HEADER_BYTES = 12
    VPINFO_ENTRY_BYTES = 16

    def __init__(self, cfg, db):
        """Create the worker for segment config `cfg` using connection `db`.

        No query is handed to the base class (third argument is None); all
        queries are issued from run().
        """
        execThread.__init__(self, cfg, db, None)

    def _report_failure(self, message, qry):
        """Flag the check as failed and log `message` followed by `qry`."""
        GV.checkStatus = False
        setError(ERROR_NOREPAIR)
        logger.info('[FAIL] inconsistent vpinfo')
        logger.error(message)
        logger.error(qry)

    def run(self):
        """Run the vpinfo consistency check for every column-store AO table."""
        aoseg_query = """
            SELECT a.relname, a.relid, a.segrelid, cl.relname
            FROM (SELECT p.relid, p.segrelid, c.relname FROM pg_appendonly p LEFT JOIN pg_class c ON p.relid = c.oid WHERE p.columnstore = true) a
            LEFT JOIN pg_class cl ON a.segrelid = cl.oid;
        """
        try:
            # Read the list of aoseg tables from the database
            curs = self.db.query(aoseg_query)

            # segrelid is selected by the query but not needed below.
            for relname, relid, _segrelid, segrelname in curs.getresult():
                qry = "SELECT count(*) FROM pg_attribute WHERE attrelid=%d AND attnum > 0;" % (relid)
                attr_count = self.db.query(qry).getresult()[0][0]

                qry = "SELECT distinct(length(vpinfo)) FROM pg_aoseg.%s WHERE xmax = 0;" % (segrelname)
                vpinfo_curs = self.db.query(qry)
                nrows = vpinfo_curs.ntuples()

                if nrows == 0:
                    # No matching aoseg rows: nothing to compare.
                    continue

                if nrows > 1:
                    # More than one distinct length means the rows disagree
                    # with each other about the number of attributes.
                    self._report_failure(
                        "found {nrows} vpinfo(s) with different length in 'pg_aoseg.{segrelname}' of table '{relname}' on segment {content}"
                        .format(nrows = nrows,
                                segrelname = segrelname,
                                relname = relname,
                                content = self.cfg['content']),
                        qry)
                    continue

                vpinfo_length = vpinfo_curs.getresult()[0][0]
                # Floor division keeps the count an integer on both Python 2
                # and Python 3 (plain '/' would yield a float on Python 3).
                vpinfo_attr_count = ((vpinfo_length - self.VPINFO_HEADER_BYTES)
                                     // self.VPINFO_ENTRY_BYTES)

                if vpinfo_attr_count != attr_count:
                    self._report_failure(
                        "vpinfo in 'pg_aoseg.{segrelname}' of table '{relname}' contains {vpinfo_attr_count} attributes, while pg_attribute has {attr_count} attributes on segment {content}"
                        .format(segrelname = segrelname,
                                relname = relname,
                                vpinfo_attr_count = vpinfo_attr_count,
                                attr_count = attr_count,
                                content = self.cfg['content']),
                        qry)
        except Exception as e:
            # Record the failure on the thread object rather than raising.
            GV.checkStatus = False
            self.error = e
def checkAOSegVpinfo():
    """Launch one checkAOSegVpinfoThread per entry in GV.cfg, in batches.

    A connection is opened with connect2() for each configured dbid and a
    worker thread is started on it.  After every GV.opt['-B'] launches the
    accumulated threads are handed to processThread() and the batch is reset;
    whatever remains at the end (possibly an empty list) is handed over too.
    """
    pending = []

    # parallelise check
    for launched, dbid in enumerate(GV.cfg, 1):
        seg_cfg = GV.cfg[dbid]
        conn = connect2(seg_cfg)

        worker = checkAOSegVpinfoThread(seg_cfg, conn)
        worker.start()
        logger.debug('launching check thread %s for dbid %i' %
                     (worker.getName(), dbid))
        pending.append(worker)

        # Batch is full: hand it over before launching more threads.
        if launched % GV.opt['-B'] == 0:
            processThread(pending)
            pending = []

    processThread(pending)
# -------------------------------------------------------------------------------
......@@ -2969,6 +3052,14 @@ all_checks = {
"order": 14,
"online": True
},
"aoseg_table":
{
"description": "Check that vpinfo in aoseg table is consistent with pg_attribute",
"fn": lambda: checkAOSegVpinfo(),
"version": 'main',
"order": 15,
"online": False
}
}
......
......@@ -498,6 +498,16 @@ Feature: gpcheckcat tests
And the user runs "dropdb gpcheckcat_orphans"
And the path "repair_dir" is removed from current working directory
Scenario: gpcheckcat should report vpinfo inconsistent error
Given database "vpinfo_inconsistent_db" is dropped and recreated
And there is a "co" table "public.co_vpinfo" in "vpinfo_inconsistent_db" with data
When the user runs "gpcheckcat vpinfo_inconsistent_db"
Then gpcheckcat should return a return code of 0
When an attribute of table "co_vpinfo" in database "vpinfo_inconsistent_db" is deleted on segment with content id "0"
Then psql should return a return code of 0
When the user runs "gpcheckcat -R aoseg_table vpinfo_inconsistent_db"
Then gpcheckcat should print "Failed test\(s\) that are not reported here: aoseg_table" to stdout
########################### @concourse_cluster tests ###########################
# The @concourse_cluster tag denotes the scenario that requires a remote cluster
......@@ -538,4 +548,3 @@ Feature: gpcheckcat tests
Examples:
| attrname | tablename |
| conrelid | pg_constraint |
......@@ -1607,6 +1607,7 @@ def impl(context, filename):
@then('an attribute of table "{table}" in database "{dbname}" is deleted on segment with content id "{segid}"')
@when('an attribute of table "{table}" in database "{dbname}" is deleted on segment with content id "{segid}"')
def impl(context, table, dbname, segid):
local_cmd = 'psql %s -t -c "SELECT port,hostname FROM gp_segment_configuration WHERE content=%s and role=\'p\';"' % (
dbname, segid)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册