提交 effae659 编写于 作者: N Ning Yu

Transfer meta-only index files instead of empty ones

An empty b-tree index file is never truly empty: it still contains the meta
page. By transferring meta-only index files to the new segments, they can be
launched directly without the "ignore_system_indexes" setting, and we do
not need an extra relaunch of the new segments.

We use base/13199/5112 as the template of meta-only index files, it is
pg_partition_oid_index of template0.
上级 b0b0b958
......@@ -53,6 +53,11 @@ SEGMENT_CONFIGURATION_BACKUP_FILE = "gpexpand.gp_segment_configuration"
DBNAME = 'postgres'
# an empty b-tree index file is considered invalid; it needs to contain a meta
# page, so we use the pg_partition_oid_index of template0 as the template for
# empty index files.
EMPTY_INDEX = 'base/13199/5112'
#global var
_gp_expand = None
......@@ -627,9 +632,9 @@ class SegmentTemplate:
"""Builds segment template tar file"""
self.statusLogger.set_status('BUILD_SEGMENT_TEMPLATE_STARTED', self.tempDir)
# build segment template should consider tablespace files
excludes = self._list_master_only_files()
self._create_template(newTableSpaceInfo, excludes=excludes)
self._fixup_template(excludes=excludes)
self._list_master_only_files()
self._create_template(newTableSpaceInfo)
self._fixup_template()
self._tar_template()
self.statusLogger.set_status('BUILD_SEGMENT_TEMPLATE_DONE')
......@@ -662,37 +667,45 @@ class SegmentTemplate:
regclasses = "(" + ", ".join(regclasses) + ")"
self.logger.debug("master only tables: %s" % regclasses)
global_sql = """
/* relation files */
global_relation_sql = """
SELECT pg_catalog.pg_relation_filepath(c.oid)
FROM pg_catalog.pg_class c
WHERE c.oid IN %s
AND c.relfilenode = 0
""" % (regclasses)
UNION ALL
/* index files */
global_index_sql = """
SELECT pg_catalog.pg_relation_filepath(i.indexrelid)
FROM pg_catalog.pg_index i
JOIN pg_catalog.pg_class c
ON i.indexrelid = c.oid
WHERE i.indrelid IN %s
AND c.relfilenode = 0
""" % (regclasses, regclasses)
""" % (regclasses)
per_db_sql = """
/* relation files */
per_db_relation_sql = """
SELECT pg_catalog.pg_relation_filepath(c.oid)
FROM pg_catalog.pg_class c
WHERE c.oid IN %s
AND c.relfilenode <> 0
""" % (regclasses)
result = []
per_db_index_sql = """
SELECT pg_catalog.pg_relation_filepath(i.indexrelid)
FROM pg_catalog.pg_index i
JOIN pg_catalog.pg_class c
ON i.indexrelid = c.oid
WHERE i.indrelid IN %s
AND c.relfilenode <> 0
""" % (regclasses)
self.master_only_relation_paths = []
self.master_only_index_paths = []
def add_paths(paths):
def scan_paths(paths):
"""we need to include not only the relation/index files,
but also visibility, free space and seg files"""
result = []
for path in paths:
# ${filenode}: the relation file itself
result.append(os.path.join('.', path))
......@@ -701,6 +714,7 @@ SELECT pg_catalog.pg_relation_filepath(c.oid)
result.extend(glob.glob('./%s_*' % path))
# ${filenode}.[1-9][0-9]*: the seg files
result.extend(glob.glob('./%s.*' % path))
return result
# the file list will be passed to pg_basebackup as the exclude list,
# it expects the paths are like "./global/1234", so we must chdir to
......@@ -710,12 +724,19 @@ SELECT pg_catalog.pg_relation_filepath(c.oid)
# first list the global master-only tables
with dbconn.connect(self.dburl, encoding='UTF8') as conn:
# the relation paths
paths = [str(row[0])
for row in dbconn.execSQL(conn, global_sql).fetchall()]
self.logger.debug("raw files of global master-only tables: %s" % paths)
add_paths(paths)
for row in dbconn.execSQL(conn, global_relation_sql).fetchall()]
self.logger.debug("raw relation files of global master-only tables: %s" % paths)
self.master_only_relation_paths.extend(scan_paths(paths))
# also get a list of all the databases
# the index paths
paths = [str(row[0])
for row in dbconn.execSQL(conn, global_index_sql).fetchall()]
self.logger.debug("raw index files of global master-only tables: %s" % paths)
self.master_only_index_paths.extend(scan_paths(paths))
# also get a list of the databases
databases = [str(row[0])
for row in catalog.getDatabaseList(conn)]
self.logger.debug("list of databases: %s" % databases)
......@@ -732,18 +753,24 @@ SELECT pg_catalog.pg_relation_filepath(c.oid)
, port=self.dburl.pgport
, dbname=database)
with dbconn.connect(dburl, encoding='UTF8') as conn:
# the relation paths
paths = [str(row[0])
for row in dbconn.execSQL(conn, per_db_sql).fetchall()]
self.logger.debug("raw files of per-database master-only tables: %s" % paths)
add_paths(paths)
for row in dbconn.execSQL(conn, per_db_relation_sql).fetchall()]
self.logger.debug("raw relation files of per-database master-only tables: %s" % paths)
self.master_only_relation_paths.extend(scan_paths(paths))
self.logger.debug("files of master only tables: %s" % result)
# the index paths
paths = [str(row[0])
for row in dbconn.execSQL(conn, per_db_index_sql).fetchall()]
self.logger.debug("raw index files of per-database master-only tables: %s" % paths)
self.master_only_index_paths.extend(scan_paths(paths))
os.chdir(oldcwd)
return result
self.logger.debug("relation files of master only tables: %s" % self.master_only_relation_paths)
self.logger.debug("index files of master only tables: %s" % self.master_only_index_paths)
def _create_template(self, newTableSpaceInfo=None, excludes=[]):
def _create_template(self, newTableSpaceInfo=None):
"""Creates the schema template that is used by new segments"""
self.logger.info('Creating segment template')
......@@ -762,6 +789,7 @@ SELECT pg_catalog.pg_relation_filepath(c.oid)
try:
masterSeg = self.gparray.master
excludes = self.master_only_relation_paths + self.master_only_index_paths
cmd = PgBaseBackup(pgdata=self.tempDir,
host=masterSeg.getSegmentHostName(),
port=str(masterSeg.getSegmentPort()),
......@@ -777,11 +805,9 @@ SELECT pg_catalog.pg_relation_filepath(c.oid)
# then there are no user-created tablespaces in the system,
# no need to consider tablespace problems in gpexpand.
if newTableSpaceInfo:
self._handle_tablespace_template(dummyDBID, newTableSpaceInfo,
excludes)
self._handle_tablespace_template(dummyDBID, newTableSpaceInfo)
def _handle_tablespace_template(self, dummyDBID, newTableSpaceInfo,
excludes=[]):
def _handle_tablespace_template(self, dummyDBID, newTableSpaceInfo):
"""
If there are user-created tablespaces in GreenplumDB cluster, we
have to pack them into the template. The logic here contains two
......@@ -822,7 +848,9 @@ SELECT pg_catalog.pg_relation_filepath(c.oid)
# build a list of master-only files that are under tablespaces, these
# files will be ignored during the dumping.
full_excludes = []
excludes = self.master_only_relation_paths + self.master_only_index_paths
full_exclude_relations = []
full_exclude_indices = []
for pathname in excludes:
if not pathname.startswith('./pg_tblspc/'):
# only tablespaces are handled here
......@@ -831,9 +859,14 @@ SELECT pg_catalog.pg_relation_filepath(c.oid)
# we need to change it to './16385/<dummyDBID>/GPDB_7_301911081/16386/6052_vm'
# decide relation vs. index on the ORIGINAL path, before it is
# rewritten below: the lists hold the './pg_tblspc/...' form, so a
# membership test on the rewritten path would always fail
is_relation = pathname in self.master_only_relation_paths
seps = pathname.split(os.sep)
pathname = os.sep.join([seps[2], str(dummyDBID)] + seps[3:])
if is_relation:
full_exclude_relations.append(os.path.join(tablespace_template_dir,
pathname))
else:
full_exclude_indices.append(os.path.join(tablespace_template_dir,
pathname))
full_excludes = full_exclude_relations + full_exclude_indices
for tbcspc_oid in tbcspc_oids:
symlink_path = os.path.join(master_tblspc_dir, tbcspc_oid)
target_path = os.readlink(symlink_path)
......@@ -846,26 +879,34 @@ SELECT pg_catalog.pg_relation_filepath(c.oid)
shutil.rmtree(os.path.join(os.path.dirname(target_path),
str(dummyDBID)))
with open(os.path.join(self.tempDir,
"pg_tblspc",
"newTableSpaceInfo.json"), "w") as f:
json.dump(newTableSpaceInfo, f)
empty_index = os.path.join(tablespace_template_dir, EMPTY_INDEX)
# the files of the master only tables are already deleted, now add an
# empty copy of them, so they are seen as empty on the new segments,
# and we do not need to delete them via sql separately.
for fullname in full_excludes:
# the file should not already exist, however in case it does, which
# indicates an error in the excluding logic, raise an error.
if os.path.exists(fullname):
self.logger.error("Master-only catalog file '%s' is not correctly skipped" % fullname)
raise Exception('Invalid exclude list')
dirname = os.path.dirname(fullname)
# the template dir is not expected to be updated concurrently,
# so it is safe to use a check-and-create style to create dirs.
if not os.path.isdir(dirname):
os.makedirs(dirname)
# the file should not already exist, however in case it does, which
# indicates an error in the excluding logic, raise an error.
if os.path.exists(fullname):
self.logger.error("Could not exclude file '%s' from the template: file exists" % fullname)
raise Exception('Invalid exclude list')
open(fullname, 'ab')
with open(os.path.join(self.tempDir,
"pg_tblspc",
"newTableSpaceInfo.json"), "w") as f:
json.dump(newTableSpaceInfo, f)
if fullname in full_exclude_relations:
# an empty relation file is valid, so touch it directly
open(fullname, 'ab')
else:
# an empty index file is invalid, it needs to contain a b-tree
# meta page, so we copy such a file from an existing one
shutil.copyfile(empty_index, fullname)
def _gen_DummyDBID(self):
"""gen a random int that surely beyond the possible dbid range"""
......@@ -985,7 +1026,7 @@ SELECT pg_catalog.pg_relation_filepath(c.oid)
self._start_new_primary_segments()
self._stop_new_primary_segments()
def _fixup_template(self, excludes=[]):
def _fixup_template(self):
"""Copies postgresql.conf and pg_hba.conf files from a valid segment on the system.
Then modifies the template copy of pg_hba.conf"""
......@@ -1007,6 +1048,9 @@ SELECT pg_catalog.pg_relation_filepath(c.oid)
remoteHost=self.srcSegHostname)
cpCmd.run(validateAfter=True)
excludes = self.master_only_relation_paths + self.master_only_index_paths
empty_index = os.path.join(self.tempDir, EMPTY_INDEX)
# the files of the master only tables are already deleted, now add an
# empty copy of them, so they are seen as empty on the new segments,
# and we do not need to delete them via sql separately.
......@@ -1022,17 +1066,24 @@ SELECT pg_catalog.pg_relation_filepath(c.oid)
if '.' in filename:
# ignore seg files
continue
# the template dir is not expected to be updated concurrently,
# so it is safe to use a check-and-create style to create dirs.
if not os.path.isdir(dirname):
os.makedirs(dirname)
# the file should not already exist, however in case it does,
# which indicates an error in the excluding logic, raise an
# error.
if os.path.exists(fullname):
self.logger.error("Could not exclude file '%s' from the template: file exists" % fullname)
self.logger.error("Master-only catalog file '%s' is not correctly skipped" % fullname)
raise Exception('Invalid exclude list')
open(fullname, 'ab')
# the template dir is not expected to be updated concurrently,
# so it is safe to use a check-and-create style to create dirs.
if not os.path.isdir(dirname):
os.makedirs(dirname)
if pathname in self.master_only_relation_paths:
# an empty relation file is valid, so touch it directly
open(fullname, 'ab')
else:
# an empty index file is invalid, it needs to contain a
# b-tree meta page, so we copy such a file from an existing
# one
shutil.copyfile(empty_index, fullname)
def _tar_template(self):
"""Tars up the template files"""
......@@ -1534,15 +1585,8 @@ class gpexpand:
conn.close()
"""
Connect to each database in each segment and do some cleanup of tables
that have stuff in them as a result of copying the segment from the
master. Note, this functionality used to be in segcopy and was
therefore done just once to the original copy of the master.
Need to start the new segments with the system indexes disabled, this
is necessary because the master-only catalog tables are copied as empty
files, as well as their index files, the indexes are invalid and should
not be used until being reindexed.
Connect to each database in each segment and do some cleanup of tables that have stuff in them as a result of copying the segment from the master.
Note, this functionality used to be in segcopy and was therefore done just once to the original copy of the master.
"""
for seg in newSegments:
if seg.isSegmentMirror() == True:
......@@ -1558,8 +1602,7 @@ class gpexpand:
, ctxt=REMOTE
, remoteHost=seg.getSegmentHostName()
, pg_ctl_wait=True
, timeout=SEGMENT_TIMEOUT_DEFAULT
, disableSystemIndexes=True)
, timeout=SEGMENT_TIMEOUT_DEFAULT)
self.pool.addCommand(segStartCmd)
self.pool.join()
self.pool.check_results()
......@@ -1597,43 +1640,6 @@ class gpexpand:
self.pool.check_results()
self.pool.getCompletedItems()
# stop the segments
for seg in newSegments:
if seg.isSegmentMirror():
continue
""" Stop all the new segments. """
segStartCmd = SegmentStop(
name="Stopping new segment dbid %s on host %s." % (str(seg.getSegmentDbId()),
seg.getSegmentHostName())
, dataDir=seg.getSegmentDataDirectory()
, ctxt=REMOTE
, remoteHost=seg.getSegmentHostName()
, timeout=SEGMENT_TIMEOUT_DEFAULT)
self.pool.addCommand(segStartCmd)
self.pool.join()
self.pool.check_results()
# restart the segments with index enabled
for seg in newSegments:
if seg.isSegmentMirror() == True:
continue
""" Start all the new segments in utilty mode. """
segStartCmd = SegmentStart(
name="Starting new segment dbid %s on host %s." % (str(seg.getSegmentDbId()),
seg.getSegmentHostName())
, gpdb=seg
, numContentsInCluster=self.newPrimaryCount # Starting seg on it's own.
, era=None
, mirrormode=MIRROR_MODE_MIRRORLESS
, utilityMode=True
, ctxt=REMOTE
, remoteHost=seg.getSegmentHostName()
, pg_ctl_wait=True
, timeout=SEGMENT_TIMEOUT_DEFAULT)
self.pool.addCommand(segStartCmd)
self.pool.join()
self.pool.check_results()
# --------------------------------------------------------------------------
def restore_master(self):
"""Restores the gp_segment_configuration catalog table for rollback"""
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册