提交 47687ca0 编写于 作者: N Ning Yu

Check for data integrity on the expand_after_icw pipeline.

上级 d824012e
......@@ -78,21 +78,49 @@ EOF
echo "$pgoptions"
# detect partial tables in all the non-template databases;
# exit code is 0 if no partial table is found, or 1 otherwise
function list_partial_tables() {
local pgoptions="$(get_pgoptions)"
su gpadmin -c bash <<EOF
. /usr/local/greenplum-db-devel/greenplum_path.sh
export PGOPTIONS='$pgoptions'
python $CWDIR/scan_partial_table.py
# usage: sort_dump < input_file > output_file
# filter and sort the 'INSERT INTO' lines of "pg_dumpall --inserts" output.
# also appends the database name to the end of each line as a comment.
function sort_dump() {
sed -nrf "$CWDIR/filter_dump.sed" | sort
# usage: expand_cluster <old_size> <new_size>
function expand_cluster() {
local old="$1"
local new="$2"
local inputfile="/tmp/inputfile.${old}-${new}"
local pidfile="/tmp/postmaster.pid.${old}-${new}"
local dump_before="/tmp/dump.${old}-${new}.before.sql"
local dump_after="/tmp/dump.${old}-${new}.after.sql"
local sorted_dump_before="/tmp/sorted-dump.${old}-${new}.before.sql"
local sorted_dump_after="/tmp/sorted-dump.${old}-${new}.after.sql"
local sorted_dump_diff="/tmp/sorted-dump.${old}-${new}.diff"
local dbname="postgres"
local pgoptions="$(get_pgoptions)"
local retval=0
local uncompleted
local partial
pushd gpdb_src/gpAux/gpdemo
gen_gpexpand_input "$old" "$new"
# dump before expansion
su gpadmin -c "pg_dumpall --inserts -Oxaf '$dump_before'"
# Back up the master pid; by checking it later we can tell whether the cluster
# was restarted during the tests.
su gpadmin -c "head -n 1 $MASTER_DATA_DIRECTORY/postmaster.pid >$pidfile"
......@@ -104,23 +132,40 @@ function expand_cluster() {
uncompleted=$(su gpadmin -c "psql -Aqtd $dbname -c \"select count(*) from gpexpand.status_detail where status <> 'COMPLETED'\"")
# cleanup
su gpadmin -c "yes | PGOPTIONS='$pgoptions' gpexpand -s -c"
su gpadmin -c "dropdb $dbname" 2>/dev/null || : # ignore failure
# dump after expansion
su gpadmin -c "pg_dumpall --inserts -Oxaf '$dump_after'"
if [ "$uncompleted" -ne 0 ]; then
echo "error: some tables are not successfully expanded"
return 1
echo "error: fail to expand some tables"
# double check gp_distribution_policy.numsegments in every database
if ! list_partial_tables; then
echo "error: some tables are not expanded"
echo "checking for data integration after expansion..."
sort_dump < "$dump_before" > "$sorted_dump_before"
sort_dump < "$dump_after" > "$sorted_dump_after"
if diff -u0 "$sorted_dump_before" "$sorted_dump_after" >"$sorted_dump_diff"; then
echo "before and after dumps have no difference"
echo "error: before and after dumps differ, here are part of the sorted diff:"
head -n50 "$sorted_dump_diff"
# double check gp_distribution_policy.numsegments
partial=$(su gpadmin -c "psql -Aqtd $dbname -c \"select count(*) from gp_distribution_policy where numsegments <> $new\"")
if [ "$partial" -ne 0 ]; then
echo "error: not all the tables are expanded by gpexpand"
return 1
if [ "$retval" -eq 0 ]; then
echo "all the tables are successfully expanded"
echo "all the tables are successfully expanded"
return 0
return $retval
# usage: make_cluster [<demo_cluster_options>]
# foreach database name
\@^\\connect (.*)$@{
# adjust its format
s@@ /* DATABASE: \1 */@;
# copy it to hold space
# foreach insert command
# append the database name from hold space
# join the two lines
# output it
#!/usr/bin/env python
import sys
from gppylib.db import dbconn
list_dbs_sql = '''
select datname from pg_database
where datallowconn and not datistemplate
get_cluster_size_sql = '''
select numsegments from gp_toolkit.__gp_number_of_segments
scan_sql = '''
select n.nspname, c.relname
from gp_distribution_policy d
join pg_class c on c.oid = d.localoid
join pg_namespace n on n.oid = c.relnamespace
where d.numsegments <> {cluster_size:d}
and c.relstorage <> 'x'
dburl = dbconn.DbURL()
conn = dbconn.connect(dburl)
cursor = dbconn.execSQL(conn, list_dbs_sql)
dbnames = [row[0] for row in cursor]
cluster_size = int(dbconn.execSQLForSingleton(conn, get_cluster_size_sql))
print('scanning for partial tables...')
retval = 0
for dbname in dbnames:
dburl = dbconn.DbURL(dbname=dbname)
conn = dbconn.connect(dburl)
cursor = dbconn.execSQL(conn, scan_sql.format(cluster_size=cluster_size))
if cursor.rowcount > 0:
retval = 1
for row in cursor:
print('- "{dbname}"."{namespace}"."{relname}"'.format(
dbname=dbname.replace('"', '""'),
namespace=row[0].replace('"', '""'),
relname=row[1].replace('"', '""')))
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册