提交 15250649 编写于 作者: K Karen Huddleston 提交者: Chris Hajas

Added sha256 validator class to gptransfer

Add sha256 as an additional validation method, since md5 is not
supported when FIPS mode is enabled. Computing SHA-256 in the database
requires pgcrypto, so we check that both the source and destination
systems have pgcrypto installed.
Signed-off-by: Chris Hajas <chajas@pivotal.io>
上级 b16fd779
......@@ -1047,7 +1047,48 @@ class CountTableValidator(TableValidator):
# --------------------------------------------------------------------------
class MD5MergeTableValidator(TableValidator):
class HashMergeTableValidator(TableValidator):
"""
Validator class that defines some methods used by MD5 and SHA-256 validation
"""
def _src_proc(self):
    """
    Thread proc for the source side of the validation.

    Executes self._src_sql on self._src_conn and stores the result in
    self._src_res. The SQL statement must return only a single row;
    override this method if the query needs anything more complex.
    Any failure is recorded by setting self._src_failed rather than
    being raised.
    """
    try:
        result = execSQL(self._src_conn, self._src_sql)
        self._src_res = result
    except:
        # Deliberately broad: the failure flag is the only error signal
        # this proc leaves behind.
        self._src_failed = True
    finally:
        # Always run the close command for the source pipe, whether or
        # not the statement succeeded.
        GpCloseNamedPipe('close source validation pipe',
                         self._src_pipe,
                         REMOTE,
                         self._src_host).run()
def _dest_proc(self):
    """
    Thread proc for the destination side of the validation.

    Executes self._dest_sql on self._dest_conn and stores the result in
    self._dest_res. The SQL statement must return only a single row;
    override this method if the query needs anything more complex.
    Any failure is recorded by setting self._dest_failed rather than
    being raised.
    """
    try:
        result = execSQL(self._dest_conn, self._dest_sql)
        self._dest_res = result
    except:
        # Deliberately broad: the failure flag is the only error signal
        # this proc leaves behind.
        self._dest_failed = True
    finally:
        # Always run the close command for the destination pipe, whether
        # or not the statement succeeded.
        GpCloseNamedPipe('close dest validation pipe',
                         self._dest_pipe,
                         REMOTE,
                         self._dest_host).run()
# --------------------------------------------------------------------------
class MD5MergeTableValidator(HashMergeTableValidator):
"""
Validation that compares the MD5 hashes of all the rows in a table.
"""
......@@ -1112,38 +1153,101 @@ class MD5MergeTableValidator(TableValidator):
sql % (src_schema, src_table, self._src_pipe),
sql % (dest_schema, dest_table, self._dest_pipe))
def _src_proc(self):
def _compare(self):
"""
Thread proc that executes the SQL statement on the source side. To
use this method the SQL statement must only return a single row. If
your query requires more complexity you should override this.
Diff the output of the hashes
"""
res = False
try:
self._src_res = execSQL(self._src_conn, self._src_sql)
except:
self._src_failed = True
self._pool.join()
self._pool.check_results()
if not self._dest_failed and not self._src_failed:
res = ((self._src_md5_cmd.get_results()).stdout.strip()
== (self._dest_md5_cmd.get_results()).stdout.strip())
finally:
cmd = GpCloseNamedPipe(
'close source validation pipe', self._src_pipe, REMOTE, self._src_host)
cmd.run()
pass
return res
def _dest_proc(self):
@staticmethod
def get_name():
"""
Thread proc that executes the SQL statement on the destination side. To
use this method the SQL statement must only return a single row. If
your query requires more complexity you should override this.
Returns 'md5'
"""
return 'md5'
# --------------------------------------------------------------------------
class SHA256MergeTableValidator(HashMergeTableValidator):
"""
Validation that compares the SHA256 hashes of all the rows in a table.
"""
class SHA256Sum(Command):
"""
Prints sha256sum of file.
"""
try:
self._dest_res = execSQL(self._dest_conn, self._dest_sql)
except:
self._dest_failed = True
finally:
cmd = GpCloseNamedPipe(
'close dest validation pipe', self._dest_pipe, REMOTE, self._dest_host)
cmd.run()
def __init__(self, name, filename, ctxt=LOCAL, remoteHost=None):
    """
    Build the command line that prints the sha256sum of a file.

    name: command name
    filename: file to calculate sha256sum of
    ctxt: execution context passed through to Command (default LOCAL)
    remoteHost: host passed through to Command (default None)
    """
    # The filename is interpolated as given; awk is asked to print the
    # first field, with the '$' backslash-escaped in the command string.
    digest_cmd = "sha256sum %s | awk '{print \\$1}'" % filename
    Command.__init__(self, name, digest_cmd, ctxt, remoteHost)
def __init__(self, work_dir, table_pair, src_conn, dest_conn):
    """
    Prepare the pipes, checksum commands and COPY statements used to
    compare the SHA-256 row hashes of a table pair.

    work_dir: base directory under which the validation pipe paths are built
    table_pair: table pair to validate
    src_conn: Database connection to the source system
    dest_conn: Database connection to the destination system
    """
    copy_template = """CREATE OR REPLACE FUNCTION pg_temp.digest(text, text)
RETURNS bytea
AS '$libdir/pgcrypto', 'pg_digest'
LANGUAGE C IMMUTABLE STRICT;
COPY (SELECT pg_temp.digest(textin(record_out(t.*)), 'sha256') hash
FROM %s.%s t ORDER BY hash) TO '%s'"""

    src_schema, src_table = table_pair.source.schema, table_pair.source.table
    dest_schema, dest_table = table_pair.dest.schema, table_pair.dest.table

    self._pool = WorkerPool(2)

    # Both pipe paths live under the directory named after the source table.
    pipe_dir = os.path.join(work_dir, str(table_pair.source))
    self._src_pipe = os.path.join(pipe_dir, 'src_sha256_validation')
    self._dest_pipe = os.path.join(pipe_dir, 'dest_sha256_validation')

    self._src_host = DB(src_conn).host
    self._dest_host = DB(dest_conn).host

    # Queue the pipe-creation commands (source first), then join the pool
    # and check the results before queuing the checksum commands.
    pipe_specs = (
        ('Create source validation pipe', self._src_pipe, self._src_host),
        ('Create destination validation pipe', self._dest_pipe, self._dest_host),
    )
    for label, pipe_path, host in pipe_specs:
        self._pool.addCommand(GpCreateNamedPipe(label, pipe_path, REMOTE, host))
    self._pool.join()
    self._pool.check_results()

    # The checksum commands are kept on the instance so their output can
    # be read back later.
    make_sum = SHA256MergeTableValidator.SHA256Sum
    self._src_sha256_cmd = make_sum(
        'Source SHA256Sum', self._src_pipe, REMOTE, self._src_host)
    self._pool.addCommand(self._src_sha256_cmd)
    self._dest_sha256_cmd = make_sum(
        'Dest SHA256Sum', self._dest_pipe, REMOTE, self._dest_host)
    self._pool.addCommand(self._dest_sha256_cmd)

    src_sql = copy_template % (src_schema, src_table, self._src_pipe)
    dest_sql = copy_template % (dest_schema, dest_table, self._dest_pipe)
    TableValidator.__init__(self, work_dir, src_conn, dest_conn,
                            src_sql, dest_sql)
def _compare(self):
"""
......@@ -1154,8 +1258,8 @@ class MD5MergeTableValidator(TableValidator):
self._pool.join()
self._pool.check_results()
if not self._dest_failed and not self._src_failed:
res = ((self._src_md5_cmd.get_results()).stdout.strip()
== (self._dest_md5_cmd.get_results()).stdout.strip())
res = ((self._src_sha256_cmd.get_results()).stdout.strip()
== (self._dest_sha256_cmd.get_results()).stdout.strip())
finally:
pass
return res
......@@ -1163,9 +1267,9 @@ class MD5MergeTableValidator(TableValidator):
@staticmethod
def get_name():
"""
Returns 'md5'
Returns 'sha256'
"""
return 'md5'
return 'sha256'
# --------------------------------------------------------------------------
......@@ -1564,9 +1668,9 @@ class GpTransferCommand(Command):
self._wext_gpfdist_urls = list()
self._ext_gpfdist_urls = list()
self._wext_name = ('w_ext_%s_%s' % (self._table_pair.source.table,
hashlib.md5(str(self._table_pair.source)).hexdigest()))[0:63]
hashlib.sha256(str(self._table_pair.source)).hexdigest()))[0:63]
self._ext_name = ('ext_%s_%s' % (self._table_pair.source.table,
hashlib.md5(str(self._table_pair.source)).hexdigest()))[0:63]
hashlib.sha256(str(self._table_pair.source)).hexdigest()))[0:63]
self._pool = None
if validator:
self._validator_class = validator_factory.get_validator(validator)
......@@ -3148,6 +3252,18 @@ class GpTransfer(object):
logger.warning('Found no tables to exclude from transfer table list')
sys.exit(0)
if self._options.validator == 'sha256':
cmdStr = 'test -e `pg_config --pkglibdir`/pgcrypto.so'
cmd = Command('test pgcrypto on source', cmdStr, LOCAL, self._options.source_host)
cmd.run()
if cmd.get_results().rc != 0:
raise Exception('Source system must have pgcrypto installed when using sha256 validator')
cmd = Command('test pgcrypto on destination', cmdStr, LOCAL, self._options.dest_host)
cmd.run()
if cmd.get_results().rc != 0:
raise Exception('Destination system must have pgcrypto installed when using sha256 validator')
def _final_count_validation(self):
logger.info('Running final table row count validation on destination tables...')
......
......@@ -747,10 +747,13 @@ OPTIONS
Perform data validation on table data. These are the supported types of
validation.
count - Specify this value to compare row counts between source and
count - Specify this value to compare row counts between source and
destination table data.
MD5 - Specify this value to compare MD5 values between source and
md5 - Specify this value to compare MD5 values between source and
destination table data.
sha256 - Specify this value to compare SHA-256 values between source and
destination table data.
If validation for a table fails, gptransfer displays the name of the
......
......@@ -41,6 +41,18 @@ Feature: gptransfer tests
And verify that table "one_row_table" in "gptransfer_testdb4" has "1" rows
And verify that table "wide_rows" in "gptransfer_testdb5" has "10" rows
Scenario: gptransfer full sha256 validator in TEXT format with '\010' delimiter
Given the gptransfer test is initialized
And the user runs "gptransfer --full --delimiter '\010' --format text --source-port $GPTRANSFER_SOURCE_PORT --source-host $GPTRANSFER_SOURCE_HOST --source-user $GPTRANSFER_SOURCE_USER --dest-user $GPTRANSFER_DEST_USER --dest-port $GPTRANSFER_DEST_PORT --dest-host $GPTRANSFER_DEST_HOST --source-map-file $GPTRANSFER_MAP_FILE --validate sha256 --batch-size=10"
Then gptransfer should return a return code of 0
And verify that table "t0" in "gptransfer_testdb1" has "100" rows
And verify that table "t1" in "gptransfer_testdb1" has "200" rows
And verify that table "t2" in "gptransfer_testdb1" has "300" rows
And verify that table "t0" in "gptransfer_testdb3" has "700" rows
And verify that table "empty_table" in "gptransfer_testdb4" has "0" rows
And verify that table "one_row_table" in "gptransfer_testdb4" has "1" rows
And verify that table "wide_rows" in "gptransfer_testdb5" has "10" rows
@T339888
@T339914
Scenario: gptransfer full count validator in TEXT format with '\010' delimiter
......@@ -68,6 +80,18 @@ Feature: gptransfer tests
And verify that table "one_row_table" in "gptransfer_testdb4" has "1" rows
And verify that table "wide_rows" in "gptransfer_testdb5" has "10" rows
Scenario: gptransfer full sha256 validator in CSV format
Given the gptransfer test is initialized
And the user runs "gptransfer --full --source-port $GPTRANSFER_SOURCE_PORT --source-host $GPTRANSFER_SOURCE_HOST --source-user $GPTRANSFER_SOURCE_USER --dest-user $GPTRANSFER_DEST_USER --dest-port $GPTRANSFER_DEST_PORT --dest-host $GPTRANSFER_DEST_HOST --source-map-file $GPTRANSFER_MAP_FILE --validate sha256 --format=csv --batch-size=10"
Then gptransfer should return a return code of 0
And verify that table "t0" in "gptransfer_testdb1" has "100" rows
And verify that table "t1" in "gptransfer_testdb1" has "200" rows
And verify that table "t2" in "gptransfer_testdb1" has "300" rows
And verify that table "t0" in "gptransfer_testdb3" has "700" rows
And verify that table "empty_table" in "gptransfer_testdb4" has "0" rows
And verify that table "one_row_table" in "gptransfer_testdb4" has "1" rows
And verify that table "wide_rows" in "gptransfer_testdb5" has "10" rows
@T439915
Scenario: gptransfer full count validator in CSV format
Given the gptransfer test is initialized
......@@ -230,7 +254,7 @@ Feature: gptransfer tests
@T339842
@T339950
Scenario: gptransfer single database
Scenario: gptransfer single database with md5 validation
Given the gptransfer test is initialized
And the user runs "psql -p $GPTRANSFER_SOURCE_PORT -h $GPTRANSFER_SOURCE_HOST -U $GPTRANSFER_SOURCE_USER -c "CREATE TABLE my_random_dist_table(i int) DISTRIBUTED RANDOMLY;" -d gptransfer_testdb1"
And the user runs "gptransfer -d gptransfer_testdb1 --source-port $GPTRANSFER_SOURCE_PORT --source-host $GPTRANSFER_SOURCE_HOST --source-user $GPTRANSFER_SOURCE_USER --dest-user $GPTRANSFER_DEST_USER --dest-port $GPTRANSFER_DEST_PORT --dest-host $GPTRANSFER_DEST_HOST --source-map-file $GPTRANSFER_MAP_FILE --validate md5 -v --batch-size=10"
......@@ -243,14 +267,14 @@ Feature: gptransfer tests
And the user runs "psql -p $GPTRANSFER_SOURCE_PORT -h $GPTRANSFER_SOURCE_HOST -U $GPTRANSFER_SOURCE_USER -c "DROP TABLE my_random_dist_table;" -d gptransfer_testdb1"
@T339951
Scenario: gptransfer single table
Scenario: gptransfer single table with md5 validation
Given the gptransfer test is initialized
And the user runs "gptransfer -t gptransfer_testdb1.public.t0 --source-port $GPTRANSFER_SOURCE_PORT --source-host $GPTRANSFER_SOURCE_HOST --source-user $GPTRANSFER_SOURCE_USER --dest-user $GPTRANSFER_DEST_USER --dest-port $GPTRANSFER_DEST_PORT --dest-host $GPTRANSFER_DEST_HOST --source-map-file $GPTRANSFER_MAP_FILE --validate md5 --batch-size=10"
Then gptransfer should return a return code of 0
And verify that table "t0" in "gptransfer_testdb1" has "100" rows
@T339952
Scenario: gptransfer input file
Scenario: gptransfer input file with md5 validation
Given the gptransfer test is initialized
And the user runs "gptransfer --source-port $GPTRANSFER_SOURCE_PORT --source-host $GPTRANSFER_SOURCE_HOST --source-user $GPTRANSFER_SOURCE_USER --dest-user $GPTRANSFER_DEST_USER --dest-port $GPTRANSFER_DEST_PORT --dest-host $GPTRANSFER_DEST_HOST --source-map-file $GPTRANSFER_MAP_FILE --validate md5 -f test/behave/mgmt_utils/steps/data/gptransfer_infile --batch-size=1"
Then gptransfer should return a return code of 0
......@@ -258,6 +282,34 @@ Feature: gptransfer tests
And verify that table "t0" in "gptransfer_testdb1" has "100" rows
And verify that table "t0" in "gptransfer_testdb3" has "700" rows
Scenario: gptransfer single database with sha256 validation
Given the gptransfer test is initialized
And the user runs "psql -p $GPTRANSFER_SOURCE_PORT -h $GPTRANSFER_SOURCE_HOST -U $GPTRANSFER_SOURCE_USER -c "CREATE TABLE my_random_dist_table(i int) DISTRIBUTED RANDOMLY;" -d gptransfer_testdb1"
And the user runs "gptransfer -d gptransfer_testdb1 --source-port $GPTRANSFER_SOURCE_PORT --source-host $GPTRANSFER_SOURCE_HOST --source-user $GPTRANSFER_SOURCE_USER --dest-user $GPTRANSFER_DEST_USER --dest-port $GPTRANSFER_DEST_PORT --dest-host $GPTRANSFER_DEST_HOST --source-map-file $GPTRANSFER_MAP_FILE --validate sha256 -v --batch-size=10"
Then gptransfer should return a return code of 0
And verify that table "t0" in "gptransfer_testdb1" has "100" rows
And verify that table "t1" in "gptransfer_testdb1" has "200" rows
And verify that table "t2" in "gptransfer_testdb1" has "300" rows
And the user runs "psql gptransfer_testdb1 -c '\d+ my_random_dist_table'"
Then psql should print "Distributed randomly" to stdout 1 times
And the user runs "psql -p $GPTRANSFER_SOURCE_PORT -h $GPTRANSFER_SOURCE_HOST -U $GPTRANSFER_SOURCE_USER -c "DROP TABLE my_random_dist_table;" -d gptransfer_testdb1"
@T339951
Scenario: gptransfer single table with sha256 validation
Given the gptransfer test is initialized
And the user runs "gptransfer -t gptransfer_testdb1.public.t0 --source-port $GPTRANSFER_SOURCE_PORT --source-host $GPTRANSFER_SOURCE_HOST --source-user $GPTRANSFER_SOURCE_USER --dest-user $GPTRANSFER_DEST_USER --dest-port $GPTRANSFER_DEST_PORT --dest-host $GPTRANSFER_DEST_HOST --source-map-file $GPTRANSFER_MAP_FILE --validate sha256 --batch-size=10"
Then gptransfer should return a return code of 0
And verify that table "t0" in "gptransfer_testdb1" has "100" rows
@T339952
Scenario: gptransfer input file with sha256 validation
Given the gptransfer test is initialized
And the user runs "gptransfer --source-port $GPTRANSFER_SOURCE_PORT --source-host $GPTRANSFER_SOURCE_HOST --source-user $GPTRANSFER_SOURCE_USER --dest-user $GPTRANSFER_DEST_USER --dest-port $GPTRANSFER_DEST_PORT --dest-host $GPTRANSFER_DEST_HOST --source-map-file $GPTRANSFER_MAP_FILE --validate sha256 -f test/behave/mgmt_utils/steps/data/gptransfer_infile --batch-size=1"
Then gptransfer should return a return code of 0
And verify that gptransfer is in order of "test/behave/mgmt_utils/steps/data/gptransfer_infile" when partition transfer is "None"
And verify that table "t0" in "gptransfer_testdb1" has "100" rows
And verify that table "t0" in "gptransfer_testdb3" has "700" rows
@T886748
Scenario: gptransfer -F exclude input file
Given the gptransfer test is initialized
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册