diff --git a/gpAux/Makefile b/gpAux/Makefile index 5e256d7033eca8556b05df292e7546b3b445d4cf..3dc1326ffb9e1a461e1d061a81f90e445d98e200 100644 --- a/gpAux/Makefile +++ b/gpAux/Makefile @@ -847,7 +847,6 @@ SET_VERSION_SCRIPTS = \ sbin/gpupgrademirror.py \ lib/python/gppylib/programs/clsAddMirrors.py \ lib/python/gppylib/programs/clsHostCacheLookup.py \ - lib/python/gppylib/programs/clsInjectFault.py \ lib/python/gppylib/programs/clsRecoverSegment.py \ lib/python/gppylib/programs/clsSystemState.py \ lib/python/gppylib/programs/gppkg.py \ diff --git a/gpAux/releng/NON_PRODUCTION_FILES.txt b/gpAux/releng/NON_PRODUCTION_FILES.txt index c1d52ab454d79e3049648d10dbbf97a4c4b3e655..11e57b758ddac0ea691e00bc27591c33cec09414 100644 --- a/gpAux/releng/NON_PRODUCTION_FILES.txt +++ b/gpAux/releng/NON_PRODUCTION_FILES.txt @@ -1,10 +1,8 @@ bin/explain.pl bin/explain.pm bin/gpaddmirrors -bin/gpfaultinjector bin/gptorment.pl bin/lib/gptest.py bin/pgbench bin/postgresql_conf_gp_additions bin/test_fsync -lib/python/gppylib/programs/clsInjectFault.py diff --git a/gpAux/releng/QAUTILS_FILES.txt b/gpAux/releng/QAUTILS_FILES.txt index 13bd67a770dc02aeb655ad9e093fbe11b9333f23..18d612a5e17a79385933d88c711518f1119ebf15 100644 --- a/gpAux/releng/QAUTILS_FILES.txt +++ b/gpAux/releng/QAUTILS_FILES.txt @@ -1,6 +1,4 @@ bin/explain.pl bin/explain.pm -bin/gpfaultinjector bin/gptorment.pl bin/pgbench -lib/python/gppylib/programs/clsInjectFault.py diff --git a/gpMgmt/Makefile b/gpMgmt/Makefile index 765166bf8c643a757c23897dfb230c2a8bea2477..de940fd06bdefe38353c5c1429528c375b242941 100644 --- a/gpMgmt/Makefile +++ b/gpMgmt/Makefile @@ -37,7 +37,6 @@ SET_VERSION_SCRIPTS = \ sbin/gpupgrademirror.py \ lib/python/gppylib/programs/clsAddMirrors.py \ lib/python/gppylib/programs/clsHostCacheLookup.py \ - lib/python/gppylib/programs/clsInjectFault.py \ lib/python/gppylib/programs/clsRecoverSegment.py \ lib/python/gppylib/programs/clsSystemState.py \ lib/python/gppylib/programs/gppkg.py \ diff --git a/gpMgmt/bin/.gitignore b/gpMgmt/bin/.gitignore index 224e9e800a65d1815e13b31f17d8fbffe9c15c5a..2c93dbf96c721523377636f67e17fca108db11aa 100644 --- a/gpMgmt/bin/.gitignore +++ b/gpMgmt/bin/.gitignore @@ -14,7 +14,6 @@ ext /gpdebugc /gpdeletesystemc /gpexpandc -/gpfaultinjectorc /gpfilespacec /gpinitstandbyc /gpkillc diff --git a/gpMgmt/bin/README b/gpMgmt/bin/README index 37b0a86658e94cebad678b4b3dee6754ff15236f..e364319fee7aa3da66dd90a995ee50edbf3f27ce 100644 --- a/gpMgmt/bin/README +++ b/gpMgmt/bin/README @@ -60,10 +60,7 @@ bin/gplogfilter - Filters log files bin/gpstart - Start a Greenplum Database bin/gpstop - Stop a Greenplum Database -bin/gpfaultinjector - Cause a host or segment to fail - sbin/gpaddconfig.py - Helper script for gpaddconfig -sbin/gpfaultinjector.py - Helper script for gpfaultinjector sbin/gpsegcopy - Helper script for gpexpand sbin/gpsegstart.py - Helper script for gpstart sbin/gpsegstop.py - Helper script for gpstop @@ -137,9 +134,6 @@ commands/base.py - Core of commands submodule (could use some work) | +- SQLCommand - abstract class for executing SQL commands -commands/clsInjectFault - binary for filerep fault injection - \- doesn't belong as a part of commands submodule - commands/gp.py - Implements lots of subclasses of Command for various tasks commands/pg.py - Like gp.py, not clear what the separation is, if any. commands/unix.py - Platform information + more subclasses of Command diff --git a/gpMgmt/bin/gpfaultinjector b/gpMgmt/bin/gpfaultinjector deleted file mode 100755 index afb7cdab59415c3094ee2ada3349d8f7b04f2e61..0000000000000000000000000000000000000000 --- a/gpMgmt/bin/gpfaultinjector +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (c) Greenplum Inc 2010. All Rights Reserved. -# -# -# THIS IMPORT MUST COME FIRST -# import mainUtils FIRST to get python version check -# -from gppylib.mainUtils import * - -# now reset of imports -from gppylib.programs.clsInjectFault import * - -#------------------------------------------------------------------------- -if __name__ == '__main__': - simple_main( GpInjectFaultProgram.createParser, GpInjectFaultProgram.createProgram) - diff --git a/gpMgmt/bin/gppylib/programs/clsInjectFault.py b/gpMgmt/bin/gppylib/programs/clsInjectFault.py deleted file mode 100644 index 11729cffcba8807ec8fad19dd42adf2cf7aab496..0000000000000000000000000000000000000000 --- a/gpMgmt/bin/gppylib/programs/clsInjectFault.py +++ /dev/null @@ -1,343 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (c) Greenplum Inc 2010. All Rights Reserved. -# -# -# Used to inject faults into the file replication code -# - -# -# THIS IMPORT MUST COME FIRST -# -# import mainUtils FIRST to get python version check -from gppylib.mainUtils import * - -from optparse import Option, OptionGroup, OptionParser, OptionValueError, SUPPRESS_USAGE - -from gppylib.gpparseopts import OptParser, OptChecker -from gppylib.utils import toNonNoneString -from gppylib import gplog -from gppylib import gparray -from gppylib.commands import base -from gppylib.commands import unix -from gppylib.commands import gp -from gppylib.commands import pg -from gppylib.db import catalog -from gppylib.db import dbconn -from gppylib.system import configurationInterface, fileSystemInterface, osInterface -from gppylib import pgconf -from gppylib.system.environment import GpMasterEnvironment - -logger = gplog.get_default_logger() - -#------------------------------------------------------------------------- -class GpInjectFaultProgram: - # - # Constructor: - # - # @param options the options as returned by the options parser - # - def __init__(self, options): - self.options = options - - # - # Build the fault transition message. Fault options themselves will NOT be validated by the - # client -- the server will do that when we send the fault - # - def buildMessage(self) : - - # note that we don't validate these strings -- if they contain newlines - # (and so mess up the transition protocol) then the server will error - result = ["faultInject"] - result.append( toNonNoneString(self.options.faultName)) - result.append( toNonNoneString(self.options.type)) - result.append( toNonNoneString(self.options.ddlStatement)) - result.append( toNonNoneString(self.options.databaseName)) - result.append( toNonNoneString(self.options.tableName)) - result.append( toNonNoneString(self.options.numOccurrences)) - result.append( toNonNoneString(self.options.sleepTimeSeconds)) - return '\n'.join(result) - - # - # build a message that will get status of the fault - # - def buildGetStatusMessage(self) : - # note that we don't validate this string then the server may error - result = ["getFaultInjectStatus"] - result.append( toNonNoneString(self.options.faultName)) - return '\n'.join(result) - - # - # return True if the segment matches the given role, False otherwise - # - def isMatchingRole(self, role, segment): - segmentRole = segment.getSegmentRole() - if role == "primary": - return segmentRole == 'p' - elif role == "mirror": - return segmentRole == 'm' - elif role == "primary_mirror": - return segmentRole == 'm' or segmentRole == 'p' - else: - raise ProgramArgumentValidationException("Invalid role specified: %s" % role) - - # - # load the segments and filter to the ones we should target - # - def loadTargetSegments(self) : - - targetHost = self.options.targetHost - targetRole = self.options.targetRole - targetDbId = self.options.targetDbId - - if targetHost is None and targetDbId is None: - raise ProgramArgumentValidationException(\ - "neither --host nor --seg_dbid specified. " \ - "Exactly one should be specified.") - if targetHost is not None and targetDbId is not None: - raise ProgramArgumentValidationException(\ - "both --host nor --seg_dbid specified. " \ - "Exactly one should be specified.") - if targetHost is not None and targetRole is None: - raise ProgramArgumentValidationException(\ - "--role not specified when --host is specified. " \ - "Role is required when targeting a host.") - if targetDbId is not None and targetRole is not None: - raise ProgramArgumentValidationException(\ - "--role specified when --seg_dbid is specified. " \ - "Role should not be specified when targeting a single dbid.") - - # - # load from master db - # - masterPort = self.options.masterPort - if masterPort is None: - gpEnv = GpMasterEnvironment(self.options.masterDataDirectory, False, verbose=False) - masterPort = gpEnv.getMasterPort() - conf = configurationInterface.getConfigurationProvider().initializeProvider(masterPort) - gpArray = conf.loadSystemConfig(useUtilityMode=True, verbose=False) - segments = gpArray.getDbList() - - # - # prune gpArray according to filter settings - # - segments = [seg for seg in segments if seg.isSegmentQE()] - if targetHost is not None and targetHost != "ALL": - segments = [seg for seg in segments if seg.getSegmentHostName() == targetHost] - - if targetDbId is not None: - segments = gpArray.getDbList() - dbId = int(targetDbId) - segments = [seg for seg in segments if seg.getSegmentDbId() == dbId] - - if targetRole is not None: - segments = [seg for seg in segments if self.isMatchingRole(targetRole, seg)] - - # only DOWN segments remaining? Error out - downSegments = [seg for seg in segments if seg.getSegmentStatus() != 'u'] - if len(downSegments) > 0: - downSegStr = "\n Down Segment: " - raise ExceptionNoStackTraceNeeded( - "Unable to inject fault. At least one segment is marked as down in the database.%s%s" % - (downSegStr, downSegStr.join([str(downSeg) for downSeg in downSegments]))) - - return segments - - # return True for sync, False for async - def getAndValidateIsSyncSetting(self): - syncMode = self.options.syncMode - if syncMode == "sync": - return True - elif syncMode == "async": - return False - raise ExceptionNoStackTraceNeeded( "Invalid -m, --mode option %s" % syncMode) - - # - # write string to a temporary file that will be deleted on completion - # - def writeToTempFile(self, str): - inputFile = fileSystemInterface.getFileSystemProvider().createNamedTemporaryFile() - inputFile.write(str) - inputFile.flush() - return inputFile - - def injectFaults(self, segments, messageText): - - inputFile = self.writeToTempFile(messageText) - - # run the command in serial to each target - for segment in segments : - logger.info("Injecting fault on content=%d:dbid=%d:mode=%s:status=%s", - segment.getSegmentContentId(), - segment.getSegmentDbId(), - segment.getSegmentMode(), - segment.getSegmentStatus()) - # if there is an error then an exception is raised by command execution - cmd = gp.SendFilerepTransitionMessage("Fault Injector", inputFile.name, \ - segment.getSegmentPort(), base.LOCAL, segment.getSegmentHostName()) - cmd.run(validateAfter=False) - - - # validate ourselves - if cmd.results.rc != 0: - raise ExceptionNoStackTraceNeeded("Injection Failed: %s" % cmd.results.stderr) - elif self.options.type == "status": - # server side prints nice success messages on status...so print it - str = cmd.results.stderr - if str.startswith("Success: "): - str = str.replace("Success: ", "", 1) - str = str.replace("\n", "") - logger.info("%s", str) - inputFile.close() - - def waitForFaults(self, segments, statusQueryText ): - inputFile = self.writeToTempFile(statusQueryText) - segments = [seg for seg in segments] - sleepTimeSec = 0.115610199 - sleepTimeMultipler = 1.5 # sleepTimeMultipler * sleepTimeMultipler^11 ~= 10 - - logger.info("Awaiting fault on %d segment(s)", len(segments)) - while len(segments) > 0 : - logger.info("Sleeping %.2f seconds " % sleepTimeSec) - osInterface.getOsProvider().sleep(sleepTimeSec) - - segmentsForNextPass = [] - for segment in segments: - logger.info("Checking for fault completion on %s", segment) - cmd = gp.SendFilerepTransitionMessage.local("Fault Injector Status Check", inputFile.name, \ - segment.getSegmentPort(), segment.getSegmentHostName()) - resultStr = cmd.results.stderr.strip() - if resultStr == "Success: waitMore": - segmentsForNextPass.append(segment) - elif resultStr != "Success: done": - raise Exception("Unexpected result from server %s" % resultStr) - - segments = segmentsForNextPass - sleepTimeSec = sleepTimeSec if sleepTimeSec > 7 else sleepTimeSec * sleepTimeMultipler - inputFile.close() - - def isSyncableFaultType(self): - type = self.options.type - return type != "reset" and type != "status" - - ###### - def run(self): - - if self.options.masterPort is not None and self.options.masterDataDirectory is not None: - raise ProgramArgumentValidationException("both master port and master data directory options specified;" \ - " at most one should be specified, or specify none to use MASTER_DATA_DIRECTORY environment variable") - - isSync = self.getAndValidateIsSyncSetting() - messageText = self.buildMessage() - segments = self.loadTargetSegments() - - # inject, maybe wait - self.injectFaults(segments, messageText) - if isSync and self.isSyncableFaultType() : - statusQueryText = self.buildGetStatusMessage() - self.waitForFaults(segments, statusQueryText) - - logger.info("DONE") - return 0 # success -- exit code 0! - - def cleanup(self): - pass - - #------------------------------------------------------------------------- - @staticmethod - def createParser(): - description = (""" - This utility is NOT SUPPORTED and is for internal-use only. - - Used to inject faults into the file replication code. - """) - - help = [""" - - Return codes: - 0 - Fault injected - non-zero: Error or invalid options - """] - - parser = OptParser(option_class=OptChecker, - description=' '.join(description.split()), - version='%prog version $Revision$') - parser.setHelp(help) - - addStandardLoggingAndHelpOptions(parser, False) - - # these options are used to determine the target segments - addTo = OptionGroup(parser, 'Target Segment Options: ') - parser.add_option_group(addTo) - addTo.add_option('-r', '--role', dest="targetRole", type='string', metavar="", - help="Role of segments to target: primary, mirror, or primary_mirror") - addTo.add_option("-s", "--seg_dbid", dest="targetDbId", type="string", metavar="", - help="The segment dbid on which fault should be set and triggered.") - addTo.add_option("-H", "--host", dest="targetHost", type="string", metavar="", - help="The hostname on which fault should be set and triggered; pass ALL to target all hosts") - - addTo = OptionGroup(parser, 'Master Connection Options') - parser.add_option_group(addTo) - - addMasterDirectoryOptionForSingleClusterProgram(addTo) - addTo.add_option("-p", "--master_port", dest="masterPort", type="int", default=None, - metavar="", - help="DEPRECATED, use MASTER_DATA_DIRECTORY environment variable or -d option. " \ - "The port number of the master database on localhost, " \ - "used to fetch the segment configuration.") - - addTo = OptionGroup(parser, 'Client Polling Options: ') - parser.add_option_group(addTo) - addTo.add_option('-m', '--mode', dest="syncMode", type='string', default="async", - metavar="", - help="Synchronization mode : sync (client waits for fault to occur)" \ - " or async (client only sets fault request on server)") - - # these options are used to build the message for the segments - addTo = OptionGroup(parser, 'Fault Options: ') - parser.add_option_group(addTo) - # NB: This list needs to be kept in sync with: - # - FaultInjectorTypeEnumToString - # - FaultInjectorType_e - addTo.add_option('-y','--type', dest="type", type='string', metavar="", - help="fault type: sleep (insert sleep), fault (report fault to postmaster and fts prober), " \ - "fatal (inject FATAL error), panic (inject PANIC error), error (inject ERROR), " \ - "infinite_loop, data_curruption (corrupt data in memory and persistent media), " \ - "suspend (suspend execution), resume (resume execution that was suspended), " \ - "skip (inject skip i.e. skip checkpoint), " \ - "memory_full (all memory is consumed when injected), " \ - "reset (remove fault injection), status (report fault injection status), " \ - "segv (inject a SEGV), " \ - "interrupt (inject an Interrupt), " \ - "finish_pending (set QueryFinishPending to true), " \ - "checkpoint_and_panic (inject a panic following checkpoint) ") - addTo.add_option("-z", "--sleep_time_s", dest="sleepTimeSeconds", type="int", default="10" , - metavar="", - help="For 'sleep' faults, the amount of time for the sleep. Defaults to %default." \ - "Min Max Range is [0, 7200 sec] ") - addTo.add_option('-f','--fault_name', dest="faultName", type='string', metavar="", - help="See src/include/utils/faultinjector_lists.h for list of fault names") - addTo.add_option("-c", "--ddl_statement", dest="ddlStatement", type="string", - metavar="ddlStatement", - help="The DDL statement on which fault should be set and triggered " \ - "(i.e. create_database, drop_database, create_table, drop_table)") - addTo.add_option("-D", "--database_name", dest="databaseName", type="string", - metavar="databaseName", - help="The database name on which fault should be set and triggered.") - addTo.add_option("-t", "--table_name", dest="tableName", type="string", - metavar="tableName", - help="The table name on which fault should be set and triggered.") - addTo.add_option("-o", "--occurrence", dest="numOccurrences", type="int", default=1, - metavar="numOccurrences", - help="The number of occurrence of the DDL statement with the database name " \ - "and the table name before fault is triggered. Defaults to %default. Max is 1000. " \ - "Fault is triggered always if set to '0'. ") - parser.set_defaults() - return parser - - @staticmethod - def createProgram(options, args): - if len(args) > 0 : - raise ProgramArgumentValidationException(\ - "too many arguments: only options may be specified") - return GpInjectFaultProgram(options) diff --git a/src/backend/gpopt/gpdbwrappers.cpp b/src/backend/gpopt/gpdbwrappers.cpp index 1bdbc6f056d023805330e08d9bd51c9f23ef523d..b7c0af8186772a19e481c88063fb293c5ff5f58f 100644 --- a/src/backend/gpopt/gpdbwrappers.cpp +++ b/src/backend/gpopt/gpdbwrappers.cpp @@ -2944,9 +2944,11 @@ gpdb::InjectFaultInOptTasks FaultInjectorIdentifier_e identifier ) { - // use gpfaultinjector to activate - // e.g. gpfaultinjector -f opt_task_allocate_string_buffer -y --seg_dbid 1 - // use 'reset' as to clear injected fault + /* + * To activate this fault injection point, use gp_inject_fault + * extension with opt_task_allocate_string_buffer as the fault + * name. + */ GP_WRAP_START; { return FaultInjector_InjectFaultIfSet(identifier, DDLNotSpecified, "", ""); diff --git a/src/backend/utils/misc/faultinjector.c b/src/backend/utils/misc/faultinjector.c index 4f3d524241df06b514288d1dfd18514cdfd65bab..c9aba960bfc6e70a8149a2973422ccd897d2f9ce 100644 --- a/src/backend/utils/misc/faultinjector.c +++ b/src/backend/utils/misc/faultinjector.c @@ -1,18 +1,13 @@ /*------------------------------------------------------------------------- * * faultinjector.c - * GP Fault Injector utility (gpfaultinjector python script) is used - * for Greenplum internal testing only. + * GP Fault Injectors are used for Greenplum internal testing only. * - * The utility inject faults (as defined by 'fault_type') on primary or - * mirror segment at predefined 'fault_name. - * - * The utility is started on master host. Master host sends the fault - * injection request to specified segment. It connects to postmaster on a - * segment. Postmaster spawns backend process that sets fault injection - * request into shared memory. Shared memory is accessible to all segment - * processes. Segment processes are checking shared memory to find if/when - * fault has to be injected. + * Fault injectors are used for fine control during testing. They allow a + * developer to create deterministic tests for scenarios that are hard to + * reproduce. This is done by programming actions at certain key areas to + * suspend, skip, or even panic the process. Fault injectors are set in shared + * memory so they are accessible to all segment processes. * * Portions Copyright (c) 2009-2010 Greenplum Inc * Portions Copyright (c) 2012-Present Pivotal Software, Inc.