# watchdog.py
import logging, re, time, os
from autotest.client.shared import error, utils
from virttest import utils_misc, env_process


@error.context_aware
def run_watchdog(test, params, env):
    """
    Configure watchdog, crash the guest and check if watchdog_action occurs.

    Test Step:
        1. See the "Test Step" list in each test function's docstring.
    Params:
        @param test: QEMU test object.
        @param params: Dictionary with test parameters.
        @param env: Dictionary with the test environment.
    """

    # Login timeouts: first login after boot, and re-login after a reset.
    timeout = int(params.get("login_timeout", "360"))
    relogin_timeout = int(params.get("relogin_timeout", "240"))

    # Watchdog settings read by the nested helpers and test cases below.
    trigger_cmd = params.get("trigger_cmd", "echo c > /dev/watchdog")
    watchdog_action = params.get("watchdog_action", "reset")
    watchdog_device_type = params.get("watchdog_device_type", "i6300esb")

    #internal function
    def _watchdog_device_check(session, watchdog_device):
        """
        Check that the watchdog device was found and initialized in the guest.

        For "ib700" the ib700wdt driver is loaded with modprobe first. For
        "i6300esb" the guest lspci output must list the 6300ESB watchdog
        timer. For every device type, dmesg must contain a line matching the
        "dmesg_info" pattern taken from the test params.

        @param session: guest session used to run the commands.
        @param watchdog_device: watchdog device model, e.g. "i6300esb".
        @raise error.TestFail: if the i6300esb PCI device is not listed.
        @raise error.TestError: if no matching init message is in dmesg.
        """
        # ib700 needs its driver loaded manually.
        if watchdog_device == "ib700":
            session.cmd("modprobe ib700wdt")

        # For i6300esb, check the pci info to make sure the device exists.
        if watchdog_device == "i6300esb":
            error.context("checking pci info to ensure have WDT device",
                          logging.info)
            lspci_output = session.cmd_output("lspci")
            # Search unconditionally: empty lspci output also means the
            # device is missing, and wdt_pci_info must be bound before it
            # is logged below.
            wdt_pci_info = re.findall(".*6300ESB Watchdog Timer",
                                      lspci_output or "")
            if not wdt_pci_info:
                raise error.TestFail("Can not find watchdog pci")
            logging.info("Found watchdog pci device : %s", wdt_pci_info)

        # Check the watchdog init message in dmesg.
        error.context("Checking watchdog init info using dmesg", logging.info)
        dmesg_info = params.get("dmesg_info", "(i6300ESB|ib700wdt).*init")
        status, output = session.cmd_status_output(
            "dmesg | grep -i '%s' " % dmesg_info)
        if status != 0:
            error_msg = "Watchdog device '%s' initialization failed "
            raise error.TestError(error_msg % watchdog_device)
        logging.info("Watchdog device '%s' add and init successfully",
                     watchdog_device)
        logging.debug("Init info : '%s'", output)

    def _trigger_watchdog(session, trigger_cmd=None):
        """
        Send the command that triggers the watchdog to the guest.

        The command is sent with session.sendline(); nothing is sent when
        trigger_cmd is None.

        @param session: guest session the command is sent through.
        @param trigger_cmd: command that triggers the watchdog, or None.
        """
        if trigger_cmd is not None:
            error.context("Trigger Watchdog action using:'%s'." % trigger_cmd,
                          logging.info)
            session.sendline(trigger_cmd)

    def _action_check(session, watchdog_action):
        """
        Check that the configured watchdog action took effect.

        Waits for the guest session to stop responding, then checks the VM
        state expected for the action: the VM must be dead for "poweroff"
        (and for "shutdown" unless param "disable_shutdown" is "yes");
        otherwise the monitor must report "paused", "shutdown" or "running".
        After "reset" a new login to the guest is attempted.

        @param session: guest session opened before the watchdog triggered.
        @param watchdog_action: name of the action being verified.
        @raise error.TestFail: if the expected effect is not observed within
                "response_timeout".
        """
        response_timeout = int(params.get("response_timeout", '240'))
        error.context("Check whether or not watchdog action '%s' take effect"
                      % watchdog_action, logging.info)

        # With action pause, shutdown, reset or poweroff the guest session
        # stops responding; only "none" and "debug" leave it responsive.
        went_unresponsive = utils_misc.wait_for(
            lambda: not session.is_responsive(), response_timeout, 0, 1)
        if not went_unresponsive:
            if watchdog_action in ("none", "debug"):
                logging.info("OK, the guest session is responsive still")
            else:
                # Build one message string; passing two arguments made the
                # '%' apply to a literal without a placeholder (TypeError).
                raise error.TestFail("Oops, seems action '%s' take no effect, "
                                     "guest is responsive" % watchdog_action)

        # With poweroff, or shutdown without the no-shutdown option, the vm
        # dies and qemu exits. In every other case the monitor is still
        # responsive and can report the vm status.
        qemu_exits = (watchdog_action == "poweroff" or
                      (watchdog_action == "shutdown" and
                       params.get("disable_shutdown") != "yes"))
        if qemu_exits:
            if not utils_misc.wait_for(lambda: vm.is_dead(),
                                       response_timeout, 0, 1):
                raise error.TestFail("Oops, seems '%s' action take no effect, "
                                     "guest is alive!" % watchdog_action)
        else:
            expected_status = {"pause": "paused",
                               "shutdown": "shutdown"}.get(watchdog_action,
                                                           "running")
            if not utils_misc.wait_for(
                    lambda: vm.monitor.verify_status(expected_status),
                    response_timeout, 0, 1):
                logging.debug("Monitor status is:%s" % vm.monitor.get_status())
                raise error.TestFail("Oops, seems action '%s' take no effect, "
                                     "Wrong monitor status!" % watchdog_action)

        # After a reset the guest must be reachable again.
        if watchdog_action == "reset":
            logging.info("Try to login the guest after reboot")
            vm.wait_for_login(timeout=relogin_timeout)
        logging.info("Watchdog action '%s' come into effect." % watchdog_action)


    #test case
    def check_watchdog_support():
        """
        Check that the host qemu binary supports the watchdog device type.

        Test Step:
        1. Run the qemu binary with the watchdog type query option
           (param "watchdog_type_check", default " -watchdog '?'").
        2. Check that the output lists the configured watchdog device type.

        @raise error.TestFail: if the command prints nothing, or the
                configured device type is not found in its output.
        """
        qemu_binary = utils_misc.get_path(os.path.join(test.bindir,
                                                       params.get("vm_type")),
                                          params.get("qemu_binary", "qemu"))

        watchdog_type_check = params.get("watchdog_type_check",
                                         " -watchdog '?'")
        qemu_cmd = qemu_binary + watchdog_type_check

        # Query the watchdog types supported by the host qemu.
        error.context("Checking whether or not the host support WDT '%s'"
                      % watchdog_device_type, logging.info)
        supported_types = utils.system_output("%s 2>&1" % qemu_cmd,
                                              retain_output=True)
        if not supported_types:
            raise error.TestFail("No watchdog device support in the host!")

        if not re.findall(watchdog_device_type, supported_types, re.I):
            # Log what the host does support before failing, so the failure
            # can be diagnosed from the log.
            logging.info("The host support watchdog device type is: '%s'"
                         % supported_types)
            raise error.TestFail("Host not support watchdog device type %s "
                                 % watchdog_device_type)
        logging.info("The host support '%s' type watchdog device" %
                     watchdog_device_type)

    def guest_boot_with_watchdog():
        """
        Check that the guest boots with the watchdog device.

        Test Step:
        1. Boot guest with watchdog device
        2. Check that the watchdog device was initialized successfully in
           the guest
        """
        _watchdog_device_check(session=session,
                               watchdog_device=watchdog_device_type)

    def watchdog_action_test():
        """
        Watchdog action test.

        Test Step:
        1. Boot guest with watchdog device
        2. Check that the watchdog device was initialized successfully in
           the guest
        3. Trigger the watchdog action by sending the trigger command
        4. Make sure the watchdog_action takes effect
        """
        _watchdog_device_check(session=session,
                               watchdog_device=watchdog_device_type)
        _trigger_watchdog(session=session, trigger_cmd=trigger_cmd)
        _action_check(session=session, watchdog_action=watchdog_action)

    def magic_close_support():
        """
        Magic close of the watchdog.

        Test Step:
        1. Boot guest with watchdog device
        2. Check that the watchdog device was initialized successfully in
           the guest
        3. Inside guest, trigger the watchdog action
        4. Inside guest, send the magic close command
        5. Wait for the response timeout and check that the guest session
           stayed responsive, i.e. the watchdog action did not fire
        """
        wait_limit = int(params.get("response_timeout", '240'))
        close_cmd = params.get("magic_close_cmd", "echo V > /dev/watchdog")

        _watchdog_device_check(session, watchdog_device_type)
        _trigger_watchdog(session, trigger_cmd)

        # Magic close: sent through the same helper as the trigger command.
        error.context("Magic close is start", logging.info)
        _trigger_watchdog(session, close_cmd)

        # The session going unresponsive means the watchdog fired anyway.
        went_unresponsive = utils_misc.wait_for(
            lambda: not session.is_responsive(), wait_limit, 0, 1)
        if went_unresponsive:
            raise error.TestFail(
                "Oops,Watchdog action take effect, magic close FAILED")
        logging.info("Magic close take effect.")

    def migration_when_wdt_timeout():
        """
        Migrate the VM after the watchdog has been triggered.

        Test Step:
        1. Boot guest with watchdog device
        2. Check that the watchdog device was initialized successfully in
           the guest
        3. Start VM with watchdog device, action reset|poweroff|pause
        4. Inside RHEL guest, trigger watchdog
        5. Before WDT timeout, do vm migration
        6. After migration, check that the watchdog action takes effect
        """
        migrate_timeout = float(params.get("mig_timeout", "3600"))
        protocol = params.get("migration_protocol", "tcp")
        # Cancel delay is 2 when "mig_cancel" is "yes", otherwise 0.
        if params.get("mig_cancel") == "yes":
            cancel_delay = 2
        else:
            cancel_delay = 0

        _watchdog_device_check(session, watchdog_device_type)
        _trigger_watchdog(session, trigger_cmd)

        error.context("Do migration(protocol:%s),Watchdog have been triggered."
                      % protocol, logging.info)
        vm.migrate(migrate_timeout, protocol, cancel_delay)

        _action_check(session, watchdog_action)

    def hotplug_unplug_watchdog_device():
        """
        Hotplug and unplug a watchdog device through the monitor.

        Test Step:
        1. Start VM with "-watchdog-action pause" CLI option
        2. Add WDT via monitor command "device_add"
        3. Trigger watchdog action in guest
        4. Remove WDT device through monitor command "device_del"
        5. Resume and re-login the guest, check the device has been removed
        """
        wdt_pci_pattern = ".*6300ESB Watchdog Timer"
        wdt_id = "watchdog"

        # The watchdog must not be present before it is hotplugged.
        guest = vm.wait_for_login(timeout=timeout)
        lspci_before = guest.cmd_output("lspci")
        if lspci_before and re.findall(wdt_pci_pattern, lspci_before):
            raise error.TestFail("Can find watchdog pci")

        device_model = params.get("plug_watchdog_device", "i6300esb")
        add_cmd = "device_add driver=%s, id=%s" % (device_model, wdt_id)
        del_cmd = "device_del id=%s" % wdt_id

        error.context("Hotplug watchdog device '%s'" % device_model,
                      logging.info)
        vm.monitor.send_args_cmd(add_cmd)

        # Give the guest time to initialize the new device.
        time.sleep(5)
        _watchdog_device_check(guest, device_model)
        _trigger_watchdog(guest, trigger_cmd)
        _action_check(guest, watchdog_action)

        error.context("Hot unplug watchdog device", logging.info)
        vm.monitor.send_args_cmd(del_cmd)

        error.context("Resume the guest, check the WDT have been removed",
                      logging.info)
        vm.resume()
        guest = vm.wait_for_login(timeout=timeout)
        lspci_after = guest.cmd_output("lspci")
        if lspci_after:
            if re.findall(wdt_pci_pattern, lspci_after):
                raise error.TestFail("Oops, find watchdog pci, unplug failed")
            logging.info("The WDT remove successfully")


    #main procedure
    test_type = params.get("test_type")
    error.context("'%s' test starting ... " % test_type, logging.info)
    error.context("Boot VM with WDT(Device:'%s', Action:'%s'),and try to login"
                  % (watchdog_device_type, watchdog_action), logging.info)
    # Force the VM to start; vm and session are read by the nested helpers
    # and test cases above.
    params["start_vm"] = "yes"
    env_process.preprocess_vm(test, params, env, params.get("main_vm"))
    vm = env.get_vm(params["main_vm"])
    session = vm.wait_for_login(timeout=timeout)

    # Dispatch only to the real test cases. Looking the name up in locals()
    # would also match private helpers and plain variables (e.g. "params"),
    # which fail with a TypeError instead of the TestError below.
    test_cases = dict((func.__name__, func) for func in (
        check_watchdog_support,
        guest_boot_with_watchdog,
        watchdog_action_test,
        magic_close_support,
        migration_when_wdt_timeout,
        hotplug_unplug_watchdog_device,
    ))
    test_running = test_cases.get(test_type)
    if test_running is None:
        raise error.TestError("Oops test %s doesn't exist, have a check please."
                              % test_type)
    test_running()