diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index f4a93218fbcb61b2f5b55d360f1023029d64bdf9..2841ecac4c47e8594663628b2b13abfc34d359bc 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -98,6 +98,7 @@ struct eeh_pe { #define EEH_DEV_NO_HANDLER (1 << 8) /* No error handler */ #define EEH_DEV_SYSFS (1 << 9) /* Sysfs created */ +#define EEH_DEV_REMOVED (1 << 10) /* Removed permanently */ struct eeh_dev { int mode; /* EEH mode */ diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index ed57fa7920c88dd123fcf139353489fcef4746c0..db1e2b8eff3c4e813d5840b9294128685e8a6b52 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -58,6 +58,7 @@ int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); void eeh_pe_state_mark(struct eeh_pe *pe, int state); void eeh_pe_state_clear(struct eeh_pe *pe, int state); +void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode); void eeh_sysfs_add_device(struct pci_dev *pdev); void eeh_sysfs_remove_device(struct pci_dev *pdev); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 1f1e2cc045a9aa315b98712dbb58562aaf817a12..f99ba9b7632219641c25921edea15c9261db9f79 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -171,6 +171,15 @@ static void eeh_enable_irq(struct pci_dev *dev) } } +static bool eeh_dev_removed(struct eeh_dev *edev) +{ + /* EEH device removed ? */ + if (!edev || (edev->mode & EEH_DEV_REMOVED)) + return true; + + return false; +} + /** * eeh_report_error - Report pci error to each device driver * @data: eeh device @@ -187,10 +196,8 @@ static void *eeh_report_error(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - /* We might not have the associated PCI device, - * then we should continue for next one. - */ - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_frozen; driver = eeh_pcid_get(dev); @@ -230,6 +237,9 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; + if (!dev || eeh_dev_removed(edev)) + return NULL; + driver = eeh_pcid_get(dev); if (!driver) return NULL; @@ -267,7 +277,8 @@ static void *eeh_report_reset(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); @@ -307,7 +318,8 @@ static void *eeh_report_resume(void *data, void *userdata) struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); @@ -343,7 +355,8 @@ static void *eeh_report_failure(void *data, void *userdata) struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_perm_failure; driver = eeh_pcid_get(dev); @@ -380,6 +393,16 @@ static void *eeh_rmv_device(void *data, void *userdata) if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) return NULL; + /* + * We rely on count-based pcibios_release_device() to + * detach permanently offlined PEs. Unfortunately, that's + * not reliable enough. We might have the permanently + * offlined PEs attached, but we needn't take care of + * them and their child devices. + */ + if (eeh_dev_removed(edev)) + return NULL; + driver = eeh_pcid_get(dev); if (driver) { eeh_pcid_put(dev); @@ -694,8 +717,17 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) /* Notify all devices that they're about to go down. */ eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); - /* Shut down the device drivers for good. */ + /* Mark the PE to be removed permanently */ + pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1; + + /* + * Shut down the device drivers for good. We mark + * all removed devices correctly to avoid access + * the their PCI config any more. + */ if (frozen_bus) { + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + pci_lock_rescan_remove(); pcibios_remove_pci_devices(frozen_bus); pci_unlock_rescan_remove(); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index f0c353fa655a3a5a742e2de674851ab3ca88006a..995c2a2846303d3885674d474bcbfd0cae90f9cc 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -503,13 +503,17 @@ static void *__eeh_pe_state_mark(void *data, void *flag) struct eeh_dev *edev, *tmp; struct pci_dev *pdev; - /* - * Mark the PE with the indicated state. Also, - * the associated PCI device will be put into - * I/O frozen state to avoid I/O accesses from - * the PCI device driver. - */ + /* Keep the state of permanently removed PE intact */ + if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && + (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING))) + return NULL; + pe->state |= state; + + /* Offline PCI devices if applicable */ + if (state != EEH_PE_ISOLATED) + return NULL; + eeh_pe_for_each_dev(pe, edev, tmp) { pdev = eeh_dev_to_pci_dev(edev); if (pdev) @@ -532,6 +536,27 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state) eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); } +static void *__eeh_pe_dev_mode_mark(void *data, void *flag) +{ + struct eeh_dev *edev = data; + int mode = *((int *)flag); + + edev->mode |= mode; + + return NULL; +} + +/** + * eeh_pe_dev_state_mark - Mark state for all device under the PE + * @pe: EEH PE + * + * Mark specific state for all child devices of the PE. + */ +void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode) +{ + eeh_pe_dev_traverse(pe, __eeh_pe_dev_mode_mark, &mode); +} + /** * __eeh_pe_state_clear - Clear state for the PE * @data: EEH PE @@ -546,8 +571,16 @@ static void *__eeh_pe_state_clear(void *data, void *flag) struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); + /* Keep the state of permanently removed PE intact */ + if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && + (state & EEH_PE_ISOLATED)) + return NULL; + pe->state &= ~state; - pe->check_count = 0; + + /* Clear check count since last isolation */ + if (state & EEH_PE_ISOLATED) + pe->check_count = 0; return NULL; } diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 83c26d829991e8b8c118d6fa8d8d5bb7ffc68fda..ea6470c21f4e4e86152093c6126990a73544bf62 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -304,6 +304,9 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus, struct pci_dev *dev = NULL; const __be32 *reg; int reglen, devfn; +#ifdef CONFIG_EEH + struct eeh_dev *edev = of_node_to_eeh_dev(dn); +#endif pr_debug(" * %s\n", dn->full_name); if (!of_device_is_available(dn)) @@ -321,6 +324,12 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus, return dev; } + /* Device removed permanently ? */ +#ifdef CONFIG_EEH + if (edev && (edev->mode & EEH_DEV_REMOVED)) + return NULL; +#endif + /* create a new pci_dev for this device */ dev = of_create_pci_dev(dn, bus, devfn); if (!dev) diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index f98cf99c9f8c545d2bf2691a18543b266e10622f..eefbfcc3fd8c40a9d50b9839fb2292f723c7970d 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -441,11 +441,16 @@ static bool pnv_pci_cfg_check(struct pci_controller *hose, if (!(phb->flags & PNV_PHB_FLAG_EEH)) return true; - /* PE reset ? */ + /* PE reset or device removed ? */ edev = of_node_to_eeh_dev(dn); - if (edev && edev->pe && - (edev->pe->state & EEH_PE_RESET)) - return false; + if (edev) { + if (edev->pe && + (edev->pe->state & EEH_PE_RESET)) + return false; + + if (edev->mode & EEH_DEV_REMOVED) + return false; + } return true; }