PCI/ERR: Recover from RCEC AER errors
[linux-2.6-microblaze.git] / drivers / pci / pcie / err.c
index c543f41..87a2dc8 100644 (file)
@@ -146,38 +146,70 @@ out:
        return 0;
 }
 
+/**
+ * pci_walk_bridge - walk devices potentially affected by an AER error
+ * @bridge:    bridge which may be a Port or an RCEC
+ * @cb:                callback to be called for each device found
+ * @userdata:  arbitrary pointer to be passed to callback
+ *
+ * If the device provided is a bridge, walk the subordinate bus, including
+ * any bridged devices on buses under this bus.  Call the provided callback
+ * on each device found.
+ *
+ * If the device provided has no subordinate bus, e.g., an RCEC, call the
+ * callback once on the device itself; its return value is ignored.
+ */
+static void pci_walk_bridge(struct pci_dev *bridge,
+                           int (*cb)(struct pci_dev *, void *),
+                           void *userdata)
+{
+       if (bridge->subordinate)
+               pci_walk_bus(bridge->subordinate, cb, userdata);
+       else
+               cb(bridge, userdata);
+}
+
+
 pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
-                       pci_channel_state_t state,
-                       pci_ers_result_t (*reset_link)(struct pci_dev *pdev))
+               pci_channel_state_t state,
+               pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev))
 {
+       int type = pci_pcie_type(dev);
+       struct pci_dev *bridge;
        pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
-       struct pci_bus *bus;
+       struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
 
        /*
-        * Error recovery runs on all subordinates of the first downstream port.
-        * If the downstream port detected the error, it is cleared at the end.
+        * If the error was detected by a Root Port, Downstream Port, or
+        * RCEC, recovery runs on the device itself.  For Ports, that also
+        * includes any subordinate devices.
+        *
+        * If it was detected by another device (Endpoint, etc), recovery
+        * runs on the device and anything else under the same Port, i.e.,
+        * everything under "bridge".
         */
-       if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
-             pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM))
-               dev = dev->bus->self;
-       bus = dev->subordinate;
-
-       pci_dbg(dev, "broadcast error_detected message\n");
+       if (type == PCI_EXP_TYPE_ROOT_PORT ||
+           type == PCI_EXP_TYPE_DOWNSTREAM ||
+           type == PCI_EXP_TYPE_RC_EC)
+               bridge = dev;
+       else
+               bridge = pci_upstream_bridge(dev);
+
+       pci_dbg(bridge, "broadcast error_detected message\n");
        if (state == pci_channel_io_frozen) {
-               pci_walk_bus(bus, report_frozen_detected, &status);
-               status = reset_link(dev);
+               pci_walk_bridge(bridge, report_frozen_detected, &status);
+               status = reset_subordinates(bridge);
                if (status != PCI_ERS_RESULT_RECOVERED) {
-                       pci_warn(dev, "link reset failed\n");
+                       pci_warn(bridge, "subordinate device reset failed\n");
                        goto failed;
                }
        } else {
-               pci_walk_bus(bus, report_normal_detected, &status);
+               pci_walk_bridge(bridge, report_normal_detected, &status);
        }
 
        if (status == PCI_ERS_RESULT_CAN_RECOVER) {
                status = PCI_ERS_RESULT_RECOVERED;
-               pci_dbg(dev, "broadcast mmio_enabled message\n");
-               pci_walk_bus(bus, report_mmio_enabled, &status);
+               pci_dbg(bridge, "broadcast mmio_enabled message\n");
+               pci_walk_bridge(bridge, report_mmio_enabled, &status);
        }
 
        if (status == PCI_ERS_RESULT_NEED_RESET) {
@@ -187,27 +219,35 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
                 * drivers' slot_reset callbacks?
                 */
                status = PCI_ERS_RESULT_RECOVERED;
-               pci_dbg(dev, "broadcast slot_reset message\n");
-               pci_walk_bus(bus, report_slot_reset, &status);
+               pci_dbg(bridge, "broadcast slot_reset message\n");
+               pci_walk_bridge(bridge, report_slot_reset, &status);
        }
 
        if (status != PCI_ERS_RESULT_RECOVERED)
                goto failed;
 
-       pci_dbg(dev, "broadcast resume message\n");
-       pci_walk_bus(bus, report_resume, &status);
+       pci_dbg(bridge, "broadcast resume message\n");
+       pci_walk_bridge(bridge, report_resume, &status);
 
-       if (pcie_aer_is_native(dev))
-               pcie_clear_device_status(dev);
-       pci_aer_clear_nonfatal_status(dev);
-       pci_info(dev, "device recovery successful\n");
+       /*
+        * If we have native control of AER, clear error status in the Root
+        * Port or Downstream Port that signaled the error.  If the
+        * platform retained control of AER, it is responsible for clearing
+        * this status.  In that case, the signaling device may not even be
+        * visible to the OS.
+        */
+       if (host->native_aer || pcie_ports_native) {
+               pcie_clear_device_status(bridge);
+               pci_aer_clear_nonfatal_status(bridge);
+       }
+       pci_info(bridge, "device recovery successful\n");
        return status;
 
 failed:
-       pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
+       pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT);
 
        /* TODO: Should kernel panic here? */
-       pci_info(dev, "device recovery failed\n");
+       pci_info(bridge, "device recovery failed\n");
 
        return status;
 }