arch/powerpc/kernel/eeh_driver.c

   1 /*
   2  * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
   3  * Copyright IBM Corp. 2004 2005
   4  * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
   5  *
   6  * All rights reserved.
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or (at
  11  * your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful, but
  14  * WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  16  * NON INFRINGEMENT.  See the GNU General Public License for more
  17  * details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22  *
  23  * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
  24  */
  25 #include <linux/delay.h>
  26 #include <linux/interrupt.h>
  27 #include <linux/irq.h>
  28 #include <linux/module.h>
  29 #include <linux/pci.h>
  30 #include <asm/eeh.h>
  31 #include <asm/eeh_event.h>
  32 #include <asm/ppc-pci.h>
  33 #include <asm/pci-bridge.h>
  34 #include <asm/prom.h>
  35 #include <asm/rtas.h>
  36
  37 struct eeh_rmv_data {	/* state shared across an eeh_rmv_device() traversal */
  38         struct list_head edev_list;	/* removed VF eeh_devs, linked through edev->rmv_list */
  39         int removed;	/* incremented by eeh_rmv_device() for each device it removes */
  40 };
  41
  42 static int eeh_result_priority(enum pci_ers_result result)
  43 {
  44         switch (result) {
  45         case PCI_ERS_RESULT_NONE:
  46                 return 1;
  47         case PCI_ERS_RESULT_NO_AER_DRIVER:
  48                 return 2;
  49         case PCI_ERS_RESULT_RECOVERED:
  50                 return 3;
  51         case PCI_ERS_RESULT_CAN_RECOVER:
  52                 return 4;
  53         case PCI_ERS_RESULT_DISCONNECT:
  54                 return 5;
  55         case PCI_ERS_RESULT_NEED_RESET:
  56                 return 6;
  57         default:
  58                 WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result);
  59                 return 0;
  60         }
  61 };
  62
  63 static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
  64                                                 enum pci_ers_result new)
  65 {
  66         if (eeh_result_priority(new) > eeh_result_priority(old))
  67                 return new;
  68         return old;
  69 }
  70
  71 /**
  72  * eeh_pcid_get - Get the PCI device driver
  73  * @pdev: PCI device
  74  *
  75  * The function is used to retrieve the PCI device driver for
  76  * the indicated PCI device. Besides, we will increase the reference
  77  * of the PCI device driver to prevent that being unloaded on
  78  * the fly. Otherwise, kernel crash would be seen.
  79  */
  80 static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
  81 {
  82         if (!pdev || !pdev->driver)
  83                 return NULL;
  84
  85         if (!try_module_get(pdev->driver->driver.owner))
  86                 return NULL;
  87
  88         return pdev->driver;
  89 }
  90
  91 /**
  92  * eeh_pcid_put - Dereference on the PCI device driver
  93  * @pdev: PCI device
  94  *
  95  * The function is called to do dereference on the PCI device
  96  * driver of the indicated PCI device.
  97  */
  98 static inline void eeh_pcid_put(struct pci_dev *pdev)
  99 {
 100         if (!pdev || !pdev->driver)
 101                 return;
 102
 103         module_put(pdev->driver->driver.owner);
 104 }
 105
 106 /**
 107  * eeh_disable_irq - Disable interrupt for the recovering device
 108  * @dev: PCI device
 109  *
 110  * This routine must be called when reporting temporary or permanent
 111  * error to the particular PCI device to disable interrupt of that
 112  * device. If the device has enabled MSI or MSI-X interrupt, we needn't
 113  * do real work because EEH should freeze DMA transfers for those PCI
 114  * devices encountering EEH errors, which includes MSI or MSI-X.
 115  */
 116 static void eeh_disable_irq(struct pci_dev *dev)
 117 {
 118         struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
 119
 120         /* Don't disable MSI and MSI-X interrupts. They are
 121          * effectively disabled by the DMA Stopped state
 122          * when an EEH error occurs.
 123          */
 124         if (dev->msi_enabled || dev->msix_enabled)
 125                 return;
 126
 127         if (!irq_has_action(dev->irq))
 128                 return;
 129
 130         edev->mode |= EEH_DEV_IRQ_DISABLED;
 131         disable_irq_nosync(dev->irq);
 132 }
 133
 134 /**
 135  * eeh_enable_irq - Enable interrupt for the recovering device
 136  * @dev: PCI device
 137  *
 138  * This routine must be called to enable interrupt while failed
 139  * device could be resumed.
 140  */
 141 static void eeh_enable_irq(struct pci_dev *dev)
 142 {
 143         struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
 144
 145         if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
 146                 edev->mode &= ~EEH_DEV_IRQ_DISABLED;
 147                 /*
 148                  * FIXME !!!!!
 149                  *
 150                  * This is just ass backwards. This maze has
 151                  * unbalanced irq_enable/disable calls. So instead of
 152                  * finding the root cause it works around the warning
 153                  * in the irq_enable code by conditionally calling
 154                  * into it.
 155                  *
 156                  * That's just wrong.The warning in the core code is
 157                  * there to tell people to fix their asymmetries in
 158                  * their own code, not by abusing the core information
 159                  * to avoid it.
 160                  *
 161                  * I so wish that the assymetry would be the other way
 162                  * round and a few more irq_disable calls render that
 163                  * shit unusable forever.
 164                  *
 165                  *      tglx
 166                  */
 167                 if (irqd_irq_disabled(irq_get_irq_data(dev->irq)))
 168                         enable_irq(dev->irq);
 169         }
 170 }
 171
 172 static bool eeh_dev_removed(struct eeh_dev *edev)
 173 {
 174         /* EEH device removed ? */
 175         if (!edev || (edev->mode & EEH_DEV_REMOVED))
 176                 return true;
 177
 178         return false;
 179 }
 180
 181 static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
 182 {
 183         struct pci_dev *pdev;
 184
 185         if (!edev)
 186                 return NULL;
 187
 188         /*
 189          * We cannot access the config space on some adapters.
 190          * Otherwise, it will cause fenced PHB. We don't save
 191          * the content in their config space and will restore
 192          * from the initial config space saved when the EEH
 193          * device is created.
 194          */
 195         if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
 196                 return NULL;
 197
 198         pdev = eeh_dev_to_pci_dev(edev);
 199         if (!pdev)
 200                 return NULL;
 201
 202         pci_save_state(pdev);
 203         return NULL;
 204 }
 205
 206 /**
 207  * eeh_report_error - Report pci error to each device driver
 208  * @data: eeh device
 209  * @userdata: return value
 210  *
 211  * Report an EEH error to each device driver, collect up and
 212  * merge the device driver responses. Cumulative response
 213  * passed back in "userdata".
 214  */
 215 static void *eeh_report_error(struct eeh_dev *edev, void *userdata)
 216 {
 217         struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 218         enum pci_ers_result rc, *res = userdata;
 219         struct pci_driver *driver;
 220
 221         if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
 222                 return NULL;
 223
 224         device_lock(&dev->dev);
 225         dev->error_state = pci_channel_io_frozen;
 226
 227         driver = eeh_pcid_get(dev);
 228         if (!driver) goto out_no_dev;
 229
 230         eeh_disable_irq(dev);
 231
 232         if (!driver->err_handler ||
 233             !driver->err_handler->error_detected)
 234                 goto out;
 235
 236         rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
 237
 238         *res = pci_ers_merge_result(*res, rc);
 239
 240         edev->in_error = true;
 241         pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
 242
 243 out:
 244         eeh_pcid_put(dev);
 245 out_no_dev:
 246         device_unlock(&dev->dev);
 247         return NULL;
 248 }
 249
 250 /**
 251  * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
 252  * @data: eeh device
 253  * @userdata: return value
 254  *
 255  * Tells each device driver that IO ports, MMIO and config space I/O
 256  * are now enabled. Collects up and merges the device driver responses.
 257  * Cumulative response passed back in "userdata".
 258  */
 259 static void *eeh_report_mmio_enabled(struct eeh_dev *edev, void *userdata)
 260 {
 261         struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 262         enum pci_ers_result rc, *res = userdata;
 263         struct pci_driver *driver;
 264
 265         if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
 266                 return NULL;
 267
 268         device_lock(&dev->dev);
 269         driver = eeh_pcid_get(dev);
 270         if (!driver) goto out_no_dev;
 271
 272         if (!driver->err_handler ||
 273             !driver->err_handler->mmio_enabled ||
 274             (edev->mode & EEH_DEV_NO_HANDLER))
 275                 goto out;
 276
 277         rc = driver->err_handler->mmio_enabled(dev);
 278
 279         *res = pci_ers_merge_result(*res, rc);
 280
 281 out:
 282         eeh_pcid_put(dev);
 283 out_no_dev:
 284         device_unlock(&dev->dev);
 285         return NULL;
 286 }
 287
 288 /**
 289  * eeh_report_reset - Tell device that slot has been reset
 290  * @data: eeh device
 291  * @userdata: return value
 292  *
 293  * This routine must be called while EEH tries to reset particular
 294  * PCI device so that the associated PCI device driver could take
 295  * some actions, usually to save data the driver needs so that the
 296  * driver can work again while the device is recovered.
 297  */
 298 static void *eeh_report_reset(struct eeh_dev *edev, void *userdata)
 299 {
 300         struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 301         enum pci_ers_result rc, *res = userdata;
 302         struct pci_driver *driver;
 303
 304         if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
 305                 return NULL;
 306
 307         device_lock(&dev->dev);
 308         dev->error_state = pci_channel_io_normal;
 309
 310         driver = eeh_pcid_get(dev);
 311         if (!driver) goto out_no_dev;
 312
 313         eeh_enable_irq(dev);
 314
 315         if (!driver->err_handler ||
 316             !driver->err_handler->slot_reset ||
 317             (edev->mode & EEH_DEV_NO_HANDLER) ||
 318             (!edev->in_error))
 319                 goto out;
 320
 321         rc = driver->err_handler->slot_reset(dev);
 322         *res = pci_ers_merge_result(*res, rc);
 323
 324 out:
 325         eeh_pcid_put(dev);
 326 out_no_dev:
 327         device_unlock(&dev->dev);
 328         return NULL;
 329 }
 330
 331 static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
 332 {
 333         struct pci_dev *pdev;
 334
 335         if (!edev)
 336                 return NULL;
 337
 338         /*
 339          * The content in the config space isn't saved because
 340          * the blocked config space on some adapters. We have
 341          * to restore the initial saved config space when the
 342          * EEH device is created.
 343          */
 344         if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
 345                 if (list_is_last(&edev->list, &edev->pe->edevs))
 346                         eeh_pe_restore_bars(edev->pe);
 347
 348                 return NULL;
 349         }
 350
 351         pdev = eeh_dev_to_pci_dev(edev);
 352         if (!pdev)
 353                 return NULL;
 354
 355         pci_restore_state(pdev);
 356         return NULL;
 357 }
 358
 359 /**
 360  * eeh_report_resume - Tell device to resume normal operations
 361  * @data: eeh device
 362  * @userdata: return value
 363  *
 364  * This routine must be called to notify the device driver that it
 365  * could resume so that the device driver can do some initialization
 366  * to make the recovered device work again.
 367  */
 368 static void *eeh_report_resume(struct eeh_dev *edev, void *userdata)
 369 {
 370         struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 371         bool was_in_error;
 372         struct pci_driver *driver;
 373
 374         if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
 375                 return NULL;
 376
 377         device_lock(&dev->dev);
 378         dev->error_state = pci_channel_io_normal;
 379
 380         driver = eeh_pcid_get(dev);
 381         if (!driver) goto out_no_dev;
 382
 383         was_in_error = edev->in_error;
 384         edev->in_error = false;
 385         eeh_enable_irq(dev);
 386
 387         if (!driver->err_handler ||
 388             !driver->err_handler->resume ||
 389             (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
 390                 edev->mode &= ~EEH_DEV_NO_HANDLER;
 391                 goto out;
 392         }
 393
 394         driver->err_handler->resume(dev);
 395
 396         pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
 397 out:
 398         eeh_pcid_put(dev);
 399 #ifdef CONFIG_PCI_IOV
 400         if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
 401                 eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
 402 #endif
 403 out_no_dev:
 404         device_unlock(&dev->dev);
 405         return NULL;
 406 }
 407
 408 /**
 409  * eeh_report_failure - Tell device driver that device is dead.
 410  * @data: eeh device
 411  * @userdata: return value
 412  *
 413  * This informs the device driver that the device is permanently
 414  * dead, and that no further recovery attempts will be made on it.
 415  */
 416 static void *eeh_report_failure(struct eeh_dev *edev, void *userdata)
 417 {
 418         struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 419         struct pci_driver *driver;
 420
 421         if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
 422                 return NULL;
 423
 424         device_lock(&dev->dev);
 425         dev->error_state = pci_channel_io_perm_failure;
 426
 427         driver = eeh_pcid_get(dev);
 428         if (!driver) goto out_no_dev;
 429
 430         eeh_disable_irq(dev);
 431
 432         if (!driver->err_handler ||
 433             !driver->err_handler->error_detected)
 434                 goto out;
 435
 436         driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
 437
 438         pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
 439 out:
 440         eeh_pcid_put(dev);
 441 out_no_dev:
 442         device_unlock(&dev->dev);
 443         return NULL;
 444 }
 445
 446 static void *eeh_add_virt_device(void *data, void *userdata)
 447 {
 448         struct pci_driver *driver;
 449         struct eeh_dev *edev = (struct eeh_dev *)data;
 450         struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 451         struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 452
 453         if (!(edev->physfn)) {
 454                 pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
 455                         __func__, pdn->phb->global_number, pdn->busno,
 456                         PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
 457                 return NULL;
 458         }
 459
 460         driver = eeh_pcid_get(dev);
 461         if (driver) {
 462                 if (driver->err_handler) {
 463                         eeh_pcid_put(dev);
 464                         return NULL;
 465                 }
 466                 eeh_pcid_put(dev);
 467         }
 468
 469 #ifdef CONFIG_PCI_IOV
 470         pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
 471 #endif
 472         return NULL;
 473 }
 474
 475 static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
 476 {
 477         struct pci_driver *driver;
 478         struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 479         struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
 480         int *removed = rmv_data ? &rmv_data->removed : NULL;
 481
 482         /*
 483          * Actually, we should remove the PCI bridges as well.
 484          * However, that's lots of complexity to do that,
 485          * particularly some of devices under the bridge might
 486          * support EEH. So we just care about PCI devices for
 487          * simplicity here.
 488          */
 489         if (!dev || (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
 490                 return NULL;
 491
 492         /*
 493          * We rely on count-based pcibios_release_device() to
 494          * detach permanently offlined PEs. Unfortunately, that's
 495          * not reliable enough. We might have the permanently
 496          * offlined PEs attached, but we needn't take care of
 497          * them and their child devices.
 498          */
 499         if (eeh_dev_removed(edev))
 500                 return NULL;
 501
 502         if (removed) {
 503                 if (eeh_pe_passed(edev->pe))
 504                         return NULL;
 505                 driver = eeh_pcid_get(dev);
 506                 if (driver) {
 507                         if (driver->err_handler &&
 508                             driver->err_handler->error_detected &&
 509                             driver->err_handler->slot_reset) {
 510                                 eeh_pcid_put(dev);
 511                                 return NULL;
 512                         }
 513                         eeh_pcid_put(dev);
 514                 }
 515         }
 516
 517         /* Remove it from PCI subsystem */
 518         pr_debug("EEH: Removing %s without EEH sensitive driver\n",
 519                  pci_name(dev));
 520         edev->bus = dev->bus;
 521         edev->mode |= EEH_DEV_DISCONNECTED;
 522         if (removed)
 523                 (*removed)++;
 524
 525         if (edev->physfn) {
 526 #ifdef CONFIG_PCI_IOV
 527                 struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 528
 529                 pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
 530                 edev->pdev = NULL;
 531
 532                 /*
 533                  * We have to set the VF PE number to invalid one, which is
 534                  * required to plug the VF successfully.
 535                  */
 536                 pdn->pe_number = IODA_INVALID_PE;
 537 #endif
 538                 if (rmv_data)
 539                         list_add(&edev->rmv_list, &rmv_data->edev_list);
 540         } else {
 541                 pci_lock_rescan_remove();
 542                 pci_stop_and_remove_bus_device(dev);
 543                 pci_unlock_rescan_remove();
 544         }
 545
 546         return NULL;
 547 }
 548
 549 static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
 550 {
 551         struct eeh_dev *edev, *tmp;
 552
 553         eeh_pe_for_each_dev(pe, edev, tmp) {
 554                 if (!(edev->mode & EEH_DEV_DISCONNECTED))
 555                         continue;
 556
 557                 edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
 558                 eeh_rmv_from_parent_pe(edev);
 559         }
 560
 561         return NULL;
 562 }
 563
 564 /*
 565  * Explicitly clear PE's frozen state for PowerNV where
 566  * we have frozen PE until BAR restore is completed. It's
 567  * harmless to clear it for pSeries. To be consistent with
 568  * PE reset (for 3 times), we try to clear the frozen state
 569  * for 3 times as well.
 570  */
 571 static void *__eeh_clear_pe_frozen_state(struct eeh_pe *pe, void *flag)
 572 {
 573         bool clear_sw_state = *(bool *)flag;
 574         int i, rc = 1;
 575
 576         for (i = 0; rc && i < 3; i++)
 577                 rc = eeh_unfreeze_pe(pe, clear_sw_state);
 578
 579         /* Stop immediately on any errors */
 580         if (rc) {
 581                 pr_warn("%s: Failure %d unfreezing PHB#%x-PE#%x\n",
 582                         __func__, rc, pe->phb->global_number, pe->addr);
 583                 return (void *)pe;
 584         }
 585
 586         return NULL;
 587 }
 588
 589 static int eeh_clear_pe_frozen_state(struct eeh_pe *pe,
 590                                      bool clear_sw_state)
 591 {
 592         void *rc;
 593
 594         rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state);
 595         if (!rc)
 596                 eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
 597
 598         return rc ? -EIO : 0;
 599 }
 600
 601 int eeh_pe_reset_and_recover(struct eeh_pe *pe)
 602 {
 603         int ret;
 604
 605         /* Bail if the PE is being recovered */
 606         if (pe->state & EEH_PE_RECOVERING)
 607                 return 0;
 608
 609         /* Put the PE into recovery mode */
 610         eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
 611
 612         /* Save states */
 613         eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
 614
 615         /* Issue reset */
 616         ret = eeh_pe_reset_full(pe);
 617         if (ret) {
 618                 eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 619                 return ret;
 620         }
 621
 622         /* Unfreeze the PE */
 623         ret = eeh_clear_pe_frozen_state(pe, true);
 624         if (ret) {
 625                 eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 626                 return ret;
 627         }
 628
 629         /* Restore device state */
 630         eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);
 631
 632         /* Clear recovery mode */
 633         eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 634
 635         return 0;
 636 }
 637
 638 /**
 639  * eeh_reset_device - Perform actual reset of a pci slot
 640  * @driver_eeh_aware: Does the device's driver provide EEH support?
 641  * @pe: EEH PE
 642  * @bus: PCI bus corresponding to the isolcated slot
 643  * @rmv_data: Optional, list to record removed devices
 644  *
 645  * This routine must be called to do reset on the indicated PE.
 646  * During the reset, udev might be invoked because those affected
 647  * PCI devices will be removed and then added.
 648  */
 649 static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 650                             struct eeh_rmv_data *rmv_data,
 651                             bool driver_eeh_aware)
 652 {
 653         time64_t tstamp;
 654         int cnt, rc;
 655         struct eeh_dev *edev;
 656
 657         /* pcibios will clear the counter; save the value */
 658         cnt = pe->freeze_count;
 659         tstamp = pe->tstamp;
 660
 661         /*
 662          * We don't remove the corresponding PE instances because
 663          * we need the information afterwords. The attached EEH
 664          * devices are expected to be attached soon when calling
 665          * into pci_hp_add_devices().
 666          */
 667         eeh_pe_state_mark(pe, EEH_PE_KEEP);
 668         if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
 669                 eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
 670         } else {
 671                 pci_lock_rescan_remove();
 672                 pci_hp_remove_devices(bus);
 673                 pci_unlock_rescan_remove();
 674         }
 675
 676         /*
 677          * Reset the pci controller. (Asserts RST#; resets config space).
 678          * Reconfigure bridges and devices. Don't try to bring the system
 679          * up if the reset failed for some reason.
 680          *
 681          * During the reset, it's very dangerous to have uncontrolled PCI
 682          * config accesses. So we prefer to block them. However, controlled
 683          * PCI config accesses initiated from EEH itself are allowed.
 684          */
 685         rc = eeh_pe_reset_full(pe);
 686         if (rc)
 687                 return rc;
 688
 689         pci_lock_rescan_remove();
 690
 691         /* Restore PE */
 692         eeh_ops->configure_bridge(pe);
 693         eeh_pe_restore_bars(pe);
 694
 695         /* Clear frozen state */
 696         rc = eeh_clear_pe_frozen_state(pe, false);
 697         if (rc) {
 698                 pci_unlock_rescan_remove();
 699                 return rc;
 700         }
 701
 702         /* Give the system 5 seconds to finish running the user-space
 703          * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes,
 704          * this is a hack, but if we don't do this, and try to bring
 705          * the device up before the scripts have taken it down,
 706          * potentially weird things happen.
 707          */
 708         if (!driver_eeh_aware || rmv_data->removed) {
 709                 pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
 710                         (driver_eeh_aware ? "partial" : "complete"));
 711                 ssleep(5);
 712
 713                 /*
 714                  * The EEH device is still connected with its parent
 715                  * PE. We should disconnect it so the binding can be
 716                  * rebuilt when adding PCI devices.
 717                  */
 718                 edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
 719                 eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
 720                 if (pe->type & EEH_PE_VF) {
 721                         eeh_add_virt_device(edev, NULL);
 722                 } else {
 723                         if (!driver_eeh_aware)
 724                                 eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
 725                         pci_hp_add_devices(bus);
 726                 }
 727         }
 728         eeh_pe_state_clear(pe, EEH_PE_KEEP);
 729
 730         pe->tstamp = tstamp;
 731         pe->freeze_count = cnt;
 732
 733         pci_unlock_rescan_remove();
 734         return 0;
 735 }
 736
 737 /* The longest amount of time to wait for a PCI device
 738  * to come back online, in seconds (scaled by 1000 at the wait_state() call).
 739  */
 740 #define MAX_WAIT_FOR_RECOVERY 300
 741
 742 /**
 743  * eeh_handle_normal_event - Handle EEH events on a specific PE
 744  * @pe: EEH PE - which should not be used after we return, as it may
 745  * have been invalidated.
 746  *
 747  * Attempts to recover the given PE.  If recovery fails or the PE has failed
 748  * too many times, remove the PE.
 749  *
 750  * When the PHB detects address or data parity errors on a particular PCI
 751  * slot, the associated PE will be frozen. In addition, DMAs occurring
 752  * to wild addresses (which usually happen due to bugs in device
 753  * drivers or in PCI adapter firmware) can cause an EEH error. #SERR,
 754  * #PERR or other misc PCI-related errors can also trigger EEH errors.
 755  *
 756  * Recovery process consists of unplugging the device driver (which
 757  * generated hotplug events to userspace), then issuing a PCI #RST to
 758  * the device, then reconfiguring the PCI config space for all bridges
 759  * & devices under this slot, and then finally restarting the device
 760  * drivers (which cause a second set of hotplug events to go out to
 761  * userspace).
 762  */
 763 void eeh_handle_normal_event(struct eeh_pe *pe)
 764 {
 765         struct pci_bus *bus;
 766         struct eeh_dev *edev, *tmp;
 767         int rc = 0;
 768         enum pci_ers_result result = PCI_ERS_RESULT_NONE;
 769         struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
 770
 771         bus = eeh_pe_bus_get(pe);
 772         if (!bus) {
 773                 pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
 774                         __func__, pe->phb->global_number, pe->addr);
 775                 return;
 776         }
 777
 778         eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
 779
 780         eeh_pe_update_time_stamp(pe);
 781         pe->freeze_count++;
 782         if (pe->freeze_count > eeh_max_freezes) {
 783                 pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
 784                        pe->phb->global_number, pe->addr,
 785                        pe->freeze_count);
 786                 goto hard_fail;
 787         }
 788         pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
 789                 pe->freeze_count, eeh_max_freezes);
 790
 791         /* Walk the various device drivers attached to this slot through
 792          * a reset sequence, giving each an opportunity to do what it needs
 793          * to accomplish the reset.  Each child gets a report of the
 794          * status ... if any child can't handle the reset, then the entire
 795          * slot is dlpar removed and added.
 796          *
 797          * When the PHB is fenced, we have to issue a reset to recover from
 798          * the error. Override the result if necessary to have partially
 799          * hotplug for this case.
 800          */
 801         pr_info("EEH: Notify device drivers to shutdown\n");
 802         eeh_pe_dev_traverse(pe, eeh_report_error, &result);
 803         if ((pe->type & EEH_PE_PHB) &&
 804             result != PCI_ERS_RESULT_NONE &&
 805             result != PCI_ERS_RESULT_NEED_RESET)
 806                 result = PCI_ERS_RESULT_NEED_RESET;
 807
 808         /* Get the current PCI slot state. This can take a long time,
 809          * sometimes over 300 seconds for certain systems.
 810          */
 811         rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
 812         if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
 813                 pr_warn("EEH: Permanent failure\n");
 814                 goto hard_fail;
 815         }
 816
 817         /* Since rtas may enable MMIO when posting the error log,
 818          * don't post the error log until after all dev drivers
 819          * have been informed.
 820          */
 821         pr_info("EEH: Collect temporary log\n");
 822         eeh_slot_error_detail(pe, EEH_LOG_TEMP);
 823
 824         /* If all device drivers were EEH-unaware, then shut
 825          * down all of the device drivers, and hope they
 826          * go down willingly, without panicing the system.
 827          */
 828         if (result == PCI_ERS_RESULT_NONE) {
 829                 pr_info("EEH: Reset with hotplug activity\n");
 830                 rc = eeh_reset_device(pe, bus, NULL, false);
 831                 if (rc) {
 832                         pr_warn("%s: Unable to reset, err=%d\n",
 833                                 __func__, rc);
 834                         goto hard_fail;
 835                 }
 836         }
 837
 838         /* If all devices reported they can proceed, then re-enable MMIO */
 839         if (result == PCI_ERS_RESULT_CAN_RECOVER) {
 840                 pr_info("EEH: Enable I/O for affected devices\n");
 841                 rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
 842
 843                 if (rc < 0)
 844                         goto hard_fail;
 845                 if (rc) {
 846                         result = PCI_ERS_RESULT_NEED_RESET;
 847                 } else {
 848                         pr_info("EEH: Notify device drivers to resume I/O\n");
 849                         eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
 850                 }
 851         }
 852
 853         /* If all devices reported they can proceed, then re-enable DMA */
 854         if (result == PCI_ERS_RESULT_CAN_RECOVER) {
 855                 pr_info("EEH: Enabled DMA for affected devices\n");
 856                 rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
 857
 858                 if (rc < 0)
 859                         goto hard_fail;
 860                 if (rc) {
 861                         result = PCI_ERS_RESULT_NEED_RESET;
 862                 } else {
 863                         /*
 864                          * We didn't do PE reset for the case. The PE
 865                          * is still in frozen state. Clear it before
 866                          * resuming the PE.
 867                          */
 868                         eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
 869                         result = PCI_ERS_RESULT_RECOVERED;
 870                 }
 871         }
 872
 873         /* If any device has a hard failure, then shut off everything. */
 874         if (result == PCI_ERS_RESULT_DISCONNECT) {
 875                 pr_warn("EEH: Device driver gave up\n");
 876                 goto hard_fail;
 877         }
 878
 879         /* If any device called out for a reset, then reset the slot */
 880         if (result == PCI_ERS_RESULT_NEED_RESET) {
 881                 pr_info("EEH: Reset without hotplug activity\n");
 882                 rc = eeh_reset_device(pe, bus, &rmv_data, true);
 883                 if (rc) {
 884                         pr_warn("%s: Cannot reset, err=%d\n",
 885                                 __func__, rc);
 886                         goto hard_fail;
 887                 }
 888
 889                 pr_info("EEH: Notify device drivers "
 890                         "the completion of reset\n");
 891                 result = PCI_ERS_RESULT_NONE;
 892                 eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
 893         }
 894
 895         /* All devices should claim they have recovered by now. */
 896         if ((result != PCI_ERS_RESULT_RECOVERED) &&
 897             (result != PCI_ERS_RESULT_NONE)) {
 898                 pr_warn("EEH: Not recovered\n");
 899                 goto hard_fail;
 900         }
 901
 902         /*
 903          * For those hot removed VFs, we should add back them after PF get
 904          * recovered properly.
 905          */
 906         list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) {
 907                 eeh_add_virt_device(edev, NULL);
 908                 list_del(&edev->rmv_list);
 909         }
 910
 911         /* Tell all device drivers that they can resume operations */
 912         pr_info("EEH: Notify device driver to resume\n");
 913         eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
 914
 915         pr_info("EEH: Recovery successful.\n");
 916         goto final;
 917
 918 hard_fail:
 919         /*
 920          * About 90% of all real-life EEH failures in the field
 921          * are due to poorly seated PCI cards. Only 10% or so are
 922          * due to actual, failed cards.
 923          */
 924         pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
 925                "Please try reseating or replacing it\n",
 926                 pe->phb->global_number, pe->addr);
 927
 928         eeh_slot_error_detail(pe, EEH_LOG_PERM);
 929
 930         /* Notify all devices that they're about to go down. */
 931         eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
 932
 933         /* Mark the PE to be removed permanently */
 934         eeh_pe_state_mark(pe, EEH_PE_REMOVED);
 935
 936         /*
 937          * Shut down the device drivers for good. We mark
 938          * all removed devices correctly to avoid access
 939          * the their PCI config any more.
 940          */
 941         if (pe->type & EEH_PE_VF) {
 942                 eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
 943                 eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
 944         } else {
 945                 eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
 946                 eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
 947
 948                 pci_lock_rescan_remove();
 949                 pci_hp_remove_devices(bus);
 950                 pci_unlock_rescan_remove();
 951                 /* The passed PE should no longer be used */
 952                 return;
 953         }
 954 final:
 955         eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 956 }
 957
 958 /**
 959  * eeh_handle_special_event - Handle EEH events without a specific failing PE
 960  *
 961  * Called when an EEH event is detected but can't be narrowed down to a
 962  * specific PE.  Iterates through possible failures and handles them as
 963  * necessary.
 964  */
 965 void eeh_handle_special_event(void)
 966 {
 967         struct eeh_pe *pe, *phb_pe;
 968         struct pci_bus *bus;
 969         struct pci_controller *hose;
 970         unsigned long flags;
 971         int rc;
 972
 973
 974         do {
 975                 rc = eeh_ops->next_error(&pe);
 976
 977                 switch (rc) {
 978                 case EEH_NEXT_ERR_DEAD_IOC:
 979                         /* Mark all PHBs in dead state */
 980                         eeh_serialize_lock(&flags);
 981
 982                         /* Purge all events */
 983                         eeh_remove_event(NULL, true);
 984
 985                         list_for_each_entry(hose, &hose_list, list_node) {
 986                                 phb_pe = eeh_phb_pe_get(hose);
 987                                 if (!phb_pe) continue;
 988
 989                                 eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
 990                         }
 991
 992                         eeh_serialize_unlock(flags);
 993
 994                         break;
 995                 case EEH_NEXT_ERR_FROZEN_PE:
 996                 case EEH_NEXT_ERR_FENCED_PHB:
 997                 case EEH_NEXT_ERR_DEAD_PHB:
 998                         /* Mark the PE in fenced state */
 999                         eeh_serialize_lock(&flags);
1000
1001                         /* Purge all events of the PHB */
1002                         eeh_remove_event(pe, true);
1003
1004                         if (rc == EEH_NEXT_ERR_DEAD_PHB)
1005                                 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
1006                         else
1007                                 eeh_pe_state_mark(pe,
1008                                         EEH_PE_ISOLATED | EEH_PE_RECOVERING);
1009
1010                         eeh_serialize_unlock(flags);
1011
1012                         break;
1013                 case EEH_NEXT_ERR_NONE:
1014                         return;
1015                 default:
1016                         pr_warn("%s: Invalid value %d from next_error()\n",
1017                                 __func__, rc);
1018                         return;
1019                 }
1020
1021                 /*
1022                  * For fenced PHB and frozen PE, it's handled as normal
1023                  * event. We have to remove the affected PHBs for dead
1024                  * PHB and IOC
1025                  */
1026                 if (rc == EEH_NEXT_ERR_FROZEN_PE ||
1027                     rc == EEH_NEXT_ERR_FENCED_PHB) {
1028                         eeh_handle_normal_event(pe);
1029                 } else {
1030                         pci_lock_rescan_remove();
1031                         list_for_each_entry(hose, &hose_list, list_node) {
1032                                 phb_pe = eeh_phb_pe_get(hose);
1033                                 if (!phb_pe ||
1034                                     !(phb_pe->state & EEH_PE_ISOLATED) ||
1035                                     (phb_pe->state & EEH_PE_RECOVERING))
1036                                         continue;
1037
1038                                 /* Notify all devices to be down */
1039                                 eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
1040                                 eeh_pe_dev_traverse(pe,
1041                                         eeh_report_failure, NULL);
1042                                 bus = eeh_pe_bus_get(phb_pe);
1043                                 if (!bus) {
1044                                         pr_err("%s: Cannot find PCI bus for "
1045                                                "PHB#%x-PE#%x\n",
1046                                                __func__,
1047                                                pe->phb->global_number,
1048                                                pe->addr);
1049                                         break;
1050                                 }
1051                                 pci_hp_remove_devices(bus);
1052                         }
1053                         pci_unlock_rescan_remove();
1054                 }
1055
1056                 /*
1057                  * If we have detected dead IOC, we needn't proceed
1058                  * any more since all PHBs would have been removed
1059                  */
1060                 if (rc == EEH_NEXT_ERR_DEAD_IOC)
1061                         break;
1062         } while (rc != EEH_NEXT_ERR_NONE);
1063 }