1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * The file intends to implement PE based on the information from
4 * platforms. Basically, there are 3 types of PEs: PHB/Bus/Device.
5 * All the PEs should be organized as hierarchy tree. The first level
6 * of the tree will be associated to existing PHBs since the particular
7 * PE is only meaningful in one PHB domain.
9 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
12 #include <linux/delay.h>
13 #include <linux/export.h>
14 #include <linux/gfp.h>
15 #include <linux/kernel.h>
17 #include <linux/pci.h>
18 #include <linux/string.h>
20 #include <asm/pci-bridge.h>
21 #include <asm/ppc-pci.h>
/*
 * Extra bytes appended to every PE allocation for auxiliary data; zero means
 * no auxiliary area. Written by eeh_set_pe_aux_size() and read by
 * eeh_pe_alloc().
 */
23 static int eeh_pe_aux_size = 0;
/*
 * List head for the PHB PEs. eeh_phb_pe_create() appends to it through the
 * child link of each PE and eeh_phb_pe_get() searches it.
 */
24 static LIST_HEAD(eeh_phb_pe);
27 * eeh_set_pe_aux_size - Set PE auxiliary data size
28 * @size: PE auxiliary data size
30 * Set PE auxiliary data size
/*
 * Store @size in the file-scope eeh_pe_aux_size, which eeh_pe_alloc() later
 * adds to the size of each PE allocation.
 * NOTE(review): original lines 33-36 are not visible in this excerpt; any
 * validation of @size would sit there - confirm against the full file.
 */
32 void eeh_set_pe_aux_size(int size)
37 eeh_pe_aux_size = size;
41 * eeh_pe_alloc - Allocate PE
42 * @phb: PCI controller
45 * Allocate PE instance dynamically.
/*
 * Allocate one zero-filled PE with kzalloc(GFP_KERNEL).
 *
 * The allocation size starts at sizeof(struct eeh_pe). When eeh_pe_aux_size
 * is non-zero the size is first rounded up to cache_line_size() and the
 * auxiliary size is then added, so the auxiliary area lives in the same
 * allocation. Both list heads (child_list, edevs) are initialised empty and
 * pe->data is pointed past the aligned struct inside the allocation.
 *
 * NOTE(review): the local declarations, the allocation-failure check, the
 * stores of @type and @phb, the second ALIGN() argument for pe->data
 * (presumably cache_line_size()) and the return statement are on lines
 * elided from this excerpt - confirm against the full file.
 */
47 static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
52 alloc_size = sizeof(struct eeh_pe);
53 if (eeh_pe_aux_size) {
54 alloc_size = ALIGN(alloc_size, cache_line_size());
55 alloc_size += eeh_pe_aux_size;
59 pe = kzalloc(alloc_size, GFP_KERNEL);
62 /* Initialize PHB PE */
65 INIT_LIST_HEAD(&pe->child_list);
66 INIT_LIST_HEAD(&pe->edevs);
68 pe->data = (void *)pe + ALIGN(sizeof(struct eeh_pe),
74 * eeh_phb_pe_create - Create PHB PE
75 * @phb: PCI controller
77 * The function should be called while the PHB is detected during
78 * system boot or PCI hotplug in order to create PHB PE.
/*
 * Create the PE that represents @phb: allocate it through eeh_pe_alloc()
 * with type EEH_PE_PHB, append it to the tail of the eeh_phb_pe list via its
 * child link, and emit a debug message carrying phb->global_number.
 * An out-of-memory error is logged on the failure path.
 * NOTE(review): the failure check and both return statements are on elided
 * lines; the exact return codes cannot be read from this excerpt.
 */
80 int eeh_phb_pe_create(struct pci_controller *phb)
85 pe = eeh_pe_alloc(phb, EEH_PE_PHB);
87 pr_err("%s: out of memory!\n", __func__);
91 /* Put it into the list */
92 list_add_tail(&pe->child, &eeh_phb_pe);
94 pr_debug("EEH: Add PE for PHB#%x\n", phb->global_number);
100 * eeh_wait_state - Wait for PE state
102 * @max_wait: maximal period in millisecond
104 * Wait for the state of associated PE. It might take some time
105 * to retrieve the PE's state.
/*
 * Query the PE state through eeh_ops->get_state(), which also reports a
 * suggested wait time in mwait. Any result other than
 * EEH_STATE_UNAVAILABLE ends the wait. On timeout a warning is printed and
 * EEH_STATE_NOT_SUPPORT is returned. Otherwise mwait is clamped to the
 * range [EEH_STATE_MIN_WAIT_TIME, EEH_STATE_MAX_WAIT_TIME] (1000 to 300000,
 * in milliseconds since the value feeds msleep()), with a warning when the
 * firmware value falls outside that range, and the function sleeps for the
 * smaller of mwait and @max_wait.
 *
 * NOTE(review): the enclosing loop, the timeout condition, the return of
 * ret and any bookkeeping on @max_wait after the sleep are on elided lines -
 * confirm against the full file.
 */
107 int eeh_wait_state(struct eeh_pe *pe, int max_wait)
113 * According to PAPR, the state of PE might be temporarily
114 * unavailable. Under the circumstance, we have to wait
115 * for indicated time determined by firmware. The maximal
116 * wait time is 5 minutes, which is acquired from the original
117 * EEH implementation. Also, the original implementation
118 * also defined the minimal wait time as 1 second.
120 #define EEH_STATE_MIN_WAIT_TIME (1000)
121 #define EEH_STATE_MAX_WAIT_TIME (300 * 1000)
124 ret = eeh_ops->get_state(pe, &mwait);
126 if (ret != EEH_STATE_UNAVAILABLE)
130 pr_warn("%s: Timeout when getting PE's state (%d)\n",
132 return EEH_STATE_NOT_SUPPORT;
135 if (mwait < EEH_STATE_MIN_WAIT_TIME) {
136 pr_warn("%s: Firmware returned bad wait value %d\n",
138 mwait = EEH_STATE_MIN_WAIT_TIME;
139 } else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
140 pr_warn("%s: Firmware returned too long wait value %d\n",
142 mwait = EEH_STATE_MAX_WAIT_TIME;
145 msleep(min(mwait, max_wait));
151 * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
152 * @phb: PCI controller
154 * The overall PEs form hierarchy tree. The first layer of the
155 * hierarchy tree is composed of PHB PEs. The function is used
156 * to retrieve the corresponding PHB PE according to the given PHB.
/*
 * Scan the eeh_phb_pe list (linked through the child member) for the entry
 * whose type has EEH_PE_PHB set and whose phb pointer equals @phb.
 * NOTE(review): the return statements are on elided lines; presumably the
 * matching PE is returned and NULL when nothing matches - confirm.
 */
158 struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
162 list_for_each_entry(pe, &eeh_phb_pe, child) {
164 * Actually, we needn't check the type since
165 * the PE for PHB has been determined when that
168 if ((pe->type & EEH_PE_PHB) && pe->phb == phb)
176 * eeh_pe_next - Retrieve the next PE in the tree
180 * The function is used to retrieve the next PE in the
/*
 * Pick the PE that follows @pe in the tree walk. The first candidate is the
 * head of the child list of @pe. When that list is empty (its next pointer
 * refers back to the list head itself) the code falls back to the sibling
 * that follows @pe, and checks whether that sibling pointer has wrapped
 * around to the child_list head of the parent. The chosen list node is
 * converted back to its struct eeh_pe through the child member.
 * NOTE(review): the loop that climbs toward @root when no sibling is left,
 * and the end-of-walk return, are on elided lines - confirm.
 */
183 struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root)
185 struct list_head *next = pe->child_list.next;
187 if (next == &pe->child_list) {
191 next = pe->child.next;
192 if (next != &pe->parent->child_list)
198 return list_entry(next, struct eeh_pe, child);
202 * eeh_pe_traverse - Traverse PEs in the specified PHB
205 * @flag: extra parameter to callback
207 * The function is used to traverse the specified PE and its
208 * child PEs. The traversing is to be terminated once the
209 * callback returns something other than NULL, or no more PEs
/*
 * Visit @root and the PEs below it with the eeh_for_each_pe() iterator.
 * NOTE(review): the loop body is elided in this excerpt; presumably it calls
 * fn(pe, flag) and returns the first non-NULL result, with NULL returned
 * when the walk completes - confirm against the full file.
 */
212 void *eeh_pe_traverse(struct eeh_pe *root,
213 eeh_pe_traverse_func fn, void *flag)
218 eeh_for_each_pe(root, pe) {
227 * eeh_pe_dev_traverse - Traverse the devices from the PE
229 * @fn: function callback
230 * @flag: extra parameter to callback
232 * The function is used to traverse the devices of the specified
233 * PE and its child PEs.
/*
 * Walk every EEH device that belongs to @root or to a PE below it:
 * eeh_for_each_pe() iterates the PEs and eeh_pe_for_each_dev() iterates the
 * devices of each PE, with tmp as the spare cursor that iterator requires.
 * A warning that prints the PE pointer is emitted on the invalid-input path.
 * NOTE(review): the condition guarding pr_warn() and the call that hands
 * each device to @fn are on elided lines - confirm.
 */
235 void eeh_pe_dev_traverse(struct eeh_pe *root,
236 eeh_edev_traverse_func fn, void *flag)
239 struct eeh_dev *edev, *tmp;
242 pr_warn("%s: Invalid PE %p\n",
247 /* Traverse root PE */
248 eeh_for_each_pe(root, pe)
249 eeh_pe_for_each_dev(pe, edev, tmp)
254 * __eeh_pe_get - Check the PE address
256 * For one particular PE, it can be identified by PE address
257 * or traditional BDF address. BDF address is composed of
258 * Bus/Device/Function number. The extra data referred by flag
259 * indicates which type of address should be used.
/*
 * Callback handed to eeh_pe_traverse() by eeh_pe_get(). @flag points at the
 * int PE number being looked for. PHB PEs are skipped, and a PE whose addr
 * equals the target number is the match.
 * NOTE(review): the return statements are on elided lines; presumably NULL
 * keeps the traversal going and the matching pe stops it - confirm.
 */
261 static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
263 int *target_pe = flag;
265 /* PHB PEs are special and should be ignored */
266 if (pe->type & EEH_PE_PHB)
269 if (*target_pe == pe->addr)
276 * eeh_pe_get - Search PE based on the given address
277 * @phb: PCI controller
280 * Search the corresponding PE based on the specified address which
281 * is included in the eeh device. The function is used to check if
282 * the associated PE has been created against the PE address. It's
283 * notable that the PE address has 2 formats: traditional PE address
284 * which is composed of PCI bus/device/function number, or unified
/*
 * Look up the PE numbered @pe_no below @phb: fetch the PHB PE with
 * eeh_phb_pe_get() and traverse from it with __eeh_pe_get() as the matcher.
 * The address of the by-value @pe_no parameter is passed as the callback
 * cookie, so it is only valid for the duration of this call. The result of
 * eeh_pe_traverse() is returned unchanged.
 */
287 struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no)
289 struct eeh_pe *root = eeh_phb_pe_get(phb);
291 return eeh_pe_traverse(root, __eeh_pe_get, &pe_no);
295 * eeh_pe_tree_insert - Add EEH device to parent PE
297 * @new_pe_parent: PE to create additional PEs under
299 * Add EEH device to the PE in edev->pe_config_addr. If a PE already
300 * exists with that address then @edev is added to that PE. Otherwise
301 * a new PE is created and inserted into the PE tree as a child of
304 * If @new_pe_parent is NULL then the new PE will be inserted
305 * directly under the PHB.
/*
 * Attach @edev to the PE identified by edev->pe_config_addr on the
 * controller of the device.
 *
 * Visible flow:
 *  - eeh_pe_get() looks for an existing PE with that address.
 *  - An existing PE flagged EEH_PE_INVALID gets the device appended to its
 *    edevs list, and EEH_PE_INVALID is cleared on the chain of parents,
 *    stopping at the first parent that is not flagged.
 *  - Otherwise an existing PE is retyped to EEH_PE_BUS and the device is
 *    appended to its edevs list.
 *  - When no PE exists a new one is allocated (EEH_PE_VF or EEH_PE_DEVICE),
 *    given the PE address of the device, linked under @new_pe_parent (or
 *    under the PHB PE when @new_pe_parent is NULL) and the device is
 *    appended to its edevs list.
 *
 * NOTE(review): the conditions selecting between these branches, the VF
 * versus DEVICE choice, the edev->pe assignments, the initial value of
 * parent, the cleanup on the missing-PHB-PE path and every return statement
 * are on lines elided from this excerpt - confirm against the full file.
 */
307 int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent)
309 struct pci_controller *hose = edev->controller;
310 struct eeh_pe *pe, *parent;
313 * Search the PE has been existing or not according
314 * to the PE address. If that has been existing, the
315 * PE should be composed of PCI bus and its subordinate
318 pe = eeh_pe_get(hose, edev->pe_config_addr);
320 if (pe->type & EEH_PE_INVALID) {
321 list_add_tail(&edev->entry, &pe->edevs);
324 * We're running to here because of PCI hotplug caused by
325 * EEH recovery. We need clear EEH_PE_INVALID until the top.
329 if (!(parent->type & EEH_PE_INVALID))
331 parent->type &= ~EEH_PE_INVALID;
332 parent = parent->parent;
335 eeh_edev_dbg(edev, "Added to existing PE (parent: PE#%x)\n",
338 /* Mark the PE as type of PCI bus */
339 pe->type = EEH_PE_BUS;
342 /* Put the edev to PE */
343 list_add_tail(&edev->entry, &pe->edevs);
344 eeh_edev_dbg(edev, "Added to bus PE\n");
349 /* Create a new EEH PE */
351 pe = eeh_pe_alloc(hose, EEH_PE_VF);
353 pe = eeh_pe_alloc(hose, EEH_PE_DEVICE);
355 pr_err("%s: out of memory!\n", __func__);
359 pe->addr = edev->pe_config_addr;
362 * Put the new EEH PE into hierarchy tree. If the parent
363 * can't be found, the newly created PE will be attached
364 * to PHB directly. Otherwise, we have to associate the
365 * PE with its parent.
367 if (!new_pe_parent) {
368 new_pe_parent = eeh_phb_pe_get(hose);
369 if (!new_pe_parent) {
370 pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
371 __func__, hose->global_number);
378 /* link new PE into the tree */
379 pe->parent = new_pe_parent;
380 list_add_tail(&pe->child, &new_pe_parent->child_list);
383 * Put the newly created PE into the child list and
384 * link the EEH device accordingly.
386 list_add_tail(&edev->entry, &pe->edevs);
388 eeh_edev_dbg(edev, "Added to new (parent: PE#%x)\n",
389 new_pe_parent->addr);
395 * eeh_pe_tree_remove - Remove one EEH device from the associated PE
398 * The PE hierarchy tree might be changed when doing PCI hotplug.
399 * Also, the PCI devices or buses could be removed from the system
400 * during EEH recovery. So we have to call the function to remove the
401 * corresponding PE accordingly if necessary.
/*
 * Detach @edev from its PE and tidy up PEs that end up empty.
 *
 * Visible flow:
 *  - eeh_dev_to_pe() finds the PE; a debug message is logged when the
 *    device has none.
 *  - The device is unlinked from the edevs list of the PE.
 *  - PHB PEs are never removed.
 *  - keep and recover mirror the EEH_PE_KEEP and EEH_PE_RECOVERING state
 *    bits; KEEP without RECOVERING is unexpected and trips WARN_ON().
 *  - With neither bit set, a PE that has no devices and no child PEs is
 *    unlinked from the child list of its parent.
 *  - Otherwise a PE that has no devices is flagged EEH_PE_INVALID, after a
 *    scan of its child PEs for any that are not flagged EEH_PE_INVALID.
 *
 * NOTE(review): the loop that walks up through the parents, the freeing of
 * an unlinked PE, the handling of the child scan result and the return
 * statements are on lines elided from this excerpt - confirm against the
 * full file.
 */
403 int eeh_pe_tree_remove(struct eeh_dev *edev)
405 struct eeh_pe *pe, *parent, *child;
409 pe = eeh_dev_to_pe(edev);
411 eeh_edev_dbg(edev, "No PE found for device.\n");
415 /* Remove the EEH device */
417 list_del(&edev->entry);
420 * Check if the parent PE includes any EEH devices.
421 * If not, we should delete that. Also, we should
422 * delete the parent PE if it doesn't have associated
423 * child PEs and EEH devices.
428 /* PHB PEs should never be removed */
429 if (pe->type & EEH_PE_PHB)
433 * XXX: KEEP is set while resetting a PE. I don't think it's
434 * ever set without RECOVERING also being set. I could
435 * be wrong though so catch that with a WARN.
437 keep = !!(pe->state & EEH_PE_KEEP);
438 recover = !!(pe->state & EEH_PE_RECOVERING);
439 WARN_ON(keep && !recover);
441 if (!keep && !recover) {
442 if (list_empty(&pe->edevs) &&
443 list_empty(&pe->child_list)) {
444 list_del(&pe->child);
451 * Mark the PE as invalid. At the end of the recovery
452 * process any invalid PEs will be garbage collected.
454 * We need to delay the free()ing of them since we can
455 * remove edev's while traversing the PE tree which
456 * might trigger the removal of a PE and we can't
457 * deal with that (yet).
459 if (list_empty(&pe->edevs)) {
461 list_for_each_entry(child, &pe->child_list, child) {
462 if (!(child->type & EEH_PE_INVALID)) {
469 pe->type |= EEH_PE_INVALID;
482 * eeh_pe_update_time_stamp - Update PE's frozen time stamp
485 * We have time stamp for each PE to trace its time of getting
486 * frozen in last hour. The function should be called to update
487 * the time stamp on first error of the specific PE. On the other
488 * hand, we needn't account for errors that happened more than an hour ago.
/*
 * Maintain the freeze time stamp of @pe. When freeze_count is zero or
 * negative it is reset to 0 and tstamp takes the current value of
 * ktime_get_seconds(). Otherwise the current time is sampled, and when more
 * than 3600 seconds have passed since pe->tstamp the freeze_count is reset
 * to 0.
 * NOTE(review): the else keyword, original line 502 (presumably the refresh
 * of pe->tstamp) and any early guard are on elided lines - confirm.
 */
490 void eeh_pe_update_time_stamp(struct eeh_pe *pe)
496 if (pe->freeze_count <= 0) {
497 pe->freeze_count = 0;
498 pe->tstamp = ktime_get_seconds();
500 tstamp = ktime_get_seconds();
501 if (tstamp - pe->tstamp > 3600) {
503 pe->freeze_count = 0;
509 * eeh_pe_state_mark - Mark specified state for PE and its associated device
512 * EEH error affects the current PE and its child PEs. The function
513 * is used to mark appropriate state for the affected PEs and the
514 * associated devices.
/*
 * Walk @root and the PEs below it with eeh_for_each_pe() and act only on
 * PEs that do not carry EEH_PE_REMOVED. The symbol is exported to GPL
 * modules.
 * NOTE(review): the statement executed for each qualifying PE is elided;
 * presumably it ORs @state into pe->state - confirm.
 */
516 void eeh_pe_state_mark(struct eeh_pe *root, int state)
520 eeh_for_each_pe(root, pe)
521 if (!(pe->state & EEH_PE_REMOVED))
524 EXPORT_SYMBOL_GPL(eeh_pe_state_mark);
527 * eeh_pe_mark_isolated
530 * Record that a PE has been isolated by marking the PE and its children as
531 * EEH_PE_ISOLATED (and EEH_PE_CFG_BLOCKED, if required) and their PCI devices
532 * as pci_channel_io_frozen.
/*
 * Flag @root and the PEs below it as isolated. eeh_pe_state_mark() applies
 * EEH_PE_ISOLATED first. A second walk then visits every device of every
 * PE, sets the error_state of its pci_dev to pci_channel_io_frozen, and
 * adds EEH_PE_CFG_BLOCKED to each PE that has EEH_PE_CFG_RESTRICTED set.
 * The symbol is exported to GPL modules.
 * NOTE(review): original line 544 is elided; presumably it guards against
 * a device without a pci_dev before error_state is written - confirm.
 */
534 void eeh_pe_mark_isolated(struct eeh_pe *root)
537 struct eeh_dev *edev;
538 struct pci_dev *pdev;
540 eeh_pe_state_mark(root, EEH_PE_ISOLATED);
541 eeh_for_each_pe(root, pe) {
542 list_for_each_entry(edev, &pe->edevs, entry) {
543 pdev = eeh_dev_to_pci_dev(edev);
545 pdev->error_state = pci_channel_io_frozen;
547 /* Block PCI config access if required */
548 if (pe->state & EEH_PE_CFG_RESTRICTED)
549 pe->state |= EEH_PE_CFG_BLOCKED;
552 EXPORT_SYMBOL_GPL(eeh_pe_mark_isolated);
/*
 * Device callback used by eeh_pe_dev_mode_mark(). @flag points at an int
 * holding the mode bits, which is read into the local mode.
 * NOTE(review): the statement that applies mode to @edev is elided;
 * presumably it ORs the bits into edev->mode - confirm.
 */
554 static void __eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
556 int mode = *((int *)flag);
562 * eeh_pe_dev_mode_mark - Mark mode for all devices under the PE
565 * Mark specific mode for all child devices of the PE.
/*
 * Run __eeh_pe_dev_mode_mark() on every device of @pe and of the PEs below
 * it through eeh_pe_dev_traverse(). The address of the by-value @mode
 * parameter is the callback cookie, so it is only valid during this call.
 */
567 void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode)
569 eeh_pe_dev_traverse(pe, __eeh_pe_dev_mode_mark, &mode);
573 * eeh_pe_state_clear - Clear state for the PE
576 * @include_passed: include passed-through devices?
578 * The function is used to clear the indicated state from the
579 * given PE. Besides, we also clear the check count of the PE
/*
 * Clear @state on @root and the PEs below it.
 *
 * Visible flow for each PE:
 *  - PEs carrying EEH_PE_REMOVED are left untouched.
 *  - PEs for which eeh_pe_passed() is true are skipped unless
 *    @include_passed is set.
 *  - The extra work below only applies when @state includes
 *    EEH_PE_ISOLATED: every device of the PE has the error_state of its
 *    pci_dev set to pci_channel_io_normal, and EEH_PE_CFG_BLOCKED is
 *    cleared on PEs that have EEH_PE_CFG_RESTRICTED set.
 *
 * NOTE(review): the statement that clears @state from pe->state, the reset
 * of the check count, the guard on pdev and the continue statements are on
 * lines elided from this excerpt - confirm against the full file.
 */
582 void eeh_pe_state_clear(struct eeh_pe *root, int state, bool include_passed)
585 struct eeh_dev *edev, *tmp;
586 struct pci_dev *pdev;
588 eeh_for_each_pe(root, pe) {
589 /* Keep the state of permanently removed PE intact */
590 if (pe->state & EEH_PE_REMOVED)
593 if (!include_passed && eeh_pe_passed(pe))
599 * Special treatment on clearing isolated state. Clear
600 * check count since last isolation and put all affected
601 * devices to normal state.
603 if (!(state & EEH_PE_ISOLATED))
607 eeh_pe_for_each_dev(pe, edev, tmp) {
608 pdev = eeh_dev_to_pci_dev(edev);
612 pdev->error_state = pci_channel_io_normal;
615 /* Unblock PCI config access if required */
616 if (pe->state & EEH_PE_CFG_RESTRICTED)
617 pe->state &= ~EEH_PE_CFG_BLOCKED;
622 * Some PCI bridges (e.g. PLX bridges) have primary/secondary
623 * buses assigned explicitly by firmware, and we probably have
624 * lost that after reset. So we have to delay the check until
625 * the PCI-CFG registers have been restored for the parent
628 * Don't use normal PCI-CFG accessors, which probably has been
629 * blocked on normal path during the stage. So we need utilize
630 * eeh operations, which is always permitted.
/*
 * Bring the PCIe link below a bridge back up after its config space has
 * been restored. All register access goes through eeh_ops->read_config()
 * and eeh_ops->write_config(), relative to edev->pcie_cap.
 *
 * Visible flow:
 *  - Only devices flagged EEH_DEV_ROOT_PORT or EEH_DEV_DS_PORT are handled.
 *  - Slot status is read; without the presence-detect bit the slot is
 *    reported as empty.
 *  - When the slot capabilities advertise a power controller and the slot
 *    control register shows power off (PCC set), the PCC and PIC fields are
 *    cleared, the indicator field is set to 0x0100 (presumably the on
 *    encoding - confirm against the PCIe spec) and the value is written
 *    back.
 *  - The link-disable bit is cleared in the link control register.
 *  - Link capabilities are read; without data-link-layer active reporting
 *    the link state cannot be polled.
 *  - Otherwise link status is polled for the DLLLA bit while the timeout
 *    counter is below 5000, and the outcome is logged.
 *
 * NOTE(review): the early returns, the sleeps and timeout increments in the
 * polling loop and the delay after power-on are on elided lines - confirm.
 */
632 static void eeh_bridge_check_link(struct eeh_dev *edev)
639 * We only check root port and downstream ports of
642 if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
645 eeh_edev_dbg(edev, "Checking PCIe link...\n");
647 /* Check slot status */
648 cap = edev->pcie_cap;
649 eeh_ops->read_config(edev, cap + PCI_EXP_SLTSTA, 2, &val);
650 if (!(val & PCI_EXP_SLTSTA_PDS)) {
651 eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val);
655 /* Check power status if we have the capability */
656 eeh_ops->read_config(edev, cap + PCI_EXP_SLTCAP, 2, &val);
657 if (val & PCI_EXP_SLTCAP_PCP) {
658 eeh_ops->read_config(edev, cap + PCI_EXP_SLTCTL, 2, &val);
659 if (val & PCI_EXP_SLTCTL_PCC) {
660 eeh_edev_dbg(edev, "In power-off state, power it on ...\n");
661 val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
662 val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
663 eeh_ops->write_config(edev, cap + PCI_EXP_SLTCTL, 2, val);
669 eeh_ops->read_config(edev, cap + PCI_EXP_LNKCTL, 2, &val);
670 val &= ~PCI_EXP_LNKCTL_LD;
671 eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val);
674 eeh_ops->read_config(edev, cap + PCI_EXP_LNKCAP, 4, &val);
675 if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
676 eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val);
681 /* Wait until the link is up or the 5s timeout expires */
683 while (timeout < 5000) {
687 eeh_ops->read_config(edev, cap + PCI_EXP_LNKSTA, 2, &val);
688 if (val & PCI_EXP_LNKSTA_DLLLA)
692 if (val & PCI_EXP_LNKSTA_DLLLA)
693 eeh_edev_dbg(edev, "Link up (%s)\n",
694 (val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
696 eeh_edev_dbg(edev, "Link not ready (0x%04x)\n", val);
/*
 * BYTE_SWAP maps a config-space byte offset to the mirrored position inside
 * the same 4-byte group: for OFF = 4k + r the result is 4k + (3 - r), so
 * 0xC becomes 15 and 0xD becomes 14. OFF is evaluated twice, so pass only
 * side-effect-free expressions.
 */
699 #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
/*
 * SAVED_BYTE reads one byte of the saved config space at the mirrored
 * offset. It expands to a reference to a variable named edev, so it can
 * only be used where such a variable is in scope.
 * NOTE(review): presumably the mirroring compensates for the saved 32-bit
 * words being held in big-endian host byte order - confirm.
 */
700 #define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
/*
 * Write the saved config space of a bridge back through
 * eeh_ops->write_config(), in this order:
 *  - dwords 4 to 12 (offsets 0x10 through 0x30),
 *  - dword 14 (offset 0x38),
 *  - the cache line size and latency timer as single bytes taken from the
 *    saved image with SAVED_BYTE(),
 *  - dword 15 (offset 0x3C),
 *  - the command register, written as 4 bytes from saved dword 1 with
 *    PCI_COMMAND_MEMORY and PCI_COMMAND_MASTER forced on.
 * Finally eeh_bridge_check_link() verifies the PCIe link.
 */
702 static void eeh_restore_bridge_bars(struct eeh_dev *edev)
707 * Device BARs: 0x10 - 0x18
708 * Bus numbers and windows: 0x18 - 0x30
710 for (i = 4; i < 13; i++)
711 eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]);
713 eeh_ops->write_config(edev, 14*4, 4, edev->config_space[14]);
715 /* Cache line & Latency timer: 0xC 0xD */
716 eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1,
717 SAVED_BYTE(PCI_CACHE_LINE_SIZE));
718 eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1,
719 SAVED_BYTE(PCI_LATENCY_TIMER));
720 /* Max latency, min grant, interrupt pin and line: 0x3C */
721 eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]);
723 /* PCI Command: 0x4 */
724 eeh_ops->write_config(edev, PCI_COMMAND, 4, edev->config_space[1] |
725 PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
727 /* Check the PCIe link is ready */
728 eeh_bridge_check_link(edev);
/*
 * Write the saved config space of a non-bridge device back through
 * eeh_ops->write_config(), in this order:
 *  - dwords 4 to 9 (offsets 0x10 through 0x24),
 *  - dword 12, the expansion ROM address,
 *  - the cache line size and latency timer as single bytes taken from the
 *    saved image with SAVED_BYTE(),
 *  - dword 15 (max latency, min grant, interrupt pin and line).
 * The command register is then read, its PCI_COMMAND_PARITY and
 * PCI_COMMAND_SERR bits are made to match saved dword 1, and the result is
 * written back as 4 bytes; all other command bits keep their current
 * value.
 * NOTE(review): the else keywords between the set and clear statements are
 * on elided lines - confirm.
 */
731 static void eeh_restore_device_bars(struct eeh_dev *edev)
736 for (i = 4; i < 10; i++)
737 eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]);
738 /* 12 == Expansion ROM Address */
739 eeh_ops->write_config(edev, 12*4, 4, edev->config_space[12]);
741 eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1,
742 SAVED_BYTE(PCI_CACHE_LINE_SIZE));
743 eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1,
744 SAVED_BYTE(PCI_LATENCY_TIMER));
746 /* max latency, min grant, interrupt pin and line */
747 eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]);
750 * Restore PERR & SERR bits, some devices require it,
751 * don't touch the other command bits
753 eeh_ops->read_config(edev, PCI_COMMAND, 4, &cmd);
754 if (edev->config_space[1] & PCI_COMMAND_PARITY)
755 cmd |= PCI_COMMAND_PARITY;
757 cmd &= ~PCI_COMMAND_PARITY;
758 if (edev->config_space[1] & PCI_COMMAND_SERR)
759 cmd |= PCI_COMMAND_SERR;
761 cmd &= ~PCI_COMMAND_SERR;
762 eeh_ops->write_config(edev, PCI_COMMAND, 4, cmd);
766 * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
770 * Loads the PCI configuration space base address registers,
771 * the expansion ROM base address, the latency timer, etc.
772 * from the saved values in the device node.
/*
 * Device callback used by eeh_pe_restore_bars(). Devices flagged
 * EEH_DEV_BRIDGE are restored with eeh_restore_bridge_bars(); the other
 * visible path calls eeh_restore_device_bars(). When the platform supplies
 * eeh_ops->restore_config, that hook is called afterwards. @flag is not
 * used by the visible lines.
 * NOTE(review): the else keyword between the two restore calls is on an
 * elided line - confirm.
 */
774 static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
776 /* Do special restore for bridges */
777 if (edev->mode & EEH_DEV_BRIDGE)
778 eeh_restore_bridge_bars(edev);
780 eeh_restore_device_bars(edev);
782 if (eeh_ops->restore_config)
783 eeh_ops->restore_config(edev);
787 * eeh_pe_restore_bars - Restore the PCI config space info
790 * This routine performs a recursive walk to the children
791 * of this device as well.
/*
 * Restore the saved config space of every device in @pe and in the PEs
 * below it by running eeh_restore_one_device_bars() through
 * eeh_pe_dev_traverse(). No callback cookie is needed, so NULL is passed.
 */
793 void eeh_pe_restore_bars(struct eeh_pe *pe)
796 * We needn't take the EEH lock since eeh_pe_dev_traverse()
799 eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL);
803 * eeh_pe_loc_get - Retrieve location code binding to the given PE
806 * Retrieve the location code of the given PE. If the primary PE bus
807 * is root bus, we will grab location code from PHB device tree node
808 * or root port. Otherwise, the upstream bridge's device tree node
809 * of the primary PE bus will be checked for the location code.
/*
 * Find a location code string for @pe. The PCI bus of the PE comes from
 * eeh_pe_bus_get() and its device tree node from pci_bus_to_OF_node(). For
 * a root bus the ibm,io-base-loc-code property is read; the other visible
 * lookup reads ibm,slot-location-code. loc starts out as NULL.
 * NOTE(review): the loop over buses, the NULL checks, the fallback value
 * and the return statement are on lines elided from this excerpt; what is
 * returned when no property is found cannot be read from here - confirm.
 */
811 const char *eeh_pe_loc_get(struct eeh_pe *pe)
813 struct pci_bus *bus = eeh_pe_bus_get(pe);
814 struct device_node *dn;
815 const char *loc = NULL;
818 dn = pci_bus_to_OF_node(bus);
824 if (pci_is_root_bus(bus))
825 loc = of_get_property(dn, "ibm,io-base-loc-code", NULL);
827 loc = of_get_property(dn, "ibm,slot-location-code",
840 * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
843 * Retrieve the PCI bus according to the given PE. Basically,
844 * there're 3 types of PEs: PHB/Bus/Device. For PHB PE, the
845 * primary PCI bus will be retrieved. The parent bus will be
846 * returned for BUS PE. However, we don't have associated PCI
849 struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
851 struct eeh_dev *edev;
852 struct pci_dev *pdev;
854 if (pe->type & EEH_PE_PHB)
857 /* The primary bus might be cached during probe time */
858 if (pe->state & EEH_PE_PRI_BUS)
861 /* Retrieve the parent PCI bus of first (top) PCI device */
862 edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
863 pdev = eeh_dev_to_pci_dev(edev);