// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Machine check exception handling.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#define pr_fmt(fmt) "mce: " fmt
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/extable.h>
#include <linux/ftrace.h>

#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>
static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
                                        mce_ue_event_queue);
static void machine_check_process_queued_event(struct irq_work *work);
static void machine_check_ue_irq_work(struct irq_work *work);
static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
        .func = machine_check_process_queued_event,
};

static struct irq_work mce_ue_event_irq_work = {
        .func = machine_check_ue_irq_work,
};

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);

int mce_register_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_register_notifier);

int mce_unregister_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_notifier);
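/*
 * Illustrative sketch only (callback name and body are hypothetical, not
 * part of this file): a module consuming UE events via the chain above.
 * The callback runs from process context, with the queued
 * machine_check_event passed as @data:
 *
 *	static int my_mce_cb(struct notifier_block *nb,
 *			     unsigned long action, void *data)
 *	{
 *		struct machine_check_event *evt = data;
 *
 *		if (evt->error_type == MCE_ERROR_TYPE_UE)
 *			pr_info("UE event seen on CPU%d\n", evt->cpu);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_mce_nb = {
 *		.notifier_call = my_mce_cb,
 *	};
 *
 * followed by mce_register_notifier(&my_mce_nb) at init time.
 */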
static void mce_set_error_info(struct machine_check_event *mce,
                               struct mce_error_info *mce_err)
{
        mce->error_type = mce_err->error_type;
        switch (mce_err->error_type) {
        case MCE_ERROR_TYPE_UE:
                mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
                break;
        case MCE_ERROR_TYPE_SLB:
                mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
                break;
        case MCE_ERROR_TYPE_ERAT:
                mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
                break;
        case MCE_ERROR_TYPE_TLB:
                mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
                break;
        case MCE_ERROR_TYPE_USER:
                mce->u.user_error.user_error_type = mce_err->u.user_error_type;
                break;
        case MCE_ERROR_TYPE_RA:
                mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
                break;
        case MCE_ERROR_TYPE_LINK:
                mce->u.link_error.link_error_type = mce_err->u.link_error_type;
                break;
        case MCE_ERROR_TYPE_UNKNOWN:
        default:
                break;
        }
}
/*
 * Decode and save high level MCE information into per cpu buffer which
 * is an array of machine_check_event structure.
 */
void save_mce_event(struct pt_regs *regs, long handled,
                    struct mce_error_info *mce_err,
                    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
        int index = __this_cpu_inc_return(mce_nest_count) - 1;
        struct machine_check_event *mce;

        /*
         * Return if we don't have enough space to log mce event.
         * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
         * the check below will stop buffer overrun.
         */
        if (index >= MAX_MC_EVT)
                return;

        mce = this_cpu_ptr(&mce_event[index]);

        /* Populate generic machine check info */
        mce->version = MCE_V1;
        mce->srr0 = nip;
        mce->srr1 = regs->msr;
        mce->gpr3 = regs->gpr[3];
        mce->in_use = 1;
        mce->cpu = get_paca()->paca_index;

        /* Mark it recovered if we have handled it and MSR(RI=1). */
        if (handled && (regs->msr & MSR_RI))
                mce->disposition = MCE_DISPOSITION_RECOVERED;
        else
                mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

        mce->initiator = mce_err->initiator;
        mce->severity = mce_err->severity;
        mce->sync_error = mce_err->sync_error;
        mce->error_class = mce_err->error_class;

        /*
         * Populate the mce error_type and type-specific error_type.
         */
        mce_set_error_info(mce, mce_err);

        if (!addr)
                return;
        if (mce->error_type == MCE_ERROR_TYPE_TLB) {
                mce->u.tlb_error.effective_address_provided = true;
                mce->u.tlb_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
                mce->u.slb_error.effective_address_provided = true;
                mce->u.slb_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
                mce->u.erat_error.effective_address_provided = true;
                mce->u.erat_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
                mce->u.user_error.effective_address_provided = true;
                mce->u.user_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
                mce->u.ra_error.effective_address_provided = true;
                mce->u.ra_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
                mce->u.link_error.effective_address_provided = true;
                mce->u.link_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
                mce->u.ue_error.effective_address_provided = true;
                mce->u.ue_error.effective_address = addr;
                if (phys_addr != ULONG_MAX) {
                        mce->u.ue_error.physical_address_provided = true;
                        mce->u.ue_error.physical_address = phys_addr;
                        mce->u.ue_error.ignore_event = mce_err->ignore_event;
                        machine_check_ue_event(mce);
                }
        }
}
/*
 * mce		Pointer to machine_check_event structure to be filled.
 * release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once event has been consumed.
 *		1 <= release the slot.
 *
 * return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by platform specific machine check
 * handler routines and in KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
        int index = __this_cpu_read(mce_nest_count) - 1;
        struct machine_check_event *mc_evt;
        int ret = 0;

        /* Sanity check */
        if (index < 0)
                return ret;

        /* Check if we have MCE info to process. */
        if (index < MAX_MC_EVT) {
                mc_evt = this_cpu_ptr(&mce_event[index]);
                /* Copy the event structure and release the original */
                if (mce)
                        *mce = *mc_evt;
                if (release)
                        mc_evt->in_use = 0;
                ret = 1;
        }
        /* Decrement the count to free the slot. */
        if (release)
                __this_cpu_dec(mce_nest_count);

        return ret;
}
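/*
 * Illustrative usage sketch (hypothetical caller): peek at the current
 * event without freeing the slot, then release it explicitly:
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_DONTRELEASE)) {
 *		... inspect evt here ...
 *		release_mce_event();
 *	}
 */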
void release_mce_event(void)
{
        get_mce_event(NULL, true);
}
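/*
 * Runs from irq_work (hard interrupt) context once interrupts are
 * re-enabled; UE handling via memory_failure() may sleep, so defer the
 * real work to the mce_ue_event_work workqueue item.
 */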
static void machine_check_ue_irq_work(struct irq_work *work)
{
        schedule_work(&mce_ue_event_work);
}
/*
 * Queue up the MCE event which then can be handled later.
 */
static void machine_check_ue_event(struct machine_check_event *evt)
{
        int index;

        index = __this_cpu_inc_return(mce_ue_count) - 1;
        /* If queue is full, just return for now. */
        if (index >= MAX_MC_EVT) {
                __this_cpu_dec(mce_ue_count);
                return;
        }
        memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));

        /* Queue work to process this event later. */
        irq_work_queue(&mce_ue_event_irq_work);
}
/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_queue_event(void)
{
        int index;
        struct machine_check_event evt;

        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
                return;

        index = __this_cpu_inc_return(mce_queue_count) - 1;
        /* If queue is full, just return for now. */
        if (index >= MAX_MC_EVT) {
                __this_cpu_dec(mce_queue_count);
                return;
        }
        memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

        /* Queue irq work to process this event later. */
        irq_work_queue(&mce_event_process_work);
}
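/*
 * If the faulting kernel NIP has an exception table fixup, redirect
 * execution to the fixup and mark the event to be ignored: the fixup
 * path (e.g. a user copy routine) does its own error handling, so the
 * MCE need not be reported.
 */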
void mce_common_process_ue(struct pt_regs *regs,
                           struct mce_error_info *mce_err)
{
        const struct exception_table_entry *entry;

        entry = search_kernel_exception_table(regs->nip);
        if (entry) {
                mce_err->ignore_event = true;
                regs->nip = extable_fixup(entry);
        }
}
/*
 * Process pending MCE UE events from the per-cpu UE queue. Runs from
 * the workqueue item scheduled by machine_check_ue_irq_work().
 */
static void machine_process_ue_event(struct work_struct *work)
{
        int index;
        struct machine_check_event *evt;

        while (__this_cpu_read(mce_ue_count) > 0) {
                index = __this_cpu_read(mce_ue_count) - 1;
                evt = this_cpu_ptr(&mce_ue_event_queue[index]);
                blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
#ifdef CONFIG_MEMORY_FAILURE
                /*
                 * This should probably be queued elsewhere, but oh well.
                 *
                 * Don't report this machine check if the caller has
                 * asked us to ignore the event: it has a fixup handler
                 * which will do the appropriate error handling and
                 * reporting.
                 */
                if (evt->error_type == MCE_ERROR_TYPE_UE) {
                        if (evt->u.ue_error.ignore_event) {
                                __this_cpu_dec(mce_ue_count);
                                continue;
                        }

                        if (evt->u.ue_error.physical_address_provided) {
                                unsigned long pfn;

                                pfn = evt->u.ue_error.physical_address >>
                                        PAGE_SHIFT;
                                memory_failure(pfn, 0);
                        } else
                                pr_warn("Failed to identify bad address from "
                                        "where the uncorrectable error (UE) "
                                        "was generated\n");
                }
#endif
                __this_cpu_dec(mce_ue_count);
        }
}
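/*
 * Worked example for the pfn conversion in machine_process_ue_event(),
 * with hypothetical values: with 64K pages (PAGE_SHIFT == 16), a bad
 * physical address of 0x200001000 yields pfn 0x20000, which is what
 * memory_failure() is handed.
 */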
/*
 * Process pending MCE events from the per-cpu event queue and print
 * them. Runs from irq_work context once interrupts are re-enabled.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
        int index;
        struct machine_check_event *evt;

        add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

        /*
         * For now just print it to console.
         * TODO: log this error event to FSP or nvram.
         */
        while (__this_cpu_read(mce_queue_count) > 0) {
                index = __this_cpu_read(mce_queue_count) - 1;
                evt = this_cpu_ptr(&mce_event_queue[index]);

                if (evt->error_type == MCE_ERROR_TYPE_UE &&
                    evt->u.ue_error.ignore_event) {
                        __this_cpu_dec(mce_queue_count);
                        continue;
                }
                machine_check_print_event_info(evt, false, false);
                __this_cpu_dec(mce_queue_count);
        }
}
void machine_check_print_event_info(struct machine_check_event *evt,
                                    bool user_mode, bool in_guest)
{
        const char *level, *sevstr, *subtype, *err_type, *initiator;
        uint64_t ea = 0, pa = 0;
        int n = 0;
        char dar_str[50];
        char pa_str[50];
        static const char *mc_ue_types[] = {
                "Indeterminate",
                "Instruction fetch",
                "Page table walk ifetch",
                "Load/Store",
                "Page table walk Load/Store",
        };
        static const char *mc_slb_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_erat_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_tlb_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_user_types[] = {
                "Indeterminate",
                "tlbie(l) invalid",
        };
        static const char *mc_ra_types[] = {
                "Indeterminate",
                "Instruction fetch (bad)",
                "Instruction fetch (foreign)",
                "Page table walk ifetch (bad)",
                "Page table walk ifetch (foreign)",
                "Load (bad)",
                "Store (bad)",
                "Page table walk Load/Store (bad)",
                "Page table walk Load/Store (foreign)",
                "Load/Store (foreign)",
        };
        static const char *mc_link_types[] = {
                "Indeterminate",
                "Instruction fetch (timeout)",
                "Page table walk ifetch (timeout)",
                "Load (timeout)",
                "Store (timeout)",
                "Page table walk Load/Store (timeout)",
        };
        static const char *mc_error_class[] = {
                "Unknown",
                "Hardware error",
                "Probable Hardware error (some chance of software cause)",
                "Software error",
                "Probable Software error (some chance of hardware cause)",
        };
        /* Print things out */
        if (evt->version != MCE_V1) {
                pr_err("Machine Check Exception, Unknown event version %d !\n",
                       evt->version);
                return;
        }
        switch (evt->severity) {
        case MCE_SEV_NO_ERROR:
                level = KERN_INFO;
                sevstr = "Harmless";
                break;
        case MCE_SEV_WARNING:
                level = KERN_WARNING;
                sevstr = "Warning";
                break;
        case MCE_SEV_SEVERE:
                level = KERN_ERR;
                sevstr = "Severe";
                break;
        case MCE_SEV_FATAL:
        default:
                level = KERN_ERR;
                sevstr = "Fatal";
                break;
        }
        switch (evt->initiator) {
        case MCE_INITIATOR_CPU:
                initiator = "CPU";
                break;
        case MCE_INITIATOR_PCI:
                initiator = "PCI";
                break;
        case MCE_INITIATOR_ISA:
                initiator = "ISA";
                break;
        case MCE_INITIATOR_MEMORY:
                initiator = "Memory";
                break;
        case MCE_INITIATOR_POWERMGM:
                initiator = "Power Management";
                break;
        case MCE_INITIATOR_UNKNOWN:
        default:
                initiator = "Unknown";
                break;
        }
        switch (evt->error_type) {
        case MCE_ERROR_TYPE_UE:
                err_type = "UE";
                subtype = evt->u.ue_error.ue_error_type <
                        ARRAY_SIZE(mc_ue_types) ?
                        mc_ue_types[evt->u.ue_error.ue_error_type]
                        : "Unknown";
                if (evt->u.ue_error.effective_address_provided)
                        ea = evt->u.ue_error.effective_address;
                if (evt->u.ue_error.physical_address_provided)
                        pa = evt->u.ue_error.physical_address;
                break;
        case MCE_ERROR_TYPE_SLB:
                err_type = "SLB";
                subtype = evt->u.slb_error.slb_error_type <
                        ARRAY_SIZE(mc_slb_types) ?
                        mc_slb_types[evt->u.slb_error.slb_error_type]
                        : "Unknown";
                if (evt->u.slb_error.effective_address_provided)
                        ea = evt->u.slb_error.effective_address;
                break;
        case MCE_ERROR_TYPE_ERAT:
                err_type = "ERAT";
                subtype = evt->u.erat_error.erat_error_type <
                        ARRAY_SIZE(mc_erat_types) ?
                        mc_erat_types[evt->u.erat_error.erat_error_type]
                        : "Unknown";
                if (evt->u.erat_error.effective_address_provided)
                        ea = evt->u.erat_error.effective_address;
                break;
        case MCE_ERROR_TYPE_TLB:
                err_type = "TLB";
                subtype = evt->u.tlb_error.tlb_error_type <
                        ARRAY_SIZE(mc_tlb_types) ?
                        mc_tlb_types[evt->u.tlb_error.tlb_error_type]
                        : "Unknown";
                if (evt->u.tlb_error.effective_address_provided)
                        ea = evt->u.tlb_error.effective_address;
                break;
        case MCE_ERROR_TYPE_USER:
                err_type = "User";
                subtype = evt->u.user_error.user_error_type <
                        ARRAY_SIZE(mc_user_types) ?
                        mc_user_types[evt->u.user_error.user_error_type]
                        : "Unknown";
                if (evt->u.user_error.effective_address_provided)
                        ea = evt->u.user_error.effective_address;
                break;
        case MCE_ERROR_TYPE_RA:
                err_type = "Real address";
                subtype = evt->u.ra_error.ra_error_type <
                        ARRAY_SIZE(mc_ra_types) ?
                        mc_ra_types[evt->u.ra_error.ra_error_type]
                        : "Unknown";
                if (evt->u.ra_error.effective_address_provided)
                        ea = evt->u.ra_error.effective_address;
                break;
        case MCE_ERROR_TYPE_LINK:
                err_type = "Link";
                subtype = evt->u.link_error.link_error_type <
                        ARRAY_SIZE(mc_link_types) ?
                        mc_link_types[evt->u.link_error.link_error_type]
                        : "Unknown";
                if (evt->u.link_error.effective_address_provided)
                        ea = evt->u.link_error.effective_address;
                break;
        case MCE_ERROR_TYPE_DCACHE:
                err_type = "D-Cache";
                subtype = "Unknown";
                break;
        case MCE_ERROR_TYPE_ICACHE:
                err_type = "I-Cache";
                subtype = "Unknown";
                break;
        case MCE_ERROR_TYPE_UNKNOWN:
        default:
                err_type = "Unknown";
                subtype = "";
                break;
        }
        dar_str[0] = pa_str[0] = '\0';
        if (ea && evt->srr0 != ea) {
                /* Load/Store address */
                n = sprintf(dar_str, "DAR: %016llx ", ea);
                if (pa)
                        sprintf(dar_str + n, "paddr: %016llx ", pa);
        } else if (pa) {
                sprintf(pa_str, " paddr: %016llx", pa);
        }
        printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
                level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
                err_type, subtype, dar_str,
                evt->disposition == MCE_DISPOSITION_RECOVERED ?
                "Recovered" : "Not recovered");
        if (in_guest || user_mode) {
                printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
                        level, evt->cpu, current->pid, current->comm,
                        in_guest ? "Guest " : "", evt->srr0, pa_str);
        } else {
                printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
                        level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
        }
        printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);

        subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
                mc_error_class[evt->error_class] : "Unknown";
        printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
#ifdef CONFIG_PPC_BOOK3S_64
        /* Display faulty slb contents for SLB errors. */
        if (evt->error_type == MCE_ERROR_TYPE_SLB)
                slb_dump_contents(local_paca->mce_faulty_slbs);
#endif
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain SRR0 and SRR1.
 */
long notrace machine_check_early(struct pt_regs *regs)
{
        long handled = 0;
        bool nested = in_nmi();
        u8 ftrace_enabled = this_cpu_get_ftrace_enabled();

        this_cpu_set_ftrace_enabled(0);

        if (!nested)
                nmi_enter();

        hv_nmi_check_nonrecoverable(regs);

        /*
         * See if platform is capable of handling machine check.
         */
        if (ppc_md.machine_check_early)
                handled = ppc_md.machine_check_early(regs);

        if (!nested)
                nmi_exit();

        this_cpu_set_ftrace_enabled(ftrace_enabled);

        return handled;
}
/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
        DTRIG_UNKNOWN,
        DTRIG_VECTOR_CI,        /* need to emulate vector CI load instr */
        DTRIG_SUSPEND_ESCAPE,   /* need to escape from TM suspend mode */
} hmer_debug_trig_function;
static int init_debug_trig_function(void)
{
        int pvr;
        struct device_node *cpun;
        struct property *prop = NULL;
        const char *str;

        /* First look in the device tree */
        preempt_disable();
        cpun = of_get_cpu_node(smp_processor_id(), NULL);
        if (cpun) {
                of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
                                            prop, str) {
                        if (strcmp(str, "bit17-vector-ci-load") == 0)
                                hmer_debug_trig_function = DTRIG_VECTOR_CI;
                        else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
                                hmer_debug_trig_function =
                                        DTRIG_SUSPEND_ESCAPE;
                }
                of_node_put(cpun);
        }
        preempt_enable();
        /* If we found the property, don't look at PVR */
        if (prop)
                goto out;

        pvr = mfspr(SPRN_PVR);
        /* Check for POWER9 Nimbus (scale-out) */
        if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
                /* DD2.2 and later */
                if ((pvr & 0xfff) >= 0x202)
                        hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
                /* DD2.0 and DD2.1 - used for vector CI load emulation */
                else if ((pvr & 0xfff) >= 0x200)
                        hmer_debug_trig_function = DTRIG_VECTOR_CI;
        }
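        /*
         * Worked example with a hypothetical PVR value: 0x004e1202 gives
         * PVR_VER() == 0x004e (POWER9), (pvr & 0xe000) == 0 (Nimbus), and
         * (pvr & 0xfff) == 0x202, i.e. DD2.2, so the trigger is used for
         * TM suspend escape.
         */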
 out:
        switch (hmer_debug_trig_function) {
        case DTRIG_VECTOR_CI:
                pr_debug("HMI debug trigger used for vector CI load\n");
                break;
        case DTRIG_SUSPEND_ESCAPE:
                pr_debug("HMI debug trigger used for TM suspend escape\n");
                break;
        default:
                break;
        }
        return 0;
}
__initcall(init_debug_trig_function);
/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not a HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
        unsigned long hmer = mfspr(SPRN_HMER);
        long ret = 0;

        /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
        if (!((hmer & HMER_DEBUG_TRIG)
              && hmer_debug_trig_function != DTRIG_UNKNOWN))
                return -1;
        hmer &= ~HMER_DEBUG_TRIG;
        /* HMER is a write-AND register */
        mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
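        /*
         * Write-AND semantics mean the value written is ANDed into the
         * register: writing ~HMER_DEBUG_TRIG (all bits set except the
         * trigger bit) clears only HMER_DEBUG_TRIG and leaves every
         * other pending HMI cause bit untouched.
         */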
        switch (hmer_debug_trig_function) {
        case DTRIG_VECTOR_CI:
                /*
                 * Now to avoid problems with soft-disable we
                 * only do the emulation if we are coming from
                 * host user space
                 */
                if (regs && user_mode(regs))
                        ret = local_paca->hmi_p9_special_emu = 1;

                break;

        default:
                break;
        }

        /*
         * See if any other HMI causes remain to be handled
         */
        if (hmer & mfspr(SPRN_HMEER))
                return -1;

        return ret;
}
long hmi_exception_realmode(struct pt_regs *regs)
{
        int ret;

        __this_cpu_inc(irq_stat.hmi_exceptions);

        ret = hmi_handle_debugtrig(regs);
        if (ret >= 0)
                return ret;

        wait_for_subcore_guest_exit();

        if (ppc_md.hmi_exception_early)
                ppc_md.hmi_exception_early(regs);

        wait_for_tb_resync();

        return 1;
}