// SPDX-License-Identifier: GPL-2.0-only
/*
 * Machine check injection support.
 * Copyright 2008 Intel Corporation.
 *
 * The AMD part (from mce_amd_inj.c): a simple MCE injection facility
 * for testing different aspects of the RAS code. This driver should be
 * built as module so that it can be loaded on production kernels for
 * testing purposes.
 *
 * Copyright (c) 2010-17:  Borislav Petkov <bp@alien8.de>
 *			   Advanced Micro Devices Inc.
 */
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/pci.h>
#include <linux/uaccess.h>

#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/irq_vectors.h>
#include <asm/mce.h>

#include "internal.h"
37 * Collect all the MCi_XXX settings
39 static struct mce i_mce;
40 static struct dentry *dfs_inj;
42 #define MAX_FLAG_OPT_SIZE 4
46 SW_INJ = 0, /* SW injection, simply decode the error */
47 HW_INJ, /* Trigger a #MC */
48 DFR_INT_INJ, /* Trigger Deferred error interrupt */
49 THR_INT_INJ, /* Trigger threshold interrupt */
53 static const char * const flags_options[] = {
61 /* Set default injection to SW_INJ */
62 static enum injection_type inj_type = SW_INJ;
64 #define MCE_INJECT_SET(reg) \
65 static int inj_##reg##_set(void *data, u64 val) \
67 struct mce *m = (struct mce *)data; \
73 MCE_INJECT_SET(status);
79 #define MCE_INJECT_GET(reg) \
80 static int inj_##reg##_get(void *data, u64 *val) \
82 struct mce *m = (struct mce *)data; \
88 MCE_INJECT_GET(status);
94 DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
95 DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
96 DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
97 DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
98 DEFINE_SIMPLE_ATTRIBUTE(ipid_fops, inj_ipid_get, inj_ipid_set, "%llx\n");
100 static void setup_inj_struct(struct mce *m)
102 memset(m, 0, sizeof(struct mce));
104 m->cpuvendor = boot_cpu_data.x86_vendor;
105 m->time = ktime_get_real_seconds();
106 m->cpuid = cpuid_eax(1);
107 m->microcode = boot_cpu_data.microcode;
110 /* Update fake mce registers on current CPU. */
111 static void inject_mce(struct mce *m)
113 struct mce *i = &per_cpu(injectm, m->extcpu);
115 /* Make sure no one reads partially written injectm */
119 /* First set the fields after finished */
120 i->extcpu = m->extcpu;
122 /* Now write record in order, finished last (except above) */
123 memcpy(i, m, sizeof(struct mce));
124 /* Finally activate it */
129 static void raise_poll(struct mce *m)
134 memset(&b, 0xff, sizeof(mce_banks_t));
135 local_irq_save(flags);
136 machine_check_poll(0, &b);
137 local_irq_restore(flags);
141 static void raise_exception(struct mce *m, struct pt_regs *pregs)
147 memset(®s, 0, sizeof(struct pt_regs));
152 /* do_machine_check() expects interrupts disabled -- at least */
153 local_irq_save(flags);
154 do_machine_check(pregs);
155 local_irq_restore(flags);
159 static cpumask_var_t mce_inject_cpumask;
160 static DEFINE_MUTEX(mce_inject_mutex);
162 static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
164 int cpu = smp_processor_id();
165 struct mce *m = this_cpu_ptr(&injectm);
166 if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
168 cpumask_clear_cpu(cpu, mce_inject_cpumask);
169 if (m->inject_flags & MCJ_EXCEPTION)
170 raise_exception(m, regs);
176 static void mce_irq_ipi(void *info)
178 int cpu = smp_processor_id();
179 struct mce *m = this_cpu_ptr(&injectm);
181 if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
182 m->inject_flags & MCJ_EXCEPTION) {
183 cpumask_clear_cpu(cpu, mce_inject_cpumask);
184 raise_exception(m, NULL);
188 /* Inject mce on current CPU */
189 static int raise_local(void)
191 struct mce *m = this_cpu_ptr(&injectm);
192 int context = MCJ_CTX(m->inject_flags);
196 if (m->inject_flags & MCJ_EXCEPTION) {
197 pr_info("Triggering MCE exception on CPU %d\n", cpu);
201 * Could do more to fake interrupts like
202 * calling irq_enter, but the necessary
203 * machinery isn't exported currently.
206 case MCJ_CTX_PROCESS:
207 raise_exception(m, NULL);
210 pr_info("Invalid MCE context\n");
213 pr_info("MCE exception done on CPU %d\n", cpu);
214 } else if (m->status) {
215 pr_info("Starting machine check poll CPU %d\n", cpu);
218 pr_info("Machine check poll done on CPU %d\n", cpu);
225 static void __maybe_unused raise_mce(struct mce *m)
227 int context = MCJ_CTX(m->inject_flags);
231 if (context == MCJ_CTX_RANDOM)
234 if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
239 cpumask_copy(mce_inject_cpumask, cpu_online_mask);
240 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
241 for_each_online_cpu(cpu) {
242 struct mce *mcpu = &per_cpu(injectm, cpu);
243 if (!mcpu->finished ||
244 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
245 cpumask_clear_cpu(cpu, mce_inject_cpumask);
247 if (!cpumask_empty(mce_inject_cpumask)) {
248 if (m->inject_flags & MCJ_IRQ_BROADCAST) {
250 * don't wait because mce_irq_ipi is necessary
251 * to be sync with following raise_local
254 smp_call_function_many(mce_inject_cpumask,
255 mce_irq_ipi, NULL, 0);
257 } else if (m->inject_flags & MCJ_NMI_BROADCAST)
258 apic->send_IPI_mask(mce_inject_cpumask,
262 while (!cpumask_empty(mce_inject_cpumask)) {
263 if (!time_before(jiffies, start + 2*HZ)) {
264 pr_err("Timeout waiting for mce inject %lx\n",
265 *cpumask_bits(mce_inject_cpumask));
280 static int mce_inject_raise(struct notifier_block *nb, unsigned long val,
283 struct mce *m = (struct mce *)data;
288 mutex_lock(&mce_inject_mutex);
290 mutex_unlock(&mce_inject_mutex);
295 static struct notifier_block inject_nb = {
296 .notifier_call = mce_inject_raise,
300 * Caller needs to be make sure this cpu doesn't disappear
301 * from under us, i.e.: get_cpu/put_cpu.
303 static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
308 err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
310 pr_err("%s: error reading HWCR\n", __func__);
314 enable ? (l |= BIT(18)) : (l &= ~BIT(18));
316 err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
318 pr_err("%s: error writing HWCR\n", __func__);
323 static int __set_inj(const char *buf)
327 for (i = 0; i < N_INJ_TYPES; i++) {
328 if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
336 static ssize_t flags_read(struct file *filp, char __user *ubuf,
337 size_t cnt, loff_t *ppos)
339 char buf[MAX_FLAG_OPT_SIZE];
342 n = sprintf(buf, "%s\n", flags_options[inj_type]);
344 return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
347 static ssize_t flags_write(struct file *filp, const char __user *ubuf,
348 size_t cnt, loff_t *ppos)
350 char buf[MAX_FLAG_OPT_SIZE], *__buf;
353 if (cnt > MAX_FLAG_OPT_SIZE)
356 if (copy_from_user(&buf, ubuf, cnt))
361 /* strip whitespace */
362 __buf = strstrip(buf);
364 err = __set_inj(__buf);
366 pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
375 static const struct file_operations flags_fops = {
377 .write = flags_write,
378 .llseek = generic_file_llseek,
382 * On which CPU to inject?
384 MCE_INJECT_GET(extcpu);
386 static int inj_extcpu_set(void *data, u64 val)
388 struct mce *m = (struct mce *)data;
390 if (val >= nr_cpu_ids || !cpu_online(val)) {
391 pr_err("%s: Invalid CPU: %llu\n", __func__, val);
398 DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
400 static void trigger_mce(void *info)
402 asm volatile("int $18");
405 static void trigger_dfr_int(void *info)
407 asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
410 static void trigger_thr_int(void *info)
412 asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
415 static u32 get_nbc_for_node(int node_id)
417 struct cpuinfo_x86 *c = &boot_cpu_data;
420 cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();
422 return cores_per_node * node_id;
425 static void toggle_nb_mca_mst_cpu(u16 nid)
427 struct amd_northbridge *nb;
432 nb = node_to_amd_nb(nid);
440 err = pci_read_config_dword(F3, NBCFG, &val);
442 pr_err("%s: Error reading F%dx%03x.\n",
443 __func__, PCI_FUNC(F3->devfn), NBCFG);
450 pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
454 err = pci_write_config_dword(F3, NBCFG, val);
456 pr_err("%s: Error writing F%dx%03x.\n",
457 __func__, PCI_FUNC(F3->devfn), NBCFG);
460 static void prepare_msrs(void *info)
462 struct mce m = *(struct mce *)info;
465 wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
467 if (boot_cpu_has(X86_FEATURE_SMCA)) {
468 if (m.inject_flags == DFR_INT_INJ) {
469 wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
470 wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
472 wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
473 wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
476 wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
477 wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
479 wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
480 wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
481 wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
485 static void do_inject(void)
488 unsigned int cpu = i_mce.extcpu;
491 i_mce.tsc = rdtsc_ordered();
494 i_mce.status |= MCI_STATUS_MISCV;
497 i_mce.status |= MCI_STATUS_SYNDV;
499 if (inj_type == SW_INJ) {
504 /* prep MCE global settings for the injection */
505 mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
507 if (!(i_mce.status & MCI_STATUS_PCC))
508 mcg_status |= MCG_STATUS_RIPV;
511 * Ensure necessary status bits for deferred errors:
512 * - MCx_STATUS[Deferred]: make sure it is a deferred error
513 * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
515 if (inj_type == DFR_INT_INJ) {
516 i_mce.status |= MCI_STATUS_DEFERRED;
517 i_mce.status &= ~MCI_STATUS_UC;
521 * For multi node CPUs, logging and reporting of bank 4 errors happens
522 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
523 * Fam10h and later BKDGs.
525 if (boot_cpu_has(X86_FEATURE_AMD_DCM) &&
527 boot_cpu_data.x86 < 0x17) {
528 toggle_nb_mca_mst_cpu(topology_die_id(cpu));
529 cpu = get_nbc_for_node(topology_die_id(cpu));
533 if (!cpu_online(cpu))
536 toggle_hw_mce_inject(cpu, true);
538 i_mce.mcgstatus = mcg_status;
539 i_mce.inject_flags = inj_type;
540 smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
542 toggle_hw_mce_inject(cpu, false);
546 smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
549 smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
552 smp_call_function_single(cpu, trigger_mce, NULL, 0);
561 * This denotes into which bank we're injecting and triggers
562 * the injection, at the same time.
564 static int inj_bank_set(void *data, u64 val)
566 struct mce *m = (struct mce *)data;
570 /* Get bank count on target CPU so we can handle non-uniform values. */
571 rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
572 n_banks = cap & MCG_BANKCNT_MASK;
574 if (val >= n_banks) {
575 pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu);
582 /* Reset injection struct */
583 setup_inj_struct(&i_mce);
588 MCE_INJECT_GET(bank);
590 DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
/* Contents of the README debugfs file. */
static const char readme_msg[] =
"Description of the files and their usages:\n"
"\n"
"Note1: i refers to the bank number below.\n"
"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
"as they mirror the hardware registers.\n"
"\n"
"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
"\t attributes of the error which caused the MCE.\n"
"\n"
"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
"cpu:\t The CPU to inject the error on.\n"
"\n"
"bank:\t Specify the bank you want to inject the error into: the number of\n"
"\t banks in a processor varies and is family/model-specific, therefore, the\n"
"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
"\t injection.\n"
"\n"
"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
"\t for AMD processors.\n"
"\n"
"\t Allowed error injection types:\n"
"\t - \"sw\": Software error injection. Decode error to a human-readable \n"
"\t format only. Safe to use.\n"
"\t - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
"\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
"\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
"\t before injecting.\n"
"\t - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
"\t error APIC interrupt handler to handle the error if the feature is \n"
"\t present in hardware. \n"
"\t - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
"\t APIC interrupt handler to handle the error. \n"
"\n"
"ipid:\t IPID (AMD-specific)\n"
"\n";
640 inj_readme_read(struct file *filp, char __user *ubuf,
641 size_t cnt, loff_t *ppos)
643 return simple_read_from_buffer(ubuf, cnt, ppos,
644 readme_msg, strlen(readme_msg));
647 static const struct file_operations readme_fops = {
648 .read = inj_readme_read,
651 static struct dfs_node {
653 const struct file_operations *fops;
656 { .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
657 { .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR },
658 { .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR },
659 { .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR },
660 { .name = "ipid", .fops = &ipid_fops, .perm = S_IRUSR | S_IWUSR },
661 { .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR },
662 { .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR },
663 { .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
664 { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
667 static void __init debugfs_init(void)
671 dfs_inj = debugfs_create_dir("mce-inject", NULL);
673 for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
674 debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
675 &i_mce, dfs_fls[i].fops);
678 static int __init inject_init(void)
680 if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
685 register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
686 mce_register_injector_chain(&inject_nb);
688 setup_inj_struct(&i_mce);
690 pr_info("Machine check injector initialized\n");
695 static void __exit inject_exit(void)
698 mce_unregister_injector_chain(&inject_nb);
699 unregister_nmi_handler(NMI_LOCAL, "mce_notify");
701 debugfs_remove_recursive(dfs_inj);
704 memset(&dfs_fls, 0, sizeof(dfs_fls));
706 free_cpumask_var(mce_inject_cpumask);
709 module_init(inject_init);
710 module_exit(inject_exit);
711 MODULE_LICENSE("GPL");