// SPDX-License-Identifier: GPL-2.0-only
/*
 * Machine check injection support.
 * Copyright 2008 Intel Corporation.
 *
 * Authors:
 * Andi Kleen
 * Ying Huang
 *
 * The AMD part (from mce_amd_inj.c): a simple MCE injection facility
 * for testing different aspects of the RAS code. This driver should be
 * built as module so that it can be loaded on production kernels for
 * testing purposes.
 *
 * Copyright (c) 2010-17:  Borislav Petkov <bp@alien8.de>
 *			   Advanced Micro Devices Inc.
 */
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/pci.h>
#include <linux/uaccess.h>

#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/irq_vectors.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/smp.h>

#include "internal.h"
37 * Collect all the MCi_XXX settings
39 static struct mce i_mce;
40 static struct dentry *dfs_inj;
42 #define MAX_FLAG_OPT_SIZE 4
46 SW_INJ = 0, /* SW injection, simply decode the error */
47 HW_INJ, /* Trigger a #MC */
48 DFR_INT_INJ, /* Trigger Deferred error interrupt */
49 THR_INT_INJ, /* Trigger threshold interrupt */
53 static const char * const flags_options[] = {
61 /* Set default injection to SW_INJ */
62 static enum injection_type inj_type = SW_INJ;
64 #define MCE_INJECT_SET(reg) \
65 static int inj_##reg##_set(void *data, u64 val) \
67 struct mce *m = (struct mce *)data; \
73 MCE_INJECT_SET(status);
78 #define MCE_INJECT_GET(reg) \
79 static int inj_##reg##_get(void *data, u64 *val) \
81 struct mce *m = (struct mce *)data; \
87 MCE_INJECT_GET(status);
92 DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
93 DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
94 DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
95 DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
97 static void setup_inj_struct(struct mce *m)
99 memset(m, 0, sizeof(struct mce));
101 m->cpuvendor = boot_cpu_data.x86_vendor;
102 m->time = ktime_get_real_seconds();
103 m->cpuid = cpuid_eax(1);
104 m->microcode = boot_cpu_data.microcode;
107 /* Update fake mce registers on current CPU. */
108 static void inject_mce(struct mce *m)
110 struct mce *i = &per_cpu(injectm, m->extcpu);
112 /* Make sure no one reads partially written injectm */
116 /* First set the fields after finished */
117 i->extcpu = m->extcpu;
119 /* Now write record in order, finished last (except above) */
120 memcpy(i, m, sizeof(struct mce));
121 /* Finally activate it */
126 static void raise_poll(struct mce *m)
131 memset(&b, 0xff, sizeof(mce_banks_t));
132 local_irq_save(flags);
133 machine_check_poll(0, &b);
134 local_irq_restore(flags);
138 static void raise_exception(struct mce *m, struct pt_regs *pregs)
144 memset(®s, 0, sizeof(struct pt_regs));
149 /* do_machine_check() expects interrupts disabled -- at least */
150 local_irq_save(flags);
151 do_machine_check(pregs);
152 local_irq_restore(flags);
156 static cpumask_var_t mce_inject_cpumask;
157 static DEFINE_MUTEX(mce_inject_mutex);
159 static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
161 int cpu = smp_processor_id();
162 struct mce *m = this_cpu_ptr(&injectm);
163 if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
165 cpumask_clear_cpu(cpu, mce_inject_cpumask);
166 if (m->inject_flags & MCJ_EXCEPTION)
167 raise_exception(m, regs);
173 static void mce_irq_ipi(void *info)
175 int cpu = smp_processor_id();
176 struct mce *m = this_cpu_ptr(&injectm);
178 if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
179 m->inject_flags & MCJ_EXCEPTION) {
180 cpumask_clear_cpu(cpu, mce_inject_cpumask);
181 raise_exception(m, NULL);
185 /* Inject mce on current CPU */
186 static int raise_local(void)
188 struct mce *m = this_cpu_ptr(&injectm);
189 int context = MCJ_CTX(m->inject_flags);
193 if (m->inject_flags & MCJ_EXCEPTION) {
194 pr_info("Triggering MCE exception on CPU %d\n", cpu);
198 * Could do more to fake interrupts like
199 * calling irq_enter, but the necessary
200 * machinery isn't exported currently.
203 case MCJ_CTX_PROCESS:
204 raise_exception(m, NULL);
207 pr_info("Invalid MCE context\n");
210 pr_info("MCE exception done on CPU %d\n", cpu);
211 } else if (m->status) {
212 pr_info("Starting machine check poll CPU %d\n", cpu);
215 pr_info("Machine check poll done on CPU %d\n", cpu);
222 static void __maybe_unused raise_mce(struct mce *m)
224 int context = MCJ_CTX(m->inject_flags);
228 if (context == MCJ_CTX_RANDOM)
231 if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
236 cpumask_copy(mce_inject_cpumask, cpu_online_mask);
237 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
238 for_each_online_cpu(cpu) {
239 struct mce *mcpu = &per_cpu(injectm, cpu);
240 if (!mcpu->finished ||
241 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
242 cpumask_clear_cpu(cpu, mce_inject_cpumask);
244 if (!cpumask_empty(mce_inject_cpumask)) {
245 if (m->inject_flags & MCJ_IRQ_BROADCAST) {
247 * don't wait because mce_irq_ipi is necessary
248 * to be sync with following raise_local
251 smp_call_function_many(mce_inject_cpumask,
252 mce_irq_ipi, NULL, 0);
254 } else if (m->inject_flags & MCJ_NMI_BROADCAST)
255 apic->send_IPI_mask(mce_inject_cpumask,
259 while (!cpumask_empty(mce_inject_cpumask)) {
260 if (!time_before(jiffies, start + 2*HZ)) {
261 pr_err("Timeout waiting for mce inject %lx\n",
262 *cpumask_bits(mce_inject_cpumask));
277 static int mce_inject_raise(struct notifier_block *nb, unsigned long val,
280 struct mce *m = (struct mce *)data;
285 mutex_lock(&mce_inject_mutex);
287 mutex_unlock(&mce_inject_mutex);
292 static struct notifier_block inject_nb = {
293 .notifier_call = mce_inject_raise,
297 * Caller needs to be make sure this cpu doesn't disappear
298 * from under us, i.e.: get_cpu/put_cpu.
300 static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
305 err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
307 pr_err("%s: error reading HWCR\n", __func__);
311 enable ? (l |= BIT(18)) : (l &= ~BIT(18));
313 err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
315 pr_err("%s: error writing HWCR\n", __func__);
320 static int __set_inj(const char *buf)
324 for (i = 0; i < N_INJ_TYPES; i++) {
325 if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
333 static ssize_t flags_read(struct file *filp, char __user *ubuf,
334 size_t cnt, loff_t *ppos)
336 char buf[MAX_FLAG_OPT_SIZE];
339 n = sprintf(buf, "%s\n", flags_options[inj_type]);
341 return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
344 static ssize_t flags_write(struct file *filp, const char __user *ubuf,
345 size_t cnt, loff_t *ppos)
347 char buf[MAX_FLAG_OPT_SIZE], *__buf;
350 if (cnt > MAX_FLAG_OPT_SIZE)
353 if (copy_from_user(&buf, ubuf, cnt))
358 /* strip whitespace */
359 __buf = strstrip(buf);
361 err = __set_inj(__buf);
363 pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
372 static const struct file_operations flags_fops = {
374 .write = flags_write,
375 .llseek = generic_file_llseek,
379 * On which CPU to inject?
381 MCE_INJECT_GET(extcpu);
383 static int inj_extcpu_set(void *data, u64 val)
385 struct mce *m = (struct mce *)data;
387 if (val >= nr_cpu_ids || !cpu_online(val)) {
388 pr_err("%s: Invalid CPU: %llu\n", __func__, val);
395 DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
397 static void trigger_mce(void *info)
399 asm volatile("int $18");
402 static void trigger_dfr_int(void *info)
404 asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
407 static void trigger_thr_int(void *info)
409 asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
412 static u32 get_nbc_for_node(int node_id)
414 struct cpuinfo_x86 *c = &boot_cpu_data;
417 cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();
419 return cores_per_node * node_id;
422 static void toggle_nb_mca_mst_cpu(u16 nid)
424 struct amd_northbridge *nb;
429 nb = node_to_amd_nb(nid);
437 err = pci_read_config_dword(F3, NBCFG, &val);
439 pr_err("%s: Error reading F%dx%03x.\n",
440 __func__, PCI_FUNC(F3->devfn), NBCFG);
447 pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
451 err = pci_write_config_dword(F3, NBCFG, val);
453 pr_err("%s: Error writing F%dx%03x.\n",
454 __func__, PCI_FUNC(F3->devfn), NBCFG);
457 static void prepare_msrs(void *info)
459 struct mce m = *(struct mce *)info;
462 wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
464 if (boot_cpu_has(X86_FEATURE_SMCA)) {
465 if (m.inject_flags == DFR_INT_INJ) {
466 wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
467 wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
469 wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
470 wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
473 wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
474 wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
476 wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
477 wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
478 wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
482 static void do_inject(void)
485 unsigned int cpu = i_mce.extcpu;
488 i_mce.tsc = rdtsc_ordered();
491 i_mce.status |= MCI_STATUS_MISCV;
494 i_mce.status |= MCI_STATUS_SYNDV;
496 if (inj_type == SW_INJ) {
501 /* prep MCE global settings for the injection */
502 mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
504 if (!(i_mce.status & MCI_STATUS_PCC))
505 mcg_status |= MCG_STATUS_RIPV;
508 * Ensure necessary status bits for deferred errors:
509 * - MCx_STATUS[Deferred]: make sure it is a deferred error
510 * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
512 if (inj_type == DFR_INT_INJ) {
513 i_mce.status |= MCI_STATUS_DEFERRED;
514 i_mce.status &= ~MCI_STATUS_UC;
518 * For multi node CPUs, logging and reporting of bank 4 errors happens
519 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
520 * Fam10h and later BKDGs.
522 if (boot_cpu_has(X86_FEATURE_AMD_DCM) &&
524 boot_cpu_data.x86 < 0x17) {
525 toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
526 cpu = get_nbc_for_node(amd_get_nb_id(cpu));
530 if (!cpu_online(cpu))
533 toggle_hw_mce_inject(cpu, true);
535 i_mce.mcgstatus = mcg_status;
536 i_mce.inject_flags = inj_type;
537 smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
539 toggle_hw_mce_inject(cpu, false);
543 smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
546 smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
549 smp_call_function_single(cpu, trigger_mce, NULL, 0);
558 * This denotes into which bank we're injecting and triggers
559 * the injection, at the same time.
561 static int inj_bank_set(void *data, u64 val)
563 struct mce *m = (struct mce *)data;
567 /* Get bank count on target CPU so we can handle non-uniform values. */
568 rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
569 n_banks = cap & MCG_BANKCNT_MASK;
571 if (val >= n_banks) {
572 pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu);
579 /* Reset injection struct */
580 setup_inj_struct(&i_mce);
585 MCE_INJECT_GET(bank);
587 DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
/* Contents of the debugfs README file describing the interface. */
static const char readme_msg[] =
"Description of the files and their usages:\n"
"\n"
"Note1: i refers to the bank number below.\n"
"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
"as they mirror the hardware registers.\n"
"\n"
"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
"\t attributes of the error which caused the MCE.\n"
"\n"
"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
"cpu:\t The CPU to inject the error on.\n"
"\n"
"bank:\t Specify the bank you want to inject the error into: the number of\n"
"\t banks in a processor varies and is family/model-specific, therefore, the\n"
"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
"\t injection.\n"
"\n"
"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
"\t for AMD processors.\n"
"\n"
"\t Allowed error injection types:\n"
"\t - \"sw\": Software error injection. Decode error to a human-readable \n"
"\t format only. Safe to use.\n"
"\t - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
"\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
"\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
"\t before injecting.\n"
"\t - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
"\t error APIC interrupt handler to handle the error if the feature is \n"
"\t is present in hardware. \n"
"\t - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
"\t APIC interrupt handler to handle the error. \n"
"\n";
635 inj_readme_read(struct file *filp, char __user *ubuf,
636 size_t cnt, loff_t *ppos)
638 return simple_read_from_buffer(ubuf, cnt, ppos,
639 readme_msg, strlen(readme_msg));
642 static const struct file_operations readme_fops = {
643 .read = inj_readme_read,
646 static struct dfs_node {
648 const struct file_operations *fops;
651 { .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
652 { .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR },
653 { .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR },
654 { .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR },
655 { .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR },
656 { .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR },
657 { .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
658 { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
661 static void __init debugfs_init(void)
665 dfs_inj = debugfs_create_dir("mce-inject", NULL);
667 for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
668 debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
669 &i_mce, dfs_fls[i].fops);
672 static int __init inject_init(void)
674 if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
679 register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
680 mce_register_injector_chain(&inject_nb);
682 setup_inj_struct(&i_mce);
684 pr_info("Machine check injector initialized\n");
689 static void __exit inject_exit(void)
692 mce_unregister_injector_chain(&inject_nb);
693 unregister_nmi_handler(NMI_LOCAL, "mce_notify");
695 debugfs_remove_recursive(dfs_inj);
698 memset(&dfs_fls, 0, sizeof(dfs_fls));
700 free_cpumask_var(mce_inject_cpumask);
/* Register module entry/exit points. */
module_init(inject_init);
module_exit(inject_exit);
MODULE_LICENSE("GPL");