1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/module.h>
3 #include <linux/slab.h>
/*
 * Decoder state shared by the functions below: the per-family decode
 * callbacks (assigned in mce_amd_init()), the mask passed to XEC() when
 * extracting the extended error code, and the optional DRAM ECC decoder hook.
 */
9 static struct amd_decoder_ops fam_ops;
/* Default mask; mce_amd_init() may replace it with a wider one. */
11 static u8 xec_mask = 0xf;
/* Cleared by amd_unregister_ecc_decoder(); checked before use in decode_smca_error(). */
13 static void (*decode_dram_ecc)(int node_id, struct mce *m);
/*
 * Register @f as the DRAM ECC decoder (exported, GPL-only).
 * NOTE(review): the function body is not part of this excerpt; presumably it
 * stores @f in decode_dram_ecc -- confirm against the full file.
 */
15 void amd_register_ecc_decoder(void (*f)(int, struct mce *))
19 EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
/*
 * Unregister the DRAM ECC decoder (exported, GPL-only). When a decoder is
 * installed, warn if @f is not the one currently registered and clear the
 * hook.
 */
21 void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
23 if (decode_dram_ecc) {
24 WARN_ON(decode_dram_ecc != f);
26 decode_dram_ecc = NULL;
29 EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
32 * string representation for the different MCA reported error types, see F3x48
/*
 * String tables for the individual fields of an MCA error code.
 * NOTE(review): the *_MSG() helpers used by the decoders below are defined
 * outside this excerpt; presumably they index these tables -- confirm.
 */
36 /* transaction type */
37 static const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };
/* cache level */
40 static const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };
42 /* memory transaction type */
43 static const char * const rrrr_msgs[] = {
44 "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
47 /* participating processor */
48 const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
49 EXPORT_SYMBOL_GPL(pp_msgs);
/* request timeout */
52 static const char * const to_msgs[] = { "no timeout", "timed out" };
/* memory or i/o */
55 static const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
57 /* internal error type */
58 static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };
/*
 * Family 15h MC1 error descriptions. The table is not indexed by the raw
 * extended error code: f15h_mc1_mce() uses xec, xec-2 or xec-4 as the index,
 * which is why the trailing comments record the xec of selected entries.
 */
60 static const char * const f15h_mc1_mce_desc[] = {
61 "UC during a demand linefill from L2",
62 "Parity error during data load from IC",
63 "Parity error for IC valid bit",
64 "Main tag parity error",
65 "Parity error in prediction queue",
66 "PFB data/address parity error",
67 "Parity error in the branch status reg",
68 "PFB promotion address error",
69 "Tag error during probe/victimization",
70 "Parity error for IC probe tag valid bit",
71 "PFB non-cacheable bit parity error",
72 "PFB valid bit parity error", /* xec = 0xd */
73 "Microcode Patch Buffer", /* xec = 0x10 */
/*
 * Family 15h MC2 error descriptions. f15h_mc2_mce() indexes this table with
 * xec - 0x4 or xec - 0x7, so the first entry corresponds to xec 0x4.
 */
81 static const char * const f15h_mc2_mce_desc[] = {
82 "Fill ECC error on data fills", /* xec = 0x4 */
83 "Fill parity error on insn fills",
84 "Prefetcher request FIFO parity error",
85 "PRQ address parity error",
86 "PRQ data parity error",
89 "WCB Data parity error",
90 "VB Data ECC or parity error",
91 "L2 Tag ECC error", /* xec = 0x10 */
92 "Hard L2 Tag ECC error",
93 "Multiple hits on L2 tag",
95 "PRB address parity error"
/*
 * MC4 error descriptions. decode_mc4_mce() prints mc4_mce_desc[xec] in its
 * DRAM ECC branch (xec 0x0 or 0x8) and mc4_mce_desc[xec - offset] elsewhere.
 */
98 static const char * const mc4_mce_desc[] = {
99 "DRAM ECC error detected on the NB",
100 "CRC error detected on HT link",
101 "Link-defined sync error packets detected on HT link",
104 "Invalid GART PTE entry during GART table walk",
105 "Unsupported atomic RMW received from an IO link",
106 "Watchdog timeout due to lack of progress",
107 "DRAM ECC error detected on the NB",
108 "SVM DMA Exclusion Vector error",
109 "HT data error detected on link",
110 "Protocol error (link, L3, probe filter)",
111 "NB internal arrays parity error",
112 "DRAM addr/ctl signals parity error",
113 "IO link transmission error",
114 "L3 data cache ECC error", /* xec = 0x1c */
115 "L3 cache tag error",
116 "L3 LRU parity bits error",
117 "ECC Error in the Probe Filter directory"
/*
 * MC5 error descriptions, indexed by xec in decode_mc5_mce(). Entries 0x0
 * and 0xc are printed verbatim; the other visible print appends
 * " parity error" to the entry.
 */
120 static const char * const mc5_mce_desc[] = {
121 "CPU Watchdog timer expire",
122 "Wakeup array dest tag",
126 "Retire dispatch queue",
127 "Mapper checkpoint array",
128 "Physical register file EX0 port",
129 "Physical register file EX1 port",
130 "Physical register file AG0 port",
131 "Physical register file AG1 port",
132 "Flag register file",
134 "Retire status queue"
/*
 * MC6 error descriptions; decode_mc6_mce() prints mc6_mce_desc[xec]
 * followed by " parity error.".
 */
137 static const char * const mc6_mce_desc[] = {
138 "Hardware Assertion",
140 "Physical Register File",
143 "Status Register File",
146 /* Scalable MCA error strings */
/*
 * Extended error code descriptions for SMCA_LS banks; reached through
 * smca_mce_descs[SMCA_LS] and indexed by xec in decode_smca_error().
 */
147 static const char * const smca_ls_mce_desc[] = {
148 "Load queue parity error",
149 "Store queue parity error",
150 "Miss address buffer payload parity error",
151 "Level 1 TLB parity error",
152 "DC Tag error type 5",
153 "DC Tag error type 6",
154 "DC Tag error type 1",
155 "Internal error type 1",
156 "Internal error type 2",
157 "System Read Data Error Thread 0",
158 "System Read Data Error Thread 1",
159 "DC Tag error type 2",
160 "DC Data error type 1 and poison consumption",
161 "DC Data error type 2",
162 "DC Data error type 3",
163 "DC Tag error type 4",
164 "Level 2 TLB parity error",
166 "DC Tag error type 3",
167 "DC Tag error type 5",
168 "L2 Fill Data error",
/*
 * Extended error code descriptions for SMCA_LS_V2 banks; reached through
 * smca_mce_descs[SMCA_LS_V2] and indexed by xec in decode_smca_error().
 */
171 static const char * const smca_ls2_mce_desc[] = {
172 "An ECC error was detected on a data cache read by a probe or victimization",
173 "An ECC error or L2 poison was detected on a data cache read by a load",
174 "An ECC error was detected on a data cache read-modify-write by a store",
175 "An ECC error or poison bit mismatch was detected on a tag read by a probe or victimization",
176 "An ECC error or poison bit mismatch was detected on a tag read by a load",
177 "An ECC error or poison bit mismatch was detected on a tag read by a store",
178 "An ECC error was detected on an EMEM read by a load",
179 "An ECC error was detected on an EMEM read-modify-write by a store",
180 "A parity error was detected in an L1 TLB entry by any access",
181 "A parity error was detected in an L2 TLB entry by any access",
182 "A parity error was detected in a PWC entry by any access",
183 "A parity error was detected in an STQ entry by any access",
184 "A parity error was detected in an LDQ entry by any access",
185 "A parity error was detected in a MAB entry by any access",
186 "A parity error was detected in an SCB entry state field by any access",
187 "A parity error was detected in an SCB entry address field by any access",
188 "A parity error was detected in an SCB entry data field by any access",
189 "A parity error was detected in a WCB entry by any access",
190 "A poisoned line was detected in an SCB entry by any access",
191 "A SystemReadDataError error was reported on read data returned from L2 for a load",
192 "A SystemReadDataError error was reported on read data returned from L2 for an SCB store",
193 "A SystemReadDataError error was reported on read data returned from L2 for a WCB store",
194 "A hardware assertion error was reported",
195 "A parity error was detected in an STLF, SCB EMEM entry or SRB store data by any access",
/*
 * Extended error code descriptions for SMCA_IF banks; reached through
 * smca_mce_descs[SMCA_IF] and indexed by xec in decode_smca_error().
 */
198 static const char * const smca_if_mce_desc[] = {
199 "Op Cache Microtag Probe Port Parity Error",
200 "IC Microtag or Full Tag Multi-hit Error",
201 "IC Full Tag Parity Error",
202 "IC Data Array Parity Error",
203 "Decoupling Queue PhysAddr Parity Error",
204 "L0 ITLB Parity Error",
205 "L1 ITLB Parity Error",
206 "L2 ITLB Parity Error",
207 "BPQ Thread 0 Snoop Parity Error",
208 "BPQ Thread 1 Snoop Parity Error",
209 "L1 BTB Multi-Match Error",
210 "L2 BTB Multi-Match Error",
211 "L2 Cache Response Poison Error",
212 "System Read Data Error",
/*
 * Extended error code descriptions for SMCA_L2_CACHE banks; reached through
 * smca_mce_descs[SMCA_L2_CACHE] and indexed by xec in decode_smca_error().
 */
215 static const char * const smca_l2_mce_desc[] = {
216 "L2M Tag Multiple-Way-Hit error",
217 "L2M Tag or State Array ECC Error",
218 "L2M Data Array ECC Error",
219 "Hardware Assert Error",
/*
 * Extended error code descriptions for SMCA_DE banks; reached through
 * smca_mce_descs[SMCA_DE] and indexed by xec in decode_smca_error().
 */
222 static const char * const smca_de_mce_desc[] = {
223 "Micro-op cache tag parity error",
224 "Micro-op cache data parity error",
225 "Instruction buffer parity error",
226 "Micro-op queue parity error",
227 "Instruction dispatch queue parity error",
228 "Fetch address FIFO parity error",
229 "Patch RAM data parity error",
230 "Patch RAM sequencer parity error",
231 "Micro-op buffer parity error"
/*
 * Extended error code descriptions for SMCA_EX banks; reached through
 * smca_mce_descs[SMCA_EX] and indexed by xec in decode_smca_error().
 */
234 static const char * const smca_ex_mce_desc[] = {
235 "Watchdog Timeout error",
236 "Physical register file parity error",
237 "Flag register file parity error",
238 "Immediate displacement register file parity error",
239 "Address generator payload parity error",
240 "EX payload parity error",
241 "Checkpoint queue parity error",
242 "Retire dispatch queue parity error",
243 "Retire status queue parity error",
244 "Scheduling queue parity error",
245 "Branch buffer queue parity error",
246 "Hardware Assertion error",
/*
 * Extended error code descriptions for SMCA_FP banks; reached through
 * smca_mce_descs[SMCA_FP] and indexed by xec in decode_smca_error().
 */
249 static const char * const smca_fp_mce_desc[] = {
250 "Physical register file (PRF) parity error",
251 "Freelist (FL) parity error",
252 "Schedule queue parity error",
254 "Retire queue (RQ) parity error",
255 "Status register file (SRF) parity error",
256 "Hardware assertion",
/*
 * Extended error code descriptions for SMCA_L3_CACHE banks; reached through
 * smca_mce_descs[SMCA_L3_CACHE] and indexed by xec in decode_smca_error().
 */
259 static const char * const smca_l3_mce_desc[] = {
260 "Shadow Tag Macro ECC Error",
261 "Shadow Tag Macro Multi-way-hit Error",
263 "L3M Tag Multi-way-hit Error",
264 "L3M Data ECC Error",
265 "SDP Parity Error or SystemReadDataError from XI",
266 "L3 Victim Queue Parity Error",
267 "L3 Hardware Assertion",
/*
 * Extended error code descriptions for SMCA_CS banks; reached through
 * smca_mce_descs[SMCA_CS] and indexed by xec in decode_smca_error().
 */
270 static const char * const smca_cs_mce_desc[] = {
273 "Security Violation",
275 "Unexpected Response",
276 "Request or Probe Parity Error",
277 "Read Response Parity Error",
278 "Atomic Request Parity Error",
279 "Probe Filter ECC Error",
/*
 * Extended error code descriptions for SMCA_CS_V2 banks; reached through
 * smca_mce_descs[SMCA_CS_V2] and indexed by xec in decode_smca_error().
 */
282 static const char * const smca_cs2_mce_desc[] = {
285 "Security Violation",
287 "Unexpected Response",
288 "Request or Probe Parity Error",
289 "Read Response Parity Error",
290 "Atomic Request Parity Error",
291 "SDP read response had no match in the CS queue",
292 "Probe Filter Protocol Error",
293 "Probe Filter ECC Error",
294 "SDP read response had an unexpected RETRY error",
295 "Counter overflow error",
296 "Counter underflow error",
/*
 * Extended error code descriptions for SMCA_PIE banks; reached through
 * smca_mce_descs[SMCA_PIE] and indexed by xec in decode_smca_error().
 */
299 static const char * const smca_pie_mce_desc[] = {
301 "Register security violation",
303 "Poison data consumption",
304 "A deferred error was detected in the DF"
/*
 * Extended error code descriptions for SMCA_UMC banks; reached through
 * smca_mce_descs[SMCA_UMC] and indexed by xec in decode_smca_error().
 */
307 static const char * const smca_umc_mce_desc[] = {
311 "Advanced peripheral bus error",
312 "Address/Command parity error",
313 "Write data CRC error",
314 "DCQ SRAM ECC error",
315 "AES SRAM ECC error",
/*
 * Extended error code descriptions for SMCA_PB banks; reached through
 * smca_mce_descs[SMCA_PB] and indexed by xec in decode_smca_error().
 */
318 static const char * const smca_pb_mce_desc[] = {
319 "An ECC error in the Parameter Block RAM array",
/*
 * Extended error code descriptions for SMCA_PSP banks; reached through
 * smca_mce_descs[SMCA_PSP] and indexed by xec in decode_smca_error().
 */
322 static const char * const smca_psp_mce_desc[] = {
323 "An ECC or parity error in a PSP RAM instance",
/*
 * Extended error code descriptions for SMCA_PSP_V2 banks; reached through
 * smca_mce_descs[SMCA_PSP_V2] and indexed by xec in decode_smca_error().
 */
326 static const char * const smca_psp2_mce_desc[] = {
327 "High SRAM ECC or parity error",
328 "Low SRAM ECC or parity error",
329 "Instruction Cache Bank 0 ECC or parity error",
330 "Instruction Cache Bank 1 ECC or parity error",
331 "Instruction Tag Ram 0 parity error",
332 "Instruction Tag Ram 1 parity error",
333 "Data Cache Bank 0 ECC or parity error",
334 "Data Cache Bank 1 ECC or parity error",
335 "Data Cache Bank 2 ECC or parity error",
336 "Data Cache Bank 3 ECC or parity error",
337 "Data Tag Bank 0 parity error",
338 "Data Tag Bank 1 parity error",
339 "Data Tag Bank 2 parity error",
340 "Data Tag Bank 3 parity error",
341 "Dirty Data Ram parity error",
342 "TLB Bank 0 parity error",
343 "TLB Bank 1 parity error",
344 "System Hub Read Buffer ECC or parity error",
/*
 * Extended error code descriptions for SMCA_SMU banks; reached through
 * smca_mce_descs[SMCA_SMU] and indexed by xec in decode_smca_error().
 */
347 static const char * const smca_smu_mce_desc[] = {
348 "An ECC or parity error in an SMU RAM instance",
/*
 * Extended error code descriptions for SMCA_SMU_V2 banks; reached through
 * smca_mce_descs[SMCA_SMU_V2] and indexed by xec in decode_smca_error().
 */
351 static const char * const smca_smu2_mce_desc[] = {
352 "High SRAM ECC or parity error",
353 "Low SRAM ECC or parity error",
354 "Data Cache Bank A ECC or parity error",
355 "Data Cache Bank B ECC or parity error",
356 "Data Tag Cache Bank A ECC or parity error",
357 "Data Tag Cache Bank B ECC or parity error",
358 "Instruction Cache Bank A ECC or parity error",
359 "Instruction Cache Bank B ECC or parity error",
360 "Instruction Tag Cache Bank A ECC or parity error",
361 "Instruction Tag Cache Bank B ECC or parity error",
362 "System Hub Read Buffer ECC or parity error",
/*
 * Extended error code descriptions for SMCA_MP5 banks; reached through
 * smca_mce_descs[SMCA_MP5] and indexed by xec in decode_smca_error().
 */
365 static const char * const smca_mp5_mce_desc[] = {
366 "High SRAM ECC or parity error",
367 "Low SRAM ECC or parity error",
368 "Data Cache Bank A ECC or parity error",
369 "Data Cache Bank B ECC or parity error",
370 "Data Tag Cache Bank A ECC or parity error",
371 "Data Tag Cache Bank B ECC or parity error",
372 "Instruction Cache Bank A ECC or parity error",
373 "Instruction Cache Bank B ECC or parity error",
374 "Instruction Tag Cache Bank A ECC or parity error",
375 "Instruction Tag Cache Bank B ECC or parity error",
/*
 * Extended error code descriptions for SMCA_NBIO banks; reached through
 * smca_mce_descs[SMCA_NBIO] and indexed by xec in decode_smca_error().
 */
378 static const char * const smca_nbio_mce_desc[] = {
379 "ECC or Parity error",
381 "SDP ErrEvent error",
382 "SDP Egress Poison Error",
383 "IOHC Internal Poison Error",
/*
 * Extended error code descriptions for SMCA_PCIE banks; reached through
 * smca_mce_descs[SMCA_PCIE] and indexed by xec in decode_smca_error().
 */
386 static const char * const smca_pcie_mce_desc[] = {
387 "CCIX PER Message logging",
388 "CCIX Read Response with Status: Non-Data Error",
389 "CCIX Write Response with Status: Non-Data Error",
390 "CCIX Read Response with Status: Data Error",
391 "CCIX Non-okay write response with data error",
/* One description table for a bank type: the strings plus their count. */
394 struct smca_mce_desc {
/* Description strings, indexed by extended error code. */
395 const char * const *descs;
/* Number of entries in @descs; used as the upper bound for xec. */
396 unsigned int num_descs;
/*
 * Description tables keyed by bank type (designated initializers on the
 * SMCA_* constants). decode_smca_error() checks xec against num_descs before
 * reading descs[xec].
 */
399 static struct smca_mce_desc smca_mce_descs[] = {
400 [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) },
401 [SMCA_LS_V2] = { smca_ls2_mce_desc, ARRAY_SIZE(smca_ls2_mce_desc) },
402 [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) },
403 [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) },
404 [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) },
405 [SMCA_EX] = { smca_ex_mce_desc, ARRAY_SIZE(smca_ex_mce_desc) },
406 [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
407 [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
408 [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
409 [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) },
410 [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
411 [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
412 [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
413 [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
414 [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc) },
415 [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
416 [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) },
417 [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
418 [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) },
419 [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) },
/*
 * MC0 decode helper installed by mce_amd_init() and used as the fallback of
 * f10h_mc0_mce(). Only part of the body is visible in this excerpt: it
 * appends either an "L1 linefill from L2" message or, for cache level L1, a
 * "Data/Tag" message to the line started by the caller.
 * NOTE(review): the first condition and the return statements are not
 * visible here.
 */
422 static bool f12h_mc0_mce(u16 ec, u8 xec)
431 pr_cont("during L1 linefill from L2.\n");
432 else if (ll == LL_L1)
433 pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
/*
 * MC0 decode helper installed by mce_amd_init(). Reports a generic request
 * at cache level L1 as a data scrub error and hands other signatures to
 * f12h_mc0_mce().
 * NOTE(review): the return from the scrub branch is not visible here.
 */
440 static bool f10h_mc0_mce(u16 ec, u8 xec)
442 if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
443 pr_cont("during data scrub.\n");
446 return f12h_mc0_mce(ec, xec);
/*
 * MC0 decode helper installed by mce_amd_init(). Prints the
 * "system linefill" message for one signature and defers to f10h_mc0_mce()
 * with the same arguments.
 * NOTE(review): the condition guarding the linefill message is not visible
 * in this excerpt.
 */
449 static bool k8_mc0_mce(u16 ec, u8 xec)
452 pr_cont("during system linefill.\n");
456 return f10h_mc0_mce(ec, xec);
/*
 * MC0 decode helper installed by mce_amd_init() alongside cat_mc1_mce().
 * Visible parts: a check that the transaction is a data access at cache
 * level L1, three parity error messages, and a bus error branch that checks
 * for a memory or I/O access at level LG before printing
 * "System read data error on a ...".
 * NOTE(review): the conditions selecting each message and the statements
 * guarded by the two checks are not visible in this excerpt.
 */
459 static bool cat_mc0_mce(u16 ec, u8 xec)
466 if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
472 pr_cont("Data/Tag parity error due to %s.\n",
473 (r4 == R4_DRD ? "load/hw prf" : "store"));
476 pr_cont("Copyback parity error on a tag miss.\n");
479 pr_cont("Tag parity error during snoop.\n");
484 } else if (BUS_ERROR(ec)) {
486 if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
489 pr_cont("System read data error on a ");
493 pr_cont("TLB reload.\n");
/*
 * MC0 decode helper installed by mce_amd_init() together with
 * f15h_mc1_mce() and f15h_mc2_mce(). Visible output covers data array,
 * linefill, STQ, SCB, tag and LDQ errors, a bus error branch
 * ("System Read Data Error" or an internal error condition with the xec
 * value) and an internal error branch ("Hardware Assert").
 * NOTE(review): the xec values selecting each message are not visible in
 * this excerpt.
 */
511 static bool f15h_mc0_mce(u16 ec, u8 xec)
519 pr_cont("Data Array access error.\n");
523 pr_cont("UC error during a linefill from L2/NB.\n");
528 pr_cont("STQ access error.\n");
532 pr_cont("SCB access error.\n");
536 pr_cont("Tag error.\n");
540 pr_cont("LDQ access error.\n");
546 } else if (BUS_ERROR(ec)) {
549 pr_cont("System Read Data Error.\n");
551 pr_cont(" Internal error condition type %d.\n", xec);
552 } else if (INT_ERROR(ec)) {
554 pr_cont("Hardware Assert.\n");
/*
 * Decode an MC0 bank error from @m->status. Starts an emergency-level
 * "MC0 Error: " line, decodes data TLB signatures here and passes other
 * signatures to the per-family fam_ops.mc0_mce() callback. The final
 * "Corrupted MC0 MCE info?" line is the fallback report.
 * NOTE(review): the returns that skip the fallback are not visible here.
 */
564 static void decode_mc0_mce(struct mce *m)
566 u16 ec = EC(m->status);
567 u8 xec = XEC(m->status, xec_mask);
569 pr_emerg(HW_ERR "MC0 Error: ");
571 /* TLB error signatures are the same across families */
573 if (TT(ec) == TT_DATA) {
/* xec 2 -> "locked miss", other non-zero -> "multimatch", 0 -> "parity" */
574 pr_cont("%s TLB %s.\n", LL_MSG(ec),
575 ((xec == 2) ? "locked miss"
576 : (xec ? "multimatch" : "parity")));
579 } else if (fam_ops.mc0_mce(ec, xec))
582 pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
/*
 * MC1 decode helper that mce_amd_init() installs for several families.
 * Visible output: an L2 linefill message and, when ll is 0x1, the data load
 * parity, copyback/victim and tag snoop messages.
 * NOTE(review): the conditions selecting those three messages are not
 * visible in this excerpt.
 */
585 static bool k8_mc1_mce(u16 ec, u8 xec)
594 pr_cont("during a linefill from L2.\n");
595 else if (ll == 0x1) {
598 pr_cont("Parity error during data load.\n");
602 pr_cont("Copyback Parity/Victim error.\n");
606 pr_cont("Tag Snoop error.\n");
/*
 * MC1 decode helper installed by mce_amd_init() alongside cat_mc0_mce().
 * Checks that the transaction type is an instruction access and prints one
 * of the tag/data array, snoop, victim castout or microcode patch RAM
 * messages.
 * NOTE(review): apart from the R4_SNOOP test, the selecting conditions and
 * the statement guarded by the TT check are not visible in this excerpt.
 */
619 static bool cat_mc1_mce(u16 ec, u8 xec)
627 if (TT(ec) != TT_INSTR)
631 pr_cont("Data/tag array parity error for a tag hit.\n");
632 else if (r4 == R4_SNOOP)
633 pr_cont("Tag error during snoop/victimization.\n");
635 pr_cont("Tag parity error from victim castout.\n");
637 pr_cont("Microcode patch RAM parity error.\n");
/*
 * MC1 decode helper installed by mce_amd_init(). Prints an entry of
 * f15h_mc1_mce_desc[], using xec, xec-2 or xec-4 as the index; the last
 * visible print wraps the entry as "Decoder ... parity error.".
 * NOTE(review): the case labels choosing the index offset are not visible
 * in this excerpt.
 */
644 static bool f15h_mc1_mce(u16 ec, u8 xec)
653 pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
657 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
661 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
665 pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
/*
 * Decode an MC1 bank error from @m->status. Starts an emergency-level
 * "MC1 Error: " line, decodes TLB, bus and internal errors here and passes
 * other signatures to the per-family fam_ops.mc1_mce() callback. The final
 * "Corrupted MC1 MCE info?" line is the fallback report.
 * NOTE(review): the first condition of the chain and the returns that skip
 * the fallback are not visible in this excerpt.
 */
674 static void decode_mc1_mce(struct mce *m)
676 u16 ec = EC(m->status);
677 u8 xec = XEC(m->status, xec_mask);
679 pr_emerg(HW_ERR "MC1 Error: ");
682 pr_cont("%s TLB %s.\n", LL_MSG(ec),
683 (xec ? "multimatch" : "parity error"));
684 else if (BUS_ERROR(ec)) {
/* Family 0xf with status bit 58 set selects the "system linefill" wording. */
685 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
687 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
688 } else if (INT_ERROR(ec)) {
690 pr_cont("Hardware Assert.\n");
693 } else if (fam_ops.mc1_mce(ec, xec))
701 pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
/*
 * MC2 decode helper that mce_amd_init() installs for several families.
 * Appends the location or kind of the error to the line started by
 * decode_mc2_mce(): write/victim data buffers, L2 cache tags (xec 0x2 with a
 * memory error) and, for xec 0x0, a Page Descriptor Cache/Guest TLB error,
 * an NB data read error (bus error) or a copyback/L2 access error (memory
 * error).
 * NOTE(review): the conditions for the first two messages and the inner
 * conditions of the xec 0x0 branch are not visible in this excerpt.
 */
704 static bool k8_mc2_mce(u16 ec, u8 xec)
709 pr_cont(" in the write data buffers.\n");
711 pr_cont(" in the victim data buffers.\n");
712 else if (xec == 0x2 && MEM_ERROR(ec))
713 pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
714 else if (xec == 0x0) {
716 pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n",
718 else if (BUS_ERROR(ec))
719 pr_cont(": %s/ECC error in data read from NB: %s.\n",
720 R4_MSG(ec), PP_MSG(ec));
721 else if (MEM_ERROR(ec)) {
725 pr_cont(": %s error during data copyback.\n",
728 pr_cont(": %s parity/ECC error during data "
729 "access from L2.\n", R4_MSG(ec));
/*
 * MC2 decode helper installed by mce_amd_init(). Visible output: two TLB
 * messages, an NB data read message for bus errors, an entry of
 * f15h_mc2_mce_desc[] (index xec - 0x4 or xec - 0x7) for memory errors and
 * "Hardware Assert" for internal errors.
 * NOTE(review): the first condition of the chain and the xec ranges that
 * choose the index offset are not visible in this excerpt.
 */
740 static bool f15h_mc2_mce(u16 ec, u8 xec)
746 pr_cont("Data parity TLB read error.\n");
748 pr_cont("Poison data provided for TLB fill.\n");
751 } else if (BUS_ERROR(ec)) {
755 pr_cont("Error during attempted NB data read.\n");
756 } else if (MEM_ERROR(ec)) {
759 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
763 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
769 } else if (INT_ERROR(ec)) {
771 pr_cont("Hardware Assert.\n");
/*
 * MC2 decode helper installed by mce_amd_init(). Each visible message is
 * refined by the memory transaction type r4: IBUFF/OBUFF parity errors, L2
 * tag ECC errors, L2 data array ECC errors (the "Hit" label additionally
 * requires the low two xec bits to be clear) and L2 attribute parity errors.
 * NOTE(review): the xec values selecting each message are not visible in
 * this excerpt.
 */
779 static bool f16h_mc2_mce(u16 ec, u8 xec)
788 pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
793 pr_cont("ECC error in L2 tag (%s).\n",
794 ((r4 == R4_GEN) ? "BankReq" :
795 ((r4 == R4_SNOOP) ? "Prb" : "Fill")));
800 pr_cont("ECC error in L2 data array (%s).\n",
801 (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" :
802 ((r4 == R4_GEN) ? "Attr" :
803 ((r4 == R4_EVICT) ? "Vict" : "Fill"))));
808 pr_cont("Parity error in L2 attribute bits (%s).\n",
809 ((r4 == R4_RD) ? "Hit" :
810 ((r4 == R4_GEN) ? "Attr" : "Fill")));
/*
 * Decode an MC2 bank error from @m->status: start an emergency-level
 * "MC2 Error: " line and let the per-family fam_ops.mc2_mce() callback
 * finish it. A false return from the callback is reported as corrupted MCE
 * info.
 */
820 static void decode_mc2_mce(struct mce *m)
822 u16 ec = EC(m->status);
823 u8 xec = XEC(m->status, xec_mask);
825 pr_emerg(HW_ERR "MC2 Error: ");
827 if (!fam_ops.mc2_mce(ec, xec))
/*
 * NOTE(review): the MC0/MC1/MC3/MC4/MC5/MC6 decoders report this case with
 * pr_emerg(); here the HW_ERR-prefixed text is appended with pr_cont() --
 * confirm this is intended.
 */
828 pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
/*
 * Decode an MC3 bank error from @m->status. Families 0x14 and later are not
 * expected to raise MC3 errors and get a "please report" message instead.
 * The visible validity check requires a bus error whose memory transaction
 * type is R4_DRD or R4_DWR; the transaction type is then printed, and
 * "Corrupted MC3 MCE info?" is the fallback report.
 * NOTE(review): the statement guarded by the validity check and the xec
 * test are not visible in this excerpt.
 */
831 static void decode_mc3_mce(struct mce *m)
833 u16 ec = EC(m->status);
834 u8 xec = XEC(m->status, xec_mask);
836 if (boot_cpu_data.x86 >= 0x14) {
837 pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
838 " please report on LKML.\n");
842 pr_emerg(HW_ERR "MC3 Error");
847 if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
850 pr_cont(" during %s.\n", R4_MSG(ec));
857 pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
/*
 * Decode an MC4 bank error from @m->status and name the node it was
 * reported on. DRAM ECC codes (xec 0x0 and 0x8) print their description and
 * are passed on to the registered DRAM ECC decoder. Other codes print
 * special-case messages or mc4_mce_desc[xec - offset], and
 * "Corrupted MC4 MCE info?" is the fallback report.
 * NOTE(review): the guard around the decode_dram_ecc() call, the
 * computation of offset and the case labels are not visible in this
 * excerpt.
 */
860 static void decode_mc4_mce(struct mce *m)
862 unsigned int fam = x86_family(m->cpuid);
863 int node_id = amd_get_nb_id(m->extcpu);
864 u16 ec = EC(m->status);
/* Uses a fixed 0x1f mask rather than the file-wide xec_mask. */
865 u8 xec = XEC(m->status, 0x1f);
868 pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
873 /* special handling for DRAM ECCs */
874 if (xec == 0x0 || xec == 0x8) {
875 /* no ECCs on F11h */
879 pr_cont("%s.\n", mc4_mce_desc[xec]);
882 decode_dram_ecc(node_id, m);
889 pr_cont("GART Table Walk data error.\n");
890 else if (BUS_ERROR(ec))
891 pr_cont("DMA Exclusion Vector Table Walk error.\n");
897 if (fam == 0x15 || fam == 0x16)
898 pr_cont("Compute Unit Data Error.\n");
911 pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
915 pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
/*
 * Decode an MC5 bank error from @m->status. Families 0xf and 0x11 are
 * special-cased before anything is printed. Otherwise the "MC5 Error: "
 * line is completed with "Hardware Assert" or an mc5_mce_desc[] entry
 * (verbatim for xec 0x0 and 0xc, with " parity error" appended in the other
 * visible print), and "Corrupted MC5 MCE info?" is the fallback report.
 * NOTE(review): the statement guarded by the family check and the
 * conditions around the prints are not visible in this excerpt.
 */
918 static void decode_mc5_mce(struct mce *m)
920 unsigned int fam = x86_family(m->cpuid);
921 u16 ec = EC(m->status);
922 u8 xec = XEC(m->status, xec_mask);
924 if (fam == 0xf || fam == 0x11)
927 pr_emerg(HW_ERR "MC5 Error: ");
931 pr_cont("Hardware Assert.\n");
937 if (xec == 0x0 || xec == 0xc)
938 pr_cont("%s.\n", mc5_mce_desc[xec]);
940 pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
947 pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
/*
 * Decode an MC6 bank error from @m->status: print mc6_mce_desc[xec] as a
 * parity error on the "MC6 Error: " line, with "Corrupted MC6 MCE info?" as
 * the fallback report.
 * NOTE(review): the range check on xec is not visible in this excerpt.
 */
950 static void decode_mc6_mce(struct mce *m)
952 u8 xec = XEC(m->status, xec_mask);
954 pr_emerg(HW_ERR "MC6 Error: ");
959 pr_cont("%s parity error.\n", mc6_mce_desc[xec]);
963 pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
966 /* Decode errors according to Scalable MCA specification */
/*
 * Looks up the hardware ID registered for @m->bank in smca_banks[], prints
 * the long name of the bank type with the extended error code and, when the
 * code is valid for that bank, its description from smca_mce_descs[]. UMC
 * errors with xec 0 are also passed to the DRAM ECC decoder if one is
 * registered.
 * NOTE(review): the statements guarded by the bank range check and the
 * handling of a missing hwid are not visible in this excerpt.
 */
967 static void decode_smca_error(struct mce *m)
969 struct smca_hwid *hwid;
970 enum smca_bank_types bank_type;
972 u8 xec = XEC(m->status, xec_mask);
/* Bank numbers outside smca_banks[] cannot be looked up. */
974 if (m->bank >= ARRAY_SIZE(smca_banks))
977 hwid = smca_banks[m->bank].hwid;
981 bank_type = hwid->bank_type;
983 if (bank_type == SMCA_RESERVED) {
984 pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank);
988 ip_name = smca_get_long_name(bank_type);
990 pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
992 /* Only print the decode of valid error codes */
/* Valid means: inside the description table and set in the hwid's xec_bitmap. */
993 if (xec < smca_mce_descs[bank_type].num_descs &&
994 (hwid->xec_bitmap & BIT_ULL(xec))) {
995 pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
998 if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
999 decode_dram_ecc(cpu_to_node(m->extcpu), m);
/*
 * Print the generic fields of the 16-bit MCA error code @ec. Internal
 * errors print only the internal error type. For other codes the line
 * starts with the cache level and is extended with the mem/io,
 * transaction type, memory transaction type and participating
 * processor/timeout fields.
 * NOTE(review): apart from the MEM_ERROR/BUS_ERROR test, the conditions
 * selecting each field are not visible in this excerpt.
 */
1002 static inline void amd_decode_err_code(u16 ec)
1004 if (INT_ERROR(ec)) {
1005 pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
1009 pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
1012 pr_cont(", mem/io: %s", II_MSG(ec));
1014 pr_cont(", tx: %s", TT_MSG(ec));
1016 if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
1017 pr_cont(", mem-tx: %s", R4_MSG(ec));
1020 pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
/*
 * Map the severity bits of @m to a one-line summary string.
 *
 * Uncorrected errors (MCI_STATUS_UC) are classified first: PCC means fatal,
 * otherwise MCG_STATUS_RIPV means software restartable, otherwise software
 * containable. Errors that are not uncorrected are reported as deferred when
 * MCI_STATUS_DEFERRED is set and as corrected otherwise.
 *
 * Returns a pointer to a string literal.
 */
1026 static const char *decode_error_status(struct mce *m)
1028 if (m->status & MCI_STATUS_UC) {
1029 if (m->status & MCI_STATUS_PCC)
1030 return "System Fatal error.";
1031 if (m->mcgstatus & MCG_STATUS_RIPV)
1032 return "Uncorrected, software restartable error.";
1033 return "Uncorrected, software containable error.";
1036 if (m->status & MCI_STATUS_DEFERRED)
1037 return "Deferred error, no action required.";
1039 return "Corrected error, no action required.";
/*
 * Notifier callback installed through amd_mce_dec_nb. @data is the
 * struct mce to decode; @nb and @val are not used in the visible lines.
 *
 * Prints the severity summary, the CPU and decoded MCi_STATUS flags, then
 * the error address, IPID/syndrome and TSC where available, followed by the
 * bank-specific and generic error code decode. Finally marks the record as
 * MCE_HANDLED_EDAC.
 * NOTE(review): the return type, the return values and the per-bank
 * dispatch are not visible in this excerpt.
 */
1043 amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
1045 struct mce *m = (struct mce *)data;
1046 unsigned int fam = x86_family(m->cpuid);
/* Records already flagged MCE_HANDLED_CEC are special-cased (guarded statement not visible). */
1049 if (m->kflags & MCE_HANDLED_CEC)
1052 pr_emerg(HW_ERR "%s\n", decode_error_status(m));
1054 pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
1056 fam, x86_model(m->cpuid), x86_stepping(m->cpuid),
1058 ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
/* Deferred errors show "-" here; they get their own flag further down. */
1059 ((m->status & MCI_STATUS_UC) ? "UE" :
1060 (m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"),
1061 ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
1062 ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"),
1063 ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"));
1065 if (boot_cpu_has(X86_FEATURE_SMCA)) {
1067 u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
/* TCC is only printed when the bank's config MSR is readable and has MCAX set. */
1069 if (!rdmsr_safe(addr, &low, &high) &&
1070 (low & MCI_CONFIG_MCAX))
1071 pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
1073 pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
1076 /* do the two bits[14:13] of the upper status dword (status bits 46:45) together */
1077 ecc = (m->status >> 45) & 0x3;
1079 pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
1082 pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-"));
1084 /* F15h, bank4, bit 43 is part of McaStatSubCache. */
1085 if (fam != 0x15 || m->bank != 4)
1086 pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-"));
1090 pr_cont("|%s", (m->status & MCI_STATUS_SCRUB ? "Scrub" : "-"));
1092 pr_cont("]: 0x%016llx\n", m->status);
1094 if (m->status & MCI_STATUS_ADDRV)
1095 pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr);
1097 if (boot_cpu_has(X86_FEATURE_SMCA)) {
1098 pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid);
1100 if (m->status & MCI_STATUS_SYNDV)
1101 pr_cont(", Syndrome: 0x%016llx", m->synd);
1105 decode_smca_error(m);
1110 pr_emerg(HW_ERR "TSC: %llu\n", m->tsc);
1112 /* Doesn't matter which member to test. */
1113 if (!fam_ops.mc0_mce)
/* The low 16 bits of the status word are the MCA error code. */
1150 amd_decode_err_code(m->status & 0xffff);
1152 m->kflags |= MCE_HANDLED_EDAC;
/*
 * Notifier block that hooks amd_decode_mce() into the MCE decode chain at
 * MCE_PRIO_EDAC priority; registered in mce_amd_init() and removed in
 * mce_amd_exit().
 */
1156 static struct notifier_block amd_mce_dec_nb = {
1157 .notifier_call = amd_decode_mce,
1158 .priority = MCE_PRIO_EDAC,
/*
 * Initialise the decoder. Checks that the boot CPU is from AMD or Hygon,
 * special-cases CPUs with X86_FEATURE_SMCA, selects the MC0/MC1/MC2 decode
 * callbacks for the detected family, and registers amd_mce_dec_nb on the
 * MCE decode chain.
 * NOTE(review): the case labels selecting each callback set, the statement
 * guarded by the vendor check and the return values are not visible in this
 * excerpt.
 */
1161 static int __init mce_amd_init(void)
1163 struct cpuinfo_x86 *c = &boot_cpu_data;
1165 if (c->x86_vendor != X86_VENDOR_AMD &&
1166 c->x86_vendor != X86_VENDOR_HYGON)
1169 if (boot_cpu_has(X86_FEATURE_SMCA)) {
1176 fam_ops.mc0_mce = k8_mc0_mce;
1177 fam_ops.mc1_mce = k8_mc1_mce;
1178 fam_ops.mc2_mce = k8_mc2_mce;
1182 fam_ops.mc0_mce = f10h_mc0_mce;
1183 fam_ops.mc1_mce = k8_mc1_mce;
1184 fam_ops.mc2_mce = k8_mc2_mce;
1188 fam_ops.mc0_mce = k8_mc0_mce;
1189 fam_ops.mc1_mce = k8_mc1_mce;
1190 fam_ops.mc2_mce = k8_mc2_mce;
1194 fam_ops.mc0_mce = f12h_mc0_mce;
1195 fam_ops.mc1_mce = k8_mc1_mce;
1196 fam_ops.mc2_mce = k8_mc2_mce;
1200 fam_ops.mc0_mce = cat_mc0_mce;
1201 fam_ops.mc1_mce = cat_mc1_mce;
1202 fam_ops.mc2_mce = k8_mc2_mce;
/* Model 0x60 uses a 6-bit extended error code mask, other models a 5-bit one. */
1206 xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
1208 fam_ops.mc0_mce = f15h_mc0_mce;
1209 fam_ops.mc1_mce = f15h_mc1_mce;
1210 fam_ops.mc2_mce = f15h_mc2_mce;
1215 fam_ops.mc0_mce = cat_mc0_mce;
1216 fam_ops.mc1_mce = cat_mc1_mce;
1217 fam_ops.mc2_mce = f16h_mc2_mce;
1222 pr_warn_once("Decoding supported only on Scalable MCA processors.\n");
/* NOTE(review): raw printk(KERN_WARNING ...) next to pr_warn_once()/pr_info(); pr_warn() would match. */
1226 printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
1231 pr_info("MCE: In-kernel MCE decoding enabled.\n");
1233 mce_register_decode_chain(&amd_mce_dec_nb);
1237 early_initcall(mce_amd_init);
/*
 * Module exit: remove amd_mce_dec_nb from the MCE decode chain.
 * NOTE(review): init is wired up with early_initcall() while exit uses
 * module_exit(); any preprocessor guard around this section is not visible
 * in this excerpt -- confirm.
 */
1240 static void __exit mce_amd_exit(void)
1242 mce_unregister_decode_chain(&amd_mce_dec_nb);
1245 MODULE_DESCRIPTION("AMD MCE decoder");
1246 MODULE_ALIAS("edac-mce-amd");
1247 MODULE_LICENSE("GPL");
1248 module_exit(mce_amd_exit);