1 // SPDX-License-Identifier: GPL-2.0
/*
 * UEFI Common Platform Error Record (CPER) support
 *
 * Copyright (C) 2010, Intel Corp.
 *	Author: Huang Ying <ying.huang@intel.com>
 *
 * CPER is the format used to describe platform hardware errors by
 * various tables, such as ERST, BERT and HEST.
 *
 * For more information about CPER, please refer to Appendix N of the
 * UEFI Specification, version 2.4.
 */
15 #include <linux/kernel.h>
16 #include <linux/module.h>
17 #include <linux/time.h>
18 #include <linux/cper.h>
19 #include <linux/dmi.h>
20 #include <linux/acpi.h>
21 #include <linux/pci.h>
22 #include <linux/aer.h>
23 #include <linux/printk.h>
24 #include <linux/bcd.h>
25 #include <acpi/ghes.h>
26 #include <ras/ras_event.h>
/*
 * File-wide scratch buffer of CPER_REC_LEN bytes.  It is handed as the
 * output buffer to cper_mem_err_location() and cper_dimm_err_location()
 * by cper_mem_err_unpack() and cper_print_mem(), which then print its
 * contents.
 * NOTE(review): no locking around its users is visible in this file;
 * confirm that callers cannot run concurrently.
 */
static char rcd_decode_str[CPER_REC_LEN];
31 * CPER record ID need to be unique even after reboot, because record
32 * ID is used as index for ERST storage, while CPER records from
33 * multiple boot may co-exist in ERST.
35 u64 cper_next_record_id(void)
37 static atomic64_t seq;
39 if (!atomic64_read(&seq)) {
40 time64_t time = ktime_get_real_seconds();
43 * This code is unlikely to still be needed in year 2106,
44 * but just in case, let's use a few more bits for timestamps
45 * after y2038 to be sure they keep increasing monotonically
46 * for the next few hundred years...
48 if (time < 0x80000000)
49 atomic64_set(&seq, (ktime_get_real_seconds()) << 32);
51 atomic64_set(&seq, 0x8000000000000000ull |
52 ktime_get_real_seconds() << 24);
55 return atomic64_inc_return(&seq);
57 EXPORT_SYMBOL_GPL(cper_next_record_id);
59 static const char * const severity_strs[] = {
66 const char *cper_severity_str(unsigned int severity)
68 return severity < ARRAY_SIZE(severity_strs) ?
69 severity_strs[severity] : "unknown";
71 EXPORT_SYMBOL_GPL(cper_severity_str);
/**
 * cper_print_bits - print the names of the bits that are set
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask to decode
 * @strs: string array, indexed by bit position
 * @strs_size: number of entries in @strs
 *
 * For every bit set in @bits that has a non-NULL entry in @strs, the entry
 * is appended to the current output line, separated by ", ".  When adding
 * the next name would push the line past 80 characters, the line is
 * printed and a new one, again starting with @pfx, is begun.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char * const strs[], unsigned int strs_size)
{
	/* one output line; sized a little above the 80-column wrap limit */
	char line[84];
	unsigned int bit;
	int used = 0;

	for (bit = 0; bit < strs_size; bit++) {
		const char *name;

		if (!(bits & (1U << bit)))
			continue;

		name = strs[bit];
		if (!name)
			continue;

		/* flush first if ", <name>" would not fit on this line */
		if (used && used + strlen(name) + 2 > 80) {
			printk("%s\n", line);
			used = 0;
		}

		if (used)
			used += scnprintf(line + used, sizeof(line) - used,
					  ", %s", name);
		else
			used = snprintf(line, sizeof(line), "%s%s", pfx, name);
	}

	/* emit whatever is left on the last, partially filled line */
	if (used)
		printk("%s\n", line);
}
110 static const char * const proc_type_strs[] = {
116 static const char * const proc_isa_strs[] = {
124 const char * const cper_proc_error_type_strs[] = {
128 "micro-architectural error",
131 static const char * const proc_op_strs[] = {
132 "unknown or generic",
135 "instruction execution",
138 static const char * const proc_flag_strs[] = {
145 static void cper_print_proc_generic(const char *pfx,
146 const struct cper_sec_proc_generic *proc)
148 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
149 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
150 proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
151 proc_type_strs[proc->proc_type] : "unknown");
152 if (proc->validation_bits & CPER_PROC_VALID_ISA)
153 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
154 proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
155 proc_isa_strs[proc->proc_isa] : "unknown");
156 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
157 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
158 cper_print_bits(pfx, proc->proc_error_type,
159 cper_proc_error_type_strs,
160 ARRAY_SIZE(cper_proc_error_type_strs));
162 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
163 printk("%s""operation: %d, %s\n", pfx, proc->operation,
164 proc->operation < ARRAY_SIZE(proc_op_strs) ?
165 proc_op_strs[proc->operation] : "unknown");
166 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
167 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
168 cper_print_bits(pfx, proc->flags, proc_flag_strs,
169 ARRAY_SIZE(proc_flag_strs));
171 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
172 printk("%s""level: %d\n", pfx, proc->level);
173 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
174 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
175 if (proc->validation_bits & CPER_PROC_VALID_ID)
176 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
177 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
178 printk("%s""target_address: 0x%016llx\n",
179 pfx, proc->target_addr);
180 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
181 printk("%s""requestor_id: 0x%016llx\n",
182 pfx, proc->requestor_id);
183 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
184 printk("%s""responder_id: 0x%016llx\n",
185 pfx, proc->responder_id);
186 if (proc->validation_bits & CPER_PROC_VALID_IP)
187 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
190 static const char * const mem_err_type_strs[] = {
195 "single-symbol chipkill ECC",
196 "multi-symbol chipkill ECC",
204 "scrub corrected error",
205 "scrub uncorrected error",
206 "physical memory map-out event",
209 const char *cper_mem_err_type_str(unsigned int etype)
211 return etype < ARRAY_SIZE(mem_err_type_strs) ?
212 mem_err_type_strs[etype] : "unknown";
214 EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
216 static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
224 len = CPER_REC_LEN - 1;
225 if (mem->validation_bits & CPER_MEM_VALID_NODE)
226 n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
227 if (mem->validation_bits & CPER_MEM_VALID_CARD)
228 n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
229 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
230 n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
231 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
232 n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
233 if (mem->validation_bits & CPER_MEM_VALID_BANK)
234 n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
235 if (mem->validation_bits & CPER_MEM_VALID_BANK_GROUP)
236 n += scnprintf(msg + n, len - n, "bank_group: %d ",
237 mem->bank >> CPER_MEM_BANK_GROUP_SHIFT);
238 if (mem->validation_bits & CPER_MEM_VALID_BANK_ADDRESS)
239 n += scnprintf(msg + n, len - n, "bank_address: %d ",
240 mem->bank & CPER_MEM_BANK_ADDRESS_MASK);
241 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
242 n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
243 if (mem->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
246 row |= cper_get_mem_extension(mem->validation_bits, mem->extended);
247 n += scnprintf(msg + n, len - n, "row: %d ", row);
249 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
250 n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
251 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
252 n += scnprintf(msg + n, len - n, "bit_position: %d ",
254 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
255 n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
257 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
258 n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
260 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
261 scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
263 if (mem->validation_bits & CPER_MEM_VALID_CHIP_ID)
264 scnprintf(msg + n, len - n, "chip_id: %d ",
265 mem->extended >> CPER_MEM_CHIP_ID_SHIFT);
271 static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
274 const char *bank = NULL, *device = NULL;
276 if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
280 len = CPER_REC_LEN - 1;
281 dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
283 n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
285 n = snprintf(msg, len,
286 "DIMM location: not present. DMI handle: 0x%.4x ",
287 mem->mem_dev_handle);
293 void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
294 struct cper_mem_err_compact *cmem)
296 cmem->validation_bits = mem->validation_bits;
297 cmem->node = mem->node;
298 cmem->card = mem->card;
299 cmem->module = mem->module;
300 cmem->bank = mem->bank;
301 cmem->device = mem->device;
302 cmem->row = mem->row;
303 cmem->column = mem->column;
304 cmem->bit_pos = mem->bit_pos;
305 cmem->requestor_id = mem->requestor_id;
306 cmem->responder_id = mem->responder_id;
307 cmem->target_id = mem->target_id;
308 cmem->extended = mem->extended;
309 cmem->rank = mem->rank;
310 cmem->mem_array_handle = mem->mem_array_handle;
311 cmem->mem_dev_handle = mem->mem_dev_handle;
314 const char *cper_mem_err_unpack(struct trace_seq *p,
315 struct cper_mem_err_compact *cmem)
317 const char *ret = trace_seq_buffer_ptr(p);
319 if (cper_mem_err_location(cmem, rcd_decode_str))
320 trace_seq_printf(p, "%s", rcd_decode_str);
321 if (cper_dimm_err_location(cmem, rcd_decode_str))
322 trace_seq_printf(p, "%s", rcd_decode_str);
323 trace_seq_putc(p, '\0');
328 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
331 struct cper_mem_err_compact cmem;
333 /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
334 if (len == sizeof(struct cper_sec_mem_err_old) &&
335 (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) {
336 pr_err(FW_WARN "valid bits set for fields beyond structure\n");
339 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
340 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
341 if (mem->validation_bits & CPER_MEM_VALID_PA)
342 printk("%s""physical_address: 0x%016llx\n",
343 pfx, mem->physical_addr);
344 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
345 printk("%s""physical_address_mask: 0x%016llx\n",
346 pfx, mem->physical_addr_mask);
347 cper_mem_err_pack(mem, &cmem);
348 if (cper_mem_err_location(&cmem, rcd_decode_str))
349 printk("%s%s\n", pfx, rcd_decode_str);
350 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
351 u8 etype = mem->error_type;
352 printk("%s""error_type: %d, %s\n", pfx, etype,
353 cper_mem_err_type_str(etype));
355 if (cper_dimm_err_location(&cmem, rcd_decode_str))
356 printk("%s%s\n", pfx, rcd_decode_str);
359 static const char * const pcie_port_type_strs[] = {
361 "legacy PCI end point",
365 "upstream switch port",
366 "downstream switch port",
367 "PCIe to PCI/PCI-X bridge",
368 "PCI/PCI-X to PCIe bridge",
369 "root complex integrated endpoint device",
370 "root complex event collector",
373 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
374 const struct acpi_hest_generic_data *gdata)
376 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
377 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
378 pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
379 pcie_port_type_strs[pcie->port_type] : "unknown");
380 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
381 printk("%s""version: %d.%d\n", pfx,
382 pcie->version.major, pcie->version.minor);
383 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
384 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
385 pcie->command, pcie->status);
386 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
388 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
389 pcie->device_id.segment, pcie->device_id.bus,
390 pcie->device_id.device, pcie->device_id.function);
391 printk("%s""slot: %d\n", pfx,
392 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
393 printk("%s""secondary_bus: 0x%02x\n", pfx,
394 pcie->device_id.secondary_bus);
395 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
396 pcie->device_id.vendor_id, pcie->device_id.device_id);
397 p = pcie->device_id.class_code;
398 printk("%s""class_code: %02x%02x%02x\n", pfx, p[2], p[1], p[0]);
400 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
401 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
402 pcie->serial_number.lower, pcie->serial_number.upper);
403 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
405 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
406 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
408 /* Fatal errors call __ghes_panic() before AER handler prints this */
409 if ((pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) &&
410 (gdata->error_severity & CPER_SEV_FATAL)) {
411 struct aer_capability_regs *aer;
413 aer = (struct aer_capability_regs *)pcie->aer_info;
414 printk("%saer_uncor_status: 0x%08x, aer_uncor_mask: 0x%08x\n",
415 pfx, aer->uncor_status, aer->uncor_mask);
416 printk("%saer_uncor_severity: 0x%08x\n",
417 pfx, aer->uncor_severity);
418 printk("%sTLP Header: %08x %08x %08x %08x\n", pfx,
419 aer->header_log.dw0, aer->header_log.dw1,
420 aer->header_log.dw2, aer->header_log.dw3);
/*
 * Names of the firmware error record types, indexed by the record_type
 * field of struct cper_sec_fw_err_rec_ref.
 */
static const char * const fw_err_rec_type_strs[] = {
	[0] = "IPF SAL Error Record",
	[1] = "SOC Firmware Error Record Type1 (Legacy CrashLog Support)",
	[2] = "SOC Firmware Error Record Type2",
};
430 static void cper_print_fw_err(const char *pfx,
431 struct acpi_hest_generic_data *gdata,
432 const struct cper_sec_fw_err_rec_ref *fw_err)
434 void *buf = acpi_hest_get_payload(gdata);
435 u32 offset, length = gdata->error_data_length;
437 printk("%s""Firmware Error Record Type: %s\n", pfx,
438 fw_err->record_type < ARRAY_SIZE(fw_err_rec_type_strs) ?
439 fw_err_rec_type_strs[fw_err->record_type] : "unknown");
440 printk("%s""Revision: %d\n", pfx, fw_err->revision);
442 /* Record Type based on UEFI 2.7 */
443 if (fw_err->revision == 0) {
444 printk("%s""Record Identifier: %08llx\n", pfx,
445 fw_err->record_identifier);
446 } else if (fw_err->revision == 2) {
447 printk("%s""Record Identifier: %pUl\n", pfx,
448 &fw_err->record_identifier_guid);
452 * The FW error record may contain trailing data beyond the
453 * structure defined by the specification. As the fields
454 * defined (and hence the offset of any trailing data) vary
455 * with the revision, set the offset to account for this
458 if (fw_err->revision == 0) {
459 /* record_identifier_guid not defined */
460 offset = offsetof(struct cper_sec_fw_err_rec_ref,
461 record_identifier_guid);
462 } else if (fw_err->revision == 1) {
463 /* record_identifier not defined */
464 offset = offsetof(struct cper_sec_fw_err_rec_ref,
467 offset = sizeof(*fw_err);
473 print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, buf, length, true);
476 static void cper_print_tstamp(const char *pfx,
477 struct acpi_hest_generic_data_v300 *gdata)
479 __u8 hour, min, sec, day, mon, year, century, *timestamp;
481 if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
482 timestamp = (__u8 *)&(gdata->time_stamp);
483 sec = bcd2bin(timestamp[0]);
484 min = bcd2bin(timestamp[1]);
485 hour = bcd2bin(timestamp[2]);
486 day = bcd2bin(timestamp[4]);
487 mon = bcd2bin(timestamp[5]);
488 year = bcd2bin(timestamp[6]);
489 century = bcd2bin(timestamp[7]);
491 printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
492 (timestamp[3] & 0x1 ? "precise " : "imprecise "),
493 century, year, mon, day, hour, min, sec);
498 cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
501 guid_t *sec_type = (guid_t *)gdata->section_type;
505 if (acpi_hest_get_version(gdata) >= 3)
506 cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata);
508 severity = gdata->error_severity;
509 printk("%s""Error %d, type: %s\n", pfx, sec_no,
510 cper_severity_str(severity));
511 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
512 printk("%s""fru_id: %pUl\n", pfx, gdata->fru_id);
513 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
514 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
516 snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
517 if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
518 struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
520 printk("%s""section_type: general processor error\n", newpfx);
521 if (gdata->error_data_length >= sizeof(*proc_err))
522 cper_print_proc_generic(newpfx, proc_err);
524 goto err_section_too_small;
525 } else if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
526 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
528 printk("%s""section_type: memory error\n", newpfx);
529 if (gdata->error_data_length >=
530 sizeof(struct cper_sec_mem_err_old))
531 cper_print_mem(newpfx, mem_err,
532 gdata->error_data_length);
534 goto err_section_too_small;
535 } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
536 struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata);
538 printk("%s""section_type: PCIe error\n", newpfx);
539 if (gdata->error_data_length >= sizeof(*pcie))
540 cper_print_pcie(newpfx, pcie, gdata);
542 goto err_section_too_small;
543 #if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
544 } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
545 struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
547 printk("%ssection_type: ARM processor error\n", newpfx);
548 if (gdata->error_data_length >= sizeof(*arm_err))
549 cper_print_proc_arm(newpfx, arm_err);
551 goto err_section_too_small;
553 #if defined(CONFIG_UEFI_CPER_X86)
554 } else if (guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
555 struct cper_sec_proc_ia *ia_err = acpi_hest_get_payload(gdata);
557 printk("%ssection_type: IA32/X64 processor error\n", newpfx);
558 if (gdata->error_data_length >= sizeof(*ia_err))
559 cper_print_proc_ia(newpfx, ia_err);
561 goto err_section_too_small;
563 } else if (guid_equal(sec_type, &CPER_SEC_FW_ERR_REC_REF)) {
564 struct cper_sec_fw_err_rec_ref *fw_err = acpi_hest_get_payload(gdata);
566 printk("%ssection_type: Firmware Error Record Reference\n",
568 /* The minimal FW Error Record contains 16 bytes */
569 if (gdata->error_data_length >= SZ_16)
570 cper_print_fw_err(newpfx, gdata, fw_err);
572 goto err_section_too_small;
574 const void *err = acpi_hest_get_payload(gdata);
576 printk("%ssection type: unknown, %pUl\n", newpfx, sec_type);
577 printk("%ssection length: %#x\n", newpfx,
578 gdata->error_data_length);
579 print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err,
580 gdata->error_data_length, true);
585 err_section_too_small:
586 pr_err(FW_WARN "error section length is too small\n");
589 void cper_estatus_print(const char *pfx,
590 const struct acpi_hest_generic_status *estatus)
592 struct acpi_hest_generic_data *gdata;
597 severity = estatus->error_severity;
598 if (severity == CPER_SEV_CORRECTED)
599 printk("%s%s\n", pfx,
600 "It has been corrected by h/w "
601 "and requires no further action");
602 printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
603 snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
605 apei_estatus_for_each_section(estatus, gdata) {
606 cper_estatus_print_section(newpfx, gdata, sec_no);
610 EXPORT_SYMBOL_GPL(cper_estatus_print);
612 int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
614 if (estatus->data_length &&
615 estatus->data_length < sizeof(struct acpi_hest_generic_data))
617 if (estatus->raw_data_length &&
618 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
623 EXPORT_SYMBOL_GPL(cper_estatus_check_header);
625 int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
627 struct acpi_hest_generic_data *gdata;
628 unsigned int data_len, record_size;
631 rc = cper_estatus_check_header(estatus);
635 data_len = estatus->data_length;
637 apei_estatus_for_each_section(estatus, gdata) {
638 if (sizeof(struct acpi_hest_generic_data) > data_len)
641 record_size = acpi_hest_get_record_size(gdata);
642 if (record_size > data_len)
645 data_len -= record_size;
652 EXPORT_SYMBOL_GPL(cper_estatus_check);