arch/powerpc/platforms/pseries/ras.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * Copyright (C) 2001 Dave Engebretsen IBM Corporation
   4  */
   5
   6 #include <linux/sched.h>
   7 #include <linux/interrupt.h>
   8 #include <linux/irq.h>
   9 #include <linux/of.h>
  10 #include <linux/fs.h>
  11 #include <linux/reboot.h>
  12 #include <linux/irq_work.h>
  13
  14 #include <asm/machdep.h>
  15 #include <asm/rtas.h>
  16 #include <asm/firmware.h>
  17 #include <asm/mce.h>
  18
  19 #include "pseries.h"
  20
  21 static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
  22 static DEFINE_SPINLOCK(ras_log_buf_lock);
  23
  24 static int ras_check_exception_token;
  25
  26 static void mce_process_errlog_event(struct irq_work *work);
  27 static struct irq_work mce_errlog_process_work = {
  28         .func = mce_process_errlog_event,
  29 };
  30
  31 #define EPOW_SENSOR_TOKEN       9
  32 #define EPOW_SENSOR_INDEX       0
  33
  34 /* EPOW events counter variable */
  35 static int num_epow_events;
  36
  37 static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
  38 static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
  39 static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
  40
  41 /* RTAS pseries MCE errorlog section. */
  42 struct pseries_mc_errorlog {
  43         __be32  fru_id;
  44         __be32  proc_id;
  45         u8      error_type;
  46         /*
  47          * sub_err_type (1 byte). Bit fields depend on error_type
  48          *
  49          *   MSB0
  50          *   |
  51          *   V
  52          *   01234567
  53          *   XXXXXXXX
  54          *
  55          * For error_type == MC_ERROR_TYPE_UE
  56          *   XXXXXXXX
  57          *   X          1: Permanent or Transient UE.
  58          *    X         1: Effective address provided.
  59          *     X        1: Logical address provided.
  60          *      XX      2: Reserved.
  61          *        XXX   3: Type of UE error.
  62          *
  63          * For error_type != MC_ERROR_TYPE_UE
  64          *   XXXXXXXX
  65          *   X          1: Effective address provided.
  66          *    XXXXX     5: Reserved.
  67          *         XX   2: Type of SLB/ERAT/TLB error.
  68          */
  69         u8      sub_err_type;
  70         u8      reserved_1[6];
  71         __be64  effective_address;
  72         __be64  logical_address;
  73 } __packed;
  74
  75 /* RTAS pseries MCE error types */
  76 #define MC_ERROR_TYPE_UE                0x00
  77 #define MC_ERROR_TYPE_SLB               0x01
  78 #define MC_ERROR_TYPE_ERAT              0x02
  79 #define MC_ERROR_TYPE_UNKNOWN           0x03
  80 #define MC_ERROR_TYPE_TLB               0x04
  81 #define MC_ERROR_TYPE_D_CACHE           0x05
  82 #define MC_ERROR_TYPE_I_CACHE           0x07
  83
  84 /* RTAS pseries MCE error sub types */
  85 #define MC_ERROR_UE_INDETERMINATE               0
  86 #define MC_ERROR_UE_IFETCH                      1
  87 #define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH      2
  88 #define MC_ERROR_UE_LOAD_STORE                  3
  89 #define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE  4
  90
  91 #define UE_EFFECTIVE_ADDR_PROVIDED              0x40
  92 #define UE_LOGICAL_ADDR_PROVIDED                0x20
  93
  94 #define MC_ERROR_SLB_PARITY             0
  95 #define MC_ERROR_SLB_MULTIHIT           1
  96 #define MC_ERROR_SLB_INDETERMINATE      2
  97
  98 #define MC_ERROR_ERAT_PARITY            1
  99 #define MC_ERROR_ERAT_MULTIHIT          2
 100 #define MC_ERROR_ERAT_INDETERMINATE     3
 101
 102 #define MC_ERROR_TLB_PARITY             1
 103 #define MC_ERROR_TLB_MULTIHIT           2
 104 #define MC_ERROR_TLB_INDETERMINATE      3
 105
 106 static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
 107 {
 108         switch (mlog->error_type) {
 109         case    MC_ERROR_TYPE_UE:
 110                 return (mlog->sub_err_type & 0x07);
 111         case    MC_ERROR_TYPE_SLB:
 112         case    MC_ERROR_TYPE_ERAT:
 113         case    MC_ERROR_TYPE_TLB:
 114                 return (mlog->sub_err_type & 0x03);
 115         default:
 116                 return 0;
 117         }
 118 }
 119
 120 /*
 121  * Enable the hotplug interrupt late because processing them may touch other
 122  * devices or systems (e.g. hugepages) that have not been initialized at the
 123  * subsys stage.
 124  */
 125 int __init init_ras_hotplug_IRQ(void)
 126 {
 127         struct device_node *np;
 128
 129         /* Hotplug Events */
 130         np = of_find_node_by_path("/event-sources/hot-plug-events");
 131         if (np != NULL) {
 132                 if (dlpar_workqueue_init() == 0)
 133                         request_event_sources_irqs(np, ras_hotplug_interrupt,
 134                                                    "RAS_HOTPLUG");
 135                 of_node_put(np);
 136         }
 137
 138         return 0;
 139 }
 140 machine_late_initcall(pseries, init_ras_hotplug_IRQ);
 141
 142 /*
 143  * Initialize handlers for the set of interrupts caused by hardware errors
 144  * and power system events.
 145  */
 146 static int __init init_ras_IRQ(void)
 147 {
 148         struct device_node *np;
 149
 150         ras_check_exception_token = rtas_token("check-exception");
 151
 152         /* Internal Errors */
 153         np = of_find_node_by_path("/event-sources/internal-errors");
 154         if (np != NULL) {
 155                 request_event_sources_irqs(np, ras_error_interrupt,
 156                                            "RAS_ERROR");
 157                 of_node_put(np);
 158         }
 159
 160         /* EPOW Events */
 161         np = of_find_node_by_path("/event-sources/epow-events");
 162         if (np != NULL) {
 163                 request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW");
 164                 of_node_put(np);
 165         }
 166
 167         return 0;
 168 }
 169 machine_subsys_initcall(pseries, init_ras_IRQ);
 170
 171 #define EPOW_SHUTDOWN_NORMAL                            1
 172 #define EPOW_SHUTDOWN_ON_UPS                            2
 173 #define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS        3
 174 #define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH      4
 175
 176 static void handle_system_shutdown(char event_modifier)
 177 {
 178         switch (event_modifier) {
 179         case EPOW_SHUTDOWN_NORMAL:
 180                 pr_emerg("Power off requested\n");
 181                 orderly_poweroff(true);
 182                 break;
 183
 184         case EPOW_SHUTDOWN_ON_UPS:
 185                 pr_emerg("Loss of system power detected. System is running on"
 186                          " UPS/battery. Check RTAS error log for details\n");
 187                 break;
 188
 189         case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
 190                 pr_emerg("Loss of system critical functions detected. Check"
 191                          " RTAS error log for details\n");
 192                 orderly_poweroff(true);
 193                 break;
 194
 195         case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
 196                 pr_emerg("High ambient temperature detected. Check RTAS"
 197                          " error log for details\n");
 198                 orderly_poweroff(true);
 199                 break;
 200
 201         default:
 202                 pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
 203                         event_modifier);
 204         }
 205 }
 206
 207 struct epow_errorlog {
 208         unsigned char sensor_value;
 209         unsigned char event_modifier;
 210         unsigned char extended_modifier;
 211         unsigned char reserved;
 212         unsigned char platform_reason;
 213 };
 214
 215 #define EPOW_RESET                      0
 216 #define EPOW_WARN_COOLING               1
 217 #define EPOW_WARN_POWER                 2
 218 #define EPOW_SYSTEM_SHUTDOWN            3
 219 #define EPOW_SYSTEM_HALT                4
 220 #define EPOW_MAIN_ENCLOSURE             5
 221 #define EPOW_POWER_OFF                  7
 222
 223 static void rtas_parse_epow_errlog(struct rtas_error_log *log)
 224 {
 225         struct pseries_errorlog *pseries_log;
 226         struct epow_errorlog *epow_log;
 227         char action_code;
 228         char modifier;
 229
 230         pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
 231         if (pseries_log == NULL)
 232                 return;
 233
 234         epow_log = (struct epow_errorlog *)pseries_log->data;
 235         action_code = epow_log->sensor_value & 0xF;     /* bottom 4 bits */
 236         modifier = epow_log->event_modifier & 0xF;      /* bottom 4 bits */
 237
 238         switch (action_code) {
 239         case EPOW_RESET:
 240                 if (num_epow_events) {
 241                         pr_info("Non critical power/cooling issue cleared\n");
 242                         num_epow_events--;
 243                 }
 244                 break;
 245
 246         case EPOW_WARN_COOLING:
 247                 pr_info("Non-critical cooling issue detected. Check RTAS error"
 248                         " log for details\n");
 249                 break;
 250
 251         case EPOW_WARN_POWER:
 252                 pr_info("Non-critical power issue detected. Check RTAS error"
 253                         " log for details\n");
 254                 break;
 255
 256         case EPOW_SYSTEM_SHUTDOWN:
 257                 handle_system_shutdown(modifier);
 258                 break;
 259
 260         case EPOW_SYSTEM_HALT:
 261                 pr_emerg("Critical power/cooling issue detected. Check RTAS"
 262                          " error log for details. Powering off.\n");
 263                 orderly_poweroff(true);
 264                 break;
 265
 266         case EPOW_MAIN_ENCLOSURE:
 267         case EPOW_POWER_OFF:
 268                 pr_emerg("System about to lose power. Check RTAS error log "
 269                          " for details. Powering off immediately.\n");
 270                 emergency_sync();
 271                 kernel_power_off();
 272                 break;
 273
 274         default:
 275                 pr_err("Unknown power/cooling event (action code  = %d)\n",
 276                         action_code);
 277         }
 278
 279         /* Increment epow events counter variable */
 280         if (action_code != EPOW_RESET)
 281                 num_epow_events++;
 282 }
 283
 284 static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
 285 {
 286         struct pseries_errorlog *pseries_log;
 287         struct pseries_hp_errorlog *hp_elog;
 288
 289         spin_lock(&ras_log_buf_lock);
 290
 291         rtas_call(ras_check_exception_token, 6, 1, NULL,
 292                   RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
 293                   RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf),
 294                   rtas_get_error_log_max());
 295
 296         pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf,
 297                                            PSERIES_ELOG_SECT_ID_HOTPLUG);
 298         hp_elog = (struct pseries_hp_errorlog *)pseries_log->data;
 299
 300         /*
 301          * Since PCI hotplug is not currently supported on pseries, put PCI
 302          * hotplug events on the ras_log_buf to be handled by rtas_errd.
 303          */
 304         if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
 305             hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
 306             hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
 307                 queue_hotplug_event(hp_elog);
 308         else
 309                 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
 310
 311         spin_unlock(&ras_log_buf_lock);
 312         return IRQ_HANDLED;
 313 }
 314
 315 /* Handle environmental and power warning (EPOW) interrupts. */
 316 static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
 317 {
 318         int status;
 319         int state;
 320         int critical;
 321
 322         status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX,
 323                                       &state);
 324
 325         if (state > 3)
 326                 critical = 1;           /* Time Critical */
 327         else
 328                 critical = 0;
 329
 330         spin_lock(&ras_log_buf_lock);
 331
 332         status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 333                            RTAS_VECTOR_EXTERNAL_INTERRUPT,
 334                            virq_to_hw(irq),
 335                            RTAS_EPOW_WARNING,
 336                            critical, __pa(&ras_log_buf),
 337                                 rtas_get_error_log_max());
 338
 339         log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
 340
 341         rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
 342
 343         spin_unlock(&ras_log_buf_lock);
 344         return IRQ_HANDLED;
 345 }
 346
 347 /*
 348  * Handle hardware error interrupts.
 349  *
 350  * RTAS check-exception is called to collect data on the exception.  If
 351  * the error is deemed recoverable, we log a warning and return.
 352  * For nonrecoverable errors, an error is logged and we stop all processing
 353  * as quickly as possible in order to prevent propagation of the failure.
 354  */
 355 static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
 356 {
 357         struct rtas_error_log *rtas_elog;
 358         int status;
 359         int fatal;
 360
 361         spin_lock(&ras_log_buf_lock);
 362
 363         status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 364                            RTAS_VECTOR_EXTERNAL_INTERRUPT,
 365                            virq_to_hw(irq),
 366                            RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
 367                            __pa(&ras_log_buf),
 368                                 rtas_get_error_log_max());
 369
 370         rtas_elog = (struct rtas_error_log *)ras_log_buf;
 371
 372         if (status == 0 &&
 373             rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC)
 374                 fatal = 1;
 375         else
 376                 fatal = 0;
 377
 378         /* format and print the extended information */
 379         log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
 380
 381         if (fatal) {
 382                 pr_emerg("Fatal hardware error detected. Check RTAS error"
 383                          " log for details. Powering off immediately\n");
 384                 emergency_sync();
 385                 kernel_power_off();
 386         } else {
 387                 pr_err("Recoverable hardware error detected\n");
 388         }
 389
 390         spin_unlock(&ras_log_buf_lock);
 391         return IRQ_HANDLED;
 392 }
 393
 394 /*
 395  * Some versions of FWNMI place the buffer inside the 4kB page starting at
 396  * 0x7000. Other versions place it inside the rtas buffer. We check both.
 397  * Minimum size of the buffer is 16 bytes.
 398  */
 399 #define VALID_FWNMI_BUFFER(A) \
 400         ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \
 401         (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16))))
 402
 403 static inline struct rtas_error_log *fwnmi_get_errlog(void)
 404 {
 405         return (struct rtas_error_log *)local_paca->mce_data_buf;
 406 }
 407
 408 static __be64 *fwnmi_get_savep(struct pt_regs *regs)
 409 {
 410         unsigned long savep_ra;
 411
 412         /* Mask top two bits */
 413         savep_ra = regs->gpr[3] & ~(0x3UL << 62);
 414         if (!VALID_FWNMI_BUFFER(savep_ra)) {
 415                 printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
 416                 return NULL;
 417         }
 418
 419         return __va(savep_ra);
 420 }
 421
 422 /*
 423  * Get the error information for errors coming through the
 424  * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
 425  * the actual r3 if possible, and a ptr to the error log entry
 426  * will be returned if found.
 427  *
 428  * Use one buffer mce_data_buf per cpu to store RTAS error.
 429  *
 430  * The mce_data_buf does not have any locks or protection around it,
 431  * if a second machine check comes in, or a system reset is done
 432  * before we have logged the error, then we will get corruption in the
 433  * error log.  This is preferable over holding off on calling
 434  * ibm,nmi-interlock which would result in us checkstopping if a
 435  * second machine check did come in.
 436  */
 437 static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
 438 {
 439         struct rtas_error_log *h;
 440         __be64 *savep;
 441
 442         savep = fwnmi_get_savep(regs);
 443         if (!savep)
 444                 return NULL;
 445
 446         regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
 447
 448         h = (struct rtas_error_log *)&savep[1];
 449         /* Use the per cpu buffer from paca to store rtas error log */
 450         memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
 451         if (!rtas_error_extended(h)) {
 452                 memcpy(local_paca->mce_data_buf, h, sizeof(__u64));
 453         } else {
 454                 int len, error_log_length;
 455
 456                 error_log_length = 8 + rtas_error_extended_log_length(h);
 457                 len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
 458                 memcpy(local_paca->mce_data_buf, h, len);
 459         }
 460
 461         return (struct rtas_error_log *)local_paca->mce_data_buf;
 462 }
 463
 464 /* Call this when done with the data returned by FWNMI_get_errinfo.
 465  * It will release the saved data area for other CPUs in the
 466  * partition to receive FWNMI errors.
 467  */
 468 static void fwnmi_release_errinfo(void)
 469 {
 470         struct rtas_args rtas_args;
 471         int ret;
 472
 473         /*
 474          * On pseries, the machine check stack is limited to under 4GB, so
 475          * args can be on-stack.
 476          */
 477         rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL);
 478         ret = be32_to_cpu(rtas_args.rets[0]);
 479         if (ret != 0)
 480                 printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
 481 }
 482
 483 int pSeries_system_reset_exception(struct pt_regs *regs)
 484 {
 485 #ifdef __LITTLE_ENDIAN__
 486         /*
 487          * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
 488          * to detect the bad SRR1 pattern here. Flip the NIP back to correct
 489          * endian for reporting purposes. Unfortunately the MSR can't be fixed,
 490          * so clear it. It will be missing MSR_RI so we won't try to recover.
 491          */
 492         if ((be64_to_cpu(regs->msr) &
 493                         (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
 494                          MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
 495                 regs->nip = be64_to_cpu((__be64)regs->nip);
 496                 regs->msr = 0;
 497         }
 498 #endif
 499
 500         if (fwnmi_active) {
 501                 __be64 *savep;
 502
 503                 /*
 504                  * Firmware (PowerVM and KVM) saves r3 to a save area like
 505                  * machine check, which is not exactly what PAPR (2.9)
 506                  * suggests but there is no way to detect otherwise, so this
 507                  * is the interface now.
 508                  *
 509                  * System resets do not save any error log or require an
 510                  * "ibm,nmi-interlock" rtas call to release.
 511                  */
 512
 513                 savep = fwnmi_get_savep(regs);
 514                 if (savep)
 515                         regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
 516         }
 517
 518         if (smp_handle_nmi_ipi(regs))
 519                 return 1;
 520
 521         return 0; /* need to perform reset */
 522 }
 523
 524 static int mce_handle_err_realmode(int disposition, u8 error_type)
 525 {
 526 #ifdef CONFIG_PPC_BOOK3S_64
 527         if (disposition == RTAS_DISP_NOT_RECOVERED) {
 528                 switch (error_type) {
 529                 case    MC_ERROR_TYPE_SLB:
 530                 case    MC_ERROR_TYPE_ERAT:
 531                         /*
 532                          * Store the old slb content in paca before flushing.
 533                          * Print this when we go to virtual mode.
 534                          * There are chances that we may hit MCE again if there
 535                          * is a parity error on the SLB entry we trying to read
 536                          * for saving. Hence limit the slb saving to single
 537                          * level of recursion.
 538                          */
 539                         if (local_paca->in_mce == 1)
 540                                 slb_save_contents(local_paca->mce_faulty_slbs);
 541                         flush_and_reload_slb();
 542                         disposition = RTAS_DISP_FULLY_RECOVERED;
 543                         break;
 544                 default:
 545                         break;
 546                 }
 547         } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
 548                 /* Platform corrected itself but could be degraded */
 549                 pr_err("MCE: limited recovery, system may be degraded\n");
 550                 disposition = RTAS_DISP_FULLY_RECOVERED;
 551         }
 552 #endif
 553         return disposition;
 554 }
 555
 556 static int mce_handle_err_virtmode(struct pt_regs *regs,
 557                                    struct rtas_error_log *errp,
 558                                    struct pseries_mc_errorlog *mce_log,
 559                                    int disposition)
 560 {
 561         struct mce_error_info mce_err = { 0 };
 562         int initiator = rtas_error_initiator(errp);
 563         int severity = rtas_error_severity(errp);
 564         unsigned long eaddr = 0, paddr = 0;
 565         u8 error_type, err_sub_type;
 566
 567         if (!mce_log)
 568                 goto out;
 569
 570         error_type = mce_log->error_type;
 571         err_sub_type = rtas_mc_error_sub_type(mce_log);
 572
 573         if (initiator == RTAS_INITIATOR_UNKNOWN)
 574                 mce_err.initiator = MCE_INITIATOR_UNKNOWN;
 575         else if (initiator == RTAS_INITIATOR_CPU)
 576                 mce_err.initiator = MCE_INITIATOR_CPU;
 577         else if (initiator == RTAS_INITIATOR_PCI)
 578                 mce_err.initiator = MCE_INITIATOR_PCI;
 579         else if (initiator == RTAS_INITIATOR_ISA)
 580                 mce_err.initiator = MCE_INITIATOR_ISA;
 581         else if (initiator == RTAS_INITIATOR_MEMORY)
 582                 mce_err.initiator = MCE_INITIATOR_MEMORY;
 583         else if (initiator == RTAS_INITIATOR_POWERMGM)
 584                 mce_err.initiator = MCE_INITIATOR_POWERMGM;
 585         else
 586                 mce_err.initiator = MCE_INITIATOR_UNKNOWN;
 587
 588         if (severity == RTAS_SEVERITY_NO_ERROR)
 589                 mce_err.severity = MCE_SEV_NO_ERROR;
 590         else if (severity == RTAS_SEVERITY_EVENT)
 591                 mce_err.severity = MCE_SEV_WARNING;
 592         else if (severity == RTAS_SEVERITY_WARNING)
 593                 mce_err.severity = MCE_SEV_WARNING;
 594         else if (severity == RTAS_SEVERITY_ERROR_SYNC)
 595                 mce_err.severity = MCE_SEV_SEVERE;
 596         else if (severity == RTAS_SEVERITY_ERROR)
 597                 mce_err.severity = MCE_SEV_SEVERE;
 598         else if (severity == RTAS_SEVERITY_FATAL)
 599                 mce_err.severity = MCE_SEV_FATAL;
 600         else
 601                 mce_err.severity = MCE_SEV_FATAL;
 602
 603         if (severity <= RTAS_SEVERITY_ERROR_SYNC)
 604                 mce_err.sync_error = true;
 605         else
 606                 mce_err.sync_error = false;
 607
 608         mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
 609         mce_err.error_class = MCE_ECLASS_UNKNOWN;
 610
 611         switch (error_type) {
 612         case MC_ERROR_TYPE_UE:
 613                 mce_err.error_type = MCE_ERROR_TYPE_UE;
 614                 mce_common_process_ue(regs, &mce_err);
 615                 if (mce_err.ignore_event)
 616                         disposition = RTAS_DISP_FULLY_RECOVERED;
 617                 switch (err_sub_type) {
 618                 case MC_ERROR_UE_IFETCH:
 619                         mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
 620                         break;
 621                 case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
 622                         mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
 623                         break;
 624                 case MC_ERROR_UE_LOAD_STORE:
 625                         mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
 626                         break;
 627                 case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
 628                         mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
 629                         break;
 630                 case MC_ERROR_UE_INDETERMINATE:
 631                 default:
 632                         mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
 633                         break;
 634                 }
 635                 if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED)
 636                         eaddr = be64_to_cpu(mce_log->effective_address);
 637
 638                 if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
 639                         paddr = be64_to_cpu(mce_log->logical_address);
 640                 } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
 641                         unsigned long pfn;
 642
 643                         pfn = addr_to_pfn(regs, eaddr);
 644                         if (pfn != ULONG_MAX)
 645                                 paddr = pfn << PAGE_SHIFT;
 646                 }
 647
 648                 break;
 649         case MC_ERROR_TYPE_SLB:
 650                 mce_err.error_type = MCE_ERROR_TYPE_SLB;
 651                 switch (err_sub_type) {
 652                 case MC_ERROR_SLB_PARITY:
 653                         mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
 654                         break;
 655                 case MC_ERROR_SLB_MULTIHIT:
 656                         mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
 657                         break;
 658                 case MC_ERROR_SLB_INDETERMINATE:
 659                 default:
 660                         mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
 661                         break;
 662                 }
 663                 if (mce_log->sub_err_type & 0x80)
 664                         eaddr = be64_to_cpu(mce_log->effective_address);
 665                 break;
 666         case MC_ERROR_TYPE_ERAT:
 667                 mce_err.error_type = MCE_ERROR_TYPE_ERAT;
 668                 switch (err_sub_type) {
 669                 case MC_ERROR_ERAT_PARITY:
 670                         mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
 671                         break;
 672                 case MC_ERROR_ERAT_MULTIHIT:
 673                         mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
 674                         break;
 675                 case MC_ERROR_ERAT_INDETERMINATE:
 676                 default:
 677                         mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
 678                         break;
 679                 }
 680                 if (mce_log->sub_err_type & 0x80)
 681                         eaddr = be64_to_cpu(mce_log->effective_address);
 682                 break;
 683         case MC_ERROR_TYPE_TLB:
 684                 mce_err.error_type = MCE_ERROR_TYPE_TLB;
 685                 switch (err_sub_type) {
 686                 case MC_ERROR_TLB_PARITY:
 687                         mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
 688                         break;
 689                 case MC_ERROR_TLB_MULTIHIT:
 690                         mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
 691                         break;
 692                 case MC_ERROR_TLB_INDETERMINATE:
 693                 default:
 694                         mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
 695                         break;
 696                 }
 697                 if (mce_log->sub_err_type & 0x80)
 698                         eaddr = be64_to_cpu(mce_log->effective_address);
 699                 break;
 700         case MC_ERROR_TYPE_D_CACHE:
 701                 mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
 702                 break;
 703         case MC_ERROR_TYPE_I_CACHE:
 704                 mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
 705                 break;
 706         case MC_ERROR_TYPE_UNKNOWN:
 707         default:
 708                 mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
 709                 break;
 710         }
 711 out:
 712         save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
 713                        &mce_err, regs->nip, eaddr, paddr);
 714         return disposition;
 715 }
 716
 717 static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
 718 {
 719         struct pseries_errorlog *pseries_log;
 720         struct pseries_mc_errorlog *mce_log = NULL;
 721         int disposition = rtas_error_disposition(errp);
 722         u8 error_type;
 723
 724         if (!rtas_error_extended(errp))
 725                 goto out;
 726
 727         pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
 728         if (!pseries_log)
 729                 goto out;
 730
 731         mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
 732         error_type = mce_log->error_type;
 733
 734         disposition = mce_handle_err_realmode(disposition, error_type);
 735
 736         /*
 737          * Enable translation as we will be accessing per-cpu variables
 738          * in save_mce_event() which may fall outside RMO region, also
 739          * leave it enabled because subsequently we will be queuing work
 740          * to workqueues where again per-cpu variables accessed, besides
 741          * fwnmi_release_errinfo() crashes when called in realmode on
 742          * pseries.
 743          * Note: All the realmode handling like flushing SLB entries for
 744          *       SLB multihit is done by now.
 745          */
 746 out:
 747         mtmsr(mfmsr() | MSR_IR | MSR_DR);
 748         disposition = mce_handle_err_virtmode(regs, errp, mce_log,
 749                                               disposition);
 750         return disposition;
 751 }
 752
 753 /*
 754  * Process MCE rtas errlog event.
 755  */
 756 static void mce_process_errlog_event(struct irq_work *work)
 757 {
 758         struct rtas_error_log *err;
 759
 760         err = fwnmi_get_errlog();
 761         log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
 762 }
 763
 764 /*
 765  * See if we can recover from a machine check exception.
 766  * This is only called on power4 (or above) and only via
 767  * the Firmware Non-Maskable Interrupts (fwnmi) handler
 768  * which provides the error analysis for us.
 769  *
 770  * Return 1 if corrected (or delivered a signal).
 771  * Return 0 if there is nothing we can do.
 772  */
 773 static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
 774 {
 775         int recovered = 0;
 776
 777         if (!(regs->msr & MSR_RI)) {
 778                 /* If MSR_RI isn't set, we cannot recover */
 779                 pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
 780                 recovered = 0;
 781         } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
 782                 /* Platform corrected itself */
 783                 recovered = 1;
 784         } else if (evt->severity == MCE_SEV_FATAL) {
 785                 /* Fatal machine check */
 786                 pr_err("Machine check interrupt is fatal\n");
 787                 recovered = 0;
 788         }
 789
 790         if (!recovered && evt->sync_error) {
 791                 /*
 792                  * Try to kill processes if we get a synchronous machine check
 793                  * (e.g., one caused by execution of this instruction). This
 794                  * will devolve into a panic if we try to kill init or are in
 795                  * an interrupt etc.
 796                  *
 797                  * TODO: Queue up this address for hwpoisioning later.
 798                  * TODO: This is not quite right for d-side machine
 799                  *       checks ->nip is not necessarily the important
 800                  *       address.
 801                  */
 802                 if ((user_mode(regs))) {
 803                         _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
 804                         recovered = 1;
 805                 } else if (die_will_crash()) {
 806                         /*
 807                          * die() would kill the kernel, so better to go via
 808                          * the platform reboot code that will log the
 809                          * machine check.
 810                          */
 811                         recovered = 0;
 812                 } else {
 813                         die("Machine check", regs, SIGBUS);
 814                         recovered = 1;
 815                 }
 816         }
 817
 818         return recovered;
 819 }
 820
 821 /*
 822  * Handle a machine check.
 823  *
 824  * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
 825  * should be present.  If so the handler which called us tells us if the
 826  * error was recovered (never true if RI=0).
 827  *
 828  * On hardware prior to Power 4 these exceptions were asynchronous which
 829  * means we can't tell exactly where it occurred and so we can't recover.
 830  */
 831 int pSeries_machine_check_exception(struct pt_regs *regs)
 832 {
 833         struct machine_check_event evt;
 834
 835         if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
 836                 return 0;
 837
 838         /* Print things out */
 839         if (evt.version != MCE_V1) {
 840                 pr_err("Machine Check Exception, Unknown event version %d !\n",
 841                        evt.version);
 842                 return 0;
 843         }
 844         machine_check_print_event_info(&evt, user_mode(regs), false);
 845
 846         if (recover_mce(regs, &evt))
 847                 return 1;
 848
 849         return 0;
 850 }
 851
 852 long pseries_machine_check_realmode(struct pt_regs *regs)
 853 {
 854         struct rtas_error_log *errp;
 855         int disposition;
 856
 857         if (fwnmi_active) {
 858                 errp = fwnmi_get_errinfo(regs);
 859                 /*
 860                  * Call to fwnmi_release_errinfo() in real mode causes kernel
 861                  * to panic. Hence we will call it as soon as we go into
 862                  * virtual mode.
 863                  */
 864                 disposition = mce_handle_error(regs, errp);
 865                 fwnmi_release_errinfo();
 866
 867                 /* Queue irq work to log this rtas event later. */
 868                 irq_work_queue(&mce_errlog_process_work);
 869
 870                 if (disposition == RTAS_DISP_FULLY_RECOVERED)
 871                         return 1;
 872         }
 873
 874         return 0;
 875 }