arch/arm64/mm/fault.c

   1 /*
   2  * Based on arch/arm/mm/fault.c
   3  *
   4  * Copyright (C) 1995  Linus Torvalds
   5  * Copyright (C) 1995-2004 Russell King
   6  * Copyright (C) 2012 ARM Ltd.
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License version 2 as
  10  * published by the Free Software Foundation.
  11  *
  12  * This program is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19  */
  20
  21 #include <linux/extable.h>
  22 #include <linux/signal.h>
  23 #include <linux/mm.h>
  24 #include <linux/hardirq.h>
  25 #include <linux/init.h>
  26 #include <linux/kprobes.h>
  27 #include <linux/uaccess.h>
  28 #include <linux/page-flags.h>
  29 #include <linux/sched/signal.h>
  30 #include <linux/sched/debug.h>
  31 #include <linux/highmem.h>
  32 #include <linux/perf_event.h>
  33 #include <linux/preempt.h>
  34 #include <linux/hugetlb.h>
  35
  36 #include <asm/bug.h>
  37 #include <asm/cmpxchg.h>
  38 #include <asm/cpufeature.h>
  39 #include <asm/exception.h>
  40 #include <asm/daifflags.h>
  41 #include <asm/debug-monitors.h>
  42 #include <asm/esr.h>
  43 #include <asm/sysreg.h>
  44 #include <asm/system_misc.h>
  45 #include <asm/pgtable.h>
  46 #include <asm/tlbflush.h>
  47 #include <asm/traps.h>
  48
  49 #include <acpi/ghes.h>
  50
/*
 * One row of a fault dispatch table: the handler to call for a given fault,
 * plus the signal number, si_code and description used when the fault has
 * to be reported.
 */
struct fault_info {
        /* Handler; callers in this file treat a zero return as "handled". */
        int     (*fn)(unsigned long addr, unsigned int esr,
                      struct pt_regs *regs);
        int     sig;            /* copied into si_signo when reporting */
        int     code;           /* copied into si_code when reporting */
        const char *name;       /* description handed to the signal/die helpers */
};

/*
 * Forward declarations so the lookup helpers below can index the tables
 * before their definitions appear.
 */
static const struct fault_info fault_info[];
static struct fault_info debug_fault_info[];
  61
  62 static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
  63 {
  64         return fault_info + (esr & ESR_ELx_FSC);
  65 }
  66
  67 static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr)
  68 {
  69         return debug_fault_info + DBG_ESR_EVT(esr);
  70 }
  71
  72 #ifdef CONFIG_KPROBES
  73 static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
  74 {
  75         int ret = 0;
  76
  77         /* kprobe_running() needs smp_processor_id() */
  78         if (!user_mode(regs)) {
  79                 preempt_disable();
  80                 if (kprobe_running() && kprobe_fault_handler(regs, esr))
  81                         ret = 1;
  82                 preempt_enable();
  83         }
  84
  85         return ret;
  86 }
  87 #else
  88 static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
  89 {
  90         return 0;
  91 }
  92 #endif
  93
  94 static void data_abort_decode(unsigned int esr)
  95 {
  96         pr_alert("Data abort info:\n");
  97
  98         if (esr & ESR_ELx_ISV) {
  99                 pr_alert("  Access size = %u byte(s)\n",
 100                          1U << ((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT));
 101                 pr_alert("  SSE = %lu, SRT = %lu\n",
 102                          (esr & ESR_ELx_SSE) >> ESR_ELx_SSE_SHIFT,
 103                          (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT);
 104                 pr_alert("  SF = %lu, AR = %lu\n",
 105                          (esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT,
 106                          (esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT);
 107         } else {
 108                 pr_alert("  ISV = 0, ISS = 0x%08lx\n", esr & ESR_ELx_ISS_MASK);
 109         }
 110
 111         pr_alert("  CM = %lu, WnR = %lu\n",
 112                  (esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT,
 113                  (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
 114 }
 115
 116 static void mem_abort_decode(unsigned int esr)
 117 {
 118         pr_alert("Mem abort info:\n");
 119
 120         pr_alert("  ESR = 0x%08x\n", esr);
 121         pr_alert("  Exception class = %s, IL = %u bits\n",
 122                  esr_get_class_string(esr),
 123                  (esr & ESR_ELx_IL) ? 32 : 16);
 124         pr_alert("  SET = %lu, FnV = %lu\n",
 125                  (esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT,
 126                  (esr & ESR_ELx_FnV) >> ESR_ELx_FnV_SHIFT);
 127         pr_alert("  EA = %lu, S1PTW = %lu\n",
 128                  (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT,
 129                  (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT);
 130
 131         if (esr_is_data_abort(esr))
 132                 data_abort_decode(esr);
 133 }
 134
 135 /*
 136  * Dump out the page tables associated with 'addr' in the currently active mm.
 137  */
 138 void show_pte(unsigned long addr)
 139 {
 140         struct mm_struct *mm;
 141         pgd_t *pgdp;
 142         pgd_t pgd;
 143
 144         if (addr < TASK_SIZE) {
 145                 /* TTBR0 */
 146                 mm = current->active_mm;
 147                 if (mm == &init_mm) {
 148                         pr_alert("[%016lx] user address but active_mm is swapper\n",
 149                                  addr);
 150                         return;
 151                 }
 152         } else if (addr >= VA_START) {
 153                 /* TTBR1 */
 154                 mm = &init_mm;
 155         } else {
 156                 pr_alert("[%016lx] address between user and kernel address ranges\n",
 157                          addr);
 158                 return;
 159         }
 160
 161         pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
 162                  mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
 163                  VA_BITS, mm->pgd);
 164         pgdp = pgd_offset(mm, addr);
 165         pgd = READ_ONCE(*pgdp);
 166         pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
 167
 168         do {
 169                 pud_t *pudp, pud;
 170                 pmd_t *pmdp, pmd;
 171                 pte_t *ptep, pte;
 172
 173                 if (pgd_none(pgd) || pgd_bad(pgd))
 174                         break;
 175
 176                 pudp = pud_offset(pgdp, addr);
 177                 pud = READ_ONCE(*pudp);
 178                 pr_cont(", pud=%016llx", pud_val(pud));
 179                 if (pud_none(pud) || pud_bad(pud))
 180                         break;
 181
 182                 pmdp = pmd_offset(pudp, addr);
 183                 pmd = READ_ONCE(*pmdp);
 184                 pr_cont(", pmd=%016llx", pmd_val(pmd));
 185                 if (pmd_none(pmd) || pmd_bad(pmd))
 186                         break;
 187
 188                 ptep = pte_offset_map(pmdp, addr);
 189                 pte = READ_ONCE(*ptep);
 190                 pr_cont(", pte=%016llx", pte_val(pte));
 191                 pte_unmap(ptep);
 192         } while(0);
 193
 194         pr_cont("\n");
 195 }
 196
 197 /*
 198  * This function sets the access flags (dirty, accessed), as well as write
 199  * permission, and only to a more permissive setting.
 200  *
 201  * It needs to cope with hardware update of the accessed/dirty state by other
 202  * agents in the system and can safely skip the __sync_icache_dcache() call as,
 203  * like set_pte_at(), the PTE is never changed from no-exec to exec here.
 204  *
 205  * Returns whether or not the PTE actually changed.
 206  */
 207 int ptep_set_access_flags(struct vm_area_struct *vma,
 208                           unsigned long address, pte_t *ptep,
 209                           pte_t entry, int dirty)
 210 {
 211         pteval_t old_pteval, pteval;
 212         pte_t pte = READ_ONCE(*ptep);
 213
 214         if (pte_same(pte, entry))
 215                 return 0;
 216
 217         /* only preserve the access flags and write permission */
 218         pte_val(entry) &= PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY;
 219
 220         /*
 221          * Setting the flags must be done atomically to avoid racing with the
 222          * hardware update of the access/dirty state. The PTE_RDONLY bit must
 223          * be set to the most permissive (lowest value) of *ptep and entry
 224          * (calculated as: a & b == ~(~a | ~b)).
 225          */
 226         pte_val(entry) ^= PTE_RDONLY;
 227         pteval = pte_val(pte);
 228         do {
 229                 old_pteval = pteval;
 230                 pteval ^= PTE_RDONLY;
 231                 pteval |= pte_val(entry);
 232                 pteval ^= PTE_RDONLY;
 233                 pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
 234         } while (pteval != old_pteval);
 235
 236         flush_tlb_fix_spurious_fault(vma, address);
 237         return 1;
 238 }
 239
 240 static bool is_el1_instruction_abort(unsigned int esr)
 241 {
 242         return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
 243 }
 244
 245 static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
 246                                            struct pt_regs *regs)
 247 {
 248         unsigned int ec       = ESR_ELx_EC(esr);
 249         unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
 250
 251         if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
 252                 return false;
 253
 254         if (fsc_type == ESR_ELx_FSC_PERM)
 255                 return true;
 256
 257         if (addr < TASK_SIZE && system_uses_ttbr0_pan())
 258                 return fsc_type == ESR_ELx_FSC_FAULT &&
 259                         (regs->pstate & PSR_PAN_BIT);
 260
 261         return false;
 262 }
 263
 264 static void die_kernel_fault(const char *msg, unsigned long addr,
 265                              unsigned int esr, struct pt_regs *regs)
 266 {
 267         bust_spinlocks(1);
 268
 269         pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg,
 270                  addr);
 271
 272         mem_abort_decode(esr);
 273
 274         show_pte(addr);
 275         die("Oops", regs, esr);
 276         bust_spinlocks(0);
 277         do_exit(SIGKILL);
 278 }
 279
 280 static void __do_kernel_fault(unsigned long addr, unsigned int esr,
 281                               struct pt_regs *regs)
 282 {
 283         const char *msg;
 284
 285         /*
 286          * Are we prepared to handle this kernel fault?
 287          * We are almost certainly not prepared to handle instruction faults.
 288          */
 289         if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
 290                 return;
 291
 292         if (is_el1_permission_fault(addr, esr, regs)) {
 293                 if (esr & ESR_ELx_WNR)
 294                         msg = "write to read-only memory";
 295                 else
 296                         msg = "read from unreadable memory";
 297         } else if (addr < PAGE_SIZE) {
 298                 msg = "NULL pointer dereference";
 299         } else {
 300                 msg = "paging request";
 301         }
 302
 303         die_kernel_fault(msg, addr, esr, regs);
 304 }
 305
 306 static void __do_user_fault(struct siginfo *info, unsigned int esr)
 307 {
 308         current->thread.fault_address = (unsigned long)info->si_addr;
 309
 310         /*
 311          * If the faulting address is in the kernel, we must sanitize the ESR.
 312          * From userspace's point of view, kernel-only mappings don't exist
 313          * at all, so we report them as level 0 translation faults.
 314          * (This is not quite the way that "no mapping there at all" behaves:
 315          * an alignment fault not caused by the memory type would take
 316          * precedence over translation fault for a real access to empty
 317          * space. Unfortunately we can't easily distinguish "alignment fault
 318          * not caused by memory type" from "alignment fault caused by memory
 319          * type", so we ignore this wrinkle and just return the translation
 320          * fault.)
 321          */
 322         if (current->thread.fault_address >= TASK_SIZE) {
 323                 switch (ESR_ELx_EC(esr)) {
 324                 case ESR_ELx_EC_DABT_LOW:
 325                         /*
 326                          * These bits provide only information about the
 327                          * faulting instruction, which userspace knows already.
 328                          * We explicitly clear bits which are architecturally
 329                          * RES0 in case they are given meanings in future.
 330                          * We always report the ESR as if the fault was taken
 331                          * to EL1 and so ISV and the bits in ISS[23:14] are
 332                          * clear. (In fact it always will be a fault to EL1.)
 333                          */
 334                         esr &= ESR_ELx_EC_MASK | ESR_ELx_IL |
 335                                 ESR_ELx_CM | ESR_ELx_WNR;
 336                         esr |= ESR_ELx_FSC_FAULT;
 337                         break;
 338                 case ESR_ELx_EC_IABT_LOW:
 339                         /*
 340                          * Claim a level 0 translation fault.
 341                          * All other bits are architecturally RES0 for faults
 342                          * reported with that DFSC value, so we clear them.
 343                          */
 344                         esr &= ESR_ELx_EC_MASK | ESR_ELx_IL;
 345                         esr |= ESR_ELx_FSC_FAULT;
 346                         break;
 347                 default:
 348                         /*
 349                          * This should never happen (entry.S only brings us
 350                          * into this code for insn and data aborts from a lower
 351                          * exception level). Fail safe by not providing an ESR
 352                          * context record at all.
 353                          */
 354                         WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr);
 355                         esr = 0;
 356                         break;
 357                 }
 358         }
 359
 360         current->thread.fault_code = esr;
 361         arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current);
 362 }
 363
 364 static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 365 {
 366         /*
 367          * If we are in kernel mode at this point, we have no context to
 368          * handle this fault with.
 369          */
 370         if (user_mode(regs)) {
 371                 const struct fault_info *inf = esr_to_fault_info(esr);
 372                 struct siginfo si;
 373
 374                 clear_siginfo(&si);
 375                 si.si_signo     = inf->sig;
 376                 si.si_code      = inf->code;
 377                 si.si_addr      = (void __user *)addr;
 378
 379                 __do_user_fault(&si, esr);
 380         } else {
 381                 __do_kernel_fault(addr, esr, regs);
 382         }
 383 }
 384
 385 #define VM_FAULT_BADMAP         0x010000
 386 #define VM_FAULT_BADACCESS      0x020000
 387
 388 static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
 389                            unsigned int mm_flags, unsigned long vm_flags,
 390                            struct task_struct *tsk)
 391 {
 392         struct vm_area_struct *vma;
 393         vm_fault_t fault;
 394
 395         vma = find_vma(mm, addr);
 396         fault = VM_FAULT_BADMAP;
 397         if (unlikely(!vma))
 398                 goto out;
 399         if (unlikely(vma->vm_start > addr))
 400                 goto check_stack;
 401
 402         /*
 403          * Ok, we have a good vm_area for this memory access, so we can handle
 404          * it.
 405          */
 406 good_area:
 407         /*
 408          * Check that the permissions on the VMA allow for the fault which
 409          * occurred.
 410          */
 411         if (!(vma->vm_flags & vm_flags)) {
 412                 fault = VM_FAULT_BADACCESS;
 413                 goto out;
 414         }
 415
 416         return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);
 417
 418 check_stack:
 419         if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
 420                 goto good_area;
 421 out:
 422         return fault;
 423 }
 424
 425 static bool is_el0_instruction_abort(unsigned int esr)
 426 {
 427         return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
 428 }
 429
/*
 * Handle a fault that may be resolvable through the current task's mm.
 *
 * @addr: faulting virtual address
 * @esr:  syndrome value describing the fault
 * @regs: register state at the time of the fault
 *
 * Faults that cannot be resolved end up in __do_kernel_fault() when taken
 * from kernel mode, and in __do_user_fault() or pagefault_out_of_memory()
 * when taken from user mode. Every return path returns 0.
 */
static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
                                   struct pt_regs *regs)
{
        struct task_struct *tsk;
        struct mm_struct *mm;
        struct siginfo si;
        vm_fault_t fault, major = 0;
        /* Default: the VMA must permit at least one of read or write. */
        unsigned long vm_flags = VM_READ | VM_WRITE;
        unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

        /* A non-zero result means the notifier dealt with the fault. */
        if (notify_page_fault(regs, esr))
                return 0;

        tsk = current;
        mm  = tsk->mm;

        /*
         * If we're in an interrupt or have no user context, we must not take
         * the fault.
         */
        if (faulthandler_disabled() || !mm)
                goto no_context;

        if (user_mode(regs))
                mm_flags |= FAULT_FLAG_USER;

        /*
         * Narrow the required VMA permission: exec for an instruction abort
         * from EL0, write when WnR is set and CM is clear.
         */
        if (is_el0_instruction_abort(esr)) {
                vm_flags = VM_EXEC;
        } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
                vm_flags = VM_WRITE;
                mm_flags |= FAULT_FLAG_WRITE;
        }

        /*
         * An EL1 permission fault on an address below TASK_SIZE is only
         * tolerated when the faulting pc has an exception table entry; the
         * other cases below are fatal.
         */
        if (addr < TASK_SIZE && is_el1_permission_fault(addr, esr, regs)) {
                /* regs->orig_addr_limit may be 0 if we entered from EL0 */
                if (regs->orig_addr_limit == KERNEL_DS)
                        die_kernel_fault("access to user memory with fs=KERNEL_DS",
                                         addr, esr, regs);

                if (is_el1_instruction_abort(esr))
                        die_kernel_fault("execution of user memory",
                                         addr, esr, regs);

                if (!search_exception_tables(regs->pc))
                        die_kernel_fault("access to user memory outside uaccess routines",
                                         addr, esr, regs);
        }

        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

        /*
         * As per x86, we may deadlock here. However, since the kernel only
         * validly references user space from well defined areas of the code,
         * we can bug out early if this is from code which shouldn't.
         */
        if (!down_read_trylock(&mm->mmap_sem)) {
                if (!user_mode(regs) && !search_exception_tables(regs->pc))
                        goto no_context;
                /*
                 * Also the target of the "goto retry" in the VM_FAULT_RETRY
                 * handling below, which re-takes mmap_sem here.
                 */
retry:
                down_read(&mm->mmap_sem);
        } else {
                /*
                 * The above down_read_trylock() might have succeeded in which
                 * case, we'll have missed the might_sleep() from down_read().
                 */
                might_sleep();
#ifdef CONFIG_DEBUG_VM
                if (!user_mode(regs) && !search_exception_tables(regs->pc))
                        goto no_context;
#endif
        }

        fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
        /* Accumulated across a retry so the accounting below sees either attempt. */
        major |= fault & VM_FAULT_MAJOR;

        if (fault & VM_FAULT_RETRY) {
                /*
                 * If we need to retry but a fatal signal is pending,
                 * handle the signal first. We do not need to release
                 * the mmap_sem because it would already be released
                 * in __lock_page_or_retry in mm/filemap.c.
                 */
                if (fatal_signal_pending(current)) {
                        if (!user_mode(regs))
                                goto no_context;
                        return 0;
                }

                /*
                 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
                 * starvation.
                 */
                if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
                        mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
                        mm_flags |= FAULT_FLAG_TRIED;
                        goto retry;
                }
        }
        up_read(&mm->mmap_sem);

        /*
         * Handle the "normal" (no error) case first.
         */
        if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
                              VM_FAULT_BADACCESS)))) {
                /*
                 * Major/minor page fault accounting is only done
                 * once. If we go through a retry, it is extremely
                 * likely that the page will be found in page cache at
                 * that point.
                 */
                if (major) {
                        tsk->maj_flt++;
                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
                                      addr);
                } else {
                        tsk->min_flt++;
                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
                                      addr);
                }

                return 0;
        }

        /*
         * If we are in kernel mode at this point, we have no context to
         * handle this fault with.
         */
        if (!user_mode(regs))
                goto no_context;

        if (fault & VM_FAULT_OOM) {
                /*
                 * We ran out of memory, call the OOM killer, and return to
                 * userspace (which will retry the fault, or kill us if we got
                 * oom-killed).
                 */
                pagefault_out_of_memory();
                return 0;
        }

        /* From here on the fault is reported to the task as a signal. */
        clear_siginfo(&si);
        si.si_addr = (void __user *)addr;

        if (fault & VM_FAULT_SIGBUS) {
                /*
                 * We had some memory, but were unable to successfully fix up
                 * this page fault.
                 */
                si.si_signo     = SIGBUS;
                si.si_code      = BUS_ADRERR;
        } else if (fault & VM_FAULT_HWPOISON_LARGE) {
                unsigned int hindex = VM_FAULT_GET_HINDEX(fault);

                si.si_signo     = SIGBUS;
                si.si_code      = BUS_MCEERR_AR;
                si.si_addr_lsb  = hstate_index_to_shift(hindex);
        } else if (fault & VM_FAULT_HWPOISON) {
                si.si_signo     = SIGBUS;
                si.si_code      = BUS_MCEERR_AR;
                si.si_addr_lsb  = PAGE_SHIFT;
        } else {
                /*
                 * Something tried to access memory that isn't in our memory
                 * map.
                 */
                si.si_signo     = SIGSEGV;
                si.si_code      = fault == VM_FAULT_BADACCESS ?
                                  SEGV_ACCERR : SEGV_MAPERR;
        }

        __do_user_fault(&si, esr);
        return 0;

        /* Faults that cannot be resolved through an mm end up here. */
no_context:
        __do_kernel_fault(addr, esr, regs);
        return 0;
}
 608
 609 static int __kprobes do_translation_fault(unsigned long addr,
 610                                           unsigned int esr,
 611                                           struct pt_regs *regs)
 612 {
 613         if (addr < TASK_SIZE)
 614                 return do_page_fault(addr, esr, regs);
 615
 616         do_bad_area(addr, esr, regs);
 617         return 0;
 618 }
 619
 620 static int do_alignment_fault(unsigned long addr, unsigned int esr,
 621                               struct pt_regs *regs)
 622 {
 623         do_bad_area(addr, esr, regs);
 624         return 0;
 625 }
 626
 627 static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 628 {
 629         return 1; /* "fault" */
 630 }
 631
 632 static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 633 {
 634         struct siginfo info;
 635         const struct fault_info *inf;
 636
 637         inf = esr_to_fault_info(esr);
 638
 639         /*
 640          * Synchronous aborts may interrupt code which had interrupts masked.
 641          * Before calling out into the wider kernel tell the interested
 642          * subsystems.
 643          */
 644         if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
 645                 if (interrupts_enabled(regs))
 646                         nmi_enter();
 647
 648                 ghes_notify_sea();
 649
 650                 if (interrupts_enabled(regs))
 651                         nmi_exit();
 652         }
 653
 654         clear_siginfo(&info);
 655         info.si_signo = inf->sig;
 656         info.si_errno = 0;
 657         info.si_code  = inf->code;
 658         if (esr & ESR_ELx_FnV)
 659                 info.si_addr = NULL;
 660         else
 661                 info.si_addr  = (void __user *)addr;
 662         arm64_notify_die(inf->name, regs, &info, esr);
 663
 664         return 0;
 665 }
 666
/*
 * Fault dispatch table, indexed by (esr & ESR_ELx_FSC) through
 * esr_to_fault_info(). It has 64 rows, and the "unknown N" names carry the
 * row index of codes that have no dedicated handling here.
 *
 * Each row is { handler, signal, si_code, name }. Rows are positional: never
 * insert or remove a row without keeping every index in place.
 */
static const struct fault_info fault_info[] = {
        { do_bad,               SIGKILL, SI_KERNEL,     "ttbr address size fault"       },
        { do_bad,               SIGKILL, SI_KERNEL,     "level 1 address size fault"    },
        { do_bad,               SIGKILL, SI_KERNEL,     "level 2 address size fault"    },
        { do_bad,               SIGKILL, SI_KERNEL,     "level 3 address size fault"    },
        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 0 translation fault"     },
        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 1 translation fault"     },
        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 2 translation fault"     },
        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 3 translation fault"     },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 8"                     },
        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 1 access flag fault"     },
        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 2 access flag fault"     },
        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 3 access flag fault"     },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 12"                    },
        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 1 permission fault"      },
        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 2 permission fault"      },
        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 3 permission fault"      },
        { do_sea,               SIGBUS,  BUS_OBJERR,    "synchronous external abort"    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 17"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 18"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 19"                    },
        { do_sea,               SIGKILL, SI_KERNEL,     "level 0 (translation table walk)"      },
        { do_sea,               SIGKILL, SI_KERNEL,     "level 1 (translation table walk)"      },
        { do_sea,               SIGKILL, SI_KERNEL,     "level 2 (translation table walk)"      },
        { do_sea,               SIGKILL, SI_KERNEL,     "level 3 (translation table walk)"      },
        { do_sea,               SIGBUS,  BUS_OBJERR,    "synchronous parity or ECC error" },    // Reserved when RAS is implemented
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 25"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 26"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 27"                    },
        { do_sea,               SIGKILL, SI_KERNEL,     "level 0 synchronous parity error (translation table walk)"     },      // Reserved when RAS is implemented
        { do_sea,               SIGKILL, SI_KERNEL,     "level 1 synchronous parity error (translation table walk)"     },      // Reserved when RAS is implemented
        { do_sea,               SIGKILL, SI_KERNEL,     "level 2 synchronous parity error (translation table walk)"     },      // Reserved when RAS is implemented
        { do_sea,               SIGKILL, SI_KERNEL,     "level 3 synchronous parity error (translation table walk)"     },      // Reserved when RAS is implemented
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 32"                    },
        { do_alignment_fault,   SIGBUS,  BUS_ADRALN,    "alignment fault"               },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 34"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 35"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 36"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 37"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 38"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 39"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 40"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 41"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 42"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 43"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 44"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 45"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 46"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 47"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "TLB conflict abort"            },
        { do_bad,               SIGKILL, SI_KERNEL,     "Unsupported atomic hardware update fault"      },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 50"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 51"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "implementation fault (lockdown abort)" },
        { do_bad,               SIGBUS,  BUS_OBJERR,    "implementation fault (unsupported exclusive)" },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 54"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 55"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 56"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 57"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 58"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 59"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 60"                    },
        { do_bad,               SIGKILL, SI_KERNEL,     "section domain fault"          },
        { do_bad,               SIGKILL, SI_KERNEL,     "page domain fault"             },
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 63"                    },
};
 733
 734 int handle_guest_sea(phys_addr_t addr, unsigned int esr)
 735 {
 736         return ghes_notify_sea();
 737 }
 738
 739 asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
 740                                          struct pt_regs *regs)
 741 {
 742         const struct fault_info *inf = esr_to_fault_info(esr);
 743         struct siginfo info;
 744
 745         if (!inf->fn(addr, esr, regs))
 746                 return;
 747
 748         if (!user_mode(regs)) {
 749                 pr_alert("Unhandled fault at 0x%016lx\n", addr);
 750                 mem_abort_decode(esr);
 751                 show_pte(addr);
 752         }
 753
 754         clear_siginfo(&info);
 755         info.si_signo = inf->sig;
 756         info.si_errno = 0;
 757         info.si_code  = inf->code;
 758         info.si_addr  = (void __user *)addr;
 759         arm64_notify_die(inf->name, regs, &info, esr);
 760 }
 761
 762 asmlinkage void __exception do_el0_irq_bp_hardening(void)
 763 {
 764         /* PC has already been checked in entry.S */
 765         arm64_apply_bp_hardening();
 766 }
 767
 768 asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
 769                                                    unsigned int esr,
 770                                                    struct pt_regs *regs)
 771 {
 772         /*
 773          * We've taken an instruction abort from userspace and not yet
 774          * re-enabled IRQs. If the address is a kernel address, apply
 775          * BP hardening prior to enabling IRQs and pre-emption.
 776          */
 777         if (addr > TASK_SIZE)
 778                 arm64_apply_bp_hardening();
 779
 780         local_daif_restore(DAIF_PROCCTX);
 781         do_mem_abort(addr, esr, regs);
 782 }
 783
 784
 785 asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
 786                                            unsigned int esr,
 787                                            struct pt_regs *regs)
 788 {
 789         struct siginfo info;
 790
 791         if (user_mode(regs)) {
 792                 if (instruction_pointer(regs) > TASK_SIZE)
 793                         arm64_apply_bp_hardening();
 794                 local_daif_restore(DAIF_PROCCTX);
 795         }
 796
 797         clear_siginfo(&info);
 798         info.si_signo = SIGBUS;
 799         info.si_errno = 0;
 800         info.si_code  = BUS_ADRALN;
 801         info.si_addr  = (void __user *)addr;
 802         arm64_notify_die("SP/PC alignment exception", regs, &info, esr);
 803 }
 804
 805 int __init early_brk64(unsigned long addr, unsigned int esr,
 806                        struct pt_regs *regs);
 807
 808 /*
 809  * __refdata because early_brk64 is __init, but the reference to it is
 810  * clobbered at arch_initcall time.
 811  * See traps.c and debug-monitors.c:debug_traps_init().
 812  */
 813 static struct fault_info __refdata debug_fault_info[] = {
 814         { do_bad,       SIGTRAP,        TRAP_HWBKPT,    "hardware breakpoint"   },
 815         { do_bad,       SIGTRAP,        TRAP_HWBKPT,    "hardware single-step"  },
 816         { do_bad,       SIGTRAP,        TRAP_HWBKPT,    "hardware watchpoint"   },
 817         { do_bad,       SIGKILL,        SI_KERNEL,      "unknown 3"             },
 818         { do_bad,       SIGTRAP,        TRAP_BRKPT,     "aarch32 BKPT"          },
 819         { do_bad,       SIGKILL,        SI_KERNEL,      "aarch32 vector catch"  },
 820         { early_brk64,  SIGTRAP,        TRAP_BRKPT,     "aarch64 BRK"           },
 821         { do_bad,       SIGKILL,        SI_KERNEL,      "unknown 7"             },
 822 };
 823
 824 void __init hook_debug_fault_code(int nr,
 825                                   int (*fn)(unsigned long, unsigned int, struct pt_regs *),
 826                                   int sig, int code, const char *name)
 827 {
 828         BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));
 829
 830         debug_fault_info[nr].fn         = fn;
 831         debug_fault_info[nr].sig        = sig;
 832         debug_fault_info[nr].code       = code;
 833         debug_fault_info[nr].name       = name;
 834 }
 835
 836 asmlinkage int __exception do_debug_exception(unsigned long addr,
 837                                               unsigned int esr,
 838                                               struct pt_regs *regs)
 839 {
 840         const struct fault_info *inf = esr_to_debug_fault_info(esr);
 841         int rv;
 842
 843         /*
 844          * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
 845          * already disabled to preserve the last enabled/disabled addresses.
 846          */
 847         if (interrupts_enabled(regs))
 848                 trace_hardirqs_off();
 849
 850         if (user_mode(regs) && instruction_pointer(regs) > TASK_SIZE)
 851                 arm64_apply_bp_hardening();
 852
 853         if (!inf->fn(addr, esr, regs)) {
 854                 rv = 1;
 855         } else {
 856                 struct siginfo info;
 857
 858                 clear_siginfo(&info);
 859                 info.si_signo = inf->sig;
 860                 info.si_errno = 0;
 861                 info.si_code  = inf->code;
 862                 info.si_addr  = (void __user *)addr;
 863                 arm64_notify_die(inf->name, regs, &info, esr);
 864                 rv = 0;
 865         }
 866
 867         if (interrupts_enabled(regs))
 868                 trace_hardirqs_on();
 869
 870         return rv;
 871 }
 872 NOKPROBE_SYMBOL(do_debug_exception);