// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2005-2017 Andes Technology Corporation

#include <linux/extable.h>
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/ptrace.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/perf_event.h>

#include <asm/tlbflush.h>
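
/* die() is provided by the arch trap handling code: it dumps state and
 * terminates the faulting context. Declared here for the oops path below.
 */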
extern void die(const char *str, struct pt_regs *regs, long err);

/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
void show_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	if (!mm)
		mm = &init_mm;

	pr_alert("pgd = %p\n", mm->pgd);
	pgd = pgd_offset(mm, addr);
	pr_alert("[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));

	p4d = p4d_offset(pgd, addr);
	pud = pud_offset(p4d, addr);
	pmd = pmd_offset(pud, addr);
	pr_alert(", *pmd=%08lx", pmd_val(*pmd));

	if (IS_ENABLED(CONFIG_HIGHMEM)) {
		/* With highmem the pte page may itself live in highmem,
		 * so map it around the dereference and unmap afterwards.
		 */
		pte_t *pte = pte_offset_map(pmd, addr);

		pr_alert(", *pte=%08lx", pte_val(*pte));
		pte_unmap(pte);
	}
	pr_alert("\n");
}

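/*
 * Hardware page fault entry point. Judging from the checks below,
 * 'entry' identifies which exception vector fired (TLB fill, TLB misc,
 * general exception) and 'error_code' carries the raw ITYPE value
 * describing the faulting access.
 */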
void do_page_fault(unsigned long entry, unsigned long addr,
		   unsigned int error_code, struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	int si_code;
	vm_fault_t fault;
	unsigned int mask = VM_ACCESS_FLAGS;
	unsigned int flags = FAULT_FLAG_DEFAULT;

	error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE);
	tsk = current;
	mm = tsk->mm;
	si_code = SEGV_MAPERR;
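	/* Refined to SEGV_ACCERR once a vma covers the address but denies
	 * the access type.
	 */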
	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (addr >= TASK_SIZE) {
		if (user_mode(regs))
			goto bad_area_nosemaphore;

		if (addr >= TASK_SIZE && addr < VMALLOC_END
		    && (entry == ENTRY_PTE_NOT_PRESENT))
			goto vmalloc_fault;
		else
			goto no_context;
	}
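
	/* From here on, addr is below TASK_SIZE: a user-space address. */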

	/* Send a signal to the task to handle the unaligned access. */
	if (entry == ENTRY_GENERAL_EXCPETION
	    && error_code == ETYPE_ALIGNMENT_CHECK) {
		if (user_mode(regs))
			goto bad_area_nosemaphore;
		else
			goto no_context;
	}

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (unlikely(faulthandler_disabled() || !mm))
		goto no_context;

	/*
	 * As per x86, we may deadlock here. However, since the kernel only
	 * validly references user space from well defined areas of the code,
	 * we can bug out early if this is from code which shouldn't.
	 */
	if (unlikely(!mmap_read_trylock(mm))) {
		if (!user_mode(regs) &&
		    !search_exception_tables(instruction_pointer(regs)))
			goto no_context;
retry:
		mmap_read_lock(mm);
	} else {
		/*
		 * The above mmap_read_trylock() might have succeeded in
		 * which case, we'll have missed the might_sleep() from
		 * mmap_read_lock().
		 */
		might_sleep();
		if (IS_ENABLED(CONFIG_DEBUG_VM)) {
			if (!user_mode(regs) &&
			    !search_exception_tables(instruction_pointer(regs)))
				goto no_context;
		}
	}
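
	/*
	 * find_vma() returns the first vma with vm_end > addr, so the
	 * faulting address may still sit below vm_start; in that case it
	 * is only valid if the vma is a stack that can grow down over it.
	 */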
	vma = find_vma(mm, addr);

	if (unlikely(!vma))
		goto bad_area;

	if (vma->vm_start <= addr)
		goto good_area;

	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		goto bad_area;

	if (unlikely(expand_stack(vma, addr)))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */

good_area:
	si_code = SEGV_ACCERR;

	/* first do some preliminary protection checks */
	if (entry == ENTRY_PTE_NOT_PRESENT) {
		if (error_code & ITYPE_mskINST)
			mask = VM_EXEC;
		else
			mask = VM_READ | VM_WRITE;
	} else if (entry == ENTRY_TLB_MISC) {
		switch (error_code & ITYPE_mskETYPE) {
		case RD_PROT:
			mask = VM_READ;
			break;
		case WRT_PROT:
			mask = VM_WRITE;
			flags |= FAULT_FLAG_WRITE;
			break;
		case NOEXEC:
			mask = VM_EXEC;
			break;
		case PAGE_MODIFY:
			mask = VM_WRITE;
			flags |= FAULT_FLAG_WRITE;
			break;
		case ACC_BIT:
			BUG();
		default:
			break;
		}
	}

	if (!(vma->vm_flags & mask))
		goto bad_area;

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, addr, flags);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_lock because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if (fault_signal_pending(fault, regs)) {
		if (!user_mode(regs))
			goto no_context;
		return;
	}

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		else
			goto bad_area;
	}

	/*
	 * Major/minor page fault accounting is only done on the initial
	 * attempt. If we go through a retry, it is extremely likely that
	 * the page will be found in page cache at that point.
	 */
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
				      1, regs, addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
				      1, regs, addr);
		}
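
		/*
		 * tsk->maj_flt/min_flt feed /proc/<pid>/stat and the perf
		 * "major-faults"/"minor-faults" software events, e.g.:
		 *   perf stat -e minor-faults,major-faults <cmd>
		 */
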
		if (fault & VM_FAULT_RETRY) {
			flags |= FAULT_FLAG_TRIED;

			/* No need to mmap_read_unlock(mm) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */
			goto retry;
		}
	}

	mmap_read_unlock(mm);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
bad_area:
	mmap_read_unlock(mm);

bad_area_nosemaphore:

	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		tsk->thread.address = addr;
		tsk->thread.error_code = error_code;
		tsk->thread.trap_no = entry;
		force_sig_fault(SIGSEGV, si_code, (void __user *)addr);
		return;
	}

no_context:

	/* Are we prepared to handle this kernel fault?
	 *
	 * (The kernel has valid exception-points in the source
	 *  when it accesses user-memory. When it fails in one
	 *  of those points, we find it in a table and do a jump
	 *  to some fixup code that loads an appropriate error
	 *  code)
	 */
	{
		const struct exception_table_entry *entry;

		if ((entry =
		     search_exception_tables(instruction_pointer(regs))) !=
		    NULL) {
			/* Adjust the instruction pointer in the stackframe */
			instruction_pointer(regs) = entry->fixup;
			return;
		}
	}
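
	/*
	 * No fixup entry matched the faulting instruction, so this was not
	 * a guarded uaccess sequence and is treated as a kernel bug.
	 */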

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);
	pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
		 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
		 "paging request", addr);
	show_pte(mm, addr);
	die("Oops", regs, error_code);
	bust_spinlocks(0);
	do_exit(SIGKILL);

	return;

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */
out_of_memory:
	mmap_read_unlock(mm);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	mmap_read_unlock(mm);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;

	/* Send a SIGBUS to the task */
	tsk->thread.address = addr;
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = entry;
	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr);
	return;

vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Use current_pgd instead of tsk->active_mm->pgd
		 * since the latter might be unavailable if this
		 * code is executed in an unluckily timed irq
		 * (like inside schedule() between switch_mm and
		 *  switch_to...).
		 */
		unsigned int index = pgd_index(addr);
		pgd_t *pgd, *pgd_k;
		p4d_t *p4d, *p4d_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

		pgd = (pgd_t *) __va(__nds32__mfsr(NDS32_SR_L1_PPTB)) + index;
		pgd_k = init_mm.pgd + index;
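
		/*
		 * 'pgd' walks the table the MMU actually uses (its base is
		 * read back from the L1_PPTB register), while 'pgd_k' walks
		 * the master copy in init_mm; missing kernel entries are
		 * copied from the latter into the former.
		 */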

		if (!pgd_present(*pgd_k))
			goto no_context;

		p4d = p4d_offset(pgd, addr);
		p4d_k = p4d_offset(pgd_k, addr);
		if (!p4d_present(*p4d_k))
			goto no_context;

		pud = pud_offset(p4d, addr);
		pud_k = pud_offset(p4d_k, addr);
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, addr);
		pmd_k = pmd_offset(pud_k, addr);
		if (!pmd_present(*pmd_k))
			goto no_context;
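
		/*
		 * The master table has this pmd. If the active table lacks
		 * it, copy it over; if both have it, they must already
		 * agree on the pmd page.
		 */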
		if (!pmd_present(*pmd))
			set_pmd(pmd, *pmd_k);
		else
			BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));

		/*
		 * Since the vmalloc area is global, we don't need to copy
		 * individual PTEs; it is enough to copy the pgd pointer
		 * into the pte page of the root task. If that is there,
		 * we'll find our pte if it exists.
		 */

		/* Make sure the actual PTE exists as well to catch kernel
		 * vmalloc-area accesses to non-mapped addresses. If we
		 * don't do this, this will just silently loop forever.
		 */
		pte_k = pte_offset_kernel(pmd_k, addr);
		if (!pte_present(*pte_k))
			goto no_context;

		return;
	}
}