/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2
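
/*
 * The RIC (Radix Invalidation Control) field of tlbie/tlbiel selects what
 * is invalidated: 0 flushes TLB entries only, 1 flushes only the page
 * walk cache (PWC), and 2 flushes everything, including any cached copy
 * of the process/partition table entry.
 */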

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;
	unsigned int r = 1; /* radix format */

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
		     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);

	/* Do the same for process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

	asm volatile("ptesync": : :"memory");
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}
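
/*
 * The low-level helpers below build the tlbie/tlbiel RB and RS register
 * images by hand, using the big-endian bit numbering of PPC_BIT() and
 * PPC_BITLSHIFT(): IS sits at RB bits 52:53 (PPC_BIT(53) encodes IS=1),
 * the congruence-class (set) number is shifted so it ends at RB bit 51,
 * and the PID is shifted into the upper word of RS, leaving the LPID
 * field (the lower word) zero.
 */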

static inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static inline void __tlbiel_va(unsigned long va, unsigned long pid,
			       unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static inline void __tlbie_va(unsigned long va, unsigned long pid,
			      unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
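
/*
 * POWER9 parts with CPU_FTR_P9_TLBIE_BUG can order a tlbie incorrectly
 * with respect to a preceding store. The workaround, applied after the
 * last real tlbie of a sequence, is a ptesync followed by one extra
 * "dummy" tlbie (a 64K-page flush at the top of the EA space with PID 0)
 * before the closing eieio; tlbsync; ptesync.
 */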
static inline void fixup_tlbie(void)
{
	unsigned long pid = 0;
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
	}
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
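
/*
 * Note the split between the two instruction forms: tlbiel only affects
 * the TLB of the executing core and must be iterated per set for a full
 * flush, while tlbie is broadcast to all processors, which is why the
 * tlbie paths finish with eieio; tlbsync; ptesync to wait for the
 * invalidation to complete everywhere.
 */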

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static inline void _tlbiel_va(unsigned long va, unsigned long pid,
			      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	asm volatile("ptesync": : :"memory");
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	asm volatile("ptesync": : :"memory");
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static inline void _tlbie_va(unsigned long va, unsigned long pid,
			     unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context's TLB entries
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * The P9 nest MMU has issues with the page walk cache caching PTEs
	 * and not flushing them properly when RIC = 0 for a PID/LPID
	 * invalidate.
	 */
	return atomic_read(&mm->context.copros) != 0;
}

#ifdef CONFIG_SMP
void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	if (!mm_is_thread_local(mm)) {
		if (mm_needs_flush_escalation(mm))
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbie_pid(pid, RIC_FLUSH_TLB);
	} else
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

void radix__flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	if (!mm_is_thread_local(mm))
		_tlbie_pid(pid, RIC_FLUSH_ALL);
	else
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
	tlb->need_flush_all = 1;
}
EXPORT_SYMBOL(radix__flush_tlb_pwc);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	if (!mm_is_thread_local(mm))
		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	else
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */
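
/*
 * Kernel translations are done under PID 0, so a full RIC_FLUSH_ALL of
 * that PID below covers any kernel address range, whatever start/end are.
 */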
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	_tlbie_pid(0, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
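
/* With POWER9_TLB_SETS_RADIX being 128, the local ceiling works out to 256 pages. */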

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	if (mm_is_thread_local(mm)) {
		local = true;
		full = (end == TLB_FLUSH_ALL ||
			nr_pages > tlb_local_single_page_flush_ceiling);
	} else {
		local = false;
		full = (end == TLB_FLUSH_ALL ||
			nr_pages > tlb_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		}
	} else {
		bool hflush = false;
		unsigned long hstart, hend;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
		hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
		hend = end >> HPAGE_PMD_SHIFT;
		if (hstart < hend) {
			hstart <<= HPAGE_PMD_SHIFT;
			hend <<= HPAGE_PMD_SHIFT;
			hflush = true;
		}
#endif

		asm volatile("ptesync": : :"memory");
		if (local) {
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						HPAGE_PMD_SIZE, MMU_PAGE_2M);
			asm volatile("ptesync": : :"memory");
		} else {
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						HPAGE_PMD_SIZE, MMU_PAGE_2M);
			fixup_tlbie();
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		}
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize);

void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;

	/*
	 * If the page size is not something we understand, do a full mm flush.
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm) {
		radix__flush_all_mm(mm);
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->need_flush_all)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		unsigned long start = tlb->start;
		unsigned long end = tlb->end;

		if (!tlb->need_flush_all)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
	tlb->need_flush_all = 0;
}

static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	if (mm_is_thread_local(mm)) {
		local = true;
		full = (end == TLB_FLUSH_ALL ||
			nr_pages > tlb_local_single_page_flush_ceiling);
	} else {
		local = false;
		full = (end == TLB_FLUSH_ALL ||
			nr_pages > tlb_single_page_flush_ceiling);
	}

	if (full) {
		if (!local && mm_needs_flush_escalation(mm))
			also_pwc = true;

		if (local)
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		else
			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
	} else {
		if (local)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
	}
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();

	if (mm_is_thread_local(mm)) {
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else {
		_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * Now flush guest entries by passing PRS = 1 and LPID != 0.
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * Now flush host entries by passing PRS = 0 and LPID == 0.
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
				 unsigned long address)
{
	/*
	 * We track the page size in the PTE only for DD1, so we can
	 * call this only on DD1.
	 */
	if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) {
		VM_WARN_ON(1);
		return;
	}

	if (old_pte & R_PAGE_LARGE)
		radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M);
	else
		radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned int pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought an obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */