/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2
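/*
 * Rough meaning of the RIC values, as we read the ISA 3.0 tlbie/tlbiel
 * definition: 0 invalidates TLB entries only, 1 invalidates only the
 * Page Walk Cache, and 2 invalidates the TLB, the PWC and any cached
 * table entries.
 */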
/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}
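/*
 * Note that a set-scoped tlbiel only invalidates one congruence class
 * (set) of the TLB per instruction, which is why clearing the whole
 * local TLB below has to iterate over every set.
 */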
static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);

	/* Do the same for process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

	asm volatile("ptesync": : :"memory");
}
void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}
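/*
 * The double-underscore helpers that follow emit one bare tlbiel/tlbie
 * instruction each; the single-underscore wrappers further down add the
 * required ptesync (and, for tlbie, eieio/tlbsync) ordering around them.
 */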
static inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
static inline void __tlbiel_va(unsigned long va, unsigned long pid,
			       unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
static inline void __tlbie_va(unsigned long va, unsigned long pid,
			      unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
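/*
 * CPU_FTR_P9_TLBIE_BUG flags the POWER9 tlbie vs. store ordering issue.
 * The workaround, as we understand it: after the real invalidations,
 * issue a ptesync followed by one extra "dummy" tlbie to an unused
 * address in PID 0, before the caller's final tlbsync.
 */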
static inline void fixup_tlbie(void)
{
	unsigned long pid = 0;
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}
/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}
static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
	}
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
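/*
 * The "eieio; tlbsync; ptesync" sequence is what completes a broadcast
 * tlbie: eieio orders the tlbie ahead of the tlbsync, tlbsync waits for
 * the invalidation to take effect on all processors, and the trailing
 * ptesync orders subsequent storage accesses after that.
 */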
static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}
static inline void _tlbiel_va(unsigned long va, unsigned long pid,
			      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	asm volatile("ptesync": : :"memory");
}
static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	asm volatile("ptesync": : :"memory");
}
static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
}
static inline void _tlbie_va(unsigned long va, unsigned long pid,
			     unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
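/*
 * Sketch of a typical path (details live in the generic mm code and the
 * book3s64 tlbflush headers): a small munmap() reaches flush_tlb_range(),
 * which radix kernels route to radix__flush_tlb_range() below; that in
 * turn picks local tlbiel vs. broadcast tlbie via mm_is_thread_local().
 */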
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);
#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);
#endif /* CONFIG_SMP */
void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}
void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);
#ifdef CONFIG_SMP
static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * P9 nest MMU has issues with the page walk cache
	 * caching PTEs and not flushing them properly when
	 * RIC = 0 for a PID/LPID invalidate
	 */
	return atomic_read(&mm->context.copros) != 0;
}
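/*
 * context.copros counts coprocessor contexts (e.g. CAPI devices using
 * the nest MMU) attached to this mm; any such attachment forces the
 * escalated RIC=2 flush described above.
 */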
void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	if (!mm_is_thread_local(mm)) {
		if (mm_needs_flush_escalation(mm))
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbie_pid(pid, RIC_FLUSH_TLB);
	} else
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);
void radix__flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	if (!mm_is_thread_local(mm))
		_tlbie_pid(pid, RIC_FLUSH_ALL);
	else
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_all_mm);
void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
	tlb->need_flush_all = 1;
}
EXPORT_SYMBOL(radix__flush_tlb_pwc);
void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	if (!mm_is_thread_local(mm))
		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	else
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}
void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);
#else /* CONFIG_SMP */
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	_tlbie_pid(0, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
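/*
 * Worked example: with a 64K base page size, the global ceiling of 33
 * pages is only ~2MB of address space before we give up on per-page
 * tlbie and flush the whole PID, while the local ceiling of
 * POWER9_TLB_SETS_RADIX * 2 (i.e. 256) pages tolerates much larger
 * ranges before falling back to the set-by-set tlbiel loop.
 */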
void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif
	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	if (mm_is_thread_local(mm)) {
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	} else {
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		}
	} else {
		bool hflush = false;
		unsigned long hstart, hend;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
		hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
		hend = end >> HPAGE_PMD_SHIFT;
		if (hstart < hend) {
			hstart <<= HPAGE_PMD_SHIFT;
			hend <<= HPAGE_PMD_SHIFT;
			hflush = true;
		}
#endif
		asm volatile("ptesync": : :"memory");
		if (local) {
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						HPAGE_PMD_SIZE, MMU_PAGE_2M);
			asm volatile("ptesync": : :"memory");
		} else {
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						HPAGE_PMD_SIZE, MMU_PAGE_2M);
			fixup_tlbie();
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		}
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_range);
static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}
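/*
 * Any size other than the base, 2M or 1G radix page sizes yields -1,
 * which radix__tlb_flush() below treats as "unknown size, flush the
 * whole mm".
 */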
static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize);
void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;

	/*
	 * if page size is not something we understand, do a full mm flush
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm) {
		radix__flush_all_mm(mm);
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->need_flush_all)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		unsigned long start = tlb->start;
		unsigned long end = tlb->end;

		if (!tlb->need_flush_all)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
	tlb->need_flush_all = 0;
}
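/*
 * Note how need_flush_all ties together: radix__flush_tlb_pwc() only
 * sets tlb->need_flush_all, deferring the actual Page Walk Cache flush
 * to radix__tlb_flush() above, which then selects the RIC_FLUSH_ALL or
 * also_pwc variants and clears the flag.
 */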
static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	if (mm_is_thread_local(mm)) {
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	} else {
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	}

	if (full) {
		if (!local && mm_needs_flush_escalation(mm))
			also_pwc = true;

		if (local)
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		else
			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
	} else {
		if (local)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
	}
	preempt_enable();
}
void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}
static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();

	if (mm_is_thread_local(mm)) {
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else {
		_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
void radix__flush_tlb_all(void)
{
	unsigned long rb,prs,r,rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * now flush guest entries by passing PRS = 1 and LPID != 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * now flush host entries by passing PRS = 0 and LPID == 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
				 unsigned long address)
{
	/*
	 * We track page size in pte only for DD1, so we can
	 * call this only on DD1.
	 */
	if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) {
		VM_WARN_ON(1);
		return;
	}

	if (old_pte & R_PAGE_LARGE)
		radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M);
	else
		radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought in obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */