drivers/gpu/drm/i915/i915_gem_gtt.c
1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25
26 #include <linux/slab.h> /* fault-inject.h is not standalone! */
27
28 #include <linux/fault-inject.h>
29 #include <linux/log2.h>
30 #include <linux/random.h>
31 #include <linux/seq_file.h>
32 #include <linux/stop_machine.h>
33
34 #include <asm/set_memory.h>
35
36 #include <drm/i915_drm.h>
37
38 #include "display/intel_frontbuffer.h"
39
40 #include "i915_drv.h"
41 #include "i915_scatterlist.h"
42 #include "i915_trace.h"
43 #include "i915_vgpu.h"
44 #include "intel_drv.h"
45
46 #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
47
48 /**
49  * DOC: Global GTT views
50  *
51  * Background and previous state
52  *
53  * Historically objects could exist (be bound) in global GTT space only as
54  * singular instances with a view representing all of the object's backing pages
55  * in a linear fashion. This view will be called a normal view.
56  *
57  * To support multiple views of the same object, where the number of mapped
58  * pages is not equal to the backing store, or where the layout of the pages
59  * is not linear, the concept of a GGTT view was added.
60  *
61  * One example of an alternative view is a stereo display driven by a single
62  * image. In this case we would have a framebuffer looking like this
63  * (2x2 pages):
64  *
65  *    12
66  *    34
67  *
68  * Above would represent a normal GGTT view as normally mapped for GPU or CPU
69  * rendering. In contrast, fed to the display engine would be an alternative
70  * view which could look something like this:
71  *
72  *   1212
73  *   3434
74  *
75  * In this example both the size and layout of pages in the alternative view are
76  * different from the normal view.
77  *
78  * Implementation and usage
79  *
80  * GGTT views are implemented using VMAs and are distinguished via enum
81  * i915_ggtt_view_type and struct i915_ggtt_view.
82  *
83  * A new flavour of core GEM functions which work with GGTT bound objects was
84  * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
85  * renaming large amounts of code. They take the struct i915_ggtt_view
86  * parameter encapsulating all metadata required to implement a view.
87  *
88  * As a helper for callers which are only interested in the normal view,
89  * a globally const i915_ggtt_view_normal singleton instance exists. All old core
90  * GEM API functions, the ones not taking the view parameter, operate on,
91  * or with, the normal GGTT view.
92  *
93  * Code wanting to add or use a new GGTT view needs to:
94  *
95  * 1. Add a new enum with a suitable name.
96  * 2. Extend the metadata in the i915_ggtt_view structure if required.
97  * 3. Add support to i915_get_vma_pages().
98  *
99  * New views are required to build a scatter-gather table from within the
100  * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
101  * exists for the lifetime of a VMA.
102  *
103  * Core API is designed to have copy semantics which means that the passed-in
104  * struct i915_ggtt_view does not need to be persistent (left around after
105  * calling the core API functions).
106  *
107  */
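/*
 * A minimal sketch of requesting an alternative view, assuming a caller that
 * already holds an object "obj" and field names as declared for
 * struct i915_ggtt_view and struct intel_partial_info in i915_gem_gtt.h:
 * the first 16 pages of the object's backing store are described by a
 * partial view and pinned through the i915_gem_object_ggtt_pin() helper.
 *
 *	struct i915_ggtt_view view = {
 *		.type = I915_GGTT_VIEW_PARTIAL,
 *		.partial = { .offset = 0, .size = 16 },
 *	};
 *	struct i915_vma *vma;
 *
 *	vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *
 * Because the core API has copy semantics, "view" may live on the stack and
 * does not need to outlive the call.
 */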
108
109 static int
110 i915_get_ggtt_vma_pages(struct i915_vma *vma);
111
112 static void gen6_ggtt_invalidate(struct drm_i915_private *i915)
113 {
114         struct intel_uncore *uncore = &i915->uncore;
115
116         /*
117          * Note that as an uncached mmio write, this will flush the write-combine
118          * buffer of the GGTT writes before it triggers the invalidate.
119          */
120         intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
121 }
122
123 static void guc_ggtt_invalidate(struct drm_i915_private *i915)
124 {
125         struct intel_uncore *uncore = &i915->uncore;
126
127         gen6_ggtt_invalidate(i915);
128         intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
129 }
130
131 static void gmch_ggtt_invalidate(struct drm_i915_private *i915)
132 {
133         intel_gtt_chipset_flush();
134 }
135
136 static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
137 {
138         i915->ggtt.invalidate(i915);
139 }
140
141 static int ppgtt_bind_vma(struct i915_vma *vma,
142                           enum i915_cache_level cache_level,
143                           u32 unused)
144 {
145         u32 pte_flags;
146         int err;
147
148         if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
149                 err = vma->vm->allocate_va_range(vma->vm,
150                                                  vma->node.start, vma->size);
151                 if (err)
152                         return err;
153         }
154
155         /* Applicable to VLV, and gen8+ */
156         pte_flags = 0;
157         if (i915_gem_object_is_readonly(vma->obj))
158                 pte_flags |= PTE_READ_ONLY;
159
160         vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
161
162         return 0;
163 }
164
165 static void ppgtt_unbind_vma(struct i915_vma *vma)
166 {
167         vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
168 }
169
170 static int ppgtt_set_pages(struct i915_vma *vma)
171 {
172         GEM_BUG_ON(vma->pages);
173
174         vma->pages = vma->obj->mm.pages;
175
176         vma->page_sizes = vma->obj->mm.page_sizes;
177
178         return 0;
179 }
180
181 static void clear_pages(struct i915_vma *vma)
182 {
183         GEM_BUG_ON(!vma->pages);
184
185         if (vma->pages != vma->obj->mm.pages) {
186                 sg_free_table(vma->pages);
187                 kfree(vma->pages);
188         }
189         vma->pages = NULL;
190
191         memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
192 }
193
194 static u64 gen8_pte_encode(dma_addr_t addr,
195                            enum i915_cache_level level,
196                            u32 flags)
197 {
198         gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
199
200         if (unlikely(flags & PTE_READ_ONLY))
201                 pte &= ~_PAGE_RW;
202
203         switch (level) {
204         case I915_CACHE_NONE:
205                 pte |= PPAT_UNCACHED;
206                 break;
207         case I915_CACHE_WT:
208                 pte |= PPAT_DISPLAY_ELLC;
209                 break;
210         default:
211                 pte |= PPAT_CACHED;
212                 break;
213         }
214
215         return pte;
216 }
217
218 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
219                                   const enum i915_cache_level level)
220 {
221         gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
222         pde |= addr;
223         if (level != I915_CACHE_NONE)
224                 pde |= PPAT_CACHED_PDE;
225         else
226                 pde |= PPAT_UNCACHED;
227         return pde;
228 }
229
230 #define gen8_pdpe_encode gen8_pde_encode
231 #define gen8_pml4e_encode gen8_pde_encode
232
233 static u64 snb_pte_encode(dma_addr_t addr,
234                           enum i915_cache_level level,
235                           u32 flags)
236 {
237         gen6_pte_t pte = GEN6_PTE_VALID;
238         pte |= GEN6_PTE_ADDR_ENCODE(addr);
239
240         switch (level) {
241         case I915_CACHE_L3_LLC:
242         case I915_CACHE_LLC:
243                 pte |= GEN6_PTE_CACHE_LLC;
244                 break;
245         case I915_CACHE_NONE:
246                 pte |= GEN6_PTE_UNCACHED;
247                 break;
248         default:
249                 MISSING_CASE(level);
250         }
251
252         return pte;
253 }
254
255 static u64 ivb_pte_encode(dma_addr_t addr,
256                           enum i915_cache_level level,
257                           u32 flags)
258 {
259         gen6_pte_t pte = GEN6_PTE_VALID;
260         pte |= GEN6_PTE_ADDR_ENCODE(addr);
261
262         switch (level) {
263         case I915_CACHE_L3_LLC:
264                 pte |= GEN7_PTE_CACHE_L3_LLC;
265                 break;
266         case I915_CACHE_LLC:
267                 pte |= GEN6_PTE_CACHE_LLC;
268                 break;
269         case I915_CACHE_NONE:
270                 pte |= GEN6_PTE_UNCACHED;
271                 break;
272         default:
273                 MISSING_CASE(level);
274         }
275
276         return pte;
277 }
278
279 static u64 byt_pte_encode(dma_addr_t addr,
280                           enum i915_cache_level level,
281                           u32 flags)
282 {
283         gen6_pte_t pte = GEN6_PTE_VALID;
284         pte |= GEN6_PTE_ADDR_ENCODE(addr);
285
286         if (!(flags & PTE_READ_ONLY))
287                 pte |= BYT_PTE_WRITEABLE;
288
289         if (level != I915_CACHE_NONE)
290                 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
291
292         return pte;
293 }
294
295 static u64 hsw_pte_encode(dma_addr_t addr,
296                           enum i915_cache_level level,
297                           u32 flags)
298 {
299         gen6_pte_t pte = GEN6_PTE_VALID;
300         pte |= HSW_PTE_ADDR_ENCODE(addr);
301
302         if (level != I915_CACHE_NONE)
303                 pte |= HSW_WB_LLC_AGE3;
304
305         return pte;
306 }
307
308 static u64 iris_pte_encode(dma_addr_t addr,
309                            enum i915_cache_level level,
310                            u32 flags)
311 {
312         gen6_pte_t pte = GEN6_PTE_VALID;
313         pte |= HSW_PTE_ADDR_ENCODE(addr);
314
315         switch (level) {
316         case I915_CACHE_NONE:
317                 break;
318         case I915_CACHE_WT:
319                 pte |= HSW_WT_ELLC_LLC_AGE3;
320                 break;
321         default:
322                 pte |= HSW_WB_ELLC_LLC_AGE3;
323                 break;
324         }
325
326         return pte;
327 }
328
329 static void stash_init(struct pagestash *stash)
330 {
331         pagevec_init(&stash->pvec);
332         spin_lock_init(&stash->lock);
333 }
334
335 static struct page *stash_pop_page(struct pagestash *stash)
336 {
337         struct page *page = NULL;
338
339         spin_lock(&stash->lock);
340         if (likely(stash->pvec.nr))
341                 page = stash->pvec.pages[--stash->pvec.nr];
342         spin_unlock(&stash->lock);
343
344         return page;
345 }
346
347 static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
348 {
349         unsigned int nr;
350
351         spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
352
353         nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
354         memcpy(stash->pvec.pages + stash->pvec.nr,
355                pvec->pages + pvec->nr - nr,
356                sizeof(pvec->pages[0]) * nr);
357         stash->pvec.nr += nr;
358
359         spin_unlock(&stash->lock);
360
361         pvec->nr -= nr;
362 }
363
364 static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
365 {
366         struct pagevec stack;
367         struct page *page;
368
369         if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
370                 i915_gem_shrink_all(vm->i915);
371
372         page = stash_pop_page(&vm->free_pages);
373         if (page)
374                 return page;
375
376         if (!vm->pt_kmap_wc)
377                 return alloc_page(gfp);
378
379         /* Look in our global stash of WC pages... */
380         page = stash_pop_page(&vm->i915->mm.wc_stash);
381         if (page)
382                 return page;
383
384         /*
385          * Otherwise batch allocate pages to amortize cost of set_pages_wc.
386          *
387          * We have to be careful as page allocation may trigger the shrinker
388          * (via direct reclaim) which will fill up the WC stash underneath us.
389          * So we add our WB pages into a temporary pvec on the stack and merge
390          * them into the WC stash after all the allocations are complete.
391          */
392         pagevec_init(&stack);
393         do {
394                 struct page *page;
395
396                 page = alloc_page(gfp);
397                 if (unlikely(!page))
398                         break;
399
400                 stack.pages[stack.nr++] = page;
401         } while (pagevec_space(&stack));
402
403         if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
404                 page = stack.pages[--stack.nr];
405
406                 /* Merge spare WC pages to the global stash */
407                 if (stack.nr)
408                         stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
409
410                 /* Push any surplus WC pages onto the local VM stash */
411                 if (stack.nr)
412                         stash_push_pagevec(&vm->free_pages, &stack);
413         }
414
415         /* Return unwanted leftovers */
416         if (unlikely(stack.nr)) {
417                 WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
418                 __pagevec_release(&stack);
419         }
420
421         return page;
422 }
423
424 static void vm_free_pages_release(struct i915_address_space *vm,
425                                   bool immediate)
426 {
427         struct pagevec *pvec = &vm->free_pages.pvec;
428         struct pagevec stack;
429
430         lockdep_assert_held(&vm->free_pages.lock);
431         GEM_BUG_ON(!pagevec_count(pvec));
432
433         if (vm->pt_kmap_wc) {
434                 /*
435                  * When we use WC, first fill up the global stash and then,
436                  * only if that is full, immediately free the overflow.
437                  */
438                 stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
439
440                 /*
441                  * As we have made some room in the VM's free_pages,
442                  * we can wait for it to fill again. Unless we are
443                  * inside i915_address_space_fini() and must
444                  * immediately release the pages!
445                  */
446                 if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
447                         return;
448
449                 /*
450                  * We have to drop the lock to allow ourselves to sleep,
451                  * so take a copy of the pvec and clear the stash for
452                  * others to use it as we sleep.
453                  */
454                 stack = *pvec;
455                 pagevec_reinit(pvec);
456                 spin_unlock(&vm->free_pages.lock);
457
458                 pvec = &stack;
459                 set_pages_array_wb(pvec->pages, pvec->nr);
460
461                 spin_lock(&vm->free_pages.lock);
462         }
463
464         __pagevec_release(pvec);
465 }
466
467 static void vm_free_page(struct i915_address_space *vm, struct page *page)
468 {
469         /*
470          * On !llc, we need to change the pages back to WB. We only do so
471          * in bulk, so we rarely need to change the page attributes here,
472          * but doing so requires a stop_machine() from deep inside arch/x86/mm.
473          * To make detection of the possible sleep more likely, use an
474          * unconditional might_sleep() for everybody.
475          */
476         might_sleep();
477         spin_lock(&vm->free_pages.lock);
478         while (!pagevec_space(&vm->free_pages.pvec))
479                 vm_free_pages_release(vm, false);
480         GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
481         pagevec_add(&vm->free_pages.pvec, page);
482         spin_unlock(&vm->free_pages.lock);
483 }
484
485 static void i915_address_space_init(struct i915_address_space *vm, int subclass)
486 {
487         kref_init(&vm->ref);
488
489         /*
490          * The vm->mutex must be reclaim safe (for use in the shrinker).
491          * Do a dummy acquire now under fs_reclaim so that any allocation
492          * attempt holding the lock is immediately reported by lockdep.
493          */
494         mutex_init(&vm->mutex);
495         lockdep_set_subclass(&vm->mutex, subclass);
496         i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
497
498         GEM_BUG_ON(!vm->total);
499         drm_mm_init(&vm->mm, 0, vm->total);
500         vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
501
502         stash_init(&vm->free_pages);
503
504         INIT_LIST_HEAD(&vm->unbound_list);
505         INIT_LIST_HEAD(&vm->bound_list);
506 }
507
508 static void i915_address_space_fini(struct i915_address_space *vm)
509 {
510         spin_lock(&vm->free_pages.lock);
511         if (pagevec_count(&vm->free_pages.pvec))
512                 vm_free_pages_release(vm, true);
513         GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
514         spin_unlock(&vm->free_pages.lock);
515
516         drm_mm_takedown(&vm->mm);
517
518         mutex_destroy(&vm->mutex);
519 }
520
521 static int __setup_page_dma(struct i915_address_space *vm,
522                             struct i915_page_dma *p,
523                             gfp_t gfp)
524 {
525         p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
526         if (unlikely(!p->page))
527                 return -ENOMEM;
528
529         p->daddr = dma_map_page_attrs(vm->dma,
530                                       p->page, 0, PAGE_SIZE,
531                                       PCI_DMA_BIDIRECTIONAL,
532                                       DMA_ATTR_SKIP_CPU_SYNC |
533                                       DMA_ATTR_NO_WARN);
534         if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
535                 vm_free_page(vm, p->page);
536                 return -ENOMEM;
537         }
538
539         return 0;
540 }
541
542 static int setup_page_dma(struct i915_address_space *vm,
543                           struct i915_page_dma *p)
544 {
545         return __setup_page_dma(vm, p, __GFP_HIGHMEM);
546 }
547
548 static void cleanup_page_dma(struct i915_address_space *vm,
549                              struct i915_page_dma *p)
550 {
551         dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
552         vm_free_page(vm, p->page);
553 }
554
555 #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
556
557 #define setup_px(vm, px) setup_page_dma((vm), px_base(px))
558 #define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
559 #define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
560 #define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
561
562 static void fill_page_dma(struct i915_address_space *vm,
563                           struct i915_page_dma *p,
564                           const u64 val)
565 {
566         u64 * const vaddr = kmap_atomic(p->page);
567
568         memset64(vaddr, val, PAGE_SIZE / sizeof(val));
569
570         kunmap_atomic(vaddr);
571 }
572
573 static void fill_page_dma_32(struct i915_address_space *vm,
574                              struct i915_page_dma *p,
575                              const u32 v)
576 {
577         fill_page_dma(vm, p, (u64)v << 32 | v);
578 }
579
580 static int
581 setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
582 {
583         unsigned long size;
584
585         /*
586          * In order to utilize 64K pages for an object with a size < 2M, we will
587          * need to support a 64K scratch page, given that every 16th entry for a
588          * page-table operating in 64K mode must point to a properly aligned 64K
589          * region, including any PTEs which happen to point to scratch.
590          *
591          * This is only relevant for the 48b PPGTT where we support
592          * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
593          * scratch (read-only) between all vm, we create one 64k scratch page
594          * for all.
595          */
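	/*
	 * A worked example of the above, assuming 4K page-table entries: in
	 * 64K mode the hardware consumes only PTE[0], PTE[16], PTE[32], ...,
	 * each covering 16 * 4K = 64K of GTT space, so a scratch entry
	 * interleaved with 64K entries must likewise name a 64K-aligned,
	 * 64K-sized region.
	 */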
596         size = I915_GTT_PAGE_SIZE_4K;
597         if (i915_vm_is_4lvl(vm) &&
598             HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
599                 size = I915_GTT_PAGE_SIZE_64K;
600                 gfp |= __GFP_NOWARN;
601         }
602         gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
603
604         do {
605                 int order = get_order(size);
606                 struct page *page;
607                 dma_addr_t addr;
608
609                 page = alloc_pages(gfp, order);
610                 if (unlikely(!page))
611                         goto skip;
612
613                 addr = dma_map_page_attrs(vm->dma,
614                                           page, 0, size,
615                                           PCI_DMA_BIDIRECTIONAL,
616                                           DMA_ATTR_SKIP_CPU_SYNC |
617                                           DMA_ATTR_NO_WARN);
618                 if (unlikely(dma_mapping_error(vm->dma, addr)))
619                         goto free_page;
620
621                 if (unlikely(!IS_ALIGNED(addr, size)))
622                         goto unmap_page;
623
624                 vm->scratch_page.page = page;
625                 vm->scratch_page.daddr = addr;
626                 vm->scratch_order = order;
627                 return 0;
628
629 unmap_page:
630                 dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
631 free_page:
632                 __free_pages(page, order);
633 skip:
634                 if (size == I915_GTT_PAGE_SIZE_4K)
635                         return -ENOMEM;
636
637                 size = I915_GTT_PAGE_SIZE_4K;
638                 gfp &= ~__GFP_NOWARN;
639         } while (1);
640 }
641
642 static void cleanup_scratch_page(struct i915_address_space *vm)
643 {
644         struct i915_page_dma *p = &vm->scratch_page;
645         int order = vm->scratch_order;
646
647         dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
648                        PCI_DMA_BIDIRECTIONAL);
649         __free_pages(p->page, order);
650 }
651
652 static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
653 {
654         struct i915_page_table *pt;
655
656         pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL);
657         if (unlikely(!pt))
658                 return ERR_PTR(-ENOMEM);
659
660         if (unlikely(setup_px(vm, pt))) {
661                 kfree(pt);
662                 return ERR_PTR(-ENOMEM);
663         }
664
665         atomic_set(&pt->used, 0);
666
667         return pt;
668 }
669
670 static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
671 {
672         cleanup_px(vm, pt);
673         kfree(pt);
674 }
675
676 static void gen8_initialize_pt(struct i915_address_space *vm,
677                                struct i915_page_table *pt)
678 {
679         fill_px(vm, pt, vm->scratch_pte);
680 }
681
682 static void gen6_initialize_pt(struct i915_address_space *vm,
683                                struct i915_page_table *pt)
684 {
685         fill32_px(vm, pt, vm->scratch_pte);
686 }
687
688 static struct i915_page_directory *__alloc_pd(void)
689 {
690         struct i915_page_directory *pd;
691
692         pd = kmalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
693
694         if (unlikely(!pd))
695                 return NULL;
696
697         memset(&pd->base, 0, sizeof(pd->base));
698         atomic_set(&pd->used, 0);
699         spin_lock_init(&pd->lock);
700
701         /* for safety */
702         pd->entry[0] = NULL;
703
704         return pd;
705 }
706
707 static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
708 {
709         struct i915_page_directory *pd;
710
711         pd = __alloc_pd();
712         if (unlikely(!pd))
713                 return ERR_PTR(-ENOMEM);
714
715         if (unlikely(setup_px(vm, pd))) {
716                 kfree(pd);
717                 return ERR_PTR(-ENOMEM);
718         }
719
720         return pd;
721 }
722
723 static inline bool pd_has_phys_page(const struct i915_page_directory * const pd)
724 {
725         return pd->base.page;
726 }
727
728 static void free_pd(struct i915_address_space *vm,
729                     struct i915_page_directory *pd)
730 {
731         if (likely(pd_has_phys_page(pd)))
732                 cleanup_px(vm, pd);
733
734         kfree(pd);
735 }
736
737 static void init_pd_with_page(struct i915_address_space *vm,
738                               struct i915_page_directory * const pd,
739                               struct i915_page_table *pt)
740 {
741         fill_px(vm, pd, gen8_pde_encode(px_dma(pt), I915_CACHE_LLC));
742         memset_p(pd->entry, pt, 512);
743 }
744
745 static void init_pd(struct i915_address_space *vm,
746                     struct i915_page_directory * const pd,
747                     struct i915_page_directory * const to)
748 {
749         GEM_DEBUG_BUG_ON(!pd_has_phys_page(pd));
750
751         fill_px(vm, pd, gen8_pdpe_encode(px_dma(to), I915_CACHE_LLC));
752         memset_p(pd->entry, to, 512);
753 }
754
755 /*
756  * PDE TLBs are a pain to invalidate on GEN8+. When we modify
757  * the page table structures, we mark them dirty so that
758  * context switching/execlist queuing code takes extra steps
759          * to ensure that TLBs are flushed.
760  */
761 static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt)
762 {
763         ppgtt->pd_dirty_engines = ALL_ENGINES;
764 }
765
766 /* Removes entries from a single page table, releasing it if it's empty.
767  * Caller can use the return value to update higher-level entries.
768  */
769 static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
770                                 struct i915_page_table *pt,
771                                 u64 start, u64 length)
772 {
773         unsigned int num_entries = gen8_pte_count(start, length);
774         gen8_pte_t *vaddr;
775
776         vaddr = kmap_atomic_px(pt);
777         memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries);
778         kunmap_atomic(vaddr);
779
780         GEM_BUG_ON(num_entries > atomic_read(&pt->used));
781         return !atomic_sub_return(num_entries, &pt->used);
782 }
783
784 static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
785                                struct i915_page_directory *pd,
786                                struct i915_page_table *pt,
787                                unsigned int pde)
788 {
789         gen8_pde_t *vaddr;
790
791         vaddr = kmap_atomic_px(pd);
792         vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
793         kunmap_atomic(vaddr);
794 }
795
796 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
797                                 struct i915_page_directory *pd,
798                                 u64 start, u64 length)
799 {
800         struct i915_page_table *pt;
801         u32 pde;
802
803         gen8_for_each_pde(pt, pd, start, length, pde) {
804                 bool free = false;
805
806                 GEM_BUG_ON(pt == vm->scratch_pt);
807
808                 if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
809                         continue;
810
811                 spin_lock(&pd->lock);
812                 if (!atomic_read(&pt->used)) {
813                         gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
814                         pd->entry[pde] = vm->scratch_pt;
815
816                         GEM_BUG_ON(!atomic_read(&pd->used));
817                         atomic_dec(&pd->used);
818                         free = true;
819                 }
820                 spin_unlock(&pd->lock);
821                 if (free)
822                         free_pt(vm, pt);
823         }
824
825         return !atomic_read(&pd->used);
826 }
827
828 static void gen8_ppgtt_set_pdpe(struct i915_page_directory *pdp,
829                                 struct i915_page_directory *pd,
830                                 unsigned int pdpe)
831 {
832         gen8_ppgtt_pdpe_t *vaddr;
833
834         if (!pd_has_phys_page(pdp))
835                 return;
836
837         vaddr = kmap_atomic_px(pdp);
838         vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
839         kunmap_atomic(vaddr);
840 }
841
842 /* Removes entries from a single page dir pointer, releasing it if it's empty.
843  * Caller can use the return value to update higher-level entries
844  */
845 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
846                                  struct i915_page_directory * const pdp,
847                                  u64 start, u64 length)
848 {
849         struct i915_page_directory *pd;
850         unsigned int pdpe;
851
852         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
853                 bool free = false;
854
855                 GEM_BUG_ON(pd == vm->scratch_pd);
856
857                 if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
858                         continue;
859
860                 spin_lock(&pdp->lock);
861                 if (!atomic_read(&pd->used)) {
862                         gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe);
863                         pdp->entry[pdpe] = vm->scratch_pd;
864
865                         GEM_BUG_ON(!atomic_read(&pdp->used));
866                         atomic_dec(&pdp->used);
867                         free = true;
868                 }
869                 spin_unlock(&pdp->lock);
870                 if (free)
871                         free_pd(vm, pd);
872         }
873
874         return !atomic_read(&pdp->used);
875 }
876
877 static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
878                                   u64 start, u64 length)
879 {
880         gen8_ppgtt_clear_pdp(vm, i915_vm_to_ppgtt(vm)->pd, start, length);
881 }
882
883 static void gen8_ppgtt_set_pml4e(struct i915_page_directory *pml4,
884                                  struct i915_page_directory *pdp,
885                                  unsigned int pml4e)
886 {
887         gen8_ppgtt_pml4e_t *vaddr;
888
889         vaddr = kmap_atomic_px(pml4);
890         vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
891         kunmap_atomic(vaddr);
892 }
893
894 /* Removes entries from a single pml4.
895  * This is the top-level structure in 4-level page tables used on gen8+.
896  * Empty entries are always scratch pml4e.
897  */
898 static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
899                                   u64 start, u64 length)
900 {
901         struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
902         struct i915_page_directory * const pml4 = ppgtt->pd;
903         struct i915_page_directory *pdp;
904         unsigned int pml4e;
905
906         GEM_BUG_ON(!i915_vm_is_4lvl(vm));
907
908         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
909                 bool free = false;
910                 GEM_BUG_ON(pdp == vm->scratch_pdp);
911
912                 if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
913                         continue;
914
915                 spin_lock(&pml4->lock);
916                 if (!atomic_read(&pdp->used)) {
917                         gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
918                         pml4->entry[pml4e] = vm->scratch_pdp;
919                         free = true;
920                 }
921                 spin_unlock(&pml4->lock);
922                 if (free)
923                         free_pd(vm, pdp);
924         }
925 }
926
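/*
 * Iterator over the DMA segments of a vma's scatterlist: "dma" is the next
 * device address to write into a PTE and "max" is the end of the current
 * segment, after which the insertion loops advance to the next sg entry.
 */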
927 static inline struct sgt_dma {
928         struct scatterlist *sg;
929         dma_addr_t dma, max;
930 } sgt_dma(struct i915_vma *vma) {
931         struct scatterlist *sg = vma->pages->sgl;
932         dma_addr_t addr = sg_dma_address(sg);
933         return (struct sgt_dma) { sg, addr, addr + sg->length };
934 }
935
936 struct gen8_insert_pte {
937         u16 pml4e;
938         u16 pdpe;
939         u16 pde;
940         u16 pte;
941 };
942
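/*
 * Decomposition of a GTT offset into the four page-table indices used by
 * the 4-level layout: bits [47:39] select the pml4e, [38:30] the pdpe,
 * [29:21] the pde and [20:12] the pte, i.e. 512 entries per level over
 * 4K pages.
 */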
943 static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
944 {
945         return (struct gen8_insert_pte) {
946                  gen8_pml4e_index(start),
947                  gen8_pdpe_index(start),
948                  gen8_pde_index(start),
949                  gen8_pte_index(start),
950         };
951 }
952
953 static __always_inline bool
954 gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
955                               struct i915_page_directory *pdp,
956                               struct sgt_dma *iter,
957                               struct gen8_insert_pte *idx,
958                               enum i915_cache_level cache_level,
959                               u32 flags)
960 {
961         struct i915_page_directory *pd;
962         const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
963         gen8_pte_t *vaddr;
964         bool ret;
965
966         GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
967         pd = i915_pd_entry(pdp, idx->pdpe);
968         vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
969         do {
970                 vaddr[idx->pte] = pte_encode | iter->dma;
971
972                 iter->dma += I915_GTT_PAGE_SIZE;
973                 if (iter->dma >= iter->max) {
974                         iter->sg = __sg_next(iter->sg);
975                         if (!iter->sg) {
976                                 ret = false;
977                                 break;
978                         }
979
980                         iter->dma = sg_dma_address(iter->sg);
981                         iter->max = iter->dma + iter->sg->length;
982                 }
983
984                 if (++idx->pte == GEN8_PTES) {
985                         idx->pte = 0;
986
987                         if (++idx->pde == I915_PDES) {
988                                 idx->pde = 0;
989
990                                 /* Limited by sg length for 3lvl */
991                                 if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
992                                         idx->pdpe = 0;
993                                         ret = true;
994                                         break;
995                                 }
996
997                                 GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
998                                 pd = pdp->entry[idx->pdpe];
999                         }
1000
1001                         kunmap_atomic(vaddr);
1002                         vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
1003                 }
1004         } while (1);
1005         kunmap_atomic(vaddr);
1006
1007         return ret;
1008 }
1009
1010 static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
1011                                    struct i915_vma *vma,
1012                                    enum i915_cache_level cache_level,
1013                                    u32 flags)
1014 {
1015         struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1016         struct sgt_dma iter = sgt_dma(vma);
1017         struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1018
1019         gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter, &idx,
1020                                       cache_level, flags);
1021
1022         vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1023 }
1024
1025 static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
1026                                            struct i915_page_directory *pml4,
1027                                            struct sgt_dma *iter,
1028                                            enum i915_cache_level cache_level,
1029                                            u32 flags)
1030 {
1031         const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
1032         u64 start = vma->node.start;
1033         dma_addr_t rem = iter->sg->length;
1034
1035         do {
1036                 struct gen8_insert_pte idx = gen8_insert_pte(start);
1037                 struct i915_page_directory *pdp =
1038                         i915_pdp_entry(pml4, idx.pml4e);
1039                 struct i915_page_directory *pd = i915_pd_entry(pdp, idx.pdpe);
1040                 unsigned int page_size;
1041                 bool maybe_64K = false;
1042                 gen8_pte_t encode = pte_encode;
1043                 gen8_pte_t *vaddr;
1044                 u16 index, max;
1045
1046                 if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
1047                     IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
1048                     rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
1049                         index = idx.pde;
1050                         max = I915_PDES;
1051                         page_size = I915_GTT_PAGE_SIZE_2M;
1052
1053                         encode |= GEN8_PDE_PS_2M;
1054
1055                         vaddr = kmap_atomic_px(pd);
1056                 } else {
1057                         struct i915_page_table *pt = i915_pt_entry(pd, idx.pde);
1058
1059                         index = idx.pte;
1060                         max = GEN8_PTES;
1061                         page_size = I915_GTT_PAGE_SIZE;
1062
1063                         if (!index &&
1064                             vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
1065                             IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1066                             (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1067                              rem >= (max - index) * I915_GTT_PAGE_SIZE))
1068                                 maybe_64K = true;
1069
1070                         vaddr = kmap_atomic_px(pt);
1071                 }
1072
1073                 do {
1074                         GEM_BUG_ON(iter->sg->length < page_size);
1075                         vaddr[index++] = encode | iter->dma;
1076
1077                         start += page_size;
1078                         iter->dma += page_size;
1079                         rem -= page_size;
1080                         if (iter->dma >= iter->max) {
1081                                 iter->sg = __sg_next(iter->sg);
1082                                 if (!iter->sg)
1083                                         break;
1084
1085                                 rem = iter->sg->length;
1086                                 iter->dma = sg_dma_address(iter->sg);
1087                                 iter->max = iter->dma + rem;
1088
1089                                 if (maybe_64K && index < max &&
1090                                     !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1091                                       (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1092                                        rem >= (max - index) * I915_GTT_PAGE_SIZE)))
1093                                         maybe_64K = false;
1094
1095                                 if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
1096                                         break;
1097                         }
1098                 } while (rem >= page_size && index < max);
1099
1100                 kunmap_atomic(vaddr);
1101
1102                 /*
1103                  * Is it safe to mark the 2M block as 64K? -- Either we have
1104                  * filled whole page-table with 64K entries, or filled part of
1105                  * it and have reached the end of the sg table and we have
1106                  * enough padding.
1107                  */
1108                 if (maybe_64K &&
1109                     (index == max ||
1110                      (i915_vm_has_scratch_64K(vma->vm) &&
1111                       !iter->sg && IS_ALIGNED(vma->node.start +
1112                                               vma->node.size,
1113                                               I915_GTT_PAGE_SIZE_2M)))) {
1114                         vaddr = kmap_atomic_px(pd);
1115                         vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
1116                         kunmap_atomic(vaddr);
1117                         page_size = I915_GTT_PAGE_SIZE_64K;
1118
1119                         /*
1120                          * We write all 4K page entries, even when using 64K
1121                          * pages. In order to verify that the HW isn't cheating
1122                          * by using the 4K PTE instead of the 64K PTE, we want
1123                          * to remove all the surplus entries. If the HW skipped
1124                          * the 64K PTE, it will read/write into the scratch page
1125                          * instead - which we detect as missing results during
1126                          * selftests.
1127                          */
1128                         if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
1129                                 u16 i;
1130
1131                                 encode = vma->vm->scratch_pte;
1132                                 vaddr = kmap_atomic_px(i915_pt_entry(pd,
1133                                                                      idx.pde));
1134
1135                                 for (i = 1; i < index; i += 16)
1136                                         memset64(vaddr + i, encode, 15);
1137
1138                                 kunmap_atomic(vaddr);
1139                         }
1140                 }
1141
1142                 vma->page_sizes.gtt |= page_size;
1143         } while (iter->sg);
1144 }
1145
1146 static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
1147                                    struct i915_vma *vma,
1148                                    enum i915_cache_level cache_level,
1149                                    u32 flags)
1150 {
1151         struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1152         struct sgt_dma iter = sgt_dma(vma);
1153         struct i915_page_directory * const pml4 = ppgtt->pd;
1154
1155         if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
1156                 gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level,
1157                                                flags);
1158         } else {
1159                 struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1160
1161                 while (gen8_ppgtt_insert_pte_entries(ppgtt,
1162                                                      i915_pdp_entry(pml4, idx.pml4e++),
1163                                                      &iter, &idx, cache_level,
1164                                                      flags))
1165                         GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
1166
1167                 vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1168         }
1169 }
1170
1171 static void gen8_free_page_tables(struct i915_address_space *vm,
1172                                   struct i915_page_directory *pd)
1173 {
1174         int i;
1175
1176         for (i = 0; i < I915_PDES; i++) {
1177                 if (pd->entry[i] != vm->scratch_pt)
1178                         free_pt(vm, pd->entry[i]);
1179         }
1180 }
1181
1182 static int gen8_init_scratch(struct i915_address_space *vm)
1183 {
1184         int ret;
1185
1186         /*
1187          * If everybody agrees not to write into the scratch page,
1188          * we can reuse it for all vm, keeping contexts and processes separate.
1189          */
1190         if (vm->has_read_only &&
1191             vm->i915->kernel_context &&
1192             vm->i915->kernel_context->vm) {
1193                 struct i915_address_space *clone = vm->i915->kernel_context->vm;
1194
1195                 GEM_BUG_ON(!clone->has_read_only);
1196
1197                 vm->scratch_order = clone->scratch_order;
1198                 vm->scratch_pte = clone->scratch_pte;
1199                 vm->scratch_pt  = clone->scratch_pt;
1200                 vm->scratch_pd  = clone->scratch_pd;
1201                 vm->scratch_pdp = clone->scratch_pdp;
1202                 return 0;
1203         }
1204
1205         ret = setup_scratch_page(vm, __GFP_HIGHMEM);
1206         if (ret)
1207                 return ret;
1208
1209         vm->scratch_pte =
1210                 gen8_pte_encode(vm->scratch_page.daddr,
1211                                 I915_CACHE_LLC,
1212                                 vm->has_read_only);
1213
1214         vm->scratch_pt = alloc_pt(vm);
1215         if (IS_ERR(vm->scratch_pt)) {
1216                 ret = PTR_ERR(vm->scratch_pt);
1217                 goto free_scratch_page;
1218         }
1219
1220         vm->scratch_pd = alloc_pd(vm);
1221         if (IS_ERR(vm->scratch_pd)) {
1222                 ret = PTR_ERR(vm->scratch_pd);
1223                 goto free_pt;
1224         }
1225
1226         if (i915_vm_is_4lvl(vm)) {
1227                 vm->scratch_pdp = alloc_pd(vm);
1228                 if (IS_ERR(vm->scratch_pdp)) {
1229                         ret = PTR_ERR(vm->scratch_pdp);
1230                         goto free_pd;
1231                 }
1232         }
1233
1234         gen8_initialize_pt(vm, vm->scratch_pt);
1235         init_pd_with_page(vm, vm->scratch_pd, vm->scratch_pt);
1236         if (i915_vm_is_4lvl(vm))
1237                 init_pd(vm, vm->scratch_pdp, vm->scratch_pd);
1238
1239         return 0;
1240
1241 free_pd:
1242         free_pd(vm, vm->scratch_pd);
1243 free_pt:
1244         free_pt(vm, vm->scratch_pt);
1245 free_scratch_page:
1246         cleanup_scratch_page(vm);
1247
1248         return ret;
1249 }
1250
1251 static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
1252 {
1253         struct i915_address_space *vm = &ppgtt->vm;
1254         struct drm_i915_private *dev_priv = vm->i915;
1255         enum vgt_g2v_type msg;
1256         int i;
1257
1258         if (i915_vm_is_4lvl(vm)) {
1259                 const u64 daddr = px_dma(ppgtt->pd);
1260
1261                 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
1262                 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
1263
1264                 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
1265                                 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
1266         } else {
1267                 for (i = 0; i < GEN8_3LVL_PDPES; i++) {
1268                         const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
1269
1270                         I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
1271                         I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
1272                 }
1273
1274                 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
1275                                 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
1276         }
1277
1278         I915_WRITE(vgtif_reg(g2v_notify), msg);
1279
1280         return 0;
1281 }
1282
1283 static void gen8_free_scratch(struct i915_address_space *vm)
1284 {
1285         if (!vm->scratch_page.daddr)
1286                 return;
1287
1288         if (i915_vm_is_4lvl(vm))
1289                 free_pd(vm, vm->scratch_pdp);
1290         free_pd(vm, vm->scratch_pd);
1291         free_pt(vm, vm->scratch_pt);
1292         cleanup_scratch_page(vm);
1293 }
1294
1295 static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
1296                                     struct i915_page_directory *pdp)
1297 {
1298         const unsigned int pdpes = i915_pdpes_per_pdp(vm);
1299         int i;
1300
1301         for (i = 0; i < pdpes; i++) {
1302                 if (pdp->entry[i] == vm->scratch_pd)
1303                         continue;
1304
1305                 gen8_free_page_tables(vm, pdp->entry[i]);
1306                 free_pd(vm, pdp->entry[i]);
1307         }
1308
1309         free_pd(vm, pdp);
1310 }
1311
1312 static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
1313 {
1314         struct i915_page_directory * const pml4 = ppgtt->pd;
1315         int i;
1316
1317         for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
1318                 struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
1319
1320                 if (pdp == ppgtt->vm.scratch_pdp)
1321                         continue;
1322
1323                 gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
1324         }
1325
1326         free_pd(&ppgtt->vm, pml4);
1327 }
1328
1329 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
1330 {
1331         struct drm_i915_private *i915 = vm->i915;
1332         struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1333
1334         if (intel_vgpu_active(i915))
1335                 gen8_ppgtt_notify_vgt(ppgtt, false);
1336
1337         if (i915_vm_is_4lvl(vm))
1338                 gen8_ppgtt_cleanup_4lvl(ppgtt);
1339         else
1340                 gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd);
1341
1342         gen8_free_scratch(vm);
1343 }
1344
1345 static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
1346                                struct i915_page_directory *pd,
1347                                u64 start, u64 length)
1348 {
1349         struct i915_page_table *pt, *alloc = NULL;
1350         u64 from = start;
1351         unsigned int pde;
1352         int ret = 0;
1353
1354         spin_lock(&pd->lock);
1355         gen8_for_each_pde(pt, pd, start, length, pde) {
1356                 const int count = gen8_pte_count(start, length);
1357
1358                 if (pt == vm->scratch_pt) {
1359                         spin_unlock(&pd->lock);
1360
1361                         pt = fetch_and_zero(&alloc);
1362                         if (!pt)
1363                                 pt = alloc_pt(vm);
1364                         if (IS_ERR(pt)) {
1365                                 ret = PTR_ERR(pt);
1366                                 goto unwind;
1367                         }
1368
1369                         if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
1370                                 gen8_initialize_pt(vm, pt);
1371
1372                         spin_lock(&pd->lock);
1373                         if (pd->entry[pde] == vm->scratch_pt) {
1374                                 gen8_ppgtt_set_pde(vm, pd, pt, pde);
1375                                 pd->entry[pde] = pt;
1376                                 atomic_inc(&pd->used);
1377                         } else {
1378                                 alloc = pt;
1379                                 pt = pd->entry[pde];
1380                         }
1381                 }
1382
1383                 atomic_add(count, &pt->used);
1384         }
1385         spin_unlock(&pd->lock);
1386         goto out;
1387
1388 unwind:
1389         gen8_ppgtt_clear_pd(vm, pd, from, start - from);
1390 out:
1391         if (alloc)
1392                 free_pt(vm, alloc);
1393         return ret;
1394 }
1395
1396 static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
1397                                 struct i915_page_directory *pdp,
1398                                 u64 start, u64 length)
1399 {
1400         struct i915_page_directory *pd, *alloc = NULL;
1401         u64 from = start;
1402         unsigned int pdpe;
1403         int ret = 0;
1404
1405         spin_lock(&pdp->lock);
1406         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1407                 if (pd == vm->scratch_pd) {
1408                         spin_unlock(&pdp->lock);
1409
1410                         pd = fetch_and_zero(&alloc);
1411                         if (!pd)
1412                                 pd = alloc_pd(vm);
1413                         if (IS_ERR(pd)) {
1414                                 ret = PTR_ERR(pd);
1415                                 goto unwind;
1416                         }
1417
1418                         init_pd_with_page(vm, pd, vm->scratch_pt);
1419
1420                         spin_lock(&pdp->lock);
1421                         if (pdp->entry[pdpe] == vm->scratch_pd) {
1422                                 gen8_ppgtt_set_pdpe(pdp, pd, pdpe);
1423                                 pdp->entry[pdpe] = pd;
1424                                 atomic_inc(&pdp->used);
1425                         } else {
1426                                 alloc = pd;
1427                                 pd = pdp->entry[pdpe];
1428                         }
1429                 }
1430                 atomic_inc(&pd->used);
1431                 spin_unlock(&pdp->lock);
1432
1433                 ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
1434                 if (unlikely(ret))
1435                         goto unwind_pd;
1436
1437                 spin_lock(&pdp->lock);
1438                 atomic_dec(&pd->used);
1439         }
1440         spin_unlock(&pdp->lock);
1441         goto out;
1442
1443 unwind_pd:
1444         spin_lock(&pdp->lock);
1445         if (atomic_dec_and_test(&pd->used)) {
1446                 gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe);
1447                 pdp->entry[pdpe] = vm->scratch_pd;
1448                 GEM_BUG_ON(!atomic_read(&pdp->used));
1449                 atomic_dec(&pdp->used);
1450                 GEM_BUG_ON(alloc);
1451                 alloc = pd; /* defer the free to after the lock */
1452         }
1453         spin_unlock(&pdp->lock);
1454 unwind:
1455         gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
1456 out:
1457         if (alloc)
1458                 free_pd(vm, alloc);
1459         return ret;
1460 }
1461
1462 static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
1463                                  u64 start, u64 length)
1464 {
1465         return gen8_ppgtt_alloc_pdp(vm,
1466                                     i915_vm_to_ppgtt(vm)->pd, start, length);
1467 }
1468
1469 static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
1470                                  u64 start, u64 length)
1471 {
1472         struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1473         struct i915_page_directory * const pml4 = ppgtt->pd;
1474         struct i915_page_directory *pdp, *alloc = NULL;
1475         u64 from = start;
1476         int ret = 0;
1477         u32 pml4e;
1478
1479         spin_lock(&pml4->lock);
1480         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1481                 if (pdp == vm->scratch_pdp) {
1482                         spin_unlock(&pml4->lock);
1483
1484                         pdp = fetch_and_zero(&alloc);
1485                         if (!pdp)
1486                                 pdp = alloc_pd(vm);
1487                         if (IS_ERR(pdp)) {
1488                                 ret = PTR_ERR(pdp);
1489                                 goto unwind;
1490                         }
1491
1492                         init_pd(vm, pdp, vm->scratch_pd);
1493
1494                         spin_lock(&pml4->lock);
1495                         if (pml4->entry[pml4e] == vm->scratch_pdp) {
1496                                 gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
1497                                 pml4->entry[pml4e] = pdp;
1498                         } else {
1499                                 alloc = pdp;
1500                                 pdp = pml4->entry[pml4e];
1501                         }
1502                 }
1503                 atomic_inc(&pdp->used);
1504                 spin_unlock(&pml4->lock);
1505
1506                 ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
1507                 if (unlikely(ret))
1508                         goto unwind_pdp;
1509
1510                 spin_lock(&pml4->lock);
1511                 atomic_dec(&pdp->used);
1512         }
1513         spin_unlock(&pml4->lock);
1514         goto out;
1515
1516 unwind_pdp:
1517         spin_lock(&pml4->lock);
1518         if (atomic_dec_and_test(&pdp->used)) {
1519                 gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
1520                 pml4->entry[pml4e] = vm->scratch_pdp;
1521                 GEM_BUG_ON(alloc);
1522                 alloc = pdp; /* defer the free until after the lock */
1523         }
1524         spin_unlock(&pml4->lock);
1525 unwind:
1526         gen8_ppgtt_clear_4lvl(vm, from, start - from);
1527 out:
1528         if (alloc)
1529                 free_pd(vm, alloc);
1530         return ret;
1531 }
1532
1533 static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
1534 {
1535         struct i915_address_space *vm = &ppgtt->vm;
1536         struct i915_page_directory *pdp = ppgtt->pd;
1537         struct i915_page_directory *pd;
1538         u64 start = 0, length = ppgtt->vm.total;
1539         u64 from = start;
1540         unsigned int pdpe;
1541
1542         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1543                 pd = alloc_pd(vm);
1544                 if (IS_ERR(pd))
1545                         goto unwind;
1546
1547                 init_pd_with_page(vm, pd, vm->scratch_pt);
1548                 gen8_ppgtt_set_pdpe(pdp, pd, pdpe);
1549
1550                 atomic_inc(&pdp->used);
1551         }
1552
1553         atomic_inc(&pdp->used); /* never remove */
1554
1555         return 0;
1556
1557 unwind:
1558         start -= from;
1559         gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
1560                 gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe);
1561                 free_pd(vm, pd);
1562         }
1563         atomic_set(&pdp->used, 0);
1564         return -ENOMEM;
1565 }
1566
1567 static void ppgtt_init(struct drm_i915_private *i915,
1568                        struct i915_ppgtt *ppgtt)
1569 {
1570         ppgtt->vm.i915 = i915;
1571         ppgtt->vm.dma = &i915->drm.pdev->dev;
1572         ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
1573
1574         i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
1575
1576         ppgtt->vm.vma_ops.bind_vma    = ppgtt_bind_vma;
1577         ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
1578         ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
1579         ppgtt->vm.vma_ops.clear_pages = clear_pages;
1580 }
1581
1582 /*
1583  * GEN8 legacy ppgtt programming is accomplished through at most 4 PDP
1584  * registers, with a net effect resembling a 2-level page table in normal x86
1585  * terms. Each PDP represents 1GB of memory: 4 * 512 * 512 * 4096 = 4GB of
1586  * legacy 32b address space.
1587  *
1588  */
1589 static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
1590 {
1591         struct i915_ppgtt *ppgtt;
1592         int err;
1593
1594         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
1595         if (!ppgtt)
1596                 return ERR_PTR(-ENOMEM);
1597
1598         ppgtt_init(i915, ppgtt);
1599
1600         /*
1601          * From bdw, there is hw support for read-only pages in the PPGTT.
1602          *
1603          * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
1604          * for now.
1605          */
1606         ppgtt->vm.has_read_only = INTEL_GEN(i915) != 11;
1607
1608         /* There are only a few exceptions for gen >= 6: chv and bxt.
1609          * And we are not sure about the latter, so play safe for now.
1610          */
1611         if (IS_CHERRYVIEW(i915) || IS_BROXTON(i915))
1612                 ppgtt->vm.pt_kmap_wc = true;
1613
1614         err = gen8_init_scratch(&ppgtt->vm);
1615         if (err)
1616                 goto err_free;
1617
1618         ppgtt->pd = __alloc_pd();
1619         if (!ppgtt->pd) {
1620                 err = -ENOMEM;
1621                 goto err_free_scratch;
1622         }
1623
1624         if (i915_vm_is_4lvl(&ppgtt->vm)) {
1625                 err = setup_px(&ppgtt->vm, ppgtt->pd);
1626                 if (err)
1627                         goto err_free_pdp;
1628
1629                 init_pd(&ppgtt->vm, ppgtt->pd, ppgtt->vm.scratch_pdp);
1630
1631                 ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
1632                 ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
1633                 ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
1634         } else {
1635                 /*
1636                  * We don't need to set up dma for the top level pdp, only
1637                  * for entries. So point entries to scratch.
1638                  */
1639                 memset_p(ppgtt->pd->entry, ppgtt->vm.scratch_pd,
1640                          GEN8_3LVL_PDPES);
1641
1642                 if (intel_vgpu_active(i915)) {
1643                         err = gen8_preallocate_top_level_pdp(ppgtt);
1644                         if (err)
1645                                 goto err_free_pdp;
1646                 }
1647
1648                 ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
1649                 ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
1650                 ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl;
1651         }
1652
1653         if (intel_vgpu_active(i915))
1654                 gen8_ppgtt_notify_vgt(ppgtt, true);
1655
1656         ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
1657
1658         return ppgtt;
1659
1660 err_free_pdp:
1661         free_pd(&ppgtt->vm, ppgtt->pd);
1662 err_free_scratch:
1663         gen8_free_scratch(&ppgtt->vm);
1664 err_free:
1665         kfree(ppgtt);
1666         return ERR_PTR(err);
1667 }
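
/*
 * An illustrative sketch, not part of the driver: how a legacy 32b address
 * decomposes under the layout described in the comment above
 * gen8_ppgtt_create(), i.e. 4 PDPs x 512 PDEs x 512 PTEs x 4096B pages = 4GB.
 * The helper name and the open-coded shifts/masks are assumptions for the
 * example only; the driver derives the indices via its own macros.
 */
static inline void gen8_legacy_addr_split_example(u64 addr)
{
	unsigned int pdpe = (addr >> 30) & 0x3;   /* which of the 4 PDPs */
	unsigned int pde  = (addr >> 21) & 0x1ff; /* which of 512 PDEs */
	unsigned int pte  = (addr >> 12) & 0x1ff; /* which of 512 PTEs */
	unsigned int off  = addr & 0xfff;         /* offset inside the 4KiB page */

	(void)pdpe; (void)pde; (void)pte; (void)off;
}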
1668
1669 /* Write the PDE pointing at page table @pt into slot @pde of the ppgtt's page directory */
1670 static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
1671                                   const unsigned int pde,
1672                                   const struct i915_page_table *pt)
1673 {
1674         /* Caller needs to make sure the write completes if necessary */
1675         iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
1676                   ppgtt->pd_addr + pde);
1677 }
1678
1679 static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
1680 {
1681         struct intel_engine_cs *engine;
1682         u32 ecochk, ecobits;
1683         enum intel_engine_id id;
1684
1685         ecobits = I915_READ(GAC_ECO_BITS);
1686         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1687
1688         ecochk = I915_READ(GAM_ECOCHK);
1689         if (IS_HASWELL(dev_priv)) {
1690                 ecochk |= ECOCHK_PPGTT_WB_HSW;
1691         } else {
1692                 ecochk |= ECOCHK_PPGTT_LLC_IVB;
1693                 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1694         }
1695         I915_WRITE(GAM_ECOCHK, ecochk);
1696
1697         for_each_engine(engine, dev_priv, id) {
1698                 /* GFX_MODE is per-ring on gen7+ */
1699                 ENGINE_WRITE(engine,
1700                              RING_MODE_GEN7,
1701                              _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1702         }
1703 }
1704
1705 static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
1706 {
1707         u32 ecochk, gab_ctl, ecobits;
1708
1709         ecobits = I915_READ(GAC_ECO_BITS);
1710         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1711                    ECOBITS_PPGTT_CACHE64B);
1712
1713         gab_ctl = I915_READ(GAB_CTL);
1714         I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1715
1716         ecochk = I915_READ(GAM_ECOCHK);
1717         I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1718
1719         if (HAS_PPGTT(dev_priv)) /* may be disabled for VT-d */
1720                 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1721 }
1722
1723 /* PPGTT support for Sandybridge/Gen6 and later */
1724 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1725                                    u64 start, u64 length)
1726 {
1727         struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
1728         const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
1729         const gen6_pte_t scratch_pte = vm->scratch_pte;
1730         unsigned int pde = first_entry / GEN6_PTES;
1731         unsigned int pte = first_entry % GEN6_PTES;
1732         unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
1733
1734         while (num_entries) {
1735                 struct i915_page_table * const pt =
1736                         i915_pt_entry(ppgtt->base.pd, pde++);
1737                 const unsigned int count = min(num_entries, GEN6_PTES - pte);
1738                 gen6_pte_t *vaddr;
1739
1740                 GEM_BUG_ON(pt == vm->scratch_pt);
1741
1742                 num_entries -= count;
1743
1744                 GEM_BUG_ON(count > atomic_read(&pt->used));
1745                 if (!atomic_sub_return(count, &pt->used))
1746                         ppgtt->scan_for_unused_pt = true;
1747
1748                 /*
1749                  * Note that the hw doesn't support removing PDE on the fly
1750                  * (they are cached inside the context with no means to
1751                  * invalidate the cache), so we can only reset the PTE
1752                  * entries back to scratch.
1753                  */
1754
1755                 vaddr = kmap_atomic_px(pt);
1756                 memset32(vaddr + pte, scratch_pte, count);
1757                 kunmap_atomic(vaddr);
1758
1759                 pte = 0;
1760         }
1761 }
1762
1763 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1764                                       struct i915_vma *vma,
1765                                       enum i915_cache_level cache_level,
1766                                       u32 flags)
1767 {
1768         struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1769         struct i915_page_directory * const pd = ppgtt->pd;
1770         unsigned first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
1771         unsigned act_pt = first_entry / GEN6_PTES;
1772         unsigned act_pte = first_entry % GEN6_PTES;
1773         const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
1774         struct sgt_dma iter = sgt_dma(vma);
1775         gen6_pte_t *vaddr;
1776
1777         GEM_BUG_ON(i915_pt_entry(pd, act_pt) == vm->scratch_pt);
1778
1779         vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
1780         do {
1781                 vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
1782
1783                 iter.dma += I915_GTT_PAGE_SIZE;
1784                 if (iter.dma == iter.max) {
1785                         iter.sg = __sg_next(iter.sg);
1786                         if (!iter.sg)
1787                                 break;
1788
1789                         iter.dma = sg_dma_address(iter.sg);
1790                         iter.max = iter.dma + iter.sg->length;
1791                 }
1792
1793                 if (++act_pte == GEN6_PTES) {
1794                         kunmap_atomic(vaddr);
1795                         vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
1796                         act_pte = 0;
1797                 }
1798         } while (1);
1799         kunmap_atomic(vaddr);
1800
1801         vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1802 }
1803
1804 static int gen6_alloc_va_range(struct i915_address_space *vm,
1805                                u64 start, u64 length)
1806 {
1807         struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
1808         struct i915_page_directory * const pd = ppgtt->base.pd;
1809         struct i915_page_table *pt, *alloc = NULL;
1810         intel_wakeref_t wakeref;
1811         u64 from = start;
1812         unsigned int pde;
1813         bool flush = false;
1814         int ret = 0;
1815
1816         wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
1817
1818         spin_lock(&pd->lock);
1819         gen6_for_each_pde(pt, pd, start, length, pde) {
1820                 const unsigned int count = gen6_pte_count(start, length);
1821
1822                 if (pt == vm->scratch_pt) {
1823                         spin_unlock(&pd->lock);
1824
1825                         pt = fetch_and_zero(&alloc);
1826                         if (!pt)
1827                                 pt = alloc_pt(vm);
1828                         if (IS_ERR(pt)) {
1829                                 ret = PTR_ERR(pt);
1830                                 goto unwind_out;
1831                         }
1832
1833                         gen6_initialize_pt(vm, pt);
1834
1835                         spin_lock(&pd->lock);
1836                         if (pd->entry[pde] == vm->scratch_pt) {
1837                                 pd->entry[pde] = pt;
1838                                 if (i915_vma_is_bound(ppgtt->vma,
1839                                                       I915_VMA_GLOBAL_BIND)) {
1840                                         gen6_write_pde(ppgtt, pde, pt);
1841                                         flush = true;
1842                                 }
1843                         } else {
1844                                 alloc = pt;
1845                                 pt = pd->entry[pde];
1846                         }
1847                 }
1848
1849                 atomic_add(count, &pt->used);
1850         }
1851         spin_unlock(&pd->lock);
1852
1853         if (flush) {
1854                 mark_tlbs_dirty(&ppgtt->base);
1855                 gen6_ggtt_invalidate(vm->i915);
1856         }
1857
1858         goto out;
1859
1860 unwind_out:
1861         gen6_ppgtt_clear_range(vm, from, start - from);
1862 out:
1863         if (alloc)
1864                 free_pt(vm, alloc);
1865         intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
1866         return ret;
1867 }
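
/*
 * Note on the pattern used by gen6_alloc_va_range() above (and its gen8
 * counterparts): a replacement page table is allocated optimistically with
 * pd->lock dropped, then rechecked under the lock. If another thread raced
 * in and already installed a table, the local copy is parked in 'alloc' and
 * freed only after the lock is released ("defer the free"), so no
 * allocation or free ever happens inside the spinlock.
 */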
1868
1869 static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
1870 {
1871         struct i915_address_space * const vm = &ppgtt->base.vm;
1872         struct i915_page_directory * const pd = ppgtt->base.pd;
1873         struct i915_page_table *unused;
1874         u32 pde;
1875         int ret;
1876
1877         ret = setup_scratch_page(vm, __GFP_HIGHMEM);
1878         if (ret)
1879                 return ret;
1880
1881         vm->scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
1882                                          I915_CACHE_NONE,
1883                                          PTE_READ_ONLY);
1884
1885         vm->scratch_pt = alloc_pt(vm);
1886         if (IS_ERR(vm->scratch_pt)) {
1887                 cleanup_scratch_page(vm);
1888                 return PTR_ERR(vm->scratch_pt);
1889         }
1890
1891         gen6_initialize_pt(vm, vm->scratch_pt);
1892
1893         gen6_for_all_pdes(unused, pd, pde)
1894                 pd->entry[pde] = vm->scratch_pt;
1895
1896         return 0;
1897 }
1898
1899 static void gen6_ppgtt_free_scratch(struct i915_address_space *vm)
1900 {
1901         free_pt(vm, vm->scratch_pt);
1902         cleanup_scratch_page(vm);
1903 }
1904
1905 static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
1906 {
1907         struct i915_page_directory * const pd = ppgtt->base.pd;
1908         struct i915_page_table *pt;
1909         u32 pde;
1910
1911         gen6_for_all_pdes(pt, pd, pde)
1912                 if (pt != ppgtt->base.vm.scratch_pt)
1913                         free_pt(&ppgtt->base.vm, pt);
1914 }
1915
1916 struct gen6_ppgtt_cleanup_work {
1917         struct work_struct base;
1918         struct i915_vma *vma;
1919 };
1920
1921 static void gen6_ppgtt_cleanup_work(struct work_struct *wrk)
1922 {
1923         struct gen6_ppgtt_cleanup_work *work =
1924                 container_of(wrk, typeof(*work), base);
1925         /* Side note, vma->vm is the GGTT not the ppgtt we just destroyed! */
1926         struct drm_i915_private *i915 = work->vma->vm->i915;
1927
1928         mutex_lock(&i915->drm.struct_mutex);
1929         i915_vma_destroy(work->vma);
1930         mutex_unlock(&i915->drm.struct_mutex);
1931
1932         kfree(work);
1933 }
1934
1935 static int nop_set_pages(struct i915_vma *vma)
1936 {
1937         return -ENODEV;
1938 }
1939
1940 static void nop_clear_pages(struct i915_vma *vma)
1941 {
1942 }
1943
1944 static int nop_bind(struct i915_vma *vma,
1945                     enum i915_cache_level cache_level,
1946                     u32 unused)
1947 {
1948         return -ENODEV;
1949 }
1950
1951 static void nop_unbind(struct i915_vma *vma)
1952 {
1953 }
1954
1955 static const struct i915_vma_ops nop_vma_ops = {
1956         .set_pages = nop_set_pages,
1957         .clear_pages = nop_clear_pages,
1958         .bind_vma = nop_bind,
1959         .unbind_vma = nop_unbind,
1960 };
1961
1962 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1963 {
1964         struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
1965         struct gen6_ppgtt_cleanup_work *work = ppgtt->work;
1966
1967         /* FIXME remove the struct_mutex to bring the locking under control */
1968         INIT_WORK(&work->base, gen6_ppgtt_cleanup_work);
1969         work->vma = ppgtt->vma;
1970         work->vma->ops = &nop_vma_ops;
1971         schedule_work(&work->base);
1972
1973         gen6_ppgtt_free_pd(ppgtt);
1974         gen6_ppgtt_free_scratch(vm);
1975         kfree(ppgtt->base.pd);
1976 }
1977
1978 static int pd_vma_set_pages(struct i915_vma *vma)
1979 {
1980         vma->pages = ERR_PTR(-ENODEV);
1981         return 0;
1982 }
1983
1984 static void pd_vma_clear_pages(struct i915_vma *vma)
1985 {
1986         GEM_BUG_ON(!vma->pages);
1987
1988         vma->pages = NULL;
1989 }
1990
1991 static int pd_vma_bind(struct i915_vma *vma,
1992                        enum i915_cache_level cache_level,
1993                        u32 unused)
1994 {
1995         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
1996         struct gen6_ppgtt *ppgtt = vma->private;
1997         u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
1998         struct i915_page_table *pt;
1999         unsigned int pde;
2000
2001         ppgtt->base.pd->base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
2002         ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
2003
2004         gen6_for_all_pdes(pt, ppgtt->base.pd, pde)
2005                 gen6_write_pde(ppgtt, pde, pt);
2006
2007         mark_tlbs_dirty(&ppgtt->base);
2008         gen6_ggtt_invalidate(ppgtt->base.vm.i915);
2009
2010         return 0;
2011 }
2012
2013 static void pd_vma_unbind(struct i915_vma *vma)
2014 {
2015         struct gen6_ppgtt *ppgtt = vma->private;
2016         struct i915_page_directory * const pd = ppgtt->base.pd;
2017         struct i915_page_table * const scratch_pt = ppgtt->base.vm.scratch_pt;
2018         struct i915_page_table *pt;
2019         unsigned int pde;
2020
2021         if (!ppgtt->scan_for_unused_pt)
2022                 return;
2023
2024         /* Free all no longer used page tables */
2025         gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
2026                 if (atomic_read(&pt->used) || pt == scratch_pt)
2027                         continue;
2028
2029                 free_pt(&ppgtt->base.vm, pt);
2030                 pd->entry[pde] = scratch_pt;
2031         }
2032
2033         ppgtt->scan_for_unused_pt = false;
2034 }
2035
2036 static const struct i915_vma_ops pd_vma_ops = {
2037         .set_pages = pd_vma_set_pages,
2038         .clear_pages = pd_vma_clear_pages,
2039         .bind_vma = pd_vma_bind,
2040         .unbind_vma = pd_vma_unbind,
2041 };
2042
2043 static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
2044 {
2045         struct drm_i915_private *i915 = ppgtt->base.vm.i915;
2046         struct i915_ggtt *ggtt = &i915->ggtt;
2047         struct i915_vma *vma;
2048
2049         GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
2050         GEM_BUG_ON(size > ggtt->vm.total);
2051
2052         vma = i915_vma_alloc();
2053         if (!vma)
2054                 return ERR_PTR(-ENOMEM);
2055
2056         i915_active_init(i915, &vma->active, NULL);
2057         INIT_ACTIVE_REQUEST(&vma->last_fence);
2058
2059         vma->vm = &ggtt->vm;
2060         vma->ops = &pd_vma_ops;
2061         vma->private = ppgtt;
2062
2063         vma->size = size;
2064         vma->fence_size = size;
2065         vma->flags = I915_VMA_GGTT;
2066         vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
2067
2068         INIT_LIST_HEAD(&vma->obj_link);
2069         INIT_LIST_HEAD(&vma->closed_link);
2070
2071         mutex_lock(&vma->vm->mutex);
2072         list_add(&vma->vm_link, &vma->vm->unbound_list);
2073         mutex_unlock(&vma->vm->mutex);
2074
2075         return vma;
2076 }
2077
2078 int gen6_ppgtt_pin(struct i915_ppgtt *base)
2079 {
2080         struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
2081         int err;
2082
2083         GEM_BUG_ON(ppgtt->base.vm.closed);
2084
2085         /*
2086          * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
2087          * which will be pinned into every active context.
2088          * (When vma->pin_count becomes atomic, I expect we will naturally
2089          * need a larger, unpacked, type and kill this redundancy.)
2090          */
2091         if (ppgtt->pin_count++)
2092                 return 0;
2093
2094         /*
2095          * PPGTT PDEs reside in the GGTT and consist of 512 entries. The
2096          * allocator works in address space sizes, so it's multiplied by page
2097          * size. We allocate at the top of the GTT to avoid fragmentation.
2098          */
2099         err = i915_vma_pin(ppgtt->vma,
2100                            0, GEN6_PD_ALIGN,
2101                            PIN_GLOBAL | PIN_HIGH);
2102         if (err)
2103                 goto unpin;
2104
2105         return 0;
2106
2107 unpin:
2108         ppgtt->pin_count = 0;
2109         return err;
2110 }
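
/*
 * Worked arithmetic for the pin above, assuming GEN6_PD_SIZE covers 512 PDEs
 * with 4KiB GTT pages: since the GGTT allocator works in address-space units,
 * the page-directory vma spans 512 * I915_GTT_PAGE_SIZE = 2MiB of GGTT
 * address space, one GGTT PTE slot per PDE written by gen6_write_pde(), and
 * is placed PIN_HIGH at the top of the GTT as the comment notes.
 */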
2111
2112 void gen6_ppgtt_unpin(struct i915_ppgtt *base)
2113 {
2114         struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
2115
2116         GEM_BUG_ON(!ppgtt->pin_count);
2117         if (--ppgtt->pin_count)
2118                 return;
2119
2120         i915_vma_unpin(ppgtt->vma);
2121 }
2122
2123 void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
2124 {
2125         struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
2126
2127         if (!ppgtt->pin_count)
2128                 return;
2129
2130         ppgtt->pin_count = 0;
2131         i915_vma_unpin(ppgtt->vma);
2132 }
2133
2134 static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
2135 {
2136         struct i915_ggtt * const ggtt = &i915->ggtt;
2137         struct gen6_ppgtt *ppgtt;
2138         int err;
2139
2140         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2141         if (!ppgtt)
2142                 return ERR_PTR(-ENOMEM);
2143
2144         ppgtt_init(i915, &ppgtt->base);
2145
2146         ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
2147         ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
2148         ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
2149         ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
2150
2151         ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
2152
2153         ppgtt->work = kmalloc(sizeof(*ppgtt->work), GFP_KERNEL);
2154         if (!ppgtt->work) {
2155                 err = -ENOMEM;
2156                 goto err_free;
2157         }
2158
2159         ppgtt->base.pd = __alloc_pd();
2160         if (!ppgtt->base.pd) {
2161                 err = -ENOMEM;
2162                 goto err_work;
2163         }
2164
2165         err = gen6_ppgtt_init_scratch(ppgtt);
2166         if (err)
2167                 goto err_pd;
2168
2169         ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
2170         if (IS_ERR(ppgtt->vma)) {
2171                 err = PTR_ERR(ppgtt->vma);
2172                 goto err_scratch;
2173         }
2174
2175         return &ppgtt->base;
2176
2177 err_scratch:
2178         gen6_ppgtt_free_scratch(&ppgtt->base.vm);
2179 err_pd:
2180         kfree(ppgtt->base.pd);
2181 err_work:
2182         kfree(ppgtt->work);
2183 err_free:
2184         kfree(ppgtt);
2185         return ERR_PTR(err);
2186 }
2187
2188 static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
2189 {
2190         /* This function is for gtt-related workarounds. It is called on
2191          * driver load and after a GPU reset, so you can place workarounds
2192          * here even if they get overwritten by GPU reset.
2193          */
2194         /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
2195         if (IS_BROADWELL(dev_priv))
2196                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2197         else if (IS_CHERRYVIEW(dev_priv))
2198                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2199         else if (IS_GEN9_LP(dev_priv))
2200                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2201         else if (INTEL_GEN(dev_priv) >= 9)
2202                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2203
2204         /*
2205          * To support 64K PTEs we need to first enable the use of the
2206          * Intermediate-Page-Size (IPS) bit of the PDE field via some magical
2207          * mmio, otherwise the page-walker will simply ignore the IPS bit. This
2208          * shouldn't be needed after GEN10.
2209          *
2210          * 64K pages were first introduced from BDW+, although technically they
2211          * only *work* from gen9+. For pre-BDW we instead have the option for
2212          * 32K pages, but we don't currently have any support for it in our
2213          * driver.
2214          */
2215         if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) &&
2216             INTEL_GEN(dev_priv) <= 10)
2217                 I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA,
2218                            I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) |
2219                            GAMW_ECO_ENABLE_64K_IPS_FIELD);
2220 }
2221
2222 int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
2223 {
2224         gtt_write_workarounds(dev_priv);
2225
2226         if (IS_GEN(dev_priv, 6))
2227                 gen6_ppgtt_enable(dev_priv);
2228         else if (IS_GEN(dev_priv, 7))
2229                 gen7_ppgtt_enable(dev_priv);
2230
2231         return 0;
2232 }
2233
2234 static struct i915_ppgtt *
2235 __ppgtt_create(struct drm_i915_private *i915)
2236 {
2237         if (INTEL_GEN(i915) < 8)
2238                 return gen6_ppgtt_create(i915);
2239         else
2240                 return gen8_ppgtt_create(i915);
2241 }
2242
2243 struct i915_ppgtt *
2244 i915_ppgtt_create(struct drm_i915_private *i915)
2245 {
2246         struct i915_ppgtt *ppgtt;
2247
2248         ppgtt = __ppgtt_create(i915);
2249         if (IS_ERR(ppgtt))
2250                 return ppgtt;
2251
2252         trace_i915_ppgtt_create(&ppgtt->vm);
2253
2254         return ppgtt;
2255 }
2256
2257 static void ppgtt_destroy_vma(struct i915_address_space *vm)
2258 {
2259         struct list_head *phases[] = {
2260                 &vm->bound_list,
2261                 &vm->unbound_list,
2262                 NULL,
2263         }, **phase;
2264
2265         vm->closed = true;
2266         for (phase = phases; *phase; phase++) {
2267                 struct i915_vma *vma, *vn;
2268
2269                 list_for_each_entry_safe(vma, vn, *phase, vm_link)
2270                         i915_vma_destroy(vma);
2271         }
2272 }
2273
2274 void i915_vm_release(struct kref *kref)
2275 {
2276         struct i915_address_space *vm =
2277                 container_of(kref, struct i915_address_space, ref);
2278
2279         GEM_BUG_ON(i915_is_ggtt(vm));
2280         trace_i915_ppgtt_release(vm);
2281
2282         ppgtt_destroy_vma(vm);
2283
2284         GEM_BUG_ON(!list_empty(&vm->bound_list));
2285         GEM_BUG_ON(!list_empty(&vm->unbound_list));
2286
2287         vm->cleanup(vm);
2288         i915_address_space_fini(vm);
2289
2290         kfree(vm);
2291 }
2292
2293 /* Certain Gen5 chipsets require idling the GPU before
2294  * unmapping anything from the GTT when VT-d is enabled.
2295  */
2296 static bool needs_idle_maps(struct drm_i915_private *dev_priv)
2297 {
2298         /* Query intel_iommu to see if we need the workaround. Presumably that
2299          * was loaded first.
2300          */
2301         return IS_GEN(dev_priv, 5) && IS_MOBILE(dev_priv) && intel_vtd_active();
2302 }
2303
2304 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
2305 {
2306         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2307
2308         /* Don't bother messing with faults pre GEN6 as we have little
2309          * documentation supporting that it's a good idea.
2310          */
2311         if (INTEL_GEN(dev_priv) < 6)
2312                 return;
2313
2314         i915_check_and_clear_faults(dev_priv);
2315
2316         ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
2317
2318         i915_ggtt_invalidate(dev_priv);
2319 }
2320
2321 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
2322                                struct sg_table *pages)
2323 {
2324         do {
2325                 if (dma_map_sg_attrs(&obj->base.dev->pdev->dev,
2326                                      pages->sgl, pages->nents,
2327                                      PCI_DMA_BIDIRECTIONAL,
2328                                      DMA_ATTR_NO_WARN))
2329                         return 0;
2330
2331                 /*
2332                  * If the DMA remap fails, one cause can be that we have
2333                  * too many objects pinned in a small remapping table,
2334                  * such as swiotlb. Incrementally purge all other objects and
2335                  * try again - if there are no more pages to remove from
2336                  * the DMA remapper, i915_gem_shrink will return 0.
2337                  */
2338                 GEM_BUG_ON(obj->mm.pages == pages);
2339         } while (i915_gem_shrink(to_i915(obj->base.dev),
2340                                  obj->base.size >> PAGE_SHIFT, NULL,
2341                                  I915_SHRINK_BOUND |
2342                                  I915_SHRINK_UNBOUND));
2343
2344         return -ENOSPC;
2345 }
2346
2347 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2348 {
2349         writeq(pte, addr);
2350 }
2351
2352 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
2353                                   dma_addr_t addr,
2354                                   u64 offset,
2355                                   enum i915_cache_level level,
2356                                   u32 unused)
2357 {
2358         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2359         gen8_pte_t __iomem *pte =
2360                 (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
2361
2362         gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
2363
2364         ggtt->invalidate(vm->i915);
2365 }
2366
2367 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2368                                      struct i915_vma *vma,
2369                                      enum i915_cache_level level,
2370                                      u32 flags)
2371 {
2372         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2373         struct sgt_iter sgt_iter;
2374         gen8_pte_t __iomem *gtt_entries;
2375         const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
2376         dma_addr_t addr;
2377
2378         /*
2379          * Note that we ignore PTE_READ_ONLY here. The caller must be careful
2380          * not to allow the user to override access to a read only page.
2381          */
2382
2383         gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
2384         gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
2385         for_each_sgt_dma(addr, sgt_iter, vma->pages)
2386                 gen8_set_pte(gtt_entries++, pte_encode | addr);
2387
2388         /*
2389          * We want to flush the TLBs only after we're certain all the PTE
2390          * updates have finished.
2391          */
2392         ggtt->invalidate(vm->i915);
2393 }
2394
2395 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
2396                                   dma_addr_t addr,
2397                                   u64 offset,
2398                                   enum i915_cache_level level,
2399                                   u32 flags)
2400 {
2401         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2402         gen6_pte_t __iomem *pte =
2403                 (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
2404
2405         iowrite32(vm->pte_encode(addr, level, flags), pte);
2406
2407         ggtt->invalidate(vm->i915);
2408 }
2409
2410 /*
2411  * Binds an object into the global gtt with the specified cache level. The object
2412  * will be accessible to the GPU via commands whose operands reference offsets
2413  * within the global GTT as well as accessible by the CPU through the GMADR
2414  * mapped BAR (dev_priv->mm.gtt->gtt).
2415  */
2416 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2417                                      struct i915_vma *vma,
2418                                      enum i915_cache_level level,
2419                                      u32 flags)
2420 {
2421         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2422         gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
2423         unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
2424         struct sgt_iter iter;
2425         dma_addr_t addr;
2426         for_each_sgt_dma(addr, iter, vma->pages)
2427                 iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
2428
2429         /*
2430          * We want to flush the TLBs only after we're certain all the PTE
2431          * updates have finished.
2432          */
2433         ggtt->invalidate(vm->i915);
2434 }
2435
2436 static void nop_clear_range(struct i915_address_space *vm,
2437                             u64 start, u64 length)
2438 {
2439 }
2440
2441 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2442                                   u64 start, u64 length)
2443 {
2444         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2445         unsigned first_entry = start / I915_GTT_PAGE_SIZE;
2446         unsigned num_entries = length / I915_GTT_PAGE_SIZE;
2447         const gen8_pte_t scratch_pte = vm->scratch_pte;
2448         gen8_pte_t __iomem *gtt_base =
2449                 (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2450         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2451         int i;
2452
2453         if (WARN(num_entries > max_entries,
2454                  "First entry = %d; Num entries = %d (max=%d)\n",
2455                  first_entry, num_entries, max_entries))
2456                 num_entries = max_entries;
2457
2458         for (i = 0; i < num_entries; i++)
2459                 gen8_set_pte(&gtt_base[i], scratch_pte);
2460 }
2461
2462 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
2463 {
2464         struct drm_i915_private *dev_priv = vm->i915;
2465
2466         /*
2467          * Make sure the internal GAM fifo has been cleared of all GTT
2468          * writes before exiting stop_machine(). This guarantees that
2469          * any aperture accesses waiting to start in another process
2470          * cannot back up behind the GTT writes causing a hang.
2471          * The register can be any arbitrary GAM register.
2472          */
2473         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2474 }
2475
2476 struct insert_page {
2477         struct i915_address_space *vm;
2478         dma_addr_t addr;
2479         u64 offset;
2480         enum i915_cache_level level;
2481 };
2482
2483 static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
2484 {
2485         struct insert_page *arg = _arg;
2486
2487         gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
2488         bxt_vtd_ggtt_wa(arg->vm);
2489
2490         return 0;
2491 }
2492
2493 static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
2494                                           dma_addr_t addr,
2495                                           u64 offset,
2496                                           enum i915_cache_level level,
2497                                           u32 unused)
2498 {
2499         struct insert_page arg = { vm, addr, offset, level };
2500
2501         stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
2502 }
2503
2504 struct insert_entries {
2505         struct i915_address_space *vm;
2506         struct i915_vma *vma;
2507         enum i915_cache_level level;
2508         u32 flags;
2509 };
2510
2511 static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
2512 {
2513         struct insert_entries *arg = _arg;
2514
2515         gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
2516         bxt_vtd_ggtt_wa(arg->vm);
2517
2518         return 0;
2519 }
2520
2521 static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2522                                              struct i915_vma *vma,
2523                                              enum i915_cache_level level,
2524                                              u32 flags)
2525 {
2526         struct insert_entries arg = { vm, vma, level, flags };
2527
2528         stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
2529 }
2530
2531 struct clear_range {
2532         struct i915_address_space *vm;
2533         u64 start;
2534         u64 length;
2535 };
2536
2537 static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
2538 {
2539         struct clear_range *arg = _arg;
2540
2541         gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
2542         bxt_vtd_ggtt_wa(arg->vm);
2543
2544         return 0;
2545 }
2546
2547 static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
2548                                           u64 start,
2549                                           u64 length)
2550 {
2551         struct clear_range arg = { vm, start, length };
2552
2553         stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
2554 }
2555
2556 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2557                                   u64 start, u64 length)
2558 {
2559         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2560         unsigned first_entry = start / I915_GTT_PAGE_SIZE;
2561         unsigned num_entries = length / I915_GTT_PAGE_SIZE;
2562         gen6_pte_t scratch_pte, __iomem *gtt_base =
2563                 (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2564         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2565         int i;
2566
2567         if (WARN(num_entries > max_entries,
2568                  "First entry = %d; Num entries = %d (max=%d)\n",
2569                  first_entry, num_entries, max_entries))
2570                 num_entries = max_entries;
2571
2572         scratch_pte = vm->scratch_pte;
2573
2574         for (i = 0; i < num_entries; i++)
2575                 iowrite32(scratch_pte, &gtt_base[i]);
2576 }
2577
2578 static void i915_ggtt_insert_page(struct i915_address_space *vm,
2579                                   dma_addr_t addr,
2580                                   u64 offset,
2581                                   enum i915_cache_level cache_level,
2582                                   u32 unused)
2583 {
2584         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2585                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2586
2587         intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
2588 }
2589
2590 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2591                                      struct i915_vma *vma,
2592                                      enum i915_cache_level cache_level,
2593                                      u32 unused)
2594 {
2595         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2596                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2597
2598         intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
2599                                     flags);
2600 }
2601
2602 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2603                                   u64 start, u64 length)
2604 {
2605         intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
2606 }
2607
2608 static int ggtt_bind_vma(struct i915_vma *vma,
2609                          enum i915_cache_level cache_level,
2610                          u32 flags)
2611 {
2612         struct drm_i915_private *i915 = vma->vm->i915;
2613         struct drm_i915_gem_object *obj = vma->obj;
2614         intel_wakeref_t wakeref;
2615         u32 pte_flags;
2616
2617         /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
2618         pte_flags = 0;
2619         if (i915_gem_object_is_readonly(obj))
2620                 pte_flags |= PTE_READ_ONLY;
2621
2622         with_intel_runtime_pm(&i915->runtime_pm, wakeref)
2623                 vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
2624
2625         vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
2626
2627         /*
2628          * Without aliasing PPGTT there's no difference between
2629          * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2630          * upgrade to both bound if we bind either to avoid double-binding.
2631          */
2632         vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
2633
2634         return 0;
2635 }
2636
2637 static void ggtt_unbind_vma(struct i915_vma *vma)
2638 {
2639         struct drm_i915_private *i915 = vma->vm->i915;
2640         intel_wakeref_t wakeref;
2641
2642         with_intel_runtime_pm(&i915->runtime_pm, wakeref)
2643                 vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2644 }
2645
2646 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2647                                  enum i915_cache_level cache_level,
2648                                  u32 flags)
2649 {
2650         struct drm_i915_private *i915 = vma->vm->i915;
2651         u32 pte_flags;
2652         int ret;
2653
2654         /* Currently applicable only to VLV */
2655         pte_flags = 0;
2656         if (i915_gem_object_is_readonly(vma->obj))
2657                 pte_flags |= PTE_READ_ONLY;
2658
2659         if (flags & I915_VMA_LOCAL_BIND) {
2660                 struct i915_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
2661
2662                 if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
2663                         ret = appgtt->vm.allocate_va_range(&appgtt->vm,
2664                                                            vma->node.start,
2665                                                            vma->size);
2666                         if (ret)
2667                                 return ret;
2668                 }
2669
2670                 appgtt->vm.insert_entries(&appgtt->vm, vma, cache_level,
2671                                           pte_flags);
2672         }
2673
2674         if (flags & I915_VMA_GLOBAL_BIND) {
2675                 intel_wakeref_t wakeref;
2676
2677                 with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
2678                         vma->vm->insert_entries(vma->vm, vma,
2679                                                 cache_level, pte_flags);
2680                 }
2681         }
2682
2683         return 0;
2684 }
2685
2686 static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
2687 {
2688         struct drm_i915_private *i915 = vma->vm->i915;
2689
2690         if (vma->flags & I915_VMA_GLOBAL_BIND) {
2691                 struct i915_address_space *vm = vma->vm;
2692                 intel_wakeref_t wakeref;
2693
2694                 with_intel_runtime_pm(&i915->runtime_pm, wakeref)
2695                         vm->clear_range(vm, vma->node.start, vma->size);
2696         }
2697
2698         if (vma->flags & I915_VMA_LOCAL_BIND) {
2699                 struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->vm;
2700
2701                 vm->clear_range(vm, vma->node.start, vma->size);
2702         }
2703 }
2704
2705 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
2706                                struct sg_table *pages)
2707 {
2708         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2709         struct device *kdev = &dev_priv->drm.pdev->dev;
2710         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2711
2712         if (unlikely(ggtt->do_idle_maps)) {
2713                 if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) {
2714                         DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2715                         /* Wait a bit, in hopes it avoids the hang */
2716                         udelay(10);
2717                 }
2718         }
2719
2720         dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
2721 }
2722
2723 static int ggtt_set_pages(struct i915_vma *vma)
2724 {
2725         int ret;
2726
2727         GEM_BUG_ON(vma->pages);
2728
2729         ret = i915_get_ggtt_vma_pages(vma);
2730         if (ret)
2731                 return ret;
2732
2733         vma->page_sizes = vma->obj->mm.page_sizes;
2734
2735         return 0;
2736 }
2737
2738 static void i915_gtt_color_adjust(const struct drm_mm_node *node,
2739                                   unsigned long color,
2740                                   u64 *start,
2741                                   u64 *end)
2742 {
2743         if (node->allocated && node->color != color)
2744                 *start += I915_GTT_PAGE_SIZE;
2745
2746         /* Also leave a space between the unallocated reserved node after the
2747          * GTT and any objects within the GTT, i.e. we use the color adjustment
2748          * to insert a guard page to prevent prefetches crossing over the
2749          * GTT boundary.
2750          */
2751         node = list_next_entry(node, node_list);
2752         if (node->color != color)
2753                 *end -= I915_GTT_PAGE_SIZE;
2754 }
2755
2756 static int init_aliasing_ppgtt(struct drm_i915_private *i915)
2757 {
2758         struct i915_ggtt *ggtt = &i915->ggtt;
2759         struct i915_ppgtt *ppgtt;
2760         int err;
2761
2762         ppgtt = i915_ppgtt_create(i915);
2763         if (IS_ERR(ppgtt))
2764                 return PTR_ERR(ppgtt);
2765
2766         if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
2767                 err = -ENODEV;
2768                 goto err_ppgtt;
2769         }
2770
2771         /*
2772          * Note we only pre-allocate as far as the end of the global
2773          * GTT. On 48b / 4-level page-tables, the difference is very,
2774          * very significant! We have to preallocate as GVT/vgpu does
2775          * not like the page directory disappearing.
2776          */
2777         err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
2778         if (err)
2779                 goto err_ppgtt;
2780
2781         i915->mm.aliasing_ppgtt = ppgtt;
2782
2783         GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
2784         ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
2785
2786         GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
2787         ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
2788
2789         return 0;
2790
2791 err_ppgtt:
2792         i915_vm_put(&ppgtt->vm);
2793         return err;
2794 }
2795
2796 static void fini_aliasing_ppgtt(struct drm_i915_private *i915)
2797 {
2798         struct i915_ggtt *ggtt = &i915->ggtt;
2799         struct i915_ppgtt *ppgtt;
2800
2801         ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
2802         if (!ppgtt)
2803                 return;
2804
2805         i915_vm_put(&ppgtt->vm);
2806
2807         ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
2808         ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
2809 }
2810
2811 static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
2812 {
2813         u64 size;
2814         int ret;
2815
2816         if (!USES_GUC(ggtt->vm.i915))
2817                 return 0;
2818
2819         GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
2820         size = ggtt->vm.total - GUC_GGTT_TOP;
2821
2822         ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
2823                                    GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
2824                                    PIN_NOEVICT);
2825         if (ret)
2826                 DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");
2827
2828         return ret;
2829 }
2830
2831 static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
2832 {
2833         if (drm_mm_node_allocated(&ggtt->uc_fw))
2834                 drm_mm_remove_node(&ggtt->uc_fw);
2835 }
2836
2837 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
2838 {
2839         /* Let GEM manage all of the aperture.
2840          *
2841          * However, leave one page at the end still bound to the scratch page.
2842          * There are a number of places where the hardware apparently prefetches
2843          * past the end of the object, and we've seen multiple hangs with the
2844          * GPU head pointer stuck in a batchbuffer bound at the last page of the
2845          * aperture.  One page should be enough to keep any prefetching inside
2846          * of the aperture.
2847          */
2848         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2849         unsigned long hole_start, hole_end;
2850         struct drm_mm_node *entry;
2851         int ret;
2852
2853         /*
2854          * GuC requires all resources that we're sharing with it to be placed in
2855          * non-WOPCM memory. If GuC is not present or not in use we still need a
2856          * small bias as ring wraparound at offset 0 sometimes hangs. No idea
2857          * why.
2858          */
2859         ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
2860                                intel_wopcm_guc_size(&dev_priv->wopcm));
2861
2862         ret = intel_vgt_balloon(dev_priv);
2863         if (ret)
2864                 return ret;
2865
2866         /* Reserve a mappable slot for our lockless error capture */
2867         ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture,
2868                                           PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
2869                                           0, ggtt->mappable_end,
2870                                           DRM_MM_INSERT_LOW);
2871         if (ret)
2872                 return ret;
2873
2874         /*
2875          * The upper portion of the GuC address space has a sizeable hole
2876          * (several MB) that is inaccessible by GuC. Reserve this range within
2877          * GGTT as it can comfortably hold GuC/HuC firmware images.
2878          */
2879         ret = ggtt_reserve_guc_top(ggtt);
2880         if (ret)
2881                 goto err_reserve;
2882
2883         /* Clear any non-preallocated blocks */
2884         drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
2885                 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2886                               hole_start, hole_end);
2887                 ggtt->vm.clear_range(&ggtt->vm, hole_start,
2888                                      hole_end - hole_start);
2889         }
2890
2891         /* And finally clear the reserved guard page */
2892         ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
2893
2894         if (INTEL_PPGTT(dev_priv) == INTEL_PPGTT_ALIASING) {
2895                 ret = init_aliasing_ppgtt(dev_priv);
2896                 if (ret)
2897                         goto err_appgtt;
2898         }
2899
2900         return 0;
2901
2902 err_appgtt:
2903         ggtt_release_guc_top(ggtt);
2904 err_reserve:
2905         drm_mm_remove_node(&ggtt->error_capture);
2906         return ret;
2907 }
2908
2909 /**
2910  * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2911  * @dev_priv: i915 device
2912  */
2913 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
2914 {
2915         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2916         struct i915_vma *vma, *vn;
2917         struct pagevec *pvec;
2918
2919         ggtt->vm.closed = true;
2920
2921         mutex_lock(&dev_priv->drm.struct_mutex);
2922         fini_aliasing_ppgtt(dev_priv);
2923
2924         list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
2925                 WARN_ON(i915_vma_unbind(vma));
2926
2927         if (drm_mm_node_allocated(&ggtt->error_capture))
2928                 drm_mm_remove_node(&ggtt->error_capture);
2929
2930         ggtt_release_guc_top(ggtt);
2931
2932         if (drm_mm_initialized(&ggtt->vm.mm)) {
2933                 intel_vgt_deballoon(dev_priv);
2934                 i915_address_space_fini(&ggtt->vm);
2935         }
2936
2937         ggtt->vm.cleanup(&ggtt->vm);
2938
2939         pvec = &dev_priv->mm.wc_stash.pvec;
2940         if (pvec->nr) {
2941                 set_pages_array_wb(pvec->pages, pvec->nr);
2942                 __pagevec_release(pvec);
2943         }
2944
2945         mutex_unlock(&dev_priv->drm.struct_mutex);
2946
2947         arch_phys_wc_del(ggtt->mtrr);
2948         io_mapping_fini(&ggtt->iomap);
2949
2950         i915_gem_cleanup_stolen(dev_priv);
2951 }
2952
2953 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2954 {
2955         snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2956         snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2957         return snb_gmch_ctl << 20;
2958 }
2959
2960 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2961 {
2962         bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2963         bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2964         if (bdw_gmch_ctl)
2965                 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2966
2967 #ifdef CONFIG_X86_32
2968         /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
2969         if (bdw_gmch_ctl > 4)
2970                 bdw_gmch_ctl = 4;
2971 #endif
2972
2973         return bdw_gmch_ctl << 20;
2974 }
2975
2976 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2977 {
2978         gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2979         gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2980
2981         if (gmch_ctrl)
2982                 return 1 << (20 + gmch_ctrl);
2983
2984         return 0;
2985 }
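
/*
 * Worked example for the GMCH decoders above, assuming 8-byte gen8 PTEs,
 * 4-byte gen6 PTEs and 4KiB GTT pages (the totals themselves are computed
 * elsewhere from these sizes): a BDW GGMS field of 3 decodes to
 * 1 << 3 = 8MiB of GSM, i.e. 8MiB / 8B = 1M PTEs mapping a 4GiB GGTT, while
 * an SNB GGMS field of 2 decodes to 2MiB of GSM, i.e. 2MiB / 4B = 512K PTEs
 * mapping a 2GiB GGTT.
 */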
2986
2987 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
2988 {
2989         struct drm_i915_private *dev_priv = ggtt->vm.i915;
2990         struct pci_dev *pdev = dev_priv->drm.pdev;
2991         phys_addr_t phys_addr;
2992         int ret;
2993
2994         /* For Modern GENs the PTEs and register space are split in the BAR */
2995         phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
2996
2997         /*
2998          * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
2999          * will be dropped. For WC mappings in general we have 64 byte burst
3000          * writes when the WC buffer is flushed, so we can't use it, but have to
3001          * resort to an uncached mapping. The WC issue is easily caught by the
3002          * readback check when writing GTT PTE entries.
3003          */
3004         if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10)
3005                 ggtt->gsm = ioremap_nocache(phys_addr, size);
3006         else
3007                 ggtt->gsm = ioremap_wc(phys_addr, size);
3008         if (!ggtt->gsm) {
3009                 DRM_ERROR("Failed to map the ggtt page table\n");
3010                 return -ENOMEM;
3011         }
3012
3013         ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
3014         if (ret) {
3015                 DRM_ERROR("Scratch setup failed\n");
3016                 /* iounmap will also get called at remove, but meh */
3017                 iounmap(ggtt->gsm);
3018                 return ret;
3019         }
3020
3021         ggtt->vm.scratch_pte =
3022                 ggtt->vm.pte_encode(ggtt->vm.scratch_page.daddr,
3023                                     I915_CACHE_NONE, 0);
3024
3025         return 0;
3026 }
3027
3028 static struct intel_ppat_entry *
3029 __alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value)
3030 {
3031         struct intel_ppat_entry *entry = &ppat->entries[index];
3032
3033         GEM_BUG_ON(index >= ppat->max_entries);
3034         GEM_BUG_ON(test_bit(index, ppat->used));
3035
3036         entry->ppat = ppat;
3037         entry->value = value;
3038         kref_init(&entry->ref);
3039         set_bit(index, ppat->used);
3040         set_bit(index, ppat->dirty);
3041
3042         return entry;
3043 }
3044
3045 static void __free_ppat_entry(struct intel_ppat_entry *entry)
3046 {
3047         struct intel_ppat *ppat = entry->ppat;
3048         unsigned int index = entry - ppat->entries;
3049
3050         GEM_BUG_ON(index >= ppat->max_entries);
3051         GEM_BUG_ON(!test_bit(index, ppat->used));
3052
3053         entry->value = ppat->clear_value;
3054         clear_bit(index, ppat->used);
3055         set_bit(index, ppat->dirty);
3056 }
3057
3058 /**
3059  * intel_ppat_get - get a usable PPAT entry
3060  * @i915: i915 device instance
3061  * @value: the PPAT value required by the caller
3062  *
3063  * The function searches for an existing PPAT entry that matches the
3064  * required value. A perfect match is reused directly. If only a partial
3065  * match is found, it checks whether a free PPAT index is available: if so,
3066  * a new entry is allocated for the required value and the HW is updated;
3067  * otherwise the best partially matching entry is used. Returns the chosen
3068  * entry, or ERR_PTR(-ENOSPC) if the table is full and nothing matches.
3069  */
3070 const struct intel_ppat_entry *
3071 intel_ppat_get(struct drm_i915_private *i915, u8 value)
3072 {
3073         struct intel_ppat *ppat = &i915->ppat;
3074         struct intel_ppat_entry *entry = NULL;
3075         unsigned int scanned, best_score;
3076         int i;
3077
3078         GEM_BUG_ON(!ppat->max_entries);
3079
3080         scanned = best_score = 0;
3081         for_each_set_bit(i, ppat->used, ppat->max_entries) {
3082                 unsigned int score;
3083
3084                 score = ppat->match(ppat->entries[i].value, value);
3085                 if (score > best_score) {
3086                         entry = &ppat->entries[i];
3087                         if (score == INTEL_PPAT_PERFECT_MATCH) {
3088                                 kref_get(&entry->ref);
3089                                 return entry;
3090                         }
3091                         best_score = score;
3092                 }
3093                 scanned++;
3094         }
3095
3096         if (scanned == ppat->max_entries) {
3097                 if (!entry)
3098                         return ERR_PTR(-ENOSPC);
3099
3100                 kref_get(&entry->ref);
3101                 return entry;
3102         }
3103
3104         i = find_first_zero_bit(ppat->used, ppat->max_entries);
3105         entry = __alloc_ppat_entry(ppat, i, value);
3106         ppat->update_hw(i915);
3107         return entry;
3108 }
3109
3110 static void release_ppat(struct kref *kref)
3111 {
3112         struct intel_ppat_entry *entry =
3113                 container_of(kref, struct intel_ppat_entry, ref);
3114         struct drm_i915_private *i915 = entry->ppat->i915;
3115
3116         __free_ppat_entry(entry);
3117         entry->ppat->update_hw(i915);
3118 }
3119
3120 /**
3121  * intel_ppat_put - put back a PPAT entry obtained from intel_ppat_get()
3122  * @entry: an intel PPAT entry
3123  *
3124  * Put back a PPAT entry obtained from intel_ppat_get(). If the PPAT index of
3125  * the entry was dynamically allocated, its reference count is decreased. Once
3126  * the reference count drops to zero, the PPAT index becomes free again.
3127  */
3128 void intel_ppat_put(const struct intel_ppat_entry *entry)
3129 {
3130         struct intel_ppat *ppat = entry->ppat;
3131         unsigned int index = entry - ppat->entries;
3132
3133         GEM_BUG_ON(!ppat->max_entries);
3134
3135         kref_put(&ppat->entries[index].ref, release_ppat);
3136 }
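
/*
 * Illustrative sketch, not part of the driver: how a caller might pair
 * intel_ppat_get() with intel_ppat_put(). The wrapper name and the example
 * PPAT value are assumptions made purely for illustration.
 */
static int __maybe_unused example_ppat_usage(struct drm_i915_private *i915)
{
        const struct intel_ppat_entry *entry;

        /* Ask for a write-back, LLC-cached attribute; may reuse or allocate. */
        entry = intel_ppat_get(i915, GEN8_PPAT_WB | GEN8_PPAT_LLC);
        if (IS_ERR(entry))
                return PTR_ERR(entry);

        /*
         * entry - i915->ppat.entries is the PAT index to reference from the
         * PTE encoding; once it is no longer needed, drop the reference.
         */
        intel_ppat_put(entry);

        return 0;
}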
3137
3138 static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv)
3139 {
3140         struct intel_ppat *ppat = &dev_priv->ppat;
3141         int i;
3142
3143         for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
3144                 I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
3145                 clear_bit(i, ppat->dirty);
3146         }
3147 }
3148
3149 static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv)
3150 {
3151         struct intel_ppat *ppat = &dev_priv->ppat;
3152         u64 pat = 0;
3153         int i;
3154
3155         for (i = 0; i < ppat->max_entries; i++)
3156                 pat |= GEN8_PPAT(i, ppat->entries[i].value);
3157
3158         bitmap_clear(ppat->dirty, 0, ppat->max_entries);
3159
3160         I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
3161         I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
3162 }
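
/*
 * Illustrative note: GEN8_PPAT(i, value) packs one 8-bit PPAT entry per byte
 * of the 64-bit @pat word, so entry i occupies bits [8*i + 7:8*i]; the low
 * four entries land in GEN8_PRIVATE_PAT_LO and the high four in
 * GEN8_PRIVATE_PAT_HI.
 */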
3163
3164 static unsigned int bdw_private_pat_match(u8 src, u8 dst)
3165 {
3166         unsigned int score = 0;
3167         enum {
3168                 AGE_MATCH = BIT(0),
3169                 TC_MATCH = BIT(1),
3170                 CA_MATCH = BIT(2),
3171         };
3172
3173         /* Cache attribute has to be matched. */
3174         if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
3175                 return 0;
3176
3177         score |= CA_MATCH;
3178
3179         if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
3180                 score |= TC_MATCH;
3181
3182         if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
3183                 score |= AGE_MATCH;
3184
3185         if (score == (AGE_MATCH | TC_MATCH | CA_MATCH))
3186                 return INTEL_PPAT_PERFECT_MATCH;
3187
3188         return score;
3189 }
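
/*
 * Worked example (illustrative): with the bits above, a cache-attribute-only
 * match scores CA_MATCH = 4, matching the target cache field as well scores
 * 4 | 2 = 6, and matching all three fields short-circuits to
 * INTEL_PPAT_PERFECT_MATCH rather than returning 7.
 */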
3190
3191 static unsigned int chv_private_pat_match(u8 src, u8 dst)
3192 {
3193         return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
3194                 INTEL_PPAT_PERFECT_MATCH : 0;
3195 }
3196
3197 static void cnl_setup_private_ppat(struct intel_ppat *ppat)
3198 {
3199         ppat->max_entries = 8;
3200         ppat->update_hw = cnl_private_pat_update_hw;
3201         ppat->match = bdw_private_pat_match;
3202         ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3203
3204         __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
3205         __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
3206         __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
3207         __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
3208         __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3209         __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3210         __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3211         __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3212 }
3213
3214 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
3215  * bits. When using advanced contexts each context stores its own PAT, but
3216  * writing this data shouldn't be harmful even in those cases. */
3217 static void bdw_setup_private_ppat(struct intel_ppat *ppat)
3218 {
3219         ppat->max_entries = 8;
3220         ppat->update_hw = bdw_private_pat_update_hw;
3221         ppat->match = bdw_private_pat_match;
3222         ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3223
3224         if (!HAS_PPGTT(ppat->i915)) {
3225                 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3226                  * so RTL will always use the value corresponding to
3227                  * pat_sel = 000".
3228                  * So let's disable cache for GGTT to avoid screen corruptions.
3229                  * MOCS still can be used though.
3230                  * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3231                  * before this patch, i.e. the same uncached + snooping access
3232                  * like on gen6/7 seems to be in effect.
3233                  * - So this just fixes blitter/render access. Again it looks
3234                  * like it's not just uncached access, but uncached + snooping.
3235                  * So we can still hold onto all our assumptions wrt cpu
3236                  * clflushing on LLC machines.
3237                  */
3238                 __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
3239                 return;
3240         }
3241
3242         __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);      /* for normal objects, no eLLC */
3243         __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);  /* for something pointing to ptes? */
3244         __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);  /* for scanout with eLLC */
3245         __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);                      /* Uncached objects, mostly for scanout */
3246         __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3247         __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3248         __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3249         __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3250 }
3251
3252 static void chv_setup_private_ppat(struct intel_ppat *ppat)
3253 {
3254         ppat->max_entries = 8;
3255         ppat->update_hw = bdw_private_pat_update_hw;
3256         ppat->match = chv_private_pat_match;
3257         ppat->clear_value = CHV_PPAT_SNOOP;
3258
3259         /*
3260          * Map WB on BDW to snooped on CHV.
3261          *
3262          * Only the snoop bit has meaning for CHV, the rest is
3263          * ignored.
3264          *
3265          * The hardware will never snoop for certain types of accesses:
3266          * - CPU GTT (GMADR->GGTT->no snoop->memory)
3267          * - PPGTT page tables
3268          * - some other special cycles
3269          *
3270          * As with BDW, we also need to consider the following for GT accesses:
3271          * "For GGTT, there is NO pat_sel[2:0] from the entry,
3272          * so RTL will always use the value corresponding to
3273          * pat_sel = 000".
3274          * Which means we must set the snoop bit in PAT entry 0
3275          * in order to keep the global status page working.
3276          */
3277
3278         __alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP);
3279         __alloc_ppat_entry(ppat, 1, 0);
3280         __alloc_ppat_entry(ppat, 2, 0);
3281         __alloc_ppat_entry(ppat, 3, 0);
3282         __alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP);
3283         __alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP);
3284         __alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP);
3285         __alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP);
3286 }
3287
3288 static void gen6_gmch_remove(struct i915_address_space *vm)
3289 {
3290         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
3291
3292         iounmap(ggtt->gsm);
3293         cleanup_scratch_page(vm);
3294 }
3295
3296 static void setup_private_pat(struct drm_i915_private *dev_priv)
3297 {
3298         struct intel_ppat *ppat = &dev_priv->ppat;
3299         int i;
3300
3301         ppat->i915 = dev_priv;
3302
3303         if (INTEL_GEN(dev_priv) >= 10)
3304                 cnl_setup_private_ppat(ppat);
3305         else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
3306                 chv_setup_private_ppat(ppat);
3307         else
3308                 bdw_setup_private_ppat(ppat);
3309
3310         GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES);
3311
3312         for_each_clear_bit(i, ppat->used, ppat->max_entries) {
3313                 ppat->entries[i].value = ppat->clear_value;
3314                 ppat->entries[i].ppat = ppat;
3315                 set_bit(i, ppat->dirty);
3316         }
3317
3318         ppat->update_hw(dev_priv);
3319 }
3320
3321 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3322 {
3323         struct drm_i915_private *dev_priv = ggtt->vm.i915;
3324         struct pci_dev *pdev = dev_priv->drm.pdev;
3325         unsigned int size;
3326         u16 snb_gmch_ctl;
3327         int err;
3328
3329         /* TODO: We're not aware of mappable constraints on gen8 yet */
3330         ggtt->gmadr =
3331                 (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
3332                                                  pci_resource_len(pdev, 2));
3333         ggtt->mappable_end = resource_size(&ggtt->gmadr);
3334
3335         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
3336         if (!err)
3337                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
3338         if (err)
3339                 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3340
3341         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3342         if (IS_CHERRYVIEW(dev_priv))
3343                 size = chv_get_total_gtt_size(snb_gmch_ctl);
3344         else
3345                 size = gen8_get_total_gtt_size(snb_gmch_ctl);
3346
3347         ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
3348         ggtt->vm.cleanup = gen6_gmch_remove;
3349         ggtt->vm.insert_page = gen8_ggtt_insert_page;
3350         ggtt->vm.clear_range = nop_clear_range;
3351         if (intel_scanout_needs_vtd_wa(dev_priv))
3352                 ggtt->vm.clear_range = gen8_ggtt_clear_range;
3353
3354         ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
3355
3356         /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
3357         if (intel_ggtt_update_needs_vtd_wa(dev_priv) ||
3358             IS_CHERRYVIEW(dev_priv) /* fails with concurrent use/update */) {
3359                 ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
3360                 ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
3361                 if (ggtt->vm.clear_range != nop_clear_range)
3362                         ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
3363
3364                 /* Prevent recursively calling stop_machine() and deadlocks. */
3365                 dev_info(dev_priv->drm.dev,
3366                          "Disabling error capture for VT-d workaround\n");
3367                 i915_disable_error_state(dev_priv, -ENODEV);
3368         }
3369
3370         ggtt->invalidate = gen6_ggtt_invalidate;
3371
3372         ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
3373         ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
3374         ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
3375         ggtt->vm.vma_ops.clear_pages = clear_pages;
3376
3377         ggtt->vm.pte_encode = gen8_pte_encode;
3378
3379         setup_private_pat(dev_priv);
3380
3381         return ggtt_probe_common(ggtt, size);
3382 }
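
/*
 * Worked example (illustrative, not part of the driver source): with 8 MiB
 * of PTEs decoded from GGMS, sizeof(gen8_pte_t) == 8 and
 * I915_GTT_PAGE_SIZE == 4096, ggtt->vm.total works out to
 * (8 MiB / 8) * 4096 = 4 GiB of GGTT address space.
 */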
3383
3384 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3385 {
3386         struct drm_i915_private *dev_priv = ggtt->vm.i915;
3387         struct pci_dev *pdev = dev_priv->drm.pdev;
3388         unsigned int size;
3389         u16 snb_gmch_ctl;
3390         int err;
3391
3392         ggtt->gmadr =
3393                 (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
3394                                                  pci_resource_len(pdev, 2));
3395         ggtt->mappable_end = resource_size(&ggtt->gmadr);
3396
3397         /* 64/512MB is the current min/max we actually know of, but this is just
3398          * a coarse sanity check.
3399          */
3400         if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
3401                 DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
3402                 return -ENXIO;
3403         }
3404
3405         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
3406         if (!err)
3407                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
3408         if (err)
3409                 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3410         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3411
3412         size = gen6_get_total_gtt_size(snb_gmch_ctl);
3413         ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
3414
3415         ggtt->vm.clear_range = nop_clear_range;
3416         if (!HAS_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
3417                 ggtt->vm.clear_range = gen6_ggtt_clear_range;
3418         ggtt->vm.insert_page = gen6_ggtt_insert_page;
3419         ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
3420         ggtt->vm.cleanup = gen6_gmch_remove;
3421
3422         ggtt->invalidate = gen6_ggtt_invalidate;
3423
3424         if (HAS_EDRAM(dev_priv))
3425                 ggtt->vm.pte_encode = iris_pte_encode;
3426         else if (IS_HASWELL(dev_priv))
3427                 ggtt->vm.pte_encode = hsw_pte_encode;
3428         else if (IS_VALLEYVIEW(dev_priv))
3429                 ggtt->vm.pte_encode = byt_pte_encode;
3430         else if (INTEL_GEN(dev_priv) >= 7)
3431                 ggtt->vm.pte_encode = ivb_pte_encode;
3432         else
3433                 ggtt->vm.pte_encode = snb_pte_encode;
3434
3435         ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
3436         ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
3437         ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
3438         ggtt->vm.vma_ops.clear_pages = clear_pages;
3439
3440         return ggtt_probe_common(ggtt, size);
3441 }
3442
3443 static void i915_gmch_remove(struct i915_address_space *vm)
3444 {
3445         intel_gmch_remove();
3446 }
3447
3448 static int i915_gmch_probe(struct i915_ggtt *ggtt)
3449 {
3450         struct drm_i915_private *dev_priv = ggtt->vm.i915;
3451         phys_addr_t gmadr_base;
3452         int ret;
3453
3454         ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
3455         if (!ret) {
3456                 DRM_ERROR("failed to set up gmch\n");
3457                 return -EIO;
3458         }
3459
3460         intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
3461
3462         ggtt->gmadr =
3463                 (struct resource) DEFINE_RES_MEM(gmadr_base,
3464                                                  ggtt->mappable_end);
3465
3466         ggtt->do_idle_maps = needs_idle_maps(dev_priv);
3467         ggtt->vm.insert_page = i915_ggtt_insert_page;
3468         ggtt->vm.insert_entries = i915_ggtt_insert_entries;
3469         ggtt->vm.clear_range = i915_ggtt_clear_range;
3470         ggtt->vm.cleanup = i915_gmch_remove;
3471
3472         ggtt->invalidate = gmch_ggtt_invalidate;
3473
3474         ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
3475         ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
3476         ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
3477         ggtt->vm.vma_ops.clear_pages = clear_pages;
3478
3479         if (unlikely(ggtt->do_idle_maps))
3480                 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3481
3482         return 0;
3483 }
3484
3485 /**
3486  * i915_ggtt_probe_hw - Probe GGTT hardware location
3487  * @dev_priv: i915 device
3488  */
3489 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
3490 {
3491         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3492         int ret;
3493
3494         ggtt->vm.i915 = dev_priv;
3495         ggtt->vm.dma = &dev_priv->drm.pdev->dev;
3496
3497         if (INTEL_GEN(dev_priv) <= 5)
3498                 ret = i915_gmch_probe(ggtt);
3499         else if (INTEL_GEN(dev_priv) < 8)
3500                 ret = gen6_gmch_probe(ggtt);
3501         else
3502                 ret = gen8_gmch_probe(ggtt);
3503         if (ret)
3504                 return ret;
3505
3506         if ((ggtt->vm.total - 1) >> 32) {
3507                 DRM_ERROR("We never expected a Global GTT with more than 32bits"
3508                           " of address space! Found %lldM!\n",
3509                           ggtt->vm.total >> 20);
3510                 ggtt->vm.total = 1ULL << 32;
3511                 ggtt->mappable_end =
3512                         min_t(u64, ggtt->mappable_end, ggtt->vm.total);
3513         }
3514
3515         if (ggtt->mappable_end > ggtt->vm.total) {
3516                 DRM_ERROR("mappable aperture extends past end of GGTT,"
3517                           " aperture=%pa, total=%llx\n",
3518                           &ggtt->mappable_end, ggtt->vm.total);
3519                 ggtt->mappable_end = ggtt->vm.total;
3520         }
3521
3522         /* GMADR is the PCI mmio aperture into the global GTT. */
3523         DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
3524         DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
3525         DRM_DEBUG_DRIVER("DSM size = %lluM\n",
3526                          (u64)resource_size(&intel_graphics_stolen_res) >> 20);
3527         if (intel_vtd_active())
3528                 DRM_INFO("VT-d active for gfx access\n");
3529
3530         return 0;
3531 }
3532
3533 /**
3534  * i915_ggtt_init_hw - Initialize GGTT hardware
3535  * @dev_priv: i915 device
3536  */
3537 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
3538 {
3539         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3540         int ret;
3541
3542         stash_init(&dev_priv->mm.wc_stash);
3543
3544         /* Note that we use page colouring to enforce a guard page at the
3545          * end of the address space. This is required as the CS may prefetch
3546          * beyond the end of the batch buffer, across the page boundary,
3547          * and beyond the end of the GTT if we do not provide a guard.
3548          */
3549         mutex_lock(&dev_priv->drm.struct_mutex);
3550         i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
3551
3552         ggtt->vm.is_ggtt = true;
3553
3554         /* Only VLV supports read-only GGTT mappings */
3555         ggtt->vm.has_read_only = IS_VALLEYVIEW(dev_priv);
3556
3557         if (!HAS_LLC(dev_priv) && !HAS_PPGTT(dev_priv))
3558                 ggtt->vm.mm.color_adjust = i915_gtt_color_adjust;
3559         mutex_unlock(&dev_priv->drm.struct_mutex);
3560
3561         if (!io_mapping_init_wc(&dev_priv->ggtt.iomap,
3562                                 dev_priv->ggtt.gmadr.start,
3563                                 dev_priv->ggtt.mappable_end)) {
3564                 ret = -EIO;
3565                 goto out_gtt_cleanup;
3566         }
3567
3568         ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
3569
3570         i915_ggtt_init_fences(ggtt);
3571
3572         /*
3573          * Initialise stolen early so that we may reserve preallocated
3574          * objects for the BIOS to KMS transition.
3575          */
3576         ret = i915_gem_init_stolen(dev_priv);
3577         if (ret)
3578                 goto out_gtt_cleanup;
3579
3580         return 0;
3581
3582 out_gtt_cleanup:
3583         ggtt->vm.cleanup(&ggtt->vm);
3584         return ret;
3585 }
3586
3587 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
3588 {
3589         if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
3590                 return -EIO;
3591
3592         return 0;
3593 }
3594
3595 void i915_ggtt_enable_guc(struct drm_i915_private *i915)
3596 {
3597         GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
3598
3599         i915->ggtt.invalidate = guc_ggtt_invalidate;
3600
3601         i915_ggtt_invalidate(i915);
3602 }
3603
3604 void i915_ggtt_disable_guc(struct drm_i915_private *i915)
3605 {
3606         /* XXX Temporary pardon for error unload */
3607         if (i915->ggtt.invalidate == gen6_ggtt_invalidate)
3608                 return;
3609
3610         /* We should only be called after i915_ggtt_enable_guc() */
3611         GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate);
3612
3613         i915->ggtt.invalidate = gen6_ggtt_invalidate;
3614
3615         i915_ggtt_invalidate(i915);
3616 }
3617
3618 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
3619 {
3620         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3621         struct i915_vma *vma, *vn;
3622
3623         i915_check_and_clear_faults(dev_priv);
3624
3625         mutex_lock(&ggtt->vm.mutex);
3626
3627         /* First fill our portion of the GTT with scratch pages */
3628         ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
3629         ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
3630
3631         /* clflush objects bound into the GGTT and rebind them. */
3632         list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
3633                 struct drm_i915_gem_object *obj = vma->obj;
3634
3635                 if (!(vma->flags & I915_VMA_GLOBAL_BIND))
3636                         continue;
3637
3638                 mutex_unlock(&ggtt->vm.mutex);
3639
3640                 if (!i915_vma_unbind(vma))
3641                         goto lock;
3642
3643                 WARN_ON(i915_vma_bind(vma,
3644                                       obj ? obj->cache_level : 0,
3645                                       PIN_UPDATE));
3646                 if (obj) {
3647                         i915_gem_object_lock(obj);
3648                         WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
3649                         i915_gem_object_unlock(obj);
3650                 }
3651
3652 lock:
3653                 mutex_lock(&ggtt->vm.mutex);
3654         }
3655
3656         ggtt->vm.closed = false;
3657         i915_ggtt_invalidate(dev_priv);
3658
3659         mutex_unlock(&ggtt->vm.mutex);
3660
3661         if (INTEL_GEN(dev_priv) >= 8) {
3662                 struct intel_ppat *ppat = &dev_priv->ppat;
3663
3664                 bitmap_set(ppat->dirty, 0, ppat->max_entries);
3665                 dev_priv->ppat.update_hw(dev_priv);
3666                 return;
3667         }
3668 }
3669
3670 static struct scatterlist *
3671 rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
3672              unsigned int width, unsigned int height,
3673              unsigned int stride,
3674              struct sg_table *st, struct scatterlist *sg)
3675 {
3676         unsigned int column, row;
3677         unsigned int src_idx;
3678
3679         for (column = 0; column < width; column++) {
3680                 src_idx = stride * (height - 1) + column + offset;
3681                 for (row = 0; row < height; row++) {
3682                         st->nents++;
3683                         /* We don't need the pages, but need to initialize
3684                          * the entries so the sg list can be happily traversed.
3685                          * All we need are the DMA addresses.
3686                          */
3687                         sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
3688                         sg_dma_address(sg) =
3689                                 i915_gem_object_get_dma_address(obj, src_idx);
3690                         sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
3691                         sg = sg_next(sg);
3692                         src_idx -= stride;
3693                 }
3694         }
3695
3696         return sg;
3697 }
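
/*
 * Worked example (illustrative): for a single 2x2 plane with stride 2 and
 * offset 0, rotate_pages() above visits the source page indices column by
 * column, bottom to top: column 0 yields 2 then 0, column 1 yields 3 then 1,
 * so the rotated view is assembled from source pages 2, 0, 3, 1.
 */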
3698
3699 static noinline struct sg_table *
3700 intel_rotate_pages(struct intel_rotation_info *rot_info,
3701                    struct drm_i915_gem_object *obj)
3702 {
3703         unsigned int size = intel_rotation_info_size(rot_info);
3704         struct sg_table *st;
3705         struct scatterlist *sg;
3706         int ret = -ENOMEM;
3707         int i;
3708
3709         /* Allocate target SG list. */
3710         st = kmalloc(sizeof(*st), GFP_KERNEL);
3711         if (!st)
3712                 goto err_st_alloc;
3713
3714         ret = sg_alloc_table(st, size, GFP_KERNEL);
3715         if (ret)
3716                 goto err_sg_alloc;
3717
3718         st->nents = 0;
3719         sg = st->sgl;
3720
3721         for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++) {
3722                 sg = rotate_pages(obj, rot_info->plane[i].offset,
3723                                   rot_info->plane[i].width, rot_info->plane[i].height,
3724                                   rot_info->plane[i].stride, st, sg);
3725         }
3726
3727         return st;
3728
3729 err_sg_alloc:
3730         kfree(st);
3731 err_st_alloc:
3732
3733         DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
3734                          obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3735
3736         return ERR_PTR(ret);
3737 }
3738
3739 static struct scatterlist *
3740 remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
3741             unsigned int width, unsigned int height,
3742             unsigned int stride,
3743             struct sg_table *st, struct scatterlist *sg)
3744 {
3745         unsigned int row;
3746
3747         for (row = 0; row < height; row++) {
3748                 unsigned int left = width * I915_GTT_PAGE_SIZE;
3749
3750                 while (left) {
3751                         dma_addr_t addr;
3752                         unsigned int length;
3753
3754                         /* We don't need the pages, but need to initialize
3755                          * the entries so the sg list can be happily traversed.
3756                          * All we need are the DMA addresses.
3757                          */
3758
3759                         addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
3760
3761                         length = min(left, length);
3762
3763                         st->nents++;
3764
3765                         sg_set_page(sg, NULL, length, 0);
3766                         sg_dma_address(sg) = addr;
3767                         sg_dma_len(sg) = length;
3768                         sg = sg_next(sg);
3769
3770                         offset += length / I915_GTT_PAGE_SIZE;
3771                         left -= length;
3772                 }
3773
3774                 offset += stride - width;
3775         }
3776
3777         return sg;
3778 }
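
/*
 * Worked example (illustrative): for a plane with width 2, height 2,
 * stride 4 and offset 0, remap_pages() above emits DMA ranges covering
 * source pages {0, 1} for the first row, skips the remaining
 * stride - width = 2 pages, then covers pages {4, 5} for the second row,
 * coalescing physically contiguous pages into single entries.
 */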
3779
3780 static noinline struct sg_table *
3781 intel_remap_pages(struct intel_remapped_info *rem_info,
3782                   struct drm_i915_gem_object *obj)
3783 {
3784         unsigned int size = intel_remapped_info_size(rem_info);
3785         struct sg_table *st;
3786         struct scatterlist *sg;
3787         int ret = -ENOMEM;
3788         int i;
3789
3790         /* Allocate target SG list. */
3791         st = kmalloc(sizeof(*st), GFP_KERNEL);
3792         if (!st)
3793                 goto err_st_alloc;
3794
3795         ret = sg_alloc_table(st, size, GFP_KERNEL);
3796         if (ret)
3797                 goto err_sg_alloc;
3798
3799         st->nents = 0;
3800         sg = st->sgl;
3801
3802         for (i = 0; i < ARRAY_SIZE(rem_info->plane); i++) {
3803                 sg = remap_pages(obj, rem_info->plane[i].offset,
3804                                  rem_info->plane[i].width, rem_info->plane[i].height,
3805                                  rem_info->plane[i].stride, st, sg);
3806         }
3807
3808         i915_sg_trim(st);
3809
3810         return st;
3811
3812 err_sg_alloc:
3813         kfree(st);
3814 err_st_alloc:
3815
3816         DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
3817                          obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
3818
3819         return ERR_PTR(ret);
3820 }
3821
3822 static noinline struct sg_table *
3823 intel_partial_pages(const struct i915_ggtt_view *view,
3824                     struct drm_i915_gem_object *obj)
3825 {
3826         struct sg_table *st;
3827         struct scatterlist *sg, *iter;
3828         unsigned int count = view->partial.size;
3829         unsigned int offset;
3830         int ret = -ENOMEM;
3831
3832         st = kmalloc(sizeof(*st), GFP_KERNEL);
3833         if (!st)
3834                 goto err_st_alloc;
3835
3836         ret = sg_alloc_table(st, count, GFP_KERNEL);
3837         if (ret)
3838                 goto err_sg_alloc;
3839
3840         iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
3841         GEM_BUG_ON(!iter);
3842
3843         sg = st->sgl;
3844         st->nents = 0;
3845         do {
3846                 unsigned int len;
3847
3848                 len = min(iter->length - (offset << PAGE_SHIFT),
3849                           count << PAGE_SHIFT);
3850                 sg_set_page(sg, NULL, len, 0);
3851                 sg_dma_address(sg) =
3852                         sg_dma_address(iter) + (offset << PAGE_SHIFT);
3853                 sg_dma_len(sg) = len;
3854
3855                 st->nents++;
3856                 count -= len >> PAGE_SHIFT;
3857                 if (count == 0) {
3858                         sg_mark_end(sg);
3859                         i915_sg_trim(st); /* Drop any unused tail entries. */
3860
3861                         return st;
3862                 }
3863
3864                 sg = __sg_next(sg);
3865                 iter = __sg_next(iter);
3866                 offset = 0;
3867         } while (1);
3868
3869 err_sg_alloc:
3870         kfree(st);
3871 err_st_alloc:
3872         return ERR_PTR(ret);
3873 }
3874
3875 static int
3876 i915_get_ggtt_vma_pages(struct i915_vma *vma)
3877 {
3878         int ret;
3879
3880         /* The vma->pages are only valid within the lifespan of the borrowed
3881          * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, the
3882          * vma->pages must be regenerated as well. A simple rule is that
3883          * vma->pages must only be accessed when the obj->mm.pages are pinned.
3884          */
3885         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
3886
3887         switch (vma->ggtt_view.type) {
3888         default:
3889                 GEM_BUG_ON(vma->ggtt_view.type);
3890                 /* fall through */
3891         case I915_GGTT_VIEW_NORMAL:
3892                 vma->pages = vma->obj->mm.pages;
3893                 return 0;
3894
3895         case I915_GGTT_VIEW_ROTATED:
3896                 vma->pages =
3897                         intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
3898                 break;
3899
3900         case I915_GGTT_VIEW_REMAPPED:
3901                 vma->pages =
3902                         intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
3903                 break;
3904
3905         case I915_GGTT_VIEW_PARTIAL:
3906                 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
3907                 break;
3908         }
3909
3910         ret = 0;
3911         if (IS_ERR(vma->pages)) {
3912                 ret = PTR_ERR(vma->pages);
3913                 vma->pages = NULL;
3914                 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3915                           vma->ggtt_view.type, ret);
3916         }
3917         return ret;
3918 }
3919
3920 /**
3921  * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
3922  * @vm: the &struct i915_address_space
3923  * @node: the &struct drm_mm_node (typically i915_vma.node)
3924  * @size: how much space to allocate inside the GTT,
3925  *        must be #I915_GTT_PAGE_SIZE aligned
3926  * @offset: where to insert inside the GTT,
3927  *          must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
3928  *          (@offset + @size) must fit within the address space
3929  * @color: color to apply to node, if this node is not from a VMA,
3930  *         color must be #I915_COLOR_UNEVICTABLE
3931  * @flags: control search and eviction behaviour
3932  *
3933  * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
3934  * the address space (using @size and @color). If the @node does not fit, it
3935  * tries to evict any overlapping nodes from the GTT, including any
3936  * neighbouring nodes if the colors do not match (to ensure guard pages between
3937  * differing domains). See i915_gem_evict_for_node() for the gory details
3938  * on the eviction algorithm. #PIN_NONBLOCK may be used to prevent waiting on
3939  * evicting active overlapping objects, and any overlapping node that is pinned
3940  * or marked as unevictable will also result in failure.
3941  *
3942  * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
3943  * asked to wait for eviction and interrupted.
3944  */
3945 int i915_gem_gtt_reserve(struct i915_address_space *vm,
3946                          struct drm_mm_node *node,
3947                          u64 size, u64 offset, unsigned long color,
3948                          unsigned int flags)
3949 {
3950         int err;
3951
3952         GEM_BUG_ON(!size);
3953         GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
3954         GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
3955         GEM_BUG_ON(range_overflows(offset, size, vm->total));
3956         GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
3957         GEM_BUG_ON(drm_mm_node_allocated(node));
3958
3959         node->size = size;
3960         node->start = offset;
3961         node->color = color;
3962
3963         err = drm_mm_reserve_node(&vm->mm, node);
3964         if (err != -ENOSPC)
3965                 return err;
3966
3967         if (flags & PIN_NOEVICT)
3968                 return -ENOSPC;
3969
3970         err = i915_gem_evict_for_node(vm, node, flags);
3971         if (err == 0)
3972                 err = drm_mm_reserve_node(&vm->mm, node);
3973
3974         return err;
3975 }
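
/*
 * Illustrative sketch, not part of the driver: reserving a fixed, page-sized
 * range with i915_gem_gtt_reserve(). The wrapper name and the choice of
 * PIN_NOEVICT are assumptions for illustration only.
 */
static int __maybe_unused example_reserve_fixed(struct i915_address_space *vm,
                                                struct drm_mm_node *node,
                                                u64 offset)
{
        int err;

        /* Claim one page at @offset, refusing to evict anything in the way. */
        err = i915_gem_gtt_reserve(vm, node, I915_GTT_PAGE_SIZE, offset,
                                   I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
        if (err)
                return err;

        /* ... use node->start ... */

        drm_mm_remove_node(node);
        return 0;
}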
3976
3977 static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
3978 {
3979         u64 range, addr;
3980
3981         GEM_BUG_ON(range_overflows(start, len, end));
3982         GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));
3983
3984         range = round_down(end - len, align) - round_up(start, align);
3985         if (range) {
3986                 if (sizeof(unsigned long) == sizeof(u64)) {
3987                         addr = get_random_long();
3988                 } else {
3989                         addr = get_random_int();
3990                         if (range > U32_MAX) {
3991                                 addr <<= 32;
3992                                 addr |= get_random_int();
3993                         }
3994                 }
3995                 div64_u64_rem(addr, range, &addr);
3996                 start += addr;
3997         }
3998
3999         return round_up(start, align);
4000 }
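
/*
 * Illustrative note: random_offset() picks an @align-aligned address in
 * [start, end - len]. E.g. with start = 0, end = 1 GiB, len = 4 KiB and
 * align = 4 KiB the usable window is 1 GiB - 4 KiB wide; a random value is
 * reduced modulo that window, added to start and then rounded up to the
 * requested alignment.
 */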
4001
4002 /**
4003  * i915_gem_gtt_insert - insert a node into an address_space (GTT)
4004  * @vm: the &struct i915_address_space
4005  * @node: the &struct drm_mm_node (typically i915_vma.node)
4006  * @size: how much space to allocate inside the GTT,
4007  *        must be #I915_GTT_PAGE_SIZE aligned
4008  * @alignment: required alignment of starting offset, may be 0 but
4009  *             if specified, this must be a power-of-two and at least
4010  *             #I915_GTT_MIN_ALIGNMENT
4011  * @color: color to apply to node
4012  * @start: start of any range restriction inside GTT (0 for all),
4013  *         must be #I915_GTT_PAGE_SIZE aligned
4014  * @end: end of any range restriction inside GTT (U64_MAX for all),
4015  *       must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX
4016  * @flags: control search and eviction behaviour
4017  *
4018  * i915_gem_gtt_insert() first searches for an available hole into which
4019  * it can insert the node. The hole address is aligned to @alignment and
4020  * its @size must then fit entirely within the [@start, @end] bounds. The
4021  * nodes on either side of the hole must match @color, or else a guard page
4022  * will be inserted between the two nodes (or the node evicted). If no
4023  * suitable hole is found, a victim is first selected at random and tested
4024  * for eviction; failing that, the LRU list of objects within the GTT
4025  * is scanned to find the first set of replacement nodes to create the hole.
4026  * Those old overlapping nodes are evicted from the GTT (and so must be
4027  * rebound before any future use). Any node that is currently pinned cannot
4028  * be evicted (see i915_vma_pin()). Similarly, if the node's VMA is currently
4029  * active and #PIN_NONBLOCK is specified, that node is also skipped when
4030  * searching for an eviction candidate. See i915_gem_evict_something() for
4031  * the gory details on the eviction algorithm.
4032  *
4033  * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
4034  * asked to wait for eviction and interrupted.
4035  */
4036 int i915_gem_gtt_insert(struct i915_address_space *vm,
4037                         struct drm_mm_node *node,
4038                         u64 size, u64 alignment, unsigned long color,
4039                         u64 start, u64 end, unsigned int flags)
4040 {
4041         enum drm_mm_insert_mode mode;
4042         u64 offset;
4043         int err;
4044
4045         lockdep_assert_held(&vm->i915->drm.struct_mutex);
4046         GEM_BUG_ON(!size);
4047         GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
4048         GEM_BUG_ON(alignment && !is_power_of_2(alignment));
4049         GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
4050         GEM_BUG_ON(start >= end);
4051         GEM_BUG_ON(start > 0  && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
4052         GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
4053         GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
4054         GEM_BUG_ON(drm_mm_node_allocated(node));
4055
4056         if (unlikely(range_overflows(start, size, end)))
4057                 return -ENOSPC;
4058
4059         if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
4060                 return -ENOSPC;
4061
4062         mode = DRM_MM_INSERT_BEST;
4063         if (flags & PIN_HIGH)
4064                 mode = DRM_MM_INSERT_HIGHEST;
4065         if (flags & PIN_MAPPABLE)
4066                 mode = DRM_MM_INSERT_LOW;
4067
4068         /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
4069          * so we know that we always have a minimum alignment of 4096.
4070          * The drm_mm range manager is optimised to return results
4071          * with zero alignment, so where possible use the optimal
4072          * path.
4073          */
4074         BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE);
4075         if (alignment <= I915_GTT_MIN_ALIGNMENT)
4076                 alignment = 0;
4077
4078         err = drm_mm_insert_node_in_range(&vm->mm, node,
4079                                           size, alignment, color,
4080                                           start, end, mode);
4081         if (err != -ENOSPC)
4082                 return err;
4083
4084         if (mode & DRM_MM_INSERT_ONCE) {
4085                 err = drm_mm_insert_node_in_range(&vm->mm, node,
4086                                                   size, alignment, color,
4087                                                   start, end,
4088                                                   DRM_MM_INSERT_BEST);
4089                 if (err != -ENOSPC)
4090                         return err;
4091         }
4092
4093         if (flags & PIN_NOEVICT)
4094                 return -ENOSPC;
4095
4096         /* No free space, pick a slot at random.
4097          *
4098          * There is a pathological case here using a GTT shared between
4099          * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
4100          *
4101          *    |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
4102          *         (64k objects)             (448k objects)
4103          *
4104          * Now imagine that the eviction LRU is ordered top-down (just because
4105          * pathology meets real life), and that we need to evict an object to
4106          * make room inside the aperture. The eviction scan then has to walk
4107          * the 448k list before it finds one within range. And now imagine that
4108          * it has to search for a new hole between every byte inside the memcpy,
4109          * for several simultaneous clients.
4110          *
4111          * On a full-ppgtt system, if we have run out of available space, there
4112          * will be lots and lots of objects in the eviction list! Again,
4113          * searching that LRU list may be slow if we are also applying any
4114          * range restrictions (e.g. restriction to low 4GiB) and so, for
4115          * simplicity and similarity between different GTTs, try the single
4116          * random replacement first.
4117          */
4118         offset = random_offset(start, end,
4119                                size, alignment ?: I915_GTT_MIN_ALIGNMENT);
4120         err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
4121         if (err != -ENOSPC)
4122                 return err;
4123
4124         /* Randomly selected placement is pinned, do a search */
4125         err = i915_gem_evict_something(vm, size, alignment, color,
4126                                        start, end, flags);
4127         if (err)
4128                 return err;
4129
4130         return drm_mm_insert_node_in_range(&vm->mm, node,
4131                                            size, alignment, color,
4132                                            start, end, DRM_MM_INSERT_EVICT);
4133 }
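
/*
 * Illustrative sketch, not part of the driver: a minimal caller of
 * i915_gem_gtt_insert() placing a node anywhere in the low 4GiB of the
 * address space, biased towards low addresses (PIN_MAPPABLE selects
 * DRM_MM_INSERT_LOW above). The wrapper name is an assumption; @size is
 * assumed to be non-zero and I915_GTT_PAGE_SIZE aligned, and the caller is
 * assumed to hold the struct_mutex checked by the lockdep assertion above.
 */
static int __maybe_unused example_insert_low(struct i915_address_space *vm,
                                             struct drm_mm_node *node, u64 size)
{
        return i915_gem_gtt_insert(vm, node, size,
                                   I915_GTT_MIN_ALIGNMENT, /* alignment */
                                   I915_COLOR_UNEVICTABLE, /* color */
                                   0, BIT_ULL(32),         /* start, end */
                                   PIN_MAPPABLE);
}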
4134
4135 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
4136 #include "selftests/mock_gtt.c"
4137 #include "selftests/i915_gem_gtt.c"
4138 #endif