Merge tag 'nfs-for-5.17-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
[linux-2.6-microblaze.git] / drivers / gpu / drm / i915 / gt / gen6_ppgtt.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2020 Intel Corporation
4  */
5
6 #include <linux/log2.h>
7
8 #include "gen6_ppgtt.h"
9 #include "i915_scatterlist.h"
10 #include "i915_trace.h"
11 #include "i915_vgpu.h"
12 #include "intel_gt.h"
13
14 /* Write pde (index) from the page directory @pd to the page table @pt */
15 static void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
16                            const unsigned int pde,
17                            const struct i915_page_table *pt)
18 {
19         dma_addr_t addr = pt ? px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]);
20
21         /* Caller needs to make sure the write completes if necessary */
22         iowrite32(GEN6_PDE_ADDR_ENCODE(addr) | GEN6_PDE_VALID,
23                   ppgtt->pd_addr + pde);
24 }
25
26 void gen7_ppgtt_enable(struct intel_gt *gt)
27 {
28         struct drm_i915_private *i915 = gt->i915;
29         struct intel_uncore *uncore = gt->uncore;
30         u32 ecochk;
31
32         intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);
33
34         ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
35         if (IS_HASWELL(i915)) {
36                 ecochk |= ECOCHK_PPGTT_WB_HSW;
37         } else {
38                 ecochk |= ECOCHK_PPGTT_LLC_IVB;
39                 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
40         }
41         intel_uncore_write(uncore, GAM_ECOCHK, ecochk);
42 }
43
44 void gen6_ppgtt_enable(struct intel_gt *gt)
45 {
46         struct intel_uncore *uncore = gt->uncore;
47
48         intel_uncore_rmw(uncore,
49                          GAC_ECO_BITS,
50                          0,
51                          ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);
52
53         intel_uncore_rmw(uncore,
54                          GAB_CTL,
55                          0,
56                          GAB_CTL_CONT_AFTER_PAGEFAULT);
57
58         intel_uncore_rmw(uncore,
59                          GAM_ECOCHK,
60                          0,
61                          ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
62
63         if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
64                 intel_uncore_write(uncore,
65                                    GFX_MODE,
66                                    _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
67 }
68
69 /* PPGTT support for Sandybdrige/Gen6 and later */
70 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
71                                    u64 start, u64 length)
72 {
73         struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
74         const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
75         const gen6_pte_t scratch_pte = vm->scratch[0]->encode;
76         unsigned int pde = first_entry / GEN6_PTES;
77         unsigned int pte = first_entry % GEN6_PTES;
78         unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
79
80         while (num_entries) {
81                 struct i915_page_table * const pt =
82                         i915_pt_entry(ppgtt->base.pd, pde++);
83                 const unsigned int count = min(num_entries, GEN6_PTES - pte);
84                 gen6_pte_t *vaddr;
85
86                 num_entries -= count;
87
88                 GEM_BUG_ON(count > atomic_read(&pt->used));
89                 if (!atomic_sub_return(count, &pt->used))
90                         ppgtt->scan_for_unused_pt = true;
91
92                 /*
93                  * Note that the hw doesn't support removing PDE on the fly
94                  * (they are cached inside the context with no means to
95                  * invalidate the cache), so we can only reset the PTE
96                  * entries back to scratch.
97                  */
98
99                 vaddr = px_vaddr(pt);
100                 memset32(vaddr + pte, scratch_pte, count);
101
102                 pte = 0;
103         }
104 }
105
106 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
107                                       struct i915_vma *vma,
108                                       enum i915_cache_level cache_level,
109                                       u32 flags)
110 {
111         struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
112         struct i915_page_directory * const pd = ppgtt->pd;
113         unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
114         unsigned int act_pt = first_entry / GEN6_PTES;
115         unsigned int act_pte = first_entry % GEN6_PTES;
116         const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
117         struct sgt_dma iter = sgt_dma(vma);
118         gen6_pte_t *vaddr;
119
120         GEM_BUG_ON(!pd->entry[act_pt]);
121
122         vaddr = px_vaddr(i915_pt_entry(pd, act_pt));
123         do {
124                 GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE);
125                 vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
126
127                 iter.dma += I915_GTT_PAGE_SIZE;
128                 if (iter.dma == iter.max) {
129                         iter.sg = __sg_next(iter.sg);
130                         if (!iter.sg || sg_dma_len(iter.sg) == 0)
131                                 break;
132
133                         iter.dma = sg_dma_address(iter.sg);
134                         iter.max = iter.dma + sg_dma_len(iter.sg);
135                 }
136
137                 if (++act_pte == GEN6_PTES) {
138                         vaddr = px_vaddr(i915_pt_entry(pd, ++act_pt));
139                         act_pte = 0;
140                 }
141         } while (1);
142
143         vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
144 }
145
146 static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
147 {
148         struct i915_page_directory * const pd = ppgtt->base.pd;
149         struct i915_page_table *pt;
150         unsigned int pde;
151
152         start = round_down(start, SZ_64K);
153         end = round_up(end, SZ_64K) - start;
154
155         mutex_lock(&ppgtt->flush);
156
157         gen6_for_each_pde(pt, pd, start, end, pde)
158                 gen6_write_pde(ppgtt, pde, pt);
159
160         mb();
161         ioread32(ppgtt->pd_addr + pde - 1);
162         gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
163         mb();
164
165         mutex_unlock(&ppgtt->flush);
166 }
167
168 static void gen6_alloc_va_range(struct i915_address_space *vm,
169                                 struct i915_vm_pt_stash *stash,
170                                 u64 start, u64 length)
171 {
172         struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
173         struct i915_page_directory * const pd = ppgtt->base.pd;
174         struct i915_page_table *pt;
175         bool flush = false;
176         u64 from = start;
177         unsigned int pde;
178
179         spin_lock(&pd->lock);
180         gen6_for_each_pde(pt, pd, start, length, pde) {
181                 const unsigned int count = gen6_pte_count(start, length);
182
183                 if (!pt) {
184                         spin_unlock(&pd->lock);
185
186                         pt = stash->pt[0];
187                         __i915_gem_object_pin_pages(pt->base);
188
189                         fill32_px(pt, vm->scratch[0]->encode);
190
191                         spin_lock(&pd->lock);
192                         if (!pd->entry[pde]) {
193                                 stash->pt[0] = pt->stash;
194                                 atomic_set(&pt->used, 0);
195                                 pd->entry[pde] = pt;
196                         } else {
197                                 pt = pd->entry[pde];
198                         }
199
200                         flush = true;
201                 }
202
203                 atomic_add(count, &pt->used);
204         }
205         spin_unlock(&pd->lock);
206
207         if (flush && i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
208                 intel_wakeref_t wakeref;
209
210                 with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref)
211                         gen6_flush_pd(ppgtt, from, start);
212         }
213 }
214
215 static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
216 {
217         struct i915_address_space * const vm = &ppgtt->base.vm;
218         int ret;
219
220         ret = setup_scratch_page(vm);
221         if (ret)
222                 return ret;
223
224         vm->scratch[0]->encode =
225                 vm->pte_encode(px_dma(vm->scratch[0]),
226                                I915_CACHE_NONE, PTE_READ_ONLY);
227
228         vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
229         if (IS_ERR(vm->scratch[1])) {
230                 ret = PTR_ERR(vm->scratch[1]);
231                 goto err_scratch0;
232         }
233
234         ret = map_pt_dma(vm, vm->scratch[1]);
235         if (ret)
236                 goto err_scratch1;
237
238         fill32_px(vm->scratch[1], vm->scratch[0]->encode);
239
240         return 0;
241
242 err_scratch1:
243         i915_gem_object_put(vm->scratch[1]);
244 err_scratch0:
245         i915_gem_object_put(vm->scratch[0]);
246         return ret;
247 }
248
249 static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
250 {
251         struct i915_page_directory * const pd = ppgtt->base.pd;
252         struct i915_page_table *pt;
253         u32 pde;
254
255         gen6_for_all_pdes(pt, pd, pde)
256                 if (pt)
257                         free_pt(&ppgtt->base.vm, pt);
258 }
259
260 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
261 {
262         struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
263
264         gen6_ppgtt_free_pd(ppgtt);
265         free_scratch(vm);
266
267         mutex_destroy(&ppgtt->flush);
268
269         free_pd(&ppgtt->base.vm, ppgtt->base.pd);
270 }
271
272 static void pd_vma_bind(struct i915_address_space *vm,
273                         struct i915_vm_pt_stash *stash,
274                         struct i915_vma *vma,
275                         enum i915_cache_level cache_level,
276                         u32 unused)
277 {
278         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
279         struct gen6_ppgtt *ppgtt = vma->private;
280         u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
281
282         ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10;
283         ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
284
285         gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
286 }
287
288 static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
289 {
290         struct gen6_ppgtt *ppgtt = vma->private;
291         struct i915_page_directory * const pd = ppgtt->base.pd;
292         struct i915_page_table *pt;
293         unsigned int pde;
294
295         if (!ppgtt->scan_for_unused_pt)
296                 return;
297
298         /* Free all no longer used page tables */
299         gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
300                 if (!pt || atomic_read(&pt->used))
301                         continue;
302
303                 free_pt(&ppgtt->base.vm, pt);
304                 pd->entry[pde] = NULL;
305         }
306
307         ppgtt->scan_for_unused_pt = false;
308 }
309
310 static const struct i915_vma_ops pd_vma_ops = {
311         .bind_vma = pd_vma_bind,
312         .unbind_vma = pd_vma_unbind,
313 };
314
315 int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
316 {
317         struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
318         int err;
319
320         GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));
321
322         /*
323          * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
324          * which will be pinned into every active context.
325          * (When vma->pin_count becomes atomic, I expect we will naturally
326          * need a larger, unpacked, type and kill this redundancy.)
327          */
328         if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
329                 return 0;
330
331         /* grab the ppgtt resv to pin the object */
332         err = i915_vm_lock_objects(&ppgtt->base.vm, ww);
333         if (err)
334                 return err;
335
336         /*
337          * PPGTT PDEs reside in the GGTT and consists of 512 entries. The
338          * allocator works in address space sizes, so it's multiplied by page
339          * size. We allocate at the top of the GTT to avoid fragmentation.
340          */
341         if (!atomic_read(&ppgtt->pin_count)) {
342                 err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
343
344                 GEM_BUG_ON(ppgtt->vma->fence);
345                 clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(ppgtt->vma));
346         }
347         if (!err)
348                 atomic_inc(&ppgtt->pin_count);
349
350         return err;
351 }
352
353 static int pd_dummy_obj_get_pages(struct drm_i915_gem_object *obj)
354 {
355         obj->mm.pages = ZERO_SIZE_PTR;
356         return 0;
357 }
358
/* put_pages hook of the dummy page-directory object: intentionally empty. */
static void pd_dummy_obj_put_pages(struct drm_i915_gem_object *obj,
				   struct sg_table *pages)
{
	/* nothing to do */
}
363
364 static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = {
365         .name = "pd_dummy_obj",
366         .get_pages = pd_dummy_obj_get_pages,
367         .put_pages = pd_dummy_obj_put_pages,
368 };
369
370 static struct i915_page_directory *
371 gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)
372 {
373         struct i915_ggtt * const ggtt = ppgtt->base.vm.gt->ggtt;
374         struct i915_page_directory *pd;
375         int err;
376
377         pd = __alloc_pd(I915_PDES);
378         if (unlikely(!pd))
379                 return ERR_PTR(-ENOMEM);
380
381         pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915,
382                                                         &pd_dummy_obj_ops,
383                                                         I915_PDES * SZ_4K);
384         if (IS_ERR(pd->pt.base)) {
385                 err = PTR_ERR(pd->pt.base);
386                 pd->pt.base = NULL;
387                 goto err_pd;
388         }
389
390         pd->pt.base->base.resv = i915_vm_resv_get(&ppgtt->base.vm);
391         pd->pt.base->shares_resv_from = &ppgtt->base.vm;
392
393         ppgtt->vma = i915_vma_instance(pd->pt.base, &ggtt->vm, NULL);
394         if (IS_ERR(ppgtt->vma)) {
395                 err = PTR_ERR(ppgtt->vma);
396                 ppgtt->vma = NULL;
397                 goto err_pd;
398         }
399
400         /* The dummy object we create is special, override ops.. */
401         ppgtt->vma->ops = &pd_vma_ops;
402         ppgtt->vma->private = ppgtt;
403         return pd;
404
405 err_pd:
406         free_pd(&ppgtt->base.vm, pd);
407         return ERR_PTR(err);
408 }
409
410 void gen6_ppgtt_unpin(struct i915_ppgtt *base)
411 {
412         struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
413
414         GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
415         if (atomic_dec_and_test(&ppgtt->pin_count))
416                 i915_vma_unpin(ppgtt->vma);
417 }
418
419 struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
420 {
421         struct i915_ggtt * const ggtt = gt->ggtt;
422         struct gen6_ppgtt *ppgtt;
423         int err;
424
425         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
426         if (!ppgtt)
427                 return ERR_PTR(-ENOMEM);
428
429         mutex_init(&ppgtt->flush);
430
431         ppgtt_init(&ppgtt->base, gt, 0);
432         ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
433         ppgtt->base.vm.top = 1;
434
435         ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
436         ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
437         ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
438         ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
439         ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
440
441         ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
442         ppgtt->base.vm.alloc_scratch_dma = alloc_pt_dma;
443         ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
444
445         err = gen6_ppgtt_init_scratch(ppgtt);
446         if (err)
447                 goto err_free;
448
449         ppgtt->base.pd = gen6_alloc_top_pd(ppgtt);
450         if (IS_ERR(ppgtt->base.pd)) {
451                 err = PTR_ERR(ppgtt->base.pd);
452                 goto err_scratch;
453         }
454
455         return &ppgtt->base;
456
457 err_scratch:
458         free_scratch(&ppgtt->base.vm);
459 err_free:
460         kfree(ppgtt);
461         return ERR_PTR(err);
462 }