drivers/gpu/drm/i915/gem/i915_gem_domain.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2014-2016 Intel Corporation
5  */
6
7 #include "display/intel_display.h"
8 #include "gt/intel_gt.h"
9
10 #include "i915_drv.h"
11 #include "i915_gem_clflush.h"
12 #include "i915_gem_domain.h"
13 #include "i915_gem_gtt.h"
14 #include "i915_gem_ioctls.h"
15 #include "i915_gem_lmem.h"
16 #include "i915_gem_mman.h"
17 #include "i915_gem_object.h"
18 #include "i915_gem_object_frontbuffer.h"
19 #include "i915_vma.h"
20
21 #define VTD_GUARD (168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */
22
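/*
 * Will a GPU (render) write to @obj leave dirty CPU cachelines behind,
 * i.e. does the object use a cacheable (non-UC/WT) mapping on an
 * integrated part? Used below to mark obj->cache_dirty when flushing
 * the RENDER write domain.
 */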
23 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
24 {
25         struct drm_i915_private *i915 = to_i915(obj->base.dev);
26
27         if (IS_DGFX(i915))
28                 return false;
29
30         /*
31          * For objects created by userspace through GEM_CREATE with pat_index
32          * set by set_pat extension, i915_gem_object_has_cache_level() will
33          * always return true, because the coherency of such an object is
34          * managed by userspace. Otherwise the call here would fall back to
35          * checking whether the object is uncached or write-through.
36          */
37         return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
38                  i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
39 }
40
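/*
 * Do CPU writes to @obj need a clflush before the GPU or display engine
 * can observe them? Never on discrete parts or when a flush is already
 * tracked via obj->cache_dirty; for write-coherent objects only while
 * the object is in use as a framebuffer.
 */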
41 bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
42 {
43         struct drm_i915_private *i915 = to_i915(obj->base.dev);
44
45         if (obj->cache_dirty)
46                 return false;
47
48         if (IS_DGFX(i915))
49                 return false;
50
51         if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
52                 return true;
53
54         /* Currently in use by HW (display engine)? Keep flushed. */
55         return i915_gem_object_is_framebuffer(obj);
56 }
57
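/*
 * Flush outstanding writes in @obj's current write domain (GTT, WC, CPU
 * or GPU render) if it intersects @flush_domains, then clear the write
 * domain. The caller must hold the object lock.
 */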
58 static void
59 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
60 {
61         struct i915_vma *vma;
62
63         assert_object_held(obj);
64
65         if (!(obj->write_domain & flush_domains))
66                 return;
67
68         switch (obj->write_domain) {
69         case I915_GEM_DOMAIN_GTT:
70                 spin_lock(&obj->vma.lock);
71                 for_each_ggtt_vma(vma, obj)
72                         i915_vma_flush_writes(vma);
73                 spin_unlock(&obj->vma.lock);
74
75                 i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
76                 break;
77
78         case I915_GEM_DOMAIN_WC:
79                 wmb();
80                 break;
81
82         case I915_GEM_DOMAIN_CPU:
83                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
84                 break;
85
86         case I915_GEM_DOMAIN_RENDER:
87                 if (gpu_write_needs_clflush(obj))
88                         obj->cache_dirty = true;
89                 break;
90         }
91
92         obj->write_domain = 0;
93 }
94
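/*
 * Flush any non-CPU write domain and then force a clflush of dirty
 * cachelines, so the object becomes coherent for the display engine.
 */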
95 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
96 {
97         /*
98          * We manually flush the CPU domain so that we can override and
99          * force the flush for the display, and perform it asynchronously.
100          */
101         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
102         if (obj->cache_dirty)
103                 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
104         obj->write_domain = 0;
105 }
106
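/*
 * If @obj is currently in use as a framebuffer, take the object lock
 * and flush pending writes so the display engine sees coherent data.
 */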
107 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
108 {
109         if (!i915_gem_object_is_framebuffer(obj))
110                 return;
111
112         i915_gem_object_lock(obj, NULL);
113         __i915_gem_object_flush_for_display(obj);
114         i915_gem_object_unlock(obj);
115 }
116
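/*
 * As i915_gem_object_flush_if_display(), but with the object lock
 * already held by the caller.
 */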
117 void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
118 {
119         if (i915_gem_object_is_framebuffer(obj))
120                 __i915_gem_object_flush_for_display(obj);
121 }
122
123 /**
124  * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
125  *                                    possibly write domain.
126  * @obj: object to act on
127  * @write: ask for write access or read only
128  *
129  * This function returns when the move is complete, including waiting on
130  * flushes to occur.
131  */
132 int
133 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
134 {
135         int ret;
136
137         assert_object_held(obj);
138
139         ret = i915_gem_object_wait(obj,
140                                    I915_WAIT_INTERRUPTIBLE |
141                                    (write ? I915_WAIT_ALL : 0),
142                                    MAX_SCHEDULE_TIMEOUT);
143         if (ret)
144                 return ret;
145
146         if (obj->write_domain == I915_GEM_DOMAIN_WC)
147                 return 0;
148
149         /* Flush and acquire obj->pages so that direct access to the memory
150          * is coherent with any previous cached writes through shmemfs and
151          * that our cache domain tracking remains valid.
152          * For example, if the obj->filp was moved to swap without us
153          * being notified and releasing the pages, we would mistakenly
154          * continue to assume that the obj remained out of the CPU cached
155          * domain.
156          */
157         ret = i915_gem_object_pin_pages(obj);
158         if (ret)
159                 return ret;
160
161         flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
162
163         /* Serialise direct access to this object with the barriers for
164          * coherent writes from the GPU, by effectively invalidating the
165          * WC domain upon first access.
166          */
167         if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
168                 mb();
169
170         /* It should now be out of any other write domains, and we can update
171          * the domain values for our changes.
172          */
173         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
174         obj->read_domains |= I915_GEM_DOMAIN_WC;
175         if (write) {
176                 obj->read_domains = I915_GEM_DOMAIN_WC;
177                 obj->write_domain = I915_GEM_DOMAIN_WC;
178                 obj->mm.dirty = true;
179         }
180
181         i915_gem_object_unpin_pages(obj);
182         return 0;
183 }
184
185 /**
186  * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
187  *                                     and possibly write domain.
188  * @obj: object to act on
189  * @write: ask for write access or read only
190  *
191  * This function returns when the move is complete, including waiting on
192  * flushes to occur.
193  */
194 int
195 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
196 {
197         int ret;
198
199         assert_object_held(obj);
200
201         ret = i915_gem_object_wait(obj,
202                                    I915_WAIT_INTERRUPTIBLE |
203                                    (write ? I915_WAIT_ALL : 0),
204                                    MAX_SCHEDULE_TIMEOUT);
205         if (ret)
206                 return ret;
207
208         if (obj->write_domain == I915_GEM_DOMAIN_GTT)
209                 return 0;
210
211         /* Flush and acquire obj->pages so that direct access to the memory
212          * is coherent with any previous cached writes through shmemfs and
213          * that our cache domain tracking remains valid.
214          * For example, if the obj->filp was moved to swap without us
215          * being notified and releasing the pages, we would mistakenly
216          * continue to assume that the obj remained out of the CPU cached
217          * domain.
218          */
219         ret = i915_gem_object_pin_pages(obj);
220         if (ret)
221                 return ret;
222
223         flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
224
225         /* Serialise direct access to this object with the barriers for
226          * coherent writes from the GPU, by effectively invalidating the
227          * GTT domain upon first access.
228          */
229         if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
230                 mb();
231
232         /* It should now be out of any other write domains, and we can update
233          * the domain values for our changes.
234          */
235         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
236         obj->read_domains |= I915_GEM_DOMAIN_GTT;
237         if (write) {
238                 struct i915_vma *vma;
239
240                 obj->read_domains = I915_GEM_DOMAIN_GTT;
241                 obj->write_domain = I915_GEM_DOMAIN_GTT;
242                 obj->mm.dirty = true;
243
244                 spin_lock(&obj->vma.lock);
245                 for_each_ggtt_vma(vma, obj)
246                         if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
247                                 i915_vma_set_ggtt_write(vma);
248                 spin_unlock(&obj->vma.lock);
249         }
250
251         i915_gem_object_unpin_pages(obj);
252         return 0;
253 }
254
255 /**
256  * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
257  * @obj: object to act on
258  * @cache_level: new cache level to set for the object
259  *
260  * After this function returns, the object will be in the new cache-level
261  * across all GTT and the contents of the backing storage will be coherent,
262  * with respect to the new cache-level. In order to keep the backing storage
263  * coherent for all users, we only allow a single cache level to be set
264  * globally on the object and prevent it from being changed whilst the
265  * hardware is reading from the object. That is if the object is currently
266          * hardware is reading from the object. That is, if the object is currently
267  * cache coherency) and all non-MOCS GPU access will also be uncached so
268  * that all direct access to the scanout remains coherent.
269  */
270 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
271                                     enum i915_cache_level cache_level)
272 {
273         int ret;
274
275         /*
276          * For objects created by userspace through GEM_CREATE with pat_index
277          * set by set_pat extension, simply return 0 here without touching
278          * the cache setting, because such objects should have an immutable
279          * cache setting by design and are always managed by userspace.
280          */
281         if (i915_gem_object_has_cache_level(obj, cache_level))
282                 return 0;
283
284         ret = i915_gem_object_wait(obj,
285                                    I915_WAIT_INTERRUPTIBLE |
286                                    I915_WAIT_ALL,
287                                    MAX_SCHEDULE_TIMEOUT);
288         if (ret)
289                 return ret;
290
291         /* Always invalidate stale cachelines */
292         i915_gem_object_set_cache_coherency(obj, cache_level);
293         obj->cache_dirty = true;
294
295         /* The cache-level will be applied when each vma is rebound. */
296         return i915_gem_object_unbind(obj,
297                                       I915_GEM_OBJECT_UNBIND_ACTIVE |
298                                       I915_GEM_OBJECT_UNBIND_BARRIER);
299 }
300
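/*
 * GEM_GET_CACHING ioctl: report the object's caching mode (cached,
 * display/WT or none) to userspace. Not available on discrete GPUs and
 * rejected for objects whose PAT index was set by userspace.
 */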
301 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
302                                struct drm_file *file)
303 {
304         struct drm_i915_gem_caching *args = data;
305         struct drm_i915_gem_object *obj;
306         int err = 0;
307
308         if (IS_DGFX(to_i915(dev)))
309                 return -ENODEV;
310
311         rcu_read_lock();
312         obj = i915_gem_object_lookup_rcu(file, args->handle);
313         if (!obj) {
314                 err = -ENOENT;
315                 goto out;
316         }
317
318         /*
319          * This ioctl should be disabled for the objects with pat_index
320          * set by user space.
321          */
322         if (obj->pat_set_by_user) {
323                 err = -EOPNOTSUPP;
324                 goto out;
325         }
326
327         if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
328             i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
329                 args->caching = I915_CACHING_CACHED;
330         else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
331                 args->caching = I915_CACHING_DISPLAY;
332         else
333                 args->caching = I915_CACHING_NONE;
334 out:
335         rcu_read_unlock();
336         return err;
337 }
338
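/*
 * GEM_SET_CACHING ioctl: change the object's caching mode. Not available
 * on discrete GPUs or from graphics IP version 12.70 onwards, and
 * rejected for objects with a user-set PAT index; proxy objects are
 * also rejected, except for userptr requesting I915_CACHING_CACHED.
 */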
339 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
340                                struct drm_file *file)
341 {
342         struct drm_i915_private *i915 = to_i915(dev);
343         struct drm_i915_gem_caching *args = data;
344         struct drm_i915_gem_object *obj;
345         enum i915_cache_level level;
346         int ret = 0;
347
348         if (IS_DGFX(i915))
349                 return -ENODEV;
350
351         if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
352                 return -EOPNOTSUPP;
353
354         switch (args->caching) {
355         case I915_CACHING_NONE:
356                 level = I915_CACHE_NONE;
357                 break;
358         case I915_CACHING_CACHED:
359                 /*
360                  * Due to a HW issue on BXT A stepping, GPU stores via a
361                  * snooped mapping may leave stale data in a corresponding CPU
362                  * cacheline, whereas normally such cachelines would get
363                  * invalidated.
364                  */
365                 if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
366                         return -ENODEV;
367
368                 level = I915_CACHE_LLC;
369                 break;
370         case I915_CACHING_DISPLAY:
371                 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
372                 break;
373         default:
374                 return -EINVAL;
375         }
376
377         obj = i915_gem_object_lookup(file, args->handle);
378         if (!obj)
379                 return -ENOENT;
380
381         /*
382          * This ioctl should be disabled for the objects with pat_index
383          * set by user space.
384          */
385         if (obj->pat_set_by_user) {
386                 ret = -EOPNOTSUPP;
387                 goto out;
388         }
389
390         /*
391          * The caching mode of a proxy object is handled by its generator, and is
392          * not allowed to be changed by userspace.
393          */
394         if (i915_gem_object_is_proxy(obj)) {
395                 /*
396                  * Silently allow cached for userptr; the vulkan driver
397                  * sets all objects to cached
398                  */
399                 if (!i915_gem_object_is_userptr(obj) ||
400                     args->caching != I915_CACHING_CACHED)
401                         ret = -ENXIO;
402
403                 goto out;
404         }
405
406         ret = i915_gem_object_lock_interruptible(obj, NULL);
407         if (ret)
408                 goto out;
409
410         ret = i915_gem_object_set_cache_level(obj, level);
411         i915_gem_object_unlock(obj);
412
413 out:
414         i915_gem_object_put(obj);
415         return ret;
416 }
417
418 /*
419  * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
420  * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
421  * (for pageflips). We only flush the caches while preparing the buffer for
422  * display, the callers are responsible for frontbuffer flush.
423  */
424 struct i915_vma *
425 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
426                                      struct i915_gem_ww_ctx *ww,
427                                      u32 alignment,
428                                      const struct i915_gtt_view *view,
429                                      unsigned int flags)
430 {
431         struct drm_i915_private *i915 = to_i915(obj->base.dev);
432         struct i915_vma *vma;
433         int ret;
434
435         /* Frame buffer must be in LMEM */
436         if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
437                 return ERR_PTR(-EINVAL);
438
439         /*
440          * The display engine is not coherent with the LLC cache on gen6.  As
441          * a result, we make sure that the pinning that is about to occur is
442          * done with uncached PTEs. This is lowest common denominator for all
443          * chipsets.
444          *
445          * However for gen6+, we could do better by using the GFDT bit instead
446          * of uncaching, which would allow us to flush all the LLC-cached data
447          * with that bit in the PTE to main memory with just one PIPE_CONTROL.
448          */
449         ret = i915_gem_object_set_cache_level(obj,
450                                               HAS_WT(i915) ?
451                                               I915_CACHE_WT : I915_CACHE_NONE);
452         if (ret)
453                 return ERR_PTR(ret);
454
455         /* VT-d may overfetch before/after the vma, so pad with scratch */
456         if (intel_scanout_needs_vtd_wa(i915)) {
457                 unsigned int guard = VTD_GUARD;
458
459                 if (i915_gem_object_is_tiled(obj))
460                         guard = max(guard,
461                                     i915_gem_object_get_tile_row_size(obj));
462
463                 flags |= PIN_OFFSET_GUARD | guard;
464         }
465
466         /*
467          * As the user may map the buffer once pinned in the display plane
468          * (e.g. libkms for the bootup splash), we have to ensure that we
469          * always use map_and_fenceable for all scanout buffers. However,
470          * it may simply be too big to fit into mappable, in which case
471          * put it anyway and hope that userspace can cope (but always first
472          * try to preserve the existing ABI).
473          */
474         vma = ERR_PTR(-ENOSPC);
475         if ((flags & PIN_MAPPABLE) == 0 &&
476             (!view || view->type == I915_GTT_VIEW_NORMAL))
477                 vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
478                                                   flags | PIN_MAPPABLE |
479                                                   PIN_NONBLOCK);
480         if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
481                 vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
482                                                   alignment, flags);
483         if (IS_ERR(vma))
484                 return vma;
485
486         vma->display_alignment = max(vma->display_alignment, alignment);
487         i915_vma_mark_scanout(vma);
488
489         i915_gem_object_flush_if_display_locked(obj);
490
491         return vma;
492 }
493
494 /**
495  * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
496  *                                     and possibly write domain.
497  * @obj: object to act on
498  * @write: requesting write or read-only access
499  *
500  * This function returns when the move is complete, including waiting on
501  * flushes to occur.
502  */
503 int
504 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
505 {
506         int ret;
507
508         assert_object_held(obj);
509
510         ret = i915_gem_object_wait(obj,
511                                    I915_WAIT_INTERRUPTIBLE |
512                                    (write ? I915_WAIT_ALL : 0),
513                                    MAX_SCHEDULE_TIMEOUT);
514         if (ret)
515                 return ret;
516
517         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
518
519         /* Flush the CPU cache if it's still invalid. */
520         if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
521                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
522                 obj->read_domains |= I915_GEM_DOMAIN_CPU;
523         }
524
525         /* It should now be out of any other write domains, and we can update
526          * the domain values for our changes.
527          */
528         GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
529
530         /* If we're writing through the CPU, then the GPU read domains will
531          * need to be invalidated at next use.
532          */
533         if (write)
534                 __start_cpu_write(obj);
535
536         return 0;
537 }
538
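/*
 * A hypothetical userspace sketch for the ioctl handled below, using the
 * uapi names from include/uapi/drm/i915_drm.h, e.g. moving an object to
 * the CPU domain before accessing a CPU mmap:
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg))
 *		return -errno;
 */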
539 /**
540  * i915_gem_set_domain_ioctl - Called when user space prepares to use an
541  *                             object with the CPU, either through the mmap
542  *                             ioctl's mapping or a GTT mapping.
543  * @dev: drm device
544  * @data: ioctl data blob
545  * @file: drm file
546  */
547 int
548 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
549                           struct drm_file *file)
550 {
551         struct drm_i915_gem_set_domain *args = data;
552         struct drm_i915_gem_object *obj;
553         u32 read_domains = args->read_domains;
554         u32 write_domain = args->write_domain;
555         int err;
556
557         if (IS_DGFX(to_i915(dev)))
558                 return -ENODEV;
559
560         /* Only handle setting domains to types used by the CPU. */
561         if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
562                 return -EINVAL;
563
564         /*
565          * Having something in the write domain implies it's in the read
566          * domain, and only that read domain.  Enforce that in the request.
567          */
568         if (write_domain && read_domains != write_domain)
569                 return -EINVAL;
570
571         if (!read_domains)
572                 return 0;
573
574         obj = i915_gem_object_lookup(file, args->handle);
575         if (!obj)
576                 return -ENOENT;
577
578         /*
579          * Try to flush the object off the GPU without holding the lock.
580          * We will repeat the flush holding the lock in the normal manner
581          * to catch cases where we are gazumped.
582          */
583         err = i915_gem_object_wait(obj,
584                                    I915_WAIT_INTERRUPTIBLE |
585                                    I915_WAIT_PRIORITY |
586                                    (write_domain ? I915_WAIT_ALL : 0),
587                                    MAX_SCHEDULE_TIMEOUT);
588         if (err)
589                 goto out;
590
591         if (i915_gem_object_is_userptr(obj)) {
592                 /*
593                  * Try to grab userptr pages, iris uses set_domain to check
594                  * userptr validity
595                  */
596                 err = i915_gem_object_userptr_validate(obj);
597                 if (!err)
598                         err = i915_gem_object_wait(obj,
599                                                    I915_WAIT_INTERRUPTIBLE |
600                                                    I915_WAIT_PRIORITY |
601                                                    (write_domain ? I915_WAIT_ALL : 0),
602                                                    MAX_SCHEDULE_TIMEOUT);
603                 goto out;
604         }
605
606         /*
607          * Proxy objects do not control access to the backing storage, ergo
608          * they cannot be used as a means to manipulate the cache domain
609          * tracking for that backing storage. The proxy object is always
610          * considered to be outside of any cache domain.
611          */
612         if (i915_gem_object_is_proxy(obj)) {
613                 err = -ENXIO;
614                 goto out;
615         }
616
617         err = i915_gem_object_lock_interruptible(obj, NULL);
618         if (err)
619                 goto out;
620
621         /*
622          * Flush and acquire obj->pages so that direct access to the memory
623          * is coherent with any previous cached writes through shmemfs and
624          * that our cache domain tracking remains valid.
625          * For example, if the obj->filp was moved to swap without us
626          * being notified and releasing the pages, we would mistakenly
627          * continue to assume that the obj remained out of the CPU cached
628          * domain.
629          */
630         err = i915_gem_object_pin_pages(obj);
631         if (err)
632                 goto out_unlock;
633
634         /*
635          * Already in the desired write domain? Nothing for us to do!
636          *
637          * We apply a little bit of cunning here to catch a broader set of
638          * no-ops. If obj->write_domain is set, we must be in the same
639          * obj->read_domains, and only that domain. Therefore, if that
640          * obj->write_domain matches the request read_domains, we are
641          * already in the same read/write domain and can skip the operation,
642          * without having to further check the requested write_domain.
643          */
644         if (READ_ONCE(obj->write_domain) == read_domains)
645                 goto out_unpin;
646
647         if (read_domains & I915_GEM_DOMAIN_WC)
648                 err = i915_gem_object_set_to_wc_domain(obj, write_domain);
649         else if (read_domains & I915_GEM_DOMAIN_GTT)
650                 err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
651         else
652                 err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
653
654 out_unpin:
655         i915_gem_object_unpin_pages(obj);
656
657 out_unlock:
658         i915_gem_object_unlock(obj);
659
660         if (!err && write_domain)
661                 i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
662
663 out:
664         i915_gem_object_put(obj);
665         return err;
666 }
667
668 /*
669  * Pins the specified object's pages and synchronizes the object with
670  * GPU accesses. Sets needs_clflush to non-zero if the caller should
671  * flush the object from the CPU cache.
672  */
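/*
 * A minimal, hypothetical caller sketch based on the contract above
 * (error handling trimmed); on success the pages are returned pinned
 * and must be unpinned by the caller:
 *
 *	unsigned int needs_clflush;
 *	int err;
 *
 *	i915_gem_object_lock(obj, NULL);
 *	err = i915_gem_object_prepare_read(obj, &needs_clflush);
 *	if (!err) {
 *		... read the backing pages, flushing the CPU cache first
 *		    if needs_clflush & CLFLUSH_BEFORE is set ...
 *		i915_gem_object_unpin_pages(obj);
 *	}
 *	i915_gem_object_unlock(obj);
 */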
673 int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
674                                  unsigned int *needs_clflush)
675 {
676         int ret;
677
678         *needs_clflush = 0;
679         if (!i915_gem_object_has_struct_page(obj))
680                 return -ENODEV;
681
682         assert_object_held(obj);
683
684         ret = i915_gem_object_wait(obj,
685                                    I915_WAIT_INTERRUPTIBLE,
686                                    MAX_SCHEDULE_TIMEOUT);
687         if (ret)
688                 return ret;
689
690         ret = i915_gem_object_pin_pages(obj);
691         if (ret)
692                 return ret;
693
694         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
695             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
696                 ret = i915_gem_object_set_to_cpu_domain(obj, false);
697                 if (ret)
698                         goto err_unpin;
699                 else
700                         goto out;
701         }
702
703         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
704
705         /* If we're not in the cpu read domain, set ourself into the gtt
706          * read domain and manually flush cachelines (if required). This
707          * optimizes for the case when the gpu will dirty the data
708          * anyway again before the next pread happens.
709          */
710         if (!obj->cache_dirty &&
711             !(obj->read_domains & I915_GEM_DOMAIN_CPU))
712                 *needs_clflush = CLFLUSH_BEFORE;
713
714 out:
715         /* return with the pages pinned */
716         return 0;
717
718 err_unpin:
719         i915_gem_object_unpin_pages(obj);
720         return ret;
721 }
722
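/*
 * As i915_gem_object_prepare_read(), but for a CPU write: additionally
 * waits for all GPU activity (reads and writes), marks the pages dirty
 * and sets *needs_clflush for flushing before and/or after the write.
 */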
723 int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
724                                   unsigned int *needs_clflush)
725 {
726         int ret;
727
728         *needs_clflush = 0;
729         if (!i915_gem_object_has_struct_page(obj))
730                 return -ENODEV;
731
732         assert_object_held(obj);
733
734         ret = i915_gem_object_wait(obj,
735                                    I915_WAIT_INTERRUPTIBLE |
736                                    I915_WAIT_ALL,
737                                    MAX_SCHEDULE_TIMEOUT);
738         if (ret)
739                 return ret;
740
741         ret = i915_gem_object_pin_pages(obj);
742         if (ret)
743                 return ret;
744
745         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
746             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
747                 ret = i915_gem_object_set_to_cpu_domain(obj, true);
748                 if (ret)
749                         goto err_unpin;
750                 else
751                         goto out;
752         }
753
754         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
755
756         /* If we're not in the cpu write domain, set ourself into the
757          * gtt write domain and manually flush cachelines (as required).
758          * This optimizes for the case when the gpu will use the data
759          * right away and we therefore have to clflush anyway.
760          */
761         if (!obj->cache_dirty) {
762                 *needs_clflush |= CLFLUSH_AFTER;
763
764                 /*
765                  * Same trick applies to invalidate partially written
766                  * cachelines read before writing.
767                  */
768                 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
769                         *needs_clflush |= CLFLUSH_BEFORE;
770         }
771
772 out:
773         i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
774         obj->mm.dirty = true;
775         /* return with the pages pinned */
776         return 0;
777
778 err_unpin:
779         i915_gem_object_unpin_pages(obj);
780         return ret;
781 }