Linux 6.0-rc1
drivers/gpu/drm/i915/gem/i915_gem_domain.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_vma.h"

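/*
 * A GPU (render domain) write only leaves stale CPU cachelines behind when
 * the object uses a CPU-cacheable, non-write-through cache level; on
 * discrete parts no clflush is ever required.
 */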
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);

        if (IS_DGFX(i915))
                return false;

        return !(obj->cache_level == I915_CACHE_NONE ||
                 obj->cache_level == I915_CACHE_WT);
}

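/*
 * A CPU write needs a clflush when the object is not coherent for CPU
 * writes and no flush is already pending (cache_dirty), or when the object
 * is in use as a framebuffer and must be kept flushed for the display.
 */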
bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);

        if (obj->cache_dirty)
                return false;

        if (IS_DGFX(i915))
                return false;

        if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
                return true;

        /* Currently in use by HW (display engine)? Keep flushed. */
        return i915_gem_object_is_framebuffer(obj);
}

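/*
 * Flush the object's current write domain if it is one of @flush_domains:
 * GGTT writes are flushed through the GT (and the frontbuffer), WC writes
 * with a write barrier, CPU writes with a clflush, and GPU render writes
 * are merely noted by marking the cachelines dirty.
 */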
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
        struct i915_vma *vma;

        assert_object_held(obj);

        if (!(obj->write_domain & flush_domains))
                return;

        switch (obj->write_domain) {
        case I915_GEM_DOMAIN_GTT:
                spin_lock(&obj->vma.lock);
                for_each_ggtt_vma(vma, obj) {
                        if (i915_vma_unset_ggtt_write(vma))
                                intel_gt_flush_ggtt_writes(vma->vm->gt);
                }
                spin_unlock(&obj->vma.lock);

                i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
                break;

        case I915_GEM_DOMAIN_WC:
                wmb();
                break;

        case I915_GEM_DOMAIN_CPU:
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                break;

        case I915_GEM_DOMAIN_RENDER:
                if (gpu_write_needs_clflush(obj))
                        obj->cache_dirty = true;
                break;
        }

        obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
        /*
         * We manually flush the CPU domain so that we can override and
         * force the flush for the display, and perform it asynchronously.
         */
        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
        if (obj->cache_dirty)
                i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
        obj->write_domain = 0;
}

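/**
 * i915_gem_object_flush_if_display - flush the object for scanout
 * @obj: object to flush
 *
 * If the object is currently in use as a framebuffer, take the object lock
 * and flush any pending writes so that the display engine sees coherent
 * contents.
 */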
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
        if (!i915_gem_object_is_framebuffer(obj))
                return;

        i915_gem_object_lock(obj, NULL);
        __i915_gem_object_flush_for_display(obj);
        i915_gem_object_unlock(obj);
}

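/**
 * i915_gem_object_flush_if_display_locked - flush the object for scanout
 * @obj: object to flush, with its lock already held by the caller
 *
 * As i915_gem_object_flush_if_display(), but for callers that already hold
 * the object lock.
 */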
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
        if (i915_gem_object_is_framebuffer(obj))
                __i915_gem_object_flush_for_display(obj);
}

/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_WC)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * WC domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_WC;
        if (write) {
                obj->read_domains = I915_GEM_DOMAIN_WC;
                obj->write_domain = I915_GEM_DOMAIN_WC;
                obj->mm.dirty = true;
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_GTT)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * GTT domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_GTT;
        if (write) {
                struct i915_vma *vma;

                obj->read_domains = I915_GEM_DOMAIN_GTT;
                obj->write_domain = I915_GEM_DOMAIN_GTT;
                obj->mm.dirty = true;

                spin_lock(&obj->vma.lock);
                for_each_ggtt_vma(vma, obj)
                        if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
                                i915_vma_set_ggtt_write(vma);
                spin_unlock(&obj->vma.lock);
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object
 * across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                    enum i915_cache_level cache_level)
{
        int ret;

        if (obj->cache_level == cache_level)
                return 0;

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        /* Always invalidate stale cachelines */
        if (obj->cache_level != cache_level) {
                i915_gem_object_set_cache_coherency(obj, cache_level);
                obj->cache_dirty = true;
        }

        /* The cache-level will be applied when each vma is rebound. */
        return i915_gem_object_unbind(obj,
                                      I915_GEM_OBJECT_UNBIND_ACTIVE |
                                      I915_GEM_OBJECT_UNBIND_BARRIER);
}

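/**
 * i915_gem_get_caching_ioctl - report an object's caching mode to userspace
 * @dev: drm device
 * @data: ioctl data blob (struct drm_i915_gem_caching)
 * @file: drm file
 *
 * Translates the object's cache level into the I915_CACHING_* value
 * expected by userspace. Not available on discrete GPUs.
 */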
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        int err = 0;

        if (IS_DGFX(to_i915(dev)))
                return -ENODEV;

        rcu_read_lock();
        obj = i915_gem_object_lookup_rcu(file, args->handle);
        if (!obj) {
                err = -ENOENT;
                goto out;
        }

        switch (obj->cache_level) {
        case I915_CACHE_LLC:
        case I915_CACHE_L3_LLC:
                args->caching = I915_CACHING_CACHED;
                break;

        case I915_CACHE_WT:
                args->caching = I915_CACHING_DISPLAY;
                break;

        default:
                args->caching = I915_CACHING_NONE;
                break;
        }
out:
        rcu_read_unlock();
        return err;
}

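/**
 * i915_gem_set_caching_ioctl - change an object's caching mode from userspace
 * @dev: drm device
 * @data: ioctl data blob (struct drm_i915_gem_caching)
 * @file: drm file
 *
 * Maps the requested I915_CACHING_* value onto a cache level and applies it
 * to the object with i915_gem_object_set_cache_level(). Not available on
 * discrete GPUs.
 */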
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_private *i915 = to_i915(dev);
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        enum i915_cache_level level;
        int ret = 0;

        if (IS_DGFX(i915))
                return -ENODEV;

        switch (args->caching) {
        case I915_CACHING_NONE:
                level = I915_CACHE_NONE;
                break;
        case I915_CACHING_CACHED:
                /*
                 * Due to a HW issue on BXT A stepping, GPU stores via a
                 * snooped mapping may leave stale data in a corresponding CPU
                 * cacheline, whereas normally such cachelines would get
                 * invalidated.
                 */
                if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
                        return -ENODEV;

                level = I915_CACHE_LLC;
                break;
        case I915_CACHING_DISPLAY:
                level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
                break;
        default:
                return -EINVAL;
        }

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * The caching mode of a proxy object is handled by its generator and
         * is not allowed to be changed by userspace.
         */
        if (i915_gem_object_is_proxy(obj)) {
                /*
                 * Silently allow cached for userptr; the vulkan driver
                 * sets all objects to cached
                 */
                if (!i915_gem_object_is_userptr(obj) ||
                    args->caching != I915_CACHING_CACHED)
                        ret = -ENXIO;

                goto out;
        }

        ret = i915_gem_object_lock_interruptible(obj, NULL);
        if (ret)
                goto out;

        ret = i915_gem_object_set_cache_level(obj, level);
        i915_gem_object_unlock(obj);

out:
        i915_gem_object_put(obj);
        return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                     struct i915_gem_ww_ctx *ww,
                                     u32 alignment,
                                     const struct i915_ggtt_view *view,
                                     unsigned int flags)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct i915_vma *vma;
        int ret;

        /* Frame buffer must be in LMEM */
        if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
                return ERR_PTR(-EINVAL);

        /*
         * The display engine is not coherent with the LLC cache on gen6. As
         * a result, we make sure that the pinning that is about to occur is
         * done with uncached PTEs. This is the lowest common denominator for
         * all chipsets.
         *
         * However for gen6+, we could do better by using the GFDT bit instead
         * of uncaching, which would allow us to flush all the LLC-cached data
         * with that bit in the PTE to main memory with just one PIPE_CONTROL.
         */
        ret = i915_gem_object_set_cache_level(obj,
                                              HAS_WT(i915) ?
                                              I915_CACHE_WT : I915_CACHE_NONE);
        if (ret)
                return ERR_PTR(ret);

        /*
         * As the user may map the buffer once pinned in the display plane
         * (e.g. libkms for the bootup splash), we have to ensure that we
         * always use map_and_fenceable for all scanout buffers. However,
         * it may simply be too big to fit into mappable, in which case
         * put it anyway and hope that userspace can cope (but always first
         * try to preserve the existing ABI).
         */
        vma = ERR_PTR(-ENOSPC);
        if ((flags & PIN_MAPPABLE) == 0 &&
            (!view || view->type == I915_GGTT_VIEW_NORMAL))
                vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
                                                  flags | PIN_MAPPABLE |
                                                  PIN_NONBLOCK);
        if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
                vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
                                                  alignment, flags);
        if (IS_ERR(vma))
                return vma;

        vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
        i915_vma_mark_scanout(vma);

        i915_gem_object_flush_if_display_locked(obj);

        return vma;
}

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* Flush the CPU cache if it's still invalid. */
        if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                obj->read_domains |= I915_GEM_DOMAIN_CPU;
        }

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

        /* If we're writing through the CPU, then the GPU read domains will
         * need to be invalidated at next use.
         */
        if (write)
                __start_cpu_write(obj);

        return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either through the mmap ioctl's mapping or a GTT
 * mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file)
{
        struct drm_i915_gem_set_domain *args = data;
        struct drm_i915_gem_object *obj;
        u32 read_domains = args->read_domains;
        u32 write_domain = args->write_domain;
        int err;

        if (IS_DGFX(to_i915(dev)))
                return -ENODEV;

        /* Only handle setting domains to types used by the CPU. */
        if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
                return -EINVAL;

        /*
         * Having something in the write domain implies it's in the read
         * domain, and only that read domain. Enforce that in the request.
         */
        if (write_domain && read_domains != write_domain)
                return -EINVAL;

        if (!read_domains)
                return 0;

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * Try to flush the object off the GPU without holding the lock.
         * We will repeat the flush holding the lock in the normal manner
         * to catch cases where we are gazumped.
         */
        err = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_PRIORITY |
                                   (write_domain ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (err)
                goto out;

        if (i915_gem_object_is_userptr(obj)) {
                /*
                 * Try to grab userptr pages; iris uses set_domain to check
                 * userptr validity.
                 */
                err = i915_gem_object_userptr_validate(obj);
                if (!err)
                        err = i915_gem_object_wait(obj,
                                                   I915_WAIT_INTERRUPTIBLE |
                                                   I915_WAIT_PRIORITY |
                                                   (write_domain ? I915_WAIT_ALL : 0),
                                                   MAX_SCHEDULE_TIMEOUT);
                goto out;
        }

        /*
         * Proxy objects do not control access to the backing storage, ergo
         * they cannot be used as a means to manipulate the cache domain
         * tracking for that backing storage. The proxy object is always
         * considered to be outside of any cache domain.
         */
        if (i915_gem_object_is_proxy(obj)) {
                err = -ENXIO;
                goto out;
        }

        err = i915_gem_object_lock_interruptible(obj, NULL);
        if (err)
                goto out;

        /*
         * Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        err = i915_gem_object_pin_pages(obj);
        if (err)
                goto out_unlock;

        /*
         * Already in the desired write domain? Nothing for us to do!
         *
         * We apply a little bit of cunning here to catch a broader set of
         * no-ops. If obj->write_domain is set, we must be in the same
         * obj->read_domains, and only that domain. Therefore, if that
         * obj->write_domain matches the request read_domains, we are
         * already in the same read/write domain and can skip the operation,
         * without having to further check the requested write_domain.
         */
        if (READ_ONCE(obj->write_domain) == read_domains)
                goto out_unpin;

        if (read_domains & I915_GEM_DOMAIN_WC)
                err = i915_gem_object_set_to_wc_domain(obj, write_domain);
        else if (read_domains & I915_GEM_DOMAIN_GTT)
                err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
        else
                err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
        i915_gem_object_unpin_pages(obj);

out_unlock:
        i915_gem_object_unlock(obj);

        if (!err && write_domain)
                i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
        i915_gem_object_put(obj);
        return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
                                 unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, false);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the cpu read domain, set ourself into the gtt
         * read domain and manually flush cachelines (if required). This
         * optimizes for the case when the gpu will dirty the data
         * anyway again before the next pread happens.
         */
        if (!obj->cache_dirty &&
            !(obj->read_domains & I915_GEM_DOMAIN_CPU))
                *needs_clflush = CLFLUSH_BEFORE;

out:
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}

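/*
 * As i915_gem_object_prepare_read(), but for CPU writes: waits for all GPU
 * access (reads and writes), pins the pages, and sets the CLFLUSH_BEFORE /
 * CLFLUSH_AFTER bits in *needs_clflush to tell the caller which manual
 * cacheline flushes its write requires.
 */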
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
                                  unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, true);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the cpu write domain, set ourself into the
         * gtt write domain and manually flush cachelines (as required).
         * This optimizes for the case when the gpu will use the data
         * right away and we therefore have to clflush anyway.
         */
        if (!obj->cache_dirty) {
                *needs_clflush |= CLFLUSH_AFTER;

                /*
                 * Same trick applies to invalidate partially written
                 * cachelines read before writing.
                 */
                if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
                        *needs_clflush |= CLFLUSH_BEFORE;
        }

out:
        i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
        obj->mm.dirty = true;
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}