drivers/gpu/drm/i915/gem/i915_gem_domain.c

   1 /*
   2  * SPDX-License-Identifier: MIT
   3  *
   4  * Copyright © 2014-2016 Intel Corporation
   5  */
   6
   7 #include "display/intel_display.h"
   8 #include "gt/intel_gt.h"
   9
  10 #include "i915_drv.h"
  11 #include "i915_gem_clflush.h"
  12 #include "i915_gem_domain.h"
  13 #include "i915_gem_gtt.h"
  14 #include "i915_gem_ioctls.h"
  15 #include "i915_gem_lmem.h"
  16 #include "i915_gem_mman.h"
  17 #include "i915_gem_object.h"
  18 #include "i915_gem_object_frontbuffer.h"
  19 #include "i915_vma.h"
  20
  21 #define VTD_GUARD (168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */
  22
  23 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
  24 {
  25         struct drm_i915_private *i915 = to_i915(obj->base.dev);
  26
  27         if (IS_DGFX(i915))
  28                 return false;
  29
  30         /*
  31          * For objects created by userspace through GEM_CREATE with pat_index
  32          * set by set_pat extension, i915_gem_object_has_cache_level() will
  33          * always return true, because the coherency of such object is managed
  34          * by userspace. Othereise the call here would fall back to checking
  35          * whether the object is un-cached or write-through.
  36          */
  37         return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
  38                  i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
  39 }
  40
  41 bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
  42 {
  43         struct drm_i915_private *i915 = to_i915(obj->base.dev);
  44
  45         if (obj->cache_dirty)
  46                 return false;
  47
  48         if (IS_DGFX(i915))
  49                 return false;
  50
  51         if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
  52                 return true;
  53
  54         /* Currently in use by HW (display engine)? Keep flushed. */
  55         return i915_gem_object_is_framebuffer(obj);
  56 }
  57
  58 static void
  59 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
  60 {
  61         struct i915_vma *vma;
  62
  63         assert_object_held(obj);
  64
  65         if (!(obj->write_domain & flush_domains))
  66                 return;
  67
  68         switch (obj->write_domain) {
  69         case I915_GEM_DOMAIN_GTT:
  70                 spin_lock(&obj->vma.lock);
  71                 for_each_ggtt_vma(vma, obj)
  72                         i915_vma_flush_writes(vma);
  73                 spin_unlock(&obj->vma.lock);
  74
  75                 i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
  76                 break;
  77
  78         case I915_GEM_DOMAIN_WC:
  79                 wmb();
  80                 break;
  81
  82         case I915_GEM_DOMAIN_CPU:
  83                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
  84                 break;
  85
  86         case I915_GEM_DOMAIN_RENDER:
  87                 if (gpu_write_needs_clflush(obj))
  88                         obj->cache_dirty = true;
  89                 break;
  90         }
  91
  92         obj->write_domain = 0;
  93 }
  94
  95 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
  96 {
  97         /*
  98          * We manually flush the CPU domain so that we can override and
  99          * force the flush for the display, and perform it asyncrhonously.
 100          */
 101         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 102         if (obj->cache_dirty)
 103                 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
 104         obj->write_domain = 0;
 105 }
 106
 107 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
 108 {
 109         if (!i915_gem_object_is_framebuffer(obj))
 110                 return;
 111
 112         i915_gem_object_lock(obj, NULL);
 113         __i915_gem_object_flush_for_display(obj);
 114         i915_gem_object_unlock(obj);
 115 }
 116
 117 void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
 118 {
 119         if (i915_gem_object_is_framebuffer(obj))
 120                 __i915_gem_object_flush_for_display(obj);
 121 }
 122
 123 /**
 124  * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
 125  *                                    possibly write domain.
 126  * @obj: object to act on
 127  * @write: ask for write access or read only
 128  *
 129  * This function returns when the move is complete, including waiting on
 130  * flushes to occur.
 131  */
 132 int
 133 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 134 {
 135         int ret;
 136
 137         assert_object_held(obj);
 138
 139         ret = i915_gem_object_wait(obj,
 140                                    I915_WAIT_INTERRUPTIBLE |
 141                                    (write ? I915_WAIT_ALL : 0),
 142                                    MAX_SCHEDULE_TIMEOUT);
 143         if (ret)
 144                 return ret;
 145
 146         if (obj->write_domain == I915_GEM_DOMAIN_WC)
 147                 return 0;
 148
 149         /* Flush and acquire obj->pages so that we are coherent through
 150          * direct access in memory with previous cached writes through
 151          * shmemfs and that our cache domain tracking remains valid.
 152          * For example, if the obj->filp was moved to swap without us
 153          * being notified and releasing the pages, we would mistakenly
 154          * continue to assume that the obj remained out of the CPU cached
 155          * domain.
 156          */
 157         ret = i915_gem_object_pin_pages(obj);
 158         if (ret)
 159                 return ret;
 160
 161         flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
 162
 163         /* Serialise direct access to this object with the barriers for
 164          * coherent writes from the GPU, by effectively invalidating the
 165          * WC domain upon first access.
 166          */
 167         if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
 168                 mb();
 169
 170         /* It should now be out of any other write domains, and we can update
 171          * the domain values for our changes.
 172          */
 173         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
 174         obj->read_domains |= I915_GEM_DOMAIN_WC;
 175         if (write) {
 176                 obj->read_domains = I915_GEM_DOMAIN_WC;
 177                 obj->write_domain = I915_GEM_DOMAIN_WC;
 178                 obj->mm.dirty = true;
 179         }
 180
 181         i915_gem_object_unpin_pages(obj);
 182         return 0;
 183 }
 184
 185 /**
 186  * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 187  *                                     and possibly write domain.
 188  * @obj: object to act on
 189  * @write: ask for write access or read only
 190  *
 191  * This function returns when the move is complete, including waiting on
 192  * flushes to occur.
 193  */
 194 int
 195 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 196 {
 197         int ret;
 198
 199         assert_object_held(obj);
 200
 201         ret = i915_gem_object_wait(obj,
 202                                    I915_WAIT_INTERRUPTIBLE |
 203                                    (write ? I915_WAIT_ALL : 0),
 204                                    MAX_SCHEDULE_TIMEOUT);
 205         if (ret)
 206                 return ret;
 207
 208         if (obj->write_domain == I915_GEM_DOMAIN_GTT)
 209                 return 0;
 210
 211         /* Flush and acquire obj->pages so that we are coherent through
 212          * direct access in memory with previous cached writes through
 213          * shmemfs and that our cache domain tracking remains valid.
 214          * For example, if the obj->filp was moved to swap without us
 215          * being notified and releasing the pages, we would mistakenly
 216          * continue to assume that the obj remained out of the CPU cached
 217          * domain.
 218          */
 219         ret = i915_gem_object_pin_pages(obj);
 220         if (ret)
 221                 return ret;
 222
 223         flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
 224
 225         /* Serialise direct access to this object with the barriers for
 226          * coherent writes from the GPU, by effectively invalidating the
 227          * GTT domain upon first access.
 228          */
 229         if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
 230                 mb();
 231
 232         /* It should now be out of any other write domains, and we can update
 233          * the domain values for our changes.
 234          */
 235         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
 236         obj->read_domains |= I915_GEM_DOMAIN_GTT;
 237         if (write) {
 238                 struct i915_vma *vma;
 239
 240                 obj->read_domains = I915_GEM_DOMAIN_GTT;
 241                 obj->write_domain = I915_GEM_DOMAIN_GTT;
 242                 obj->mm.dirty = true;
 243
 244                 spin_lock(&obj->vma.lock);
 245                 for_each_ggtt_vma(vma, obj)
 246                         if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
 247                                 i915_vma_set_ggtt_write(vma);
 248                 spin_unlock(&obj->vma.lock);
 249         }
 250
 251         i915_gem_object_unpin_pages(obj);
 252         return 0;
 253 }
 254
 255 /**
 256  * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
 257  * @obj: object to act on
 258  * @cache_level: new cache level to set for the object
 259  *
 260  * After this function returns, the object will be in the new cache-level
 261  * across all GTT and the contents of the backing storage will be coherent,
 262  * with respect to the new cache-level. In order to keep the backing storage
 263  * coherent for all users, we only allow a single cache level to be set
 264  * globally on the object and prevent it from being changed whilst the
 265  * hardware is reading from the object. That is if the object is currently
 266  * on the scanout it will be set to uncached (or equivalent display
 267  * cache coherency) and all non-MOCS GPU access will also be uncached so
 268  * that all direct access to the scanout remains coherent.
 269  */
 270 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 271                                     enum i915_cache_level cache_level)
 272 {
 273         int ret;
 274
 275         /*
 276          * For objects created by userspace through GEM_CREATE with pat_index
 277          * set by set_pat extension, simply return 0 here without touching
 278          * the cache setting, because such objects should have an immutable
 279          * cache setting by desgin and always managed by userspace.
 280          */
 281         if (i915_gem_object_has_cache_level(obj, cache_level))
 282                 return 0;
 283
 284         ret = i915_gem_object_wait(obj,
 285                                    I915_WAIT_INTERRUPTIBLE |
 286                                    I915_WAIT_ALL,
 287                                    MAX_SCHEDULE_TIMEOUT);
 288         if (ret)
 289                 return ret;
 290
 291         /* Always invalidate stale cachelines */
 292         i915_gem_object_set_cache_coherency(obj, cache_level);
 293         obj->cache_dirty = true;
 294
 295         /* The cache-level will be applied when each vma is rebound. */
 296         return i915_gem_object_unbind(obj,
 297                                       I915_GEM_OBJECT_UNBIND_ACTIVE |
 298                                       I915_GEM_OBJECT_UNBIND_BARRIER);
 299 }
 300
 301 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
 302                                struct drm_file *file)
 303 {
 304         struct drm_i915_gem_caching *args = data;
 305         struct drm_i915_gem_object *obj;
 306         int err = 0;
 307
 308         if (IS_DGFX(to_i915(dev)))
 309                 return -ENODEV;
 310
 311         rcu_read_lock();
 312         obj = i915_gem_object_lookup_rcu(file, args->handle);
 313         if (!obj) {
 314                 err = -ENOENT;
 315                 goto out;
 316         }
 317
 318         /*
 319          * This ioctl should be disabled for the objects with pat_index
 320          * set by user space.
 321          */
 322         if (obj->pat_set_by_user) {
 323                 err = -EOPNOTSUPP;
 324                 goto out;
 325         }
 326
 327         if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
 328             i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
 329                 args->caching = I915_CACHING_CACHED;
 330         else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
 331                 args->caching = I915_CACHING_DISPLAY;
 332         else
 333                 args->caching = I915_CACHING_NONE;
 334 out:
 335         rcu_read_unlock();
 336         return err;
 337 }
 338
 339 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 340                                struct drm_file *file)
 341 {
 342         struct drm_i915_private *i915 = to_i915(dev);
 343         struct drm_i915_gem_caching *args = data;
 344         struct drm_i915_gem_object *obj;
 345         enum i915_cache_level level;
 346         int ret = 0;
 347
 348         if (IS_DGFX(i915))
 349                 return -ENODEV;
 350
 351         if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
 352                 return -EOPNOTSUPP;
 353
 354         switch (args->caching) {
 355         case I915_CACHING_NONE:
 356                 level = I915_CACHE_NONE;
 357                 break;
 358         case I915_CACHING_CACHED:
 359                 /*
 360                  * Due to a HW issue on BXT A stepping, GPU stores via a
 361                  * snooped mapping may leave stale data in a corresponding CPU
 362                  * cacheline, whereas normally such cachelines would get
 363                  * invalidated.
 364                  */
 365                 if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
 366                         return -ENODEV;
 367
 368                 level = I915_CACHE_LLC;
 369                 break;
 370         case I915_CACHING_DISPLAY:
 371                 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
 372                 break;
 373         default:
 374                 return -EINVAL;
 375         }
 376
 377         obj = i915_gem_object_lookup(file, args->handle);
 378         if (!obj)
 379                 return -ENOENT;
 380
 381         /*
 382          * This ioctl should be disabled for the objects with pat_index
 383          * set by user space.
 384          */
 385         if (obj->pat_set_by_user) {
 386                 ret = -EOPNOTSUPP;
 387                 goto out;
 388         }
 389
 390         /*
 391          * The caching mode of proxy object is handled by its generator, and
 392          * not allowed to be changed by userspace.
 393          */
 394         if (i915_gem_object_is_proxy(obj)) {
 395                 /*
 396                  * Silently allow cached for userptr; the vulkan driver
 397                  * sets all objects to cached
 398                  */
 399                 if (!i915_gem_object_is_userptr(obj) ||
 400                     args->caching != I915_CACHING_CACHED)
 401                         ret = -ENXIO;
 402
 403                 goto out;
 404         }
 405
 406         ret = i915_gem_object_lock_interruptible(obj, NULL);
 407         if (ret)
 408                 goto out;
 409
 410         ret = i915_gem_object_set_cache_level(obj, level);
 411         i915_gem_object_unlock(obj);
 412
 413 out:
 414         i915_gem_object_put(obj);
 415         return ret;
 416 }
 417
 418 /*
 419  * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 420  * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 421  * (for pageflips). We only flush the caches while preparing the buffer for
 422  * display, the callers are responsible for frontbuffer flush.
 423  */
 424 struct i915_vma *
 425 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 426                                      struct i915_gem_ww_ctx *ww,
 427                                      u32 alignment,
 428                                      const struct i915_gtt_view *view,
 429                                      unsigned int flags)
 430 {
 431         struct drm_i915_private *i915 = to_i915(obj->base.dev);
 432         struct i915_vma *vma;
 433         int ret;
 434
 435         /* Frame buffer must be in LMEM */
 436         if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
 437                 return ERR_PTR(-EINVAL);
 438
 439         /*
 440          * The display engine is not coherent with the LLC cache on gen6.  As
 441          * a result, we make sure that the pinning that is about to occur is
 442          * done with uncached PTEs. This is lowest common denominator for all
 443          * chipsets.
 444          *
 445          * However for gen6+, we could do better by using the GFDT bit instead
 446          * of uncaching, which would allow us to flush all the LLC-cached data
 447          * with that bit in the PTE to main memory with just one PIPE_CONTROL.
 448          */
 449         ret = i915_gem_object_set_cache_level(obj,
 450                                               HAS_WT(i915) ?
 451                                               I915_CACHE_WT : I915_CACHE_NONE);
 452         if (ret)
 453                 return ERR_PTR(ret);
 454
 455         /* VT-d may overfetch before/after the vma, so pad with scratch */
 456         if (intel_scanout_needs_vtd_wa(i915)) {
 457                 unsigned int guard = VTD_GUARD;
 458
 459                 if (i915_gem_object_is_tiled(obj))
 460                         guard = max(guard,
 461                                     i915_gem_object_get_tile_row_size(obj));
 462
 463                 flags |= PIN_OFFSET_GUARD | guard;
 464         }
 465
 466         /*
 467          * As the user may map the buffer once pinned in the display plane
 468          * (e.g. libkms for the bootup splash), we have to ensure that we
 469          * always use map_and_fenceable for all scanout buffers. However,
 470          * it may simply be too big to fit into mappable, in which case
 471          * put it anyway and hope that userspace can cope (but always first
 472          * try to preserve the existing ABI).
 473          */
 474         vma = ERR_PTR(-ENOSPC);
 475         if ((flags & PIN_MAPPABLE) == 0 &&
 476             (!view || view->type == I915_GTT_VIEW_NORMAL))
 477                 vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
 478                                                   flags | PIN_MAPPABLE |
 479                                                   PIN_NONBLOCK);
 480         if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
 481                 vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
 482                                                   alignment, flags);
 483         if (IS_ERR(vma))
 484                 return vma;
 485
 486         vma->display_alignment = max(vma->display_alignment, alignment);
 487         i915_vma_mark_scanout(vma);
 488
 489         i915_gem_object_flush_if_display_locked(obj);
 490
 491         return vma;
 492 }
 493
 494 /**
 495  * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 496  *                                     and possibly write domain.
 497  * @obj: object to act on
 498  * @write: requesting write or read-only access
 499  *
 500  * This function returns when the move is complete, including waiting on
 501  * flushes to occur.
 502  */
 503 int
 504 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 505 {
 506         int ret;
 507
 508         assert_object_held(obj);
 509
 510         ret = i915_gem_object_wait(obj,
 511                                    I915_WAIT_INTERRUPTIBLE |
 512                                    (write ? I915_WAIT_ALL : 0),
 513                                    MAX_SCHEDULE_TIMEOUT);
 514         if (ret)
 515                 return ret;
 516
 517         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 518
 519         /* Flush the CPU cache if it's still invalid. */
 520         if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
 521                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
 522                 obj->read_domains |= I915_GEM_DOMAIN_CPU;
 523         }
 524
 525         /* It should now be out of any other write domains, and we can update
 526          * the domain values for our changes.
 527          */
 528         GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
 529
 530         /* If we're writing through the CPU, then the GPU read domains will
 531          * need to be invalidated at next use.
 532          */
 533         if (write)
 534                 __start_cpu_write(obj);
 535
 536         return 0;
 537 }
 538
 539 /**
 540  * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 541  *                             object with the CPU, either
 542  * through the mmap ioctl's mapping or a GTT mapping.
 543  * @dev: drm device
 544  * @data: ioctl data blob
 545  * @file: drm file
 546  */
 547 int
 548 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 549                           struct drm_file *file)
 550 {
 551         struct drm_i915_gem_set_domain *args = data;
 552         struct drm_i915_gem_object *obj;
 553         u32 read_domains = args->read_domains;
 554         u32 write_domain = args->write_domain;
 555         int err;
 556
 557         if (IS_DGFX(to_i915(dev)))
 558                 return -ENODEV;
 559
 560         /* Only handle setting domains to types used by the CPU. */
 561         if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
 562                 return -EINVAL;
 563
 564         /*
 565          * Having something in the write domain implies it's in the read
 566          * domain, and only that read domain.  Enforce that in the request.
 567          */
 568         if (write_domain && read_domains != write_domain)
 569                 return -EINVAL;
 570
 571         if (!read_domains)
 572                 return 0;
 573
 574         obj = i915_gem_object_lookup(file, args->handle);
 575         if (!obj)
 576                 return -ENOENT;
 577
 578         /*
 579          * Try to flush the object off the GPU without holding the lock.
 580          * We will repeat the flush holding the lock in the normal manner
 581          * to catch cases where we are gazumped.
 582          */
 583         err = i915_gem_object_wait(obj,
 584                                    I915_WAIT_INTERRUPTIBLE |
 585                                    I915_WAIT_PRIORITY |
 586                                    (write_domain ? I915_WAIT_ALL : 0),
 587                                    MAX_SCHEDULE_TIMEOUT);
 588         if (err)
 589                 goto out;
 590
 591         if (i915_gem_object_is_userptr(obj)) {
 592                 /*
 593                  * Try to grab userptr pages, iris uses set_domain to check
 594                  * userptr validity
 595                  */
 596                 err = i915_gem_object_userptr_validate(obj);
 597                 if (!err)
 598                         err = i915_gem_object_wait(obj,
 599                                                    I915_WAIT_INTERRUPTIBLE |
 600                                                    I915_WAIT_PRIORITY |
 601                                                    (write_domain ? I915_WAIT_ALL : 0),
 602                                                    MAX_SCHEDULE_TIMEOUT);
 603                 goto out;
 604         }
 605
 606         /*
 607          * Proxy objects do not control access to the backing storage, ergo
 608          * they cannot be used as a means to manipulate the cache domain
 609          * tracking for that backing storage. The proxy object is always
 610          * considered to be outside of any cache domain.
 611          */
 612         if (i915_gem_object_is_proxy(obj)) {
 613                 err = -ENXIO;
 614                 goto out;
 615         }
 616
 617         err = i915_gem_object_lock_interruptible(obj, NULL);
 618         if (err)
 619                 goto out;
 620
 621         /*
 622          * Flush and acquire obj->pages so that we are coherent through
 623          * direct access in memory with previous cached writes through
 624          * shmemfs and that our cache domain tracking remains valid.
 625          * For example, if the obj->filp was moved to swap without us
 626          * being notified and releasing the pages, we would mistakenly
 627          * continue to assume that the obj remained out of the CPU cached
 628          * domain.
 629          */
 630         err = i915_gem_object_pin_pages(obj);
 631         if (err)
 632                 goto out_unlock;
 633
 634         /*
 635          * Already in the desired write domain? Nothing for us to do!
 636          *
 637          * We apply a little bit of cunning here to catch a broader set of
 638          * no-ops. If obj->write_domain is set, we must be in the same
 639          * obj->read_domains, and only that domain. Therefore, if that
 640          * obj->write_domain matches the request read_domains, we are
 641          * already in the same read/write domain and can skip the operation,
 642          * without having to further check the requested write_domain.
 643          */
 644         if (READ_ONCE(obj->write_domain) == read_domains)
 645                 goto out_unpin;
 646
 647         if (read_domains & I915_GEM_DOMAIN_WC)
 648                 err = i915_gem_object_set_to_wc_domain(obj, write_domain);
 649         else if (read_domains & I915_GEM_DOMAIN_GTT)
 650                 err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
 651         else
 652                 err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
 653
 654 out_unpin:
 655         i915_gem_object_unpin_pages(obj);
 656
 657 out_unlock:
 658         i915_gem_object_unlock(obj);
 659
 660         if (!err && write_domain)
 661                 i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
 662
 663 out:
 664         i915_gem_object_put(obj);
 665         return err;
 666 }
 667
 668 /*
 669  * Pins the specified object's pages and synchronizes the object with
 670  * GPU accesses. Sets needs_clflush to non-zero if the caller should
 671  * flush the object from the CPU cache.
 672  */
 673 int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 674                                  unsigned int *needs_clflush)
 675 {
 676         int ret;
 677
 678         *needs_clflush = 0;
 679         if (!i915_gem_object_has_struct_page(obj))
 680                 return -ENODEV;
 681
 682         assert_object_held(obj);
 683
 684         ret = i915_gem_object_wait(obj,
 685                                    I915_WAIT_INTERRUPTIBLE,
 686                                    MAX_SCHEDULE_TIMEOUT);
 687         if (ret)
 688                 return ret;
 689
 690         ret = i915_gem_object_pin_pages(obj);
 691         if (ret)
 692                 return ret;
 693
 694         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
 695             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 696                 ret = i915_gem_object_set_to_cpu_domain(obj, false);
 697                 if (ret)
 698                         goto err_unpin;
 699                 else
 700                         goto out;
 701         }
 702
 703         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 704
 705         /* If we're not in the cpu read domain, set ourself into the gtt
 706          * read domain and manually flush cachelines (if required). This
 707          * optimizes for the case when the gpu will dirty the data
 708          * anyway again before the next pread happens.
 709          */
 710         if (!obj->cache_dirty &&
 711             !(obj->read_domains & I915_GEM_DOMAIN_CPU))
 712                 *needs_clflush = CLFLUSH_BEFORE;
 713
 714 out:
 715         /* return with the pages pinned */
 716         return 0;
 717
 718 err_unpin:
 719         i915_gem_object_unpin_pages(obj);
 720         return ret;
 721 }
 722
 723 int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 724                                   unsigned int *needs_clflush)
 725 {
 726         int ret;
 727
 728         *needs_clflush = 0;
 729         if (!i915_gem_object_has_struct_page(obj))
 730                 return -ENODEV;
 731
 732         assert_object_held(obj);
 733
 734         ret = i915_gem_object_wait(obj,
 735                                    I915_WAIT_INTERRUPTIBLE |
 736                                    I915_WAIT_ALL,
 737                                    MAX_SCHEDULE_TIMEOUT);
 738         if (ret)
 739                 return ret;
 740
 741         ret = i915_gem_object_pin_pages(obj);
 742         if (ret)
 743                 return ret;
 744
 745         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
 746             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 747                 ret = i915_gem_object_set_to_cpu_domain(obj, true);
 748                 if (ret)
 749                         goto err_unpin;
 750                 else
 751                         goto out;
 752         }
 753
 754         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 755
 756         /* If we're not in the cpu write domain, set ourself into the
 757          * gtt write domain and manually flush cachelines (as required).
 758          * This optimizes for the case when the gpu will use the data
 759          * right away and we therefore have to clflush anyway.
 760          */
 761         if (!obj->cache_dirty) {
 762                 *needs_clflush |= CLFLUSH_AFTER;
 763
 764                 /*
 765                  * Same trick applies to invalidate partially written
 766                  * cachelines read before writing.
 767                  */
 768                 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
 769                         *needs_clflush |= CLFLUSH_BEFORE;
 770         }
 771
 772 out:
 773         i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
 774         obj->mm.dirty = true;
 775         /* return with the pages pinned */
 776         return 0;
 777
 778 err_unpin:
 779         i915_gem_object_unpin_pages(obj);
 780         return ret;
 781 }