drm/syncobj: Rename fence_get to find_fence
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5599c01..269b835 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -54,7 +54,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 
        *offset = data->offset;
 
-       drm_gem_object_unreference_unlocked(gobj);
+       drm_gem_object_put_unlocked(gobj);
 
        if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
                amdgpu_bo_unref(&p->uf_entry.robj);
@@ -90,7 +90,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
        }
 
        /* get chunks */
-       chunk_array_user = (uint64_t __user *)(uintptr_t)(cs->in.chunks);
+       chunk_array_user = u64_to_user_ptr(cs->in.chunks);
        if (copy_from_user(chunk_array, chunk_array_user,
                           sizeof(uint64_t)*cs->in.num_chunks)) {
                ret = -EFAULT;
@@ -110,7 +110,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                struct drm_amdgpu_cs_chunk user_chunk;
                uint32_t __user *cdata;
 
-               chunk_ptr = (void __user *)(uintptr_t)chunk_array[i];
+               chunk_ptr = u64_to_user_ptr(chunk_array[i]);
                if (copy_from_user(&user_chunk, chunk_ptr,
                                       sizeof(struct drm_amdgpu_cs_chunk))) {
                        ret = -EFAULT;
@@ -121,7 +121,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                p->chunks[i].length_dw = user_chunk.length_dw;
 
                size = p->chunks[i].length_dw;
-               cdata = (void __user *)(uintptr_t)user_chunk.chunk_data;
+               cdata = u64_to_user_ptr(user_chunk.chunk_data);
 
                p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
                if (p->chunks[i].kdata == NULL) {
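For reference, u64_to_user_ptr() lives in include/linux/kernel.h and wraps exactly the cast pattern being removed in these hunks, with the bonus of a compile-time type check. A rough sketch of the macro as it exists around this kernel version (the exact definition may differ):

	/* Converts a u64 coming from userspace (e.g. an ioctl struct field)
	 * into a __user pointer. typecheck() rejects callers that pass
	 * anything other than a real u64, which the open-coded
	 * (void __user *)(uintptr_t) casts could not do.
	 */
	#define u64_to_user_ptr(x) (		\
	{					\
		typecheck(u64, (x));		\
		(void __user *)(uintptr_t)(x);	\
	}					\
	)
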
@@ -223,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
  * ticks. The accumulated microseconds (us) are converted to bytes and
  * returned.
  */
-static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
+static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
+                                             u64 *max_bytes,
+                                             u64 *max_vis_bytes)
 {
        s64 time_us, increment_us;
-       u64 max_bytes;
        u64 free_vram, total_vram, used_vram;
 
        /* Allow a maximum of 200 accumulated ms. This is basically per-IB
@@ -238,11 +239,14 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
         */
        const s64 us_upper_bound = 200000;
 
-       if (!adev->mm_stats.log2_max_MBps)
-               return 0;
+       if (!adev->mm_stats.log2_max_MBps) {
+               *max_bytes = 0;
+               *max_vis_bytes = 0;
+               return;
+       }
 
        total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
-       used_vram = atomic64_read(&adev->vram_usage);
+       used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
        free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
        spin_lock(&adev->mm_stats.lock);
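Note on the replaced counter: VRAM usage is now tracked inside the TTM VRAM manager rather than in a driver-private adev->vram_usage atomic. The two query helpers used in this function are defined in amdgpu_vram_mgr.c (not part of this diff); a sketch of their shape from memory of this kernel era, so the exact bodies may differ:

	/* Sketch only: the VRAM manager keeps running totals of overall and
	 * CPU-visible VRAM usage; these helpers just read those counters.
	 */
	u64 amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man)
	{
		struct amdgpu_vram_mgr *mgr = man->priv;

		return atomic64_read(&mgr->usage);
	}

	u64 amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man)
	{
		struct amdgpu_vram_mgr *mgr = man->priv;

		return atomic64_read(&mgr->vis_usage);
	}
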
@@ -280,23 +284,46 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
                adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
        }
 
-       /* This returns 0 if the driver is in debt to disallow (optional)
+       /* This is set to 0 if the driver is in debt to disallow (optional)
         * buffer moves.
         */
-       max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+       *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+       /* Do the same for visible VRAM if half of it is free */
+       if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
+               u64 total_vis_vram = adev->mc.visible_vram_size;
+               u64 used_vis_vram =
+                       amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
+
+               if (used_vis_vram < total_vis_vram) {
+                       u64 free_vis_vram = total_vis_vram - used_vis_vram;
+                       adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
+                                                         increment_us, us_upper_bound);
+
+                       if (free_vis_vram >= total_vis_vram / 2)
+                               adev->mm_stats.accum_us_vis =
+                                       max(bytes_to_us(adev, free_vis_vram / 2),
+                                           adev->mm_stats.accum_us_vis);
+               }
+
+               *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
+       } else {
+               *max_vis_bytes = 0;
+       }
 
        spin_unlock(&adev->mm_stats.lock);
-       return max_bytes;
 }
 
 /* Report how many bytes have really been moved for the last command
  * submission. This can result in a debt that can stop buffer migrations
  * temporarily.
  */
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+                                 u64 num_vis_bytes)
 {
        spin_lock(&adev->mm_stats.lock);
        adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+       adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
        spin_unlock(&adev->mm_stats.lock);
 }
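For context on the accounting above: the throttle works in accumulated microseconds and converts to a byte budget (and back) using the measured memory bandwidth, encoded as log2_max_MBps. The conversion helpers are defined earlier in amdgpu_cs.c and are not part of these hunks; a sketch, with the caveat that the exact bodies may differ slightly:

	/* Sketch of the conversion helpers this throttle relies on. Since
	 * accum_us advances by one million per second, shifting by
	 * log2(MB/s) converts between accumulated microseconds and bytes.
	 */
	static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
	{
		if (us <= 0 || !adev->mm_stats.log2_max_MBps)
			return 0;

		return us << adev->mm_stats.log2_max_MBps;
	}

	static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
	{
		if (!adev->mm_stats.log2_max_MBps)
			return 0;

		return bytes >> adev->mm_stats.log2_max_MBps;
	}

amdgpu_cs_report_moved_bytes() then subtracts the bytes actually moved (converted back to microseconds) from both accumulators, which is how a heavy submission drives the throttle into the "debt" mentioned in the comments.
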
 
@@ -304,7 +331,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
                                 struct amdgpu_bo *bo)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-       u64 initial_bytes_moved;
+       u64 initial_bytes_moved, bytes_moved;
        uint32_t domain;
        int r;
 
@@ -314,17 +341,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
        /* Don't move this buffer if we have depleted our allowance
         * to move it. Don't move anything if the threshold is zero.
         */
-       if (p->bytes_moved < p->bytes_moved_threshold)
-               domain = bo->prefered_domains;
-       else
+       if (p->bytes_moved < p->bytes_moved_threshold) {
+               if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+                   (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+                       /* And don't move a CPU_ACCESS_REQUIRED BO to limited
+                        * visible VRAM if we've depleted our allowance to do
+                        * that.
+                        */
+                       if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
+                               domain = bo->preferred_domains;
+                       else
+                               domain = bo->allowed_domains;
+               } else {
+                       domain = bo->preferred_domains;
+               }
+       } else {
                domain = bo->allowed_domains;
+       }
 
 retry:
        amdgpu_ttm_placement_from_domain(bo, domain);
        initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
        r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-       p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-               initial_bytes_moved;
+       bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+                     initial_bytes_moved;
+       p->bytes_moved += bytes_moved;
+       if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+           bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+           bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+               p->bytes_moved_vis += bytes_moved;
 
        if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
                domain = bo->allowed_domains;
@@ -350,7 +395,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
                struct amdgpu_bo_list_entry *candidate = p->evictable;
                struct amdgpu_bo *bo = candidate->robj;
                struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-               u64 initial_bytes_moved;
+               u64 initial_bytes_moved, bytes_moved;
+               bool update_bytes_moved_vis;
                uint32_t other;
 
                /* If we reached our current BO we can forget it */
@@ -370,10 +416,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 
                /* Good we can try to move this BO somewhere else */
                amdgpu_ttm_placement_from_domain(bo, other);
+               update_bytes_moved_vis =
+                       adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+                       bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+                       bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
                initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
                r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-               p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
+               bytes_moved = atomic64_read(&adev->num_bytes_moved) -
                        initial_bytes_moved;
+               p->bytes_moved += bytes_moved;
+               if (update_bytes_moved_vis)
+                       p->bytes_moved_vis += bytes_moved;
 
                if (unlikely(r))
                        break;
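The test for "this BO currently sits in the CPU-visible part of VRAM" now appears in both the validate and evict paths above (each time guarded by a check that visible VRAM is actually smaller than total VRAM). Purely as an illustration, and not part of this patch, the placement test could be factored into a helper along these lines:

	/* Hypothetical helper, not in this patch: true when the BO's current
	 * placement starts inside the CPU-visible window of VRAM.
	 */
	static bool amdgpu_bo_in_visible_vram(struct amdgpu_device *adev,
					      struct amdgpu_bo *bo)
	{
		return bo->tbo.mem.mem_type == TTM_PL_VRAM &&
		       bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
	}
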
@@ -554,8 +607,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                list_splice(&need_pages, &p->validated);
        }
 
-       p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
+       amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
+                                         &p->bytes_moved_vis_threshold);
        p->bytes_moved = 0;
+       p->bytes_moved_vis = 0;
        p->evictable = list_last_entry(&p->validated,
                                       struct amdgpu_bo_list_entry,
                                       tv.head);
@@ -579,8 +634,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                goto error_validate;
        }
 
-       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved);
-
+       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+                                    p->bytes_moved_vis);
        fpriv->vm.last_eviction_counter =
                atomic64_read(&p->adev->num_evictions);
 
@@ -619,10 +674,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
        }
 
 error_validate:
-       if (r) {
-               amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm);
+       if (r)
                ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-       }
 
 error_free_pages:
 
@@ -670,21 +723,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
  * If error is set then unvalidate buffer, otherwise just free memory
  * used by parsing context.
  **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
+                                 bool backoff)
 {
-       struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
        unsigned i;
 
-       if (!error) {
-               amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
-
+       if (!error)
                ttm_eu_fence_buffer_objects(&parser->ticket,
                                            &parser->validated,
                                            parser->fence);
-       } else if (backoff) {
+       else if (backoff)
                ttm_eu_backoff_reservation(&parser->ticket,
                                           &parser->validated);
-       }
 
        for (i = 0; i < parser->num_post_dep_syncobjs; i++)
                drm_syncobj_put(parser->post_dep_syncobjs[i]);
@@ -737,7 +787,8 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 
        if (amdgpu_sriov_vf(adev)) {
                struct dma_fence *f;
-               bo_va = vm->csa_bo_va;
+
+               bo_va = fpriv->csa_va;
                BUG_ON(!bo_va);
                r = amdgpu_vm_bo_update(adev, bo_va, false);
                if (r)
@@ -774,7 +825,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 
        }
 
-       r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync);
+       r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
 
        if (amdgpu_vm_debug && p->bo_list) {
                /* Invalidate all BOs to test for userspace bugs */
@@ -984,7 +1035,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
 {
        int r;
        struct dma_fence *fence;
-       r = drm_syncobj_fence_get(p->filp, handle, &fence);
+       r = drm_syncobj_find_fence(p->filp, handle, &fence);
        if (r)
                return r;
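This is the rename that gives the commit its title: drm_syncobj_fence_get() becomes drm_syncobj_find_fence(), with the same role of looking up the dma_fence currently attached to a syncobj handle. A hedged sketch of the caller pattern; the amdgpu_sync_fence() signature is recalled from this kernel era and is illustrative rather than authoritative:

	/* Illustrative only: look up the syncobj's fence, record it as a job
	 * dependency, then drop the reference taken by the lookup.
	 */
	static int example_add_syncobj_dep(struct amdgpu_cs_parser *p,
					   uint32_t handle)
	{
		struct dma_fence *fence;
		int r;

		r = drm_syncobj_find_fence(p->filp, handle, &fence);
		if (r)
			return r;

		r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
		dma_fence_put(fence);
		return r;
	}
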
 
@@ -1383,7 +1434,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
        if (fences == NULL)
                return -ENOMEM;
 
-       fences_user = (void __user *)(uintptr_t)(wait->in.fences);
+       fences_user = u64_to_user_ptr(wait->in.fences);
        if (copy_from_user(fences, fences_user,
                sizeof(struct drm_amdgpu_fence) * fence_count)) {
                r = -EFAULT;
@@ -1436,7 +1487,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
                            addr > mapping->last)
                                continue;
 
-                       *bo = lobj->bo_va->bo;
+                       *bo = lobj->bo_va->base.bo;
                        return mapping;
                }
 
@@ -1445,7 +1496,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
                            addr > mapping->last)
                                continue;
 
-                       *bo = lobj->bo_va->bo;
+                       *bo = lobj->bo_va->base.bo;
                        return mapping;
                }
        }