drm/ttm: switch to per device LRU lock
author    Christian König <christian.koenig@amd.com>
          Tue, 6 Oct 2020 15:26:42 +0000 (17:26 +0200)
committer Christian König <christian.koenig@amd.com>
          Wed, 24 Mar 2021 16:05:25 +0000 (17:05 +0100)
Replace the single global LRU lock with one lock per device, for potentially less contention when multiple devices are in use.

Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Nirmoy Das <nirmoy.das@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/424010/
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/qxl/qxl_release.c
drivers/gpu/drm/ttm/ttm_bo.c
drivers/gpu/drm/ttm/ttm_device.c
drivers/gpu/drm/ttm/ttm_execbuf_util.c
drivers/gpu/drm/ttm/ttm_resource.c
include/drm/ttm/ttm_bo_driver.h
include/drm/ttm/ttm_device.h
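
The change itself is mechanical: every acquisition of the global
ttm_glob.lru_lock becomes an acquisition of the lru_lock now embedded in
struct ttm_device, reached through the BO's bdev pointer. A minimal
before/after sketch (illustrative only, not taken from the patch):

    /* Before: one spinlock shared by all TTM devices. */
    spin_lock(&ttm_glob.lru_lock);
    ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
    spin_unlock(&ttm_glob.lru_lock);

    /* After: each device carries its own lock, so two devices can
     * update their LRU lists concurrently.
     */
    spin_lock(&bo->bdev->lru_lock);
    ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
    spin_unlock(&bo->bdev->lru_lock);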

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9d19078..ae18c0e 100644
@@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
        struct amdgpu_vm_bo_base *bo_base;
 
        if (vm->bulk_moveable) {
-               spin_lock(&ttm_glob.lru_lock);
+               spin_lock(&adev->mman.bdev.lru_lock);
                ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
-               spin_unlock(&ttm_glob.lru_lock);
+               spin_unlock(&adev->mman.bdev.lru_lock);
                return;
        }
 
        memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
 
-       spin_lock(&ttm_glob.lru_lock);
+       spin_lock(&adev->mman.bdev.lru_lock);
        list_for_each_entry(bo_base, &vm->idle, vm_status) {
                struct amdgpu_bo *bo = bo_base->bo;
 
@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
                                                &bo->shadow->tbo.mem,
                                                &vm->lru_bulk_move);
        }
-       spin_unlock(&ttm_glob.lru_lock);
+       spin_unlock(&adev->mman.bdev.lru_lock);
 
        vm->bulk_moveable = true;
 }
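
Driver-side, the lock is reached through whatever structure embeds the
ttm_device; for amdgpu that is adev->mman.bdev, as in the hunk above. A
hypothetical helper showing the resulting pattern (the function name is
illustrative, not part of the patch):

    /* Bulk-move a VM's BOs to the LRU tail under the per-device
     * lock that replaced the old global one.
     */
    static void example_vm_bulk_move(struct amdgpu_device *adev,
                                     struct ttm_lru_bulk_move *bulk)
    {
            spin_lock(&adev->mman.bdev.lru_lock);
            ttm_bo_bulk_move_lru_tail(bulk);
            spin_unlock(&adev->mman.bdev.lru_lock);
    }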
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index f5845c9..b19f2f0 100644
@@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
                       release->id | 0xf0000000, release->base.seqno);
        trace_dma_fence_emit(&release->base);
 
-       spin_lock(&ttm_glob.lru_lock);
-
        list_for_each_entry(entry, &release->bos, head) {
                bo = entry->bo;
 
                dma_resv_add_shared_fence(bo->base.resv, &release->base);
-               ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+               ttm_bo_move_to_lru_tail_unlocked(bo);
                dma_resv_unlock(bo->base.resv);
        }
-       spin_unlock(&ttm_glob.lru_lock);
        ww_acquire_fini(&release->ticket);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3673157..2d2ac53 100644
@@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
                 * reference it any more. The only tricky case is the trylock on
                 * the resv object while holding the lru_lock.
                 */
-               spin_lock(&ttm_glob.lru_lock);
+               spin_lock(&bo->bdev->lru_lock);
                bo->base.resv = &bo->base._resv;
-               spin_unlock(&ttm_glob.lru_lock);
+               spin_unlock(&bo->bdev->lru_lock);
        }
 
        return r;
@@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 
                if (unlock_resv)
                        dma_resv_unlock(bo->base.resv);
-               spin_unlock(&ttm_glob.lru_lock);
+               spin_unlock(&bo->bdev->lru_lock);
 
                lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
                                                 30 * HZ);
@@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
                else if (lret == 0)
                        return -EBUSY;
 
-               spin_lock(&ttm_glob.lru_lock);
+               spin_lock(&bo->bdev->lru_lock);
                if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
                        /*
                         * We raced, and lost, someone else holds the reservation now,
@@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
                         * delayed destruction would succeed, so just return success
                         * here.
                         */
-                       spin_unlock(&ttm_glob.lru_lock);
+                       spin_unlock(&bo->bdev->lru_lock);
                        return 0;
                }
                ret = 0;
@@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
        if (ret || unlikely(list_empty(&bo->ddestroy))) {
                if (unlock_resv)
                        dma_resv_unlock(bo->base.resv);
-               spin_unlock(&ttm_glob.lru_lock);
+               spin_unlock(&bo->bdev->lru_lock);
                return ret;
        }
 
        ttm_bo_del_from_lru(bo);
        list_del_init(&bo->ddestroy);
-       spin_unlock(&ttm_glob.lru_lock);
+       spin_unlock(&bo->bdev->lru_lock);
        ttm_bo_cleanup_memtype_use(bo);
 
        if (unlock_resv)
@@ -356,13 +356,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
  */
 bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
 {
-       struct ttm_global *glob = &ttm_glob;
        struct list_head removed;
        bool empty;
 
        INIT_LIST_HEAD(&removed);
 
-       spin_lock(&glob->lru_lock);
+       spin_lock(&bdev->lru_lock);
        while (!list_empty(&bdev->ddestroy)) {
                struct ttm_buffer_object *bo;
 
@@ -373,24 +372,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
                        continue;
 
                if (remove_all || bo->base.resv != &bo->base._resv) {
-                       spin_unlock(&glob->lru_lock);
+                       spin_unlock(&bdev->lru_lock);
                        dma_resv_lock(bo->base.resv, NULL);
 
-                       spin_lock(&glob->lru_lock);
+                       spin_lock(&bdev->lru_lock);
                        ttm_bo_cleanup_refs(bo, false, !remove_all, true);
 
                } else if (dma_resv_trylock(bo->base.resv)) {
                        ttm_bo_cleanup_refs(bo, false, !remove_all, true);
                } else {
-                       spin_unlock(&glob->lru_lock);
+                       spin_unlock(&bdev->lru_lock);
                }
 
                ttm_bo_put(bo);
-               spin_lock(&glob->lru_lock);
+               spin_lock(&bdev->lru_lock);
        }
        list_splice_tail(&removed, &bdev->ddestroy);
        empty = list_empty(&bdev->ddestroy);
-       spin_unlock(&glob->lru_lock);
+       spin_unlock(&bdev->lru_lock);
 
        return empty;
 }
@@ -425,7 +424,7 @@ static void ttm_bo_release(struct kref *kref)
                ttm_bo_flush_all_fences(bo);
                bo->deleted = true;
 
-               spin_lock(&ttm_glob.lru_lock);
+               spin_lock(&bo->bdev->lru_lock);
 
                /*
                 * Make pinned bos immediately available to
@@ -442,17 +441,17 @@ static void ttm_bo_release(struct kref *kref)
 
                kref_init(&bo->kref);
                list_add_tail(&bo->ddestroy, &bdev->ddestroy);
-               spin_unlock(&ttm_glob.lru_lock);
+               spin_unlock(&bo->bdev->lru_lock);
 
                schedule_delayed_work(&bdev->wq,
                                      ((HZ / 100) < 1) ? 1 : HZ / 100);
                return;
        }
 
-       spin_lock(&ttm_glob.lru_lock);
+       spin_lock(&bo->bdev->lru_lock);
        ttm_bo_del_from_lru(bo);
        list_del(&bo->ddestroy);
-       spin_unlock(&ttm_glob.lru_lock);
+       spin_unlock(&bo->bdev->lru_lock);
 
        ttm_bo_cleanup_memtype_use(bo);
        dma_resv_unlock(bo->base.resv);
@@ -626,7 +625,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
        unsigned i;
        int ret;
 
-       spin_lock(&ttm_glob.lru_lock);
+       spin_lock(&bdev->lru_lock);
        for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
                list_for_each_entry(bo, &man->lru[i], lru) {
                        bool busy;
@@ -663,7 +662,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
        if (!bo) {
                if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
                        busy_bo = NULL;
-               spin_unlock(&ttm_glob.lru_lock);
+       spin_unlock(&bdev->lru_lock);
                ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
                if (busy_bo)
                        ttm_bo_put(busy_bo);
@@ -677,7 +676,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
                return ret;
        }
 
-       spin_unlock(&ttm_glob.lru_lock);
+       spin_unlock(&bdev->lru_lock);
 
        ret = ttm_bo_evict(bo, ctx);
        if (locked)
@@ -777,10 +776,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
        mem->mem_type = place->mem_type;
        mem->placement = place->flags;
 
-       spin_lock(&ttm_glob.lru_lock);
+       spin_lock(&bo->bdev->lru_lock);
        ttm_bo_move_to_lru_tail(bo, mem, NULL);
-       spin_unlock(&ttm_glob.lru_lock);
-
+       spin_unlock(&bo->bdev->lru_lock);
        return 0;
 }
 
@@ -1167,7 +1165,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
 int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
                   gfp_t gfp_flags)
 {
-       struct ttm_global *glob = &ttm_glob;
        bool locked;
        int ret;
 
@@ -1188,7 +1185,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 
        ttm_bo_del_from_lru(bo);
        /* TODO: Cleanup the locking */
-       spin_unlock(&glob->lru_lock);
+       spin_unlock(&bo->bdev->lru_lock);
 
        /*
         * Move to system cached
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index b9e3bfb..8e821cd 100644
@@ -81,7 +81,6 @@ static int ttm_global_init(void)
        ttm_pool_mgr_init(num_pages * 50 / 100);
        ttm_tt_mgr_init();
 
-       spin_lock_init(&glob->lru_lock);
        glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
 
        if (unlikely(glob->dummy_read_page == NULL)) {
@@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
 int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
                       gfp_t gfp_flags)
 {
-       struct ttm_global *glob = &ttm_glob;
        struct ttm_resource_manager *man;
        struct ttm_buffer_object *bo;
        unsigned i, j;
        int ret;
 
-       spin_lock(&glob->lru_lock);
+       spin_lock(&bdev->lru_lock);
        for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
                man = ttm_manager_type(bdev, i);
                if (!man || !man->use_tt)
@@ -156,7 +154,7 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
                        }
                }
        }
-       spin_unlock(&glob->lru_lock);
+       spin_unlock(&bdev->lru_lock);
        return 0;
 }
 EXPORT_SYMBOL(ttm_device_swapout);
@@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
 
        bdev->vma_manager = vma_manager;
        INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
+       spin_lock_init(&bdev->lru_lock);
        INIT_LIST_HEAD(&bdev->ddestroy);
        bdev->dev_mapping = mapping;
        mutex_lock(&ttm_global_mutex);
@@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
 
 void ttm_device_fini(struct ttm_device *bdev)
 {
-       struct ttm_global *glob = &ttm_glob;
        struct ttm_resource_manager *man;
        unsigned i;
 
@@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
        if (ttm_bo_delayed_delete(bdev, true))
                pr_debug("Delayed destroy list was clean\n");
 
-       spin_lock(&glob->lru_lock);
+       spin_lock(&bdev->lru_lock);
        for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
                if (list_empty(&man->lru[0]))
                        pr_debug("Swap list %d was clean\n", i);
-       spin_unlock(&glob->lru_lock);
+       spin_unlock(&bdev->lru_lock);
 
        ttm_pool_fini(&bdev->pool);
        ttm_global_release();
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 690ab97..071c48d 100644
@@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
        if (list_empty(list))
                return;
 
-       spin_lock(&ttm_glob.lru_lock);
        list_for_each_entry(entry, list, head) {
                struct ttm_buffer_object *bo = entry->bo;
 
-               ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+               ttm_bo_move_to_lru_tail_unlocked(bo);
                dma_resv_unlock(bo->base.resv);
        }
-       spin_unlock(&ttm_glob.lru_lock);
 
        if (ticket)
                ww_acquire_fini(ticket);
@@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
        if (list_empty(list))
                return;
 
-       spin_lock(&ttm_glob.lru_lock);
        list_for_each_entry(entry, list, head) {
                struct ttm_buffer_object *bo = entry->bo;
 
@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
                        dma_resv_add_shared_fence(bo->base.resv, fence);
                else
                        dma_resv_add_excl_fence(bo->base.resv, fence);
-               ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+               ttm_bo_move_to_lru_tail_unlocked(bo);
                dma_resv_unlock(bo->base.resv);
        }
-       spin_unlock(&ttm_glob.lru_lock);
        if (ticket)
                ww_acquire_fini(ticket);
 }
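
Since no single lock covers a whole validation list any more, the
batched lock/unlock around these loops is replaced by
ttm_bo_move_to_lru_tail_unlocked(), which takes each BO's own device
lock internally (see the ttm_bo_driver.h hunk below). Sketch of the
resulting per-BO pattern (illustrative; entry/list follow the
ttm_validate_buffer convention used above):

    list_for_each_entry(entry, list, head) {
            struct ttm_buffer_object *bo = entry->bo;

            /* Locks and unlocks bo->bdev->lru_lock internally. */
            ttm_bo_move_to_lru_tail_unlocked(bo);
            dma_resv_unlock(bo->base.resv);
    }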
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index ed1672a..04f2eef 100644
@@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
                .no_wait_gpu = false,
                .force_alloc = true
        };
-       struct ttm_global *glob = &ttm_glob;
        struct dma_fence *fence;
        int ret;
        unsigned i;
@@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
         * Can't use standard list traversal since we're unlocking.
         */
 
-       spin_lock(&glob->lru_lock);
+       spin_lock(&bdev->lru_lock);
        for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
                while (!list_empty(&man->lru[i])) {
-                       spin_unlock(&glob->lru_lock);
+                       spin_unlock(&bdev->lru_lock);
                        ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
                                                  NULL);
                        if (ret)
                                return ret;
-                       spin_lock(&glob->lru_lock);
+                       spin_lock(&bdev->lru_lock);
                }
        }
-       spin_unlock(&glob->lru_lock);
+       spin_unlock(&bdev->lru_lock);
 
        spin_lock(&man->move_lock);
        fence = dma_fence_get(man->move);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index d007fee..dbccac9 100644
@@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
 static inline void
 ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
 {
-       spin_lock(&ttm_glob.lru_lock);
+       spin_lock(&bo->bdev->lru_lock);
        ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
-       spin_unlock(&ttm_glob.lru_lock);
+       spin_unlock(&bo->bdev->lru_lock);
 }
 
 static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 93f3b59..7c8f87b 100644
@@ -56,7 +56,6 @@ extern struct ttm_global {
         */
 
        struct page *dummy_read_page;
-       spinlock_t lru_lock;
 
        /**
         * Protected by ttm_global_mutex.
@@ -277,8 +276,9 @@ struct ttm_device {
        struct ttm_pool pool;
 
        /*
-        * Protected by the global:lru lock.
+        * Protection for the per manager LRU and ddestroy lists.
         */
+       spinlock_t lru_lock;
        struct list_head ddestroy;
 
        /*