drm/amdgpu: add VM generation token
author Christian König <christian.koenig@amd.com>
Wed, 19 Apr 2023 13:17:57 +0000 (15:17 +0200)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 15 Jun 2023 15:37:55 +0000 (11:37 -0400)
Instead of using the VRAM lost counter, add a 64-bit token which indicates
whether a context or job is still valid to use.

Should the VRAM be lost or the page tables need re-creation, the token will
change, indicating that userspace needs to act: re-create the contexts and
re-submit the work.
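
For illustration, userspace can observe the new token through the existing
AMDGPU_CTX_OP_QUERY_STATE2 uAPI: once the generation of a context no longer
matches, the query sets AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST. A minimal sketch of
the resulting check, not part of this patch (the helper name is hypothetical):

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>            /* drmIoctl() from libdrm */
#include <drm/amdgpu_drm.h>     /* DRM_IOCTL_AMDGPU_CTX, AMDGPU_CTX_* */

/* Returns nonzero if the context must be re-created and its work
 * re-submitted. */
static int context_needs_recreate(int fd, uint32_t ctx_id)
{
	union drm_amdgpu_ctx args;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_QUERY_STATE2;
	args.in.ctx_id = ctx_id;

	if (drmIoctl(fd, DRM_IOCTL_AMDGPU_CTX, &args))
		return 1; /* conservatively treat an ioctl error as lost */

	/* Set by the kernel when ctx->generation no longer matches. */
	return !!(args.out.state.flags & AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST);
}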

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 6e1d331..d950388 100644
@@ -309,7 +309,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
        }
        p->gang_leader = p->jobs[p->gang_leader_idx];
 
-       if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
+       if (p->ctx->generation != p->gang_leader->generation) {
                ret = -ECANCELED;
                goto free_all_kdata;
        }
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 410acdd..3ccd709 100644
@@ -333,7 +333,7 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
 
        ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
        ctx->reset_counter_query = ctx->reset_counter;
-       ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter);
+       ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm);
        ctx->init_priority = priority;
        ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
 
@@ -586,7 +586,7 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
        if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;
 
-       if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
+       if (ctx->generation != amdgpu_vm_generation(adev, &fpriv->vm))
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;
 
        if (atomic_read(&ctx->guilty))
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index f1e27b6..85376ba 100644
@@ -47,7 +47,7 @@ struct amdgpu_ctx {
        struct amdgpu_ctx_mgr           *mgr;
        unsigned                        reset_counter;
        unsigned                        reset_counter_query;
-       uint32_t                        vram_lost_counter;
+       uint64_t                        generation;
        spinlock_t                      ring_lock;
        struct amdgpu_ctx_entity        *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
        bool                            preamble_presented;
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 9e6f2fa..78476bc 100644
@@ -109,7 +109,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        (*job)->vm = vm;
 
        amdgpu_sync_create(&(*job)->explicit_sync);
-       (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+       (*job)->generation = amdgpu_vm_generation(adev, vm);
        (*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
 
        if (!entity)
@@ -295,7 +295,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
        trace_amdgpu_sched_run_job(job);
 
        /* Skip job if VRAM is lost and never resubmit gangs */
-       if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter) ||
+       if (job->generation != amdgpu_vm_generation(adev, job->vm) ||
            (job->job_run_counter && job->gang_submit))
                dma_fence_set_error(finished, -ECANCELED);
 
drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 3f9804f..a963a25 100644
@@ -61,7 +61,7 @@ struct amdgpu_job {
        uint32_t                gds_base, gds_size;
        uint32_t                gws_base, gws_size;
        uint32_t                oa_base, oa_size;
-       uint32_t                vram_lost_counter;
+       uint64_t                generation;
 
        /* user fence handling */
        uint64_t                uf_addr;
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1045be4..143d11a 100644
@@ -405,6 +405,30 @@ static void amdgpu_vm_fini_entities(struct amdgpu_vm *vm)
        drm_sched_entity_destroy(&vm->delayed);
 }
 
+/**
+ * amdgpu_vm_generation - return the page table re-generation counter
+ * @adev: the amdgpu_device
+ * @vm: optional VM to check, might be NULL
+ *
+ * Returns a page table re-generation token to allow checking if submissions
+ * are still valid to use this VM. The VM parameter might be NULL in which case
+ * just the VRAM lost counter will be used.
+ */
+uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+       uint64_t result = (u64)atomic_read(&adev->vram_lost_counter) << 32;
+
+       if (!vm)
+               return result;
+
+       result += vm->generation;
+       /* Add one if the page tables will be re-generated on next CS */
+       if (drm_sched_entity_error(&vm->delayed))
+               ++result;
+
+       return result;
+}
+
 /**
  * amdgpu_vm_validate_pt_bos - validate the page table BOs
  *
@@ -428,6 +452,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        int r;
 
        if (drm_sched_entity_error(&vm->delayed)) {
+               ++vm->generation;
                amdgpu_vm_bo_reset_state_machine(vm);
                amdgpu_vm_fini_entities(vm);
                r = amdgpu_vm_init_entities(adev, vm);
@@ -2134,6 +2159,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        vm->last_update = dma_fence_get_stub();
        vm->last_unlocked = dma_fence_get_stub();
        vm->last_tlb_flush = dma_fence_get_stub();
+       vm->generation = 0;
 
        mutex_init(&vm->eviction_lock);
        vm->evicting = false;
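
Taken together, amdgpu_vm_generation() packs the device-global VRAM lost
counter into the upper 32 bits of the token and the per-VM page table
re-generation count (plus one if a re-generation is already pending) into the
lower bits. A standalone sketch of that composition and of the validity check
done in amdgpu_job_run(), for illustration only (names are hypothetical):

#include <stdbool.h>
#include <stdint.h>

/* Mirrors the token composition in amdgpu_vm_generation(). */
static uint64_t vm_generation_token(uint32_t vram_lost_counter,
				    uint64_t pt_regenerations,
				    bool regen_pending)
{
	uint64_t token = (uint64_t)vram_lost_counter << 32;

	token += pt_regenerations;
	if (regen_pending)	/* page tables rebuilt on next CS */
		++token;

	return token;
}

/* A job samples the token at allocation time; any VRAM loss or page
 * table rebuild in between makes the tokens differ, and the job is
 * cancelled with -ECANCELED instead of being run. */
static bool job_generation_valid(uint64_t job_token, uint64_t current_token)
{
	return job_token == current_token;
}
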
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 14f9a2b..9c85d49 100644
@@ -295,6 +295,9 @@ struct amdgpu_vm {
        atomic64_t              tlb_seq;
        struct dma_fence        *last_tlb_flush;
 
+       /* How many times we had to re-generate the page tables */
+       uint64_t                generation;
+
        /* Last unlocked submission to the scheduler entities */
        struct dma_fence        *last_unlocked;
 
@@ -397,6 +400,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
                         struct list_head *validated,
                         struct amdgpu_bo_list_entry *entry);
 bool amdgpu_vm_ready(struct amdgpu_vm *vm);
+uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                              int (*callback)(void *p, struct amdgpu_bo *bo),
                              void *param);