drm/amdgpu: make ctx_add_fence interruptible (v2)
authorMonk Liu <Monk.Liu@amd.com>
Fri, 15 Sep 2017 05:40:31 +0000 (13:40 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 26 Sep 2017 19:14:13 +0000 (15:14 -0400)
otherwise a GPU hang will make it impossible to kill the application
under timeout=0 mode

v2:
Fix memory leak of job/job->s_fence
unlock mn
remove the ERROR message after the wait is interrupted

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index 33e1d61..9cce59f 100644 (file)
@@ -735,8 +735,8 @@ struct amdgpu_ctx_mgr {
 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
 int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
 
-uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
-                             struct dma_fence *fence);
+int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
+                             struct dma_fence *fence, uint64_t *seq);
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
                                   struct amdgpu_ring *ring, uint64_t seq);
 
index 9f1202a..c6a214f 100644 (file)
@@ -1129,6 +1129,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
        struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
        struct amdgpu_job *job;
        unsigned i;
+       uint64_t seq;
+
        int r;
 
        amdgpu_mn_lock(p->mn);
@@ -1158,10 +1160,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
        job->fence_ctx = entity->fence_context;
        p->fence = dma_fence_get(&job->base.s_fence->finished);
 
+       r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
+       if (r) {
+               dma_fence_put(p->fence);
+               dma_fence_put(&job->base.s_fence->finished);
+               amdgpu_job_free(job);
+               amdgpu_mn_unlock(p->mn);
+               return r;
+       }
+
        amdgpu_cs_post_dependencies(p);
 
-       cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
-       job->uf_sequence = cs->out.handle;
+       cs->out.handle = seq;
+       job->uf_sequence = seq;
+
        amdgpu_job_free_resources(job);
 
        trace_amdgpu_cs_ioctl(job);
index a11e443..75c933b 100644 (file)
@@ -246,8 +246,8 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
        return 0;
 }
 
-uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
-                             struct dma_fence *fence)
+int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
+                             struct dma_fence *fence, uint64_t* handler)
 {
        struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
        uint64_t seq = cring->sequence;
@@ -258,9 +258,9 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
        other = cring->fences[idx];
        if (other) {
                signed long r;
-               r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
+               r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
                if (r < 0)
-                       DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+                       return r;
        }
 
        dma_fence_get(fence);
@@ -271,8 +271,10 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
        spin_unlock(&ctx->ring_lock);
 
        dma_fence_put(other);
+       if (handler)
+               *handler = seq;
 
-       return seq;
+       return 0;
 }
 
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,