drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

   1 /*
   2  * Copyright 2015 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * Authors: monk liu <monk.liu@amd.com>
  23  */
  24
  25 #include <drm/drm_auth.h>
  26 #include "amdgpu.h"
  27 #include "amdgpu_sched.h"
  28 #include "amdgpu_ras.h"
  29 #include <linux/nospec.h>
  30
  31 #define to_amdgpu_ctx_entity(e) \
  32         container_of((e), struct amdgpu_ctx_entity, entity)
  33
  34 const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
  35         [AMDGPU_HW_IP_GFX]      =       1,
  36         [AMDGPU_HW_IP_COMPUTE]  =       4,
  37         [AMDGPU_HW_IP_DMA]      =       2,
  38         [AMDGPU_HW_IP_UVD]      =       1,
  39         [AMDGPU_HW_IP_VCE]      =       1,
  40         [AMDGPU_HW_IP_UVD_ENC]  =       1,
  41         [AMDGPU_HW_IP_VCN_DEC]  =       1,
  42         [AMDGPU_HW_IP_VCN_ENC]  =       1,
  43         [AMDGPU_HW_IP_VCN_JPEG] =       1,
  44 };
  45
  46 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
  47                                       enum drm_sched_priority priority)
  48 {
  49         if (priority < 0 || priority >= DRM_SCHED_PRIORITY_COUNT)
  50                 return -EINVAL;
  51
  52         /* NORMAL and below are accessible by everyone */
  53         if (priority <= DRM_SCHED_PRIORITY_NORMAL)
  54                 return 0;
  55
  56         if (capable(CAP_SYS_NICE))
  57                 return 0;
  58
  59         if (drm_is_current_master(filp))
  60                 return 0;
  61
  62         return -EACCES;
  63 }
  64
  65 static enum gfx_pipe_priority amdgpu_ctx_sched_prio_to_compute_prio(enum drm_sched_priority prio)
  66 {
  67         switch (prio) {
  68         case DRM_SCHED_PRIORITY_HIGH:
  69         case DRM_SCHED_PRIORITY_KERNEL:
  70                 return AMDGPU_GFX_PIPE_PRIO_HIGH;
  71         default:
  72                 return AMDGPU_GFX_PIPE_PRIO_NORMAL;
  73         }
  74 }
  75
  76 static unsigned int amdgpu_ctx_prio_sched_to_hw(struct amdgpu_device *adev,
  77                                                  enum drm_sched_priority prio,
  78                                                  u32 hw_ip)
  79 {
  80         unsigned int hw_prio;
  81
  82         hw_prio = (hw_ip == AMDGPU_HW_IP_COMPUTE) ?
  83                         amdgpu_ctx_sched_prio_to_compute_prio(prio) :
  84                         AMDGPU_RING_PRIO_DEFAULT;
  85         hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
  86         if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
  87                 hw_prio = AMDGPU_RING_PRIO_DEFAULT;
  88
  89         return hw_prio;
  90 }
  91
  92 static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
  93                                    const u32 ring)
  94 {
  95         struct amdgpu_device *adev = ctx->adev;
  96         struct amdgpu_ctx_entity *entity;
  97         struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
  98         unsigned num_scheds = 0;
  99         unsigned int hw_prio;
 100         enum drm_sched_priority priority;
 101         int r;
 102
 103         entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
 104                          GFP_KERNEL);
 105         if (!entity)
 106                 return  -ENOMEM;
 107
 108         entity->sequence = 1;
 109         priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 110                                 ctx->init_priority : ctx->override_priority;
 111         hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority, hw_ip);
 112
 113         hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
 114         scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
 115         num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
 116
 117         /* disable load balance if the hw engine retains context among dependent jobs */
 118         if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
 119             hw_ip == AMDGPU_HW_IP_VCN_DEC ||
 120             hw_ip == AMDGPU_HW_IP_UVD_ENC ||
 121             hw_ip == AMDGPU_HW_IP_UVD) {
 122                 sched = drm_sched_pick_best(scheds, num_scheds);
 123                 scheds = &sched;
 124                 num_scheds = 1;
 125         }
 126
 127         r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
 128                                   &ctx->guilty);
 129         if (r)
 130                 goto error_free_entity;
 131
 132         ctx->entities[hw_ip][ring] = entity;
 133         return 0;
 134
 135 error_free_entity:
 136         kfree(entity);
 137
 138         return r;
 139 }
 140
 141 static int amdgpu_ctx_init(struct amdgpu_device *adev,
 142                            enum drm_sched_priority priority,
 143                            struct drm_file *filp,
 144                            struct amdgpu_ctx *ctx)
 145 {
 146         int r;
 147
 148         r = amdgpu_ctx_priority_permit(filp, priority);
 149         if (r)
 150                 return r;
 151
 152         memset(ctx, 0, sizeof(*ctx));
 153
 154         ctx->adev = adev;
 155
 156         kref_init(&ctx->refcount);
 157         spin_lock_init(&ctx->ring_lock);
 158         mutex_init(&ctx->lock);
 159
 160         ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
 161         ctx->reset_counter_query = ctx->reset_counter;
 162         ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
 163         ctx->init_priority = priority;
 164         ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
 165
 166         return 0;
 167 }
 168
 169 static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
 170 {
 171
 172         int i;
 173
 174         if (!entity)
 175                 return;
 176
 177         for (i = 0; i < amdgpu_sched_jobs; ++i)
 178                 dma_fence_put(entity->fences[i]);
 179
 180         kfree(entity);
 181 }
 182
 183 static void amdgpu_ctx_fini(struct kref *ref)
 184 {
 185         struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
 186         struct amdgpu_device *adev = ctx->adev;
 187         unsigned i, j;
 188
 189         if (!adev)
 190                 return;
 191
 192         for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
 193                 for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
 194                         amdgpu_ctx_fini_entity(ctx->entities[i][j]);
 195                         ctx->entities[i][j] = NULL;
 196                 }
 197         }
 198
 199         mutex_destroy(&ctx->lock);
 200         kfree(ctx);
 201 }
 202
 203 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 204                           u32 ring, struct drm_sched_entity **entity)
 205 {
 206         int r;
 207
 208         if (hw_ip >= AMDGPU_HW_IP_NUM) {
 209                 DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
 210                 return -EINVAL;
 211         }
 212
 213         /* Right now all IPs have only one instance - multiple rings. */
 214         if (instance != 0) {
 215                 DRM_DEBUG("invalid ip instance: %d\n", instance);
 216                 return -EINVAL;
 217         }
 218
 219         if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
 220                 DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
 221                 return -EINVAL;
 222         }
 223
 224         if (ctx->entities[hw_ip][ring] == NULL) {
 225                 r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
 226                 if (r)
 227                         return r;
 228         }
 229
 230         *entity = &ctx->entities[hw_ip][ring]->entity;
 231         return 0;
 232 }
 233
 234 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 235                             struct amdgpu_fpriv *fpriv,
 236                             struct drm_file *filp,
 237                             enum drm_sched_priority priority,
 238                             uint32_t *id)
 239 {
 240         struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
 241         struct amdgpu_ctx *ctx;
 242         int r;
 243
 244         ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 245         if (!ctx)
 246                 return -ENOMEM;
 247
 248         mutex_lock(&mgr->lock);
 249         r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
 250         if (r < 0) {
 251                 mutex_unlock(&mgr->lock);
 252                 kfree(ctx);
 253                 return r;
 254         }
 255
 256         *id = (uint32_t)r;
 257         r = amdgpu_ctx_init(adev, priority, filp, ctx);
 258         if (r) {
 259                 idr_remove(&mgr->ctx_handles, *id);
 260                 *id = 0;
 261                 kfree(ctx);
 262         }
 263         mutex_unlock(&mgr->lock);
 264         return r;
 265 }
 266
 267 static void amdgpu_ctx_do_release(struct kref *ref)
 268 {
 269         struct amdgpu_ctx *ctx;
 270         u32 i, j;
 271
 272         ctx = container_of(ref, struct amdgpu_ctx, refcount);
 273         for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
 274                 for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 275                         if (!ctx->entities[i][j])
 276                                 continue;
 277
 278                         drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
 279                 }
 280         }
 281
 282         amdgpu_ctx_fini(ref);
 283 }
 284
 285 static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
 286 {
 287         struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
 288         struct amdgpu_ctx *ctx;
 289
 290         mutex_lock(&mgr->lock);
 291         ctx = idr_remove(&mgr->ctx_handles, id);
 292         if (ctx)
 293                 kref_put(&ctx->refcount, amdgpu_ctx_do_release);
 294         mutex_unlock(&mgr->lock);
 295         return ctx ? 0 : -EINVAL;
 296 }
 297
 298 static int amdgpu_ctx_query(struct amdgpu_device *adev,
 299                             struct amdgpu_fpriv *fpriv, uint32_t id,
 300                             union drm_amdgpu_ctx_out *out)
 301 {
 302         struct amdgpu_ctx *ctx;
 303         struct amdgpu_ctx_mgr *mgr;
 304         unsigned reset_counter;
 305
 306         if (!fpriv)
 307                 return -EINVAL;
 308
 309         mgr = &fpriv->ctx_mgr;
 310         mutex_lock(&mgr->lock);
 311         ctx = idr_find(&mgr->ctx_handles, id);
 312         if (!ctx) {
 313                 mutex_unlock(&mgr->lock);
 314                 return -EINVAL;
 315         }
 316
 317         /* TODO: these two are always zero */
 318         out->state.flags = 0x0;
 319         out->state.hangs = 0x0;
 320
 321         /* determine if a GPU reset has occured since the last call */
 322         reset_counter = atomic_read(&adev->gpu_reset_counter);
 323         /* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
 324         if (ctx->reset_counter_query == reset_counter)
 325                 out->state.reset_status = AMDGPU_CTX_NO_RESET;
 326         else
 327                 out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
 328         ctx->reset_counter_query = reset_counter;
 329
 330         mutex_unlock(&mgr->lock);
 331         return 0;
 332 }
 333
 334 #define AMDGPU_RAS_COUNTE_DELAY_MS 3000
 335
 336 static int amdgpu_ctx_query2(struct amdgpu_device *adev,
 337                              struct amdgpu_fpriv *fpriv, uint32_t id,
 338                              union drm_amdgpu_ctx_out *out)
 339 {
 340         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 341         struct amdgpu_ctx *ctx;
 342         struct amdgpu_ctx_mgr *mgr;
 343
 344         if (!fpriv)
 345                 return -EINVAL;
 346
 347         mgr = &fpriv->ctx_mgr;
 348         mutex_lock(&mgr->lock);
 349         ctx = idr_find(&mgr->ctx_handles, id);
 350         if (!ctx) {
 351                 mutex_unlock(&mgr->lock);
 352                 return -EINVAL;
 353         }
 354
 355         out->state.flags = 0x0;
 356         out->state.hangs = 0x0;
 357
 358         if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
 359                 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;
 360
 361         if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
 362                 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;
 363
 364         if (atomic_read(&ctx->guilty))
 365                 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
 366
 367         if (adev->ras_enabled && con) {
 368                 /* Return the cached values in O(1),
 369                  * and schedule delayed work to cache
 370                  * new vaues.
 371                  */
 372                 int ce_count, ue_count;
 373
 374                 ce_count = atomic_read(&con->ras_ce_count);
 375                 ue_count = atomic_read(&con->ras_ue_count);
 376
 377                 if (ce_count != ctx->ras_counter_ce) {
 378                         ctx->ras_counter_ce = ce_count;
 379                         out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
 380                 }
 381
 382                 if (ue_count != ctx->ras_counter_ue) {
 383                         ctx->ras_counter_ue = ue_count;
 384                         out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
 385                 }
 386
 387                 schedule_delayed_work(&con->ras_counte_delay_work,
 388                                       msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
 389         }
 390
 391         mutex_unlock(&mgr->lock);
 392         return 0;
 393 }
 394
 395 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 396                      struct drm_file *filp)
 397 {
 398         int r;
 399         uint32_t id;
 400         enum drm_sched_priority priority;
 401
 402         union drm_amdgpu_ctx *args = data;
 403         struct amdgpu_device *adev = drm_to_adev(dev);
 404         struct amdgpu_fpriv *fpriv = filp->driver_priv;
 405
 406         id = args->in.ctx_id;
 407         r = amdgpu_to_sched_priority(args->in.priority, &priority);
 408
 409         /* For backwards compatibility reasons, we need to accept
 410          * ioctls with garbage in the priority field */
 411         if (r == -EINVAL)
 412                 priority = DRM_SCHED_PRIORITY_NORMAL;
 413
 414         switch (args->in.op) {
 415         case AMDGPU_CTX_OP_ALLOC_CTX:
 416                 r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
 417                 args->out.alloc.ctx_id = id;
 418                 break;
 419         case AMDGPU_CTX_OP_FREE_CTX:
 420                 r = amdgpu_ctx_free(fpriv, id);
 421                 break;
 422         case AMDGPU_CTX_OP_QUERY_STATE:
 423                 r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
 424                 break;
 425         case AMDGPU_CTX_OP_QUERY_STATE2:
 426                 r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
 427                 break;
 428         default:
 429                 return -EINVAL;
 430         }
 431
 432         return r;
 433 }
 434
 435 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
 436 {
 437         struct amdgpu_ctx *ctx;
 438         struct amdgpu_ctx_mgr *mgr;
 439
 440         if (!fpriv)
 441                 return NULL;
 442
 443         mgr = &fpriv->ctx_mgr;
 444
 445         mutex_lock(&mgr->lock);
 446         ctx = idr_find(&mgr->ctx_handles, id);
 447         if (ctx)
 448                 kref_get(&ctx->refcount);
 449         mutex_unlock(&mgr->lock);
 450         return ctx;
 451 }
 452
 453 int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
 454 {
 455         if (ctx == NULL)
 456                 return -EINVAL;
 457
 458         kref_put(&ctx->refcount, amdgpu_ctx_do_release);
 459         return 0;
 460 }
 461
 462 void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
 463                           struct drm_sched_entity *entity,
 464                           struct dma_fence *fence, uint64_t *handle)
 465 {
 466         struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 467         uint64_t seq = centity->sequence;
 468         struct dma_fence *other = NULL;
 469         unsigned idx = 0;
 470
 471         idx = seq & (amdgpu_sched_jobs - 1);
 472         other = centity->fences[idx];
 473         if (other)
 474                 BUG_ON(!dma_fence_is_signaled(other));
 475
 476         dma_fence_get(fence);
 477
 478         spin_lock(&ctx->ring_lock);
 479         centity->fences[idx] = fence;
 480         centity->sequence++;
 481         spin_unlock(&ctx->ring_lock);
 482
 483         dma_fence_put(other);
 484         if (handle)
 485                 *handle = seq;
 486 }
 487
 488 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 489                                        struct drm_sched_entity *entity,
 490                                        uint64_t seq)
 491 {
 492         struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 493         struct dma_fence *fence;
 494
 495         spin_lock(&ctx->ring_lock);
 496
 497         if (seq == ~0ull)
 498                 seq = centity->sequence - 1;
 499
 500         if (seq >= centity->sequence) {
 501                 spin_unlock(&ctx->ring_lock);
 502                 return ERR_PTR(-EINVAL);
 503         }
 504
 505
 506         if (seq + amdgpu_sched_jobs < centity->sequence) {
 507                 spin_unlock(&ctx->ring_lock);
 508                 return NULL;
 509         }
 510
 511         fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
 512         spin_unlock(&ctx->ring_lock);
 513
 514         return fence;
 515 }
 516
 517 static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
 518                                             struct amdgpu_ctx_entity *aentity,
 519                                             int hw_ip,
 520                                             enum drm_sched_priority priority)
 521 {
 522         struct amdgpu_device *adev = ctx->adev;
 523         unsigned int hw_prio;
 524         struct drm_gpu_scheduler **scheds = NULL;
 525         unsigned num_scheds;
 526
 527         /* set sw priority */
 528         drm_sched_entity_set_priority(&aentity->entity, priority);
 529
 530         /* set hw priority */
 531         if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
 532                 hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority,
 533                                                       AMDGPU_HW_IP_COMPUTE);
 534                 hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
 535                 scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
 536                 num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
 537                 drm_sched_entity_modify_sched(&aentity->entity, scheds,
 538                                               num_scheds);
 539         }
 540 }
 541
 542 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 543                                   enum drm_sched_priority priority)
 544 {
 545         enum drm_sched_priority ctx_prio;
 546         unsigned i, j;
 547
 548         ctx->override_priority = priority;
 549
 550         ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 551                         ctx->init_priority : ctx->override_priority;
 552         for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
 553                 for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 554                         if (!ctx->entities[i][j])
 555                                 continue;
 556
 557                         amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
 558                                                        i, ctx_prio);
 559                 }
 560         }
 561 }
 562
 563 int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
 564                                struct drm_sched_entity *entity)
 565 {
 566         struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 567         struct dma_fence *other;
 568         unsigned idx;
 569         long r;
 570
 571         spin_lock(&ctx->ring_lock);
 572         idx = centity->sequence & (amdgpu_sched_jobs - 1);
 573         other = dma_fence_get(centity->fences[idx]);
 574         spin_unlock(&ctx->ring_lock);
 575
 576         if (!other)
 577                 return 0;
 578
 579         r = dma_fence_wait(other, true);
 580         if (r < 0 && r != -ERESTARTSYS)
 581                 DRM_ERROR("Error (%ld) waiting for fence!\n", r);
 582
 583         dma_fence_put(other);
 584         return r;
 585 }
 586
 587 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 588 {
 589         mutex_init(&mgr->lock);
 590         idr_init(&mgr->ctx_handles);
 591 }
 592
 593 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 594 {
 595         struct amdgpu_ctx *ctx;
 596         struct idr *idp;
 597         uint32_t id, i, j;
 598
 599         idp = &mgr->ctx_handles;
 600
 601         mutex_lock(&mgr->lock);
 602         idr_for_each_entry(idp, ctx, id) {
 603                 for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
 604                         for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 605                                 struct drm_sched_entity *entity;
 606
 607                                 if (!ctx->entities[i][j])
 608                                         continue;
 609
 610                                 entity = &ctx->entities[i][j]->entity;
 611                                 timeout = drm_sched_entity_flush(entity, timeout);
 612                         }
 613                 }
 614         }
 615         mutex_unlock(&mgr->lock);
 616         return timeout;
 617 }
 618
 619 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 620 {
 621         struct amdgpu_ctx *ctx;
 622         struct idr *idp;
 623         uint32_t id, i, j;
 624
 625         idp = &mgr->ctx_handles;
 626
 627         idr_for_each_entry(idp, ctx, id) {
 628                 if (kref_read(&ctx->refcount) != 1) {
 629                         DRM_ERROR("ctx %p is still alive\n", ctx);
 630                         continue;
 631                 }
 632
 633                 for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
 634                         for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 635                                 struct drm_sched_entity *entity;
 636
 637                                 if (!ctx->entities[i][j])
 638                                         continue;
 639
 640                                 entity = &ctx->entities[i][j]->entity;
 641                                 drm_sched_entity_fini(entity);
 642                         }
 643                 }
 644         }
 645 }
 646
 647 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
 648 {
 649         struct amdgpu_ctx *ctx;
 650         struct idr *idp;
 651         uint32_t id;
 652
 653         amdgpu_ctx_mgr_entity_fini(mgr);
 654
 655         idp = &mgr->ctx_handles;
 656
 657         idr_for_each_entry(idp, ctx, id) {
 658                 if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
 659                         DRM_ERROR("ctx %p is still alive\n", ctx);
 660         }
 661
 662         idr_destroy(&mgr->ctx_handles);
 663         mutex_destroy(&mgr->lock);
 664 }
 665
 666 static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx,
 667                 struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max)
 668 {
 669         ktime_t now, t1;
 670         uint32_t i;
 671
 672         *total = *max = 0;
 673
 674         now = ktime_get();
 675         for (i = 0; i < amdgpu_sched_jobs; i++) {
 676                 struct dma_fence *fence;
 677                 struct drm_sched_fence *s_fence;
 678
 679                 spin_lock(&ctx->ring_lock);
 680                 fence = dma_fence_get(centity->fences[i]);
 681                 spin_unlock(&ctx->ring_lock);
 682                 if (!fence)
 683                         continue;
 684                 s_fence = to_drm_sched_fence(fence);
 685                 if (!dma_fence_is_signaled(&s_fence->scheduled)) {
 686                         dma_fence_put(fence);
 687                         continue;
 688                 }
 689                 t1 = s_fence->scheduled.timestamp;
 690                 if (!ktime_before(t1, now)) {
 691                         dma_fence_put(fence);
 692                         continue;
 693                 }
 694                 if (dma_fence_is_signaled(&s_fence->finished) &&
 695                         s_fence->finished.timestamp < now)
 696                         *total += ktime_sub(s_fence->finished.timestamp, t1);
 697                 else
 698                         *total += ktime_sub(now, t1);
 699                 t1 = ktime_sub(now, t1);
 700                 dma_fence_put(fence);
 701                 *max = max(t1, *max);
 702         }
 703 }
 704
 705 ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
 706                 uint32_t idx, uint64_t *elapsed)
 707 {
 708         struct idr *idp;
 709         struct amdgpu_ctx *ctx;
 710         uint32_t id;
 711         struct amdgpu_ctx_entity *centity;
 712         ktime_t total = 0, max = 0;
 713
 714         if (idx >= AMDGPU_MAX_ENTITY_NUM)
 715                 return 0;
 716         idp = &mgr->ctx_handles;
 717         mutex_lock(&mgr->lock);
 718         idr_for_each_entry(idp, ctx, id) {
 719                 ktime_t ttotal, tmax;
 720
 721                 if (!ctx->entities[hwip][idx])
 722                         continue;
 723
 724                 centity = ctx->entities[hwip][idx];
 725                 amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);
 726
 727                 /* Harmonic mean approximation diverges for very small
 728                  * values. If ratio < 0.01% ignore
 729                  */
 730                 if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
 731                         continue;
 732
 733                 total = ktime_add(total, ttotal);
 734                 max = ktime_after(tmax, max) ? tmax : max;
 735         }
 736
 737         mutex_unlock(&mgr->lock);
 738         if (elapsed)
 739                 *elapsed = max;
 740
 741         return total;
 742 }