drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

   1 /*
   2  * Copyright 2015 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * Authors: monk liu <monk.liu@amd.com>
  23  */
  24
  25 #include <drm/drmP.h>
  26 #include <drm/drm_auth.h>
  27 #include "amdgpu.h"
  28 #include "amdgpu_sched.h"
  29
  30 #define to_amdgpu_ctx_entity(e) \
  31         container_of((e), struct amdgpu_ctx_entity, entity)
  32
  33 const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
  34         [AMDGPU_HW_IP_GFX]      =       1,
  35         [AMDGPU_HW_IP_COMPUTE]  =       4,
  36         [AMDGPU_HW_IP_DMA]      =       2,
  37         [AMDGPU_HW_IP_UVD]      =       1,
  38         [AMDGPU_HW_IP_VCE]      =       1,
  39         [AMDGPU_HW_IP_UVD_ENC]  =       1,
  40         [AMDGPU_HW_IP_VCN_DEC]  =       1,
  41         [AMDGPU_HW_IP_VCN_ENC]  =       1,
  42         [AMDGPU_HW_IP_VCN_JPEG] =       1,
  43 };
  44
  45 static int amdgput_ctx_total_num_entities(void)
  46 {
  47         unsigned i, num_entities = 0;
  48
  49         for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
  50                 num_entities += amdgpu_ctx_num_entities[i];
  51
  52         return num_entities;
  53 }
  54
  55 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
  56                                       enum drm_sched_priority priority)
  57 {
  58         /* NORMAL and below are accessible by everyone */
  59         if (priority <= DRM_SCHED_PRIORITY_NORMAL)
  60                 return 0;
  61
  62         if (capable(CAP_SYS_NICE))
  63                 return 0;
  64
  65         if (drm_is_current_master(filp))
  66                 return 0;
  67
  68         return -EACCES;
  69 }
  70
  71 static int amdgpu_ctx_init(struct amdgpu_device *adev,
  72                            enum drm_sched_priority priority,
  73                            struct drm_file *filp,
  74                            struct amdgpu_ctx *ctx)
  75 {
  76         unsigned num_entities = amdgput_ctx_total_num_entities();
  77         unsigned i, j;
  78         int r;
  79
  80         if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
  81                 return -EINVAL;
  82
  83         r = amdgpu_ctx_priority_permit(filp, priority);
  84         if (r)
  85                 return r;
  86
  87         memset(ctx, 0, sizeof(*ctx));
  88         ctx->adev = adev;
  89
  90         ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
  91                               sizeof(struct dma_fence*), GFP_KERNEL);
  92         if (!ctx->fences)
  93                 return -ENOMEM;
  94
  95         ctx->entities[0] = kcalloc(num_entities,
  96                                    sizeof(struct amdgpu_ctx_entity),
  97                                    GFP_KERNEL);
  98         if (!ctx->entities[0]) {
  99                 r = -ENOMEM;
 100                 goto error_free_fences;
 101         }
 102
 103         for (i = 0; i < num_entities; ++i) {
 104                 struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
 105
 106                 entity->sequence = 1;
 107                 entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
 108         }
 109         for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
 110                 ctx->entities[i] = ctx->entities[i - 1] +
 111                         amdgpu_ctx_num_entities[i - 1];
 112
 113         kref_init(&ctx->refcount);
 114         spin_lock_init(&ctx->ring_lock);
 115         mutex_init(&ctx->lock);
 116
 117         ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
 118         ctx->reset_counter_query = ctx->reset_counter;
 119         ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
 120         ctx->init_priority = priority;
 121         ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
 122
 123         for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
 124                 struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
 125                 struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
 126                 unsigned num_rings;
 127
 128                 switch (i) {
 129                 case AMDGPU_HW_IP_GFX:
 130                         rings[0] = &adev->gfx.gfx_ring[0];
 131                         num_rings = 1;
 132                         break;
 133                 case AMDGPU_HW_IP_COMPUTE:
 134                         for (j = 0; j < adev->gfx.num_compute_rings; ++j)
 135                                 rings[j] = &adev->gfx.compute_ring[j];
 136                         num_rings = adev->gfx.num_compute_rings;
 137                         break;
 138                 case AMDGPU_HW_IP_DMA:
 139                         for (j = 0; j < adev->sdma.num_instances; ++j)
 140                                 rings[j] = &adev->sdma.instance[j].ring;
 141                         num_rings = adev->sdma.num_instances;
 142                         break;
 143                 case AMDGPU_HW_IP_UVD:
 144                         rings[0] = &adev->uvd.inst[0].ring;
 145                         num_rings = 1;
 146                         break;
 147                 case AMDGPU_HW_IP_VCE:
 148                         rings[0] = &adev->vce.ring[0];
 149                         num_rings = 1;
 150                         break;
 151                 case AMDGPU_HW_IP_UVD_ENC:
 152                         rings[0] = &adev->uvd.inst[0].ring_enc[0];
 153                         num_rings = 1;
 154                         break;
 155                 case AMDGPU_HW_IP_VCN_DEC:
 156                         rings[0] = &adev->vcn.ring_dec;
 157                         num_rings = 1;
 158                         break;
 159                 case AMDGPU_HW_IP_VCN_ENC:
 160                         rings[0] = &adev->vcn.ring_enc[0];
 161                         num_rings = 1;
 162                         break;
 163                 case AMDGPU_HW_IP_VCN_JPEG:
 164                         rings[0] = &adev->vcn.ring_jpeg;
 165                         num_rings = 1;
 166                         break;
 167                 }
 168
 169                 for (j = 0; j < num_rings; ++j)
 170                         rqs[j] = &rings[j]->sched.sched_rq[priority];
 171
 172                 for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
 173                         r = drm_sched_entity_init(&ctx->entities[i][j].entity,
 174                                                   rqs, num_rings, &ctx->guilty);
 175                 if (r)
 176                         goto error_cleanup_entities;
 177         }
 178
 179         return 0;
 180
 181 error_cleanup_entities:
 182         for (i = 0; i < num_entities; ++i)
 183                 drm_sched_entity_destroy(&ctx->entities[0][i].entity);
 184         kfree(ctx->entities[0]);
 185
 186 error_free_fences:
 187         kfree(ctx->fences);
 188         ctx->fences = NULL;
 189         return r;
 190 }
 191
 192 static void amdgpu_ctx_fini(struct kref *ref)
 193 {
 194         struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
 195         unsigned num_entities = amdgput_ctx_total_num_entities();
 196         struct amdgpu_device *adev = ctx->adev;
 197         unsigned i, j;
 198
 199         if (!adev)
 200                 return;
 201
 202         for (i = 0; i < num_entities; ++i)
 203                 for (j = 0; j < amdgpu_sched_jobs; ++j)
 204                         dma_fence_put(ctx->entities[0][i].fences[j]);
 205         kfree(ctx->fences);
 206         kfree(ctx->entities[0]);
 207
 208         mutex_destroy(&ctx->lock);
 209
 210         kfree(ctx);
 211 }
 212
 213 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 214                           u32 ring, struct drm_sched_entity **entity)
 215 {
 216         if (hw_ip >= AMDGPU_HW_IP_NUM) {
 217                 DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
 218                 return -EINVAL;
 219         }
 220
 221         /* Right now all IPs have only one instance - multiple rings. */
 222         if (instance != 0) {
 223                 DRM_DEBUG("invalid ip instance: %d\n", instance);
 224                 return -EINVAL;
 225         }
 226
 227         if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
 228                 DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
 229                 return -EINVAL;
 230         }
 231
 232         *entity = &ctx->entities[hw_ip][ring].entity;
 233         return 0;
 234 }
 235
 236 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 237                             struct amdgpu_fpriv *fpriv,
 238                             struct drm_file *filp,
 239                             enum drm_sched_priority priority,
 240                             uint32_t *id)
 241 {
 242         struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
 243         struct amdgpu_ctx *ctx;
 244         int r;
 245
 246         ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 247         if (!ctx)
 248                 return -ENOMEM;
 249
 250         mutex_lock(&mgr->lock);
 251         r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
 252         if (r < 0) {
 253                 mutex_unlock(&mgr->lock);
 254                 kfree(ctx);
 255                 return r;
 256         }
 257
 258         *id = (uint32_t)r;
 259         r = amdgpu_ctx_init(adev, priority, filp, ctx);
 260         if (r) {
 261                 idr_remove(&mgr->ctx_handles, *id);
 262                 *id = 0;
 263                 kfree(ctx);
 264         }
 265         mutex_unlock(&mgr->lock);
 266         return r;
 267 }
 268
 269 static void amdgpu_ctx_do_release(struct kref *ref)
 270 {
 271         struct amdgpu_ctx *ctx;
 272         unsigned num_entities;
 273         u32 i;
 274
 275         ctx = container_of(ref, struct amdgpu_ctx, refcount);
 276
 277         num_entities = 0;
 278         for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
 279                 num_entities += amdgpu_ctx_num_entities[i];
 280
 281         for (i = 0; i < num_entities; i++)
 282                 drm_sched_entity_destroy(&ctx->entities[0][i].entity);
 283
 284         amdgpu_ctx_fini(ref);
 285 }
 286
 287 static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
 288 {
 289         struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
 290         struct amdgpu_ctx *ctx;
 291
 292         mutex_lock(&mgr->lock);
 293         ctx = idr_remove(&mgr->ctx_handles, id);
 294         if (ctx)
 295                 kref_put(&ctx->refcount, amdgpu_ctx_do_release);
 296         mutex_unlock(&mgr->lock);
 297         return ctx ? 0 : -EINVAL;
 298 }
 299
 300 static int amdgpu_ctx_query(struct amdgpu_device *adev,
 301                             struct amdgpu_fpriv *fpriv, uint32_t id,
 302                             union drm_amdgpu_ctx_out *out)
 303 {
 304         struct amdgpu_ctx *ctx;
 305         struct amdgpu_ctx_mgr *mgr;
 306         unsigned reset_counter;
 307
 308         if (!fpriv)
 309                 return -EINVAL;
 310
 311         mgr = &fpriv->ctx_mgr;
 312         mutex_lock(&mgr->lock);
 313         ctx = idr_find(&mgr->ctx_handles, id);
 314         if (!ctx) {
 315                 mutex_unlock(&mgr->lock);
 316                 return -EINVAL;
 317         }
 318
 319         /* TODO: these two are always zero */
 320         out->state.flags = 0x0;
 321         out->state.hangs = 0x0;
 322
 323         /* determine if a GPU reset has occured since the last call */
 324         reset_counter = atomic_read(&adev->gpu_reset_counter);
 325         /* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
 326         if (ctx->reset_counter_query == reset_counter)
 327                 out->state.reset_status = AMDGPU_CTX_NO_RESET;
 328         else
 329                 out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
 330         ctx->reset_counter_query = reset_counter;
 331
 332         mutex_unlock(&mgr->lock);
 333         return 0;
 334 }
 335
 336 static int amdgpu_ctx_query2(struct amdgpu_device *adev,
 337         struct amdgpu_fpriv *fpriv, uint32_t id,
 338         union drm_amdgpu_ctx_out *out)
 339 {
 340         struct amdgpu_ctx *ctx;
 341         struct amdgpu_ctx_mgr *mgr;
 342
 343         if (!fpriv)
 344                 return -EINVAL;
 345
 346         mgr = &fpriv->ctx_mgr;
 347         mutex_lock(&mgr->lock);
 348         ctx = idr_find(&mgr->ctx_handles, id);
 349         if (!ctx) {
 350                 mutex_unlock(&mgr->lock);
 351                 return -EINVAL;
 352         }
 353
 354         out->state.flags = 0x0;
 355         out->state.hangs = 0x0;
 356
 357         if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
 358                 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;
 359
 360         if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
 361                 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;
 362
 363         if (atomic_read(&ctx->guilty))
 364                 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
 365
 366         mutex_unlock(&mgr->lock);
 367         return 0;
 368 }
 369
 370 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 371                      struct drm_file *filp)
 372 {
 373         int r;
 374         uint32_t id;
 375         enum drm_sched_priority priority;
 376
 377         union drm_amdgpu_ctx *args = data;
 378         struct amdgpu_device *adev = dev->dev_private;
 379         struct amdgpu_fpriv *fpriv = filp->driver_priv;
 380
 381         r = 0;
 382         id = args->in.ctx_id;
 383         priority = amdgpu_to_sched_priority(args->in.priority);
 384
 385         /* For backwards compatibility reasons, we need to accept
 386          * ioctls with garbage in the priority field */
 387         if (priority == DRM_SCHED_PRIORITY_INVALID)
 388                 priority = DRM_SCHED_PRIORITY_NORMAL;
 389
 390         switch (args->in.op) {
 391         case AMDGPU_CTX_OP_ALLOC_CTX:
 392                 r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
 393                 args->out.alloc.ctx_id = id;
 394                 break;
 395         case AMDGPU_CTX_OP_FREE_CTX:
 396                 r = amdgpu_ctx_free(fpriv, id);
 397                 break;
 398         case AMDGPU_CTX_OP_QUERY_STATE:
 399                 r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
 400                 break;
 401         case AMDGPU_CTX_OP_QUERY_STATE2:
 402                 r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
 403                 break;
 404         default:
 405                 return -EINVAL;
 406         }
 407
 408         return r;
 409 }
 410
 411 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
 412 {
 413         struct amdgpu_ctx *ctx;
 414         struct amdgpu_ctx_mgr *mgr;
 415
 416         if (!fpriv)
 417                 return NULL;
 418
 419         mgr = &fpriv->ctx_mgr;
 420
 421         mutex_lock(&mgr->lock);
 422         ctx = idr_find(&mgr->ctx_handles, id);
 423         if (ctx)
 424                 kref_get(&ctx->refcount);
 425         mutex_unlock(&mgr->lock);
 426         return ctx;
 427 }
 428
 429 int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
 430 {
 431         if (ctx == NULL)
 432                 return -EINVAL;
 433
 434         kref_put(&ctx->refcount, amdgpu_ctx_do_release);
 435         return 0;
 436 }
 437
 438 void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
 439                           struct drm_sched_entity *entity,
 440                           struct dma_fence *fence, uint64_t* handle)
 441 {
 442         struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 443         uint64_t seq = centity->sequence;
 444         struct dma_fence *other = NULL;
 445         unsigned idx = 0;
 446
 447         idx = seq & (amdgpu_sched_jobs - 1);
 448         other = centity->fences[idx];
 449         if (other)
 450                 BUG_ON(!dma_fence_is_signaled(other));
 451
 452         dma_fence_get(fence);
 453
 454         spin_lock(&ctx->ring_lock);
 455         centity->fences[idx] = fence;
 456         centity->sequence++;
 457         spin_unlock(&ctx->ring_lock);
 458
 459         dma_fence_put(other);
 460         if (handle)
 461                 *handle = seq;
 462 }
 463
 464 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 465                                        struct drm_sched_entity *entity,
 466                                        uint64_t seq)
 467 {
 468         struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 469         struct dma_fence *fence;
 470
 471         spin_lock(&ctx->ring_lock);
 472
 473         if (seq == ~0ull)
 474                 seq = centity->sequence - 1;
 475
 476         if (seq >= centity->sequence) {
 477                 spin_unlock(&ctx->ring_lock);
 478                 return ERR_PTR(-EINVAL);
 479         }
 480
 481
 482         if (seq + amdgpu_sched_jobs < centity->sequence) {
 483                 spin_unlock(&ctx->ring_lock);
 484                 return NULL;
 485         }
 486
 487         fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
 488         spin_unlock(&ctx->ring_lock);
 489
 490         return fence;
 491 }
 492
 493 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 494                                   enum drm_sched_priority priority)
 495 {
 496         unsigned num_entities = amdgput_ctx_total_num_entities();
 497         enum drm_sched_priority ctx_prio;
 498         unsigned i;
 499
 500         ctx->override_priority = priority;
 501
 502         ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 503                         ctx->init_priority : ctx->override_priority;
 504
 505         for (i = 0; i < num_entities; i++) {
 506                 struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
 507
 508                 drm_sched_entity_set_priority(entity, ctx_prio);
 509         }
 510 }
 511
 512 int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
 513                                struct drm_sched_entity *entity)
 514 {
 515         struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 516         unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1);
 517         struct dma_fence *other = centity->fences[idx];
 518
 519         if (other) {
 520                 signed long r;
 521                 r = dma_fence_wait(other, true);
 522                 if (r < 0) {
 523                         if (r != -ERESTARTSYS)
 524                                 DRM_ERROR("Error (%ld) waiting for fence!\n", r);
 525
 526                         return r;
 527                 }
 528         }
 529
 530         return 0;
 531 }
 532
 533 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 534 {
 535         mutex_init(&mgr->lock);
 536         idr_init(&mgr->ctx_handles);
 537 }
 538
 539 void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
 540 {
 541         unsigned num_entities = amdgput_ctx_total_num_entities();
 542         struct amdgpu_ctx *ctx;
 543         struct idr *idp;
 544         uint32_t id, i;
 545         long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;
 546
 547         idp = &mgr->ctx_handles;
 548
 549         mutex_lock(&mgr->lock);
 550         idr_for_each_entry(idp, ctx, id) {
 551
 552                 if (!ctx->adev) {
 553                         mutex_unlock(&mgr->lock);
 554                         return;
 555                 }
 556
 557                 for (i = 0; i < num_entities; i++) {
 558                         struct drm_sched_entity *entity;
 559
 560                         entity = &ctx->entities[0][i].entity;
 561                         max_wait = drm_sched_entity_flush(entity, max_wait);
 562                 }
 563         }
 564         mutex_unlock(&mgr->lock);
 565 }
 566
 567 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 568 {
 569         unsigned num_entities = amdgput_ctx_total_num_entities();
 570         struct amdgpu_ctx *ctx;
 571         struct idr *idp;
 572         uint32_t id, i;
 573
 574         idp = &mgr->ctx_handles;
 575
 576         idr_for_each_entry(idp, ctx, id) {
 577
 578                 if (!ctx->adev)
 579                         return;
 580
 581                 if (kref_read(&ctx->refcount) != 1) {
 582                         DRM_ERROR("ctx %p is still alive\n", ctx);
 583                         continue;
 584                 }
 585
 586                 for (i = 0; i < num_entities; i++)
 587                         drm_sched_entity_fini(&ctx->entities[0][i].entity);
 588         }
 589 }
 590
 591 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
 592 {
 593         struct amdgpu_ctx *ctx;
 594         struct idr *idp;
 595         uint32_t id;
 596
 597         amdgpu_ctx_mgr_entity_fini(mgr);
 598
 599         idp = &mgr->ctx_handles;
 600
 601         idr_for_each_entry(idp, ctx, id) {
 602                 if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
 603                         DRM_ERROR("ctx %p is still alive\n", ctx);
 604         }
 605
 606         idr_destroy(&mgr->ctx_handles);
 607         mutex_destroy(&mgr->lock);
 608 }