/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */
#include <drm/drm_auth.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
#include <linux/nospec.h>
#define to_amdgpu_ctx_entity(e)	\
	container_of((e), struct amdgpu_ctx_entity, entity)
const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
	[AMDGPU_HW_IP_GFX]	=	1,
	[AMDGPU_HW_IP_COMPUTE]	=	4,
	[AMDGPU_HW_IP_DMA]	=	2,
	[AMDGPU_HW_IP_UVD]	=	1,
	[AMDGPU_HW_IP_VCE]	=	1,
	[AMDGPU_HW_IP_UVD_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_DEC]	=	1,
	[AMDGPU_HW_IP_VCN_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
};
bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_VERY_LOW:
	case AMDGPU_CTX_PRIORITY_LOW:
	case AMDGPU_CTX_PRIORITY_NORMAL:
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return true;
	default:
	case AMDGPU_CTX_PRIORITY_UNSET:
		/* UNSET priority is not valid and we don't carry that
		 * around, but set it to NORMAL in the only place this
		 * function is called, amdgpu_ctx_ioctl().
		 */
		return false;
	}
}
static enum drm_sched_priority
amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_UNSET:
		pr_warn_once("AMD-->DRM context priority value UNSET-->NORMAL");
		return DRM_SCHED_PRIORITY_NORMAL;

	case AMDGPU_CTX_PRIORITY_VERY_LOW:
		return DRM_SCHED_PRIORITY_MIN;

	case AMDGPU_CTX_PRIORITY_LOW:
		return DRM_SCHED_PRIORITY_MIN;

	case AMDGPU_CTX_PRIORITY_NORMAL:
		return DRM_SCHED_PRIORITY_NORMAL;

	case AMDGPU_CTX_PRIORITY_HIGH:
		return DRM_SCHED_PRIORITY_HIGH;

	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return DRM_SCHED_PRIORITY_HIGH;

	/* This should not happen as we sanitized the userspace provided
	 * priority already, WARN if this happens.
	 */
	default:
		WARN(1, "Invalid context priority %d\n", ctx_prio);
		return DRM_SCHED_PRIORITY_NORMAL;
	}
}
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
				      int32_t priority)
{
	/* NORMAL and below are accessible by everyone */
	if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}
static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_GFX_PIPE_PRIO_HIGH;
	default:
		return AMDGPU_GFX_PIPE_PRIO_NORMAL;
	}
}
static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
		return AMDGPU_RING_PRIO_1;
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_RING_PRIO_2;
	default:
		return AMDGPU_RING_PRIO_0;
	}
}
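
/*
 * Map the context's effective priority (override if set, otherwise the init
 * priority) onto a hardware ring priority for the given IP block, falling
 * back to the default priority when no scheduler is configured at the
 * requested level.
 */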
static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
	int32_t ctx_prio;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;

	switch (hw_ip) {
	case AMDGPU_HW_IP_GFX:
	case AMDGPU_HW_IP_COMPUTE:
		hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
		break;
	case AMDGPU_HW_IP_VCE:
	case AMDGPU_HW_IP_VCN_ENC:
		hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
		break;
	default:
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;
		break;
	}

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
	if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;

	return hw_prio;
}
/* Calculate the time spent on the hw */
static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
{
	struct drm_sched_fence *s_fence;

	if (!fence)
		return ns_to_ktime(0);

	/* When the fence is not even scheduled it can't have spent time */
	s_fence = to_drm_sched_fence(fence);
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
		return ns_to_ktime(0);

	/* When it is still running account how much has already been spent */
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
		return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);

	return ktime_sub(s_fence->finished.timestamp,
			 s_fence->scheduled.timestamp);
}
static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
				      struct amdgpu_ctx_entity *centity)
{
	ktime_t res = ns_to_ktime(0);
	uint32_t i;

	spin_lock(&ctx->ring_lock);
	for (i = 0; i < amdgpu_sched_jobs; i++) {
		res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
	}
	spin_unlock(&ctx->ring_lock);
	return res;
}
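
/*
 * Lazily allocate and initialize the scheduler entity for one ring of an IP
 * block, typically the first time the context submits work to that ring.
 */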
static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
				  const u32 ring)
{
	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
	struct amdgpu_device *adev = ctx->mgr->adev;
	struct amdgpu_ctx_entity *entity;
	enum drm_sched_priority drm_prio;
	unsigned int hw_prio, num_scheds;
	int32_t ctx_prio;
	int r;

	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
			 GFP_KERNEL);
	if (!entity)
		return -ENOMEM;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	entity->hw_ip = hw_ip;
	entity->sequence = 1;
	hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
	drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);

	if (!(adev)->xcp_mgr) {
		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
	} else {
		struct amdgpu_fpriv *fpriv;

		fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr);
		r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv,
						&num_scheds, &scheds);
		if (r)
			goto cleanup_entity;
	}

	/* disable load balance if the hw engine retains context among dependent jobs */
	if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
	    hw_ip == AMDGPU_HW_IP_VCN_DEC ||
	    hw_ip == AMDGPU_HW_IP_UVD_ENC ||
	    hw_ip == AMDGPU_HW_IP_UVD) {
		sched = drm_sched_pick_best(scheds, num_scheds);
		scheds = &sched;
		num_scheds = 1;
	}

	r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
				  &ctx->guilty);
	if (r)
		goto error_free_entity;

	/* It's not an error if we fail to install the new entity */
	if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
		goto cleanup_entity;

	return 0;

cleanup_entity:
	drm_sched_entity_fini(&entity->entity);

error_free_entity:
	kfree(entity);

	return r;
}
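
/*
 * Tear down one context entity: accumulate the time its remaining fences
 * spent on the hardware, drop the fence references and free the entity.
 */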
static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev,
				      struct amdgpu_ctx_entity *entity)
{
	ktime_t res = ns_to_ktime(0);
	int i;

	if (!entity)
		return res;

	for (i = 0; i < amdgpu_sched_jobs; ++i) {
		res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
		dma_fence_put(entity->fences[i]);
	}

	amdgpu_xcp_release_sched(adev, entity);

	kfree(entity);
	return res;
}
static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
					u32 *stable_pstate)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level current_level;

	current_level = amdgpu_dpm_get_performance_level(adev);

	switch (current_level) {
	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
		break;
	default:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
		break;
	}
	return 0;
}
static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
			   struct drm_file *filp, struct amdgpu_ctx *ctx)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	u32 current_stable_pstate;
	int r;

	r = amdgpu_ctx_priority_permit(filp, priority);
	if (r)
		return r;

	memset(ctx, 0, sizeof(*ctx));

	kref_init(&ctx->refcount);
	ctx->mgr = mgr;
	spin_lock_init(&ctx->ring_lock);

	ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
	ctx->reset_counter_query = ctx->reset_counter;
	ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm);
	ctx->init_priority = priority;
	ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;

	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
	if (r)
		return r;

	if (mgr->adev->pm.stable_pstate_ctx)
		ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate;
	else
		ctx->stable_pstate = current_stable_pstate;

	ctx->ctx_mgr = &(fpriv->ctx_mgr);
	return 0;
}
static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
					u32 stable_pstate)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level level;
	u32 current_stable_pstate;
	int r;

	mutex_lock(&adev->pm.stable_pstate_ctx_lock);
	if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
		r = -EBUSY;
		goto done;
	}

	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
	if (r || (stable_pstate == current_stable_pstate))
		goto done;

	switch (stable_pstate) {
	case AMDGPU_CTX_STABLE_PSTATE_NONE:
		level = AMD_DPM_FORCED_LEVEL_AUTO;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_PEAK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
		break;
	default:
		r = -EINVAL;
		goto done;
	}

	r = amdgpu_dpm_force_performance_level(adev, level);

	if (level == AMD_DPM_FORCED_LEVEL_AUTO)
		adev->pm.stable_pstate_ctx = NULL;
	else if (!r)
		adev->pm.stable_pstate_ctx = ctx;
done:
	mutex_unlock(&adev->pm.stable_pstate_ctx_lock);
	return r;
}
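
/*
 * Final release of a context once its refcount drops to zero: account the
 * remaining per-entity runtime to the manager, restore the stable pstate
 * while the device is still present and free the context.
 */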
static void amdgpu_ctx_fini(struct kref *ref)
{
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
	struct amdgpu_ctx_mgr *mgr = ctx->mgr;
	struct amdgpu_device *adev = mgr->adev;
	unsigned i, j, idx;

	if (!adev)
		return;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
			ktime_t spend;

			spend = amdgpu_ctx_fini_entity(adev, ctx->entities[i][j]);
			atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
		}
	}

	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
		drm_dev_exit(idx);
	}

	kfree(ctx);
}
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
			  u32 ring, struct drm_sched_entity **entity)
{
	int r;
	struct drm_sched_entity *ctx_entity;

	if (hw_ip >= AMDGPU_HW_IP_NUM) {
		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
		return -EINVAL;
	}

	/* Right now all IPs have only one instance - multiple rings. */
	if (instance != 0) {
		DRM_DEBUG("invalid ip instance: %d\n", instance);
		return -EINVAL;
	}

	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
		return -EINVAL;
	}

	if (ctx->entities[hw_ip][ring] == NULL) {
		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
		if (r)
			return r;
	}

	ctx_entity = &ctx->entities[hw_ip][ring]->entity;
	r = drm_sched_entity_error(ctx_entity);
	if (r) {
		DRM_DEBUG("error entity %p\n", ctx_entity);
		return r;
	}

	*entity = ctx_entity;
	return 0;
}
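
/*
 * Allocate and initialize a new context on behalf of the given file, insert
 * it into the per-file IDR and return its handle through @id.
 */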
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv,
			    struct drm_file *filp,
			    int32_t priority,
			    uint32_t *id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int r;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&mgr->lock);
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
	if (r < 0) {
		mutex_unlock(&mgr->lock);
		kfree(ctx);
		return r;
	}

	*id = (uint32_t)r;
	r = amdgpu_ctx_init(mgr, priority, filp, ctx);
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		kfree(ctx);
	}
	mutex_unlock(&mgr->lock);
	return r;
}
static void amdgpu_ctx_do_release(struct kref *ref)
{
	struct amdgpu_ctx *ctx;
	u32 i, j;

	ctx = container_of(ref, struct amdgpu_ctx, refcount);
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
		}
	}

	amdgpu_ctx_fini(ref);
}
static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx)
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	mutex_unlock(&mgr->lock);
	return ctx ? 0 : -EINVAL;
}
static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned reset_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	/* TODO: these two are always zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* determine if a GPU reset has occurred since the last call */
	reset_counter = atomic_read(&adev->gpu_reset_counter);
	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
	if (ctx->reset_counter_query == reset_counter)
		out->state.reset_status = AMDGPU_CTX_NO_RESET;
	else
		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
	ctx->reset_counter_query = reset_counter;

	mutex_unlock(&mgr->lock);
	return 0;
}
#define AMDGPU_RAS_COUNTE_DELAY_MS 3000
static int amdgpu_ctx_query2(struct amdgpu_device *adev,
			     struct amdgpu_fpriv *fpriv, uint32_t id,
			     union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

	if (ctx->generation != amdgpu_vm_generation(adev, &fpriv->vm))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

	if (atomic_read(&ctx->guilty))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

	if (amdgpu_in_reset(adev))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS;

	if (adev->ras_enabled && con) {
		/* Return the cached values in O(1),
		 * and schedule delayed work to cache
		 * new values.
		 */
		int ce_count, ue_count;

		ce_count = atomic_read(&con->ras_ce_count);
		ue_count = atomic_read(&con->ras_ue_count);

		if (ce_count != ctx->ras_counter_ce) {
			ctx->ras_counter_ce = ce_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
		}

		if (ue_count != ctx->ras_counter_ue) {
			ctx->ras_counter_ue = ue_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
		}

		schedule_delayed_work(&con->ras_counte_delay_work,
				      msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
	}

	mutex_unlock(&mgr->lock);
	return 0;
}
static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
				    struct amdgpu_fpriv *fpriv, uint32_t id,
				    bool set, u32 *stable_pstate)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	int r;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	if (set)
		r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
	else
		r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);

	mutex_unlock(&mgr->lock);
	return r;
}
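
/*
 * Userspace reaches the operations above through the DRM_AMDGPU_CTX ioctl.
 * As a rough, illustrative sketch only (error handling omitted, libdrm's
 * drmCommandWriteRead() assumed), allocating and later freeing a context
 * from userspace looks something like:
 *
 *	union drm_amdgpu_ctx args = {};
 *
 *	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
 *	args.in.priority = AMDGPU_CTX_PRIORITY_NORMAL;
 *	drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &args, sizeof(args));
 *	ctx_id = args.out.alloc.ctx_id;
 *
 *	args.in.op = AMDGPU_CTX_OP_FREE_CTX;
 *	args.in.ctx_id = ctx_id;
 *	drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &args, sizeof(args));
 */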
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *filp)
{
	int r;
	uint32_t id, stable_pstate;
	int32_t priority;

	union drm_amdgpu_ctx *args = data;
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	id = args->in.ctx_id;
	priority = args->in.priority;

	/* For backwards compatibility, we need to accept ioctls with garbage
	 * in the priority field. Garbage values in the priority field result
	 * in the priority being set to NORMAL.
	 */
	if (!amdgpu_ctx_priority_is_valid(priority))
		priority = AMDGPU_CTX_PRIORITY_NORMAL;

	switch (args->in.op) {
	case AMDGPU_CTX_OP_ALLOC_CTX:
		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
		args->out.alloc.ctx_id = id;
		break;
	case AMDGPU_CTX_OP_FREE_CTX:
		r = amdgpu_ctx_free(fpriv, id);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE:
		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE2:
		r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
		if (!r)
			args->out.pstate.flags = stable_pstate;
		break;
	case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
		if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
			return -EINVAL;
		stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
		if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
		break;
	default:
		return -EINVAL;
	}

	return r;
}
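
/*
 * Look up a context by handle and take a reference on it; the caller must
 * balance this with amdgpu_ctx_put().
 */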
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (ctx)
		kref_get(&ctx->refcount);
	mutex_unlock(&mgr->lock);
	return ctx;
}
int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
	if (ctx == NULL)
		return -EINVAL;

	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	return 0;
}
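
/*
 * Fences are kept in a per-entity ring buffer of amdgpu_sched_jobs (a power
 * of two) entries, indexed by the low bits of the sequence number; adding a
 * new fence therefore recycles the slot of the oldest one.
 */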
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
			      struct drm_sched_entity *entity,
			      struct dma_fence *fence)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
	struct dma_fence *other = NULL;
	unsigned idx = 0;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = centity->fences[idx];
	WARN_ON(other && !dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	centity->fences[idx] = fence;
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
		     &ctx->mgr->time_spend[centity->hw_ip]);

	dma_fence_put(other);
	return seq;
}
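
/*
 * Look up a previously added fence by sequence number; a seq of ~0ull means
 * "the most recent one". Returns NULL when the fence is old enough to have
 * already left the ring buffer and an error pointer for sequence numbers
 * from the future.
 */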
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct drm_sched_entity *entity,
				       uint64_t seq)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = centity->sequence - 1;

	if (seq >= centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}
static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
					   struct amdgpu_ctx_entity *aentity,
					   int hw_ip,
					   int32_t priority)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
	struct drm_gpu_scheduler **scheds = NULL;
	unsigned num_scheds;

	/* set sw priority */
	drm_sched_entity_set_priority(&aentity->entity,
				      amdgpu_ctx_to_drm_sched_prio(priority));

	/* set hw priority */
	if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
		hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
		hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
		drm_sched_entity_modify_sched(&aentity->entity, scheds,
					      num_scheds);
	}
}
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
				  int32_t priority)
{
	int32_t ctx_prio;
	unsigned i, j;

	ctx->override_priority = priority;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
						       i, ctx_prio);
		}
	}
}
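
/*
 * Wait for the fence occupying the ring slot that the next submission will
 * reuse, so that a context never has more than amdgpu_sched_jobs
 * submissions in flight per entity.
 */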
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
			       struct drm_sched_entity *entity)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *other;
	unsigned idx;
	long r;

	spin_lock(&ctx->ring_lock);
	idx = centity->sequence & (amdgpu_sched_jobs - 1);
	other = dma_fence_get(centity->fences[idx]);
	spin_unlock(&ctx->ring_lock);

	if (!other)
		return 0;

	r = dma_fence_wait(other, true);
	if (r < 0 && r != -ERESTARTSYS)
		DRM_ERROR("Error (%ld) waiting for fence!\n", r);

	dma_fence_put(other);
	return r;
}
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
			 struct amdgpu_device *adev)
{
	unsigned int i;

	mgr->adev = adev;
	mutex_init(&mgr->lock);
	idr_init_base(&mgr->ctx_handles, 1);

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
		atomic64_set(&mgr->time_spend[i], 0);
}
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {
		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				timeout = drm_sched_entity_flush(entity, timeout);
			}
		}
	}
	mutex_unlock(&mgr->lock);
	return timeout;
}
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_read(&ctx->refcount) != 1) {
			DRM_ERROR("ctx %p is still alive\n", ctx);
			continue;
		}

		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				drm_sched_entity_fini(entity);
			}
		}
	}
}
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id;

	amdgpu_ctx_mgr_entity_fini(mgr);

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
			DRM_ERROR("ctx %p is still alive\n", ctx);
	}
	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
}
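
/*
 * Sum up the time this file's contexts spent on each IP block: the already
 * retired time accumulated in the manager plus the time of the fences still
 * held by live entities.
 */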
void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
			  ktime_t usage[AMDGPU_HW_IP_NUM])
{
	struct amdgpu_ctx *ctx;
	unsigned int hw_ip, i;
	uint32_t id;

	/*
	 * This is a little bit racy because it can be that a ctx or a fence are
	 * destroyed just in the moment we try to account them. But that is ok
	 * since exactly that case is explicitly allowed by the interface.
	 */
	mutex_lock(&mgr->lock);
	for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
		uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);

		usage[hw_ip] = ns_to_ktime(ns);
	}

	idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
		for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
			for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
				struct amdgpu_ctx_entity *centity;
				ktime_t spend;

				centity = ctx->entities[hw_ip][i];
				if (!centity)
					continue;
				spend = amdgpu_ctx_entity_time(ctx, centity);
				usage[hw_ip] = ktime_add(usage[hw_ip], spend);
			}
		}
	}
	mutex_unlock(&mgr->lock);
}