/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */

#include <drm/drm_auth.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
#include <linux/nospec.h>

#define to_amdgpu_ctx_entity(e) \
        container_of((e), struct amdgpu_ctx_entity, entity)

const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
        [AMDGPU_HW_IP_GFX]      =       1,
        [AMDGPU_HW_IP_COMPUTE]  =       4,
        [AMDGPU_HW_IP_DMA]      =       2,
        [AMDGPU_HW_IP_UVD]      =       1,
        [AMDGPU_HW_IP_VCE]      =       1,
        [AMDGPU_HW_IP_UVD_ENC]  =       1,
        [AMDGPU_HW_IP_VCN_DEC]  =       1,
        [AMDGPU_HW_IP_VCN_ENC]  =       1,
        [AMDGPU_HW_IP_VCN_JPEG] =       1,
};

bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
{
        switch (ctx_prio) {
        case AMDGPU_CTX_PRIORITY_UNSET:
        case AMDGPU_CTX_PRIORITY_VERY_LOW:
        case AMDGPU_CTX_PRIORITY_LOW:
        case AMDGPU_CTX_PRIORITY_NORMAL:
        case AMDGPU_CTX_PRIORITY_HIGH:
        case AMDGPU_CTX_PRIORITY_VERY_HIGH:
                return true;
        default:
                return false;
        }
}

static enum drm_sched_priority
amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
{
        switch (ctx_prio) {
        case AMDGPU_CTX_PRIORITY_UNSET:
                return DRM_SCHED_PRIORITY_UNSET;

        case AMDGPU_CTX_PRIORITY_VERY_LOW:
                return DRM_SCHED_PRIORITY_MIN;

        case AMDGPU_CTX_PRIORITY_LOW:
                return DRM_SCHED_PRIORITY_MIN;

        case AMDGPU_CTX_PRIORITY_NORMAL:
                return DRM_SCHED_PRIORITY_NORMAL;

        case AMDGPU_CTX_PRIORITY_HIGH:
                return DRM_SCHED_PRIORITY_HIGH;

        case AMDGPU_CTX_PRIORITY_VERY_HIGH:
                return DRM_SCHED_PRIORITY_HIGH;

        /* This should not happen, as we already sanitized the
         * userspace-provided priority; WARN if it does.
         */
        default:
                WARN(1, "Invalid context priority %d\n", ctx_prio);
                return DRM_SCHED_PRIORITY_NORMAL;
        }
}

static int amdgpu_ctx_priority_permit(struct drm_file *filp,
                                      int32_t priority)
{
        if (!amdgpu_ctx_priority_is_valid(priority))
                return -EINVAL;

        /* NORMAL and below are accessible by everyone */
        if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
                return 0;

        if (capable(CAP_SYS_NICE))
                return 0;

        if (drm_is_current_master(filp))
                return 0;

        return -EACCES;
}

static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_compute_prio(int32_t prio)
{
        switch (prio) {
        case AMDGPU_CTX_PRIORITY_HIGH:
        case AMDGPU_CTX_PRIORITY_VERY_HIGH:
                return AMDGPU_GFX_PIPE_PRIO_HIGH;
        default:
                return AMDGPU_GFX_PIPE_PRIO_NORMAL;
        }
}

static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
{
        switch (prio) {
        case AMDGPU_CTX_PRIORITY_HIGH:
                return AMDGPU_RING_PRIO_1;
        case AMDGPU_CTX_PRIORITY_VERY_HIGH:
                return AMDGPU_RING_PRIO_2;
        default:
                return AMDGPU_RING_PRIO_0;
        }
}

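/* Map the context's effective priority (the override if one is set, the
 * init priority otherwise) to a hardware queue priority for the given IP
 * block.  Falls back to AMDGPU_RING_PRIO_DEFAULT when no scheduler
 * instance exists at the selected priority level.
 */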
static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
{
        struct amdgpu_device *adev = ctx->adev;
        int32_t ctx_prio;
        unsigned int hw_prio;

        ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
                        ctx->init_priority : ctx->override_priority;

        switch (hw_ip) {
        case AMDGPU_HW_IP_COMPUTE:
                hw_prio = amdgpu_ctx_prio_to_compute_prio(ctx_prio);
                break;
        case AMDGPU_HW_IP_VCE:
        case AMDGPU_HW_IP_VCN_ENC:
                hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
                break;
        default:
                hw_prio = AMDGPU_RING_PRIO_DEFAULT;
                break;
        }

        hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
        if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
                hw_prio = AMDGPU_RING_PRIO_DEFAULT;

        return hw_prio;
}

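/* Lazily allocate and initialize the scheduler entity for one ring of a
 * context.  The entity is attached to the scheduler list matching the
 * context's hardware priority; for engines that retain state across
 * dependent jobs (UVD, UVD_ENC, VCN_DEC, VCN_ENC) a single scheduler is
 * picked instead so that load balancing cannot move the work.  Installing
 * the entity races with other threads via cmpxchg(); losing that race is
 * not an error, the freshly built entity is simply torn down again.
 */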
static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
                                  const u32 ring)
{
        struct amdgpu_device *adev = ctx->adev;
        struct amdgpu_ctx_entity *entity;
        struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
        unsigned num_scheds = 0;
        int32_t ctx_prio;
        unsigned int hw_prio;
        enum drm_sched_priority drm_prio;
        int r;

        entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
                         GFP_KERNEL);
        if (!entity)
                return -ENOMEM;

        ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
                        ctx->init_priority : ctx->override_priority;
        entity->sequence = 1;
        hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
        drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);

        hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
        scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
        num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;

        /* disable load balance if the hw engine retains context among dependent jobs */
        if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
            hw_ip == AMDGPU_HW_IP_VCN_DEC ||
            hw_ip == AMDGPU_HW_IP_UVD_ENC ||
            hw_ip == AMDGPU_HW_IP_UVD) {
                sched = drm_sched_pick_best(scheds, num_scheds);
                scheds = &sched;
                num_scheds = 1;
        }

        r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
                                  &ctx->guilty);
        if (r)
                goto error_free_entity;

        /* It's not an error if we fail to install the new entity */
        if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
                goto cleanup_entity;

        return 0;

cleanup_entity:
        drm_sched_entity_fini(&entity->entity);

error_free_entity:
        kfree(entity);

        return r;
}

static int amdgpu_ctx_init(struct amdgpu_device *adev,
                           int32_t priority,
                           struct drm_file *filp,
                           struct amdgpu_ctx *ctx)
{
        int r;

        r = amdgpu_ctx_priority_permit(filp, priority);
        if (r)
                return r;

        memset(ctx, 0, sizeof(*ctx));

        ctx->adev = adev;

        kref_init(&ctx->refcount);
        spin_lock_init(&ctx->ring_lock);

        ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
        ctx->reset_counter_query = ctx->reset_counter;
        ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
        ctx->init_priority = priority;
        ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
        ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;

        return 0;
}

static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
{
        int i;

        if (!entity)
                return;

        for (i = 0; i < amdgpu_sched_jobs; ++i)
                dma_fence_put(entity->fences[i]);

        kfree(entity);
}

static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
                                        u32 *stable_pstate)
{
        struct amdgpu_device *adev = ctx->adev;
        enum amd_dpm_forced_level current_level;

        current_level = amdgpu_dpm_get_performance_level(adev);

        switch (current_level) {
        case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
                *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
                break;
        case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
                *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
                break;
        case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
                *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
                break;
        case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
                *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
                break;
        default:
                *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
                break;
        }
        return 0;
}

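/* Force a stable power state profile on behalf of this context.  Only one
 * context may own the stable pstate at a time; a request from a second
 * context fails with -EBUSY.  Selecting AMDGPU_CTX_STABLE_PSTATE_NONE
 * returns the device to automatic performance level selection and releases
 * ownership.
 */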
static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
                                        u32 stable_pstate)
{
        struct amdgpu_device *adev = ctx->adev;
        enum amd_dpm_forced_level level;
        int r;

        mutex_lock(&adev->pm.stable_pstate_ctx_lock);
        if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
                r = -EBUSY;
                goto done;
        }

        switch (stable_pstate) {
        case AMDGPU_CTX_STABLE_PSTATE_NONE:
                level = AMD_DPM_FORCED_LEVEL_AUTO;
                break;
        case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
                level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
                break;
        case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
                level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
                break;
        case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
                level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
                break;
        case AMDGPU_CTX_STABLE_PSTATE_PEAK:
                level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
                break;
        default:
                r = -EINVAL;
                goto done;
        }

        r = amdgpu_dpm_force_performance_level(adev, level);

        if (level == AMD_DPM_FORCED_LEVEL_AUTO)
                adev->pm.stable_pstate_ctx = NULL;
        else
                adev->pm.stable_pstate_ctx = ctx;
done:
        mutex_unlock(&adev->pm.stable_pstate_ctx_lock);

        return r;
}

static void amdgpu_ctx_fini(struct kref *ref)
{
        struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
        struct amdgpu_device *adev = ctx->adev;
        unsigned i, j, idx;

        if (!adev)
                return;

        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
                for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
                        amdgpu_ctx_fini_entity(ctx->entities[i][j]);
                        ctx->entities[i][j] = NULL;
                }
        }

        if (drm_dev_enter(&adev->ddev, &idx)) {
                amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE);
                drm_dev_exit(idx);
        }

        kfree(ctx);
}

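/* Look up the scheduler entity for (hw_ip, instance, ring), creating it on
 * first use.  Returns -EINVAL for an unknown IP type, a non-zero instance
 * or an out-of-range ring index.
 */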
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
                          u32 ring, struct drm_sched_entity **entity)
{
        int r;

        if (hw_ip >= AMDGPU_HW_IP_NUM) {
                DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
                return -EINVAL;
        }

        /* Right now all IPs have only one instance - multiple rings. */
        if (instance != 0) {
                DRM_DEBUG("invalid ip instance: %d\n", instance);
                return -EINVAL;
        }

        if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
                DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
                return -EINVAL;
        }

        if (ctx->entities[hw_ip][ring] == NULL) {
                r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
                if (r)
                        return r;
        }

        *entity = &ctx->entities[hw_ip][ring]->entity;
        return 0;
}

static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
                            struct amdgpu_fpriv *fpriv,
                            struct drm_file *filp,
                            int32_t priority,
                            uint32_t *id)
{
        struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
        struct amdgpu_ctx *ctx;
        int r;

        ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return -ENOMEM;

        mutex_lock(&mgr->lock);
        r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
        if (r < 0) {
                mutex_unlock(&mgr->lock);
                kfree(ctx);
                return r;
        }

        *id = (uint32_t)r;
        r = amdgpu_ctx_init(adev, priority, filp, ctx);
        if (r) {
                idr_remove(&mgr->ctx_handles, *id);
                *id = 0;
                kfree(ctx);
        }
        mutex_unlock(&mgr->lock);
        return r;
}

static void amdgpu_ctx_do_release(struct kref *ref)
{
        struct amdgpu_ctx *ctx;
        u32 i, j;

        ctx = container_of(ref, struct amdgpu_ctx, refcount);
        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
                for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
                        if (!ctx->entities[i][j])
                                continue;

                        drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
                }
        }

        amdgpu_ctx_fini(ref);
}

static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
        struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
        struct amdgpu_ctx *ctx;

        mutex_lock(&mgr->lock);
        ctx = idr_remove(&mgr->ctx_handles, id);
        if (ctx)
                kref_put(&ctx->refcount, amdgpu_ctx_do_release);
        mutex_unlock(&mgr->lock);
        return ctx ? 0 : -EINVAL;
}

static int amdgpu_ctx_query(struct amdgpu_device *adev,
                            struct amdgpu_fpriv *fpriv, uint32_t id,
                            union drm_amdgpu_ctx_out *out)
{
        struct amdgpu_ctx *ctx;
        struct amdgpu_ctx_mgr *mgr;
        unsigned reset_counter;

        if (!fpriv)
                return -EINVAL;

        mgr = &fpriv->ctx_mgr;
        mutex_lock(&mgr->lock);
        ctx = idr_find(&mgr->ctx_handles, id);
        if (!ctx) {
                mutex_unlock(&mgr->lock);
                return -EINVAL;
        }

        /* TODO: these two are always zero */
        out->state.flags = 0x0;
        out->state.hangs = 0x0;

        /* determine if a GPU reset has occurred since the last call */
        reset_counter = atomic_read(&adev->gpu_reset_counter);
        /* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
        if (ctx->reset_counter_query == reset_counter)
                out->state.reset_status = AMDGPU_CTX_NO_RESET;
        else
                out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
        ctx->reset_counter_query = reset_counter;

        mutex_unlock(&mgr->lock);
        return 0;
}

#define AMDGPU_RAS_COUNTE_DELAY_MS 3000

static int amdgpu_ctx_query2(struct amdgpu_device *adev,
                             struct amdgpu_fpriv *fpriv, uint32_t id,
                             union drm_amdgpu_ctx_out *out)
{
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct amdgpu_ctx *ctx;
        struct amdgpu_ctx_mgr *mgr;

        if (!fpriv)
                return -EINVAL;

        mgr = &fpriv->ctx_mgr;
        mutex_lock(&mgr->lock);
        ctx = idr_find(&mgr->ctx_handles, id);
        if (!ctx) {
                mutex_unlock(&mgr->lock);
                return -EINVAL;
        }

        out->state.flags = 0x0;
        out->state.hangs = 0x0;

        if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

        if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

        if (atomic_read(&ctx->guilty))
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

        if (adev->ras_enabled && con) {
                /* Return the cached values in O(1),
                 * and schedule delayed work to cache
                 * new values.
                 */
                int ce_count, ue_count;

                ce_count = atomic_read(&con->ras_ce_count);
                ue_count = atomic_read(&con->ras_ue_count);

                if (ce_count != ctx->ras_counter_ce) {
                        ctx->ras_counter_ce = ce_count;
                        out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
                }

                if (ue_count != ctx->ras_counter_ue) {
                        ctx->ras_counter_ue = ue_count;
                        out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
                }

                schedule_delayed_work(&con->ras_counte_delay_work,
                                      msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
        }

        mutex_unlock(&mgr->lock);
        return 0;
}

static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
                                    struct amdgpu_fpriv *fpriv, uint32_t id,
                                    bool set, u32 *stable_pstate)
{
        struct amdgpu_ctx *ctx;
        struct amdgpu_ctx_mgr *mgr;
        int r;

        if (!fpriv)
                return -EINVAL;

        mgr = &fpriv->ctx_mgr;
        mutex_lock(&mgr->lock);
        ctx = idr_find(&mgr->ctx_handles, id);
        if (!ctx) {
                mutex_unlock(&mgr->lock);
                return -EINVAL;
        }

        if (set)
                r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
        else
                r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);

        mutex_unlock(&mgr->lock);
        return r;
}

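/* DRM_IOCTL_AMDGPU_CTX entry point: dispatches context allocation, release,
 * state queries and stable pstate get/set based on args->in.op.  Invalid
 * priority values fall back to AMDGPU_CTX_PRIORITY_NORMAL to keep old
 * userspace working.
 *
 * Userspace sketch (illustrative only, using libdrm's generic command
 * helper; fd is an open DRM file descriptor, error handling omitted):
 *
 *      union drm_amdgpu_ctx args = {};
 *
 *      args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
 *      args.in.priority = AMDGPU_CTX_PRIORITY_NORMAL;
 *      if (!drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &args, sizeof(args)))
 *              ctx_id = args.out.alloc.ctx_id;
 */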
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
                     struct drm_file *filp)
{
        int r;
        uint32_t id, stable_pstate;
        int32_t priority;

        union drm_amdgpu_ctx *args = data;
        struct amdgpu_device *adev = drm_to_adev(dev);
        struct amdgpu_fpriv *fpriv = filp->driver_priv;

        id = args->in.ctx_id;
        priority = args->in.priority;

        /* For backwards compatibility reasons, we need to accept
         * ioctls with garbage in the priority field */
        if (!amdgpu_ctx_priority_is_valid(priority))
                priority = AMDGPU_CTX_PRIORITY_NORMAL;

        switch (args->in.op) {
        case AMDGPU_CTX_OP_ALLOC_CTX:
                r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
                args->out.alloc.ctx_id = id;
                break;
        case AMDGPU_CTX_OP_FREE_CTX:
                r = amdgpu_ctx_free(fpriv, id);
                break;
        case AMDGPU_CTX_OP_QUERY_STATE:
                r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
                break;
        case AMDGPU_CTX_OP_QUERY_STATE2:
                r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
                break;
        case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
                if (args->in.flags)
                        return -EINVAL;
                r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
                if (!r)
                        args->out.pstate.flags = stable_pstate;
                break;
        case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
                if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
                        return -EINVAL;
                stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
                if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
                        return -EINVAL;
                r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
                break;
        default:
                return -EINVAL;
        }

        return r;
}

struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
        struct amdgpu_ctx *ctx;
        struct amdgpu_ctx_mgr *mgr;

        if (!fpriv)
                return NULL;

        mgr = &fpriv->ctx_mgr;

        mutex_lock(&mgr->lock);
        ctx = idr_find(&mgr->ctx_handles, id);
        if (ctx)
                kref_get(&ctx->refcount);
        mutex_unlock(&mgr->lock);
        return ctx;
}

int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
        if (ctx == NULL)
                return -EINVAL;

        kref_put(&ctx->refcount, amdgpu_ctx_do_release);
        return 0;
}

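/* Publish a submission's fence in the entity's fence ring buffer and hand
 * back its sequence number as the userspace handle.  The buffer holds the
 * last amdgpu_sched_jobs fences; slot selection assumes amdgpu_sched_jobs
 * is a power of two (the driver rounds the module parameter up elsewhere),
 * e.g. with amdgpu_sched_jobs == 32, sequence 35 reuses slot 3.  The fence
 * being overwritten must already have signaled, which the BUG_ON enforces.
 */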
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
                          struct drm_sched_entity *entity,
                          struct dma_fence *fence, uint64_t *handle)
{
        struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
        uint64_t seq = centity->sequence;
        struct dma_fence *other = NULL;
        unsigned idx = 0;

        idx = seq & (amdgpu_sched_jobs - 1);
        other = centity->fences[idx];
        if (other)
                BUG_ON(!dma_fence_is_signaled(other));

        dma_fence_get(fence);

        spin_lock(&ctx->ring_lock);
        centity->fences[idx] = fence;
        centity->sequence++;
        spin_unlock(&ctx->ring_lock);

        dma_fence_put(other);
        if (handle)
                *handle = seq;
}

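/* Look up the fence for a sequence number on an entity.  A seq of ~0ull
 * means "the most recently submitted fence".  Returns -EINVAL for sequence
 * numbers that have not been submitted yet, and NULL for ones old enough
 * to have been dropped from the ring buffer (and therefore already
 * signaled).
 */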
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
                                       struct drm_sched_entity *entity,
                                       uint64_t seq)
{
        struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
        struct dma_fence *fence;

        spin_lock(&ctx->ring_lock);

        if (seq == ~0ull)
                seq = centity->sequence - 1;

        if (seq >= centity->sequence) {
                spin_unlock(&ctx->ring_lock);
                return ERR_PTR(-EINVAL);
        }

        if (seq + amdgpu_sched_jobs < centity->sequence) {
                spin_unlock(&ctx->ring_lock);
                return NULL;
        }

        fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
        spin_unlock(&ctx->ring_lock);

        return fence;
}

static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
                                           struct amdgpu_ctx_entity *aentity,
                                           int hw_ip,
                                           int32_t priority)
{
        struct amdgpu_device *adev = ctx->adev;
        unsigned int hw_prio;
        struct drm_gpu_scheduler **scheds = NULL;
        unsigned num_scheds;

        /* set sw priority */
        drm_sched_entity_set_priority(&aentity->entity,
                                      amdgpu_ctx_to_drm_sched_prio(priority));

        /* set hw priority */
        if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
                hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
                hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
                scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
                num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
                drm_sched_entity_modify_sched(&aentity->entity, scheds,
                                              num_scheds);
        }
}

void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
                                  int32_t priority)
{
        int32_t ctx_prio;
        unsigned i, j;

        ctx->override_priority = priority;

        ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
                        ctx->init_priority : ctx->override_priority;
        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
                for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
                        if (!ctx->entities[i][j])
                                continue;

                        amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
                                                       i, ctx_prio);
                }
        }
}

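/* Throttle command submission: wait for the fence that currently occupies
 * the ring buffer slot the next submission will overwrite.  Returns 0 if
 * the slot is free or the wait completed, or a negative error code such as
 * -ERESTARTSYS when interrupted by a signal.
 */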
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
                               struct drm_sched_entity *entity)
{
        struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
        struct dma_fence *other;
        unsigned idx;
        long r;

        spin_lock(&ctx->ring_lock);
        idx = centity->sequence & (amdgpu_sched_jobs - 1);
        other = dma_fence_get(centity->fences[idx]);
        spin_unlock(&ctx->ring_lock);

        if (!other)
                return 0;

        r = dma_fence_wait(other, true);
        if (r < 0 && r != -ERESTARTSYS)
                DRM_ERROR("Error (%ld) waiting for fence!\n", r);

        dma_fence_put(other);
        return r;
}

void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
{
        mutex_init(&mgr->lock);
        idr_init(&mgr->ctx_handles);
}

long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
        struct amdgpu_ctx *ctx;
        struct idr *idp;
        uint32_t id, i, j;

        idp = &mgr->ctx_handles;

        mutex_lock(&mgr->lock);
        idr_for_each_entry(idp, ctx, id) {
                for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
                        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
                                struct drm_sched_entity *entity;

                                if (!ctx->entities[i][j])
                                        continue;

                                entity = &ctx->entities[i][j]->entity;
                                timeout = drm_sched_entity_flush(entity, timeout);
                        }
                }
        }
        mutex_unlock(&mgr->lock);
        return timeout;
}

void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
        struct amdgpu_ctx *ctx;
        struct idr *idp;
        uint32_t id, i, j;

        idp = &mgr->ctx_handles;

        idr_for_each_entry(idp, ctx, id) {
                if (kref_read(&ctx->refcount) != 1) {
                        DRM_ERROR("ctx %p is still alive\n", ctx);
                        continue;
                }

                for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
                        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
                                struct drm_sched_entity *entity;

                                if (!ctx->entities[i][j])
                                        continue;

                                entity = &ctx->entities[i][j]->entity;
                                drm_sched_entity_fini(entity);
                        }
                }
        }
}

void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
        struct amdgpu_ctx *ctx;
        struct idr *idp;
        uint32_t id;

        amdgpu_ctx_mgr_entity_fini(mgr);

        idp = &mgr->ctx_handles;

        idr_for_each_entry(idp, ctx, id) {
                if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
                        DRM_ERROR("ctx %p is still alive\n", ctx);
        }

        idr_destroy(&mgr->ctx_handles);
        mutex_destroy(&mgr->lock);
}

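/* Accumulate GPU usage from the fences still held in an entity's ring
 * buffer: *total sums each fence's runtime (finished - scheduled, or
 * now - scheduled for jobs still in flight), while *max tracks the largest
 * time elapsed since any of those fences was scheduled.
 */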
static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx,
                struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max)
{
        ktime_t now, t1;
        uint32_t i;

        *total = *max = 0;

        now = ktime_get();
        for (i = 0; i < amdgpu_sched_jobs; i++) {
                struct dma_fence *fence;
                struct drm_sched_fence *s_fence;

                spin_lock(&ctx->ring_lock);
                fence = dma_fence_get(centity->fences[i]);
                spin_unlock(&ctx->ring_lock);
                if (!fence)
                        continue;
                s_fence = to_drm_sched_fence(fence);
                if (!dma_fence_is_signaled(&s_fence->scheduled)) {
                        dma_fence_put(fence);
                        continue;
                }
                t1 = s_fence->scheduled.timestamp;
                if (!ktime_before(t1, now)) {
                        dma_fence_put(fence);
                        continue;
                }
                if (dma_fence_is_signaled(&s_fence->finished) &&
                        s_fence->finished.timestamp < now)
                        *total += ktime_sub(s_fence->finished.timestamp, t1);
                else
                        *total += ktime_sub(now, t1);
                t1 = ktime_sub(now, t1);
                dma_fence_put(fence);
                *max = max(t1, *max);
        }
}

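/* Sum the fence usage of entity (hwip, idx) across all contexts owned by a
 * context manager.  Contexts whose longest interval is a negligible
 * fraction of their total (per AMDGPU_CTX_FENCE_USAGE_MIN_RATIO) are
 * skipped because the harmonic mean approximation used by the caller
 * diverges for very small values.  Optionally reports the largest interval
 * seen through @elapsed.
 */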
ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
                uint32_t idx, uint64_t *elapsed)
{
        struct idr *idp;
        struct amdgpu_ctx *ctx;
        uint32_t id;
        struct amdgpu_ctx_entity *centity;
        ktime_t total = 0, max = 0;

        if (idx >= AMDGPU_MAX_ENTITY_NUM)
                return 0;
        idp = &mgr->ctx_handles;
        mutex_lock(&mgr->lock);
        idr_for_each_entry(idp, ctx, id) {
                ktime_t ttotal, tmax;

                if (!ctx->entities[hwip][idx])
                        continue;

                centity = ctx->entities[hwip][idx];
                amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);

                /* The harmonic mean approximation diverges for very small
                 * values; ignore this context if the ratio is below 0.01%.
                 */
                if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
                        continue;

                total = ktime_add(total, ttotal);
                max = ktime_after(tmax, max) ? tmax : max;
        }

        mutex_unlock(&mgr->lock);
        if (elapsed)
                *elapsed = max;

        return total;
}