drm/v3d/v3d_sched: fix scheduler callbacks return status
drivers/gpu/drm/v3d/v3d_sched.c
// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2018 Broadcom */

/**
 * DOC: Broadcom V3D scheduling
 *
 * The shared DRM GPU scheduler is used to coordinate submitting jobs
 * to the hardware.  Each DRM fd (roughly a client process) gets its
 * own scheduler entity, which will process jobs in order.  The GPU
 * scheduler will round-robin between clients to submit the next job.
 *
 * For simplicity, and in order to keep latency low for interactive
 * jobs when bulk background jobs are queued up, we submit a new job
 * to the HW only when it has completed the last one, instead of
 * filling up the CT[01]Q FIFOs with jobs.  Similarly, we use
 * v3d_job_dependency() to manage the dependency between bin and
 * render, instead of having the clients submit jobs using the HW's
 * semaphores to interlock between them.
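 *
 * Each V3D hardware queue (BIN, RENDER, TFU, CSD and CACHE_CLEAN)
 * gets its own &struct drm_gpu_scheduler instance, created in
 * v3d_sched_init() below.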
 */

#include <linux/kthread.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

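/* Each driver job type embeds the common struct v3d_job ("base"),
 * which itself embeds the struct drm_sched_job ("base.base") that the
 * scheduler hands to these callbacks; container_of() walks back from
 * the scheduler job to the driver-specific job.
 */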
static struct v3d_job *
to_v3d_job(struct drm_sched_job *sched_job)
{
        return container_of(sched_job, struct v3d_job, base);
}

static struct v3d_bin_job *
to_bin_job(struct drm_sched_job *sched_job)
{
        return container_of(sched_job, struct v3d_bin_job, base.base);
}

static struct v3d_render_job *
to_render_job(struct drm_sched_job *sched_job)
{
        return container_of(sched_job, struct v3d_render_job, base.base);
}

static struct v3d_tfu_job *
to_tfu_job(struct drm_sched_job *sched_job)
{
        return container_of(sched_job, struct v3d_tfu_job, base.base);
}

static struct v3d_csd_job *
to_csd_job(struct drm_sched_job *sched_job)
{
        return container_of(sched_job, struct v3d_csd_job, base.base);
}

static void
v3d_job_free(struct drm_sched_job *sched_job)
{
        struct v3d_job *job = to_v3d_job(sched_job);

        drm_sched_job_cleanup(sched_job);
        v3d_job_put(job);
}

/*
 * Returns the fences that the job depends on, one by one.
 *
 * If placed in the scheduler's .dependency method, the corresponding
 * .run_job won't be called until all of them have been signaled.
 */
static struct dma_fence *
v3d_job_dependency(struct drm_sched_job *sched_job,
                   struct drm_sched_entity *s_entity)
{
        struct v3d_job *job = to_v3d_job(sched_job);

        /* XXX: Wait on a fence for switching the GMP if necessary,
         * and then do so.
         */

        if (!xa_empty(&job->deps))
                return xa_erase(&job->deps, job->last_dep++);

        return NULL;
}
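
/*
 * A minimal sketch of how entries could end up in &v3d_job.deps at
 * submit time.  This is not the actual v3d_gem.c submit path, just an
 * illustration of the xa_alloc() pattern that the xarray walk above
 * assumes; the helper name is hypothetical.
 */
#if 0
static int v3d_job_push_dep(struct v3d_job *job, struct dma_fence *fence)
{
        u32 id;

        /* Keep a reference that v3d_job_dependency() will later hand
         * over to the scheduler core when the entry is erased.
         */
        dma_fence_get(fence);

        return xa_alloc(&job->deps, &id, fence, xa_limit_32b, GFP_KERNEL);
}
#endif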

static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
{
        struct v3d_bin_job *job = to_bin_job(sched_job);
        struct v3d_dev *v3d = job->base.v3d;
        struct drm_device *dev = &v3d->drm;
        struct dma_fence *fence;
        unsigned long irqflags;

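        /* If the scheduler has already set an error on this job's
         * finished fence (e.g. it was blamed for a previous hang and
         * cancelled during resubmission), skip touching the hardware.
         */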
        if (unlikely(job->base.base.s_fence->finished.error))
                return NULL;

        /* Lock required around bin_job update vs
         * v3d_overflow_mem_work().
         */
        spin_lock_irqsave(&v3d->job_lock, irqflags);
        v3d->bin_job = job;
        /* Clear out the overflow allocation, so we don't
         * reuse the overflow attached to a previous job.
         */
        V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
        spin_unlock_irqrestore(&v3d->job_lock, irqflags);

        v3d_invalidate_caches(v3d);

        fence = v3d_fence_create(v3d, V3D_BIN);
        if (IS_ERR(fence))
                return NULL;

        if (job->base.irq_fence)
                dma_fence_put(job->base.irq_fence);
        job->base.irq_fence = dma_fence_get(fence);

        trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
                            job->start, job->end);

        /* Set the current and end address of the control list.
         * Writing the end register is what starts the job.
         */
        if (job->qma) {
                V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma);
                V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms);
        }
        if (job->qts) {
                V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
                               V3D_CLE_CT0QTS_ENABLE |
                               job->qts);
        }
        V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start);
        V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end);

        return fence;
}

static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
{
        struct v3d_render_job *job = to_render_job(sched_job);
        struct v3d_dev *v3d = job->base.v3d;
        struct drm_device *dev = &v3d->drm;
        struct dma_fence *fence;

        if (unlikely(job->base.base.s_fence->finished.error))
                return NULL;

        v3d->render_job = job;

        /* Can we avoid this flush?  We need to be careful of
         * scheduling, though -- imagine job0 rendering to texture and
         * job1 reading, and them being executed as bin0, bin1,
         * render0, render1, so that render1's flush at bin time
         * wasn't enough.
         */
        v3d_invalidate_caches(v3d);

        fence = v3d_fence_create(v3d, V3D_RENDER);
        if (IS_ERR(fence))
                return NULL;

        if (job->base.irq_fence)
                dma_fence_put(job->base.irq_fence);
        job->base.irq_fence = dma_fence_get(fence);

        trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
                            job->start, job->end);

        /* XXX: Set the QCFG */

        /* Set the current and end address of the control list.
         * Writing the end register is what starts the job.
         */
        V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start);
        V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end);

        return fence;
}

static struct dma_fence *
v3d_tfu_job_run(struct drm_sched_job *sched_job)
{
        struct v3d_tfu_job *job = to_tfu_job(sched_job);
        struct v3d_dev *v3d = job->base.v3d;
        struct drm_device *dev = &v3d->drm;
        struct dma_fence *fence;

        fence = v3d_fence_create(v3d, V3D_TFU);
        if (IS_ERR(fence))
                return NULL;

        v3d->tfu_job = job;
        if (job->base.irq_fence)
                dma_fence_put(job->base.irq_fence);
        job->base.irq_fence = dma_fence_get(fence);

        trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);

        V3D_WRITE(V3D_TFU_IIA, job->args.iia);
        V3D_WRITE(V3D_TFU_IIS, job->args.iis);
        V3D_WRITE(V3D_TFU_ICA, job->args.ica);
        V3D_WRITE(V3D_TFU_IUA, job->args.iua);
        V3D_WRITE(V3D_TFU_IOA, job->args.ioa);
        V3D_WRITE(V3D_TFU_IOS, job->args.ios);
        V3D_WRITE(V3D_TFU_COEF0, job->args.coef[0]);
        if (job->args.coef[0] & V3D_TFU_COEF0_USECOEF) {
                V3D_WRITE(V3D_TFU_COEF1, job->args.coef[1]);
                V3D_WRITE(V3D_TFU_COEF2, job->args.coef[2]);
                V3D_WRITE(V3D_TFU_COEF3, job->args.coef[3]);
        }
        /* ICFG kicks off the job. */
        V3D_WRITE(V3D_TFU_ICFG, job->args.icfg | V3D_TFU_ICFG_IOC);

        return fence;
}

static struct dma_fence *
v3d_csd_job_run(struct drm_sched_job *sched_job)
{
        struct v3d_csd_job *job = to_csd_job(sched_job);
        struct v3d_dev *v3d = job->base.v3d;
        struct drm_device *dev = &v3d->drm;
        struct dma_fence *fence;
        int i;

        v3d->csd_job = job;

        v3d_invalidate_caches(v3d);

        fence = v3d_fence_create(v3d, V3D_CSD);
        if (IS_ERR(fence))
                return NULL;

        if (job->base.irq_fence)
                dma_fence_put(job->base.irq_fence);
        job->base.irq_fence = dma_fence_get(fence);

        trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);

        for (i = 1; i <= 6; i++)
                V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]);
        /* CFG0 write kicks off the job. */
        V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0, job->args.cfg[0]);

        return fence;
}

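/* The clean is performed synchronously from the scheduler thread, so
 * there is no hardware fence to return; returning NULL lets the DRM
 * scheduler treat the job as completed once run_job returns.
 */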
static struct dma_fence *
v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
{
        struct v3d_job *job = to_v3d_job(sched_job);
        struct v3d_dev *v3d = job->v3d;

        v3d_clean_caches(v3d);

        return NULL;
}

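/* Full GPU reset path shared by the timeout handlers below: park all
 * the schedulers, bump the offending job's karma, reset the hardware,
 * resubmit the jobs that were still in flight and let the schedulers
 * run again.  Returning DRM_GPU_SCHED_STAT_NOMINAL tells the DRM
 * scheduler core that this scheduler is operational again.
 */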
static enum drm_gpu_sched_stat
v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
{
        enum v3d_queue q;

        mutex_lock(&v3d->reset_lock);

        /* block scheduler */
        for (q = 0; q < V3D_MAX_QUEUES; q++)
                drm_sched_stop(&v3d->queue[q].sched, sched_job);

        if (sched_job)
                drm_sched_increase_karma(sched_job);

        /* get the GPU back into the init state */
        v3d_reset(v3d);

        for (q = 0; q < V3D_MAX_QUEUES; q++)
                drm_sched_resubmit_jobs(&v3d->queue[q].sched);

        /* Unblock schedulers and restart their jobs. */
        for (q = 0; q < V3D_MAX_QUEUES; q++) {
                drm_sched_start(&v3d->queue[q].sched, true);
        }

        mutex_unlock(&v3d->reset_lock);

        return DRM_GPU_SCHED_STAT_NOMINAL;
}

/* If the current address or return address have changed, then the GPU
 * has probably made progress and we should delay the reset.  This
 * could fail if the GPU got in an infinite loop in the CL, but that
 * is pretty unlikely outside of an i-g-t testcase.
 */
static enum drm_gpu_sched_stat
v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
                    u32 *timedout_ctca, u32 *timedout_ctra)
{
        struct v3d_job *job = to_v3d_job(sched_job);
        struct v3d_dev *v3d = job->v3d;
        u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
        u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));

        if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
                *timedout_ctca = ctca;
                *timedout_ctra = ctra;
                return DRM_GPU_SCHED_STAT_NOMINAL;
        }

        return v3d_gpu_reset_for_timeout(v3d, sched_job);
}

static enum drm_gpu_sched_stat
v3d_bin_job_timedout(struct drm_sched_job *sched_job)
{
        struct v3d_bin_job *job = to_bin_job(sched_job);

        return v3d_cl_job_timedout(sched_job, V3D_BIN,
                                   &job->timedout_ctca, &job->timedout_ctra);
}

static enum drm_gpu_sched_stat
v3d_render_job_timedout(struct drm_sched_job *sched_job)
{
        struct v3d_render_job *job = to_render_job(sched_job);

        return v3d_cl_job_timedout(sched_job, V3D_RENDER,
                                   &job->timedout_ctca, &job->timedout_ctra);
}

static enum drm_gpu_sched_stat
v3d_generic_job_timedout(struct drm_sched_job *sched_job)
{
        struct v3d_job *job = to_v3d_job(sched_job);

        return v3d_gpu_reset_for_timeout(job->v3d, sched_job);
}

static enum drm_gpu_sched_stat
v3d_csd_job_timedout(struct drm_sched_job *sched_job)
{
        struct v3d_csd_job *job = to_csd_job(sched_job);
        struct v3d_dev *v3d = job->base.v3d;
        u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4);

        /* If we've made progress, skip reset and let the timer get
         * rearmed.
         */
        if (job->timedout_batches != batches) {
                job->timedout_batches = batches;
                return DRM_GPU_SCHED_STAT_NOMINAL;
        }

        return v3d_gpu_reset_for_timeout(v3d, sched_job);
}

static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
        .dependency = v3d_job_dependency,
        .run_job = v3d_bin_job_run,
        .timedout_job = v3d_bin_job_timedout,
        .free_job = v3d_job_free,
};

static const struct drm_sched_backend_ops v3d_render_sched_ops = {
        .dependency = v3d_job_dependency,
        .run_job = v3d_render_job_run,
        .timedout_job = v3d_render_job_timedout,
        .free_job = v3d_job_free,
};

static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
        .dependency = v3d_job_dependency,
        .run_job = v3d_tfu_job_run,
        .timedout_job = v3d_generic_job_timedout,
        .free_job = v3d_job_free,
};

static const struct drm_sched_backend_ops v3d_csd_sched_ops = {
        .dependency = v3d_job_dependency,
        .run_job = v3d_csd_job_run,
        .timedout_job = v3d_csd_job_timedout,
        .free_job = v3d_job_free,
};

static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
        .dependency = v3d_job_dependency,
        .run_job = v3d_cache_clean_job_run,
        .timedout_job = v3d_generic_job_timedout,
        .free_job = v3d_job_free,
};
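
/*
 * A sketch (not the actual v3d_drv.c open path) of how a per-client
 * scheduler entity could be bound to one of the queues set up below,
 * assuming this kernel generation's drm_sched_entity_init() signature;
 * the helper name is made up for illustration.
 */
#if 0
static int v3d_sketch_entity_init(struct v3d_dev *v3d,
                                  struct drm_sched_entity *entity,
                                  enum v3d_queue queue)
{
        struct drm_gpu_scheduler *sched = &v3d->queue[queue].sched;

        /* One entity per client and queue; its jobs run in order. */
        return drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_NORMAL,
                                     &sched, 1, NULL);
}
#endif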

int
v3d_sched_init(struct v3d_dev *v3d)
{
        int hw_jobs_limit = 1;
        int job_hang_limit = 0;
        int hang_limit_ms = 500;
        int ret;

        ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
                             &v3d_bin_sched_ops,
                             hw_jobs_limit, job_hang_limit,
                             msecs_to_jiffies(hang_limit_ms),
                             "v3d_bin");
        if (ret) {
                dev_err(v3d->drm.dev, "Failed to create bin scheduler: %d.", ret);
                return ret;
        }

        ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
                             &v3d_render_sched_ops,
                             hw_jobs_limit, job_hang_limit,
                             msecs_to_jiffies(hang_limit_ms),
                             "v3d_render");
        if (ret) {
                dev_err(v3d->drm.dev, "Failed to create render scheduler: %d.",
                        ret);
                v3d_sched_fini(v3d);
                return ret;
        }

        ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
                             &v3d_tfu_sched_ops,
                             hw_jobs_limit, job_hang_limit,
                             msecs_to_jiffies(hang_limit_ms),
                             "v3d_tfu");
        if (ret) {
                dev_err(v3d->drm.dev, "Failed to create TFU scheduler: %d.",
                        ret);
                v3d_sched_fini(v3d);
                return ret;
        }

        if (v3d_has_csd(v3d)) {
                ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
                                     &v3d_csd_sched_ops,
                                     hw_jobs_limit, job_hang_limit,
                                     msecs_to_jiffies(hang_limit_ms),
                                     "v3d_csd");
                if (ret) {
                        dev_err(v3d->drm.dev, "Failed to create CSD scheduler: %d.",
                                ret);
                        v3d_sched_fini(v3d);
                        return ret;
                }

                ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
                                     &v3d_cache_clean_sched_ops,
                                     hw_jobs_limit, job_hang_limit,
                                     msecs_to_jiffies(hang_limit_ms),
                                     "v3d_cache_clean");
                if (ret) {
                        dev_err(v3d->drm.dev, "Failed to create CACHE_CLEAN scheduler: %d.",
                                ret);
                        v3d_sched_fini(v3d);
                        return ret;
                }
        }

        return 0;
}

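/* Also used to unwind a partially successful v3d_sched_init(): only
 * the schedulers that were actually brought up (sched.ready) get torn
 * down.
 */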
void
v3d_sched_fini(struct v3d_dev *v3d)
{
        enum v3d_queue q;

        for (q = 0; q < V3D_MAX_QUEUES; q++) {
                if (v3d->queue[q].sched.ready)
                        drm_sched_fini(&v3d->queue[q].sched);
        }
}