1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2019 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include <uapi/misc/habanalabs.h>
9 #include "habanalabs.h"
10
11 #include <linux/uaccess.h>
12 #include <linux/slab.h>
13
14 #define HL_CS_FLAGS_TYPE_MASK   (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
15                                 HL_CS_FLAGS_COLLECTIVE_WAIT)
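/*
 * HL_CS_FLAGS_TYPE_MASK groups the sync-stream CS type flags. These flags
 * are mutually exclusive: hl_cs_sanity_checks() rejects a CS that sets more
 * than one of them, and hl_cs_get_cs_type() maps the single set flag (or
 * none) to its CS type.
 */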
16
17 /**
18  * enum hl_cs_wait_status - cs wait status
19  * @CS_WAIT_STATUS_BUSY: cs was not completed yet
20  * @CS_WAIT_STATUS_COMPLETED: cs completed
21  * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
22  */
23 enum hl_cs_wait_status {
24         CS_WAIT_STATUS_BUSY,
25         CS_WAIT_STATUS_COMPLETED,
26         CS_WAIT_STATUS_GONE
27 };
28
29 static void job_wq_completion(struct work_struct *work);
30 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
31                                 u64 timeout_us, u64 seq,
32                                 enum hl_cs_wait_status *status, s64 *timestamp);
33 static void cs_do_release(struct kref *ref);
34
35 static void hl_sob_reset(struct kref *ref)
36 {
37         struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
38                                                         kref);
39         struct hl_device *hdev = hw_sob->hdev;
40
41         hdev->asic_funcs->reset_sob(hdev, hw_sob);
42 }
43
44 void hl_sob_reset_error(struct kref *ref)
45 {
46         struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
47                                                         kref);
48         struct hl_device *hdev = hw_sob->hdev;
49
50         dev_crit(hdev->dev,
51                 "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
52                 hw_sob->q_idx, hw_sob->sob_id);
53 }
54
55 /**
56  * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
57  * @sob_base: sob base id
58  * @sob_mask: sob user mask, each bit represents a sob offset from sob base
59  * @mask: generated mask
60  *
61  * Return: 0 if given parameters are valid, -EINVAL otherwise
62  */
63 int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
64 {
65         int i;
66
67         if (sob_mask == 0)
68                 return -EINVAL;
69
70         if (sob_mask == 0x1) {
71                 *mask = ~(1 << (sob_base & 0x7));
72         } else {
73                 /* find msb in order to verify sob range is valid */
74                 for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
75                         if (BIT(i) & sob_mask)
76                                 break;
77
78                 if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
79                         return -EINVAL;
80
81                 *mask = ~sob_mask;
82         }
83
84         return 0;
85 }
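/*
 * Example: for sob_base = 10 the offset inside the 8-aligned SOB group is
 * 10 & 0x7 = 2, so sob_mask = 0x1 yields *mask = (u8)~(1 << 2) = 0xfb.
 * For a multi-bit sob_mask the result is simply ~sob_mask, but only after
 * verifying that its most significant bit still fits inside the
 * HL_MAX_SOBS_PER_MONITOR window that starts at the base offset.
 */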
86
87 static void sob_reset_work(struct work_struct *work)
88 {
89         struct hl_cs_compl *hl_cs_cmpl =
90                 container_of(work, struct hl_cs_compl, sob_reset_work);
91         struct hl_device *hdev = hl_cs_cmpl->hdev;
92
93         /*
94          * A signal CS can get completion while the corresponding wait
95          * for signal CS is on its way to the PQ. The wait for signal CS
96          * will get stuck if the signal CS incremented the SOB to its
97          * max value and there are no pending (submitted) waits on this
98          * SOB.
99          * We do the following to avoid this situation:
100          * 1. The wait for signal CS must get a ref for the signal CS as
101          *    soon as possible in cs_ioctl_signal_wait() and put it
102          *    before being submitted to the PQ but after it incremented
103          *    the SOB refcnt in init_signal_wait_cs().
104          * 2. Signal/Wait for signal CS will decrement the SOB refcnt
105          *    here.
106          * These two measures guarantee that the wait for signal CS will
107          * reset the SOB upon completion rather than the signal CS and
108          * hence the above scenario is avoided.
109          */
110         kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
111
112         if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
113                 hdev->asic_funcs->reset_sob_group(hdev,
114                                 hl_cs_cmpl->sob_group);
115
116         kfree(hl_cs_cmpl);
117 }
118
119 static void hl_fence_release(struct kref *kref)
120 {
121         struct hl_fence *fence =
122                 container_of(kref, struct hl_fence, refcount);
123         struct hl_cs_compl *hl_cs_cmpl =
124                 container_of(fence, struct hl_cs_compl, base_fence);
125         struct hl_device *hdev = hl_cs_cmpl->hdev;
126
127         /* EBUSY means the CS was never submitted and hence we don't have
128          * an attached hw_sob object that we should handle here
129          */
130         if (fence->error == -EBUSY)
131                 goto free;
132
133         if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
134                 (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
135                 (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {
136
137                 dev_dbg(hdev->dev,
138                         "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
139                         hl_cs_cmpl->cs_seq,
140                         hl_cs_cmpl->type,
141                         hl_cs_cmpl->hw_sob->sob_id,
142                         hl_cs_cmpl->sob_val);
143
144                 queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work);
145
146                 return;
147         }
148
149 free:
150         kfree(hl_cs_cmpl);
151 }
152
153 void hl_fence_put(struct hl_fence *fence)
154 {
155         if (fence)
156                 kref_put(&fence->refcount, hl_fence_release);
157 }
158
159 void hl_fence_get(struct hl_fence *fence)
160 {
161         if (fence)
162                 kref_get(&fence->refcount);
163 }
164
165 static void hl_fence_init(struct hl_fence *fence, u64 sequence)
166 {
167         kref_init(&fence->refcount);
168         fence->cs_sequence = sequence;
169         fence->error = 0;
170         fence->timestamp = ktime_set(0, 0);
171         init_completion(&fence->completion);
172 }
173
174 void cs_get(struct hl_cs *cs)
175 {
176         kref_get(&cs->refcount);
177 }
178
179 static int cs_get_unless_zero(struct hl_cs *cs)
180 {
181         return kref_get_unless_zero(&cs->refcount);
182 }
183
184 static void cs_put(struct hl_cs *cs)
185 {
186         kref_put(&cs->refcount, cs_do_release);
187 }
188
189 static void cs_job_do_release(struct kref *ref)
190 {
191         struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);
192
193         kfree(job);
194 }
195
196 static void cs_job_put(struct hl_cs_job *job)
197 {
198         kref_put(&job->refcount, cs_job_do_release);
199 }
200
201 bool cs_needs_completion(struct hl_cs *cs)
202 {
203         /* In case this is a staged CS, only the last CS in the sequence should
204          * get a completion; any non-staged CS always gets a completion
205          */
206         if (cs->staged_cs && !cs->staged_last)
207                 return false;
208
209         return true;
210 }
211
212 bool cs_needs_timeout(struct hl_cs *cs)
213 {
214         /* In case this is a staged CS, only the first CS in the sequence should
215          * get a timeout; any non-staged CS always gets a timeout
216          */
217         if (cs->staged_cs && !cs->staged_first)
218                 return false;
219
220         return true;
221 }
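/*
 * For example, in a staged submission of three CS's only the one marked
 * staged_first is armed with a TDR timeout and only the one marked
 * staged_last gets a completion; the middle CS gets neither. Any
 * non-staged CS gets both.
 */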
222
223 static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
224 {
225         /*
226          * A patched CB is created for external queue jobs, and for H/W queue
227          * jobs if the user CB was allocated by the driver and the MMU is disabled.
228          */
229         return (job->queue_type == QUEUE_TYPE_EXT ||
230                         (job->queue_type == QUEUE_TYPE_HW &&
231                                         job->is_kernel_allocated_cb &&
232                                         !hdev->mmu_enable));
233 }
234
235 /*
236  * cs_parser - parse the user command submission
237  *
238  * @hpriv: pointer to the private data of the fd
239  * @job: pointer to the job that holds the command submission info
240  *
241  * The function parses the command submission of the user. It calls the
242  * ASIC specific parser, which returns a list of memory blocks to send
243  * to the device as different command buffers
244  *
245  */
246 static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
247 {
248         struct hl_device *hdev = hpriv->hdev;
249         struct hl_cs_parser parser;
250         int rc;
251
252         parser.ctx_id = job->cs->ctx->asid;
253         parser.cs_sequence = job->cs->sequence;
254         parser.job_id = job->id;
255
256         parser.hw_queue_id = job->hw_queue_id;
257         parser.job_userptr_list = &job->userptr_list;
258         parser.patched_cb = NULL;
259         parser.user_cb = job->user_cb;
260         parser.user_cb_size = job->user_cb_size;
261         parser.queue_type = job->queue_type;
262         parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
263         job->patched_cb = NULL;
264         parser.completion = cs_needs_completion(job->cs);
265
266         rc = hdev->asic_funcs->cs_parser(hdev, &parser);
267
268         if (is_cb_patched(hdev, job)) {
269                 if (!rc) {
270                         job->patched_cb = parser.patched_cb;
271                         job->job_cb_size = parser.patched_cb_size;
272                         job->contains_dma_pkt = parser.contains_dma_pkt;
273                         atomic_inc(&job->patched_cb->cs_cnt);
274                 }
275
276                 /*
277                  * Whether the parsing worked or not, we don't need the
278                  * original CB anymore because it was already parsed and
279                  * won't be accessed again for this CS
280                  */
281                 atomic_dec(&job->user_cb->cs_cnt);
282                 hl_cb_put(job->user_cb);
283                 job->user_cb = NULL;
284         } else if (!rc) {
285                 job->job_cb_size = job->user_cb_size;
286         }
287
288         return rc;
289 }
290
291 static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
292 {
293         struct hl_cs *cs = job->cs;
294
295         if (is_cb_patched(hdev, job)) {
296                 hl_userptr_delete_list(hdev, &job->userptr_list);
297
298                 /*
299                  * We might arrive here from rollback and patched CB wasn't
300                  * created, so we need to check it's not NULL
301                  */
302                 if (job->patched_cb) {
303                         atomic_dec(&job->patched_cb->cs_cnt);
304                         hl_cb_put(job->patched_cb);
305                 }
306         }
307
308         /* For H/W queue jobs, if a user CB was allocated by the driver and the
309          * MMU is enabled, the user CB isn't released in cs_parser() and thus
310          * should be released here.
311          * This is also true for INT queue jobs which were allocated by the driver
312          */
313         if (job->is_kernel_allocated_cb &&
314                 ((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) ||
315                                 job->queue_type == QUEUE_TYPE_INT)) {
316                 atomic_dec(&job->user_cb->cs_cnt);
317                 hl_cb_put(job->user_cb);
318         }
319
320         /*
321          * This is the only place where there can be multiple threads
322          * modifying the list at the same time
323          */
324         spin_lock(&cs->job_lock);
325         list_del(&job->cs_node);
326         spin_unlock(&cs->job_lock);
327
328         hl_debugfs_remove_job(hdev, job);
329
330         /* We decrement reference only for a CS that gets completion
331          * because the reference was incremented only for this kind of CS
332          * right before it was scheduled.
333          *
334          * In staged submission, only the last CS marked as 'staged_last'
335          * gets completion, hence its release function will be called from here.
336  * As for all the other CS's in the staged submission, which do not get
337          * completion, their CS reference will be decremented by the
338          * 'staged_last' CS during the CS release flow.
339          * All relevant PQ CI counters will be incremented during the CS release
340          * flow by calling 'hl_hw_queue_update_ci'.
341          */
342         if (cs_needs_completion(cs) &&
343                 (job->queue_type == QUEUE_TYPE_EXT ||
344                         job->queue_type == QUEUE_TYPE_HW))
345                 cs_put(cs);
346
347         cs_job_put(job);
348 }
349
350 /*
351  * hl_staged_cs_find_first - locate the first CS in this staged submission
352  *
353  * @hdev: pointer to device structure
354  * @cs_seq: staged submission sequence number
355  *
356  * @note: This function must be called under 'hdev->cs_mirror_lock'
357  *
358  * Find and return a CS pointer with the given sequence
359  */
360 struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
361 {
362         struct hl_cs *cs;
363
364         list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
365                 if (cs->staged_cs && cs->staged_first &&
366                                 cs->sequence == cs_seq)
367                         return cs;
368
369         return NULL;
370 }
371
372 /*
373  * is_staged_cs_last_exists - returns true if the last CS in sequence exists
374  *
375  * @hdev: pointer to device structure
376  * @cs: staged submission member
377  *
378  */
379 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
380 {
381         struct hl_cs *last_entry;
382
383         last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
384                                                                 staged_cs_node);
385
386         if (last_entry->staged_last)
387                 return true;
388
389         return false;
390 }
391
392 /*
393  * staged_cs_get - get CS reference if this CS is a part of a staged submission
394  *
395  * @hdev: pointer to device structure
396  * @cs: current CS
398  *
399  * Increment CS reference for every CS in this staged submission except for
400  * the CS which gets completion.
401  */
402 static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
403 {
404         /* Only the last CS in this staged submission will get a completion.
405          * We must increment the reference for all other CS's in this
406          * staged submission.
407          * Once we get a completion we will release the whole staged submission.
408          */
409         if (!cs->staged_last)
410                 cs_get(cs);
411 }
412
413 /*
414  * staged_cs_put - put a CS in case it is part of staged submission
415  *
416  * @hdev: pointer to device structure
417  * @cs: CS to put
418  *
419  * This function decrements a CS reference (for a non-completion CS)
420  */
421 static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
422 {
423         /* We release all CS's in a staged submission except the last
424          * CS, whose reference we never incremented.
425          */
426         if (!cs_needs_completion(cs))
427                 cs_put(cs);
428 }
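/*
 * Note that staged_cs_get() and staged_cs_put() are symmetric: a reference
 * is taken for every staged CS except the staged_last one, and
 * cs_needs_completion() is false for exactly that same set, so each
 * reference taken in staged_cs_get() is dropped in staged_cs_put(), either
 * from cs_do_release() of the staged_last CS or from cs_rollback().
 */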
429
430 static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
431 {
432         bool next_entry_found = false;
433         struct hl_cs *next;
434
435         if (!cs_needs_timeout(cs))
436                 return;
437
438         spin_lock(&hdev->cs_mirror_lock);
439
440         /* We need to handle TDR only once for the complete staged submission.
441          * Hence, we choose the CS that reaches this function first, which is
442          * the CS marked as 'staged_last'.
443          */
444         if (cs->staged_cs && cs->staged_last)
445                 cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
446
447         spin_unlock(&hdev->cs_mirror_lock);
448
449         /* Don't cancel TDR in case this CS has timed out, because we might be
450          * running from the TDR context
451          */
452         if (cs && (cs->timedout ||
453                         hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
454                 return;
455
456         if (cs && cs->tdr_active)
457                 cancel_delayed_work_sync(&cs->work_tdr);
458
459         spin_lock(&hdev->cs_mirror_lock);
460
461         /* queue TDR for next CS */
462         list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
463                 if (cs_needs_timeout(next)) {
464                         next_entry_found = true;
465                         break;
466                 }
467
468         if (next_entry_found && !next->tdr_active) {
469                 next->tdr_active = true;
470                 schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
471         }
472
473         spin_unlock(&hdev->cs_mirror_lock);
474 }
475
476 static void cs_do_release(struct kref *ref)
477 {
478         struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
479         struct hl_device *hdev = cs->ctx->hdev;
480         struct hl_cs_job *job, *tmp;
481
482         cs->completed = true;
483
484         /*
485          * Although reaching here means that all external jobs have
486          * finished (each one of them took a refcnt on the CS), we still
487          * need to go over the internal jobs and complete them. Otherwise, we
488          * will have leaked memory and what's worse, the CS object (and
489          * potentially the CTX object) could be released, while the JOB
490          * still holds a pointer to them (but no reference).
491          */
492         list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
493                 complete_job(hdev, job);
494
495         if (!cs->submitted) {
496                 /* In case the wait for signal CS was submitted, the put occurs
497                  * in init_signal_wait_cs() or collective_wait_init_cs()
498                  * right before hanging on the PQ.
499                  */
500                 if (cs->type == CS_TYPE_WAIT ||
501                                 cs->type == CS_TYPE_COLLECTIVE_WAIT)
502                         hl_fence_put(cs->signal_fence);
503
504                 goto out;
505         }
506
507         /* Need to update CI for all queue jobs that do not get completion */
508         hl_hw_queue_update_ci(cs);
509
510         /* remove CS from CS mirror list */
511         spin_lock(&hdev->cs_mirror_lock);
512         list_del_init(&cs->mirror_node);
513         spin_unlock(&hdev->cs_mirror_lock);
514
515         cs_handle_tdr(hdev, cs);
516
517         if (cs->staged_cs) {
518                 /* the completion CS decrements reference for the entire
519                  * staged submission
520                  */
521                 if (cs->staged_last) {
522                         struct hl_cs *staged_cs, *tmp;
523
524                         list_for_each_entry_safe(staged_cs, tmp,
525                                         &cs->staged_cs_node, staged_cs_node)
526                                 staged_cs_put(hdev, staged_cs);
527                 }
528
529                 /* A staged CS will be a member of the list only after it
530                  * was submitted. We used 'cs_mirror_lock' when inserting
531                  * it into the list, so we use it again when removing it
532                  */
533                 if (cs->submitted) {
534                         spin_lock(&hdev->cs_mirror_lock);
535                         list_del(&cs->staged_cs_node);
536                         spin_unlock(&hdev->cs_mirror_lock);
537                 }
538         }
539
540 out:
541         /* Must be called before hl_ctx_put because inside we use ctx to get
542          * the device
543          */
544         hl_debugfs_remove_cs(cs);
545
546         hl_ctx_put(cs->ctx);
547
548         /* We need to mark an error for a non-submitted CS because in that case
549          * the hl fence release flow is different. Mainly, we don't need
550          * to handle hw_sob for signal/wait
551          */
552         if (cs->timedout)
553                 cs->fence->error = -ETIMEDOUT;
554         else if (cs->aborted)
555                 cs->fence->error = -EIO;
556         else if (!cs->submitted)
557                 cs->fence->error = -EBUSY;
558
559         if (cs->timestamp)
560                 cs->fence->timestamp = ktime_get();
561         complete_all(&cs->fence->completion);
562         hl_fence_put(cs->fence);
563
564         kfree(cs->jobs_in_queue_cnt);
565         kfree(cs);
566 }
567
568 static void cs_timedout(struct work_struct *work)
569 {
570         struct hl_device *hdev;
571         int rc;
572         struct hl_cs *cs = container_of(work, struct hl_cs,
573                                                  work_tdr.work);
574         rc = cs_get_unless_zero(cs);
575         if (!rc)
576                 return;
577
578         if ((!cs->submitted) || (cs->completed)) {
579                 cs_put(cs);
580                 return;
581         }
582
583         /* Mark that the CS timed out so we won't try to cancel its TDR */
584         cs->timedout = true;
585
586         hdev = cs->ctx->hdev;
587
588         switch (cs->type) {
589         case CS_TYPE_SIGNAL:
590                 dev_err(hdev->dev,
591                         "Signal command submission %llu has not finished in time!\n",
592                         cs->sequence);
593                 break;
594
595         case CS_TYPE_WAIT:
596                 dev_err(hdev->dev,
597                         "Wait command submission %llu has not finished in time!\n",
598                         cs->sequence);
599                 break;
600
601         case CS_TYPE_COLLECTIVE_WAIT:
602                 dev_err(hdev->dev,
603                         "Collective Wait command submission %llu has not finished in time!\n",
604                         cs->sequence);
605                 break;
606
607         default:
608                 dev_err(hdev->dev,
609                         "Command submission %llu has not finished in time!\n",
610                         cs->sequence);
611                 break;
612         }
613
614         cs_put(cs);
615
616         if (hdev->reset_on_lockup)
617                 hl_device_reset(hdev, 0);
618         else
619                 hdev->needs_reset = true;
620 }
621
622 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
623                         enum hl_cs_type cs_type, u64 user_sequence,
624                         struct hl_cs **cs_new, u32 flags, u32 timeout)
625 {
626         struct hl_cs_counters_atomic *cntr;
627         struct hl_fence *other = NULL;
628         struct hl_cs_compl *cs_cmpl;
629         struct hl_cs *cs;
630         int rc;
631
632         cntr = &hdev->aggregated_cs_counters;
633
634         cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
635         if (!cs)
636                 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
637
638         if (!cs) {
639                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
640                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
641                 return -ENOMEM;
642         }
643
644         /* increment refcnt for context */
645         hl_ctx_get(hdev, ctx);
646
647         cs->ctx = ctx;
648         cs->submitted = false;
649         cs->completed = false;
650         cs->type = cs_type;
651         cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
652         cs->timeout_jiffies = timeout;
653         INIT_LIST_HEAD(&cs->job_list);
654         INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
655         kref_init(&cs->refcount);
656         spin_lock_init(&cs->job_lock);
657
658         cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
659         if (!cs_cmpl)
660                 cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL);
661
662         if (!cs_cmpl) {
663                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
664                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
665                 rc = -ENOMEM;
666                 goto free_cs;
667         }
668
669         cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
670                         sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
671         if (!cs->jobs_in_queue_cnt)
672                 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
673                                 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
674
675         if (!cs->jobs_in_queue_cnt) {
676                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
677                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
678                 rc = -ENOMEM;
679                 goto free_cs_cmpl;
680         }
681
682         cs_cmpl->hdev = hdev;
683         cs_cmpl->type = cs->type;
684         spin_lock_init(&cs_cmpl->lock);
685         INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work);
686         cs->fence = &cs_cmpl->base_fence;
687
688         spin_lock(&ctx->cs_lock);
689
690         cs_cmpl->cs_seq = ctx->cs_sequence;
691         other = ctx->cs_pending[cs_cmpl->cs_seq &
692                                 (hdev->asic_prop.max_pending_cs - 1)];
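        /*
         * cs_pending is a ring of fences indexed by the low bits of the
         * sequence number; the masking above assumes max_pending_cs is a
         * power of two. A slot may only be reused once the fence previously
         * stored in it has completed, which is what the check below enforces.
         */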
693
694         if (other && !completion_done(&other->completion)) {
695                 /* If the following statement is true, it means we have reached
696                  * a point in which only part of the staged submission was
697                  * submitted and we don't have enough room in the 'cs_pending'
698                  * array for the rest of the submission.
699                  * This causes a deadlock because this CS will never be
700                  * completed as it depends on future CS's for completion.
701                  */
702                 if (other->cs_sequence == user_sequence)
703                         dev_crit_ratelimited(hdev->dev,
704                                 "Staged CS %llu deadlock due to lack of resources",
705                                 user_sequence);
706
707                 dev_dbg_ratelimited(hdev->dev,
708                         "Rejecting CS because of too many in-flight CS\n");
709                 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
710                 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
711                 rc = -EAGAIN;
712                 goto free_fence;
713         }
714
715         /* init hl_fence */
716         hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
717
718         cs->sequence = cs_cmpl->cs_seq;
719
720         ctx->cs_pending[cs_cmpl->cs_seq &
721                         (hdev->asic_prop.max_pending_cs - 1)] =
722                                                         &cs_cmpl->base_fence;
723         ctx->cs_sequence++;
724
725         hl_fence_get(&cs_cmpl->base_fence);
726
727         hl_fence_put(other);
728
729         spin_unlock(&ctx->cs_lock);
730
731         *cs_new = cs;
732
733         return 0;
734
735 free_fence:
736         spin_unlock(&ctx->cs_lock);
737         kfree(cs->jobs_in_queue_cnt);
738 free_cs_cmpl:
739         kfree(cs_cmpl);
740 free_cs:
741         kfree(cs);
742         hl_ctx_put(ctx);
743         return rc;
744 }
745
746 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
747 {
748         struct hl_cs_job *job, *tmp;
749
750         staged_cs_put(hdev, cs);
751
752         list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
753                 complete_job(hdev, job);
754 }
755
756 void hl_cs_rollback_all(struct hl_device *hdev)
757 {
758         int i;
759         struct hl_cs *cs, *tmp;
760
761         flush_workqueue(hdev->sob_reset_wq);
762
763         /* flush all completions before iterating over the CS mirror list in
764          * order to avoid a race with the release functions
765          */
766         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
767                 flush_workqueue(hdev->cq_wq[i]);
768
769         /* Make sure we don't have leftovers in the CS mirror list */
770         list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
771                 cs_get(cs);
772                 cs->aborted = true;
773                 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
774                                 cs->ctx->asid, cs->sequence);
775                 cs_rollback(hdev, cs);
776                 cs_put(cs);
777         }
778 }
779
780 void hl_pending_cb_list_flush(struct hl_ctx *ctx)
781 {
782         struct hl_pending_cb *pending_cb, *tmp;
783
784         list_for_each_entry_safe(pending_cb, tmp,
785                         &ctx->pending_cb_list, cb_node) {
786                 list_del(&pending_cb->cb_node);
787                 hl_cb_put(pending_cb->cb);
788                 kfree(pending_cb);
789         }
790 }
791
792 static void
793 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
794 {
795         struct hl_user_pending_interrupt *pend;
796
797         spin_lock(&interrupt->wait_list_lock);
798         list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
799                 pend->fence.error = -EIO;
800                 complete_all(&pend->fence.completion);
801         }
802         spin_unlock(&interrupt->wait_list_lock);
803 }
804
805 void hl_release_pending_user_interrupts(struct hl_device *hdev)
806 {
807         struct asic_fixed_properties *prop = &hdev->asic_prop;
808         struct hl_user_interrupt *interrupt;
809         int i;
810
811         if (!prop->user_interrupt_count)
812                 return;
813
814         /* We iterate through the user interrupt requests and wake up all
815          * user threads waiting for interrupt completion. We iterate the
816          * list under a lock; this is why all user threads, once awake,
817          * will wait on the same lock and will release the waiting object upon
818          * unlock.
819          */
820
821         for (i = 0 ; i < prop->user_interrupt_count ; i++) {
822                 interrupt = &hdev->user_interrupt[i];
823                 wake_pending_user_interrupt_threads(interrupt);
824         }
825
826         interrupt = &hdev->common_user_interrupt;
827         wake_pending_user_interrupt_threads(interrupt);
828 }
829
830 static void job_wq_completion(struct work_struct *work)
831 {
832         struct hl_cs_job *job = container_of(work, struct hl_cs_job,
833                                                 finish_work);
834         struct hl_cs *cs = job->cs;
835         struct hl_device *hdev = cs->ctx->hdev;
836
837         /* job is no longer needed */
838         complete_job(hdev, job);
839 }
840
841 static int validate_queue_index(struct hl_device *hdev,
842                                 struct hl_cs_chunk *chunk,
843                                 enum hl_queue_type *queue_type,
844                                 bool *is_kernel_allocated_cb)
845 {
846         struct asic_fixed_properties *asic = &hdev->asic_prop;
847         struct hw_queue_properties *hw_queue_prop;
848
849         /* This must be checked here to prevent out-of-bounds access to
850          * hw_queues_props array
851          */
852         if (chunk->queue_index >= asic->max_queues) {
853                 dev_err(hdev->dev, "Queue index %d is invalid\n",
854                         chunk->queue_index);
855                 return -EINVAL;
856         }
857
858         hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
859
860         if (hw_queue_prop->type == QUEUE_TYPE_NA) {
861                 dev_err(hdev->dev, "Queue index %d is invalid\n",
862                         chunk->queue_index);
863                 return -EINVAL;
864         }
865
866         if (hw_queue_prop->driver_only) {
867                 dev_err(hdev->dev,
868                         "Queue index %d is restricted for the kernel driver\n",
869                         chunk->queue_index);
870                 return -EINVAL;
871         }
872
873         /* When hw queue type isn't QUEUE_TYPE_HW,
874          * USER_ALLOC_CB flag shall be treated as "don't care".
875          */
876         if (hw_queue_prop->type == QUEUE_TYPE_HW) {
877                 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
878                         if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
879                                 dev_err(hdev->dev,
880                                         "Queue index %d doesn't support user CB\n",
881                                         chunk->queue_index);
882                                 return -EINVAL;
883                         }
884
885                         *is_kernel_allocated_cb = false;
886                 } else {
887                         if (!(hw_queue_prop->cb_alloc_flags &
888                                         CB_ALLOC_KERNEL)) {
889                                 dev_err(hdev->dev,
890                                         "Queue index %d doesn't support kernel CB\n",
891                                         chunk->queue_index);
892                                 return -EINVAL;
893                         }
894
895                         *is_kernel_allocated_cb = true;
896                 }
897         } else {
898                 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
899                                                 & CB_ALLOC_KERNEL);
900         }
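        /*
         * Summary of the CB ownership decision above:
         * - QUEUE_TYPE_HW with the USER_ALLOC_CB chunk flag: user CB, allowed
         *   only if the queue advertises CB_ALLOC_USER.
         * - QUEUE_TYPE_HW without the flag: kernel CB, allowed only if the
         *   queue advertises CB_ALLOC_KERNEL.
         * - Any other queue type: the chunk flag is ignored and the queue's
         *   CB_ALLOC_KERNEL capability alone decides.
         */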
901
902         *queue_type = hw_queue_prop->type;
903         return 0;
904 }
905
906 static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
907                                         struct hl_cb_mgr *cb_mgr,
908                                         struct hl_cs_chunk *chunk)
909 {
910         struct hl_cb *cb;
911         u32 cb_handle;
912
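        /*
         * chunk->cb_handle is the page-shifted value returned to user-space
         * by the CB create IOCTL, so shift it back down to the raw handle
         * that hl_cb_get() expects.
         */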
913         cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
914
915         cb = hl_cb_get(hdev, cb_mgr, cb_handle);
916         if (!cb) {
917                 dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
918                 return NULL;
919         }
920
921         if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
922                 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
923                 goto release_cb;
924         }
925
926         atomic_inc(&cb->cs_cnt);
927
928         return cb;
929
930 release_cb:
931         hl_cb_put(cb);
932         return NULL;
933 }
934
935 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
936                 enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
937 {
938         struct hl_cs_job *job;
939
940         job = kzalloc(sizeof(*job), GFP_ATOMIC);
941         if (!job)
942                 job = kzalloc(sizeof(*job), GFP_KERNEL);
943
944         if (!job)
945                 return NULL;
946
947         kref_init(&job->refcount);
948         job->queue_type = queue_type;
949         job->is_kernel_allocated_cb = is_kernel_allocated_cb;
950
951         if (is_cb_patched(hdev, job))
952                 INIT_LIST_HEAD(&job->userptr_list);
953
954         if (job->queue_type == QUEUE_TYPE_EXT)
955                 INIT_WORK(&job->finish_work, job_wq_completion);
956
957         return job;
958 }
959
960 static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
961 {
962         if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
963                 return CS_TYPE_SIGNAL;
964         else if (cs_type_flags & HL_CS_FLAGS_WAIT)
965                 return CS_TYPE_WAIT;
966         else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
967                 return CS_TYPE_COLLECTIVE_WAIT;
968         else
969                 return CS_TYPE_DEFAULT;
970 }
971
972 static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
973 {
974         struct hl_device *hdev = hpriv->hdev;
975         struct hl_ctx *ctx = hpriv->ctx;
976         u32 cs_type_flags, num_chunks;
977         enum hl_device_status status;
978         enum hl_cs_type cs_type;
979
980         if (!hl_device_operational(hdev, &status)) {
981                 dev_warn_ratelimited(hdev->dev,
982                         "Device is %s. Can't submit new CS\n",
983                         hdev->status[status]);
984                 return -EBUSY;
985         }
986
987         if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
988                         !hdev->supports_staged_submission) {
989                 dev_err(hdev->dev, "staged submission not supported");
990                 return -EPERM;
991         }
992
993         cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
994
995         if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
996                 dev_err(hdev->dev,
997                         "CS type flags are mutually exclusive, context %d\n",
998                         ctx->asid);
999                 return -EINVAL;
1000         }
1001
1002         cs_type = hl_cs_get_cs_type(cs_type_flags);
1003         num_chunks = args->in.num_chunks_execute;
1004
1005         if (unlikely((cs_type != CS_TYPE_DEFAULT) &&
1006                                         !hdev->supports_sync_stream)) {
1007                 dev_err(hdev->dev, "Sync stream CS is not supported\n");
1008                 return -EINVAL;
1009         }
1010
1011         if (cs_type == CS_TYPE_DEFAULT) {
1012                 if (!num_chunks) {
1013                         dev_err(hdev->dev,
1014                                 "Got execute CS with 0 chunks, context %d\n",
1015                                 ctx->asid);
1016                         return -EINVAL;
1017                 }
1018         } else if (num_chunks != 1) {
1019                 dev_err(hdev->dev,
1020                         "Sync stream CS mandates one chunk only, context %d\n",
1021                         ctx->asid);
1022                 return -EINVAL;
1023         }
1024
1025         return 0;
1026 }
1027
1028 static int hl_cs_copy_chunk_array(struct hl_device *hdev,
1029                                         struct hl_cs_chunk **cs_chunk_array,
1030                                         void __user *chunks, u32 num_chunks,
1031                                         struct hl_ctx *ctx)
1032 {
1033         u32 size_to_copy;
1034
1035         if (num_chunks > HL_MAX_JOBS_PER_CS) {
1036                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1037                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1038                 dev_err(hdev->dev,
1039                         "Number of chunks can NOT be larger than %d\n",
1040                         HL_MAX_JOBS_PER_CS);
1041                 return -EINVAL;
1042         }
1043
1044         *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
1045                                         GFP_ATOMIC);
1046         if (!*cs_chunk_array)
1047                 *cs_chunk_array = kmalloc_array(num_chunks,
1048                                         sizeof(**cs_chunk_array), GFP_KERNEL);
1049         if (!*cs_chunk_array) {
1050                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1051                 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1052                 return -ENOMEM;
1053         }
1054
1055         size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
1056         if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
1057                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1058                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1059                 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
1060                 kfree(*cs_chunk_array);
1061                 return -EFAULT;
1062         }
1063
1064         return 0;
1065 }
1066
1067 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
1068                                 u64 sequence, u32 flags)
1069 {
1070         if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
1071                 return 0;
1072
1073         cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
1074         cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);
1075
1076         if (cs->staged_first) {
1077                 /* Staged CS sequence is the first CS sequence */
1078                 INIT_LIST_HEAD(&cs->staged_cs_node);
1079                 cs->staged_sequence = cs->sequence;
1080         } else {
1081                 /* User sequence will be validated in 'hl_hw_queue_schedule_cs'
1082                  * under the cs_mirror_lock
1083                  */
1084                 cs->staged_sequence = sequence;
1085         }
1086
1087         /* Increment CS reference if needed */
1088         staged_cs_get(hdev, cs);
1089
1090         cs->staged_cs = true;
1091
1092         return 0;
1093 }
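/*
 * For a three-CS staged submission handled above: the FIRST CS opens the
 * sequence and its own sequence number becomes the staged_sequence of the
 * whole submission, while the mid and LAST CS's inherit the staged_sequence
 * passed by the user. Every CS except the LAST one takes an extra reference
 * via staged_cs_get(), since only the LAST CS gets a completion.
 */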
1094
1095 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
1096                                 u32 num_chunks, u64 *cs_seq, u32 flags,
1097                                 u32 timeout)
1098 {
1099         bool staged_mid, int_queues_only = true;
1100         struct hl_device *hdev = hpriv->hdev;
1101         struct hl_cs_chunk *cs_chunk_array;
1102         struct hl_cs_counters_atomic *cntr;
1103         struct hl_ctx *ctx = hpriv->ctx;
1104         struct hl_cs_job *job;
1105         struct hl_cs *cs;
1106         struct hl_cb *cb;
1107         u64 user_sequence;
1108         int rc, i;
1109
1110         cntr = &hdev->aggregated_cs_counters;
1111         user_sequence = *cs_seq;
1112         *cs_seq = ULLONG_MAX;
1113
1114         rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1115                         hpriv->ctx);
1116         if (rc)
1117                 goto out;
1118
1119         if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1120                         !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
1121                 staged_mid = true;
1122         else
1123                 staged_mid = false;
1124
1125         rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
1126                         staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
1127                         timeout);
1128         if (rc)
1129                 goto free_cs_chunk_array;
1130
1131         *cs_seq = cs->sequence;
1132
1133         hl_debugfs_add_cs(cs);
1134
1135         rc = cs_staged_submission(hdev, cs, user_sequence, flags);
1136         if (rc)
1137                 goto free_cs_object;
1138
1139         /* Validate ALL the CS chunks before submitting the CS */
1140         for (i = 0 ; i < num_chunks ; i++) {
1141                 struct hl_cs_chunk *chunk = &cs_chunk_array[i];
1142                 enum hl_queue_type queue_type;
1143                 bool is_kernel_allocated_cb;
1144
1145                 rc = validate_queue_index(hdev, chunk, &queue_type,
1146                                                 &is_kernel_allocated_cb);
1147                 if (rc) {
1148                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1149                         atomic64_inc(&cntr->validation_drop_cnt);
1150                         goto free_cs_object;
1151                 }
1152
1153                 if (is_kernel_allocated_cb) {
1154                         cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
1155                         if (!cb) {
1156                                 atomic64_inc(
1157                                         &ctx->cs_counters.validation_drop_cnt);
1158                                 atomic64_inc(&cntr->validation_drop_cnt);
1159                                 rc = -EINVAL;
1160                                 goto free_cs_object;
1161                         }
1162                 } else {
1163                         cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
1164                 }
1165
1166                 if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
1167                         int_queues_only = false;
1168
1169                 job = hl_cs_allocate_job(hdev, queue_type,
1170                                                 is_kernel_allocated_cb);
1171                 if (!job) {
1172                         atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1173                         atomic64_inc(&cntr->out_of_mem_drop_cnt);
1174                         dev_err(hdev->dev, "Failed to allocate a new job\n");
1175                         rc = -ENOMEM;
1176                         if (is_kernel_allocated_cb)
1177                                 goto release_cb;
1178
1179                         goto free_cs_object;
1180                 }
1181
1182                 job->id = i + 1;
1183                 job->cs = cs;
1184                 job->user_cb = cb;
1185                 job->user_cb_size = chunk->cb_size;
1186                 job->hw_queue_id = chunk->queue_index;
1187
1188                 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1189
1190                 list_add_tail(&job->cs_node, &cs->job_list);
1191
1192                 /*
1193                  * Increment CS reference. When CS reference is 0, CS is
1194                  * done and can be signaled to the user and all its resources freed.
1195                  * Only increment for JOBs on external or H/W queues, because
1196                  * we get completion only for those JOBs
1197                  */
1198                 if (cs_needs_completion(cs) &&
1199                         (job->queue_type == QUEUE_TYPE_EXT ||
1200                                 job->queue_type == QUEUE_TYPE_HW))
1201                         cs_get(cs);
1202
1203                 hl_debugfs_add_job(hdev, job);
1204
1205                 rc = cs_parser(hpriv, job);
1206                 if (rc) {
1207                         atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
1208                         atomic64_inc(&cntr->parsing_drop_cnt);
1209                         dev_err(hdev->dev,
1210                                 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
1211                                 cs->ctx->asid, cs->sequence, job->id, rc);
1212                         goto free_cs_object;
1213                 }
1214         }
1215
1216         /* We allow a CS with any queue type combination as long as it does
1217          * not get a completion
1218          */
1219         if (int_queues_only && cs_needs_completion(cs)) {
1220                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1221                 atomic64_inc(&cntr->validation_drop_cnt);
1222                 dev_err(hdev->dev,
1223                         "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
1224                         cs->ctx->asid, cs->sequence);
1225                 rc = -EINVAL;
1226                 goto free_cs_object;
1227         }
1228
1229         rc = hl_hw_queue_schedule_cs(cs);
1230         if (rc) {
1231                 if (rc != -EAGAIN)
1232                         dev_err(hdev->dev,
1233                                 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
1234                                 cs->ctx->asid, cs->sequence, rc);
1235                 goto free_cs_object;
1236         }
1237
1238         rc = HL_CS_STATUS_SUCCESS;
1239         goto put_cs;
1240
1241 release_cb:
1242         atomic_dec(&cb->cs_cnt);
1243         hl_cb_put(cb);
1244 free_cs_object:
1245         cs_rollback(hdev, cs);
1246         *cs_seq = ULLONG_MAX;
1247         /* The path below is both for good and erroneous exits */
1248 put_cs:
1249         /* We finished with the CS in this function, so put the ref */
1250         cs_put(cs);
1251 free_cs_chunk_array:
1252         kfree(cs_chunk_array);
1253 out:
1254         return rc;
1255 }
1256
1257 static int pending_cb_create_job(struct hl_device *hdev, struct hl_ctx *ctx,
1258                 struct hl_cs *cs, struct hl_cb *cb, u32 size, u32 hw_queue_id)
1259 {
1260         struct hw_queue_properties *hw_queue_prop;
1261         struct hl_cs_counters_atomic *cntr;
1262         struct hl_cs_job *job;
1263
1264         hw_queue_prop = &hdev->asic_prop.hw_queues_props[hw_queue_id];
1265         cntr = &hdev->aggregated_cs_counters;
1266
1267         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1268         if (!job) {
1269                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1270                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1271                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1272                 return -ENOMEM;
1273         }
1274
1275         job->id = 0;
1276         job->cs = cs;
1277         job->user_cb = cb;
1278         atomic_inc(&job->user_cb->cs_cnt);
1279         job->user_cb_size = size;
1280         job->hw_queue_id = hw_queue_id;
1281         job->patched_cb = job->user_cb;
1282         job->job_cb_size = job->user_cb_size;
1283
1284         /* increment refcount since we get a completion for external queues */
1285         cs_get(cs);
1286
1287         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1288
1289         list_add_tail(&job->cs_node, &cs->job_list);
1290
1291         hl_debugfs_add_job(hdev, job);
1292
1293         return 0;
1294 }
1295
1296 static int hl_submit_pending_cb(struct hl_fpriv *hpriv)
1297 {
1298         struct hl_device *hdev = hpriv->hdev;
1299         struct hl_ctx *ctx = hpriv->ctx;
1300         struct hl_pending_cb *pending_cb, *tmp;
1301         struct list_head local_cb_list;
1302         struct hl_cs *cs;
1303         struct hl_cb *cb;
1304         u32 hw_queue_id;
1305         u32 cb_size;
1306         int process_list, rc = 0;
1307
1308         if (list_empty(&ctx->pending_cb_list))
1309                 return 0;
1310
1311         process_list = atomic_cmpxchg(&ctx->thread_pending_cb_token, 1, 0);
1312
1313         /* Only a single thread is allowed to process the list */
1314         if (!process_list)
1315                 return 0;
1316
1317         if (list_empty(&ctx->pending_cb_list))
1318                 goto free_pending_cb_token;
1319
1320         /* move all list elements to a local list */
1321         INIT_LIST_HEAD(&local_cb_list);
1322         spin_lock(&ctx->pending_cb_lock);
1323         list_for_each_entry_safe(pending_cb, tmp, &ctx->pending_cb_list,
1324                                                                 cb_node)
1325                 list_move_tail(&pending_cb->cb_node, &local_cb_list);
1326         spin_unlock(&ctx->pending_cb_lock);
1327
1328         rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs, 0,
1329                                 hdev->timeout_jiffies);
1330         if (rc)
1331                 goto add_list_elements;
1332
1333         hl_debugfs_add_cs(cs);
1334
1335         /* Iterate through pending cb list, create jobs and add to CS */
1336         list_for_each_entry(pending_cb, &local_cb_list, cb_node) {
1337                 cb = pending_cb->cb;
1338                 cb_size = pending_cb->cb_size;
1339                 hw_queue_id = pending_cb->hw_queue_id;
1340
1341                 rc = pending_cb_create_job(hdev, ctx, cs, cb, cb_size,
1342                                                                 hw_queue_id);
1343                 if (rc)
1344                         goto free_cs_object;
1345         }
1346
1347         rc = hl_hw_queue_schedule_cs(cs);
1348         if (rc) {
1349                 if (rc != -EAGAIN)
1350                         dev_err(hdev->dev,
1351                                 "Failed to submit CS %d.%llu (%d)\n",
1352                                 ctx->asid, cs->sequence, rc);
1353                 goto free_cs_object;
1354         }
1355
1356         /* pending cb was scheduled successfully */
1357         list_for_each_entry_safe(pending_cb, tmp, &local_cb_list, cb_node) {
1358                 list_del(&pending_cb->cb_node);
1359                 kfree(pending_cb);
1360         }
1361
1362         cs_put(cs);
1363
1364         goto free_pending_cb_token;
1365
1366 free_cs_object:
1367         cs_rollback(hdev, cs);
1368         cs_put(cs);
1369 add_list_elements:
1370         spin_lock(&ctx->pending_cb_lock);
1371         list_for_each_entry_safe_reverse(pending_cb, tmp, &local_cb_list,
1372                                                                 cb_node)
1373                 list_move(&pending_cb->cb_node, &ctx->pending_cb_list);
1374         spin_unlock(&ctx->pending_cb_lock);
1375 free_pending_cb_token:
1376         atomic_set(&ctx->thread_pending_cb_token, 1);
1377
1378         return rc;
1379 }
1380
1381 static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
1382                                 u64 *cs_seq)
1383 {
1384         struct hl_device *hdev = hpriv->hdev;
1385         struct hl_ctx *ctx = hpriv->ctx;
1386         bool need_soft_reset = false;
1387         int rc = 0, do_ctx_switch;
1388         void __user *chunks;
1389         u32 num_chunks, tmp;
1390         int ret;
1391
1392         do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
1393
1394         if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
1395                 mutex_lock(&hpriv->restore_phase_mutex);
1396
1397                 if (do_ctx_switch) {
1398                         rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
1399                         if (rc) {
1400                                 dev_err_ratelimited(hdev->dev,
1401                                         "Failed to switch to context %d, rejecting CS! %d\n",
1402                                         ctx->asid, rc);
1403                                 /*
1404                                  * If we timed out, or if the device is not IDLE
1405                                  * while we want to do a context switch (-EBUSY),
1406                                  * we need to soft-reset because the QMAN is
1407                                  * probably stuck. However, we can't call reset
1408                                  * here directly because of a deadlock, so we
1409                                  * need to do it at the very end of this
1410                                  * function.
1411                                  */
1412                                 if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
1413                                         need_soft_reset = true;
1414                                 mutex_unlock(&hpriv->restore_phase_mutex);
1415                                 goto out;
1416                         }
1417                 }
1418
1419                 hdev->asic_funcs->restore_phase_topology(hdev);
1420
1421                 chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
1422                 num_chunks = args->in.num_chunks_restore;
1423
1424                 if (!num_chunks) {
1425                         dev_dbg(hdev->dev,
1426                                 "Need to run restore phase but restore CS is empty\n");
1427                         rc = 0;
1428                 } else {
1429                         rc = cs_ioctl_default(hpriv, chunks, num_chunks,
1430                                         cs_seq, 0, hdev->timeout_jiffies);
1431                 }
1432
1433                 mutex_unlock(&hpriv->restore_phase_mutex);
1434
1435                 if (rc) {
1436                         dev_err(hdev->dev,
1437                                 "Failed to submit restore CS for context %d (%d)\n",
1438                                 ctx->asid, rc);
1439                         goto out;
1440                 }
1441
1442                 /* Need to wait for restore completion before execution phase */
1443                 if (num_chunks) {
1444                         enum hl_cs_wait_status status;
1445 wait_again:
1446                         ret = _hl_cs_wait_ioctl(hdev, ctx,
1447                                         jiffies_to_usecs(hdev->timeout_jiffies),
1448                                         *cs_seq, &status, NULL);
1449                         if (ret) {
1450                                 if (ret == -ERESTARTSYS) {
1451                                         usleep_range(100, 200);
1452                                         goto wait_again;
1453                                 }
1454
1455                                 dev_err(hdev->dev,
1456                                         "Restore CS for context %d failed to complete %d\n",
1457                                         ctx->asid, ret);
1458                                 rc = -ENOEXEC;
1459                                 goto out;
1460                         }
1461                 }
1462
1463                 ctx->thread_ctx_switch_wait_token = 1;
1464
1465         } else if (!ctx->thread_ctx_switch_wait_token) {
1466                 rc = hl_poll_timeout_memory(hdev,
1467                         &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
1468                         100, jiffies_to_usecs(hdev->timeout_jiffies), false);
1469
1470                 if (rc == -ETIMEDOUT) {
1471                         dev_err(hdev->dev,
1472                                 "context switch phase timeout (%d)\n", tmp);
1473                         goto out;
1474                 }
1475         }
1476
1477 out:
1478         if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
1479                 hl_device_reset(hdev, 0);
1480
1481         return rc;
1482 }
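
/*
 * Illustrative sketch only, not part of the driver: the restore phase handled
 * by hl_cs_ctx_switch() above can be forced from userspace by setting
 * HL_CS_FLAGS_FORCE_RESTORE and passing a restore chunk array next to the
 * execution chunks. The fd, the chunk arrays and the HL_IOCTL_CS request name
 * are assumptions taken from the uapi header and from user setup (the usual
 * <sys/ioctl.h> and habanalabs uapi includes are assumed), not from this file:
 *
 *        union hl_cs_args args = {0};
 *
 *        args.in.chunks_restore = (__u64) (uintptr_t) restore_chunks;
 *        args.in.num_chunks_restore = num_restore_chunks;
 *        args.in.chunks_execute = (__u64) (uintptr_t) execute_chunks;
 *        args.in.num_chunks_execute = num_execute_chunks;
 *        args.in.cs_flags = HL_CS_FLAGS_FORCE_RESTORE;
 *
 *        rc = ioctl(fd, HL_IOCTL_CS, &args);
 *        if (!rc)
 *                cs_seq = args.out.seq;
 */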
1483
1484 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
1485                 struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
1486 {
1487         u64 *signal_seq_arr = NULL;
1488         u32 size_to_copy, signal_seq_arr_len;
1489         int rc = 0;
1490
1491         signal_seq_arr_len = chunk->num_signal_seq_arr;
1492
1493         /* currently only one signal seq is supported */
1494         if (signal_seq_arr_len != 1) {
1495                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1496                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1497                 dev_err(hdev->dev,
1498                         "Wait for signal CS supports only one signal CS seq\n");
1499                 return -EINVAL;
1500         }
1501
1502         signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1503                                         sizeof(*signal_seq_arr),
1504                                         GFP_ATOMIC);
1505         if (!signal_seq_arr)
1506                 signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1507                                         sizeof(*signal_seq_arr),
1508                                         GFP_KERNEL);
1509         if (!signal_seq_arr) {
1510                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1511                 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1512                 return -ENOMEM;
1513         }
1514
1515         size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
1516         if (copy_from_user(signal_seq_arr,
1517                                 u64_to_user_ptr(chunk->signal_seq_arr),
1518                                 size_to_copy)) {
1519                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1520                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1521                 dev_err(hdev->dev,
1522                         "Failed to copy signal seq array from user\n");
1523                 rc = -EFAULT;
1524                 goto out;
1525         }
1526
1527         /* currently it is guaranteed to have only one signal seq */
1528         *signal_seq = signal_seq_arr[0];
1529
1530 out:
1531         kfree(signal_seq_arr);
1532
1533         return rc;
1534 }
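
/*
 * Illustrative sketch only, not part of the driver: the wait chunk consumed
 * by cs_ioctl_extract_signal_seq() carries a user pointer to a single signal
 * CS sequence, since only one entry is currently supported. The names
 * sync_stream_queue_idx and signal_seq are hypothetical; signal_seq is meant
 * to hold the sequence returned for the signal CS (see the sketch after
 * cs_ioctl_signal_wait()):
 *
 *        __u64 signal_seq;
 *        struct hl_cs_chunk wait_chunk = {0};
 *
 *        wait_chunk.queue_index = sync_stream_queue_idx;
 *        wait_chunk.signal_seq_arr = (__u64) (uintptr_t) &signal_seq;
 *        wait_chunk.num_signal_seq_arr = 1;
 */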
1535
1536 static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
1537                 struct hl_ctx *ctx, struct hl_cs *cs, enum hl_queue_type q_type,
1538                 u32 q_idx)
1539 {
1540         struct hl_cs_counters_atomic *cntr;
1541         struct hl_cs_job *job;
1542         struct hl_cb *cb;
1543         u32 cb_size;
1544
1545         cntr = &hdev->aggregated_cs_counters;
1546
1547         job = hl_cs_allocate_job(hdev, q_type, true);
1548         if (!job) {
1549                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1550                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1551                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1552                 return -ENOMEM;
1553         }
1554
1555         if (cs->type == CS_TYPE_WAIT)
1556                 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
1557         else
1558                 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
1559
1560         cb = hl_cb_kernel_create(hdev, cb_size,
1561                                 q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
1562         if (!cb) {
1563                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1564                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1565                 kfree(job);
1566                 return -EFAULT;
1567         }
1568
1569         job->id = 0;
1570         job->cs = cs;
1571         job->user_cb = cb;
1572         atomic_inc(&job->user_cb->cs_cnt);
1573         job->user_cb_size = cb_size;
1574         job->hw_queue_id = q_idx;
1575
1576         /*
1577          * No need for parsing, the user CB is the patched CB.
1578          * We call hl_cb_destroy() for two reasons: we don't need the CB in
1579          * the CB idr anymore, and we need to decrement its refcount as it
1580          * was incremented inside hl_cb_kernel_create().
1581          */
1582         job->patched_cb = job->user_cb;
1583         job->job_cb_size = job->user_cb_size;
1584         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1585
1586         /* increment refcount because for external queues we get a completion */
1587         cs_get(cs);
1588
1589         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1590
1591         list_add_tail(&job->cs_node, &cs->job_list);
1592
1593         hl_debugfs_add_job(hdev, job);
1594
1595         return 0;
1596 }
1597
1598 static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
1599                                 void __user *chunks, u32 num_chunks,
1600                                 u64 *cs_seq, u32 flags, u32 timeout)
1601 {
1602         struct hl_cs_chunk *cs_chunk_array, *chunk;
1603         struct hw_queue_properties *hw_queue_prop;
1604         struct hl_device *hdev = hpriv->hdev;
1605         struct hl_cs_compl *sig_waitcs_cmpl;
1606         u32 q_idx, collective_engine_id = 0;
1607         struct hl_cs_counters_atomic *cntr;
1608         struct hl_fence *sig_fence = NULL;
1609         struct hl_ctx *ctx = hpriv->ctx;
1610         enum hl_queue_type q_type;
1611         struct hl_cs *cs;
1612         u64 signal_seq;
1613         int rc;
1614
1615         cntr = &hdev->aggregated_cs_counters;
1616         *cs_seq = ULLONG_MAX;
1617
1618         rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1619                         ctx);
1620         if (rc)
1621                 goto out;
1622
1623         /* currently it is guaranteed to have only one chunk */
1624         chunk = &cs_chunk_array[0];
1625
1626         if (chunk->queue_index >= hdev->asic_prop.max_queues) {
1627                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1628                 atomic64_inc(&cntr->validation_drop_cnt);
1629                 dev_err(hdev->dev, "Queue index %d is invalid\n",
1630                         chunk->queue_index);
1631                 rc = -EINVAL;
1632                 goto free_cs_chunk_array;
1633         }
1634
1635         q_idx = chunk->queue_index;
1636         hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
1637         q_type = hw_queue_prop->type;
1638
1639         if (!hw_queue_prop->supports_sync_stream) {
1640                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1641                 atomic64_inc(&cntr->validation_drop_cnt);
1642                 dev_err(hdev->dev,
1643                         "Queue index %d does not support sync stream operations\n",
1644                         q_idx);
1645                 rc = -EINVAL;
1646                 goto free_cs_chunk_array;
1647         }
1648
1649         if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
1650                 if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1651                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1652                         atomic64_inc(&cntr->validation_drop_cnt);
1653                         dev_err(hdev->dev,
1654                                 "Queue index %d is not a collective master\n", q_idx);
1655                         rc = -EINVAL;
1656                         goto free_cs_chunk_array;
1657                 }
1658
1659                 collective_engine_id = chunk->collective_engine_id;
1660         }
1661
1662         if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
1663                 rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx);
1664                 if (rc)
1665                         goto free_cs_chunk_array;
1666
1667                 sig_fence = hl_ctx_get_fence(ctx, signal_seq);
1668                 if (IS_ERR(sig_fence)) {
1669                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1670                         atomic64_inc(&cntr->validation_drop_cnt);
1671                         dev_err(hdev->dev,
1672                                 "Failed to get signal CS with seq 0x%llx\n",
1673                                 signal_seq);
1674                         rc = PTR_ERR(sig_fence);
1675                         goto free_cs_chunk_array;
1676                 }
1677
1678                 if (!sig_fence) {
1679                         /* signal CS already finished */
1680                         rc = 0;
1681                         goto free_cs_chunk_array;
1682                 }
1683
1684                 sig_waitcs_cmpl =
1685                         container_of(sig_fence, struct hl_cs_compl, base_fence);
1686
1687                 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
1688                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1689                         atomic64_inc(&cntr->validation_drop_cnt);
1690                         dev_err(hdev->dev,
1691                                 "CS seq 0x%llx is not of a signal CS\n",
1692                                 signal_seq);
1693                         hl_fence_put(sig_fence);
1694                         rc = -EINVAL;
1695                         goto free_cs_chunk_array;
1696                 }
1697
1698                 if (completion_done(&sig_fence->completion)) {
1699                         /* signal CS already finished */
1700                         hl_fence_put(sig_fence);
1701                         rc = 0;
1702                         goto free_cs_chunk_array;
1703                 }
1704         }
1705
1706         rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
1707         if (rc) {
1708                 if (cs_type == CS_TYPE_WAIT ||
1709                         cs_type == CS_TYPE_COLLECTIVE_WAIT)
1710                         hl_fence_put(sig_fence);
1711                 goto free_cs_chunk_array;
1712         }
1713
1714         /*
1715          * Save the signal CS fence for later initialization right before
1716          * hanging the wait CS on the queue.
1717          */
1718         if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT)
1719                 cs->signal_fence = sig_fence;
1720
1721         hl_debugfs_add_cs(cs);
1722
1723         *cs_seq = cs->sequence;
1724
1725         if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
1726                 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
1727                                 q_idx);
1728         else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
1729                 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
1730                                 cs, q_idx, collective_engine_id);
1731         else {
1732                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1733                 atomic64_inc(&cntr->validation_drop_cnt);
1734                 rc = -EINVAL;
1735         }
1736
1737         if (rc)
1738                 goto free_cs_object;
1739
1740         rc = hl_hw_queue_schedule_cs(cs);
1741         if (rc) {
1742                 if (rc != -EAGAIN)
1743                         dev_err(hdev->dev,
1744                                 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
1745                                 ctx->asid, cs->sequence, rc);
1746                 goto free_cs_object;
1747         }
1748
1749         rc = HL_CS_STATUS_SUCCESS;
1750         goto put_cs;
1751
1752 free_cs_object:
1753         cs_rollback(hdev, cs);
1754         *cs_seq = ULLONG_MAX;
1755         /* The path below is taken for both successful and erroneous exits */
1756 put_cs:
1757         /* We finished with the CS in this function, so put the ref */
1758         cs_put(cs);
1759 free_cs_chunk_array:
1760         kfree(cs_chunk_array);
1761 out:
1762         return rc;
1763 }
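
/*
 * Illustrative sketch only, not part of the driver: the userspace flow around
 * cs_ioctl_signal_wait() is expected to first submit a signal CS and then a
 * wait CS whose chunk references the returned sequence (reusing wait_chunk
 * and signal_seq from the sketch after cs_ioctl_extract_signal_seq()).
 * signal_chunk is a hypothetical chunk that only sets the queue_index of a
 * sync-stream capable queue; the fd and the HL_IOCTL_CS request name are
 * assumptions taken from the uapi header:
 *
 *        union hl_cs_args args = {0};
 *
 *        args.in.chunks_execute = (__u64) (uintptr_t) &signal_chunk;
 *        args.in.num_chunks_execute = 1;
 *        args.in.cs_flags = HL_CS_FLAGS_SIGNAL;
 *        rc = ioctl(fd, HL_IOCTL_CS, &args);
 *        if (rc)
 *                return rc;
 *        signal_seq = args.out.seq;
 *
 *        memset(&args, 0, sizeof(args));
 *        args.in.chunks_execute = (__u64) (uintptr_t) &wait_chunk;
 *        args.in.num_chunks_execute = 1;
 *        args.in.cs_flags = HL_CS_FLAGS_WAIT;
 *        rc = ioctl(fd, HL_IOCTL_CS, &args);
 */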
1764
1765 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
1766 {
1767         union hl_cs_args *args = data;
1768         enum hl_cs_type cs_type;
1769         u64 cs_seq = ULLONG_MAX;
1770         void __user *chunks;
1771         u32 num_chunks, flags, timeout;
1772         int rc;
1773
1774         rc = hl_cs_sanity_checks(hpriv, args);
1775         if (rc)
1776                 goto out;
1777
1778         rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
1779         if (rc)
1780                 goto out;
1781
1782         rc = hl_submit_pending_cb(hpriv);
1783         if (rc)
1784                 goto out;
1785
1786         cs_type = hl_cs_get_cs_type(args->in.cs_flags &
1787                                         ~HL_CS_FLAGS_FORCE_RESTORE);
1788         chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
1789         num_chunks = args->in.num_chunks_execute;
1790         flags = args->in.cs_flags;
1791
1792         /* In case this is a staged CS, the user should supply the CS sequence */
1793         if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1794                         !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
1795                 cs_seq = args->in.seq;
1796
1797         timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
1798                         ? msecs_to_jiffies(args->in.timeout * 1000)
1799                         : hpriv->hdev->timeout_jiffies;
1800
1801         switch (cs_type) {
1802         case CS_TYPE_SIGNAL:
1803         case CS_TYPE_WAIT:
1804         case CS_TYPE_COLLECTIVE_WAIT:
1805                 rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
1806                                         &cs_seq, args->in.cs_flags, timeout);
1807                 break;
1808         default:
1809                 rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
1810                                                 args->in.cs_flags, timeout);
1811                 break;
1812         }
1813
1814 out:
1815         if (rc != -EAGAIN) {
1816                 memset(args, 0, sizeof(*args));
1817                 args->out.status = rc;
1818                 args->out.seq = cs_seq;
1819         }
1820
1821         return rc;
1822 }
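
/*
 * Illustrative sketch only, not part of the driver: a plain execution CS
 * submitted through hl_cs_ioctl() with a per-CS timeout. args.in.timeout is
 * given in seconds and converted above via msecs_to_jiffies(), and -EAGAIN
 * only means the submission should be retried. The fd, the chunk array and
 * the HL_IOCTL_CS request name are assumptions, and <errno.h> is assumed to
 * be included:
 *
 *        union hl_cs_args args = {0};
 *
 *        args.in.chunks_execute = (__u64) (uintptr_t) chunks;
 *        args.in.num_chunks_execute = num_chunks;
 *        args.in.cs_flags = HL_CS_FLAGS_CUSTOM_TIMEOUT;
 *        args.in.timeout = 30;
 *
 *        do {
 *                rc = ioctl(fd, HL_IOCTL_CS, &args);
 *        } while (rc == -1 && errno == EAGAIN);
 */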
1823
1824 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
1825                                 u64 timeout_us, u64 seq,
1826                                 enum hl_cs_wait_status *status, s64 *timestamp)
1827 {
1828         struct hl_fence *fence;
1829         unsigned long timeout;
1830         int rc = 0;
1831         long completion_rc;
1832
1833         if (timestamp)
1834                 *timestamp = 0;
1835
1836         if (timeout_us == MAX_SCHEDULE_TIMEOUT)
1837                 timeout = timeout_us;
1838         else
1839                 timeout = usecs_to_jiffies(timeout_us);
1840
1841         hl_ctx_get(hdev, ctx);
1842
1843         fence = hl_ctx_get_fence(ctx, seq);
1844         if (IS_ERR(fence)) {
1845                 rc = PTR_ERR(fence);
1846                 if (rc == -EINVAL)
1847                         dev_notice_ratelimited(hdev->dev,
1848                                 "Can't wait on CS %llu because current CS is at seq %llu\n",
1849                                 seq, ctx->cs_sequence);
1850         } else if (fence) {
1851                 if (!timeout_us)
1852                         completion_rc = completion_done(&fence->completion);
1853                 else
1854                         completion_rc =
1855                                 wait_for_completion_interruptible_timeout(
1856                                         &fence->completion, timeout);
1857
1858                 if (completion_rc > 0) {
1859                         *status = CS_WAIT_STATUS_COMPLETED;
1860                         if (timestamp)
1861                                 *timestamp = ktime_to_ns(fence->timestamp);
1862                 } else {
1863                         *status = CS_WAIT_STATUS_BUSY;
1864                 }
1865
1866                 if (fence->error == -ETIMEDOUT)
1867                         rc = -ETIMEDOUT;
1868                 else if (fence->error == -EIO)
1869                         rc = -EIO;
1870
1871                 hl_fence_put(fence);
1872         } else {
1873                 dev_dbg(hdev->dev,
1874                         "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
1875                         seq, ctx->cs_sequence);
1876                 *status = CS_WAIT_STATUS_GONE;
1877         }
1878
1879         hl_ctx_put(ctx);
1880
1881         return rc;
1882 }
1883
1884 static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
1885 {
1886         struct hl_device *hdev = hpriv->hdev;
1887         union hl_wait_cs_args *args = data;
1888         enum hl_cs_wait_status status;
1889         u64 seq = args->in.seq;
1890         s64 timestamp;
1891         int rc;
1892
1893         rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq,
1894                                 &status, &timestamp);
1895
1896         memset(args, 0, sizeof(*args));
1897
1898         if (rc) {
1899                 if (rc == -ERESTARTSYS) {
1900                         dev_err_ratelimited(hdev->dev,
1901                                 "user process got signal while waiting for CS handle %llu\n",
1902                                 seq);
1903                         args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
1904                         rc = -EINTR;
1905                 } else if (rc == -ETIMEDOUT) {
1906                         dev_err_ratelimited(hdev->dev,
1907                                 "CS %llu has timed-out while user process is waiting for it\n",
1908                                 seq);
1909                         args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
1910                 } else if (rc == -EIO) {
1911                         dev_err_ratelimited(hdev->dev,
1912                                 "CS %llu has been aborted while user process is waiting for it\n",
1913                                 seq);
1914                         args->out.status = HL_WAIT_CS_STATUS_ABORTED;
1915                 }
1916                 return rc;
1917         }
1918
1919         if (timestamp) {
1920                 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
1921                 args->out.timestamp_nsec = timestamp;
1922         }
1923
1924         switch (status) {
1925         case CS_WAIT_STATUS_GONE:
1926                 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
1927                 fallthrough;
1928         case CS_WAIT_STATUS_COMPLETED:
1929                 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
1930                 break;
1931         case CS_WAIT_STATUS_BUSY:
1932         default:
1933                 args->out.status = HL_WAIT_CS_STATUS_BUSY;
1934                 break;
1935         }
1936
1937         return 0;
1938 }
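
/*
 * Illustrative sketch only, not part of the driver: waiting on a CS handle
 * returned by an earlier submission. The fd, cs_seq and the HL_IOCTL_WAIT_CS
 * request name are assumptions taken from the uapi header:
 *
 *        union hl_wait_cs_args wait_args = {0};
 *
 *        wait_args.in.seq = cs_seq;
 *        wait_args.in.timeout_us = 1000000;
 *
 *        rc = ioctl(fd, HL_IOCTL_WAIT_CS, &wait_args);
 *        if (!rc && wait_args.out.status == HL_WAIT_CS_STATUS_COMPLETED &&
 *                        (wait_args.out.flags & HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD))
 *                completion_ns = wait_args.out.timestamp_nsec;
 */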
1939
1940 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
1941                                 u32 timeout_us, u64 user_address,
1942                                 u32 target_value, u16 interrupt_offset,
1943                                 enum hl_cs_wait_status *status)
1944 {
1945         struct hl_user_pending_interrupt *pend;
1946         struct hl_user_interrupt *interrupt;
1947         unsigned long timeout;
1948         long completion_rc;
1949         u32 completion_value;
1950         int rc = 0;
1951
1952         if (timeout_us == U32_MAX)
1953                 timeout = timeout_us;
1954         else
1955                 timeout = usecs_to_jiffies(timeout_us);
1956
1957         hl_ctx_get(hdev, ctx);
1958
1959         pend = kmalloc(sizeof(*pend), GFP_KERNEL);
1960         if (!pend) {
1961                 hl_ctx_put(ctx);
1962                 return -ENOMEM;
1963         }
1964
1965         hl_fence_init(&pend->fence, ULONG_MAX);
1966
1967         if (interrupt_offset == HL_COMMON_USER_INTERRUPT_ID)
1968                 interrupt = &hdev->common_user_interrupt;
1969         else
1970                 interrupt = &hdev->user_interrupt[interrupt_offset];
1971
1972         spin_lock(&interrupt->wait_list_lock);
1973         if (!hl_device_operational(hdev, NULL)) {
1974                 rc = -EPERM;
1975                 goto unlock_and_free_fence;
1976         }
1977
1978         if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
1979                 dev_err(hdev->dev,
1980                         "Failed to copy completion value from user\n");
1981                 rc = -EFAULT;
1982                 goto unlock_and_free_fence;
1983         }
1984
1985         if (completion_value >= target_value)
1986                 *status = CS_WAIT_STATUS_COMPLETED;
1987         else
1988                 *status = CS_WAIT_STATUS_BUSY;
1989
1990         if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
1991                 goto unlock_and_free_fence;
1992
1993         /* Add pending user interrupt to relevant list for the interrupt
1994          * handler to monitor
1995          */
1996         list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
1997         spin_unlock(&interrupt->wait_list_lock);
1998
1999 wait_again:
2000         /* Wait for interrupt handler to signal completion */
2001         completion_rc =
2002                 wait_for_completion_interruptible_timeout(
2003                                 &pend->fence.completion, timeout);
2004
2005         /* If the timeout did not expire, we need to perform the comparison.
2006          * If the comparison fails, keep waiting until the timeout expires.
2007          */
2008         if (completion_rc > 0) {
2009                 if (copy_from_user(&completion_value,
2010                                 u64_to_user_ptr(user_address), 4)) {
2011                         dev_err(hdev->dev,
2012                                 "Failed to copy completion value from user\n");
2013                         rc = -EFAULT;
2014                         goto remove_pending_user_interrupt;
2015                 }
2016
2017                 if (completion_value >= target_value) {
2018                         *status = CS_WAIT_STATUS_COMPLETED;
2019                 } else {
2020                         timeout = completion_rc;
2021                         goto wait_again;
2022                 }
2023         } else {
2024                 *status = CS_WAIT_STATUS_BUSY;
2025         }
2026
2027 remove_pending_user_interrupt:
2028         spin_lock(&interrupt->wait_list_lock);
2029         list_del(&pend->wait_list_node);
2030
2031 unlock_and_free_fence:
2032         spin_unlock(&interrupt->wait_list_lock);
2033         kfree(pend);
2034         hl_ctx_put(ctx);
2035
2036         return rc;
2037 }
2038
2039 static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
2040 {
2041         u16 interrupt_id, interrupt_offset, first_interrupt, last_interrupt;
2042         struct hl_device *hdev = hpriv->hdev;
2043         struct asic_fixed_properties *prop;
2044         union hl_wait_cs_args *args = data;
2045         enum hl_cs_wait_status status;
2046         int rc;
2047
2048         prop = &hdev->asic_prop;
2049
2050         if (!prop->user_interrupt_count) {
2051                 dev_err(hdev->dev, "no user interrupts allowed\n");
2052                 return -EPERM;
2053         }
2054
2055         interrupt_id =
2056                 FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);
2057
2058         first_interrupt = prop->first_available_user_msix_interrupt;
2059         last_interrupt = prop->first_available_user_msix_interrupt +
2060                                                 prop->user_interrupt_count - 1;
2061
2062         if ((interrupt_id < first_interrupt || interrupt_id > last_interrupt) &&
2063                         interrupt_id != HL_COMMON_USER_INTERRUPT_ID) {
2064                 dev_err(hdev->dev, "invalid user interrupt %u\n", interrupt_id);
2065                 return -EINVAL;
2066         }
2067
2068         if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID)
2069                 interrupt_offset = HL_COMMON_USER_INTERRUPT_ID;
2070         else
2071                 interrupt_offset = interrupt_id - first_interrupt;
2072
2073         rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
2074                                 args->in.interrupt_timeout_us, args->in.addr,
2075                                 args->in.target, interrupt_offset, &status);
2076
2077         memset(args, 0, sizeof(*args));
2078
2079         if (rc) {
2080                 dev_err_ratelimited(hdev->dev,
2081                         "interrupt_wait_ioctl failed (%d)\n", rc);
2082
2083                 return rc;
2084         }
2085
2086         switch (status) {
2087         case CS_WAIT_STATUS_COMPLETED:
2088                 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
2089                 break;
2090         case CS_WAIT_STATUS_BUSY:
2091         default:
2092                 args->out.status = HL_WAIT_CS_STATUS_BUSY;
2093                 break;
2094         }
2095
2096         return 0;
2097 }
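
/*
 * Illustrative sketch only, not part of the driver: waiting for a user
 * interrupt until the 32-bit value at a user address reaches a target value.
 * The fd and the HL_IOCTL_WAIT_CS request name are assumptions,
 * completion_word and target_value are placeholders, and pack_interrupt_id()
 * stands for a hypothetical helper that places the interrupt id into the
 * flags field according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK:
 *
 *        union hl_wait_cs_args irq_args = {0};
 *
 *        irq_args.in.flags = HL_WAIT_CS_FLAGS_INTERRUPT |
 *                                pack_interrupt_id(interrupt_id);
 *        irq_args.in.addr = (__u64) (uintptr_t) &completion_word;
 *        irq_args.in.target = target_value;
 *        irq_args.in.interrupt_timeout_us = 1000000;
 *
 *        rc = ioctl(fd, HL_IOCTL_WAIT_CS, &irq_args);
 *        completed = !rc && irq_args.out.status == HL_WAIT_CS_STATUS_COMPLETED;
 */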
2098
2099 int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
2100 {
2101         union hl_wait_cs_args *args = data;
2102         u32 flags = args->in.flags;
2103         int rc;
2104
2105         if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
2106                 rc = hl_interrupt_wait_ioctl(hpriv, data);
2107         else
2108                 rc = hl_cs_wait_ioctl(hpriv, data);
2109
2110         return rc;
2111 }