habanalabs: modify multi-CS to wait on stream masters
[linux-2.6-microblaze.git] drivers/misc/habanalabs/common/command_submission.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2019 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include <uapi/misc/habanalabs.h>
9 #include "habanalabs.h"
10
11 #include <linux/uaccess.h>
12 #include <linux/slab.h>
13
14 #define HL_CS_FLAGS_TYPE_MASK   (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
15                                 HL_CS_FLAGS_COLLECTIVE_WAIT)
16
17 /**
18  * enum hl_cs_wait_status - cs wait status
19  * @CS_WAIT_STATUS_BUSY: cs was not completed yet
20  * @CS_WAIT_STATUS_COMPLETED: cs completed
21  * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
22  */
23 enum hl_cs_wait_status {
24         CS_WAIT_STATUS_BUSY,
25         CS_WAIT_STATUS_COMPLETED,
26         CS_WAIT_STATUS_GONE
27 };
28
29 static void job_wq_completion(struct work_struct *work);
30 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
31                                 u64 timeout_us, u64 seq,
32                                 enum hl_cs_wait_status *status, s64 *timestamp);
33 static void cs_do_release(struct kref *ref);
34
35 static void hl_sob_reset(struct kref *ref)
36 {
37         struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
38                                                         kref);
39         struct hl_device *hdev = hw_sob->hdev;
40
41         dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id);
42
43         hdev->asic_funcs->reset_sob(hdev, hw_sob);
44
45         hw_sob->need_reset = false;
46 }
47
48 void hl_sob_reset_error(struct kref *ref)
49 {
50         struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
51                                                         kref);
52         struct hl_device *hdev = hw_sob->hdev;
53
54         dev_crit(hdev->dev,
55                 "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
56                 hw_sob->q_idx, hw_sob->sob_id);
57 }
58
59 void hw_sob_put(struct hl_hw_sob *hw_sob)
60 {
61         if (hw_sob)
62                 kref_put(&hw_sob->kref, hl_sob_reset);
63 }
64
65 static void hw_sob_put_err(struct hl_hw_sob *hw_sob)
66 {
67         if (hw_sob)
68                 kref_put(&hw_sob->kref, hl_sob_reset_error);
69 }
70
71 void hw_sob_get(struct hl_hw_sob *hw_sob)
72 {
73         if (hw_sob)
74                 kref_get(&hw_sob->kref);
75 }
76
77 /**
78  * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
79  * @sob_base: sob base id
80  * @sob_mask: sob user mask, each bit represents a sob offset from sob base
81  * @mask: generated mask
82  *
83  * Return: 0 if the given parameters are valid, -EINVAL otherwise
84  */
85 int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
86 {
87         int i;
88
89         if (sob_mask == 0)
90                 return -EINVAL;
91
92         if (sob_mask == 0x1) {
93                 *mask = ~(1 << (sob_base & 0x7));
94         } else {
95                 /* find msb in order to verify sob range is valid */
96                 for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
97                         if (BIT(i) & sob_mask)
98                                 break;
99
100                 if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
101                         return -EINVAL;
102
103                 *mask = ~sob_mask;
104         }
105
106         return 0;
107 }
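
/*
 * Illustrative sketch (not part of the driver): expected output of
 * hl_gen_sob_mask() for two simple inputs. The SOB base ids below are
 * arbitrary example values.
 *
 *        u8 mask;
 *
 *        hl_gen_sob_mask(34, 0x1, &mask);
 *                single SOB at offset 34 & 0x7 = 2, so mask = ~(1 << 2) = 0xfb
 *
 *        hl_gen_sob_mask(32, 0x3, &mask);
 *                two consecutive SOBs from the base; assuming they fit within
 *                HL_MAX_SOBS_PER_MONITOR, mask = ~0x3 = 0xfc
 */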
108
109 static void hl_fence_release(struct kref *kref)
110 {
111         struct hl_fence *fence =
112                 container_of(kref, struct hl_fence, refcount);
113         struct hl_cs_compl *hl_cs_cmpl =
114                 container_of(fence, struct hl_cs_compl, base_fence);
115
116         kfree(hl_cs_cmpl);
117 }
118
119 void hl_fence_put(struct hl_fence *fence)
120 {
121         if (IS_ERR_OR_NULL(fence))
122                 return;
123         kref_put(&fence->refcount, hl_fence_release);
124 }
125
126 void hl_fences_put(struct hl_fence **fence, int len)
127 {
128         int i;
129
130         for (i = 0; i < len; i++, fence++)
131                 hl_fence_put(*fence);
132 }
133
134 void hl_fence_get(struct hl_fence *fence)
135 {
136         if (fence)
137                 kref_get(&fence->refcount);
138 }
139
140 static void hl_fence_init(struct hl_fence *fence, u64 sequence)
141 {
142         kref_init(&fence->refcount);
143         fence->cs_sequence = sequence;
144         fence->error = 0;
145         fence->timestamp = ktime_set(0, 0);
146         init_completion(&fence->completion);
147 }
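
/*
 * Illustrative sketch (not part of the driver): the hl_fence life cycle as
 * used in this file. The fence is embedded in a struct hl_cs_compl, is
 * initialized with the CS sequence, gets an extra reference for the caller,
 * is completed on CS release and is freed on the last put:
 *
 *        hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
 *        hl_fence_get(&cs_cmpl->base_fence);
 *        ...
 *        complete_all(&cs_cmpl->base_fence.completion);
 *        hl_fence_put(&cs_cmpl->base_fence);
 */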
148
149 void cs_get(struct hl_cs *cs)
150 {
151         kref_get(&cs->refcount);
152 }
153
154 static int cs_get_unless_zero(struct hl_cs *cs)
155 {
156         return kref_get_unless_zero(&cs->refcount);
157 }
158
159 static void cs_put(struct hl_cs *cs)
160 {
161         kref_put(&cs->refcount, cs_do_release);
162 }
163
164 static void cs_job_do_release(struct kref *ref)
165 {
166         struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);
167
168         kfree(job);
169 }
170
171 static void cs_job_put(struct hl_cs_job *job)
172 {
173         kref_put(&job->refcount, cs_job_do_release);
174 }
175
176 bool cs_needs_completion(struct hl_cs *cs)
177 {
178         /* In case this is a staged CS, only the last CS in the sequence should
179          * get a completion; any non-staged CS always gets a completion
180          */
181         if (cs->staged_cs && !cs->staged_last)
182                 return false;
183
184         return true;
185 }
186
187 bool cs_needs_timeout(struct hl_cs *cs)
188 {
189         /* In case this is a staged CS, only the first CS in the sequence should
190          * get a timeout; any non-staged CS always gets a timeout
191          */
192         if (cs->staged_cs && !cs->staged_first)
193                 return false;
194
195         return true;
196 }
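
/*
 * Illustrative sketch (not part of the driver): for a staged submission of
 * three CSs, the two helpers above resolve as follows:
 *
 *        first CS  (staged_first): cs_needs_completion() = false, cs_needs_timeout() = true
 *        middle CS               : cs_needs_completion() = false, cs_needs_timeout() = false
 *        last CS   (staged_last) : cs_needs_completion() = true,  cs_needs_timeout() = false
 *
 * A non-staged CS gets both a completion and a timeout.
 */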
197
198 static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
199 {
200         /*
201          * A patched CB is created for external queue jobs, and for H/W queue
202          * jobs if the user CB was allocated by the driver and the MMU is disabled.
203          */
204         return (job->queue_type == QUEUE_TYPE_EXT ||
205                         (job->queue_type == QUEUE_TYPE_HW &&
206                                         job->is_kernel_allocated_cb &&
207                                         !hdev->mmu_enable));
208 }
209
210 /*
211  * cs_parser - parse the user command submission
212  *
213  * @hpriv: pointer to the private data of the fd
214  * @job: pointer to the job that holds the command submission info
215  *
216  * The function parses the command submission of the user. It calls the
217  * ASIC specific parser, which returns a list of memory blocks to send
218  * to the device as different command buffers
219  *
220  */
221 static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
222 {
223         struct hl_device *hdev = hpriv->hdev;
224         struct hl_cs_parser parser;
225         int rc;
226
227         parser.ctx_id = job->cs->ctx->asid;
228         parser.cs_sequence = job->cs->sequence;
229         parser.job_id = job->id;
230
231         parser.hw_queue_id = job->hw_queue_id;
232         parser.job_userptr_list = &job->userptr_list;
233         parser.patched_cb = NULL;
234         parser.user_cb = job->user_cb;
235         parser.user_cb_size = job->user_cb_size;
236         parser.queue_type = job->queue_type;
237         parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
238         job->patched_cb = NULL;
239         parser.completion = cs_needs_completion(job->cs);
240
241         rc = hdev->asic_funcs->cs_parser(hdev, &parser);
242
243         if (is_cb_patched(hdev, job)) {
244                 if (!rc) {
245                         job->patched_cb = parser.patched_cb;
246                         job->job_cb_size = parser.patched_cb_size;
247                         job->contains_dma_pkt = parser.contains_dma_pkt;
248                         atomic_inc(&job->patched_cb->cs_cnt);
249                 }
250
251                 /*
252                  * Whether the parsing worked or not, we don't need the
253                  * original CB anymore because it was already parsed and
254                  * won't be accessed again for this CS
255                  */
256                 atomic_dec(&job->user_cb->cs_cnt);
257                 hl_cb_put(job->user_cb);
258                 job->user_cb = NULL;
259         } else if (!rc) {
260                 job->job_cb_size = job->user_cb_size;
261         }
262
263         return rc;
264 }
265
266 static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
267 {
268         struct hl_cs *cs = job->cs;
269
270         if (is_cb_patched(hdev, job)) {
271                 hl_userptr_delete_list(hdev, &job->userptr_list);
272
273                 /*
274                  * We might arrive here from rollback and patched CB wasn't
275                  * created, so we need to check it's not NULL
276                  */
277                 if (job->patched_cb) {
278                         atomic_dec(&job->patched_cb->cs_cnt);
279                         hl_cb_put(job->patched_cb);
280                 }
281         }
282
283         /* For H/W queue jobs, if a user CB was allocated by the driver and the
284          * MMU is enabled, the user CB isn't released in cs_parser() and thus
285          * should be released here.
286          * This is also true for INT queue jobs which were allocated by the driver
287          */
288         if (job->is_kernel_allocated_cb &&
289                 ((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) ||
290                                 job->queue_type == QUEUE_TYPE_INT)) {
291                 atomic_dec(&job->user_cb->cs_cnt);
292                 hl_cb_put(job->user_cb);
293         }
294
295         /*
296          * This is the only place where there can be multiple threads
297          * modifying the list at the same time
298          */
299         spin_lock(&cs->job_lock);
300         list_del(&job->cs_node);
301         spin_unlock(&cs->job_lock);
302
303         hl_debugfs_remove_job(hdev, job);
304
305         /* We decrement reference only for a CS that gets completion
306          * because the reference was incremented only for this kind of CS
307          * right before it was scheduled.
308          *
309          * In staged submission, only the last CS marked as 'staged_last'
310          * gets completion, hence its release function will be called from here.
311          * As for all the other CSs in the staged submission, which do not get
312          * completion, their CS reference will be decremented by the
313          * 'staged_last' CS during the CS release flow.
314          * All relevant PQ CI counters will be incremented during the CS release
315          * flow by calling 'hl_hw_queue_update_ci'.
316          */
317         if (cs_needs_completion(cs) &&
318                 (job->queue_type == QUEUE_TYPE_EXT ||
319                         job->queue_type == QUEUE_TYPE_HW))
320                 cs_put(cs);
321
322         cs_job_put(job);
323 }
324
325 /*
326  * hl_staged_cs_find_first - locate the first CS in this staged submission
327  *
328  * @hdev: pointer to device structure
329  * @cs_seq: staged submission sequence number
330  *
331  * @note: This function must be called under 'hdev->cs_mirror_lock'
332  *
333  * Find and return a CS pointer with the given sequence
334  */
335 struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
336 {
337         struct hl_cs *cs;
338
339         list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
340                 if (cs->staged_cs && cs->staged_first &&
341                                 cs->sequence == cs_seq)
342                         return cs;
343
344         return NULL;
345 }
346
347 /*
348  * is_staged_cs_last_exists - returns true if the last CS in sequence exists
349  *
350  * @hdev: pointer to device structure
351  * @cs: staged submission member
352  *
353  */
354 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
355 {
356         struct hl_cs *last_entry;
357
358         last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
359                                                                 staged_cs_node);
360
361         if (last_entry->staged_last)
362                 return true;
363
364         return false;
365 }
366
367 /*
368  * staged_cs_get - get CS reference if this CS is a part of a staged CS
369  *
370  * @hdev: pointer to device structure
371  * @cs: current CS
372  * @cs_seq: staged submission sequence number
373  *
374  * Increment CS reference for every CS in this staged submission except for
375  * the CS which gets completion.
376  */
377 static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
378 {
379         /* Only the last CS in this staged submission will get a completion.
380          * We must increment the reference for all other CS's in this
381          * staged submission.
382          * Once we get a completion we will release the whole staged submission.
383          */
384         if (!cs->staged_last)
385                 cs_get(cs);
386 }
387
388 /*
389  * staged_cs_put - put a CS in case it is part of staged submission
390  *
391  * @hdev: pointer to device structure
392  * @cs: CS to put
393  *
394  * This function decrements a CS reference (for a non completion CS)
395  */
396 static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
397 {
398         /* We release all CSs in a staged submission except the last
399          * CS, whose reference we never incremented.
400          */
401         if (!cs_needs_completion(cs))
402                 cs_put(cs);
403 }
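
/*
 * Illustrative sketch (not part of the driver): the staged submission
 * reference flow implemented by the two helpers above. Every staged CS
 * except the 'staged_last' one takes an extra reference on itself, and the
 * release of the completion CS drops those references for the whole list
 * (see cs_do_release() below):
 *
 *        staged_cs_get(hdev, cs);        at submission, per staged CS
 *        ...
 *        list_for_each_entry_safe(staged_cs, tmp,
 *                        &cs->staged_cs_node, staged_cs_node)
 *                staged_cs_put(hdev, staged_cs);
 */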
404
405 static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
406 {
407         bool next_entry_found = false;
408         struct hl_cs *next;
409
410         if (!cs_needs_timeout(cs))
411                 return;
412
413         spin_lock(&hdev->cs_mirror_lock);
414
415         /* We need to handle TDR only once for the complete staged submission.
416          * Hence, we choose the CS that reaches this function first, which is
417          * the CS marked as 'staged_last'.
418          */
419         if (cs->staged_cs && cs->staged_last)
420                 cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
421
422         spin_unlock(&hdev->cs_mirror_lock);
423
424         /* Don't cancel the TDR if this CS has timed out, because we might be
425          * running from the TDR context
426          */
427         if (cs && (cs->timedout ||
428                         hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
429                 return;
430
431         if (cs && cs->tdr_active)
432                 cancel_delayed_work_sync(&cs->work_tdr);
433
434         spin_lock(&hdev->cs_mirror_lock);
435
436         /* queue TDR for next CS */
437         list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
438                 if (cs_needs_timeout(next)) {
439                         next_entry_found = true;
440                         break;
441                 }
442
443         if (next_entry_found && !next->tdr_active) {
444                 next->tdr_active = true;
445                 schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
446         }
447
448         spin_unlock(&hdev->cs_mirror_lock);
449 }
450
451 /*
452  * force_complete_multi_cs - complete all contexts that wait on multi-CS
453  *
454  * @hdev: pointer to habanalabs device structure
455  */
456 static void force_complete_multi_cs(struct hl_device *hdev)
457 {
458         int i;
459
460         for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
461                 struct multi_cs_completion *mcs_compl;
462
463                 mcs_compl = &hdev->multi_cs_completion[i];
464
465                 spin_lock(&mcs_compl->lock);
466
467                 if (!mcs_compl->used) {
468                         spin_unlock(&mcs_compl->lock);
469                         continue;
470                 }
471
472                 /* When calling force complete, no context should be waiting on
473                  * multi-CS.
474                  * We call this function as a protection for such a case, to
475                  * free any pending context and print an error message
476                  */
477                 dev_err(hdev->dev,
478                                 "multi-CS completion context %d still waiting when calling force completion\n",
479                                 i);
480                 complete_all(&mcs_compl->completion);
481                 spin_unlock(&mcs_compl->lock);
482         }
483 }
484
485 /*
486  * complete_multi_cs - complete all waiting entities on multi-CS
487  *
488  * @hdev: pointer to habanalabs device structure
489  * @cs: CS structure
490  * The function signals a waiting entity that has overlapping stream masters
491  * with the completed CS.
492  * For example:
493  * - a completed CS worked on stream master QID 4, while a multi-CS completion
494  *   is actively waiting on stream master QIDs 3 and 5: don't send a signal, as
495  *   there is no common stream master QID
496  * - a completed CS worked on stream master QID 4, while a multi-CS completion
497  *   is actively waiting on stream master QIDs 3 and 4: send a signal, as stream
498  *   master QID 4 is common
499  */
500 static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
501 {
502         struct hl_fence *fence = cs->fence;
503         int i;
504
505         /* in case of a staged CS, check for completion only for the first CS */
506         if (cs->staged_cs && !cs->staged_first)
507                 return;
508
509         for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
510                 struct multi_cs_completion *mcs_compl;
511
512                 mcs_compl = &hdev->multi_cs_completion[i];
513                 if (!mcs_compl->used)
514                         continue;
515
516                 spin_lock(&mcs_compl->lock);
517
518                 /*
519                  * complete if:
520                  * 1. still waiting for completion
521                  * 2. the completed CS has at least one overlapping stream
522                  *    master with the stream masters in the completion
523                  */
524                 if (mcs_compl->used &&
525                                 (fence->stream_master_qid_map &
526                                         mcs_compl->stream_master_qid_map)) {
527                         /* extract the timestamp only of the first completed CS */
528                         if (!mcs_compl->timestamp)
529                                 mcs_compl->timestamp =
530                                                 ktime_to_ns(fence->timestamp);
531                         complete_all(&mcs_compl->completion);
532                 }
533
534                 spin_unlock(&mcs_compl->lock);
535         }
536 }
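
/*
 * Illustrative sketch (not part of the driver): the overlap test above is a
 * bitwise AND of per-stream-master bitmaps, one bit per entry of the stream
 * master QID array (see get_stream_master_qid_mask() below). Example values:
 *
 *        fence->stream_master_qid_map     = 0x4  (CS used stream master entry 2)
 *        mcs_compl->stream_master_qid_map = 0x6  (waiter follows entries 1, 2)
 *                0x4 & 0x6 != 0, so the waiter is completed
 *
 *        mcs_compl->stream_master_qid_map = 0x9  (waiter follows entries 0, 3)
 *                0x4 & 0x9 == 0, so the waiter is not signaled
 */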
537
538 static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
539                                         struct hl_cs *cs,
540                                         struct hl_cs_compl *hl_cs_cmpl)
541 {
542         /* Skip this handler if the cs wasn't submitted, to avoid putting
543          * the hw_sob twice, since this case was already handled at this point.
544          * Also skip if the hw_sob pointer wasn't set.
545          */
546         if (!hl_cs_cmpl->hw_sob || !cs->submitted)
547                 return;
548
549         spin_lock(&hl_cs_cmpl->lock);
550
551         /*
552          * we get a refcount upon reservation of signals or a signal/wait cs for
553          * the hw_sob object, and need to put it when the first staged cs
554          * (which contains the encaps signals) or the signal/wait cs is completed.
555          */
556         if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
557                         (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
558                         (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) ||
559                         (!!hl_cs_cmpl->encaps_signals)) {
560                 dev_dbg(hdev->dev,
561                                 "CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n",
562                                 hl_cs_cmpl->cs_seq,
563                                 hl_cs_cmpl->type,
564                                 hl_cs_cmpl->hw_sob->sob_id,
565                                 hl_cs_cmpl->sob_val);
566
567                 hw_sob_put(hl_cs_cmpl->hw_sob);
568
569                 if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
570                         hdev->asic_funcs->reset_sob_group(hdev,
571                                         hl_cs_cmpl->sob_group);
572         }
573
574         spin_unlock(&hl_cs_cmpl->lock);
575 }
576
577 static void cs_do_release(struct kref *ref)
578 {
579         struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
580         struct hl_device *hdev = cs->ctx->hdev;
581         struct hl_cs_job *job, *tmp;
582         struct hl_cs_compl *hl_cs_cmpl =
583                         container_of(cs->fence, struct hl_cs_compl, base_fence);
584
585         cs->completed = true;
586
587         /*
588          * Although reaching here means that all external jobs have
589          * finished (because each one of them took a refcnt on the CS), we still
590          * need to go over the internal jobs and complete them. Otherwise, we
591          * will have leaked memory and what's worse, the CS object (and
592          * potentially the CTX object) could be released, while the JOB
593          * still holds a pointer to them (but no reference).
594          */
595         list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
596                 complete_job(hdev, job);
597
598         if (!cs->submitted) {
599                 /*
600                  * In case the wait for signal CS was submitted, the fence put
601                  * occurs in init_signal_wait_cs() or collective_wait_init_cs()
602                  * right before hanging on the PQ.
603                  */
604                 if (cs->type == CS_TYPE_WAIT ||
605                                 cs->type == CS_TYPE_COLLECTIVE_WAIT)
606                         hl_fence_put(cs->signal_fence);
607
608                 goto out;
609         }
610
611         /* Need to update CI for all queue jobs that do not get completion */
612         hl_hw_queue_update_ci(cs);
613
614         /* remove CS from CS mirror list */
615         spin_lock(&hdev->cs_mirror_lock);
616         list_del_init(&cs->mirror_node);
617         spin_unlock(&hdev->cs_mirror_lock);
618
619         cs_handle_tdr(hdev, cs);
620
621         if (cs->staged_cs) {
622                 /* the completion CS decrements reference for the entire
623                  * staged submission
624                  */
625                 if (cs->staged_last) {
626                         struct hl_cs *staged_cs, *tmp;
627
628                         list_for_each_entry_safe(staged_cs, tmp,
629                                         &cs->staged_cs_node, staged_cs_node)
630                                 staged_cs_put(hdev, staged_cs);
631                 }
632
633                 /* A staged CS will be a member of the list only after it
634                  * was submitted. We used 'cs_mirror_lock' when inserting
635                  * it into the list, so we use it again when removing it
636                  */
637                 if (cs->submitted) {
638                         spin_lock(&hdev->cs_mirror_lock);
639                         list_del(&cs->staged_cs_node);
640                         spin_unlock(&hdev->cs_mirror_lock);
641                 }
642
643                 /* decrement the refcount to handle the case when the first
644                  * staged cs with encaps signals is completed.
645                  */
646                 if (hl_cs_cmpl->encaps_signals)
647                         kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount,
648                                                 hl_encaps_handle_do_release);
649         }
650
651         if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
652                         && cs->encaps_signals)
653                 kref_put(&cs->encaps_sig_hdl->refcount,
654                                         hl_encaps_handle_do_release);
655
656 out:
657         /* Must be called before hl_ctx_put because inside we use ctx to get
658          * the device
659          */
660         hl_debugfs_remove_cs(cs);
661
662         hl_ctx_put(cs->ctx);
663
664         /* We need to mark an error for a non-submitted CS because in that
665          * case the hl fence release flow is different. Mainly, we don't need
666          * to handle hw_sob for signal/wait
667          */
668         if (cs->timedout)
669                 cs->fence->error = -ETIMEDOUT;
670         else if (cs->aborted)
671                 cs->fence->error = -EIO;
672         else if (!cs->submitted)
673                 cs->fence->error = -EBUSY;
674
675         if (unlikely(cs->skip_reset_on_timeout)) {
676                 dev_err(hdev->dev,
677                         "Command submission %llu completed after %llu (s)\n",
678                         cs->sequence,
679                         div_u64(jiffies - cs->submission_time_jiffies, HZ));
680         }
681
682         if (cs->timestamp)
683                 cs->fence->timestamp = ktime_get();
684         complete_all(&cs->fence->completion);
685         complete_multi_cs(hdev, cs);
686
687         cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl);
688
689         hl_fence_put(cs->fence);
690
691         kfree(cs->jobs_in_queue_cnt);
692         kfree(cs);
693 }
694
695 static void cs_timedout(struct work_struct *work)
696 {
697         struct hl_device *hdev;
698         int rc;
699         struct hl_cs *cs = container_of(work, struct hl_cs,
700                                                  work_tdr.work);
701         bool skip_reset_on_timeout = cs->skip_reset_on_timeout;
702
703         rc = cs_get_unless_zero(cs);
704         if (!rc)
705                 return;
706
707         if ((!cs->submitted) || (cs->completed)) {
708                 cs_put(cs);
709                 return;
710         }
711
712         /* Mark that the CS is timed out so we won't try to cancel its TDR */
713         if (likely(!skip_reset_on_timeout))
714                 cs->timedout = true;
715
716         hdev = cs->ctx->hdev;
717
718         switch (cs->type) {
719         case CS_TYPE_SIGNAL:
720                 dev_err(hdev->dev,
721                         "Signal command submission %llu has not finished in time!\n",
722                         cs->sequence);
723                 break;
724
725         case CS_TYPE_WAIT:
726                 dev_err(hdev->dev,
727                         "Wait command submission %llu has not finished in time!\n",
728                         cs->sequence);
729                 break;
730
731         case CS_TYPE_COLLECTIVE_WAIT:
732                 dev_err(hdev->dev,
733                         "Collective Wait command submission %llu has not finished in time!\n",
734                         cs->sequence);
735                 break;
736
737         default:
738                 dev_err(hdev->dev,
739                         "Command submission %llu has not finished in time!\n",
740                         cs->sequence);
741                 break;
742         }
743
744         rc = hl_state_dump(hdev);
745         if (rc)
746                 dev_err(hdev->dev, "Error during system state dump %d\n", rc);
747
748         cs_put(cs);
749
750         if (likely(!skip_reset_on_timeout)) {
751                 if (hdev->reset_on_lockup)
752                         hl_device_reset(hdev, HL_RESET_TDR);
753                 else
754                         hdev->needs_reset = true;
755         }
756 }
757
758 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
759                         enum hl_cs_type cs_type, u64 user_sequence,
760                         struct hl_cs **cs_new, u32 flags, u32 timeout)
761 {
762         struct hl_cs_counters_atomic *cntr;
763         struct hl_fence *other = NULL;
764         struct hl_cs_compl *cs_cmpl;
765         struct hl_cs *cs;
766         int rc;
767
768         cntr = &hdev->aggregated_cs_counters;
769
770         cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
771         if (!cs)
772                 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
773
774         if (!cs) {
775                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
776                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
777                 return -ENOMEM;
778         }
779
780         /* increment refcnt for context */
781         hl_ctx_get(hdev, ctx);
782
783         cs->ctx = ctx;
784         cs->submitted = false;
785         cs->completed = false;
786         cs->type = cs_type;
787         cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
788         cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
789         cs->timeout_jiffies = timeout;
790         cs->skip_reset_on_timeout =
791                 hdev->skip_reset_on_timeout ||
792                 !!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
793         cs->submission_time_jiffies = jiffies;
794         INIT_LIST_HEAD(&cs->job_list);
795         INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
796         kref_init(&cs->refcount);
797         spin_lock_init(&cs->job_lock);
798
799         cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
800         if (!cs_cmpl)
801                 cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL);
802
803         if (!cs_cmpl) {
804                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
805                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
806                 rc = -ENOMEM;
807                 goto free_cs;
808         }
809
810         cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
811                         sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
812         if (!cs->jobs_in_queue_cnt)
813                 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
814                                 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
815
816         if (!cs->jobs_in_queue_cnt) {
817                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
818                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
819                 rc = -ENOMEM;
820                 goto free_cs_cmpl;
821         }
822
823         cs_cmpl->hdev = hdev;
824         cs_cmpl->type = cs->type;
825         spin_lock_init(&cs_cmpl->lock);
826         cs->fence = &cs_cmpl->base_fence;
827
828         spin_lock(&ctx->cs_lock);
829
830         cs_cmpl->cs_seq = ctx->cs_sequence;
831         other = ctx->cs_pending[cs_cmpl->cs_seq &
832                                 (hdev->asic_prop.max_pending_cs - 1)];
833
834         if (other && !completion_done(&other->completion)) {
835                 /* If the following statement is true, it means we have reached
836                  * a point in which only part of the staged submission was
837                  * submitted and we don't have enough room in the 'cs_pending'
838                  * array for the rest of the submission.
839                  * This causes a deadlock because this CS will never be
840                  * completed as it depends on future CS's for completion.
841                  */
842                 if (other->cs_sequence == user_sequence)
843                         dev_crit_ratelimited(hdev->dev,
844                                 "Staged CS %llu deadlock due to lack of resources",
845                                 user_sequence);
846
847                 dev_dbg_ratelimited(hdev->dev,
848                         "Rejecting CS because of too many in-flight CS\n");
849                 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
850                 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
851                 rc = -EAGAIN;
852                 goto free_fence;
853         }
854
855         /* init hl_fence */
856         hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
857
858         cs->sequence = cs_cmpl->cs_seq;
859
860         ctx->cs_pending[cs_cmpl->cs_seq &
861                         (hdev->asic_prop.max_pending_cs - 1)] =
862                                                         &cs_cmpl->base_fence;
863         ctx->cs_sequence++;
864
865         hl_fence_get(&cs_cmpl->base_fence);
866
867         hl_fence_put(other);
868
869         spin_unlock(&ctx->cs_lock);
870
871         *cs_new = cs;
872
873         return 0;
874
875 free_fence:
876         spin_unlock(&ctx->cs_lock);
877         kfree(cs->jobs_in_queue_cnt);
878 free_cs_cmpl:
879         kfree(cs_cmpl);
880 free_cs:
881         kfree(cs);
882         hl_ctx_put(ctx);
883         return rc;
884 }
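
/*
 * Illustrative sketch (not part of the driver): the 'cs_pending' array above
 * is used as a ring indexed by the low bits of the CS sequence; the masking
 * assumes max_pending_cs is a power of two. With max_pending_cs = 64 as an
 * example value:
 *
 *        slot = cs_cmpl->cs_seq & (64 - 1);
 *
 * sequence 0 and sequence 64 map to the same slot, and a new CS is rejected
 * with -EAGAIN while the fence currently occupying its slot has not yet been
 * completed.
 */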
885
886 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
887 {
888         struct hl_cs_job *job, *tmp;
889
890         staged_cs_put(hdev, cs);
891
892         list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
893                 complete_job(hdev, job);
894 }
895
896 void hl_cs_rollback_all(struct hl_device *hdev)
897 {
898         int i;
899         struct hl_cs *cs, *tmp;
900
901         flush_workqueue(hdev->sob_reset_wq);
902
903         /* flush all completions before iterating over the CS mirror list in
904          * order to avoid a race with the release functions
905          */
906         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
907                 flush_workqueue(hdev->cq_wq[i]);
908
909         /* Make sure we don't have leftovers in the CS mirror list */
910         list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
911                 cs_get(cs);
912                 cs->aborted = true;
913                 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
914                                 cs->ctx->asid, cs->sequence);
915                 cs_rollback(hdev, cs);
916                 cs_put(cs);
917         }
918
919         force_complete_multi_cs(hdev);
920 }
921
922 static void
923 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
924 {
925         struct hl_user_pending_interrupt *pend;
926
927         spin_lock(&interrupt->wait_list_lock);
928         list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
929                 pend->fence.error = -EIO;
930                 complete_all(&pend->fence.completion);
931         }
932         spin_unlock(&interrupt->wait_list_lock);
933 }
934
935 void hl_release_pending_user_interrupts(struct hl_device *hdev)
936 {
937         struct asic_fixed_properties *prop = &hdev->asic_prop;
938         struct hl_user_interrupt *interrupt;
939         int i;
940
941         if (!prop->user_interrupt_count)
942                 return;
943
944         /* We iterate through the user interrupt requests and wake up all
945          * user threads waiting for interrupt completion. We iterate the
946          * list under a lock; this is why all user threads, once awake,
947          * will wait on the same lock and will release the waiting object upon
948          * unlock.
949          */
950
951         for (i = 0 ; i < prop->user_interrupt_count ; i++) {
952                 interrupt = &hdev->user_interrupt[i];
953                 wake_pending_user_interrupt_threads(interrupt);
954         }
955
956         interrupt = &hdev->common_user_interrupt;
957         wake_pending_user_interrupt_threads(interrupt);
958 }
959
960 static void job_wq_completion(struct work_struct *work)
961 {
962         struct hl_cs_job *job = container_of(work, struct hl_cs_job,
963                                                 finish_work);
964         struct hl_cs *cs = job->cs;
965         struct hl_device *hdev = cs->ctx->hdev;
966
967         /* job is no longer needed */
968         complete_job(hdev, job);
969 }
970
971 static int validate_queue_index(struct hl_device *hdev,
972                                 struct hl_cs_chunk *chunk,
973                                 enum hl_queue_type *queue_type,
974                                 bool *is_kernel_allocated_cb)
975 {
976         struct asic_fixed_properties *asic = &hdev->asic_prop;
977         struct hw_queue_properties *hw_queue_prop;
978
979         /* This must be checked here to prevent out-of-bounds access to
980          * hw_queues_props array
981          */
982         if (chunk->queue_index >= asic->max_queues) {
983                 dev_err(hdev->dev, "Queue index %d is invalid\n",
984                         chunk->queue_index);
985                 return -EINVAL;
986         }
987
988         hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
989
990         if (hw_queue_prop->type == QUEUE_TYPE_NA) {
991                 dev_err(hdev->dev, "Queue index %d is invalid\n",
992                         chunk->queue_index);
993                 return -EINVAL;
994         }
995
996         if (hw_queue_prop->driver_only) {
997                 dev_err(hdev->dev,
998                         "Queue index %d is restricted for the kernel driver\n",
999                         chunk->queue_index);
1000                 return -EINVAL;
1001         }
1002
1003         /* When hw queue type isn't QUEUE_TYPE_HW,
1004          * the USER_ALLOC_CB flag shall be treated as "don't care".
1005          */
1006         if (hw_queue_prop->type == QUEUE_TYPE_HW) {
1007                 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
1008                         if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
1009                                 dev_err(hdev->dev,
1010                                         "Queue index %d doesn't support user CB\n",
1011                                         chunk->queue_index);
1012                                 return -EINVAL;
1013                         }
1014
1015                         *is_kernel_allocated_cb = false;
1016                 } else {
1017                         if (!(hw_queue_prop->cb_alloc_flags &
1018                                         CB_ALLOC_KERNEL)) {
1019                                 dev_err(hdev->dev,
1020                                         "Queue index %d doesn't support kernel CB\n",
1021                                         chunk->queue_index);
1022                                 return -EINVAL;
1023                         }
1024
1025                         *is_kernel_allocated_cb = true;
1026                 }
1027         } else {
1028                 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
1029                                                 & CB_ALLOC_KERNEL);
1030         }
1031
1032         *queue_type = hw_queue_prop->type;
1033         return 0;
1034 }
1035
1036 static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
1037                                         struct hl_cb_mgr *cb_mgr,
1038                                         struct hl_cs_chunk *chunk)
1039 {
1040         struct hl_cb *cb;
1041         u32 cb_handle;
1042
1043         cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
1044
1045         cb = hl_cb_get(hdev, cb_mgr, cb_handle);
1046         if (!cb) {
1047                 dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
1048                 return NULL;
1049         }
1050
1051         if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
1052                 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
1053                 goto release_cb;
1054         }
1055
1056         atomic_inc(&cb->cs_cnt);
1057
1058         return cb;
1059
1060 release_cb:
1061         hl_cb_put(cb);
1062         return NULL;
1063 }
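
/*
 * Illustrative sketch (not part of the driver): the chunk fields the lookup
 * above relies on. 'cb_id' and 'size' are hypothetical example names; the
 * driver only uses the page-shifted upper bits of the handle and requires
 * the size to be at least 8 bytes and no larger than the CB itself:
 *
 *        chunk->cb_handle = (u64)cb_id << PAGE_SHIFT;
 *        chunk->cb_size = size;                (8 <= size <= cb->size)
 */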
1064
1065 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
1066                 enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
1067 {
1068         struct hl_cs_job *job;
1069
1070         job = kzalloc(sizeof(*job), GFP_ATOMIC);
1071         if (!job)
1072                 job = kzalloc(sizeof(*job), GFP_KERNEL);
1073
1074         if (!job)
1075                 return NULL;
1076
1077         kref_init(&job->refcount);
1078         job->queue_type = queue_type;
1079         job->is_kernel_allocated_cb = is_kernel_allocated_cb;
1080
1081         if (is_cb_patched(hdev, job))
1082                 INIT_LIST_HEAD(&job->userptr_list);
1083
1084         if (job->queue_type == QUEUE_TYPE_EXT)
1085                 INIT_WORK(&job->finish_work, job_wq_completion);
1086
1087         return job;
1088 }
1089
1090 static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
1091 {
1092         if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
1093                 return CS_TYPE_SIGNAL;
1094         else if (cs_type_flags & HL_CS_FLAGS_WAIT)
1095                 return CS_TYPE_WAIT;
1096         else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
1097                 return CS_TYPE_COLLECTIVE_WAIT;
1098         else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY)
1099                 return CS_RESERVE_SIGNALS;
1100         else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
1101                 return CS_UNRESERVE_SIGNALS;
1102         else
1103                 return CS_TYPE_DEFAULT;
1104 }
1105
1106 static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
1107 {
1108         struct hl_device *hdev = hpriv->hdev;
1109         struct hl_ctx *ctx = hpriv->ctx;
1110         u32 cs_type_flags, num_chunks;
1111         enum hl_device_status status;
1112         enum hl_cs_type cs_type;
1113
1114         if (!hl_device_operational(hdev, &status)) {
1115                 dev_warn_ratelimited(hdev->dev,
1116                         "Device is %s. Can't submit new CS\n",
1117                         hdev->status[status]);
1118                 return -EBUSY;
1119         }
1120
1121         if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1122                         !hdev->supports_staged_submission) {
1123                 dev_err(hdev->dev, "staged submission not supported");
1124                 return -EPERM;
1125         }
1126
1127         cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
1128
1129         if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
1130                 dev_err(hdev->dev,
1131                         "CS type flags are mutually exclusive, context %d\n",
1132                         ctx->asid);
1133                 return -EINVAL;
1134         }
1135
1136         cs_type = hl_cs_get_cs_type(cs_type_flags);
1137         num_chunks = args->in.num_chunks_execute;
1138
1139         if (unlikely((cs_type != CS_TYPE_DEFAULT) &&
1140                                         !hdev->supports_sync_stream)) {
1141                 dev_err(hdev->dev, "Sync stream CS is not supported\n");
1142                 return -EINVAL;
1143         }
1144
1145         if (cs_type == CS_TYPE_DEFAULT) {
1146                 if (!num_chunks) {
1147                         dev_err(hdev->dev,
1148                                 "Got execute CS with 0 chunks, context %d\n",
1149                                 ctx->asid);
1150                         return -EINVAL;
1151                 }
1152         } else if (num_chunks != 1) {
1153                 dev_err(hdev->dev,
1154                         "Sync stream CS mandates one chunk only, context %d\n",
1155                         ctx->asid);
1156                 return -EINVAL;
1157         }
1158
1159         return 0;
1160 }
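
/*
 * Illustrative sketch (not part of the driver): the is_power_of_2() check
 * above makes the CS type flags mutually exclusive. For example:
 *
 *        args->in.cs_flags = HL_CS_FLAGS_SIGNAL;                            accepted
 *        args->in.cs_flags = HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT;         rejected (-EINVAL)
 *
 * A default (execute) CS must also carry at least one chunk, while the sync
 * stream CS types mandate exactly one chunk.
 */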
1161
1162 static int hl_cs_copy_chunk_array(struct hl_device *hdev,
1163                                         struct hl_cs_chunk **cs_chunk_array,
1164                                         void __user *chunks, u32 num_chunks,
1165                                         struct hl_ctx *ctx)
1166 {
1167         u32 size_to_copy;
1168
1169         if (num_chunks > HL_MAX_JOBS_PER_CS) {
1170                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1171                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1172                 dev_err(hdev->dev,
1173                         "Number of chunks can NOT be larger than %d\n",
1174                         HL_MAX_JOBS_PER_CS);
1175                 return -EINVAL;
1176         }
1177
1178         *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
1179                                         GFP_ATOMIC);
1180         if (!*cs_chunk_array)
1181                 *cs_chunk_array = kmalloc_array(num_chunks,
1182                                         sizeof(**cs_chunk_array), GFP_KERNEL);
1183         if (!*cs_chunk_array) {
1184                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1185                 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1186                 return -ENOMEM;
1187         }
1188
1189         size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
1190         if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
1191                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1192                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1193                 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
1194                 kfree(*cs_chunk_array);
1195                 return -EFAULT;
1196         }
1197
1198         return 0;
1199 }
1200
1201 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
1202                                 u64 sequence, u32 flags,
1203                                 u32 encaps_signal_handle)
1204 {
1205         if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
1206                 return 0;
1207
1208         cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
1209         cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);
1210
1211         if (cs->staged_first) {
1212                 /* Staged CS sequence is the first CS sequence */
1213                 INIT_LIST_HEAD(&cs->staged_cs_node);
1214                 cs->staged_sequence = cs->sequence;
1215
1216                 if (cs->encaps_signals)
1217                         cs->encaps_sig_hdl_id = encaps_signal_handle;
1218         } else {
1219                 /* User sequence will be validated in 'hl_hw_queue_schedule_cs'
1220                  * under the cs_mirror_lock
1221                  */
1222                 cs->staged_sequence = sequence;
1223         }
1224
1225         /* Increment CS reference if needed */
1226         staged_cs_get(hdev, cs);
1227
1228         cs->staged_cs = true;
1229
1230         return 0;
1231 }
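
/*
 * Illustrative sketch (not part of the driver): the flags userspace would set
 * on each part of a three-CS staged submission, as interpreted above:
 *
 *        CS #1: HL_CS_FLAGS_STAGED_SUBMISSION | HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
 *        CS #2: HL_CS_FLAGS_STAGED_SUBMISSION
 *        CS #3: HL_CS_FLAGS_STAGED_SUBMISSION | HL_CS_FLAGS_STAGED_SUBMISSION_LAST
 *
 * The first CS defines 'staged_sequence'; the subsequent CSs pass that
 * sequence as their user sequence so the driver can tie them to the same
 * staged submission.
 */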
1232
1233 static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
1234 {
1235         int i;
1236
1237         for (i = 0; i < hdev->stream_master_qid_arr_size; i++)
1238                 if (qid == hdev->stream_master_qid_arr[i])
1239                         return BIT(i);
1240
1241         return 0;
1242 }
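
/*
 * Illustrative sketch (not part of the driver): how the mask above is
 * accumulated into a per-CS map in cs_ioctl_default() below. The array
 * contents are hypothetical example values:
 *
 *        hdev->stream_master_qid_arr = { 4, 8, 12, 16 }
 *
 *        a CS with jobs on queues 4 and 12 accumulates
 *                stream_master_qid_map = BIT(0) | BIT(2) = 0x5
 *
 * and the map is later stored in cs->fence->stream_master_qid_map for the
 * matching done in complete_multi_cs().
 */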
1243
1244 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
1245                                 u32 num_chunks, u64 *cs_seq, u32 flags,
1246                                 u32 encaps_signals_handle, u32 timeout)
1247 {
1248         bool staged_mid, int_queues_only = true;
1249         struct hl_device *hdev = hpriv->hdev;
1250         struct hl_cs_chunk *cs_chunk_array;
1251         struct hl_cs_counters_atomic *cntr;
1252         struct hl_ctx *ctx = hpriv->ctx;
1253         struct hl_cs_job *job;
1254         struct hl_cs *cs;
1255         struct hl_cb *cb;
1256         u64 user_sequence;
1257         u8 stream_master_qid_map = 0;
1258         int rc, i;
1259
1260         cntr = &hdev->aggregated_cs_counters;
1261         user_sequence = *cs_seq;
1262         *cs_seq = ULLONG_MAX;
1263
1264         rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1265                         hpriv->ctx);
1266         if (rc)
1267                 goto out;
1268
1269         if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1270                         !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
1271                 staged_mid = true;
1272         else
1273                 staged_mid = false;
1274
1275         rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
1276                         staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
1277                         timeout);
1278         if (rc)
1279                 goto free_cs_chunk_array;
1280
1281         *cs_seq = cs->sequence;
1282
1283         hl_debugfs_add_cs(cs);
1284
1285         rc = cs_staged_submission(hdev, cs, user_sequence, flags,
1286                                                 encaps_signals_handle);
1287         if (rc)
1288                 goto free_cs_object;
1289
1290         /* Validate ALL the CS chunks before submitting the CS */
1291         for (i = 0 ; i < num_chunks ; i++) {
1292                 struct hl_cs_chunk *chunk = &cs_chunk_array[i];
1293                 enum hl_queue_type queue_type;
1294                 bool is_kernel_allocated_cb;
1295
1296                 rc = validate_queue_index(hdev, chunk, &queue_type,
1297                                                 &is_kernel_allocated_cb);
1298                 if (rc) {
1299                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1300                         atomic64_inc(&cntr->validation_drop_cnt);
1301                         goto free_cs_object;
1302                 }
1303
1304                 if (is_kernel_allocated_cb) {
1305                         cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
1306                         if (!cb) {
1307                                 atomic64_inc(
1308                                         &ctx->cs_counters.validation_drop_cnt);
1309                                 atomic64_inc(&cntr->validation_drop_cnt);
1310                                 rc = -EINVAL;
1311                                 goto free_cs_object;
1312                         }
1313                 } else {
1314                         cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
1315                 }
1316
1317                 if (queue_type == QUEUE_TYPE_EXT ||
1318                                                 queue_type == QUEUE_TYPE_HW) {
1319                         int_queues_only = false;
1320
1321                         /*
1322                          * store which streams are being used by the external/HW
1323                          * queues of this CS
1324                          */
1325                         if (hdev->supports_wait_for_multi_cs)
1326                                 stream_master_qid_map |=
1327                                         get_stream_master_qid_mask(hdev,
1328                                                         chunk->queue_index);
1329                 }
1330
1331                 job = hl_cs_allocate_job(hdev, queue_type,
1332                                                 is_kernel_allocated_cb);
1333                 if (!job) {
1334                         atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1335                         atomic64_inc(&cntr->out_of_mem_drop_cnt);
1336                         dev_err(hdev->dev, "Failed to allocate a new job\n");
1337                         rc = -ENOMEM;
1338                         if (is_kernel_allocated_cb)
1339                                 goto release_cb;
1340
1341                         goto free_cs_object;
1342                 }
1343
1344                 job->id = i + 1;
1345                 job->cs = cs;
1346                 job->user_cb = cb;
1347                 job->user_cb_size = chunk->cb_size;
1348                 job->hw_queue_id = chunk->queue_index;
1349
1350                 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1351
1352                 list_add_tail(&job->cs_node, &cs->job_list);
1353
1354                 /*
1355                  * Increment CS reference. When the CS reference is 0, the CS is
1356                  * done and can be signaled to the user and free all its resources.
1357                  * Only increment for a JOB on external or H/W queues, because
1358                  * only for those JOBs do we get a completion
1359                  */
1360                 if (cs_needs_completion(cs) &&
1361                         (job->queue_type == QUEUE_TYPE_EXT ||
1362                                 job->queue_type == QUEUE_TYPE_HW))
1363                         cs_get(cs);
1364
1365                 hl_debugfs_add_job(hdev, job);
1366
1367                 rc = cs_parser(hpriv, job);
1368                 if (rc) {
1369                         atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
1370                         atomic64_inc(&cntr->parsing_drop_cnt);
1371                         dev_err(hdev->dev,
1372                                 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
1373                                 cs->ctx->asid, cs->sequence, job->id, rc);
1374                         goto free_cs_object;
1375                 }
1376         }
1377
1378         /* We allow a CS with any queue type combination as long as it does
1379          * not get a completion
1380          */
1381         if (int_queues_only && cs_needs_completion(cs)) {
1382                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1383                 atomic64_inc(&cntr->validation_drop_cnt);
1384                 dev_err(hdev->dev,
1385                         "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
1386                         cs->ctx->asid, cs->sequence);
1387                 rc = -EINVAL;
1388                 goto free_cs_object;
1389         }
1390
1391         /*
1392          * store the (external/HW queues) streams used by the CS in the
1393          * fence object for multi-CS completion
1394          */
1395         if (hdev->supports_wait_for_multi_cs)
1396                 cs->fence->stream_master_qid_map = stream_master_qid_map;
1397
1398         rc = hl_hw_queue_schedule_cs(cs);
1399         if (rc) {
1400                 if (rc != -EAGAIN)
1401                         dev_err(hdev->dev,
1402                                 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
1403                                 cs->ctx->asid, cs->sequence, rc);
1404                 goto free_cs_object;
1405         }
1406
1407         rc = HL_CS_STATUS_SUCCESS;
1408         goto put_cs;
1409
1410 release_cb:
1411         atomic_dec(&cb->cs_cnt);
1412         hl_cb_put(cb);
1413 free_cs_object:
1414         cs_rollback(hdev, cs);
1415         *cs_seq = ULLONG_MAX;
1416         /* The path below is both for good and erroneous exits */
1417 put_cs:
1418         /* We finished with the CS in this function, so put the ref */
1419         cs_put(cs);
1420 free_cs_chunk_array:
1421         kfree(cs_chunk_array);
1422 out:
1423         return rc;
1424 }
1425
1426 static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
1427                                 u64 *cs_seq)
1428 {
1429         struct hl_device *hdev = hpriv->hdev;
1430         struct hl_ctx *ctx = hpriv->ctx;
1431         bool need_soft_reset = false;
1432         int rc = 0, do_ctx_switch;
1433         void __user *chunks;
1434         u32 num_chunks, tmp;
1435         int ret;
1436
1437         do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
1438
1439         if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
1440                 mutex_lock(&hpriv->restore_phase_mutex);
1441
1442                 if (do_ctx_switch) {
1443                         rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
1444                         if (rc) {
1445                                 dev_err_ratelimited(hdev->dev,
1446                                         "Failed to switch to context %d, rejecting CS! %d\n",
1447                                         ctx->asid, rc);
1448                                 /*
1449                                  * If we timed out, or if the device is not IDLE
1450                                  * while we want to do a context switch (-EBUSY),
1451                                  * we need to soft-reset because the QMAN is
1452                                  * probably stuck. However, we can't call
1453                                  * reset here directly because of a deadlock, so
1454                                  * we need to do it at the very end of this
1455                                  * function
1456                                  */
1457                                 if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
1458                                         need_soft_reset = true;
1459                                 mutex_unlock(&hpriv->restore_phase_mutex);
1460                                 goto out;
1461                         }
1462                 }
1463
1464                 hdev->asic_funcs->restore_phase_topology(hdev);
1465
1466                 chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
1467                 num_chunks = args->in.num_chunks_restore;
1468
1469                 if (!num_chunks) {
1470                         dev_dbg(hdev->dev,
1471                                 "Need to run restore phase but restore CS is empty\n");
1472                         rc = 0;
1473                 } else {
1474                         rc = cs_ioctl_default(hpriv, chunks, num_chunks,
1475                                         cs_seq, 0, 0, hdev->timeout_jiffies);
1476                 }
1477
1478                 mutex_unlock(&hpriv->restore_phase_mutex);
1479
1480                 if (rc) {
1481                         dev_err(hdev->dev,
1482                                 "Failed to submit restore CS for context %d (%d)\n",
1483                                 ctx->asid, rc);
1484                         goto out;
1485                 }
1486
1487                 /* Need to wait for restore completion before execution phase */
1488                 if (num_chunks) {
1489                         enum hl_cs_wait_status status;
1490 wait_again:
1491                         ret = _hl_cs_wait_ioctl(hdev, ctx,
1492                                         jiffies_to_usecs(hdev->timeout_jiffies),
1493                                         *cs_seq, &status, NULL);
1494                         if (ret) {
1495                                 if (ret == -ERESTARTSYS) {
1496                                         usleep_range(100, 200);
1497                                         goto wait_again;
1498                                 }
1499
1500                                 dev_err(hdev->dev,
1501                                         "Restore CS for context %d failed to complete %d\n",
1502                                         ctx->asid, ret);
1503                                 rc = -ENOEXEC;
1504                                 goto out;
1505                         }
1506                 }
1507
1508                 ctx->thread_ctx_switch_wait_token = 1;
1509
1510         } else if (!ctx->thread_ctx_switch_wait_token) {
1511                 rc = hl_poll_timeout_memory(hdev,
1512                         &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
1513                         100, jiffies_to_usecs(hdev->timeout_jiffies), false);
1514
1515                 if (rc == -ETIMEDOUT) {
1516                         dev_err(hdev->dev,
1517                                 "context switch phase timeout (%d)\n", tmp);
1518                         goto out;
1519                 }
1520         }
1521
1522 out:
1523         if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
1524                 hl_device_reset(hdev, 0);
1525
1526         return rc;
1527 }
1528
1529 /*
1530  * hl_cs_signal_sob_wraparound_handler: handle the SOB value wraparound case.
1531  * If the SOB value reaches the maximum value, switch to the other SOB
1532  * reserved for the queue.
1533  * @hdev: pointer to device structure
1534  * @q_idx: stream queue index
1535  * @hw_sob: the H/W SOB used in this signal CS.
1536  * @count: signals count
1537  * @encaps_sig: tells whether it's reservation for encaps signals or not.
1538  *
1539  * Note that this function must be called while hw_queues_lock is taken.
1540  */
1541 int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
1542                         struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig)
1543
1544 {
1545         struct hl_sync_stream_properties *prop;
1546         struct hl_hw_sob *sob = *hw_sob, *other_sob;
1547         u8 other_sob_offset;
1548
1549         prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1550
1551         hw_sob_get(sob);
1552
1553         /* check for wraparound */
1554         if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
1555                 /*
1556                  * Decrement as we reached the max value.
1557                  * The release function won't be called here as we've
1558                  * just incremented the refcount right before calling this
1559                  * function.
1560                  */
1561                 hw_sob_put_err(sob);
1562
1563                 /*
1564                  * Check the other SOB value; if it is still in use then fail,
1565                  * otherwise make the switch.
1566                  */
1567                 other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
1568                 other_sob = &prop->hw_sob[other_sob_offset];
1569
1570                 if (kref_read(&other_sob->kref) != 1) {
1571                         dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n",
1572                                                                 q_idx);
1573                         return -EINVAL;
1574                 }
1575
1576                 /*
1577                  * next_sob_val always points to the next available signal
1578                  * in the sob, so in encaps signals it will be the next one
1579                  * after reserving the required amount.
1580                  */
1581                 if (encaps_sig)
1582                         prop->next_sob_val = count + 1;
1583                 else
1584                         prop->next_sob_val = count;
1585
1586                 /* only two SOBs are currently in use */
1587                 prop->curr_sob_offset = other_sob_offset;
1588                 *hw_sob = other_sob;
1589
1590                 /*
1591                  * Check if other_sob needs a reset, and if so do it before
1592                  * using it for the reservation or the next signal CS.
1593                  * We do it here, for both the encaps and the regular signal
1594                  * CS cases, in order to avoid a possible race of two
1595                  * kref_put calls on the SOB occurring at the same time,
1596                  * which could happen if we moved the SOB reset (kref_put)
1597                  * to the cs_do_release function.
1598                  * In addition, if we have a combination of signal CS and
1599                  * encaps and, at the point the SOB needs a reset, there are
1600                  * no more reservations and only signal CSs keep coming,
1601                  * the signal CS must put the refcount and reset the SOB.
1602                  */
1603                 if (other_sob->need_reset)
1604                         hw_sob_put(other_sob);
1605
1606                 if (encaps_sig) {
1607                         /* set reset indication for the sob */
1608                         sob->need_reset = true;
1609                         hw_sob_get(other_sob);
1610                 }
1611
1612                 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
1613                                 prop->curr_sob_offset, q_idx);
1614         } else {
1615                 prop->next_sob_val += count;
1616         }
1617
1618         return 0;
1619 }
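/*
 * Illustrative sketch (not driver code): the wraparound decision above,
 * isolated. The threshold value used in the example is an assumption made
 * only for illustration; the real limit is HL_MAX_SOB_VAL.
 *
 *	bool sob_would_wrap(u32 next_sob_val, u32 count, u32 max_sob_val)
 *	{
 *		return next_sob_val + count >= max_sob_val;
 *	}
 *
 * e.g. with max_sob_val = 1 << 15, next_sob_val = 32760 and count = 16 the
 * handler switches SOBs and restarts the new SOB's counter at count (or at
 * count + 1 when reserving encapsulated signals).
 */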
1620
1621 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
1622                 struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx,
1623                 bool encaps_signals)
1624 {
1625         u64 *signal_seq_arr = NULL;
1626         u32 size_to_copy, signal_seq_arr_len;
1627         int rc = 0;
1628
1629         if (encaps_signals) {
1630                 *signal_seq = chunk->encaps_signal_seq;
1631                 return 0;
1632         }
1633
1634         signal_seq_arr_len = chunk->num_signal_seq_arr;
1635
1636         /* currently only one signal seq is supported */
1637         if (signal_seq_arr_len != 1) {
1638                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1639                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1640                 dev_err(hdev->dev,
1641                         "Wait for signal CS supports only one signal CS seq\n");
1642                 return -EINVAL;
1643         }
1644
1645         signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1646                                         sizeof(*signal_seq_arr),
1647                                         GFP_ATOMIC);
1648         if (!signal_seq_arr)
1649                 signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1650                                         sizeof(*signal_seq_arr),
1651                                         GFP_KERNEL);
1652         if (!signal_seq_arr) {
1653                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1654                 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1655                 return -ENOMEM;
1656         }
1657
1658         size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr);
1659         if (copy_from_user(signal_seq_arr,
1660                                 u64_to_user_ptr(chunk->signal_seq_arr),
1661                                 size_to_copy)) {
1662                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1663                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1664                 dev_err(hdev->dev,
1665                         "Failed to copy signal seq array from user\n");
1666                 rc = -EFAULT;
1667                 goto out;
1668         }
1669
1670         /* currently it is guaranteed to have only one signal seq */
1671         *signal_seq = signal_seq_arr[0];
1672
1673 out:
1674         kfree(signal_seq_arr);
1675
1676         return rc;
1677 }
1678
1679 static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
1680                 struct hl_ctx *ctx, struct hl_cs *cs,
1681                 enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset)
1682 {
1683         struct hl_cs_counters_atomic *cntr;
1684         struct hl_cs_job *job;
1685         struct hl_cb *cb;
1686         u32 cb_size;
1687
1688         cntr = &hdev->aggregated_cs_counters;
1689
1690         job = hl_cs_allocate_job(hdev, q_type, true);
1691         if (!job) {
1692                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1693                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1694                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1695                 return -ENOMEM;
1696         }
1697
1698         if (cs->type == CS_TYPE_WAIT)
1699                 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
1700         else
1701                 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
1702
1703         cb = hl_cb_kernel_create(hdev, cb_size,
1704                                 q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
1705         if (!cb) {
1706                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1707                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1708                 kfree(job);
1709                 return -EFAULT;
1710         }
1711
1712         job->id = 0;
1713         job->cs = cs;
1714         job->user_cb = cb;
1715         atomic_inc(&job->user_cb->cs_cnt);
1716         job->user_cb_size = cb_size;
1717         job->hw_queue_id = q_idx;
1718
1719         if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
1720                         && cs->encaps_signals)
1721                 job->encaps_sig_wait_offset = encaps_signal_offset;
1722         /*
1723          * No need for parsing, the user CB is already the patched CB.
1724          * We call hl_cb_destroy() for two reasons: we don't need the CB in
1725          * the CB idr anymore, and to decrement its refcount as it was
1726          * incremented inside hl_cb_kernel_create().
1727          */
1728         job->patched_cb = job->user_cb;
1729         job->job_cb_size = job->user_cb_size;
1730         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1731
1732         /* increment refcount as for external queues we get completion */
1733         cs_get(cs);
1734
1735         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1736
1737         list_add_tail(&job->cs_node, &cs->job_list);
1738
1739         hl_debugfs_add_job(hdev, job);
1740
1741         return 0;
1742 }
1743
1744 static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
1745                                 u32 q_idx, u32 count,
1746                                 u32 *handle_id, u32 *sob_addr,
1747                                 u32 *signals_count)
1748 {
1749         struct hw_queue_properties *hw_queue_prop;
1750         struct hl_sync_stream_properties *prop;
1751         struct hl_device *hdev = hpriv->hdev;
1752         struct hl_cs_encaps_sig_handle *handle;
1753         struct hl_encaps_signals_mgr *mgr;
1754         struct hl_hw_sob *hw_sob;
1755         int hdl_id;
1756         int rc = 0;
1757
1758         if (count >= HL_MAX_SOB_VAL) {
1759                 dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n",
1760                                                 count);
1761                 rc = -EINVAL;
1762                 goto out;
1763         }
1764
1765         if (q_idx >= hdev->asic_prop.max_queues) {
1766                 dev_err(hdev->dev, "Queue index %d is invalid\n",
1767                         q_idx);
1768                 rc = -EINVAL;
1769                 goto out;
1770         }
1771
1772         hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
1773
1774         if (!hw_queue_prop->supports_sync_stream) {
1775                 dev_err(hdev->dev,
1776                         "Queue index %d does not support sync stream operations\n",
1777                                                                         q_idx);
1778                 rc = -EINVAL;
1779                 goto out;
1780         }
1781
1782         prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1783
1784         handle = kzalloc(sizeof(*handle), GFP_KERNEL);
1785         if (!handle) {
1786                 rc = -ENOMEM;
1787                 goto out;
1788         }
1789
1790         handle->count = count;
1791         mgr = &hpriv->ctx->sig_mgr;
1792
1793         spin_lock(&mgr->lock);
1794         hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC);
1795         spin_unlock(&mgr->lock);
1796
1797         if (hdl_id < 0) {
1798                 dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
1799                 rc = -EINVAL;
1800                 goto out;
1801         }
1802
1803         handle->id = hdl_id;
1804         handle->q_idx = q_idx;
1805         handle->hdev = hdev;
1806         kref_init(&handle->refcount);
1807
1808         hdev->asic_funcs->hw_queues_lock(hdev);
1809
1810         hw_sob = &prop->hw_sob[prop->curr_sob_offset];
1811
1812         /*
1813          * Increment the SOB value by count, per the user request,
1814          * to reserve those signals.
1815          * Check that the amount of signals to reserve does not exceed the max
1816          * SOB value; if it does, switch SOBs.
1817          */
1818         rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count,
1819                                                                 true);
1820         if (rc) {
1821                 dev_err(hdev->dev, "Failed to switch SOB\n");
1822                 hdev->asic_funcs->hw_queues_unlock(hdev);
1823                 rc = -EINVAL;
1824                 goto remove_idr;
1825         }
1826         /* Set the hw_sob in the handle only after calling the SOB wraparound
1827          * handler, since the SOB could have changed.
1828          */
1829         handle->hw_sob = hw_sob;
1830
1831         /* store the current sob value for unreserve validity check, and
1832          * signal offset support
1833          */
1834         handle->pre_sob_val = prop->next_sob_val - handle->count;
1835
1836         *signals_count = prop->next_sob_val;
1837         hdev->asic_funcs->hw_queues_unlock(hdev);
1838
1839         *sob_addr = handle->hw_sob->sob_addr;
1840         *handle_id = hdl_id;
1841
1842         dev_dbg(hdev->dev,
1843                 "Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n",
1844                         hw_sob->sob_id, handle->hw_sob->sob_addr,
1845                         prop->next_sob_val - 1, q_idx, hdl_id);
1846         goto out;
1847
1848 remove_idr:
1849         spin_lock(&mgr->lock);
1850         idr_remove(&mgr->handles, hdl_id);
1851         spin_unlock(&mgr->lock);
1852
1853         kfree(handle);
1854 out:
1855         return rc;
1856 }
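/*
 * Worked example (illustrative only) of the reservation bookkeeping above:
 * with next_sob_val at 100 and a reservation of count = 10 that does not
 * wrap, the wraparound handler advances next_sob_val to 110, pre_sob_val is
 * stored as 110 - 10 = 100 (the value before the reservation) and
 * *signals_count returns 110. Unreserve later checks that pre_sob_val + count
 * still equals next_sob_val (100 + 10 == 110) before rolling next_sob_val
 * back to 100.
 */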
1857
1858 static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
1859 {
1860         struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
1861         struct hl_sync_stream_properties *prop;
1862         struct hl_device *hdev = hpriv->hdev;
1863         struct hl_encaps_signals_mgr *mgr;
1864         struct hl_hw_sob *hw_sob;
1865         u32 q_idx, sob_addr;
1866         int rc = 0;
1867
1868         mgr = &hpriv->ctx->sig_mgr;
1869
1870         spin_lock(&mgr->lock);
1871         encaps_sig_hdl = idr_find(&mgr->handles, handle_id);
1872         if (encaps_sig_hdl) {
1873                 dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n",
1874                                 handle_id, encaps_sig_hdl->hw_sob->sob_addr,
1875                                         encaps_sig_hdl->count);
1876
1877                 hdev->asic_funcs->hw_queues_lock(hdev);
1878
1879                 q_idx = encaps_sig_hdl->q_idx;
1880                 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1881                 hw_sob = &prop->hw_sob[prop->curr_sob_offset];
1882                 sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
1883
1884                 /* Check if sob_val got out of sync due to other
1885                  * signal submission requests which were handled
1886                  * between the reserve-unreserve calls or SOB switch
1887                  * upon reaching SOB max value.
1888                  */
1889                 if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count
1890                                 != prop->next_sob_val ||
1891                                 sob_addr != encaps_sig_hdl->hw_sob->sob_addr) {
1892                         dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n",
1893                                 encaps_sig_hdl->pre_sob_val,
1894                                 (prop->next_sob_val - encaps_sig_hdl->count));
1895
1896                         hdev->asic_funcs->hw_queues_unlock(hdev);
1897                         rc = -EINVAL;
1898                         goto out;
1899                 }
1900
1901                 /*
1902                  * Decrement the SOB value by count, per the user request,
1903                  * to unreserve those signals.
1904                  */
1905                 prop->next_sob_val -= encaps_sig_hdl->count;
1906
1907                 hdev->asic_funcs->hw_queues_unlock(hdev);
1908
1909                 hw_sob_put(hw_sob);
1910
1911                 /* Release the id and free allocated memory of the handle */
1912                 idr_remove(&mgr->handles, handle_id);
1913                 kfree(encaps_sig_hdl);
1914         } else {
1915                 rc = -EINVAL;
1916                 dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n");
1917         }
1918 out:
1919         spin_unlock(&mgr->lock);
1920
1921         return rc;
1922 }
1923
1924 static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
1925                                 void __user *chunks, u32 num_chunks,
1926                                 u64 *cs_seq, u32 flags, u32 timeout)
1927 {
1928         struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL;
1929         bool handle_found = false, is_wait_cs = false,
1930                         wait_cs_submitted = false,
1931                         cs_encaps_signals = false;
1932         struct hl_cs_chunk *cs_chunk_array, *chunk;
1933         bool staged_cs_with_encaps_signals = false;
1934         struct hw_queue_properties *hw_queue_prop;
1935         struct hl_device *hdev = hpriv->hdev;
1936         struct hl_cs_compl *sig_waitcs_cmpl;
1937         u32 q_idx, collective_engine_id = 0;
1938         struct hl_cs_counters_atomic *cntr;
1939         struct hl_fence *sig_fence = NULL;
1940         struct hl_ctx *ctx = hpriv->ctx;
1941         enum hl_queue_type q_type;
1942         struct hl_cs *cs;
1943         u64 signal_seq;
1944         int rc;
1945
1946         cntr = &hdev->aggregated_cs_counters;
1947         *cs_seq = ULLONG_MAX;
1948
1949         rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1950                         ctx);
1951         if (rc)
1952                 goto out;
1953
1954         /* currently it is guaranteed to have only one chunk */
1955         chunk = &cs_chunk_array[0];
1956
1957         if (chunk->queue_index >= hdev->asic_prop.max_queues) {
1958                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1959                 atomic64_inc(&cntr->validation_drop_cnt);
1960                 dev_err(hdev->dev, "Queue index %d is invalid\n",
1961                         chunk->queue_index);
1962                 rc = -EINVAL;
1963                 goto free_cs_chunk_array;
1964         }
1965
1966         q_idx = chunk->queue_index;
1967         hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
1968         q_type = hw_queue_prop->type;
1969
1970         if (!hw_queue_prop->supports_sync_stream) {
1971                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1972                 atomic64_inc(&cntr->validation_drop_cnt);
1973                 dev_err(hdev->dev,
1974                         "Queue index %d does not support sync stream operations\n",
1975                         q_idx);
1976                 rc = -EINVAL;
1977                 goto free_cs_chunk_array;
1978         }
1979
1980         if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
1981                 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1982                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1983                         atomic64_inc(&cntr->validation_drop_cnt);
1984                         dev_err(hdev->dev,
1985                                 "Queue index %d is invalid\n", q_idx);
1986                         rc = -EINVAL;
1987                         goto free_cs_chunk_array;
1988                 }
1989
1990                 collective_engine_id = chunk->collective_engine_id;
1991         }
1992
1993         is_wait_cs = !!(cs_type == CS_TYPE_WAIT ||
1994                         cs_type == CS_TYPE_COLLECTIVE_WAIT);
1995
1996         cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
1997
1998         if (is_wait_cs) {
1999                 rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq,
2000                                 ctx, cs_encaps_signals);
2001                 if (rc)
2002                         goto free_cs_chunk_array;
2003
2004                 if (cs_encaps_signals) {
2005                         /* check if cs sequence has encapsulated
2006                          * signals handle
2007                          */
2008                         struct idr *idp;
2009                         u32 id;
2010
2011                         spin_lock(&ctx->sig_mgr.lock);
2012                         idp = &ctx->sig_mgr.handles;
2013                         idr_for_each_entry(idp, encaps_sig_hdl, id) {
2014                                 if (encaps_sig_hdl->cs_seq == signal_seq) {
2015                                         handle_found = true;
2016                                         /* get refcount to protect removing
2017                                          * this handle from idr, needed when
2018                                          * multiple wait cs are used with offset
2019                                          * to wait on reserved encaps signals.
2020                                          */
2021                                         kref_get(&encaps_sig_hdl->refcount);
2022                                         break;
2023                                 }
2024                         }
2025                         spin_unlock(&ctx->sig_mgr.lock);
2026
2027                         if (!handle_found) {
2028                                 dev_err(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
2029                                                 signal_seq);
2030                                 rc = -EINVAL;
2031                                 goto free_cs_chunk_array;
2032                         }
2033
2034                         /* validate also the signal offset value */
2035                         if (chunk->encaps_signal_offset >
2036                                         encaps_sig_hdl->count) {
2037                                 dev_err(hdev->dev, "offset(%u) value exceeds max reserved signals count(%u)!\n",
2038                                                 chunk->encaps_signal_offset,
2039                                                 encaps_sig_hdl->count);
2040                                 rc = -EINVAL;
2041                                 goto free_cs_chunk_array;
2042                         }
2043                 }
2044
2045                 sig_fence = hl_ctx_get_fence(ctx, signal_seq);
2046                 if (IS_ERR(sig_fence)) {
2047                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2048                         atomic64_inc(&cntr->validation_drop_cnt);
2049                         dev_err(hdev->dev,
2050                                 "Failed to get signal CS with seq 0x%llx\n",
2051                                 signal_seq);
2052                         rc = PTR_ERR(sig_fence);
2053                         goto free_cs_chunk_array;
2054                 }
2055
2056                 if (!sig_fence) {
2057                         /* signal CS already finished */
2058                         rc = 0;
2059                         goto free_cs_chunk_array;
2060                 }
2061
2062                 sig_waitcs_cmpl =
2063                         container_of(sig_fence, struct hl_cs_compl, base_fence);
2064
2065                 staged_cs_with_encaps_signals = !!
2066                                 (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT &&
2067                                 (flags & HL_CS_FLAGS_ENCAP_SIGNALS));
2068
2069                 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL &&
2070                                 !staged_cs_with_encaps_signals) {
2071                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2072                         atomic64_inc(&cntr->validation_drop_cnt);
2073                         dev_err(hdev->dev,
2074                                 "CS seq 0x%llx is not of a signal/encaps-signal CS\n",
2075                                 signal_seq);
2076                         hl_fence_put(sig_fence);
2077                         rc = -EINVAL;
2078                         goto free_cs_chunk_array;
2079                 }
2080
2081                 if (completion_done(&sig_fence->completion)) {
2082                         /* signal CS already finished */
2083                         hl_fence_put(sig_fence);
2084                         rc = 0;
2085                         goto free_cs_chunk_array;
2086                 }
2087         }
2088
2089         rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
2090         if (rc) {
2091                 if (is_wait_cs)
2092                         hl_fence_put(sig_fence);
2093
2094                 goto free_cs_chunk_array;
2095         }
2096
2097         /*
2098          * Save the signal CS fence for later initialization right before
2099          * hanging the wait CS on the queue.
2100          * For the encaps signals case, we save the CS sequence and handle
2101          * pointer for later initialization.
2102          */
2103         if (is_wait_cs) {
2104                 cs->signal_fence = sig_fence;
2105                 /* Store the handle pointer, so we don't have to
2106                  * look for it again later in the flow,
2107                  * when we need to set the SOB info in hw_queue.
2108                  */
2109                 if (cs->encaps_signals)
2110                         cs->encaps_sig_hdl = encaps_sig_hdl;
2111         }
2112
2113         hl_debugfs_add_cs(cs);
2114
2115         *cs_seq = cs->sequence;
2116
2117         if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
2118                 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
2119                                 q_idx, chunk->encaps_signal_offset);
2120         else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
2121                 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
2122                                 cs, q_idx, collective_engine_id,
2123                                 chunk->encaps_signal_offset);
2124         else {
2125                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2126                 atomic64_inc(&cntr->validation_drop_cnt);
2127                 rc = -EINVAL;
2128         }
2129
2130         if (rc)
2131                 goto free_cs_object;
2132
2133         rc = hl_hw_queue_schedule_cs(cs);
2134         if (rc) {
2135                 /* In case a wait CS failed here, it means the signal CS
2136                  * already completed. We want to free all its related objects,
2137                  * but we don't want to fail the ioctl.
2138                  */
2139                 if (is_wait_cs)
2140                         rc = 0;
2141                 else if (rc != -EAGAIN)
2142                         dev_err(hdev->dev,
2143                                 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
2144                                 ctx->asid, cs->sequence, rc);
2145                 goto free_cs_object;
2146         }
2147
2148         rc = HL_CS_STATUS_SUCCESS;
2149         if (is_wait_cs)
2150                 wait_cs_submitted = true;
2151         goto put_cs;
2152
2153 free_cs_object:
2154         cs_rollback(hdev, cs);
2155         *cs_seq = ULLONG_MAX;
2156         /* The path below is both for good and erroneous exits */
2157 put_cs:
2158         /* We finished with the CS in this function, so put the ref */
2159         cs_put(cs);
2160 free_cs_chunk_array:
2161         if (!wait_cs_submitted && cs_encaps_signals && handle_found &&
2162                                                         is_wait_cs)
2163                 kref_put(&encaps_sig_hdl->refcount,
2164                                 hl_encaps_handle_do_release);
2165         kfree(cs_chunk_array);
2166 out:
2167         return rc;
2168 }
2169
2170 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
2171 {
2172         union hl_cs_args *args = data;
2173         enum hl_cs_type cs_type = 0;
2174         u64 cs_seq = ULLONG_MAX;
2175         void __user *chunks;
2176         u32 num_chunks, flags, timeout,
2177                 signals_count = 0, sob_addr = 0, handle_id = 0;
2178         int rc;
2179
2180         rc = hl_cs_sanity_checks(hpriv, args);
2181         if (rc)
2182                 goto out;
2183
2184         rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
2185         if (rc)
2186                 goto out;
2187
2188         cs_type = hl_cs_get_cs_type(args->in.cs_flags &
2189                                         ~HL_CS_FLAGS_FORCE_RESTORE);
2190         chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
2191         num_chunks = args->in.num_chunks_execute;
2192         flags = args->in.cs_flags;
2193
2194         /* In case this is a staged CS, user should supply the CS sequence */
2195         if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
2196                         !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
2197                 cs_seq = args->in.seq;
2198
2199         timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
2200                         ? msecs_to_jiffies(args->in.timeout * 1000)
2201                         : hpriv->hdev->timeout_jiffies;
2202
2203         switch (cs_type) {
2204         case CS_TYPE_SIGNAL:
2205         case CS_TYPE_WAIT:
2206         case CS_TYPE_COLLECTIVE_WAIT:
2207                 rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
2208                                         &cs_seq, args->in.cs_flags, timeout);
2209                 break;
2210         case CS_RESERVE_SIGNALS:
2211                 rc = cs_ioctl_reserve_signals(hpriv,
2212                                         args->in.encaps_signals_q_idx,
2213                                         args->in.encaps_signals_count,
2214                                         &handle_id, &sob_addr, &signals_count);
2215                 break;
2216         case CS_UNRESERVE_SIGNALS:
2217                 rc = cs_ioctl_unreserve_signals(hpriv,
2218                                         args->in.encaps_sig_handle_id);
2219                 break;
2220         default:
2221                 rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
2222                                                 args->in.cs_flags,
2223                                                 args->in.encaps_sig_handle_id,
2224                                                 timeout);
2225                 break;
2226         }
2227 out:
2228         if (rc != -EAGAIN) {
2229                 memset(args, 0, sizeof(*args));
2230
2231                 if (cs_type == CS_RESERVE_SIGNALS) {
2232                         args->out.handle_id = handle_id;
2233                         args->out.sob_base_addr_offset = sob_addr;
2234                         args->out.count = signals_count;
2235                 } else {
2236                         args->out.seq = cs_seq;
2237                 }
2238                 args->out.status = rc;
2239         }
2240
2241         return rc;
2242 }
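/*
 * Minimal user-space sketch (illustrative, not part of the driver) of a
 * default CS submission through this ioctl with a custom timeout. The ioctl
 * request name HL_IOCTL_CS, the fd, chunks and nr_chunks are assumptions;
 * the field names mirror the handler above, and args.in.timeout is given in
 * seconds.
 *
 *	union hl_cs_args args = {};
 *
 *	args.in.chunks_execute = (__u64) (uintptr_t) chunks;
 *	args.in.num_chunks_execute = nr_chunks;
 *	args.in.cs_flags = HL_CS_FLAGS_CUSTOM_TIMEOUT;
 *	args.in.timeout = 30;
 *
 *	if (!ioctl(fd, HL_IOCTL_CS, &args))
 *		printf("CS seq %llu, status %d\n",
 *			(unsigned long long) args.out.seq, args.out.status);
 */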
2243
2244 static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence,
2245                                 enum hl_cs_wait_status *status, u64 timeout_us,
2246                                 s64 *timestamp)
2247 {
2248         struct hl_device *hdev = ctx->hdev;
2249         long completion_rc;
2250         int rc = 0;
2251
2252         if (IS_ERR(fence)) {
2253                 rc = PTR_ERR(fence);
2254                 if (rc == -EINVAL)
2255                         dev_notice_ratelimited(hdev->dev,
2256                                 "Can't wait on CS %llu because current CS is at seq %llu\n",
2257                                 seq, ctx->cs_sequence);
2258                 return rc;
2259         }
2260
2261         if (!fence) {
2262                 dev_dbg(hdev->dev,
2263                         "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
2264                                 seq, ctx->cs_sequence);
2265
2266                 *status = CS_WAIT_STATUS_GONE;
2267                 return 0;
2268         }
2269
2270         if (!timeout_us) {
2271                 completion_rc = completion_done(&fence->completion);
2272         } else {
2273                 unsigned long timeout;
2274
2275                 timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ?
2276                                 timeout_us : usecs_to_jiffies(timeout_us);
2277                 completion_rc =
2278                         wait_for_completion_interruptible_timeout(
2279                                 &fence->completion, timeout);
2280         }
2281
2282         if (completion_rc > 0) {
2283                 *status = CS_WAIT_STATUS_COMPLETED;
2284                 if (timestamp)
2285                         *timestamp = ktime_to_ns(fence->timestamp);
2286         } else {
2287                 *status = CS_WAIT_STATUS_BUSY;
2288         }
2289
2290         if (fence->error == -ETIMEDOUT)
2291                 rc = -ETIMEDOUT;
2292         else if (fence->error == -EIO)
2293                 rc = -EIO;
2294
2295         return rc;
2296 }
2297
2298 /*
2299  * hl_cs_poll_fences - iterate CS fences to check for CS completion
2300  *
2301  * @mcs_data: multi-CS internal data
2302  *
2303  * @return 0 on success, otherwise non 0 error code
2304  *
2305  * The function iterates over all CS sequences in the list and sets a bit in
2306  * completion_bitmap for each completed CS.
2307  * While iterating, the function extracts the stream master QID map to be
2308  * used later by the waiting function.
2309  * This function shall be called after taking the context ref.
2310  */
2311 static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
2312 {
2313         struct hl_fence **fence_ptr = mcs_data->fence_arr;
2314         struct hl_device *hdev = mcs_data->ctx->hdev;
2315         int i, rc, arr_len = mcs_data->arr_len;
2316         u64 *seq_arr = mcs_data->seq_arr;
2317         ktime_t max_ktime, first_cs_time;
2318         enum hl_cs_wait_status status;
2319
2320         memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr));
2321
2322         /* get all fences under the same lock */
2323         rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
2324         if (rc)
2325                 return rc;
2326
2327         /*
2328          * Set to the maximum time to verify the timestamp is valid: if at the
2329          * end this value is unchanged, no timestamp was updated.
2330          */
2331         max_ktime = ktime_set(KTIME_SEC_MAX, 0);
2332         first_cs_time = max_ktime;
2333
2334         for (i = 0; i < arr_len; i++, fence_ptr++) {
2335                 struct hl_fence *fence = *fence_ptr;
2336
2337                 /*
2338                  * function won't sleep as it is called with timeout 0 (i.e.
2339                  * poll the fence)
2340                  */
2341                 rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence,
2342                                                 &status, 0, NULL);
2343                 if (rc) {
2344                         dev_err(hdev->dev,
2345                                 "wait_for_fence error :%d for CS seq %llu\n",
2346                                                                 rc, seq_arr[i]);
2347                         break;
2348                 }
2349
2350                 mcs_data->stream_master_qid_map |= fence->stream_master_qid_map;
2351
2352                 if (status == CS_WAIT_STATUS_BUSY)
2353                         continue;
2354
2355                 mcs_data->completion_bitmap |= BIT(i);
2356
2357                 /*
2358                  * Best effort to extract the timestamp. A few notes:
2359                  * - if even a single fence is gone we cannot extract the
2360                  *   timestamp (as the fence does not exist anymore)
2361                  * - for all completed CSs we take the earliest timestamp.
2362                  *   For this we have to validate that:
2363                  *       1. the given timestamp was indeed set
2364                  *       2. the timestamp is the earliest of all timestamps so far
2365                  */
2366
2367                 if (status == CS_WAIT_STATUS_GONE) {
2368                         mcs_data->update_ts = false;
2369                         mcs_data->gone_cs = true;
2370                 } else if (mcs_data->update_ts &&
2371                         (ktime_compare(fence->timestamp,
2372                                                 ktime_set(0, 0)) > 0) &&
2373                         (ktime_compare(fence->timestamp, first_cs_time) < 0)) {
2374                         first_cs_time = fence->timestamp;
2375                 }
2376         }
2377
2378         hl_fences_put(mcs_data->fence_arr, arr_len);
2379
2380         if (mcs_data->update_ts &&
2381                         (ktime_compare(first_cs_time, max_ktime) != 0))
2382                 mcs_data->timestamp = ktime_to_ns(first_cs_time);
2383
2384         return rc;
2385 }
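/*
 * Illustrative note: bit i of completion_bitmap corresponds to seq_arr[i], so
 * a hypothetical caller-side dump of the result could look like:
 *
 *	for (i = 0; i < mcs_data->arr_len; i++)
 *		if (mcs_data->completion_bitmap & BIT(i))
 *			dev_dbg(hdev->dev, "CS seq %llu completed\n",
 *					mcs_data->seq_arr[i]);
 */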
2386
2387 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
2388                                 u64 timeout_us, u64 seq,
2389                                 enum hl_cs_wait_status *status, s64 *timestamp)
2390 {
2391         struct hl_fence *fence;
2392         int rc = 0;
2393
2394         if (timestamp)
2395                 *timestamp = 0;
2396
2397         hl_ctx_get(hdev, ctx);
2398
2399         fence = hl_ctx_get_fence(ctx, seq);
2400
2401         rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp);
2402         hl_fence_put(fence);
2403         hl_ctx_put(ctx);
2404
2405         return rc;
2406 }
2407
2408 /*
2409  * hl_wait_multi_cs_completion_init - init completion structure
2410  *
2411  * @hdev: pointer to habanalabs device structure
2412  * @stream_master_bitmap: stream master QIDs map, set bit indicates stream
2413  *                        master QID to wait on
2414  *
2415  * @return valid completion struct pointer on success, otherwise error pointer
2416  *
2417  * Up to MULTI_CS_MAX_USER_CTX calls can be made to the driver concurrently.
2418  * The function gets the first available completion (by marking it "used")
2419  * and initializes its values.
2420  */
2421 static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
2422                                                         struct hl_device *hdev,
2423                                                         u8 stream_master_bitmap)
2424 {
2425         struct multi_cs_completion *mcs_compl;
2426         int i;
2427
2428         /* find free multi_cs completion structure */
2429         for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
2430                 mcs_compl = &hdev->multi_cs_completion[i];
2431                 spin_lock(&mcs_compl->lock);
2432                 if (!mcs_compl->used) {
2433                         mcs_compl->used = 1;
2434                         mcs_compl->timestamp = 0;
2435                         mcs_compl->stream_master_qid_map = stream_master_bitmap;
2436                         reinit_completion(&mcs_compl->completion);
2437                         spin_unlock(&mcs_compl->lock);
2438                         break;
2439                 }
2440                 spin_unlock(&mcs_compl->lock);
2441         }
2442
2443         if (i == MULTI_CS_MAX_USER_CTX) {
2444                 dev_err(hdev->dev,
2445                                 "no available multi-CS completion structure\n");
2446                 return ERR_PTR(-ENOMEM);
2447         }
2448         return mcs_compl;
2449 }
2450
2451 /*
2452  * hl_wait_multi_cs_completion_fini - return completion structure and set as
2453  *                                    unused
2454  *
2455  * @mcs_compl: pointer to the completion structure
2456  */
2457 static void hl_wait_multi_cs_completion_fini(
2458                                         struct multi_cs_completion *mcs_compl)
2459 {
2460         /*
2461          * free completion structure, do it under lock to be in-sync with the
2462          * thread that signals completion
2463          */
2464         spin_lock(&mcs_compl->lock);
2465         mcs_compl->used = 0;
2466         spin_unlock(&mcs_compl->lock);
2467 }
2468
2469 /*
2470  * hl_wait_multi_cs_completion - wait for first CS to complete
2471  *
2472  * @mcs_data: multi-CS internal data
2473  *
2474  * @return 0 on success, otherwise non 0 error code
2475  */
2476 static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data)
2477 {
2478         struct hl_device *hdev = mcs_data->ctx->hdev;
2479         struct multi_cs_completion *mcs_compl;
2480         long completion_rc;
2481
2482         mcs_compl = hl_wait_multi_cs_completion_init(hdev,
2483                                         mcs_data->stream_master_qid_map);
2484         if (IS_ERR(mcs_compl))
2485                 return PTR_ERR(mcs_compl);
2486
2487         completion_rc = wait_for_completion_interruptible_timeout(
2488                                         &mcs_compl->completion,
2489                                         usecs_to_jiffies(mcs_data->timeout_us));
2490
2491         /* update timestamp */
2492         if (completion_rc > 0)
2493                 mcs_data->timestamp = mcs_compl->timestamp;
2494
2495         hl_wait_multi_cs_completion_fini(mcs_compl);
2496
2497         mcs_data->wait_status = completion_rc;
2498
2499         return 0;
2500 }
2501
2502 /*
2503  * hl_multi_cs_completion_init - init array of multi-CS completion structures
2504  *
2505  * @hdev: pointer to habanalabs device structure
2506  */
2507 void hl_multi_cs_completion_init(struct hl_device *hdev)
2508 {
2509         struct multi_cs_completion *mcs_cmpl;
2510         int i;
2511
2512         for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
2513                 mcs_cmpl = &hdev->multi_cs_completion[i];
2514                 mcs_cmpl->used = 0;
2515                 spin_lock_init(&mcs_cmpl->lock);
2516                 init_completion(&mcs_cmpl->completion);
2517         }
2518 }
2519
2520 /*
2521  * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
2522  *
2523  * @hpriv: pointer to the private data of the fd
2524  * @data: pointer to multi-CS wait ioctl in/out args
2525  *
2526  */
2527 static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
2528 {
2529         struct hl_device *hdev = hpriv->hdev;
2530         struct multi_cs_data mcs_data = {0};
2531         union hl_wait_cs_args *args = data;
2532         struct hl_ctx *ctx = hpriv->ctx;
2533         struct hl_fence **fence_arr;
2534         void __user *seq_arr;
2535         u32 size_to_copy;
2536         u64 *cs_seq_arr;
2537         u8 seq_arr_len;
2538         int rc;
2539
2540         if (!hdev->supports_wait_for_multi_cs) {
2541                 dev_err(hdev->dev, "Wait for multi CS is not supported\n");
2542                 return -EPERM;
2543         }
2544
2545         seq_arr_len = args->in.seq_arr_len;
2546
2547         if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) {
2548                 dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n",
2549                                 HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len);
2550                 return -EINVAL;
2551         }
2552
2553         /* allocate memory for sequence array */
2554         cs_seq_arr =
2555                 kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL);
2556         if (!cs_seq_arr)
2557                 return -ENOMEM;
2558
2559         /* copy CS sequence array from user */
2560         seq_arr = (void __user *) (uintptr_t) args->in.seq;
2561         size_to_copy = seq_arr_len * sizeof(*cs_seq_arr);
2562         if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) {
2563                 dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n");
2564                 rc = -EFAULT;
2565                 goto free_seq_arr;
2566         }
2567
2568         /* allocate array for the fences */
2569         fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL);
2570         if (!fence_arr) {
2571                 rc = -ENOMEM;
2572                 goto free_seq_arr;
2573         }
2574
2575         /* initialize the multi-CS internal data */
2576         mcs_data.ctx = ctx;
2577         mcs_data.seq_arr = cs_seq_arr;
2578         mcs_data.fence_arr = fence_arr;
2579         mcs_data.arr_len = seq_arr_len;
2580
2581         hl_ctx_get(hdev, ctx);
2582
2583         /* poll all CS fences, extract timestamp */
2584         mcs_data.update_ts = true;
2585         rc = hl_cs_poll_fences(&mcs_data);
2586         /*
2587          * skip wait for CS completion when one of the below is true:
2588          * - an error on the poll function
2589          * - one or more CS in the list completed
2590          * - the user called ioctl with timeout 0
2591          */
2592         if (rc || mcs_data.completion_bitmap || !args->in.timeout_us)
2593                 goto put_ctx;
2594
2595         /* wait (with timeout) for the first CS to be completed */
2596         mcs_data.timeout_us = args->in.timeout_us;
2597         rc = hl_wait_multi_cs_completion(&mcs_data);
2598         if (rc)
2599                 goto put_ctx;
2600
2601         if (mcs_data.wait_status > 0) {
2602                 /*
2603                  * poll fences once again to update the CS map.
2604                  * no timestamp should be updated this time.
2605                  */
2606                 mcs_data.update_ts = false;
2607                 rc = hl_cs_poll_fences(&mcs_data);
2608
2609                 /*
2610                  * if hl_wait_multi_cs_completion returned before timeout (i.e.
2611                  * it got a completion) we expect to see at least one CS
2612                  * completed after the poll function.
2613                  */
2614                 if (!mcs_data.completion_bitmap) {
2615                         dev_err(hdev->dev, "Multi-CS got completion on wait but no CS completed\n");
2616                         rc = -EFAULT;
2617                 }
2618         }
2619
2620 put_ctx:
2621         hl_ctx_put(ctx);
2622         kfree(fence_arr);
2623
2624 free_seq_arr:
2625         kfree(cs_seq_arr);
2626
2627         /* update output args */
2628         memset(args, 0, sizeof(*args));
2629         if (rc)
2630                 return rc;
2631
2632         if (mcs_data.completion_bitmap) {
2633                 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
2634                 args->out.cs_completion_map = mcs_data.completion_bitmap;
2635
2636                 /* if timestamp not 0- it's valid */
2637                 if (mcs_data.timestamp) {
2638                         args->out.timestamp_nsec = mcs_data.timestamp;
2639                         args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
2640                 }
2641
2642                 /* update if some CS was gone */
2643                 if (mcs_data.gone_cs)
2644                         args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
2645         } else if (mcs_data.wait_status == -ERESTARTSYS) {
2646                 args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
2647         } else {
2648                 args->out.status = HL_WAIT_CS_STATUS_BUSY;
2649         }
2650
2651         return 0;
2652 }
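/*
 * Illustrative user-space sketch (not driver code) of the multi-CS wait. The
 * ioctl request name HL_IOCTL_WAIT_CS, the fd, seq_a/seq_b and
 * handle_completed() are assumptions; the fields mirror the handler above.
 *
 *	__u64 seqs[2] = { seq_a, seq_b };
 *	union hl_wait_cs_args args = {};
 *
 *	args.in.seq = (__u64) (uintptr_t) seqs;
 *	args.in.seq_arr_len = 2;
 *	args.in.timeout_us = 1000000;
 *	args.in.flags = HL_WAIT_CS_FLAGS_MULTI_CS;
 *
 *	if (!ioctl(fd, HL_IOCTL_WAIT_CS, &args) &&
 *			args.out.status == HL_WAIT_CS_STATUS_COMPLETED)
 *		handle_completed(args.out.cs_completion_map);
 */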
2653
2654 static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
2655 {
2656         struct hl_device *hdev = hpriv->hdev;
2657         union hl_wait_cs_args *args = data;
2658         enum hl_cs_wait_status status;
2659         u64 seq = args->in.seq;
2660         s64 timestamp;
2661         int rc;
2662
2663         rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq,
2664                                 &status, &timestamp);
2665
2666         memset(args, 0, sizeof(*args));
2667
2668         if (rc) {
2669                 if (rc == -ERESTARTSYS) {
2670                         dev_err_ratelimited(hdev->dev,
2671                                 "user process got signal while waiting for CS handle %llu\n",
2672                                 seq);
2673                         args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
2674                         rc = -EINTR;
2675                 } else if (rc == -ETIMEDOUT) {
2676                         dev_err_ratelimited(hdev->dev,
2677                                 "CS %llu has timed-out while user process is waiting for it\n",
2678                                 seq);
2679                         args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
2680                 } else if (rc == -EIO) {
2681                         dev_err_ratelimited(hdev->dev,
2682                                 "CS %llu has been aborted while user process is waiting for it\n",
2683                                 seq);
2684                         args->out.status = HL_WAIT_CS_STATUS_ABORTED;
2685                 }
2686                 return rc;
2687         }
2688
2689         if (timestamp) {
2690                 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
2691                 args->out.timestamp_nsec = timestamp;
2692         }
2693
2694         switch (status) {
2695         case CS_WAIT_STATUS_GONE:
2696                 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
2697                 fallthrough;
2698         case CS_WAIT_STATUS_COMPLETED:
2699                 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
2700                 break;
2701         case CS_WAIT_STATUS_BUSY:
2702         default:
2703                 args->out.status = HL_WAIT_CS_STATUS_BUSY;
2704                 break;
2705         }
2706
2707         return 0;
2708 }
2709
2710 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
2711                                 u32 timeout_us, u64 user_address,
2712                                 u32 target_value, u16 interrupt_offset,
2713                                 enum hl_cs_wait_status *status)
2714 {
2715         struct hl_user_pending_interrupt *pend;
2716         struct hl_user_interrupt *interrupt;
2717         unsigned long timeout;
2718         long completion_rc;
2719         u32 completion_value;
2720         int rc = 0;
2721
2722         if (timeout_us == U32_MAX)
2723                 timeout = timeout_us;
2724         else
2725                 timeout = usecs_to_jiffies(timeout_us);
2726
2727         hl_ctx_get(hdev, ctx);
2728
2729         pend = kmalloc(sizeof(*pend), GFP_KERNEL);
2730         if (!pend) {
2731                 hl_ctx_put(ctx);
2732                 return -ENOMEM;
2733         }
2734
2735         hl_fence_init(&pend->fence, ULONG_MAX);
2736
2737         if (interrupt_offset == HL_COMMON_USER_INTERRUPT_ID)
2738                 interrupt = &hdev->common_user_interrupt;
2739         else
2740                 interrupt = &hdev->user_interrupt[interrupt_offset];
2741
2742         spin_lock(&interrupt->wait_list_lock);
2743         if (!hl_device_operational(hdev, NULL)) {
2744                 rc = -EPERM;
2745                 goto unlock_and_free_fence;
2746         }
2747
2748         if (copy_from_user(&completion_value, u64_to_user_ptr(user_address),
2749                                                                         4)) {
2750                 dev_err(hdev->dev,
2751                         "Failed to copy completion value from user\n");
2752                 rc = -EFAULT;
2753                 goto unlock_and_free_fence;
2754         }
2755
2756         if (completion_value >= target_value)
2757                 *status = CS_WAIT_STATUS_COMPLETED;
2758         else
2759                 *status = CS_WAIT_STATUS_BUSY;
2760
2761         if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
2762                 goto unlock_and_free_fence;
2763
2764         /* Add pending user interrupt to relevant list for the interrupt
2765          * handler to monitor
2766          */
2767         list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
2768         spin_unlock(&interrupt->wait_list_lock);
2769
2770 wait_again:
2771         /* Wait for interrupt handler to signal completion */
2772         completion_rc =
2773                 wait_for_completion_interruptible_timeout(
2774                                 &pend->fence.completion, timeout);
2775
2776         /* If timeout did not expire we need to perform the comparison.
2777          * If comparison fails, keep waiting until timeout expires
2778          */
        if (completion_rc > 0) {
                spin_lock(&interrupt->wait_list_lock);

                if (copy_from_user(&completion_value,
                                u64_to_user_ptr(user_address),
                                sizeof(completion_value))) {
                        spin_unlock(&interrupt->wait_list_lock);

                        dev_err(hdev->dev,
                                "Failed to copy completion value from user\n");
                        rc = -EFAULT;

                        goto remove_pending_user_interrupt;
                }

                if (completion_value >= target_value) {
                        spin_unlock(&interrupt->wait_list_lock);
                        *status = CS_WAIT_STATUS_COMPLETED;
                } else {
                        reinit_completion(&pend->fence.completion);
                        timeout = completion_rc;

                        spin_unlock(&interrupt->wait_list_lock);
                        goto wait_again;
                }
        } else if (completion_rc == -ERESTARTSYS) {
                dev_err_ratelimited(hdev->dev,
                        "user process got signal while waiting for interrupt ID %d\n",
                        interrupt->interrupt_id);
                *status = HL_WAIT_CS_STATUS_INTERRUPTED;
                rc = -EINTR;
        } else {
                *status = CS_WAIT_STATUS_BUSY;
        }

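        /* Whatever the outcome, remove the pending node from the interrupt's
         * wait list under the lock before freeing it.
         */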
remove_pending_user_interrupt:
        spin_lock(&interrupt->wait_list_lock);
        list_del(&pend->wait_list_node);

unlock_and_free_fence:
        spin_unlock(&interrupt->wait_list_lock);
        kfree(pend);
        hl_ctx_put(ctx);

        return rc;
}

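/**
 * hl_interrupt_wait_ioctl() - handle a user request to wait for a user interrupt
 * @hpriv: pointer to the file's private data
 * @data: ioctl input/output arguments (union hl_wait_cs_args)
 *
 * Validate the requested interrupt ID against the ASIC properties, translate
 * it to an offset inside the device's user interrupt array (or to the common
 * user interrupt) and block in _hl_interrupt_wait_ioctl() until the value at
 * the user supplied address reaches the target, the timeout expires or a
 * signal is received. The internal wait status is then translated to the
 * status reported back to the user.
 *
 * Return: 0 on success, negative error code otherwise.
 */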
static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
        u16 interrupt_id, interrupt_offset, first_interrupt, last_interrupt;
        struct hl_device *hdev = hpriv->hdev;
        struct asic_fixed_properties *prop;
        union hl_wait_cs_args *args = data;
        enum hl_cs_wait_status status;
        int rc;

        prop = &hdev->asic_prop;

        if (!prop->user_interrupt_count) {
                dev_err(hdev->dev, "no user interrupts allowed\n");
                return -EPERM;
        }

        interrupt_id =
                FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);

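        /* User interrupt IDs occupy a contiguous range of MSI-X entries,
         * starting at the first interrupt the ASIC exposes to user space.
         */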
        first_interrupt = prop->first_available_user_msix_interrupt;
        last_interrupt = first_interrupt + prop->user_interrupt_count - 1;

        if ((interrupt_id < first_interrupt || interrupt_id > last_interrupt) &&
                        interrupt_id != HL_COMMON_USER_INTERRUPT_ID) {
                dev_err(hdev->dev, "invalid user interrupt %u\n", interrupt_id);
                return -EINVAL;
        }

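        /* The common user interrupt keeps its special ID so the wait helper
         * can tell it apart; any other ID is translated to an offset inside
         * the device's user interrupt array.
         */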
        if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID)
                interrupt_offset = HL_COMMON_USER_INTERRUPT_ID;
        else
                interrupt_offset = interrupt_id - first_interrupt;

        rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
                                args->in.interrupt_timeout_us, args->in.addr,
                                args->in.target, interrupt_offset, &status);

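        /* The ioctl arguments are a union of input and output blocks, so
         * clear them before filling the output returned to the user.
         */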
        memset(args, 0, sizeof(*args));

        if (rc) {
                if (rc != -EINTR)
                        dev_err_ratelimited(hdev->dev,
                                "interrupt_wait_ioctl failed (%d)\n", rc);

                return rc;
        }

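        /* Translate the driver-internal wait status to the status reported
         * back to the user.
         */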
        switch (status) {
        case CS_WAIT_STATUS_COMPLETED:
                args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
                break;
        case CS_WAIT_STATUS_BUSY:
        default:
                args->out.status = HL_WAIT_CS_STATUS_BUSY;
                break;
        }

        return 0;
}

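/**
 * hl_wait_ioctl() - entry point of the wait ioctl
 * @hpriv: pointer to the file's private data
 * @data: ioctl input/output arguments (union hl_wait_cs_args)
 *
 * Dispatch according to the user supplied flags: wait on a user interrupt,
 * wait on multiple command submissions, or wait on a single command
 * submission.
 *
 * Return: 0 on success, negative error code otherwise.
 */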
int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
        union hl_wait_cs_args *args = data;
        u32 flags = args->in.flags;
        int rc;

        if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
                rc = hl_interrupt_wait_ioctl(hpriv, data);
        else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS)
                rc = hl_multi_cs_wait_ioctl(hpriv, data);
        else
                rc = hl_cs_wait_ioctl(hpriv, data);

        return rc;
}