drivers/misc/habanalabs/common/command_submission.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2019 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include <uapi/misc/habanalabs.h>
9 #include "habanalabs.h"
10
11 #include <linux/uaccess.h>
12 #include <linux/slab.h>
13
14 #define HL_CS_FLAGS_TYPE_MASK   (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
15                                 HL_CS_FLAGS_COLLECTIVE_WAIT)
16
17 /**
18  * enum hl_cs_wait_status - cs wait status
19  * @CS_WAIT_STATUS_BUSY: cs was not completed yet
20  * @CS_WAIT_STATUS_COMPLETED: cs completed
21  * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
22  */
23 enum hl_cs_wait_status {
24         CS_WAIT_STATUS_BUSY,
25         CS_WAIT_STATUS_COMPLETED,
26         CS_WAIT_STATUS_GONE
27 };
28
29 static void job_wq_completion(struct work_struct *work);
30 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
31                                 u64 timeout_us, u64 seq,
32                                 enum hl_cs_wait_status *status, s64 *timestamp);
33 static void cs_do_release(struct kref *ref);
34
35 static void hl_sob_reset(struct kref *ref)
36 {
37         struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
38                                                         kref);
39         struct hl_device *hdev = hw_sob->hdev;
40
41         hdev->asic_funcs->reset_sob(hdev, hw_sob);
42 }
43
44 void hl_sob_reset_error(struct kref *ref)
45 {
46         struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
47                                                         kref);
48         struct hl_device *hdev = hw_sob->hdev;
49
50         dev_crit(hdev->dev,
51                 "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
52                 hw_sob->q_idx, hw_sob->sob_id);
53 }
54
55 /**
56  * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
57  * @sob_base: sob base id
58  * @sob_mask: sob user mask, each bit represents a sob offset from sob base
59  * @mask: generated mask
60  *
61  * Return: 0 if given parameters are valid
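62  *
63  * Illustrative example, assuming HL_MAX_SOBS_PER_MONITOR is 8: with
64  * sob_base = 5 and sob_mask = 0x1, the single-SOB path yields
65  * *mask = ~(1 << 5) = 0xdf; with sob_base = 8 and sob_mask = 0x7, the MSB
66  * of the user mask is bit 2, which fits in the monitor range, so
67  * *mask = ~0x7 = 0xf8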
62  */
63 int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
64 {
65         int i;
66
67         if (sob_mask == 0)
68                 return -EINVAL;
69
70         if (sob_mask == 0x1) {
71                 *mask = ~(1 << (sob_base & 0x7));
72         } else {
73                 /* find msb in order to verify sob range is valid */
74                 for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
75                         if (BIT(i) & sob_mask)
76                                 break;
77
78                 if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
79                         return -EINVAL;
80
81                 *mask = ~sob_mask;
82         }
83
84         return 0;
85 }
86
87 static void sob_reset_work(struct work_struct *work)
88 {
89         struct hl_cs_compl *hl_cs_cmpl =
90                 container_of(work, struct hl_cs_compl, sob_reset_work);
91         struct hl_device *hdev = hl_cs_cmpl->hdev;
92
93         /*
94          * A signal CS can get completion while the corresponding wait
95          * for signal CS is on its way to the PQ. The wait for signal CS
96          * will get stuck if the signal CS incremented the SOB to its
97          * max value and there are no pending (submitted) waits on this
98          * SOB.
99          * We do the following to avoid this situation:
100          * 1. The wait for signal CS must get a ref for the signal CS as
101          *    soon as possible in cs_ioctl_signal_wait() and put it
102          *    before being submitted to the PQ but after it incremented
103          *    the SOB refcnt in init_signal_wait_cs().
104          * 2. Signal/Wait for signal CS will decrement the SOB refcnt
105          *    here.
106          * These two measures guarantee that the wait for signal CS will
107          * reset the SOB upon completion rather than the signal CS and
108          * hence the above scenario is avoided.
109          */
110         kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
111
112         if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
113                 hdev->asic_funcs->reset_sob_group(hdev,
114                                 hl_cs_cmpl->sob_group);
115
116         kfree(hl_cs_cmpl);
117 }
118
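/*
 * hl_fence_release - release the fence and its enclosing CS completion object
 *
 * @kref: kref of the fence that reached zero
 *
 * For signal/wait/collective-wait CS types, the SOB handling (refcount drop
 * and possible reset) and the final free are deferred to the sob_reset_work
 * workqueue, except when the CS was never submitted (fence error is -EBUSY),
 * in which case there is no attached hw_sob to handle.
 */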
119 static void hl_fence_release(struct kref *kref)
120 {
121         struct hl_fence *fence =
122                 container_of(kref, struct hl_fence, refcount);
123         struct hl_cs_compl *hl_cs_cmpl =
124                 container_of(fence, struct hl_cs_compl, base_fence);
125         struct hl_device *hdev = hl_cs_cmpl->hdev;
126
127         /* EBUSY means the CS was never submitted and hence we don't have
128          * an attached hw_sob object that we should handle here
129          */
130         if (fence->error == -EBUSY)
131                 goto free;
132
133         if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
134                 (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
135                 (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {
136
137                 dev_dbg(hdev->dev,
138                         "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
139                         hl_cs_cmpl->cs_seq,
140                         hl_cs_cmpl->type,
141                         hl_cs_cmpl->hw_sob->sob_id,
142                         hl_cs_cmpl->sob_val);
143
144                 queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work);
145
146                 return;
147         }
148
149 free:
150         kfree(hl_cs_cmpl);
151 }
152
153 void hl_fence_put(struct hl_fence *fence)
154 {
155         if (fence)
156                 kref_put(&fence->refcount, hl_fence_release);
157 }
158
159 void hl_fence_get(struct hl_fence *fence)
160 {
161         if (fence)
162                 kref_get(&fence->refcount);
163 }
164
165 static void hl_fence_init(struct hl_fence *fence, u64 sequence)
166 {
167         kref_init(&fence->refcount);
168         fence->cs_sequence = sequence;
169         fence->error = 0;
170         fence->timestamp = ktime_set(0, 0);
171         init_completion(&fence->completion);
172 }
173
174 void cs_get(struct hl_cs *cs)
175 {
176         kref_get(&cs->refcount);
177 }
178
179 static int cs_get_unless_zero(struct hl_cs *cs)
180 {
181         return kref_get_unless_zero(&cs->refcount);
182 }
183
184 static void cs_put(struct hl_cs *cs)
185 {
186         kref_put(&cs->refcount, cs_do_release);
187 }
188
189 static void cs_job_do_release(struct kref *ref)
190 {
191         struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);
192
193         kfree(job);
194 }
195
196 static void cs_job_put(struct hl_cs_job *job)
197 {
198         kref_put(&job->refcount, cs_job_do_release);
199 }
200
201 bool cs_needs_completion(struct hl_cs *cs)
202 {
203         /* In case this is a staged CS, only the last CS in sequence should
204          * get a completion; any non-staged CS always gets a completion
205          */
206         if (cs->staged_cs && !cs->staged_last)
207                 return false;
208
209         return true;
210 }
211
212 bool cs_needs_timeout(struct hl_cs *cs)
213 {
214         /* In case this is a staged CS, only the first CS in sequence should
215          * get a timeout; any non-staged CS always gets a timeout
216          */
217         if (cs->staged_cs && !cs->staged_first)
218                 return false;
219
220         return true;
221 }
222
223 static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
224 {
225         /*
226          * Patched CB is created for external queues jobs, and for H/W queues
227          * jobs if the user CB was allocated by driver and MMU is disabled.
228          */
229         return (job->queue_type == QUEUE_TYPE_EXT ||
230                         (job->queue_type == QUEUE_TYPE_HW &&
231                                         job->is_kernel_allocated_cb &&
232                                         !hdev->mmu_enable));
233 }
234
235 /*
236  * cs_parser - parse the user command submission
237  *
238  * @hpriv: pointer to the private data of the fd
239  * @job: pointer to the job that holds the command submission info
240  *
241  * The function parses the command submission of the user. It calls the
242  * ASIC specific parser, which returns a list of memory blocks to send
243  * to the device as different command buffers
244  *
245  */
246 static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
247 {
248         struct hl_device *hdev = hpriv->hdev;
249         struct hl_cs_parser parser;
250         int rc;
251
252         parser.ctx_id = job->cs->ctx->asid;
253         parser.cs_sequence = job->cs->sequence;
254         parser.job_id = job->id;
255
256         parser.hw_queue_id = job->hw_queue_id;
257         parser.job_userptr_list = &job->userptr_list;
258         parser.patched_cb = NULL;
259         parser.user_cb = job->user_cb;
260         parser.user_cb_size = job->user_cb_size;
261         parser.queue_type = job->queue_type;
262         parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
263         job->patched_cb = NULL;
264         parser.completion = cs_needs_completion(job->cs);
265
266         rc = hdev->asic_funcs->cs_parser(hdev, &parser);
267
268         if (is_cb_patched(hdev, job)) {
269                 if (!rc) {
270                         job->patched_cb = parser.patched_cb;
271                         job->job_cb_size = parser.patched_cb_size;
272                         job->contains_dma_pkt = parser.contains_dma_pkt;
273                         atomic_inc(&job->patched_cb->cs_cnt);
274                 }
275
276                 /*
277                  * Whether the parsing worked or not, we don't need the
278                  * original CB anymore because it was already parsed and
279                  * won't be accessed again for this CS
280                  */
281                 atomic_dec(&job->user_cb->cs_cnt);
282                 hl_cb_put(job->user_cb);
283                 job->user_cb = NULL;
284         } else if (!rc) {
285                 job->job_cb_size = job->user_cb_size;
286         }
287
288         return rc;
289 }
290
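/*
 * complete_job - release a single job of a CS
 *
 * @hdev: pointer to device structure
 * @job: the job to release
 *
 * Releases the patched/user CBs as needed, removes the job from the CS job
 * list and from debugfs, drops the CS reference that was taken for
 * completion-carrying jobs on external/H/W queues, and finally puts the job.
 */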
291 static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
292 {
293         struct hl_cs *cs = job->cs;
294
295         if (is_cb_patched(hdev, job)) {
296                 hl_userptr_delete_list(hdev, &job->userptr_list);
297
298                 /*
299                  * We might arrive here from rollback and patched CB wasn't
300                  * created, so we need to check it's not NULL
301                  */
302                 if (job->patched_cb) {
303                         atomic_dec(&job->patched_cb->cs_cnt);
304                         hl_cb_put(job->patched_cb);
305                 }
306         }
307
308         /* For H/W queue jobs, if a user CB was allocated by driver and MMU is
309          * enabled, the user CB isn't released in cs_parser() and thus should be
310          * released here.
311          * This is also true for INT queues jobs which were allocated by driver
312          */
313         if (job->is_kernel_allocated_cb &&
314                 ((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) ||
315                                 job->queue_type == QUEUE_TYPE_INT)) {
316                 atomic_dec(&job->user_cb->cs_cnt);
317                 hl_cb_put(job->user_cb);
318         }
319
320         /*
321          * This is the only place where there can be multiple threads
322          * modifying the list at the same time
323          */
324         spin_lock(&cs->job_lock);
325         list_del(&job->cs_node);
326         spin_unlock(&cs->job_lock);
327
328         hl_debugfs_remove_job(hdev, job);
329
330         /* We decrement reference only for a CS that gets completion
331          * because the reference was incremented only for this kind of CS
332          * right before it was scheduled.
333          *
334          * In staged submission, only the last CS marked as 'staged_last'
335          * gets completion, hence its release function will be called from here.
336          * As for all the other CSs in the staged submission which do not get
337          * completion, their CS reference will be decremented by the
338          * 'staged_last' CS during the CS release flow.
339          * All relevant PQ CI counters will be incremented during the CS release
340          * flow by calling 'hl_hw_queue_update_ci'.
341          */
342         if (cs_needs_completion(cs) &&
343                 (job->queue_type == QUEUE_TYPE_EXT ||
344                         job->queue_type == QUEUE_TYPE_HW))
345                 cs_put(cs);
346
347         cs_job_put(job);
348 }
349
350 /*
351  * hl_staged_cs_find_first - locate the first CS in this staged submission
352  *
353  * @hdev: pointer to device structure
354  * @cs_seq: staged submission sequence number
355  *
356  * @note: This function must be called under 'hdev->cs_mirror_lock'
357  *
358  * Find and return a CS pointer with the given sequence
359  */
360 struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
361 {
362         struct hl_cs *cs;
363
364         list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
365                 if (cs->staged_cs && cs->staged_first &&
366                                 cs->sequence == cs_seq)
367                         return cs;
368
369         return NULL;
370 }
371
372 /*
373  * is_staged_cs_last_exists - returns true if the last CS in sequence exists
374  *
375  * @hdev: pointer to device structure
376  * @cs: staged submission member
377  *
378  */
379 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
380 {
381         struct hl_cs *last_entry;
382
383         last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
384                                                                 staged_cs_node);
385
386         if (last_entry->staged_last)
387                 return true;
388
389         return false;
390 }
391
392 /*
393  * staged_cs_get - get CS reference if this CS is a part of a staged CS
394  *
395  * @hdev: pointer to device structure
396  * @cs: current CS
398  *
399  * Increment CS reference for every CS in this staged submission except for
400  * the CS which gets completion.
401  */
402 static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
403 {
404         /* Only the last CS in this staged submission will get a completion.
405          * We must increment the reference for all other CS's in this
406          * staged submission.
407          * Once we get a completion we will release the whole staged submission.
408          */
409         if (!cs->staged_last)
410                 cs_get(cs);
411 }
412
413 /*
414  * staged_cs_put - put a CS in case it is part of staged submission
415  *
416  * @hdev: pointer to device structure
417  * @cs: CS to put
418  *
419  * This function decrements a CS reference (for a non-completion CS)
420  */
421 static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
422 {
423         /* We release all CS's in a staged submission except the last
424          * CS, whose reference we never incremented.
425          */
426         if (!cs_needs_completion(cs))
427                 cs_put(cs);
428 }
429
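/*
 * cs_handle_tdr - hand the TDR (timeout) watchdog over to the next CS
 *
 * @hdev: pointer to device structure
 * @cs: the CS that has just completed
 *
 * Cancels the active TDR work of the completed CS (for a staged submission,
 * of the first CS in the sequence, which owns the timer) unless the CS timed
 * out or the driver timeout is infinite, and arms the TDR work for the next
 * CS in the mirror list that needs a timeout.
 */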
430 static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
431 {
432         bool next_entry_found = false;
433         struct hl_cs *next;
434
435         if (!cs_needs_timeout(cs))
436                 return;
437
438         spin_lock(&hdev->cs_mirror_lock);
439
440         /* We need to handle TDR only once for the complete staged submission.
441          * Hence, we choose the CS that reaches this function first, which is
442          * the CS marked as 'staged_last'.
443          */
444         if (cs->staged_cs && cs->staged_last)
445                 cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
446
447         spin_unlock(&hdev->cs_mirror_lock);
448
449         /* Don't cancel TDR in case this CS timed out, because we might be
450          * running from the TDR context
451          */
452         if (cs && (cs->timedout ||
453                         hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
454                 return;
455
456         if (cs && cs->tdr_active)
457                 cancel_delayed_work_sync(&cs->work_tdr);
458
459         spin_lock(&hdev->cs_mirror_lock);
460
461         /* queue TDR for next CS */
462         list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
463                 if (cs_needs_timeout(next)) {
464                         next_entry_found = true;
465                         break;
466                 }
467
468         if (next_entry_found && !next->tdr_active) {
469                 next->tdr_active = true;
470                 schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
471         }
472
473         spin_unlock(&hdev->cs_mirror_lock);
474 }
475
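/*
 * cs_do_release - final release of a CS when its refcount drops to zero
 *
 * @ref: kref of the CS
 *
 * Completes any remaining (internal-queue) jobs, updates the queue CI
 * counters, removes the CS from the mirror list, hands the TDR over to the
 * next CS, drops the staged-submission references, sets the fence error for
 * timed-out/aborted/unsubmitted CSs, signals and puts the fence and frees
 * the CS object.
 */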
476 static void cs_do_release(struct kref *ref)
477 {
478         struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
479         struct hl_device *hdev = cs->ctx->hdev;
480         struct hl_cs_job *job, *tmp;
481
482         cs->completed = true;
483
484         /*
485          * Although reaching here means that all external jobs have finished
486          * (because each one of them took a refcnt on the CS), we still
487          * need to go over the internal jobs and complete them. Otherwise, we
488          * will have leaked memory and what's worse, the CS object (and
489          * potentially the CTX object) could be released, while the JOB
490          * still holds a pointer to them (but no reference).
491          */
492         list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
493                 complete_job(hdev, job);
494
495         if (!cs->submitted) {
496                 /* In case the wait for signal CS was submitted, the put occurs
497                  * in init_signal_wait_cs() or collective_wait_init_cs()
498                  * right before hanging on the PQ.
499                  */
500                 if (cs->type == CS_TYPE_WAIT ||
501                                 cs->type == CS_TYPE_COLLECTIVE_WAIT)
502                         hl_fence_put(cs->signal_fence);
503
504                 goto out;
505         }
506
507         /* Need to update CI for all queue jobs that do not get completion */
508         hl_hw_queue_update_ci(cs);
509
510         /* remove CS from CS mirror list */
511         spin_lock(&hdev->cs_mirror_lock);
512         list_del_init(&cs->mirror_node);
513         spin_unlock(&hdev->cs_mirror_lock);
514
515         cs_handle_tdr(hdev, cs);
516
517         if (cs->staged_cs) {
518                 /* the completion CS decrements reference for the entire
519                  * staged submission
520                  */
521                 if (cs->staged_last) {
522                         struct hl_cs *staged_cs, *tmp;
523
524                         list_for_each_entry_safe(staged_cs, tmp,
525                                         &cs->staged_cs_node, staged_cs_node)
526                                 staged_cs_put(hdev, staged_cs);
527                 }
528
529                 /* A staged CS will be a member in the list only after it
530                  * was submitted. We used 'cs_mirror_lock' when inserting
531          * it into the list, so we will use it again when removing it
532                  */
533                 if (cs->submitted) {
534                         spin_lock(&hdev->cs_mirror_lock);
535                         list_del(&cs->staged_cs_node);
536                         spin_unlock(&hdev->cs_mirror_lock);
537                 }
538         }
539
540 out:
541         /* Must be called before hl_ctx_put because inside we use ctx to get
542          * the device
543          */
544         hl_debugfs_remove_cs(cs);
545
546         hl_ctx_put(cs->ctx);
547
548         /* We need to mark an error for a CS that was not submitted because
549          * the hl fence release flow is different. Mainly, we don't need
550          * to handle hw_sob for signal/wait
551          */
552         if (cs->timedout)
553                 cs->fence->error = -ETIMEDOUT;
554         else if (cs->aborted)
555                 cs->fence->error = -EIO;
556         else if (!cs->submitted)
557                 cs->fence->error = -EBUSY;
558
559         if (unlikely(cs->skip_reset_on_timeout)) {
560                 dev_err(hdev->dev,
561                         "Command submission %llu completed after %llu (s)\n",
562                         cs->sequence,
563                         div_u64(jiffies - cs->submission_time_jiffies, HZ));
564         }
565
566         if (cs->timestamp)
567                 cs->fence->timestamp = ktime_get();
568         complete_all(&cs->fence->completion);
569         hl_fence_put(cs->fence);
570
571         kfree(cs->jobs_in_queue_cnt);
572         kfree(cs);
573 }
574
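/*
 * cs_timedout - TDR work handler for a CS that did not complete in time
 *
 * @work: the delayed work of the CS
 *
 * Marks the CS as timed out (unless reset-on-timeout is skipped), prints a
 * per-type error message and either resets the device or flags that a reset
 * is needed, depending on the reset_on_lockup setting.
 */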
575 static void cs_timedout(struct work_struct *work)
576 {
577         struct hl_device *hdev;
578         int rc;
579         struct hl_cs *cs = container_of(work, struct hl_cs,
580                                                  work_tdr.work);
581         bool skip_reset_on_timeout = cs->skip_reset_on_timeout;
582
583         rc = cs_get_unless_zero(cs);
584         if (!rc)
585                 return;
586
587         if ((!cs->submitted) || (cs->completed)) {
588                 cs_put(cs);
589                 return;
590         }
591
592         /* Mark the CS as timed out so we won't try to cancel its TDR */
593         if (likely(!skip_reset_on_timeout))
594                 cs->timedout = true;
595
596         hdev = cs->ctx->hdev;
597
598         switch (cs->type) {
599         case CS_TYPE_SIGNAL:
600                 dev_err(hdev->dev,
601                         "Signal command submission %llu has not finished in time!\n",
602                         cs->sequence);
603                 break;
604
605         case CS_TYPE_WAIT:
606                 dev_err(hdev->dev,
607                         "Wait command submission %llu has not finished in time!\n",
608                         cs->sequence);
609                 break;
610
611         case CS_TYPE_COLLECTIVE_WAIT:
612                 dev_err(hdev->dev,
613                         "Collective Wait command submission %llu has not finished in time!\n",
614                         cs->sequence);
615                 break;
616
617         default:
618                 dev_err(hdev->dev,
619                         "Command submission %llu has not finished in time!\n",
620                         cs->sequence);
621                 break;
622         }
623
624         cs_put(cs);
625
626         if (likely(!skip_reset_on_timeout)) {
627                 if (hdev->reset_on_lockup)
628                         hl_device_reset(hdev, HL_RESET_TDR);
629                 else
630                         hdev->needs_reset = true;
631         }
632 }
633
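/*
 * allocate_cs - allocate and initialize a new CS object and its fence
 *
 * @hdev: pointer to device structure
 * @ctx: the context of the submitting process
 * @cs_type: default/signal/wait/collective-wait
 * @user_sequence: staged submission sequence provided by the user, or
 *                 ULLONG_MAX if not relevant
 * @cs_new: returned CS object
 * @flags: CS flags from the ioctl
 * @timeout: timeout for the CS (stored in cs->timeout_jiffies)
 *
 * Takes a reference on the context, reserves a slot in the context's
 * cs_pending ring and assigns the next sequence number. Fails with -EAGAIN
 * if the slot is still occupied by an uncompleted fence, i.e. too many CSs
 * are in flight.
 */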
634 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
635                         enum hl_cs_type cs_type, u64 user_sequence,
636                         struct hl_cs **cs_new, u32 flags, u32 timeout)
637 {
638         struct hl_cs_counters_atomic *cntr;
639         struct hl_fence *other = NULL;
640         struct hl_cs_compl *cs_cmpl;
641         struct hl_cs *cs;
642         int rc;
643
644         cntr = &hdev->aggregated_cs_counters;
645
646         cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
647         if (!cs)
648                 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
649
650         if (!cs) {
651                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
652                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
653                 return -ENOMEM;
654         }
655
656         /* increment refcnt for context */
657         hl_ctx_get(hdev, ctx);
658
659         cs->ctx = ctx;
660         cs->submitted = false;
661         cs->completed = false;
662         cs->type = cs_type;
663         cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
664         cs->timeout_jiffies = timeout;
665         cs->skip_reset_on_timeout =
666                 hdev->skip_reset_on_timeout ||
667                 !!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
668         cs->submission_time_jiffies = jiffies;
669         INIT_LIST_HEAD(&cs->job_list);
670         INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
671         kref_init(&cs->refcount);
672         spin_lock_init(&cs->job_lock);
673
674         cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
675         if (!cs_cmpl)
676                 cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL);
677
678         if (!cs_cmpl) {
679                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
680                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
681                 rc = -ENOMEM;
682                 goto free_cs;
683         }
684
685         cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
686                         sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
687         if (!cs->jobs_in_queue_cnt)
688                 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
689                                 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
690
691         if (!cs->jobs_in_queue_cnt) {
692                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
693                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
694                 rc = -ENOMEM;
695                 goto free_cs_cmpl;
696         }
697
698         cs_cmpl->hdev = hdev;
699         cs_cmpl->type = cs->type;
700         spin_lock_init(&cs_cmpl->lock);
701         INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work);
702         cs->fence = &cs_cmpl->base_fence;
703
704         spin_lock(&ctx->cs_lock);
705
706         cs_cmpl->cs_seq = ctx->cs_sequence;
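        /* cs_pending acts as a ring of fences indexed by the low bits of the
         * sequence number (max_pending_cs is assumed to be a power of 2).
         * 'other' below is the fence currently occupying the slot this CS is
         * about to take.
         */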
707         other = ctx->cs_pending[cs_cmpl->cs_seq &
708                                 (hdev->asic_prop.max_pending_cs - 1)];
709
710         if (other && !completion_done(&other->completion)) {
711                 /* If the following statement is true, it means we have reached
712                  * a point in which only part of the staged submission was
713                  * submitted and we don't have enough room in the 'cs_pending'
714                  * array for the rest of the submission.
715                  * This causes a deadlock because this CS will never be
716                  * completed as it depends on future CS's for completion.
717                  */
718                 if (other->cs_sequence == user_sequence)
719                         dev_crit_ratelimited(hdev->dev,
720                                 "Staged CS %llu deadlock due to lack of resources",
721                                 user_sequence);
722
723                 dev_dbg_ratelimited(hdev->dev,
724                         "Rejecting CS because of too many in-flight CS\n");
725                 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
726                 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
727                 rc = -EAGAIN;
728                 goto free_fence;
729         }
730
731         /* init hl_fence */
732         hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
733
734         cs->sequence = cs_cmpl->cs_seq;
735
736         ctx->cs_pending[cs_cmpl->cs_seq &
737                         (hdev->asic_prop.max_pending_cs - 1)] =
738                                                         &cs_cmpl->base_fence;
739         ctx->cs_sequence++;
740
741         hl_fence_get(&cs_cmpl->base_fence);
742
743         hl_fence_put(other);
744
745         spin_unlock(&ctx->cs_lock);
746
747         *cs_new = cs;
748
749         return 0;
750
751 free_fence:
752         spin_unlock(&ctx->cs_lock);
753         kfree(cs->jobs_in_queue_cnt);
754 free_cs_cmpl:
755         kfree(cs_cmpl);
756 free_cs:
757         kfree(cs);
758         hl_ctx_put(ctx);
759         return rc;
760 }
761
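/*
 * cs_rollback - undo a CS that failed to be submitted
 *
 * @hdev: pointer to device structure
 * @cs: the CS to roll back
 *
 * Drops the staged-submission reference (if one was taken) and completes all
 * jobs that were already attached to the CS.
 */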
762 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
763 {
764         struct hl_cs_job *job, *tmp;
765
766         staged_cs_put(hdev, cs);
767
768         list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
769                 complete_job(hdev, job);
770 }
771
772 void hl_cs_rollback_all(struct hl_device *hdev)
773 {
774         int i;
775         struct hl_cs *cs, *tmp;
776
777         flush_workqueue(hdev->sob_reset_wq);
778
779         /* flush all completions before iterating over the CS mirror list in
780          * order to avoid a race with the release functions
781          */
782         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
783                 flush_workqueue(hdev->cq_wq[i]);
784
785         /* Make sure we don't have leftovers in the CS mirror list */
786         list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
787                 cs_get(cs);
788                 cs->aborted = true;
789                 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
790                                 cs->ctx->asid, cs->sequence);
791                 cs_rollback(hdev, cs);
792                 cs_put(cs);
793         }
794 }
795
796 void hl_pending_cb_list_flush(struct hl_ctx *ctx)
797 {
798         struct hl_pending_cb *pending_cb, *tmp;
799
800         list_for_each_entry_safe(pending_cb, tmp,
801                         &ctx->pending_cb_list, cb_node) {
802                 list_del(&pending_cb->cb_node);
803                 hl_cb_put(pending_cb->cb);
804                 kfree(pending_cb);
805         }
806 }
807
808 static void
809 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
810 {
811         struct hl_user_pending_interrupt *pend;
812
813         spin_lock(&interrupt->wait_list_lock);
814         list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
815                 pend->fence.error = -EIO;
816                 complete_all(&pend->fence.completion);
817         }
818         spin_unlock(&interrupt->wait_list_lock);
819 }
820
821 void hl_release_pending_user_interrupts(struct hl_device *hdev)
822 {
823         struct asic_fixed_properties *prop = &hdev->asic_prop;
824         struct hl_user_interrupt *interrupt;
825         int i;
826
827         if (!prop->user_interrupt_count)
828                 return;
829
830         /* We iterate through the user interrupt requests and wake up all
831          * user threads waiting for interrupt completion. We iterate the
832          * list under a lock; this is why all user threads, once awoken,
833          * will wait on the same lock and will release the waiting object upon
834          * unlock.
835          */
836
837         for (i = 0 ; i < prop->user_interrupt_count ; i++) {
838                 interrupt = &hdev->user_interrupt[i];
839                 wake_pending_user_interrupt_threads(interrupt);
840         }
841
842         interrupt = &hdev->common_user_interrupt;
843         wake_pending_user_interrupt_threads(interrupt);
844 }
845
846 static void job_wq_completion(struct work_struct *work)
847 {
848         struct hl_cs_job *job = container_of(work, struct hl_cs_job,
849                                                 finish_work);
850         struct hl_cs *cs = job->cs;
851         struct hl_device *hdev = cs->ctx->hdev;
852
853         /* job is no longer needed */
854         complete_job(hdev, job);
855 }
856
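/*
 * validate_queue_index - validate the queue index of a user CS chunk
 *
 * @hdev: pointer to device structure
 * @chunk: the user chunk to validate
 * @queue_type: returned type of the target queue
 * @is_kernel_allocated_cb: returned indication whether the CB referenced by
 *                          the chunk was allocated by the kernel driver
 *
 * Rejects out-of-range, unsupported (QUEUE_TYPE_NA) and driver-only queues,
 * and checks that the requested CB allocation type is supported by the queue.
 */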
857 static int validate_queue_index(struct hl_device *hdev,
858                                 struct hl_cs_chunk *chunk,
859                                 enum hl_queue_type *queue_type,
860                                 bool *is_kernel_allocated_cb)
861 {
862         struct asic_fixed_properties *asic = &hdev->asic_prop;
863         struct hw_queue_properties *hw_queue_prop;
864
865         /* This must be checked here to prevent out-of-bounds access to
866          * hw_queues_props array
867          */
868         if (chunk->queue_index >= asic->max_queues) {
869                 dev_err(hdev->dev, "Queue index %d is invalid\n",
870                         chunk->queue_index);
871                 return -EINVAL;
872         }
873
874         hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
875
876         if (hw_queue_prop->type == QUEUE_TYPE_NA) {
877                 dev_err(hdev->dev, "Queue index %d is invalid\n",
878                         chunk->queue_index);
879                 return -EINVAL;
880         }
881
882         if (hw_queue_prop->driver_only) {
883                 dev_err(hdev->dev,
884                         "Queue index %d is restricted for the kernel driver\n",
885                         chunk->queue_index);
886                 return -EINVAL;
887         }
888
889         /* When hw queue type isn't QUEUE_TYPE_HW,
890          * USER_ALLOC_CB flag shall be treated as "don't care".
891          */
892         if (hw_queue_prop->type == QUEUE_TYPE_HW) {
893                 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
894                         if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
895                                 dev_err(hdev->dev,
896                                         "Queue index %d doesn't support user CB\n",
897                                         chunk->queue_index);
898                                 return -EINVAL;
899                         }
900
901                         *is_kernel_allocated_cb = false;
902                 } else {
903                         if (!(hw_queue_prop->cb_alloc_flags &
904                                         CB_ALLOC_KERNEL)) {
905                                 dev_err(hdev->dev,
906                                         "Queue index %d doesn't support kernel CB\n",
907                                         chunk->queue_index);
908                                 return -EINVAL;
909                         }
910
911                         *is_kernel_allocated_cb = true;
912                 }
913         } else {
914                 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
915                                                 & CB_ALLOC_KERNEL);
916         }
917
918         *queue_type = hw_queue_prop->type;
919         return 0;
920 }
921
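/*
 * get_cb_from_cs_chunk - look up and pin the CB referenced by a CS chunk
 *
 * @hdev: pointer to device structure
 * @cb_mgr: the CB manager of the submitting process
 * @chunk: the user chunk holding the CB handle and size
 *
 * Validates the chunk size against the CB size and takes a CS reference on
 * the CB (cs_cnt). Returns NULL on failure.
 */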
922 static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
923                                         struct hl_cb_mgr *cb_mgr,
924                                         struct hl_cs_chunk *chunk)
925 {
926         struct hl_cb *cb;
927         u32 cb_handle;
928
929         cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
930
931         cb = hl_cb_get(hdev, cb_mgr, cb_handle);
932         if (!cb) {
933                 dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
934                 return NULL;
935         }
936
937         if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
938                 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
939                 goto release_cb;
940         }
941
942         atomic_inc(&cb->cs_cnt);
943
944         return cb;
945
946 release_cb:
947         hl_cb_put(cb);
948         return NULL;
949 }
950
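/*
 * hl_cs_allocate_job - allocate and initialize a job descriptor
 *
 * @hdev: pointer to device structure
 * @queue_type: the type of the queue the job will run on
 * @is_kernel_allocated_cb: true if the job's CB was allocated by the driver
 *
 * Jobs on external queues also get a completion work item
 * (job_wq_completion). Returns NULL on allocation failure.
 */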
951 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
952                 enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
953 {
954         struct hl_cs_job *job;
955
956         job = kzalloc(sizeof(*job), GFP_ATOMIC);
957         if (!job)
958                 job = kzalloc(sizeof(*job), GFP_KERNEL);
959
960         if (!job)
961                 return NULL;
962
963         kref_init(&job->refcount);
964         job->queue_type = queue_type;
965         job->is_kernel_allocated_cb = is_kernel_allocated_cb;
966
967         if (is_cb_patched(hdev, job))
968                 INIT_LIST_HEAD(&job->userptr_list);
969
970         if (job->queue_type == QUEUE_TYPE_EXT)
971                 INIT_WORK(&job->finish_work, job_wq_completion);
972
973         return job;
974 }
975
976 static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
977 {
978         if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
979                 return CS_TYPE_SIGNAL;
980         else if (cs_type_flags & HL_CS_FLAGS_WAIT)
981                 return CS_TYPE_WAIT;
982         else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
983                 return CS_TYPE_COLLECTIVE_WAIT;
984         else
985                 return CS_TYPE_DEFAULT;
986 }
987
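/*
 * hl_cs_sanity_checks - validate the CS ioctl arguments
 *
 * @hpriv: pointer to the private data of the fd
 * @args: the CS ioctl arguments
 *
 * Checks that the device is operational, that the requested CS flags are
 * supported and mutually exclusive where required, and that the number of
 * chunks matches the CS type.
 */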
988 static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
989 {
990         struct hl_device *hdev = hpriv->hdev;
991         struct hl_ctx *ctx = hpriv->ctx;
992         u32 cs_type_flags, num_chunks;
993         enum hl_device_status status;
994         enum hl_cs_type cs_type;
995
996         if (!hl_device_operational(hdev, &status)) {
997                 dev_warn_ratelimited(hdev->dev,
998                         "Device is %s. Can't submit new CS\n",
999                         hdev->status[status]);
1000                 return -EBUSY;
1001         }
1002
1003         if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1004                         !hdev->supports_staged_submission) {
1005                 dev_err(hdev->dev, "staged submission not supported");
1006                 return -EPERM;
1007         }
1008
1009         cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
1010
1011         if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
1012                 dev_err(hdev->dev,
1013                         "CS type flags are mutually exclusive, context %d\n",
1014                         ctx->asid);
1015                 return -EINVAL;
1016         }
1017
1018         cs_type = hl_cs_get_cs_type(cs_type_flags);
1019         num_chunks = args->in.num_chunks_execute;
1020
1021         if (unlikely((cs_type != CS_TYPE_DEFAULT) &&
1022                                         !hdev->supports_sync_stream)) {
1023                 dev_err(hdev->dev, "Sync stream CS is not supported\n");
1024                 return -EINVAL;
1025         }
1026
1027         if (cs_type == CS_TYPE_DEFAULT) {
1028                 if (!num_chunks) {
1029                         dev_err(hdev->dev,
1030                                 "Got execute CS with 0 chunks, context %d\n",
1031                                 ctx->asid);
1032                         return -EINVAL;
1033                 }
1034         } else if (num_chunks != 1) {
1035                 dev_err(hdev->dev,
1036                         "Sync stream CS mandates one chunk only, context %d\n",
1037                         ctx->asid);
1038                 return -EINVAL;
1039         }
1040
1041         return 0;
1042 }
1043
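/*
 * hl_cs_copy_chunk_array - copy the user chunk array into kernel memory
 *
 * @hdev: pointer to device structure
 * @cs_chunk_array: returned kernel copy of the chunk array
 * @chunks: user pointer to the chunk array
 * @num_chunks: number of chunks, limited to HL_MAX_JOBS_PER_CS
 * @ctx: the context, used for updating the drop counters on failure
 */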
1044 static int hl_cs_copy_chunk_array(struct hl_device *hdev,
1045                                         struct hl_cs_chunk **cs_chunk_array,
1046                                         void __user *chunks, u32 num_chunks,
1047                                         struct hl_ctx *ctx)
1048 {
1049         u32 size_to_copy;
1050
1051         if (num_chunks > HL_MAX_JOBS_PER_CS) {
1052                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1053                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1054                 dev_err(hdev->dev,
1055                         "Number of chunks can NOT be larger than %d\n",
1056                         HL_MAX_JOBS_PER_CS);
1057                 return -EINVAL;
1058         }
1059
1060         *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
1061                                         GFP_ATOMIC);
1062         if (!*cs_chunk_array)
1063                 *cs_chunk_array = kmalloc_array(num_chunks,
1064                                         sizeof(**cs_chunk_array), GFP_KERNEL);
1065         if (!*cs_chunk_array) {
1066                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1067                 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1068                 return -ENOMEM;
1069         }
1070
1071         size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
1072         if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
1073                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1074                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1075                 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
1076                 kfree(*cs_chunk_array);
1077                 return -EFAULT;
1078         }
1079
1080         return 0;
1081 }
1082
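/*
 * cs_staged_submission - mark a CS with its role in a staged submission
 *
 * @hdev: pointer to device structure
 * @cs: the CS to mark
 * @sequence: user provided sequence of the first CS in the staged submission
 * @flags: CS flags from the ioctl
 *
 * Records whether this CS is the first/last in the sequence, stores the
 * staged sequence number and takes an extra CS reference for every CS that
 * does not carry the completion.
 */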
1083 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
1084                                 u64 sequence, u32 flags)
1085 {
1086         if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
1087                 return 0;
1088
1089         cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
1090         cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);
1091
1092         if (cs->staged_first) {
1093                 /* Staged CS sequence is the first CS sequence */
1094                 INIT_LIST_HEAD(&cs->staged_cs_node);
1095                 cs->staged_sequence = cs->sequence;
1096         } else {
1097                 /* User sequence will be validated in 'hl_hw_queue_schedule_cs'
1098                  * under the cs_mirror_lock
1099                  */
1100                 cs->staged_sequence = sequence;
1101         }
1102
1103         /* Increment CS reference if needed */
1104         staged_cs_get(hdev, cs);
1105
1106         cs->staged_cs = true;
1107
1108         return 0;
1109 }
1110
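/*
 * cs_ioctl_default - main flow of a regular (execute) CS submission
 *
 * @hpriv: pointer to the private data of the fd
 * @chunks: user pointer to the chunk array
 * @num_chunks: number of chunks
 * @cs_seq: in: user staged sequence, out: the sequence of the new CS
 * @flags: CS flags from the ioctl
 * @timeout: CS timeout
 *
 * Copies and validates the user chunks, allocates the CS, creates and parses
 * a job per chunk, and schedules the CS on the H/W queues. A CS that contains
 * only internal-queue jobs is allowed only if it does not need a completion.
 */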
1111 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
1112                                 u32 num_chunks, u64 *cs_seq, u32 flags,
1113                                 u32 timeout)
1114 {
1115         bool staged_mid, int_queues_only = true;
1116         struct hl_device *hdev = hpriv->hdev;
1117         struct hl_cs_chunk *cs_chunk_array;
1118         struct hl_cs_counters_atomic *cntr;
1119         struct hl_ctx *ctx = hpriv->ctx;
1120         struct hl_cs_job *job;
1121         struct hl_cs *cs;
1122         struct hl_cb *cb;
1123         u64 user_sequence;
1124         int rc, i;
1125
1126         cntr = &hdev->aggregated_cs_counters;
1127         user_sequence = *cs_seq;
1128         *cs_seq = ULLONG_MAX;
1129
1130         rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1131                         hpriv->ctx);
1132         if (rc)
1133                 goto out;
1134
1135         if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1136                         !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
1137                 staged_mid = true;
1138         else
1139                 staged_mid = false;
1140
1141         rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
1142                         staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
1143                         timeout);
1144         if (rc)
1145                 goto free_cs_chunk_array;
1146
1147         *cs_seq = cs->sequence;
1148
1149         hl_debugfs_add_cs(cs);
1150
1151         rc = cs_staged_submission(hdev, cs, user_sequence, flags);
1152         if (rc)
1153                 goto free_cs_object;
1154
1155         /* Validate ALL the CS chunks before submitting the CS */
1156         for (i = 0 ; i < num_chunks ; i++) {
1157                 struct hl_cs_chunk *chunk = &cs_chunk_array[i];
1158                 enum hl_queue_type queue_type;
1159                 bool is_kernel_allocated_cb;
1160
1161                 rc = validate_queue_index(hdev, chunk, &queue_type,
1162                                                 &is_kernel_allocated_cb);
1163                 if (rc) {
1164                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1165                         atomic64_inc(&cntr->validation_drop_cnt);
1166                         goto free_cs_object;
1167                 }
1168
1169                 if (is_kernel_allocated_cb) {
1170                         cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
1171                         if (!cb) {
1172                                 atomic64_inc(
1173                                         &ctx->cs_counters.validation_drop_cnt);
1174                                 atomic64_inc(&cntr->validation_drop_cnt);
1175                                 rc = -EINVAL;
1176                                 goto free_cs_object;
1177                         }
1178                 } else {
1179                         cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
1180                 }
1181
1182                 if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
1183                         int_queues_only = false;
1184
1185                 job = hl_cs_allocate_job(hdev, queue_type,
1186                                                 is_kernel_allocated_cb);
1187                 if (!job) {
1188                         atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1189                         atomic64_inc(&cntr->out_of_mem_drop_cnt);
1190                         dev_err(hdev->dev, "Failed to allocate a new job\n");
1191                         rc = -ENOMEM;
1192                         if (is_kernel_allocated_cb)
1193                                 goto release_cb;
1194
1195                         goto free_cs_object;
1196                 }
1197
1198                 job->id = i + 1;
1199                 job->cs = cs;
1200                 job->user_cb = cb;
1201                 job->user_cb_size = chunk->cb_size;
1202                 job->hw_queue_id = chunk->queue_index;
1203
1204                 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1205
1206                 list_add_tail(&job->cs_node, &cs->job_list);
1207
1208                 /*
1209                  * Increment CS reference. When CS reference is 0, CS is
1210                  * done and can be signaled to the user and all its resources freed.
1211                  * Only increment for JOBs on external or H/W queues, because
1212                  * only for those JOBs do we get a completion
1213                  */
1214                 if (cs_needs_completion(cs) &&
1215                         (job->queue_type == QUEUE_TYPE_EXT ||
1216                                 job->queue_type == QUEUE_TYPE_HW))
1217                         cs_get(cs);
1218
1219                 hl_debugfs_add_job(hdev, job);
1220
1221                 rc = cs_parser(hpriv, job);
1222                 if (rc) {
1223                         atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
1224                         atomic64_inc(&cntr->parsing_drop_cnt);
1225                         dev_err(hdev->dev,
1226                                 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
1227                                 cs->ctx->asid, cs->sequence, job->id, rc);
1228                         goto free_cs_object;
1229                 }
1230         }
1231
1232         /* We allow a CS with any queue type combination as long as it does
1233          * not get a completion
1234          */
1235         if (int_queues_only && cs_needs_completion(cs)) {
1236                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1237                 atomic64_inc(&cntr->validation_drop_cnt);
1238                 dev_err(hdev->dev,
1239                         "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
1240                         cs->ctx->asid, cs->sequence);
1241                 rc = -EINVAL;
1242                 goto free_cs_object;
1243         }
1244
1245         rc = hl_hw_queue_schedule_cs(cs);
1246         if (rc) {
1247                 if (rc != -EAGAIN)
1248                         dev_err(hdev->dev,
1249                                 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
1250                                 cs->ctx->asid, cs->sequence, rc);
1251                 goto free_cs_object;
1252         }
1253
1254         rc = HL_CS_STATUS_SUCCESS;
1255         goto put_cs;
1256
1257 release_cb:
1258         atomic_dec(&cb->cs_cnt);
1259         hl_cb_put(cb);
1260 free_cs_object:
1261         cs_rollback(hdev, cs);
1262         *cs_seq = ULLONG_MAX;
1263         /* The path below is both for good and erroneous exits */
1264 put_cs:
1265         /* We finished with the CS in this function, so put the ref */
1266         cs_put(cs);
1267 free_cs_chunk_array:
1268         kfree(cs_chunk_array);
1269 out:
1270         return rc;
1271 }
1272
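/*
 * pending_cb_create_job - create a job for a pending CB and attach it to a CS
 *
 * @hdev: pointer to device structure
 * @ctx: the context of the submitting process
 * @cs: the CS the job will be added to
 * @cb: the pending CB
 * @size: the CB size to use for the job
 * @hw_queue_id: the queue the job will be sent to
 *
 * The CB is used as-is (treated as already patched) and a CS reference is
 * taken since such jobs get a completion.
 */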
1273 static int pending_cb_create_job(struct hl_device *hdev, struct hl_ctx *ctx,
1274                 struct hl_cs *cs, struct hl_cb *cb, u32 size, u32 hw_queue_id)
1275 {
1276         struct hw_queue_properties *hw_queue_prop;
1277         struct hl_cs_counters_atomic *cntr;
1278         struct hl_cs_job *job;
1279
1280         hw_queue_prop = &hdev->asic_prop.hw_queues_props[hw_queue_id];
1281         cntr = &hdev->aggregated_cs_counters;
1282
1283         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1284         if (!job) {
1285                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1286                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1287                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1288                 return -ENOMEM;
1289         }
1290
1291         job->id = 0;
1292         job->cs = cs;
1293         job->user_cb = cb;
1294         atomic_inc(&job->user_cb->cs_cnt);
1295         job->user_cb_size = size;
1296         job->hw_queue_id = hw_queue_id;
1297         job->patched_cb = job->user_cb;
1298         job->job_cb_size = job->user_cb_size;
1299
1300         /* increment refcount as for external queues we get completion */
1301         cs_get(cs);
1302
1303         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1304
1305         list_add_tail(&job->cs_node, &cs->job_list);
1306
1307         hl_debugfs_add_job(hdev, job);
1308
1309         return 0;
1310 }
1311
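/*
 * hl_submit_pending_cb - submit all CBs pending on the context as one CS
 *
 * @hpriv: pointer to the private data of the fd
 *
 * Only a single thread (holder of thread_pending_cb_token) processes the
 * list at a time. The pending CBs are moved to a local list, turned into
 * jobs of a newly allocated default CS and scheduled; on failure they are
 * moved back to the context's pending list.
 */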
1312 static int hl_submit_pending_cb(struct hl_fpriv *hpriv)
1313 {
1314         struct hl_device *hdev = hpriv->hdev;
1315         struct hl_ctx *ctx = hpriv->ctx;
1316         struct hl_pending_cb *pending_cb, *tmp;
1317         struct list_head local_cb_list;
1318         struct hl_cs *cs;
1319         struct hl_cb *cb;
1320         u32 hw_queue_id;
1321         u32 cb_size;
1322         int process_list, rc = 0;
1323
1324         if (list_empty(&ctx->pending_cb_list))
1325                 return 0;
1326
1327         process_list = atomic_cmpxchg(&ctx->thread_pending_cb_token, 1, 0);
1328
1329         /* Only a single thread is allowed to process the list */
1330         if (!process_list)
1331                 return 0;
1332
1333         if (list_empty(&ctx->pending_cb_list))
1334                 goto free_pending_cb_token;
1335
1336         /* move all list elements to a local list */
1337         INIT_LIST_HEAD(&local_cb_list);
1338         spin_lock(&ctx->pending_cb_lock);
1339         list_for_each_entry_safe(pending_cb, tmp, &ctx->pending_cb_list,
1340                                                                 cb_node)
1341                 list_move_tail(&pending_cb->cb_node, &local_cb_list);
1342         spin_unlock(&ctx->pending_cb_lock);
1343
1344         rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs, 0,
1345                                 hdev->timeout_jiffies);
1346         if (rc)
1347                 goto add_list_elements;
1348
1349         hl_debugfs_add_cs(cs);
1350
1351         /* Iterate through pending cb list, create jobs and add to CS */
1352         list_for_each_entry(pending_cb, &local_cb_list, cb_node) {
1353                 cb = pending_cb->cb;
1354                 cb_size = pending_cb->cb_size;
1355                 hw_queue_id = pending_cb->hw_queue_id;
1356
1357                 rc = pending_cb_create_job(hdev, ctx, cs, cb, cb_size,
1358                                                                 hw_queue_id);
1359                 if (rc)
1360                         goto free_cs_object;
1361         }
1362
1363         rc = hl_hw_queue_schedule_cs(cs);
1364         if (rc) {
1365                 if (rc != -EAGAIN)
1366                         dev_err(hdev->dev,
1367                                 "Failed to submit CS %d.%llu (%d)\n",
1368                                 ctx->asid, cs->sequence, rc);
1369                 goto free_cs_object;
1370         }
1371
1372         /* pending cb was scheduled successfully */
1373         list_for_each_entry_safe(pending_cb, tmp, &local_cb_list, cb_node) {
1374                 list_del(&pending_cb->cb_node);
1375                 kfree(pending_cb);
1376         }
1377
1378         cs_put(cs);
1379
1380         goto free_pending_cb_token;
1381
1382 free_cs_object:
1383         cs_rollback(hdev, cs);
1384         cs_put(cs);
1385 add_list_elements:
1386         spin_lock(&ctx->pending_cb_lock);
1387         list_for_each_entry_safe_reverse(pending_cb, tmp, &local_cb_list,
1388                                                                 cb_node)
1389                 list_move(&pending_cb->cb_node, &ctx->pending_cb_list);
1390         spin_unlock(&ctx->pending_cb_lock);
1391 free_pending_cb_token:
1392         atomic_set(&ctx->thread_pending_cb_token, 1);
1393
1394         return rc;
1395 }
1396
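/*
 * hl_cs_ctx_switch - perform the context-switch/restore phase if needed
 *
 * @hpriv: pointer to the private data of the fd
 * @args: the CS ioctl arguments
 * @cs_seq: returned sequence of the restore CS (if one was submitted)
 *
 * Only the first thread that grabs the context-switch token performs the
 * ASIC context switch and submits the restore CS; other threads poll the
 * wait token until the restore phase completes. A failed context switch may
 * require a soft reset, which is done at the end of the function.
 */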
1397 static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
1398                                 u64 *cs_seq)
1399 {
1400         struct hl_device *hdev = hpriv->hdev;
1401         struct hl_ctx *ctx = hpriv->ctx;
1402         bool need_soft_reset = false;
1403         int rc = 0, do_ctx_switch;
1404         void __user *chunks;
1405         u32 num_chunks, tmp;
1406         int ret;
1407
1408         do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
1409
1410         if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
1411                 mutex_lock(&hpriv->restore_phase_mutex);
1412
1413                 if (do_ctx_switch) {
1414                         rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
1415                         if (rc) {
1416                                 dev_err_ratelimited(hdev->dev,
1417                                         "Failed to switch to context %d, rejecting CS! %d\n",
1418                                         ctx->asid, rc);
1419                                 /*
1420                                  * If we timed out, or if the device is not IDLE
1421                                  * while we want to do context-switch (-EBUSY),
1422                                  * we need to soft-reset because QMAN is
1423                                  * probably stuck. However, we can't call
1424                                  * reset here directly because of a deadlock,
1425                                  * so we need to do it at the very end of this
1426                                  * function
1427                                  */
1428                                 if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
1429                                         need_soft_reset = true;
1430                                 mutex_unlock(&hpriv->restore_phase_mutex);
1431                                 goto out;
1432                         }
1433                 }
1434
1435                 hdev->asic_funcs->restore_phase_topology(hdev);
1436
1437                 chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
1438                 num_chunks = args->in.num_chunks_restore;
1439
1440                 if (!num_chunks) {
1441                         dev_dbg(hdev->dev,
1442                                 "Need to run restore phase but restore CS is empty\n");
1443                         rc = 0;
1444                 } else {
1445                         rc = cs_ioctl_default(hpriv, chunks, num_chunks,
1446                                         cs_seq, 0, hdev->timeout_jiffies);
1447                 }
1448
1449                 mutex_unlock(&hpriv->restore_phase_mutex);
1450
1451                 if (rc) {
1452                         dev_err(hdev->dev,
1453                                 "Failed to submit restore CS for context %d (%d)\n",
1454                                 ctx->asid, rc);
1455                         goto out;
1456                 }
1457
1458                 /* Need to wait for restore completion before execution phase */
1459                 if (num_chunks) {
1460                         enum hl_cs_wait_status status;
1461 wait_again:
1462                         ret = _hl_cs_wait_ioctl(hdev, ctx,
1463                                         jiffies_to_usecs(hdev->timeout_jiffies),
1464                                         *cs_seq, &status, NULL);
1465                         if (ret) {
1466                                 if (ret == -ERESTARTSYS) {
1467                                         usleep_range(100, 200);
1468                                         goto wait_again;
1469                                 }
1470
1471                                 dev_err(hdev->dev,
1472                                         "Restore CS for context %d failed to complete %d\n",
1473                                         ctx->asid, ret);
1474                                 rc = -ENOEXEC;
1475                                 goto out;
1476                         }
1477                 }
1478
1479                 ctx->thread_ctx_switch_wait_token = 1;
1480
1481         } else if (!ctx->thread_ctx_switch_wait_token) {
1482                 rc = hl_poll_timeout_memory(hdev,
1483                         &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
1484                         100, jiffies_to_usecs(hdev->timeout_jiffies), false);
1485
1486                 if (rc == -ETIMEDOUT) {
1487                         dev_err(hdev->dev,
1488                                 "context switch phase timeout (%d)\n", tmp);
1489                         goto out;
1490                 }
1491         }
1492
1493 out:
1494         if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
1495                 hl_device_reset(hdev, 0);
1496
1497         return rc;
1498 }
1499
1500 /*
1501  * hl_cs_signal_sob_wraparound_handler: handle the SOB value wraparound case.
1502  * If the SOB value reaches its max value, move to the other SOB reserved
1503  * for the queue.
1504  * Note that this function must be called while hw_queues_lock is taken.
1505  */
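/*
 * Example (HL_RSVD_SOBS == 2): a sync stream ping-pongs between the two SOBs
 * reserved for its queue. As long as the signals fit, next_sob_val simply
 * grows by "count". Once next_sob_val + count would reach HL_MAX_SOB_VAL, the
 * handler tries to switch to the other reserved SOB, which is allowed only if
 * no in-flight CS still holds a reference to it (kref back to 1); otherwise
 * the submission is rejected with -EINVAL.
 */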
1506 int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
1507                         struct hl_hw_sob **hw_sob, u32 count)
1508 {
1509         struct hl_sync_stream_properties *prop;
1510         struct hl_hw_sob *sob = *hw_sob, *other_sob;
1511         u8 other_sob_offset;
1512
1513         prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1514
1515         kref_get(&sob->kref);
1516
1517         /* check for wraparound */
1518         if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
1519                 /*
1520                  * Decrement as we reached the max value.
1521                  * The release function won't be called here as we've
1522                  * just incremented the refcount a few lines above, so
1523                  * this put only drops that extra reference.
1524                  */
1525                 kref_put(&sob->kref, hl_sob_reset_error);
1526
1527                 /*
1528                  * Check the other SOB's refcount; if it is still in use
1529                  * then fail, otherwise make the switch.
1530                  */
1531                 other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
1532                 other_sob = &prop->hw_sob[other_sob_offset];
1533
1534                 if (kref_read(&other_sob->kref) != 1) {
1535                         dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n",
1536                                                                 q_idx);
1537                         return -EINVAL;
1538                 }
1539
1540                 prop->next_sob_val = 1;
1541
1542                 /* only two SOBs are currently in use */
1543                 prop->curr_sob_offset = other_sob_offset;
1544                 *hw_sob = other_sob;
1545
1546                 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
1547                                 prop->curr_sob_offset, q_idx);
1548         } else {
1549                 prop->next_sob_val += count;
1550         }
1551
1552         return 0;
1553 }
1554
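/*
 * cs_ioctl_extract_signal_seq() - copy the signal CS sequence number from a
 * wait chunk. Only a single-entry signal_seq_arr is supported for now; the
 * array is copied from user space and its first (and only) entry is returned
 * through signal_seq. Validation and allocation failures are accounted in the
 * per-context and aggregated CS drop counters.
 */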
1555 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
1556                 struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
1557 {
1558         u64 *signal_seq_arr = NULL;
1559         u32 size_to_copy, signal_seq_arr_len;
1560         int rc = 0;
1561
1562         signal_seq_arr_len = chunk->num_signal_seq_arr;
1563
1564         /* currently only one signal seq is supported */
1565         if (signal_seq_arr_len != 1) {
1566                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1567                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1568                 dev_err(hdev->dev,
1569                         "Wait for signal CS supports only one signal CS seq\n");
1570                 return -EINVAL;
1571         }
1572
1573         signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1574                                         sizeof(*signal_seq_arr),
1575                                         GFP_ATOMIC);
1576         if (!signal_seq_arr)
1577                 signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1578                                         sizeof(*signal_seq_arr),
1579                                         GFP_KERNEL);
1580         if (!signal_seq_arr) {
1581                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1582                 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1583                 return -ENOMEM;
1584         }
1585
1586         size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
1587         if (copy_from_user(signal_seq_arr,
1588                                 u64_to_user_ptr(chunk->signal_seq_arr),
1589                                 size_to_copy)) {
1590                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1591                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1592                 dev_err(hdev->dev,
1593                         "Failed to copy signal seq array from user\n");
1594                 rc = -EFAULT;
1595                 goto out;
1596         }
1597
1598         /* currently it is guaranteed to have only one signal seq */
1599         *signal_seq = signal_seq_arr[0];
1600
1601 out:
1602         kfree(signal_seq_arr);
1603
1604         return rc;
1605 }
1606
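/*
 * cs_ioctl_signal_wait_create_jobs() - attach a single kernel-CB job to a
 * signal/wait CS. The CB is sized by the ASIC callback for the requested
 * packet type and is used directly as the patched CB, so no parsing is
 * needed; its idr handle is dropped right away and only the job's reference
 * keeps it alive. The CS refcount is taken since a completion is expected
 * for it.
 */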
1607 static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
1608                 struct hl_ctx *ctx, struct hl_cs *cs, enum hl_queue_type q_type,
1609                 u32 q_idx)
1610 {
1611         struct hl_cs_counters_atomic *cntr;
1612         struct hl_cs_job *job;
1613         struct hl_cb *cb;
1614         u32 cb_size;
1615
1616         cntr = &hdev->aggregated_cs_counters;
1617
1618         job = hl_cs_allocate_job(hdev, q_type, true);
1619         if (!job) {
1620                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1621                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1622                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1623                 return -ENOMEM;
1624         }
1625
1626         if (cs->type == CS_TYPE_WAIT)
1627                 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
1628         else
1629                 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
1630
1631         cb = hl_cb_kernel_create(hdev, cb_size,
1632                                 q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
1633         if (!cb) {
1634                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1635                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1636                 kfree(job);
1637                 return -EFAULT;
1638         }
1639
1640         job->id = 0;
1641         job->cs = cs;
1642         job->user_cb = cb;
1643         atomic_inc(&job->user_cb->cs_cnt);
1644         job->user_cb_size = cb_size;
1645         job->hw_queue_id = q_idx;
1646
1647         /*
1648          * No need for parsing, the user CB is already the patched CB.
1649          * We call hl_cb_destroy() for two reasons: we don't need the CB in
1650          * the CB idr anymore, and we need to decrement its refcount as it
1651          * was incremented inside hl_cb_kernel_create().
1652          */
1653         job->patched_cb = job->user_cb;
1654         job->job_cb_size = job->user_cb_size;
1655         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1656
1657         /* increment refcount as for external queues we get completion */
1658         cs_get(cs);
1659
1660         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1661
1662         list_add_tail(&job->cs_node, &cs->job_list);
1663
1664         hl_debugfs_add_job(hdev, job);
1665
1666         return 0;
1667 }
1668
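/*
 * cs_ioctl_signal_wait() - common handler for signal, wait and collective-wait
 * CS types. It validates the single chunk and its queue, resolves the signal
 * fence for the wait flavours (returning early if the signal CS has already
 * completed), allocates the CS, creates the signal/wait jobs and schedules
 * the CS on the H/W queues.
 */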
1669 static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
1670                                 void __user *chunks, u32 num_chunks,
1671                                 u64 *cs_seq, u32 flags, u32 timeout)
1672 {
1673         struct hl_cs_chunk *cs_chunk_array, *chunk;
1674         struct hw_queue_properties *hw_queue_prop;
1675         struct hl_device *hdev = hpriv->hdev;
1676         struct hl_cs_compl *sig_waitcs_cmpl;
1677         u32 q_idx, collective_engine_id = 0;
1678         struct hl_cs_counters_atomic *cntr;
1679         struct hl_fence *sig_fence = NULL;
1680         struct hl_ctx *ctx = hpriv->ctx;
1681         enum hl_queue_type q_type;
1682         struct hl_cs *cs;
1683         u64 signal_seq;
1684         int rc;
1685
1686         cntr = &hdev->aggregated_cs_counters;
1687         *cs_seq = ULLONG_MAX;
1688
1689         rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1690                         ctx);
1691         if (rc)
1692                 goto out;
1693
1694         /* currently it is guaranteed to have only one chunk */
1695         chunk = &cs_chunk_array[0];
1696
1697         if (chunk->queue_index >= hdev->asic_prop.max_queues) {
1698                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1699                 atomic64_inc(&cntr->validation_drop_cnt);
1700                 dev_err(hdev->dev, "Queue index %d is invalid\n",
1701                         chunk->queue_index);
1702                 rc = -EINVAL;
1703                 goto free_cs_chunk_array;
1704         }
1705
1706         q_idx = chunk->queue_index;
1707         hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
1708         q_type = hw_queue_prop->type;
1709
1710         if (!hw_queue_prop->supports_sync_stream) {
1711                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1712                 atomic64_inc(&cntr->validation_drop_cnt);
1713                 dev_err(hdev->dev,
1714                         "Queue index %d does not support sync stream operations\n",
1715                         q_idx);
1716                 rc = -EINVAL;
1717                 goto free_cs_chunk_array;
1718         }
1719
1720         if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
1721                 if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1722                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1723                         atomic64_inc(&cntr->validation_drop_cnt);
1724                         dev_err(hdev->dev,
1725                                 "Queue index %d is not a collective master queue\n", q_idx);
1726                         rc = -EINVAL;
1727                         goto free_cs_chunk_array;
1728                 }
1729
1730                 collective_engine_id = chunk->collective_engine_id;
1731         }
1732
1733         if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
1734                 rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx);
1735                 if (rc)
1736                         goto free_cs_chunk_array;
1737
1738                 sig_fence = hl_ctx_get_fence(ctx, signal_seq);
1739                 if (IS_ERR(sig_fence)) {
1740                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1741                         atomic64_inc(&cntr->validation_drop_cnt);
1742                         dev_err(hdev->dev,
1743                                 "Failed to get signal CS with seq 0x%llx\n",
1744                                 signal_seq);
1745                         rc = PTR_ERR(sig_fence);
1746                         goto free_cs_chunk_array;
1747                 }
1748
1749                 if (!sig_fence) {
1750                         /* signal CS already finished */
1751                         rc = 0;
1752                         goto free_cs_chunk_array;
1753                 }
1754
1755                 sig_waitcs_cmpl =
1756                         container_of(sig_fence, struct hl_cs_compl, base_fence);
1757
1758                 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
1759                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1760                         atomic64_inc(&cntr->validation_drop_cnt);
1761                         dev_err(hdev->dev,
1762                                 "CS seq 0x%llx is not of a signal CS\n",
1763                                 signal_seq);
1764                         hl_fence_put(sig_fence);
1765                         rc = -EINVAL;
1766                         goto free_cs_chunk_array;
1767                 }
1768
1769                 if (completion_done(&sig_fence->completion)) {
1770                         /* signal CS already finished */
1771                         hl_fence_put(sig_fence);
1772                         rc = 0;
1773                         goto free_cs_chunk_array;
1774                 }
1775         }
1776
1777         rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
1778         if (rc) {
1779                 if (cs_type == CS_TYPE_WAIT ||
1780                         cs_type == CS_TYPE_COLLECTIVE_WAIT)
1781                         hl_fence_put(sig_fence);
1782                 goto free_cs_chunk_array;
1783         }
1784
1785         /*
1786          * Save the signal CS fence for later initialization right before
1787          * hanging the wait CS on the queue.
1788          */
1789         if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT)
1790                 cs->signal_fence = sig_fence;
1791
1792         hl_debugfs_add_cs(cs);
1793
1794         *cs_seq = cs->sequence;
1795
1796         if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
1797                 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
1798                                 q_idx);
1799         else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
1800                 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
1801                                 cs, q_idx, collective_engine_id);
1802         else {
1803                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1804                 atomic64_inc(&cntr->validation_drop_cnt);
1805                 rc = -EINVAL;
1806         }
1807
1808         if (rc)
1809                 goto free_cs_object;
1810
1811         rc = hl_hw_queue_schedule_cs(cs);
1812         if (rc) {
1813                 if (rc != -EAGAIN)
1814                         dev_err(hdev->dev,
1815                                 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
1816                                 ctx->asid, cs->sequence, rc);
1817                 goto free_cs_object;
1818         }
1819
1820         rc = HL_CS_STATUS_SUCCESS;
1821         goto put_cs;
1822
1823 free_cs_object:
1824         cs_rollback(hdev, cs);
1825         *cs_seq = ULLONG_MAX;
1826         /* The path below is both for good and erroneous exits */
1827 put_cs:
1828         /* We finished with the CS in this function, so put the ref */
1829         cs_put(cs);
1830 free_cs_chunk_array:
1831         kfree(cs_chunk_array);
1832 out:
1833         return rc;
1834 }
1835
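/*
 * hl_cs_ioctl() - main CS submission entry point. After the sanity checks it
 * runs the context-switch/restore phase and any pending CBs, then dispatches
 * by the CS type encoded in cs_flags: signal/wait/collective-wait go through
 * cs_ioctl_signal_wait(), everything else through cs_ioctl_default(). Unless
 * the submission must be retried (-EAGAIN), the resulting status and CS
 * sequence number are reported back to user space.
 */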
1836 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
1837 {
1838         union hl_cs_args *args = data;
1839         enum hl_cs_type cs_type;
1840         u64 cs_seq = ULLONG_MAX;
1841         void __user *chunks;
1842         u32 num_chunks, flags, timeout;
1843         int rc;
1844
1845         rc = hl_cs_sanity_checks(hpriv, args);
1846         if (rc)
1847                 goto out;
1848
1849         rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
1850         if (rc)
1851                 goto out;
1852
1853         rc = hl_submit_pending_cb(hpriv);
1854         if (rc)
1855                 goto out;
1856
1857         cs_type = hl_cs_get_cs_type(args->in.cs_flags &
1858                                         ~HL_CS_FLAGS_FORCE_RESTORE);
1859         chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
1860         num_chunks = args->in.num_chunks_execute;
1861         flags = args->in.cs_flags;
1862
1863         /* In case this is a staged CS, user should supply the CS sequence */
1864         if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1865                         !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
1866                 cs_seq = args->in.seq;
1867
1868         timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
1869                         ? msecs_to_jiffies(args->in.timeout * 1000)
1870                         : hpriv->hdev->timeout_jiffies;
1871
1872         switch (cs_type) {
1873         case CS_TYPE_SIGNAL:
1874         case CS_TYPE_WAIT:
1875         case CS_TYPE_COLLECTIVE_WAIT:
1876                 rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
1877                                         &cs_seq, args->in.cs_flags, timeout);
1878                 break;
1879         default:
1880                 rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
1881                                                 args->in.cs_flags, timeout);
1882                 break;
1883         }
1884
1885 out:
1886         if (rc != -EAGAIN) {
1887                 memset(args, 0, sizeof(*args));
1888                 args->out.status = rc;
1889                 args->out.seq = cs_seq;
1890         }
1891
1892         return rc;
1893 }
1894
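/*
 * _hl_cs_wait_ioctl() - wait for a CS to complete. The fence for the given
 * sequence is looked up in the context; a zero timeout only polls
 * completion_done(), otherwise the call blocks interruptibly for up to
 * timeout_us. The outcome is reported through status (BUSY/COMPLETED/GONE)
 * and, when requested, the completion timestamp, while fence errors
 * (-ETIMEDOUT/-EIO) are propagated via the return value.
 */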
1895 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
1896                                 u64 timeout_us, u64 seq,
1897                                 enum hl_cs_wait_status *status, s64 *timestamp)
1898 {
1899         struct hl_fence *fence;
1900         unsigned long timeout;
1901         int rc = 0;
1902         long completion_rc;
1903
1904         if (timestamp)
1905                 *timestamp = 0;
1906
1907         if (timeout_us == MAX_SCHEDULE_TIMEOUT)
1908                 timeout = timeout_us;
1909         else
1910                 timeout = usecs_to_jiffies(timeout_us);
1911
1912         hl_ctx_get(hdev, ctx);
1913
1914         fence = hl_ctx_get_fence(ctx, seq);
1915         if (IS_ERR(fence)) {
1916                 rc = PTR_ERR(fence);
1917                 if (rc == -EINVAL)
1918                         dev_notice_ratelimited(hdev->dev,
1919                                 "Can't wait on CS %llu because current CS is at seq %llu\n",
1920                                 seq, ctx->cs_sequence);
1921         } else if (fence) {
1922                 if (!timeout_us)
1923                         completion_rc = completion_done(&fence->completion);
1924                 else
1925                         completion_rc =
1926                                 wait_for_completion_interruptible_timeout(
1927                                         &fence->completion, timeout);
1928
1929                 if (completion_rc > 0) {
1930                         *status = CS_WAIT_STATUS_COMPLETED;
1931                         if (timestamp)
1932                                 *timestamp = ktime_to_ns(fence->timestamp);
1933                 } else {
1934                         *status = CS_WAIT_STATUS_BUSY;
1935                 }
1936
1937                 if (fence->error == -ETIMEDOUT)
1938                         rc = -ETIMEDOUT;
1939                 else if (fence->error == -EIO)
1940                         rc = -EIO;
1941
1942                 hl_fence_put(fence);
1943         } else {
1944                 dev_dbg(hdev->dev,
1945                         "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
1946                         seq, ctx->cs_sequence);
1947                 *status = CS_WAIT_STATUS_GONE;
1948         }
1949
1950         hl_ctx_put(ctx);
1951
1952         return rc;
1953 }
1954
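/*
 * hl_cs_wait_ioctl() - user-facing CS wait. Translates the internal wait
 * result into HL_WAIT_CS_STATUS_* values, converts -ERESTARTSYS into -EINTR
 * for the caller and, when a timestamp is available, returns it with
 * HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD set.
 */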
1955 static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
1956 {
1957         struct hl_device *hdev = hpriv->hdev;
1958         union hl_wait_cs_args *args = data;
1959         enum hl_cs_wait_status status;
1960         u64 seq = args->in.seq;
1961         s64 timestamp;
1962         int rc;
1963
1964         rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq,
1965                                 &status, &timestamp);
1966
1967         memset(args, 0, sizeof(*args));
1968
1969         if (rc) {
1970                 if (rc == -ERESTARTSYS) {
1971                         dev_err_ratelimited(hdev->dev,
1972                                 "user process got signal while waiting for CS handle %llu\n",
1973                                 seq);
1974                         args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
1975                         rc = -EINTR;
1976                 } else if (rc == -ETIMEDOUT) {
1977                         dev_err_ratelimited(hdev->dev,
1978                                 "CS %llu has timed-out while user process is waiting for it\n",
1979                                 seq);
1980                         args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
1981                 } else if (rc == -EIO) {
1982                         dev_err_ratelimited(hdev->dev,
1983                                 "CS %llu has been aborted while user process is waiting for it\n",
1984                                 seq);
1985                         args->out.status = HL_WAIT_CS_STATUS_ABORTED;
1986                 }
1987                 return rc;
1988         }
1989
1990         if (timestamp) {
1991                 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
1992                 args->out.timestamp_nsec = timestamp;
1993         }
1994
1995         switch (status) {
1996         case CS_WAIT_STATUS_GONE:
1997                 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
1998                 fallthrough;
1999         case CS_WAIT_STATUS_COMPLETED:
2000                 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
2001                 break;
2002         case CS_WAIT_STATUS_BUSY:
2003         default:
2004                 args->out.status = HL_WAIT_CS_STATUS_BUSY;
2005                 break;
2006         }
2007
2008         return 0;
2009 }
2010
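/*
 * _hl_interrupt_wait_ioctl() - wait for a user interrupt to advance a value
 * in user memory. The 32-bit word at user_address is compared against
 * target_value; if it hasn't reached the target yet, a pending node is added
 * to the interrupt's wait list so the interrupt handler can complete its
 * fence. After every wakeup the value is re-read and, if still below the
 * target, the wait is re-armed with the remaining timeout.
 */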
2011 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
2012                                 u32 timeout_us, u64 user_address,
2013                                 u32 target_value, u16 interrupt_offset,
2014                                 enum hl_cs_wait_status *status)
2015 {
2016         struct hl_user_pending_interrupt *pend;
2017         struct hl_user_interrupt *interrupt;
2018         unsigned long timeout;
2019         long completion_rc;
2020         u32 completion_value;
2021         int rc = 0;
2022
2023         if (timeout_us == U32_MAX)
2024                 timeout = timeout_us;
2025         else
2026                 timeout = usecs_to_jiffies(timeout_us);
2027
2028         hl_ctx_get(hdev, ctx);
2029
2030         pend = kmalloc(sizeof(*pend), GFP_KERNEL);
2031         if (!pend) {
2032                 hl_ctx_put(ctx);
2033                 return -ENOMEM;
2034         }
2035
2036         hl_fence_init(&pend->fence, ULONG_MAX);
2037
2038         if (interrupt_offset == HL_COMMON_USER_INTERRUPT_ID)
2039                 interrupt = &hdev->common_user_interrupt;
2040         else
2041                 interrupt = &hdev->user_interrupt[interrupt_offset];
2042
2043         spin_lock(&interrupt->wait_list_lock);
2044         if (!hl_device_operational(hdev, NULL)) {
2045                 rc = -EPERM;
2046                 goto unlock_and_free_fence;
2047         }
2048
2049         if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
2050                 dev_err(hdev->dev,
2051                         "Failed to copy completion value from user\n");
2052                 rc = -EFAULT;
2053                 goto unlock_and_free_fence;
2054         }
2055
2056         if (completion_value >= target_value)
2057                 *status = CS_WAIT_STATUS_COMPLETED;
2058         else
2059                 *status = CS_WAIT_STATUS_BUSY;
2060
2061         if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
2062                 goto unlock_and_free_fence;
2063
2064         /* Add pending user interrupt to relevant list for the interrupt
2065          * handler to monitor
2066          */
2067         list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
2068         spin_unlock(&interrupt->wait_list_lock);
2069
2070 wait_again:
2071         /* Wait for interrupt handler to signal completion */
2072         completion_rc =
2073                 wait_for_completion_interruptible_timeout(
2074                                 &pend->fence.completion, timeout);
2075
2076         /* If timeout did not expire we need to perform the comparison.
2077          * If comparison fails, keep waiting until timeout expires
2078          */
2079         if (completion_rc > 0) {
2080                 if (copy_from_user(&completion_value,
2081                                 u64_to_user_ptr(user_address), 4)) {
2082                         dev_err(hdev->dev,
2083                                 "Failed to copy completion value from user\n");
2084                         rc = -EFAULT;
2085                         goto remove_pending_user_interrupt;
2086                 }
2087
2088                 if (completion_value >= target_value) {
2089                         *status = CS_WAIT_STATUS_COMPLETED;
2090                 } else {
2091                         timeout = completion_rc;
2092                         goto wait_again;
2093                 }
2094         } else {
2095                 *status = CS_WAIT_STATUS_BUSY;
2096         }
2097
2098 remove_pending_user_interrupt:
2099         spin_lock(&interrupt->wait_list_lock);
2100         list_del(&pend->wait_list_node);
2101
2102 unlock_and_free_fence:
2103         spin_unlock(&interrupt->wait_list_lock);
2104         kfree(pend);
2105         hl_ctx_put(ctx);
2106
2107         return rc;
2108 }
2109
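/*
 * hl_interrupt_wait_ioctl() - validate and translate the interrupt id. The
 * requested id must be either HL_COMMON_USER_INTERRUPT_ID or fall within the
 * range of user MSI-X interrupts exposed by the ASIC; valid MSI-X ids are
 * converted to an offset into hdev->user_interrupt[], while the common id
 * maps to hdev->common_user_interrupt, before calling the wait helper.
 */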
2110 static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
2111 {
2112         u16 interrupt_id, interrupt_offset, first_interrupt, last_interrupt;
2113         struct hl_device *hdev = hpriv->hdev;
2114         struct asic_fixed_properties *prop;
2115         union hl_wait_cs_args *args = data;
2116         enum hl_cs_wait_status status;
2117         int rc;
2118
2119         prop = &hdev->asic_prop;
2120
2121         if (!prop->user_interrupt_count) {
2122                 dev_err(hdev->dev, "no user interrupts allowed\n");
2123                 return -EPERM;
2124         }
2125
2126         interrupt_id =
2127                 FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);
2128
2129         first_interrupt = prop->first_available_user_msix_interrupt;
2130         last_interrupt = prop->first_available_user_msix_interrupt +
2131                                                 prop->user_interrupt_count - 1;
2132
2133         if ((interrupt_id < first_interrupt || interrupt_id > last_interrupt) &&
2134                         interrupt_id != HL_COMMON_USER_INTERRUPT_ID) {
2135                 dev_err(hdev->dev, "invalid user interrupt %u\n", interrupt_id);
2136                 return -EINVAL;
2137         }
2138
2139         if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID)
2140                 interrupt_offset = HL_COMMON_USER_INTERRUPT_ID;
2141         else
2142                 interrupt_offset = interrupt_id - first_interrupt;
2143
2144         rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
2145                                 args->in.interrupt_timeout_us, args->in.addr,
2146                                 args->in.target, interrupt_offset, &status);
2147
2148         memset(args, 0, sizeof(*args));
2149
2150         if (rc) {
2151                 dev_err_ratelimited(hdev->dev,
2152                         "interrupt_wait_ioctl failed (%d)\n", rc);
2153
2154                 return rc;
2155         }
2156
2157         switch (status) {
2158         case CS_WAIT_STATUS_COMPLETED:
2159                 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
2160                 break;
2161         case CS_WAIT_STATUS_BUSY:
2162         default:
2163                 args->out.status = HL_WAIT_CS_STATUS_BUSY;
2164                 break;
2165         }
2166
2167         return 0;
2168 }
2169
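/*
 * hl_wait_ioctl() - dispatch to the user-interrupt wait or the CS wait,
 * depending on the HL_WAIT_CS_FLAGS_INTERRUPT flag.
 */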
2170 int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
2171 {
2172         union hl_wait_cs_args *args = data;
2173         u32 flags = args->in.flags;
2174         int rc;
2175
2176         if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
2177                 rc = hl_interrupt_wait_ioctl(hpriv, data);
2178         else
2179                 rc = hl_cs_wait_ioctl(hpriv, data);
2180
2181         return rc;
2182 }
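/*
 * Illustrative user-space sketch (not part of the driver): roughly how a
 * process could block on a CS sequence number through this path. The device
 * node path and the error handling are simplified assumptions; the ioctl
 * number and the hl_wait_cs_args layout come from the habanalabs uapi header.
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <misc/habanalabs.h>	// installed uapi header, path may vary
 *
 *	static int wait_for_cs(int fd, __u64 seq)
 *	{
 *		union hl_wait_cs_args args = {};
 *
 *		args.in.seq = seq;
 *		args.in.timeout_us = 1000000;	// 1 second
 *
 *		if (ioctl(fd, HL_IOCTL_WAIT_CS, &args))
 *			return -1;		// interrupted, timed-out or aborted
 *
 *		return args.out.status;		// HL_WAIT_CS_STATUS_COMPLETED/BUSY
 *	}
 *
 *	// e.g.: int fd = open("/dev/hl0", O_RDWR); int st = wait_for_cs(fd, seq);
 */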