// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/uaccess.h>
#include <linux/slab.h>

#define HL_CS_FLAGS_TYPE_MASK	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
				HL_CS_FLAGS_COLLECTIVE_WAIT)

/**
 * enum hl_cs_wait_status - cs wait status
 * @CS_WAIT_STATUS_BUSY: cs was not completed yet
 * @CS_WAIT_STATUS_COMPLETED: cs completed
 * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
 */
enum hl_cs_wait_status {
	CS_WAIT_STATUS_BUSY,
	CS_WAIT_STATUS_COMPLETED,
	CS_WAIT_STATUS_GONE
};

static void job_wq_completion(struct work_struct *work);
static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				u64 timeout_us, u64 seq,
				enum hl_cs_wait_status *status, s64 *timestamp);
static void cs_do_release(struct kref *ref);
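/* Background note: a SOB (sync object) is a small device counter used by
 * the sync-stream mechanism for inter-queue synchronization - signal CS's
 * increment it and monitors watch it. The SOB is refcounted in software
 * so that the last CS to use it (see hl_fence_release() below) resets it
 * for reuse. The two helpers below are kref release callbacks for that.
 */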
static void hl_sob_reset(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	hdev->asic_funcs->reset_sob(hdev, hw_sob);
}

void hl_sob_reset_error(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
		hw_sob->q_idx, hw_sob->sob_id);
}
/**
 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
 * @sob_base: sob base id
 * @sob_mask: sob user mask, each bit represents a sob offset from sob base
 * @mask: generated mask
 *
 * Return: 0 if given parameters are valid
 */
int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
{
	int i;

	if (sob_mask == 0)
		return -EINVAL;

	if (sob_mask == 0x1) {
		*mask = ~(1 << (sob_base & 0x7));
	} else {
		/* find msb in order to verify sob range is valid */
		for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
			if (BIT(i) & sob_mask)
				break;

		if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
			return -EINVAL;

		*mask = ~sob_mask;
	}

	return 0;
}
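/* Worked example (illustrative, not from the original source): with
 * sob_base = 10 (10 & 0x7 == 2) and sob_mask = 0x1, the generated mask is
 * ~(1 << 2) == 0xFB. With sob_mask = 0x7 (three consecutive SOBs), the
 * msb is bit 2, the range check passes as long as the three SOBs fit in
 * the monitor window, and the mask is ~0x7 == 0xF8. Note the mask is
 * stored inverted, presumably as the monitor arm packet expects.
 */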
static void hl_fence_release(struct kref *kref)
{
	struct hl_fence *fence =
		container_of(kref, struct hl_fence, refcount);
	struct hl_cs_compl *hl_cs_cmpl =
		container_of(fence, struct hl_cs_compl, base_fence);
	struct hl_device *hdev = hl_cs_cmpl->hdev;

	/* EBUSY means the CS was never submitted and hence we don't have
	 * an attached hw_sob object that we should handle here
	 */
	if (fence->error == -EBUSY)
		goto free;

	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
		(hl_cs_cmpl->type == CS_TYPE_WAIT) ||
		(hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {

		dev_dbg(hdev->dev,
			"CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
			hl_cs_cmpl->cs_seq,
			hl_cs_cmpl->type,
			hl_cs_cmpl->hw_sob->sob_id,
			hl_cs_cmpl->sob_val);

		/*
		 * A signal CS can get completion while the corresponding wait
		 * for signal CS is on its way to the PQ. The wait for signal CS
		 * will get stuck if the signal CS incremented the SOB to its
		 * max value and there are no pending (submitted) waits on this
		 * SOB.
		 * We do the following to avoid this situation:
		 * 1. The wait for signal CS must get a ref for the signal CS as
		 *    soon as possible in cs_ioctl_signal_wait() and put it
		 *    before being submitted to the PQ but after it incremented
		 *    the SOB refcnt in init_signal_wait_cs().
		 * 2. Signal/Wait for signal CS will decrement the SOB refcnt
		 *    here.
		 * These two measures guarantee that the wait for signal CS will
		 * reset the SOB upon completion rather than the signal CS and
		 * hence the above scenario is avoided.
		 */
		kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);

		if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
			hdev->asic_funcs->reset_sob_group(hdev,
					hl_cs_cmpl->sob_group);
	}

free:
	kfree(hl_cs_cmpl);
}

void hl_fence_put(struct hl_fence *fence)
{
	if (fence)
		kref_put(&fence->refcount, hl_fence_release);
}

void hl_fence_get(struct hl_fence *fence)
{
	if (fence)
		kref_get(&fence->refcount);
}

static void hl_fence_init(struct hl_fence *fence, u64 sequence)
{
	kref_init(&fence->refcount);
	fence->cs_sequence = sequence;
	fence->error = 0;
	fence->timestamp = ktime_set(0, 0);
	init_completion(&fence->completion);
}
void cs_get(struct hl_cs *cs)
{
	kref_get(&cs->refcount);
}

static int cs_get_unless_zero(struct hl_cs *cs)
{
	return kref_get_unless_zero(&cs->refcount);
}

static void cs_put(struct hl_cs *cs)
{
	kref_put(&cs->refcount, cs_do_release);
}

static void cs_job_do_release(struct kref *ref)
{
	struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);

	kfree(job);
}

static void cs_job_put(struct hl_cs_job *job)
{
	kref_put(&job->refcount, cs_job_do_release);
}
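/* CS lifetime in a nutshell: the submitting ioctl holds one reference for
 * the duration of the submission flow, and every job on an external or
 * H/W queue of a completion-bearing CS takes another one (see
 * cs_ioctl_default()). complete_job() drops the per-job references, and
 * cs_do_release() runs once the count reaches zero.
 */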
bool cs_needs_completion(struct hl_cs *cs)
{
	/* In case this is a staged CS, only the last CS in sequence should
	 * get a completion, any non staged CS will always get a completion
	 */
	if (cs->staged_cs && !cs->staged_last)
		return false;

	return true;
}

bool cs_needs_timeout(struct hl_cs *cs)
{
	/* In case this is a staged CS, only the first CS in sequence should
	 * get a timeout, any non staged CS will always get a timeout
	 */
	if (cs->staged_cs && !cs->staged_first)
		return false;

	return true;
}

static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
	/*
	 * Patched CB is created for external queues jobs, and for H/W queues
	 * jobs if the user CB was allocated by driver and MMU is disabled.
	 */
	return (job->queue_type == QUEUE_TYPE_EXT ||
			(job->queue_type == QUEUE_TYPE_HW &&
					job->is_kernel_allocated_cb &&
					!hdev->mmu_enable));
}
/*
 * cs_parser - parse the user command submission
 *
 * @hpriv: pointer to the private data of the fd
 * @job: pointer to the job that holds the command submission info
 *
 * The function parses the command submission of the user. It calls the
 * ASIC specific parser, which returns a list of memory blocks to send
 * to the device as different command buffers
 */
static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_parser parser;
	int rc;

	parser.ctx_id = job->cs->ctx->asid;
	parser.cs_sequence = job->cs->sequence;
	parser.job_id = job->id;

	parser.hw_queue_id = job->hw_queue_id;
	parser.job_userptr_list = &job->userptr_list;
	parser.patched_cb = NULL;
	parser.user_cb = job->user_cb;
	parser.user_cb_size = job->user_cb_size;
	parser.queue_type = job->queue_type;
	parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
	job->patched_cb = NULL;
	parser.completion = cs_needs_completion(job->cs);

	rc = hdev->asic_funcs->cs_parser(hdev, &parser);

	if (is_cb_patched(hdev, job)) {
		if (!rc) {
			job->patched_cb = parser.patched_cb;
			job->job_cb_size = parser.patched_cb_size;
			job->contains_dma_pkt = parser.contains_dma_pkt;
			atomic_inc(&job->patched_cb->cs_cnt);
		}

		/*
		 * Whether the parsing worked or not, we don't need the
		 * original CB anymore because it was already parsed and
		 * won't be accessed again for this CS
		 */
		atomic_dec(&job->user_cb->cs_cnt);
		hl_cb_put(job->user_cb);
		job->user_cb = NULL;
	} else if (!rc) {
		job->job_cb_size = job->user_cb_size;
	}

	return rc;
}
static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct hl_cs *cs = job->cs;

	if (is_cb_patched(hdev, job)) {
		hl_userptr_delete_list(hdev, &job->userptr_list);

		/*
		 * We might arrive here from rollback and patched CB wasn't
		 * created, so we need to check it's not NULL
		 */
		if (job->patched_cb) {
			atomic_dec(&job->patched_cb->cs_cnt);
			hl_cb_put(job->patched_cb);
		}
	}

	/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
	 * enabled, the user CB isn't released in cs_parser() and thus should be
	 * released here.
	 * This is also true for INT queues jobs which were allocated by driver
	 */
	if (job->is_kernel_allocated_cb &&
		((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) ||
				job->queue_type == QUEUE_TYPE_INT)) {
		atomic_dec(&job->user_cb->cs_cnt);
		hl_cb_put(job->user_cb);
	}

	/*
	 * This is the only place where there can be multiple threads
	 * modifying the list at the same time
	 */
	spin_lock(&cs->job_lock);
	list_del(&job->cs_node);
	spin_unlock(&cs->job_lock);

	hl_debugfs_remove_job(hdev, job);

	/* We decrement reference only for a CS that gets completion
	 * because the reference was incremented only for this kind of CS
	 * right before it was scheduled.
	 *
	 * In staged submission, only the last CS marked as 'staged_last'
	 * gets completion, hence its release function will be called from here.
	 * As for all the other CS's in the staged submission which do not get
	 * completion, their CS reference will be decremented by the
	 * 'staged_last' CS during the CS release flow.
	 * All relevant PQ CI counters will be incremented during the CS release
	 * flow by calling 'hl_hw_queue_update_ci'.
	 */
	if (cs_needs_completion(cs) &&
		(job->queue_type == QUEUE_TYPE_EXT ||
			job->queue_type == QUEUE_TYPE_HW))
		cs_put(cs);

	cs_job_put(job);
}
/*
 * hl_staged_cs_find_first - locate the first CS in this staged submission
 *
 * @hdev: pointer to device structure
 * @cs_seq: staged submission sequence number
 *
 * @note: This function must be called under 'hdev->cs_mirror_lock'
 *
 * Find and return a CS pointer with the given sequence
 */
struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
{
	struct hl_cs *cs;

	list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
		if (cs->staged_cs && cs->staged_first &&
				cs->sequence == cs_seq)
			return cs;

	return NULL;
}

/*
 * is_staged_cs_last_exists - returns true if the last CS in sequence exists
 *
 * @hdev: pointer to device structure
 * @cs: staged submission member
 */
bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs *last_entry;

	last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
								staged_cs_node);

	if (last_entry->staged_last)
		return true;

	return false;
}
/*
 * staged_cs_get - get CS reference if this CS is a part of a staged CS
 *
 * @hdev: pointer to device structure
 * @cs: current CS
 *
 * Increment CS reference for every CS in this staged submission except for
 * the CS which gets completion.
 */
static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
{
	/* Only the last CS in this staged submission will get a completion.
	 * We must increment the reference for all other CS's in this
	 * staged submission.
	 * Once we get a completion we will release the whole staged submission.
	 */
	if (!cs->staged_last)
		cs_get(cs);
}

/*
 * staged_cs_put - put a CS in case it is part of staged submission
 *
 * @hdev: pointer to device structure
 * @cs: CS to put
 *
 * This function decrements a CS reference (for a non completion CS)
 */
static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
{
	/* We release all CS's in a staged submission except the last
	 * CS which we have never incremented its reference.
	 */
	if (!cs_needs_completion(cs))
		cs_put(cs);
}
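/* TDR (timeout detection and recovery) note: a CS that needs a timeout
 * (see cs_needs_timeout()) arms the 'work_tdr' delayed work, which fires
 * cs_timedout() if the CS is still in flight after hdev->timeout_jiffies.
 * cs_handle_tdr() below cancels the TDR of the CS being released and arms
 * the TDR of the next CS on the mirror list that still needs one.
 */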
static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
{
	bool next_entry_found = false;
	struct hl_cs *next;

	if (!cs_needs_timeout(cs))
		return;

	spin_lock(&hdev->cs_mirror_lock);

	/* We need to handle tdr only once for the complete staged submission.
	 * Hence, we choose the CS that reaches this function first which is
	 * the CS marked as 'staged_last'.
	 */
	if (cs->staged_cs && cs->staged_last)
		cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);

	spin_unlock(&hdev->cs_mirror_lock);

	/* Don't cancel TDR in case this CS was timedout because we might be
	 * running from the TDR context
	 */
	if (cs && (cs->timedout ||
			hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
		return;

	if (cs && cs->tdr_active)
		cancel_delayed_work_sync(&cs->work_tdr);

	spin_lock(&hdev->cs_mirror_lock);

	/* queue TDR for next CS */
	list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
		if (cs_needs_timeout(next)) {
			next_entry_found = true;
			break;
		}

	if (next_entry_found && !next->tdr_active) {
		next->tdr_active = true;
		schedule_delayed_work(&next->work_tdr,
					hdev->timeout_jiffies);
	}

	spin_unlock(&hdev->cs_mirror_lock);
}
static void cs_do_release(struct kref *ref)
{
	struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_cs_job *job, *tmp;

	cs->completed = true;

	/*
	 * Although if we reached here it means that all external jobs have
	 * finished, because each one of them took refcnt to CS, we still
	 * need to go over the internal jobs and complete them. Otherwise, we
	 * will have leaked memory and what's worse, the CS object (and
	 * potentially the CTX object) could be released, while the JOB
	 * still holds a pointer to them (but no reference).
	 */
	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		complete_job(hdev, job);

	if (!cs->submitted) {
		/* In case the wait for signal CS was submitted, the put occurs
		 * in init_signal_wait_cs() or collective_wait_init_cs()
		 * right before hanging on the PQ.
		 */
		if (cs->type == CS_TYPE_WAIT ||
				cs->type == CS_TYPE_COLLECTIVE_WAIT)
			hl_fence_put(cs->signal_fence);

		goto out;
	}

	hdev->asic_funcs->hw_queues_lock(hdev);

	hdev->cs_active_cnt--;
	if (!hdev->cs_active_cnt) {
		struct hl_device_idle_busy_ts *ts;

		ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
		ts->busy_to_idle_ts = ktime_get();

		if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
			hdev->idle_busy_ts_idx = 0;
	} else if (hdev->cs_active_cnt < 0) {
		dev_crit(hdev->dev, "CS active cnt %d is negative\n",
			hdev->cs_active_cnt);
	}

	hdev->asic_funcs->hw_queues_unlock(hdev);

	/* Need to update CI for all queue jobs that do not get a completion */
	hl_hw_queue_update_ci(cs);

	/* remove CS from CS mirror list */
	spin_lock(&hdev->cs_mirror_lock);
	list_del_init(&cs->mirror_node);
	spin_unlock(&hdev->cs_mirror_lock);

	cs_handle_tdr(hdev, cs);

	if (cs->staged_cs) {
		/* the completion CS decrements reference for the entire
		 * staged submission
		 */
		if (cs->staged_last) {
			struct hl_cs *staged_cs, *tmp;

			list_for_each_entry_safe(staged_cs, tmp,
					&cs->staged_cs_node, staged_cs_node)
				staged_cs_put(hdev, staged_cs);
		}

		/* A staged CS will be a member in the list only after it
		 * was submitted. We used 'cs_mirror_lock' when inserting
		 * it to list so we will use it again when removing it
		 */
		if (cs->submitted) {
			spin_lock(&hdev->cs_mirror_lock);
			list_del(&cs->staged_cs_node);
			spin_unlock(&hdev->cs_mirror_lock);
		}
	}

out:
	/* Must be called before hl_ctx_put because inside we use ctx to get
	 * the device
	 */
	hl_debugfs_remove_cs(cs);

	hl_ctx_put(cs->ctx);

	/* We need to mark an error for not submitted because in that case
	 * the hl fence release flow is different. Mainly, we don't need
	 * to handle hw_sob for signal/wait
	 */
	if (cs->timedout)
		cs->fence->error = -ETIMEDOUT;
	else if (cs->aborted)
		cs->fence->error = -EIO;
	else if (!cs->submitted)
		cs->fence->error = -EBUSY;

	if (cs->timestamp)
		cs->fence->timestamp = ktime_get();
	complete_all(&cs->fence->completion);
	hl_fence_put(cs->fence);

	kfree(cs->jobs_in_queue_cnt);
	kfree(cs);
}
static void cs_timedout(struct work_struct *work)
{
	struct hl_device *hdev;
	int rc;
	struct hl_cs *cs = container_of(work, struct hl_cs,
						work_tdr.work);
	rc = cs_get_unless_zero(cs);
	if (!rc)
		return;

	if ((!cs->submitted) || (cs->completed)) {
		cs_put(cs);
		return;
	}

	/* Mark the CS is timed out so we won't try to cancel its TDR */
	cs->timedout = true;

	hdev = cs->ctx->hdev;

	switch (cs->type) {
	case CS_TYPE_SIGNAL:
		dev_err(hdev->dev,
			"Signal command submission %llu has not finished in time!\n",
			cs->sequence);
		break;

	case CS_TYPE_WAIT:
		dev_err(hdev->dev,
			"Wait command submission %llu has not finished in time!\n",
			cs->sequence);
		break;

	case CS_TYPE_COLLECTIVE_WAIT:
		dev_err(hdev->dev,
			"Collective Wait command submission %llu has not finished in time!\n",
			cs->sequence);
		break;

	default:
		dev_err(hdev->dev,
			"Command submission %llu has not finished in time!\n",
			cs->sequence);
		break;
	}

	cs_put(cs);

	if (hdev->reset_on_lockup)
		hl_device_reset(hdev, false, false);
	else
		hdev->needs_reset = true;
}
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
			enum hl_cs_type cs_type, u64 user_sequence,
			struct hl_cs **cs_new)
{
	struct hl_cs_counters_atomic *cntr;
	struct hl_fence *other = NULL;
	struct hl_cs_compl *cs_cmpl;
	struct hl_cs *cs;
	int rc;

	cntr = &hdev->aggregated_cs_counters;

	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
	if (!cs) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		return -ENOMEM;
	}

	/* increment refcnt for context */
	hl_ctx_get(hdev, ctx);

	cs->ctx = ctx;
	cs->submitted = false;
	cs->completed = false;
	cs->type = cs_type;
	INIT_LIST_HEAD(&cs->job_list);
	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
	kref_init(&cs->refcount);
	spin_lock_init(&cs->job_lock);

	cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
	if (!cs_cmpl) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		rc = -ENOMEM;
		goto free_cs;
	}

	cs_cmpl->hdev = hdev;
	cs_cmpl->type = cs->type;
	spin_lock_init(&cs_cmpl->lock);
	cs->fence = &cs_cmpl->base_fence;

	spin_lock(&ctx->cs_lock);

	cs_cmpl->cs_seq = ctx->cs_sequence;
	other = ctx->cs_pending[cs_cmpl->cs_seq &
				(hdev->asic_prop.max_pending_cs - 1)];

	if (other && !completion_done(&other->completion)) {
		/* If the following statement is true, it means we have reached
		 * a point in which only part of the staged submission was
		 * submitted and we don't have enough room in the 'cs_pending'
		 * array for the rest of the submission.
		 * This causes a deadlock because this CS will never be
		 * completed as it depends on future CS's for completion.
		 */
		if (other->cs_sequence == user_sequence)
			dev_crit_ratelimited(hdev->dev,
				"Staged CS %llu deadlock due to lack of resources",
				user_sequence);

		dev_dbg_ratelimited(hdev->dev,
			"Rejecting CS because of too many in-flight CS\n");
		atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
		atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
		rc = -EAGAIN;
		goto free_fence;
	}

	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
	if (!cs->jobs_in_queue_cnt) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		rc = -ENOMEM;
		goto free_fence;
	}

	/* init hl_fence */
	hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);

	cs->sequence = cs_cmpl->cs_seq;

	ctx->cs_pending[cs_cmpl->cs_seq &
			(hdev->asic_prop.max_pending_cs - 1)] =
							&cs_cmpl->base_fence;
	ctx->cs_sequence++;

	hl_fence_get(&cs_cmpl->base_fence);

	hl_fence_put(other);

	spin_unlock(&ctx->cs_lock);

	*cs_new = cs;

	return 0;

free_fence:
	spin_unlock(&ctx->cs_lock);
	kfree(cs_cmpl);
free_cs:
	kfree(cs);
	hl_ctx_put(ctx);
	return rc;
}
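/* Note on the pending-CS ring above: cs_pending is indexed with
 * 'seq & (max_pending_cs - 1)', which assumes asic_prop.max_pending_cs is
 * a power of two. A new CS that would reuse a slot whose fence has not
 * completed yet is rejected with -EAGAIN, so the submission ioctl can be
 * retried once older CS's drain.
 */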
static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs_job *job, *tmp;

	staged_cs_put(hdev, cs);

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		complete_job(hdev, job);
}

void hl_cs_rollback_all(struct hl_device *hdev)
{
	int i;
	struct hl_cs *cs, *tmp;

	/* flush all completions before iterating over the CS mirror list in
	 * order to avoid a race with the release functions
	 */
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		flush_workqueue(hdev->cq_wq[i]);

	/* Make sure we don't have leftovers in the CS mirror list */
	list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
		cs_get(cs);
		cs->aborted = true;
		dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
				cs->ctx->asid, cs->sequence);
		cs_rollback(hdev, cs);
		cs_put(cs);
	}
}

void hl_pending_cb_list_flush(struct hl_ctx *ctx)
{
	struct hl_pending_cb *pending_cb, *tmp;

	list_for_each_entry_safe(pending_cb, tmp,
			&ctx->pending_cb_list, cb_node) {
		list_del(&pending_cb->cb_node);
		hl_cb_put(pending_cb->cb);
		kfree(pending_cb);
	}
}

static void job_wq_completion(struct work_struct *work)
{
	struct hl_cs_job *job = container_of(work, struct hl_cs_job,
						finish_work);
	struct hl_cs *cs = job->cs;
	struct hl_device *hdev = cs->ctx->hdev;

	/* job is no longer needed */
	complete_job(hdev, job);
}
static int validate_queue_index(struct hl_device *hdev,
				struct hl_cs_chunk *chunk,
				enum hl_queue_type *queue_type,
				bool *is_kernel_allocated_cb)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hw_queue_properties *hw_queue_prop;

	/* This must be checked here to prevent out-of-bounds access to
	 * hw_queues_props array
	 */
	if (chunk->queue_index >= asic->max_queues) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return -EINVAL;
	}

	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];

	if (hw_queue_prop->type == QUEUE_TYPE_NA) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return -EINVAL;
	}

	if (hw_queue_prop->driver_only) {
		dev_err(hdev->dev,
			"Queue index %d is restricted for the kernel driver\n",
			chunk->queue_index);
		return -EINVAL;
	}

	/* When hw queue type isn't QUEUE_TYPE_HW,
	 * USER_ALLOC_CB flag shall be referred as "don't care".
	 */
	if (hw_queue_prop->type == QUEUE_TYPE_HW) {
		if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
			if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
				dev_err(hdev->dev,
					"Queue index %d doesn't support user CB\n",
					chunk->queue_index);
				return -EINVAL;
			}

			*is_kernel_allocated_cb = false;
		} else {
			if (!(hw_queue_prop->cb_alloc_flags &
					CB_ALLOC_KERNEL)) {
				dev_err(hdev->dev,
					"Queue index %d doesn't support kernel CB\n",
					chunk->queue_index);
				return -EINVAL;
			}

			*is_kernel_allocated_cb = true;
		}
	} else {
		*is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
						& CB_ALLOC_KERNEL);
	}

	*queue_type = hw_queue_prop->type;
	return 0;
}
static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
					struct hl_cb_mgr *cb_mgr,
					struct hl_cs_chunk *chunk)
{
	struct hl_cb *cb;
	u32 cb_handle;

	cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);

	cb = hl_cb_get(hdev, cb_mgr, cb_handle);
	if (!cb) {
		dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
		return NULL;
	}

	if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
		dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
		goto release_cb;
	}

	atomic_inc(&cb->cs_cnt);

	return cb;

release_cb:
	hl_cb_put(cb);
	return NULL;
}

struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
{
	struct hl_cs_job *job;

	job = kzalloc(sizeof(*job), GFP_ATOMIC);
	if (!job)
		return NULL;

	kref_init(&job->refcount);
	job->queue_type = queue_type;
	job->is_kernel_allocated_cb = is_kernel_allocated_cb;

	if (is_cb_patched(hdev, job))
		INIT_LIST_HEAD(&job->userptr_list);

	if (job->queue_type == QUEUE_TYPE_EXT)
		INIT_WORK(&job->finish_work, job_wq_completion);

	return job;
}
static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
{
	if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
		return CS_TYPE_SIGNAL;
	else if (cs_type_flags & HL_CS_FLAGS_WAIT)
		return CS_TYPE_WAIT;
	else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
		return CS_TYPE_COLLECTIVE_WAIT;
	else
		return CS_TYPE_DEFAULT;
}
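/* The three sync-stream type flags are mutually exclusive;
 * hl_cs_sanity_checks() below enforces this by requiring the masked
 * cs_type_flags to be a power of two, i.e. at most one bit set.
 */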
static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	u32 cs_type_flags, num_chunks;
	enum hl_device_status status;
	enum hl_cs_type cs_type;

	if (!hl_device_operational(hdev, &status)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't submit new CS\n",
			hdev->status[status]);
		return -EBUSY;
	}

	if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
			!hdev->supports_staged_submission) {
		dev_err(hdev->dev, "staged submission not supported");
		return -EPERM;
	}

	cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;

	if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
		dev_err(hdev->dev,
			"CS type flags are mutually exclusive, context %d\n",
			ctx->asid);
		return -EINVAL;
	}

	cs_type = hl_cs_get_cs_type(cs_type_flags);
	num_chunks = args->in.num_chunks_execute;

	if (unlikely((cs_type != CS_TYPE_DEFAULT) &&
					!hdev->supports_sync_stream)) {
		dev_err(hdev->dev, "Sync stream CS is not supported\n");
		return -EINVAL;
	}

	if (cs_type == CS_TYPE_DEFAULT) {
		if (!num_chunks) {
			dev_err(hdev->dev,
				"Got execute CS with 0 chunks, context %d\n",
				ctx->asid);
			return -EINVAL;
		}
	} else if (num_chunks != 1) {
		dev_err(hdev->dev,
			"Sync stream CS mandates one chunk only, context %d\n",
			ctx->asid);
		return -EINVAL;
	}

	return 0;
}
static int hl_cs_copy_chunk_array(struct hl_device *hdev,
					struct hl_cs_chunk **cs_chunk_array,
					void __user *chunks, u32 num_chunks,
					struct hl_ctx *ctx)
{
	u32 size_to_copy;

	if (num_chunks > HL_MAX_JOBS_PER_CS) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev,
			"Number of chunks can NOT be larger than %d\n",
			HL_MAX_JOBS_PER_CS);
		return -EINVAL;
	}

	*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
					GFP_ATOMIC);
	if (!*cs_chunk_array) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
		return -ENOMEM;
	}

	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
	if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
		kfree(*cs_chunk_array);
		return -EFAULT;
	}

	return 0;
}
static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
				u64 sequence, u32 flags)
{
	if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
		return 0;

	cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
	cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);

	if (cs->staged_first) {
		/* Staged CS sequence is the first CS sequence */
		INIT_LIST_HEAD(&cs->staged_cs_node);
		cs->staged_sequence = cs->sequence;
	} else {
		/* User sequence will be validated in 'hl_hw_queue_schedule_cs'
		 * under the cs_mirror_lock
		 */
		cs->staged_sequence = sequence;
	}

	/* Increment CS reference if needed */
	staged_cs_get(hdev, cs);

	cs->staged_cs = true;

	return 0;
}
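/* Illustrative staged-submission flow, derived from the flags handled
 * above (not taken from any userspace code): the first CS is submitted
 * with STAGED_SUBMISSION | STAGED_SUBMISSION_FIRST and its returned
 * sequence becomes the staged sequence; every following CS passes that
 * sequence in 'in.seq' with STAGED_SUBMISSION set; the final CS adds
 * STAGED_SUBMISSION_LAST and is the only one to get a completion.
 */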
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
				u32 num_chunks, u64 *cs_seq, u32 flags)
{
	bool staged_mid, int_queues_only = true;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_chunk *cs_chunk_array;
	struct hl_cs_counters_atomic *cntr;
	struct hl_ctx *ctx = hpriv->ctx;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	u64 user_sequence;
	int rc, i;

	cntr = &hdev->aggregated_cs_counters;
	user_sequence = *cs_seq;
	*cs_seq = ULLONG_MAX;

	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
			hpriv->ctx);
	if (rc)
		goto out;

	if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
			!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
		staged_mid = true;
	else
		staged_mid = false;

	rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
			staged_mid ? user_sequence : ULLONG_MAX, &cs);
	if (rc)
		goto free_cs_chunk_array;

	cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
	*cs_seq = cs->sequence;

	hl_debugfs_add_cs(cs);

	rc = cs_staged_submission(hdev, cs, user_sequence, flags);
	if (rc)
		goto free_cs_object;

	/* Validate ALL the CS chunks before submitting the CS */
	for (i = 0 ; i < num_chunks ; i++) {
		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
		enum hl_queue_type queue_type;
		bool is_kernel_allocated_cb;

		rc = validate_queue_index(hdev, chunk, &queue_type,
						&is_kernel_allocated_cb);
		if (rc) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			goto free_cs_object;
		}

		if (is_kernel_allocated_cb) {
			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
			if (!cb) {
				atomic64_inc(
					&ctx->cs_counters.validation_drop_cnt);
				atomic64_inc(&cntr->validation_drop_cnt);
				rc = -EINVAL;
				goto free_cs_object;
			}
		} else {
			cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
		}

		if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
			int_queues_only = false;

		job = hl_cs_allocate_job(hdev, queue_type,
						is_kernel_allocated_cb);
		if (!job) {
			atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
			atomic64_inc(&cntr->out_of_mem_drop_cnt);
			dev_err(hdev->dev, "Failed to allocate a new job\n");
			rc = -ENOMEM;
			if (is_kernel_allocated_cb)
				goto release_cb;

			goto free_cs_object;
		}

		job->id = i + 1;
		job->cs = cs;
		job->user_cb = cb;
		job->user_cb_size = chunk->cb_size;
		job->hw_queue_id = chunk->queue_index;

		cs->jobs_in_queue_cnt[job->hw_queue_id]++;

		list_add_tail(&job->cs_node, &cs->job_list);

		/*
		 * Increment CS reference. When CS reference is 0, CS is
		 * done and can be signaled to user and free all its resources
		 * Only increment for JOB on external or H/W queues, because
		 * only for those JOBs we get completion
		 */
		if (cs_needs_completion(cs) &&
			(job->queue_type == QUEUE_TYPE_EXT ||
				job->queue_type == QUEUE_TYPE_HW))
			cs_get(cs);

		hl_debugfs_add_job(hdev, job);

		rc = cs_parser(hpriv, job);
		if (rc) {
			atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
			atomic64_inc(&cntr->parsing_drop_cnt);
			dev_err(hdev->dev,
				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
				cs->ctx->asid, cs->sequence, job->id, rc);
			goto free_cs_object;
		}
	}

	/* We allow a CS with any queue type combination as long as it does
	 * not get a completion
	 */
	if (int_queues_only && cs_needs_completion(cs)) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev,
			"Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
			cs->ctx->asid, cs->sequence);
		rc = -EINVAL;
		goto free_cs_object;
	}

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				cs->ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

release_cb:
	atomic_dec(&cb->cs_cnt);
	hl_cb_put(cb);
free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}
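/* Error unwinding above relies on cs_rollback() completing every job
 * already attached to the CS; a CB whose cs_cnt was incremented but that
 * was not yet attached to a job is released via the 'release_cb' label
 * instead.
 */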
static int pending_cb_create_job(struct hl_device *hdev, struct hl_ctx *ctx,
		struct hl_cs *cs, struct hl_cb *cb, u32 size, u32 hw_queue_id)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[hw_queue_id];
	cntr = &hdev->aggregated_cs_counters;

	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = size;
	job->hw_queue_id = hw_queue_id;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;

	/* increment refcount as for external queues we get completion */
	cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

static int hl_submit_pending_cb(struct hl_fpriv *hpriv)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	struct hl_pending_cb *pending_cb, *tmp;
	struct list_head local_cb_list;
	struct hl_cs *cs;
	struct hl_cb *cb;
	u32 hw_queue_id;
	u32 cb_size;
	int process_list, rc = 0;

	if (list_empty(&ctx->pending_cb_list))
		return 0;

	process_list = atomic_cmpxchg(&ctx->thread_pending_cb_token, 1, 0);

	/* Only a single thread is allowed to process the list */
	if (!process_list)
		return 0;

	if (list_empty(&ctx->pending_cb_list))
		goto free_pending_cb_token;

	/* move all list elements to a local list */
	INIT_LIST_HEAD(&local_cb_list);
	spin_lock(&ctx->pending_cb_lock);
	list_for_each_entry_safe(pending_cb, tmp, &ctx->pending_cb_list,
								cb_node)
		list_move_tail(&pending_cb->cb_node, &local_cb_list);
	spin_unlock(&ctx->pending_cb_lock);

	rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs);
	if (rc)
		goto add_list_elements;

	hl_debugfs_add_cs(cs);

	/* Iterate through pending cb list, create jobs and add to CS */
	list_for_each_entry(pending_cb, &local_cb_list, cb_node) {
		cb = pending_cb->cb;
		cb_size = pending_cb->cb_size;
		hw_queue_id = pending_cb->hw_queue_id;

		rc = pending_cb_create_job(hdev, ctx, cs, cb, cb_size,
								hw_queue_id);
		if (rc)
			goto free_cs_object;
	}

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu (%d)\n",
				ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	/* pending cb was scheduled successfully */
	list_for_each_entry_safe(pending_cb, tmp, &local_cb_list, cb_node) {
		list_del(&pending_cb->cb_node);
		kfree(pending_cb);
	}

	cs_put(cs);

	goto free_pending_cb_token;

free_cs_object:
	cs_rollback(hdev, cs);
	cs_put(cs);
add_list_elements:
	spin_lock(&ctx->pending_cb_lock);
	list_for_each_entry_safe_reverse(pending_cb, tmp, &local_cb_list,
								cb_node)
		list_move(&pending_cb->cb_node, &ctx->pending_cb_list);
	spin_unlock(&ctx->pending_cb_lock);
free_pending_cb_token:
	atomic_set(&ctx->thread_pending_cb_token, 1);

	return rc;
}
static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
				u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	bool need_soft_reset = false;
	int rc = 0, do_ctx_switch;
	void __user *chunks;
	u32 num_chunks, tmp;
	int ret;

	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
		mutex_lock(&hpriv->restore_phase_mutex);

		if (do_ctx_switch) {
			rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
			if (rc) {
				dev_err_ratelimited(hdev->dev,
					"Failed to switch to context %d, rejecting CS! %d\n",
					ctx->asid, rc);
				/*
				 * If we timedout, or if the device is not IDLE
				 * while we want to do context-switch (-EBUSY),
				 * we need to soft-reset because QMAN is
				 * probably stuck. However, we can't call to
				 * reset here directly because of deadlock, so
				 * need to do it at the very end of this
				 * function
				 */
				if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
					need_soft_reset = true;
				mutex_unlock(&hpriv->restore_phase_mutex);
				goto out;
			}
		}

		hdev->asic_funcs->restore_phase_topology(hdev);

		chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
		num_chunks = args->in.num_chunks_restore;

		if (!num_chunks) {
			dev_dbg(hdev->dev,
				"Need to run restore phase but restore CS is empty\n");
			rc = 0;
		} else {
			rc = cs_ioctl_default(hpriv, chunks, num_chunks,
						cs_seq, 0);
		}

		mutex_unlock(&hpriv->restore_phase_mutex);

		if (rc) {
			dev_err(hdev->dev,
				"Failed to submit restore CS for context %d (%d)\n",
				ctx->asid, rc);
			goto out;
		}

		/* Need to wait for restore completion before execution phase */
		if (num_chunks) {
			enum hl_cs_wait_status status;
wait_again:
			ret = _hl_cs_wait_ioctl(hdev, ctx,
					jiffies_to_usecs(hdev->timeout_jiffies),
					*cs_seq, &status, NULL);
			if (ret) {
				if (ret == -ERESTARTSYS) {
					usleep_range(100, 200);
					goto wait_again;
				}

				dev_err(hdev->dev,
					"Restore CS for context %d failed to complete %d\n",
					ctx->asid, ret);
				rc = -ENOEXEC;
				goto out;
			}
		}

		ctx->thread_ctx_switch_wait_token = 1;

	} else if (!ctx->thread_ctx_switch_wait_token) {
		rc = hl_poll_timeout_memory(hdev,
			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
			100, jiffies_to_usecs(hdev->timeout_jiffies), false);

		if (rc == -ETIMEDOUT) {
			dev_err(hdev->dev,
				"context switch phase timeout (%d)\n", tmp);
			goto out;
		}
	}

out:
	if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
		hl_device_reset(hdev, false, false);

	return rc;
}
static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
		struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
{
	u64 *signal_seq_arr = NULL;
	u32 size_to_copy, signal_seq_arr_len;
	int rc = 0;

	signal_seq_arr_len = chunk->num_signal_seq_arr;

	/* currently only one signal seq is supported */
	if (signal_seq_arr_len != 1) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev,
			"Wait for signal CS supports only one signal CS seq\n");
		return -EINVAL;
	}

	signal_seq_arr = kmalloc_array(signal_seq_arr_len,
					sizeof(*signal_seq_arr),
					GFP_ATOMIC);
	if (!signal_seq_arr) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
		return -ENOMEM;
	}

	size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
	if (copy_from_user(signal_seq_arr,
				u64_to_user_ptr(chunk->signal_seq_arr),
				size_to_copy)) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev,
			"Failed to copy signal seq array from user\n");
		rc = -EFAULT;
		goto out;
	}

	/* currently it is guaranteed to have only one signal seq */
	*signal_seq = signal_seq_arr[0];

out:
	kfree(signal_seq_arr);

	return rc;
}
static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, enum hl_queue_type q_type,
		u32 q_idx)
{
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;

	cntr = &hdev->aggregated_cs_counters;

	job = hl_cs_allocate_job(hdev, q_type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	if (cs->type == CS_TYPE_WAIT)
		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
	else
		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);

	cb = hl_cb_kernel_create(hdev, cb_size,
				q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = q_idx;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need the CB in
	 * the CB idr anymore and to decrement its refcount as it was
	 * incremented inside hl_cb_kernel_create().
	 */
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	/* increment refcount as for external queues we get completion */
	cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}
static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
				void __user *chunks, u32 num_chunks,
				u64 *cs_seq, bool timestamp)
{
	struct hl_cs_chunk *cs_chunk_array, *chunk;
	struct hw_queue_properties *hw_queue_prop;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_compl *sig_waitcs_cmpl;
	u32 q_idx, collective_engine_id = 0;
	struct hl_cs_counters_atomic *cntr;
	struct hl_fence *sig_fence = NULL;
	struct hl_ctx *ctx = hpriv->ctx;
	enum hl_queue_type q_type;
	struct hl_cs *cs;
	u64 signal_seq;
	int rc;

	cntr = &hdev->aggregated_cs_counters;
	*cs_seq = ULLONG_MAX;

	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
			ctx);
	if (rc)
		goto out;

	/* currently it is guaranteed to have only one chunk */
	chunk = &cs_chunk_array[0];

	if (chunk->queue_index >= hdev->asic_prop.max_queues) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	q_idx = chunk->queue_index;
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
	q_type = hw_queue_prop->type;

	if (!hw_queue_prop->supports_sync_stream) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev,
			"Queue index %d does not support sync stream operations\n",
			q_idx);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
		if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Queue index %d is invalid\n", q_idx);
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		collective_engine_id = chunk->collective_engine_id;
	}

	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx);
		if (rc)
			goto free_cs_chunk_array;

		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
		if (IS_ERR(sig_fence)) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Failed to get signal CS with seq 0x%llx\n",
				signal_seq);
			rc = PTR_ERR(sig_fence);
			goto free_cs_chunk_array;
		}

		if (!sig_fence) {
			/* signal CS already finished */
			rc = 0;
			goto free_cs_chunk_array;
		}

		sig_waitcs_cmpl =
			container_of(sig_fence, struct hl_cs_compl, base_fence);

		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"CS seq 0x%llx is not of a signal CS\n",
				signal_seq);
			hl_fence_put(sig_fence);
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		if (completion_done(&sig_fence->completion)) {
			/* signal CS already finished */
			hl_fence_put(sig_fence);
			rc = 0;
			goto free_cs_chunk_array;
		}
	}

	/* allocate a new CS structure */
	rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs);
	if (rc) {
		if (cs_type == CS_TYPE_WAIT ||
			cs_type == CS_TYPE_COLLECTIVE_WAIT)
			hl_fence_put(sig_fence);
		goto free_cs_chunk_array;
	}

	cs->timestamp = !!timestamp;

	/*
	 * Save the signal CS fence for later initialization right before
	 * hanging the wait CS on the queue.
	 */
	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT)
		cs->signal_fence = sig_fence;

	hl_debugfs_add_cs(cs);

	*cs_seq = cs->sequence;

	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
		rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
				q_idx);
	else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
		rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
				cs, q_idx, collective_engine_id);
	else {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		rc = -EINVAL;
	}

	if (rc)
		goto free_cs_object;

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cs_args *args = data;
	enum hl_cs_type cs_type;
	u64 cs_seq = ULONG_MAX;
	void __user *chunks;
	u32 num_chunks, flags;
	int rc;

	rc = hl_cs_sanity_checks(hpriv, args);
	if (rc)
		goto out;

	rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
	if (rc)
		goto out;

	rc = hl_submit_pending_cb(hpriv);
	if (rc)
		goto out;

	cs_type = hl_cs_get_cs_type(args->in.cs_flags &
					~HL_CS_FLAGS_FORCE_RESTORE);
	chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
	num_chunks = args->in.num_chunks_execute;
	flags = args->in.cs_flags;

	/* In case this is a staged CS, user should supply the CS sequence */
	if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
			!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
		cs_seq = args->in.seq;

	switch (cs_type) {
	case CS_TYPE_SIGNAL:
	case CS_TYPE_WAIT:
	case CS_TYPE_COLLECTIVE_WAIT:
		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
			&cs_seq, args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP);
		break;
	default:
		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
						args->in.cs_flags);
		break;
	}

out:
	if (rc != -EAGAIN) {
		memset(args, 0, sizeof(*args));
		args->out.status = rc;
		args->out.seq = cs_seq;
	}

	return rc;
}
static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				u64 timeout_us, u64 seq,
				enum hl_cs_wait_status *status, s64 *timestamp)
{
	struct hl_fence *fence;
	unsigned long timeout;
	int rc = 0;
	long completion_rc;

	if (timestamp)
		*timestamp = 0;

	if (timeout_us == MAX_SCHEDULE_TIMEOUT)
		timeout = timeout_us;
	else
		timeout = usecs_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	fence = hl_ctx_get_fence(ctx, seq);
	if (IS_ERR(fence)) {
		rc = PTR_ERR(fence);
		if (rc == -EINVAL)
			dev_notice_ratelimited(hdev->dev,
				"Can't wait on CS %llu because current CS is at seq %llu\n",
				seq, ctx->cs_sequence);
	} else if (fence) {
		if (!timeout_us)
			completion_rc = completion_done(&fence->completion);
		else
			completion_rc =
				wait_for_completion_interruptible_timeout(
					&fence->completion, timeout);

		if (completion_rc > 0) {
			*status = CS_WAIT_STATUS_COMPLETED;
			if (timestamp)
				*timestamp = ktime_to_ns(fence->timestamp);
		} else {
			*status = CS_WAIT_STATUS_BUSY;
		}

		if (fence->error == -ETIMEDOUT)
			rc = -ETIMEDOUT;
		else if (fence->error == -EIO)
			rc = -EIO;

		hl_fence_put(fence);
	} else {
		dev_dbg(hdev->dev,
			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
			seq, ctx->cs_sequence);
		*status = CS_WAIT_STATUS_GONE;
	}

	hl_ctx_put(ctx);

	return rc;
}
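/* Return-code convention of the helper above: 0 with *status set reflects
 * the wait result, while -ETIMEDOUT / -EIO are propagated from
 * fence->error (CS timeout or abort). hl_cs_wait_ioctl() below translates
 * these into the HL_WAIT_CS_STATUS_* values reported to userspace.
 */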
int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_wait_cs_args *args = data;
	enum hl_cs_wait_status status;
	u64 seq = args->in.seq;
	s64 timestamp;
	int rc;

	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq,
				&status, &timestamp);

	memset(args, 0, sizeof(*args));

	if (rc) {
		if (rc == -ERESTARTSYS) {
			dev_err_ratelimited(hdev->dev,
				"user process got signal while waiting for CS handle %llu\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
			rc = -EINTR;
		} else if (rc == -ETIMEDOUT) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has timed-out while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
		} else if (rc == -EIO) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has been aborted while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
		}
		return rc;
	}

	if (timestamp) {
		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
		args->out.timestamp_nsec = timestamp;
	}

	switch (status) {
	case CS_WAIT_STATUS_GONE:
		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
		fallthrough;
	case CS_WAIT_STATUS_COMPLETED:
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
		break;
	case CS_WAIT_STATUS_BUSY:
	default:
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
		break;
	}

	return 0;
}