// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2021 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/uaccess.h>
#include <linux/slab.h>
#define HL_CS_FLAGS_TYPE_MASK	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
				HL_CS_FLAGS_COLLECTIVE_WAIT)
/**
 * enum hl_cs_wait_status - cs wait status
 * @CS_WAIT_STATUS_BUSY: cs was not completed yet
 * @CS_WAIT_STATUS_COMPLETED: cs completed
 * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
 */
enum hl_cs_wait_status {
	CS_WAIT_STATUS_BUSY,
	CS_WAIT_STATUS_COMPLETED,
	CS_WAIT_STATUS_GONE
};
static void job_wq_completion(struct work_struct *work);
static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				u64 timeout_us, u64 seq,
				enum hl_cs_wait_status *status, s64 *timestamp);
static void cs_do_release(struct kref *ref);
static void hl_sob_reset(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id);

	hdev->asic_funcs->reset_sob(hdev, hw_sob);

	hw_sob->need_reset = false;
}
void hl_sob_reset_error(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
		hw_sob->q_idx, hw_sob->sob_id);
}
void hw_sob_put(struct hl_hw_sob *hw_sob)
{
	if (hw_sob)
		kref_put(&hw_sob->kref, hl_sob_reset);
}

static void hw_sob_put_err(struct hl_hw_sob *hw_sob)
{
	if (hw_sob)
		kref_put(&hw_sob->kref, hl_sob_reset_error);
}

void hw_sob_get(struct hl_hw_sob *hw_sob)
{
	if (hw_sob)
		kref_get(&hw_sob->kref);
}
/**
 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
 * @sob_base: sob base id
 * @sob_mask: sob user mask, each bit represents a sob offset from sob base
 * @mask: generated mask
 *
 * Return: 0 if given parameters are valid
 */
int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
{
	int i;

	if (sob_mask == 0)
		return -EINVAL;

	if (sob_mask == 0x1) {
		*mask = ~(1 << (sob_base & 0x7));
	} else {
		/* find msb in order to verify sob range is valid */
		for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
			if (BIT(i) & sob_mask)
				break;

		if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
			return -EINVAL;

		*mask = ~sob_mask;
	}

	return 0;
}
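
/*
 * Usage sketch (illustrative, not taken from the original source): a caller
 * might do something like
 *
 *	u8 mask;
 *
 *	if (!hl_gen_sob_mask(sob_base_id, user_sob_mask, &mask))
 *		<write mask into the monitor arm packet>;
 *
 * For example, sob_base = 10 (offset 2 inside its 8-SOB group) with
 * sob_mask = 0x1 yields mask = ~(1 << 2) = 0xfb, while sob_mask = 0x7
 * yields mask = ~0x7 = 0xf8, assuming the three SOBs fit inside the
 * monitor window that starts at the group base.
 */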
static void hl_fence_release(struct kref *kref)
{
	struct hl_fence *fence =
		container_of(kref, struct hl_fence, refcount);
	struct hl_cs_compl *hl_cs_cmpl =
		container_of(fence, struct hl_cs_compl, base_fence);

	kfree(hl_cs_cmpl);
}

void hl_fence_put(struct hl_fence *fence)
{
	if (IS_ERR_OR_NULL(fence))
		return;
	kref_put(&fence->refcount, hl_fence_release);
}
void hl_fences_put(struct hl_fence **fence, int len)
{
	int i;

	for (i = 0; i < len; i++, fence++)
		hl_fence_put(*fence);
}

void hl_fence_get(struct hl_fence *fence)
{
	if (fence)
		kref_get(&fence->refcount);
}

static void hl_fence_init(struct hl_fence *fence, u64 sequence)
{
	kref_init(&fence->refcount);
	fence->cs_sequence = sequence;
	fence->error = 0;
	fence->timestamp = ktime_set(0, 0);
	fence->mcs_handling_done = false;
	init_completion(&fence->completion);
}
void cs_get(struct hl_cs *cs)
{
	kref_get(&cs->refcount);
}

static int cs_get_unless_zero(struct hl_cs *cs)
{
	return kref_get_unless_zero(&cs->refcount);
}

static void cs_put(struct hl_cs *cs)
{
	kref_put(&cs->refcount, cs_do_release);
}

static void cs_job_do_release(struct kref *ref)
{
	struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);

	kfree(job);
}

static void cs_job_put(struct hl_cs_job *job)
{
	kref_put(&job->refcount, cs_job_do_release);
}
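
/*
 * Rough lifetime summary (derived from the code in this file): a CS takes one
 * reference per external/H/W queue job that expects a completion, on top of
 * the reference taken at allocation. When the last reference drops, cs_put()
 * ends up in cs_do_release(), which completes any remaining jobs, signals the
 * fence and handles the SOB/TDR bookkeeping.
 */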
bool cs_needs_completion(struct hl_cs *cs)
{
	/* In case this is a staged CS, only the last CS in sequence should
	 * get a completion, any non staged CS will always get a completion
	 */
	if (cs->staged_cs && !cs->staged_last)
		return false;

	return true;
}

bool cs_needs_timeout(struct hl_cs *cs)
{
	/* In case this is a staged CS, only the first CS in sequence should
	 * get a timeout, any non staged CS will always get a timeout
	 */
	if (cs->staged_cs && !cs->staged_first)
		return false;

	return true;
}
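
/*
 * Quick reference, as implied by the two helpers above:
 *
 *	CS kind				completion	timeout (TDR)
 *	non-staged			yes		yes
 *	staged, first in sequence	no		yes
 *	staged, middle			no		no
 *	staged, last in sequence	yes		no
 *
 * A staged CS that is both first and last behaves like a non-staged CS.
 */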
static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
	/*
	 * Patched CB is created for external queues jobs, and for H/W queues
	 * jobs if the user CB was allocated by driver and MMU is disabled.
	 */
	return (job->queue_type == QUEUE_TYPE_EXT ||
			(job->queue_type == QUEUE_TYPE_HW &&
				job->is_kernel_allocated_cb &&
				!hdev->mmu_enable));
}
/*
 * cs_parser - parse the user command submission
 *
 * @hpriv : pointer to the private data of the fd
 * @job : pointer to the job that holds the command submission info
 *
 * The function parses the command submission of the user. It calls the
 * ASIC specific parser, which returns a list of memory blocks to send
 * to the device as different command buffers
 *
 */
static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_parser parser;
	int rc;

	parser.ctx_id = job->cs->ctx->asid;
	parser.cs_sequence = job->cs->sequence;
	parser.job_id = job->id;

	parser.hw_queue_id = job->hw_queue_id;
	parser.job_userptr_list = &job->userptr_list;
	parser.patched_cb = NULL;
	parser.user_cb = job->user_cb;
	parser.user_cb_size = job->user_cb_size;
	parser.queue_type = job->queue_type;
	parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
	job->patched_cb = NULL;
	parser.completion = cs_needs_completion(job->cs);

	rc = hdev->asic_funcs->cs_parser(hdev, &parser);

	if (is_cb_patched(hdev, job)) {
		if (!rc) {
			job->patched_cb = parser.patched_cb;
			job->job_cb_size = parser.patched_cb_size;
			job->contains_dma_pkt = parser.contains_dma_pkt;
			atomic_inc(&job->patched_cb->cs_cnt);
		}

		/*
		 * Whether the parsing worked or not, we don't need the
		 * original CB anymore because it was already parsed and
		 * won't be accessed again for this CS
		 */
		atomic_dec(&job->user_cb->cs_cnt);
		hl_cb_put(job->user_cb);
		job->user_cb = NULL;
	} else if (!rc) {
		job->job_cb_size = job->user_cb_size;
	}

	return rc;
}
static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct hl_cs *cs = job->cs;

	if (is_cb_patched(hdev, job)) {
		hl_userptr_delete_list(hdev, &job->userptr_list);

		/*
		 * We might arrive here from rollback and patched CB wasn't
		 * created, so we need to check it's not NULL
		 */
		if (job->patched_cb) {
			atomic_dec(&job->patched_cb->cs_cnt);
			hl_cb_put(job->patched_cb);
		}
	}

	/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
	 * enabled, the user CB isn't released in cs_parser() and thus should be
	 * released here.
	 * This is also true for INT queues jobs which were allocated by driver
	 */
	if (job->is_kernel_allocated_cb &&
		((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) ||
				job->queue_type == QUEUE_TYPE_INT)) {
		atomic_dec(&job->user_cb->cs_cnt);
		hl_cb_put(job->user_cb);
	}

	/*
	 * This is the only place where there can be multiple threads
	 * modifying the list at the same time
	 */
	spin_lock(&cs->job_lock);
	list_del(&job->cs_node);
	spin_unlock(&cs->job_lock);

	hl_debugfs_remove_job(hdev, job);

	/* We decrement reference only for a CS that gets completion
	 * because the reference was incremented only for this kind of CS
	 * right before it was scheduled.
	 *
	 * In staged submission, only the last CS marked as 'staged_last'
	 * gets completion, hence its release function will be called from here.
	 * As for all the rest CS's in the staged submission which do not get
	 * completion, their CS reference will be decremented by the
	 * 'staged_last' CS during the CS release flow.
	 * All relevant PQ CI counters will be incremented during the CS release
	 * flow by calling 'hl_hw_queue_update_ci'.
	 */
	if (cs_needs_completion(cs) &&
		(job->queue_type == QUEUE_TYPE_EXT ||
			job->queue_type == QUEUE_TYPE_HW))
		cs_put(cs);

	cs_job_put(job);
}
/*
 * hl_staged_cs_find_first - locate the first CS in this staged submission
 *
 * @hdev: pointer to device structure
 * @cs_seq: staged submission sequence number
 *
 * @note: This function must be called under 'hdev->cs_mirror_lock'
 *
 * Find and return a CS pointer with the given sequence
 */
struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
{
	struct hl_cs *cs;

	list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
		if (cs->staged_cs && cs->staged_first &&
				cs->sequence == cs_seq)
			return cs;

	return NULL;
}

/*
 * is_staged_cs_last_exists - returns true if the last CS in sequence exists
 *
 * @hdev: pointer to device structure
 * @cs: staged submission member
 *
 */
bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs *last_entry;

	last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
							staged_cs_node);

	if (last_entry->staged_last)
		return true;

	return false;
}
/*
 * staged_cs_get - get CS reference if this CS is a part of a staged CS
 *
 * @hdev: pointer to device structure
 * @cs: current CS
 *
 * Increment CS reference for every CS in this staged submission except for
 * the CS which gets completion.
 */
static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
{
	/* Only the last CS in this staged submission will get a completion.
	 * We must increment the reference for all other CS's in this
	 * staged submission.
	 * Once we get a completion we will release the whole staged submission.
	 */
	if (!cs->staged_last)
		cs_get(cs);
}

/*
 * staged_cs_put - put a CS in case it is part of staged submission
 *
 * @hdev: pointer to device structure
 * @cs: CS to put
 *
 * This function decrements a CS reference (for a non completion CS)
 */
static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
{
	/* We release all CS's in a staged submission except the last
	 * CS which we have never incremented its reference.
	 */
	if (!cs_needs_completion(cs))
		cs_put(cs);
}
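
/*
 * Rough staged-submission flow, as implied by the helpers above: for a staged
 * submission CS0 (staged_first) .. CSn (staged_last), staged_cs_get() adds a
 * reference to every CS except CSn; only CSn generates a completion, and its
 * release flow drops the references of the earlier CS's via staged_cs_put().
 * The TDR timer, in contrast, belongs to CS0 (see cs_needs_timeout()).
 */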
static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
{
	bool next_entry_found = false;
	struct hl_cs *next, *first_cs;

	if (!cs_needs_timeout(cs))
		return;

	spin_lock(&hdev->cs_mirror_lock);

	/* We need to handle tdr only once for the complete staged submission.
	 * Hence, we choose the CS that reaches this function first which is
	 * the CS marked as 'staged_last'.
	 * In case a single staged cs was submitted which has both first and last
	 * indications, then "hl_staged_cs_find_first" below will return NULL,
	 * since we removed the cs node from the list before getting here,
	 * in such cases just continue with the cs to cancel its TDR work.
	 */
	if (cs->staged_cs && cs->staged_last) {
		first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
		if (first_cs)
			cs = first_cs;
	}

	spin_unlock(&hdev->cs_mirror_lock);

	/* Don't cancel TDR in case this CS was timedout because we might be
	 * running from the TDR context
	 */
	if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
		return;

	if (cs->tdr_active)
		cancel_delayed_work_sync(&cs->work_tdr);

	spin_lock(&hdev->cs_mirror_lock);

	/* queue TDR for next CS */
	list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
		if (cs_needs_timeout(next)) {
			next_entry_found = true;
			break;
		}

	if (next_entry_found && !next->tdr_active) {
		next->tdr_active = true;
		schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
	}

	spin_unlock(&hdev->cs_mirror_lock);
}
459 * force_complete_multi_cs - complete all contexts that wait on multi-CS
461 * @hdev: pointer to habanalabs device structure
463 static void force_complete_multi_cs(struct hl_device *hdev)
467 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
468 struct multi_cs_completion *mcs_compl;
470 mcs_compl = &hdev->multi_cs_completion[i];
472 spin_lock(&mcs_compl->lock);
474 if (!mcs_compl->used) {
475 spin_unlock(&mcs_compl->lock);
479 /* when calling force complete no context should be waiting on
481 * We are calling the function as a protection for such case
482 * to free any pending context and print error message
485 "multi-CS completion context %d still waiting when calling force completion\n",
487 complete_all(&mcs_compl->completion);
488 spin_unlock(&mcs_compl->lock);
493 * complete_multi_cs - complete all waiting entities on multi-CS
495 * @hdev: pointer to habanalabs device structure
497 * The function signals a waiting entity that has an overlapping stream masters
498 * with the completed CS.
500 * - a completed CS worked on stream master QID 4, multi CS completion
501 * is actively waiting on stream master QIDs 3, 5. don't send signal as no
502 * common stream master QID
503 * - a completed CS worked on stream master QID 4, multi CS completion
504 * is actively waiting on stream master QIDs 3, 4. send signal as stream
505 * master QID 4 is common
507 static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
509 struct hl_fence *fence = cs->fence;
512 /* in case of multi CS check for completion only for the first CS */
513 if (cs->staged_cs && !cs->staged_first)
516 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
517 struct multi_cs_completion *mcs_compl;
519 mcs_compl = &hdev->multi_cs_completion[i];
520 if (!mcs_compl->used)
523 spin_lock(&mcs_compl->lock);
527 * 1. still waiting for completion
528 * 2. the completed CS has at least one overlapping stream
529 * master with the stream masters in the completion
531 if (mcs_compl->used &&
532 (fence->stream_master_qid_map &
533 mcs_compl->stream_master_qid_map)) {
534 /* extract the timestamp only of first completed CS */
535 if (!mcs_compl->timestamp)
536 mcs_compl->timestamp = ktime_to_ns(fence->timestamp);
538 complete_all(&mcs_compl->completion);
541 * Setting mcs_handling_done inside the lock ensures
* at least one fence has mcs_handling_done set to
543 * true before wait for mcs finish. This ensures at
544 * least one CS will be set as completed when polling
547 fence->mcs_handling_done = true;
550 spin_unlock(&mcs_compl->lock);
552 /* In case CS completed without mcs completion initialized */
553 fence->mcs_handling_done = true;
556 static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
558 struct hl_cs_compl *hl_cs_cmpl)
560 /* Skip this handler if the cs wasn't submitted, to avoid putting
561 * the hw_sob twice, since this case already handled at this point,
562 * also skip if the hw_sob pointer wasn't set.
564 if (!hl_cs_cmpl->hw_sob || !cs->submitted)
567 spin_lock(&hl_cs_cmpl->lock);
570 * we get refcount upon reservation of signals or signal/wait cs for the
571 * hw_sob object, and need to put it when the first staged cs
* (which contains the encaps signals) or cs signal/wait is completed.
574 if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
575 (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
576 (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) ||
577 (!!hl_cs_cmpl->encaps_signals)) {
579 "CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n",
582 hl_cs_cmpl->hw_sob->sob_id,
583 hl_cs_cmpl->sob_val);
585 hw_sob_put(hl_cs_cmpl->hw_sob);
587 if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
588 hdev->asic_funcs->reset_sob_group(hdev,
589 hl_cs_cmpl->sob_group);
592 spin_unlock(&hl_cs_cmpl->lock);
595 static void cs_do_release(struct kref *ref)
597 struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
598 struct hl_device *hdev = cs->ctx->hdev;
599 struct hl_cs_job *job, *tmp;
600 struct hl_cs_compl *hl_cs_cmpl =
601 container_of(cs->fence, struct hl_cs_compl, base_fence);
603 cs->completed = true;
606 * Although if we reached here it means that all external jobs have
607 * finished, because each one of them took refcnt to CS, we still
608 * need to go over the internal jobs and complete them. Otherwise, we
609 * will have leaked memory and what's worse, the CS object (and
610 * potentially the CTX object) could be released, while the JOB
611 * still holds a pointer to them (but no reference).
613 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
614 complete_job(hdev, job);
616 if (!cs->submitted) {
618 * In case the wait for signal CS was submitted, the fence put
619 * occurs in init_signal_wait_cs() or collective_wait_init_cs()
620 * right before hanging on the PQ.
622 if (cs->type == CS_TYPE_WAIT ||
623 cs->type == CS_TYPE_COLLECTIVE_WAIT)
624 hl_fence_put(cs->signal_fence);
629 /* Need to update CI for all queue jobs that does not get completion */
630 hl_hw_queue_update_ci(cs);
632 /* remove CS from CS mirror list */
633 spin_lock(&hdev->cs_mirror_lock);
634 list_del_init(&cs->mirror_node);
635 spin_unlock(&hdev->cs_mirror_lock);
637 cs_handle_tdr(hdev, cs);
640 /* the completion CS decrements reference for the entire
643 if (cs->staged_last) {
644 struct hl_cs *staged_cs, *tmp;
646 list_for_each_entry_safe(staged_cs, tmp,
647 &cs->staged_cs_node, staged_cs_node)
648 staged_cs_put(hdev, staged_cs);
651 /* A staged CS will be a member in the list only after it
652 * was submitted. We used 'cs_mirror_lock' when inserting
653 * it to list so we will use it again when removing it
656 spin_lock(&hdev->cs_mirror_lock);
657 list_del(&cs->staged_cs_node);
658 spin_unlock(&hdev->cs_mirror_lock);
661 /* decrement refcount to handle when first staged cs
662 * with encaps signals is completed.
664 if (hl_cs_cmpl->encaps_signals)
665 kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount,
666 hl_encaps_handle_do_release);
669 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
670 && cs->encaps_signals)
671 kref_put(&cs->encaps_sig_hdl->refcount,
672 hl_encaps_handle_do_release);
675 /* Must be called before hl_ctx_put because inside we use ctx to get
678 hl_debugfs_remove_cs(cs);
682 /* We need to mark an error for not submitted because in that case
683 * the hl fence release flow is different. Mainly, we don't need
684 * to handle hw_sob for signal/wait
687 cs->fence->error = -ETIMEDOUT;
688 else if (cs->aborted)
689 cs->fence->error = -EIO;
690 else if (!cs->submitted)
691 cs->fence->error = -EBUSY;
693 if (unlikely(cs->skip_reset_on_timeout)) {
695 "Command submission %llu completed after %llu (s)\n",
697 div_u64(jiffies - cs->submission_time_jiffies, HZ));
701 cs->fence->timestamp = ktime_get();
702 complete_all(&cs->fence->completion);
703 complete_multi_cs(hdev, cs);
705 cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl);
707 hl_fence_put(cs->fence);
709 kfree(cs->jobs_in_queue_cnt);
713 static void cs_timedout(struct work_struct *work)
715 struct hl_device *hdev;
717 struct hl_cs *cs = container_of(work, struct hl_cs,
719 bool skip_reset_on_timeout = cs->skip_reset_on_timeout;
721 rc = cs_get_unless_zero(cs);
725 if ((!cs->submitted) || (cs->completed)) {
730 /* Mark the CS is timed out so we won't try to cancel its TDR */
731 if (likely(!skip_reset_on_timeout))
734 hdev = cs->ctx->hdev;
736 /* Save only the first CS timeout parameters */
737 rc = atomic_cmpxchg(&hdev->last_error.cs_write_disable, 0, 1);
739 hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
740 hdev->last_error.cs_timeout_timestamp = ktime_get();
741 hdev->last_error.cs_timeout_seq = cs->sequence;
747 "Signal command submission %llu has not finished in time!\n",
753 "Wait command submission %llu has not finished in time!\n",
757 case CS_TYPE_COLLECTIVE_WAIT:
759 "Collective Wait command submission %llu has not finished in time!\n",
765 "Command submission %llu has not finished in time!\n",
770 rc = hl_state_dump(hdev);
772 dev_err(hdev->dev, "Error during system state dump %d\n", rc);
776 if (likely(!skip_reset_on_timeout)) {
777 if (hdev->reset_on_lockup)
778 hl_device_reset(hdev, HL_DRV_RESET_TDR);
780 hdev->reset_info.needs_reset = true;
784 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
785 enum hl_cs_type cs_type, u64 user_sequence,
786 struct hl_cs **cs_new, u32 flags, u32 timeout)
788 struct hl_cs_counters_atomic *cntr;
789 struct hl_fence *other = NULL;
790 struct hl_cs_compl *cs_cmpl;
794 cntr = &hdev->aggregated_cs_counters;
796 cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
798 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
801 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
802 atomic64_inc(&cntr->out_of_mem_drop_cnt);
806 /* increment refcnt for context */
807 hl_ctx_get(hdev, ctx);
810 cs->submitted = false;
811 cs->completed = false;
813 cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
814 cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
815 cs->timeout_jiffies = timeout;
816 cs->skip_reset_on_timeout =
817 hdev->reset_info.skip_reset_on_timeout ||
818 !!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
819 cs->submission_time_jiffies = jiffies;
820 INIT_LIST_HEAD(&cs->job_list);
821 INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
822 kref_init(&cs->refcount);
823 spin_lock_init(&cs->job_lock);
825 cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
827 cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL);
830 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
831 atomic64_inc(&cntr->out_of_mem_drop_cnt);
836 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
837 sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
838 if (!cs->jobs_in_queue_cnt)
839 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
840 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
842 if (!cs->jobs_in_queue_cnt) {
843 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
844 atomic64_inc(&cntr->out_of_mem_drop_cnt);
849 cs_cmpl->hdev = hdev;
850 cs_cmpl->type = cs->type;
851 spin_lock_init(&cs_cmpl->lock);
852 cs->fence = &cs_cmpl->base_fence;
854 spin_lock(&ctx->cs_lock);
856 cs_cmpl->cs_seq = ctx->cs_sequence;
857 other = ctx->cs_pending[cs_cmpl->cs_seq &
858 (hdev->asic_prop.max_pending_cs - 1)];
860 if (other && !completion_done(&other->completion)) {
861 /* If the following statement is true, it means we have reached
862 * a point in which only part of the staged submission was
863 * submitted and we don't have enough room in the 'cs_pending'
864 * array for the rest of the submission.
865 * This causes a deadlock because this CS will never be
866 * completed as it depends on future CS's for completion.
868 if (other->cs_sequence == user_sequence)
869 dev_crit_ratelimited(hdev->dev,
870 "Staged CS %llu deadlock due to lack of resources",
873 dev_dbg_ratelimited(hdev->dev,
"Rejecting CS because of too many in-flight CS\n");
875 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
876 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
882 hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
884 cs->sequence = cs_cmpl->cs_seq;
886 ctx->cs_pending[cs_cmpl->cs_seq &
887 (hdev->asic_prop.max_pending_cs - 1)] =
888 &cs_cmpl->base_fence;
891 hl_fence_get(&cs_cmpl->base_fence);
895 spin_unlock(&ctx->cs_lock);
902 spin_unlock(&ctx->cs_lock);
903 kfree(cs->jobs_in_queue_cnt);
912 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
914 struct hl_cs_job *job, *tmp;
916 staged_cs_put(hdev, cs);
918 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
919 complete_job(hdev, job);
922 void hl_cs_rollback_all(struct hl_device *hdev)
925 struct hl_cs *cs, *tmp;
927 flush_workqueue(hdev->sob_reset_wq);
929 /* flush all completions before iterating over the CS mirror list in
930 * order to avoid a race with the release functions
932 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
933 flush_workqueue(hdev->cq_wq[i]);
935 /* Make sure we don't have leftovers in the CS mirror list */
936 list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
939 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
940 cs->ctx->asid, cs->sequence);
941 cs_rollback(hdev, cs);
945 force_complete_multi_cs(hdev);
949 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
951 struct hl_user_pending_interrupt *pend;
954 spin_lock_irqsave(&interrupt->wait_list_lock, flags);
955 list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
956 pend->fence.error = -EIO;
957 complete_all(&pend->fence.completion);
959 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
962 void hl_release_pending_user_interrupts(struct hl_device *hdev)
964 struct asic_fixed_properties *prop = &hdev->asic_prop;
965 struct hl_user_interrupt *interrupt;
968 if (!prop->user_interrupt_count)
/* We iterate through the user interrupt requests and wake up all
972 * user threads waiting for interrupt completion. We iterate the
973 * list under a lock, this is why all user threads, once awake,
974 * will wait on the same lock and will release the waiting object upon
978 for (i = 0 ; i < prop->user_interrupt_count ; i++) {
979 interrupt = &hdev->user_interrupt[i];
980 wake_pending_user_interrupt_threads(interrupt);
983 interrupt = &hdev->common_user_interrupt;
984 wake_pending_user_interrupt_threads(interrupt);
987 static void job_wq_completion(struct work_struct *work)
989 struct hl_cs_job *job = container_of(work, struct hl_cs_job,
991 struct hl_cs *cs = job->cs;
992 struct hl_device *hdev = cs->ctx->hdev;
994 /* job is no longer needed */
995 complete_job(hdev, job);
998 static int validate_queue_index(struct hl_device *hdev,
999 struct hl_cs_chunk *chunk,
1000 enum hl_queue_type *queue_type,
1001 bool *is_kernel_allocated_cb)
1003 struct asic_fixed_properties *asic = &hdev->asic_prop;
1004 struct hw_queue_properties *hw_queue_prop;
1006 /* This must be checked here to prevent out-of-bounds access to
1007 * hw_queues_props array
1009 if (chunk->queue_index >= asic->max_queues) {
1010 dev_err(hdev->dev, "Queue index %d is invalid\n",
1011 chunk->queue_index);
1015 hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
1017 if (hw_queue_prop->type == QUEUE_TYPE_NA) {
1018 dev_err(hdev->dev, "Queue index %d is invalid\n",
1019 chunk->queue_index);
1023 if (hw_queue_prop->driver_only) {
1025 "Queue index %d is restricted for the kernel driver\n",
1026 chunk->queue_index);
1030 /* When hw queue type isn't QUEUE_TYPE_HW,
1031 * USER_ALLOC_CB flag shall be referred as "don't care".
1033 if (hw_queue_prop->type == QUEUE_TYPE_HW) {
1034 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
1035 if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
1037 "Queue index %d doesn't support user CB\n",
1038 chunk->queue_index);
1042 *is_kernel_allocated_cb = false;
1044 if (!(hw_queue_prop->cb_alloc_flags &
1047 "Queue index %d doesn't support kernel CB\n",
1048 chunk->queue_index);
1052 *is_kernel_allocated_cb = true;
1055 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
1059 *queue_type = hw_queue_prop->type;
1063 static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
1064 struct hl_cb_mgr *cb_mgr,
1065 struct hl_cs_chunk *chunk)
1070 cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
1072 cb = hl_cb_get(hdev, cb_mgr, cb_handle);
1074 dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
1078 if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
1079 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
1083 atomic_inc(&cb->cs_cnt);
1092 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
1093 enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
1095 struct hl_cs_job *job;
1097 job = kzalloc(sizeof(*job), GFP_ATOMIC);
1099 job = kzalloc(sizeof(*job), GFP_KERNEL);
1104 kref_init(&job->refcount);
1105 job->queue_type = queue_type;
1106 job->is_kernel_allocated_cb = is_kernel_allocated_cb;
1108 if (is_cb_patched(hdev, job))
1109 INIT_LIST_HEAD(&job->userptr_list);
1111 if (job->queue_type == QUEUE_TYPE_EXT)
1112 INIT_WORK(&job->finish_work, job_wq_completion);
1117 static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
1119 if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
1120 return CS_TYPE_SIGNAL;
1121 else if (cs_type_flags & HL_CS_FLAGS_WAIT)
1122 return CS_TYPE_WAIT;
1123 else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
1124 return CS_TYPE_COLLECTIVE_WAIT;
1125 else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY)
1126 return CS_RESERVE_SIGNALS;
1127 else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
1128 return CS_UNRESERVE_SIGNALS;
1130 return CS_TYPE_DEFAULT;
1133 static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
1135 struct hl_device *hdev = hpriv->hdev;
1136 struct hl_ctx *ctx = hpriv->ctx;
1137 u32 cs_type_flags, num_chunks;
1138 enum hl_device_status status;
1139 enum hl_cs_type cs_type;
1141 if (!hl_device_operational(hdev, &status)) {
1145 if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1146 !hdev->supports_staged_submission) {
1147 dev_err(hdev->dev, "staged submission not supported");
1151 cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
1153 if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
1155 "CS type flags are mutually exclusive, context %d\n",
1160 cs_type = hl_cs_get_cs_type(cs_type_flags);
1161 num_chunks = args->in.num_chunks_execute;
1163 if (unlikely((cs_type != CS_TYPE_DEFAULT) &&
1164 !hdev->supports_sync_stream)) {
1165 dev_err(hdev->dev, "Sync stream CS is not supported\n");
1169 if (cs_type == CS_TYPE_DEFAULT) {
1172 "Got execute CS with 0 chunks, context %d\n",
1176 } else if (num_chunks != 1) {
1178 "Sync stream CS mandates one chunk only, context %d\n",
1186 static int hl_cs_copy_chunk_array(struct hl_device *hdev,
1187 struct hl_cs_chunk **cs_chunk_array,
1188 void __user *chunks, u32 num_chunks,
1193 if (num_chunks > HL_MAX_JOBS_PER_CS) {
1194 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1195 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1197 "Number of chunks can NOT be larger than %d\n",
1198 HL_MAX_JOBS_PER_CS);
1202 *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
1204 if (!*cs_chunk_array)
1205 *cs_chunk_array = kmalloc_array(num_chunks,
1206 sizeof(**cs_chunk_array), GFP_KERNEL);
1207 if (!*cs_chunk_array) {
1208 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1209 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1213 size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
1214 if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
1215 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1216 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1217 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
1218 kfree(*cs_chunk_array);
1225 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
1226 u64 sequence, u32 flags,
1227 u32 encaps_signal_handle)
1229 if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
1232 cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
1233 cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);
1235 if (cs->staged_first) {
1236 /* Staged CS sequence is the first CS sequence */
1237 INIT_LIST_HEAD(&cs->staged_cs_node);
1238 cs->staged_sequence = cs->sequence;
1240 if (cs->encaps_signals)
1241 cs->encaps_sig_hdl_id = encaps_signal_handle;
1243 /* User sequence will be validated in 'hl_hw_queue_schedule_cs'
1244 * under the cs_mirror_lock
1246 cs->staged_sequence = sequence;
1249 /* Increment CS reference if needed */
1250 staged_cs_get(hdev, cs);
1252 cs->staged_cs = true;
1257 static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
1261 for (i = 0; i < hdev->stream_master_qid_arr_size; i++)
1262 if (qid == hdev->stream_master_qid_arr[i])
1268 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
1269 u32 num_chunks, u64 *cs_seq, u32 flags,
1270 u32 encaps_signals_handle, u32 timeout,
1271 u16 *signal_initial_sob_count)
1273 bool staged_mid, int_queues_only = true;
1274 struct hl_device *hdev = hpriv->hdev;
1275 struct hl_cs_chunk *cs_chunk_array;
1276 struct hl_cs_counters_atomic *cntr;
1277 struct hl_ctx *ctx = hpriv->ctx;
1278 struct hl_cs_job *job;
1282 u8 stream_master_qid_map = 0;
1285 cntr = &hdev->aggregated_cs_counters;
1286 user_sequence = *cs_seq;
1287 *cs_seq = ULLONG_MAX;
1289 rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1294 if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1295 !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
1300 rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
1301 staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
1304 goto free_cs_chunk_array;
1306 *cs_seq = cs->sequence;
1308 hl_debugfs_add_cs(cs);
1310 rc = cs_staged_submission(hdev, cs, user_sequence, flags,
1311 encaps_signals_handle);
1313 goto free_cs_object;
1315 /* If this is a staged submission we must return the staged sequence
1316 * rather than the internal CS sequence
1319 *cs_seq = cs->staged_sequence;
1321 /* Validate ALL the CS chunks before submitting the CS */
1322 for (i = 0 ; i < num_chunks ; i++) {
1323 struct hl_cs_chunk *chunk = &cs_chunk_array[i];
1324 enum hl_queue_type queue_type;
1325 bool is_kernel_allocated_cb;
1327 rc = validate_queue_index(hdev, chunk, &queue_type,
1328 &is_kernel_allocated_cb);
1330 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1331 atomic64_inc(&cntr->validation_drop_cnt);
1332 goto free_cs_object;
1335 if (is_kernel_allocated_cb) {
1336 cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
1339 &ctx->cs_counters.validation_drop_cnt);
1340 atomic64_inc(&cntr->validation_drop_cnt);
1342 goto free_cs_object;
1345 cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
1348 if (queue_type == QUEUE_TYPE_EXT ||
1349 queue_type == QUEUE_TYPE_HW) {
1350 int_queues_only = false;
1353 * store which stream are being used for external/HW
1356 if (hdev->supports_wait_for_multi_cs)
1357 stream_master_qid_map |=
1358 get_stream_master_qid_mask(hdev,
1359 chunk->queue_index);
1362 job = hl_cs_allocate_job(hdev, queue_type,
1363 is_kernel_allocated_cb);
1365 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1366 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1367 dev_err(hdev->dev, "Failed to allocate a new job\n");
1369 if (is_kernel_allocated_cb)
1372 goto free_cs_object;
1378 job->user_cb_size = chunk->cb_size;
1379 job->hw_queue_id = chunk->queue_index;
1381 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1383 list_add_tail(&job->cs_node, &cs->job_list);
1386 * Increment CS reference. When CS reference is 0, CS is
1387 * done and can be signaled to user and free all its resources
1388 * Only increment for JOB on external or H/W queues, because
1389 * only for those JOBs we get completion
1391 if (cs_needs_completion(cs) &&
1392 (job->queue_type == QUEUE_TYPE_EXT ||
1393 job->queue_type == QUEUE_TYPE_HW))
1396 hl_debugfs_add_job(hdev, job);
1398 rc = cs_parser(hpriv, job);
1400 atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
1401 atomic64_inc(&cntr->parsing_drop_cnt);
1403 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
1404 cs->ctx->asid, cs->sequence, job->id, rc);
1405 goto free_cs_object;
1409 /* We allow a CS with any queue type combination as long as it does
1410 * not get a completion
1412 if (int_queues_only && cs_needs_completion(cs)) {
1413 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1414 atomic64_inc(&cntr->validation_drop_cnt);
1416 "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
1417 cs->ctx->asid, cs->sequence);
1419 goto free_cs_object;
1423 * store the (external/HW queues) streams used by the CS in the
1424 * fence object for multi-CS completion
1426 if (hdev->supports_wait_for_multi_cs)
1427 cs->fence->stream_master_qid_map = stream_master_qid_map;
1429 rc = hl_hw_queue_schedule_cs(cs);
1433 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
1434 cs->ctx->asid, cs->sequence, rc);
1435 goto free_cs_object;
1438 *signal_initial_sob_count = cs->initial_sob_count;
1440 rc = HL_CS_STATUS_SUCCESS;
1444 atomic_dec(&cb->cs_cnt);
1447 cs_rollback(hdev, cs);
1448 *cs_seq = ULLONG_MAX;
1449 /* The path below is both for good and erroneous exits */
1451 /* We finished with the CS in this function, so put the ref */
1453 free_cs_chunk_array:
1454 kfree(cs_chunk_array);
1459 static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
1462 struct hl_device *hdev = hpriv->hdev;
1463 struct hl_ctx *ctx = hpriv->ctx;
1464 bool need_soft_reset = false;
1465 int rc = 0, do_ctx_switch;
1466 void __user *chunks;
1467 u32 num_chunks, tmp;
1471 do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
1473 if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
1474 mutex_lock(&hpriv->restore_phase_mutex);
1476 if (do_ctx_switch) {
1477 rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
1479 dev_err_ratelimited(hdev->dev,
1480 "Failed to switch to context %d, rejecting CS! %d\n",
1483 * If we timedout, or if the device is not IDLE
1484 * while we want to do context-switch (-EBUSY),
1485 * we need to soft-reset because QMAN is
1486 * probably stuck. However, we can't call to
1487 * reset here directly because of deadlock, so
1488 * need to do it at the very end of this
1491 if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
1492 need_soft_reset = true;
1493 mutex_unlock(&hpriv->restore_phase_mutex);
1498 hdev->asic_funcs->restore_phase_topology(hdev);
1500 chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
1501 num_chunks = args->in.num_chunks_restore;
1505 "Need to run restore phase but restore CS is empty\n");
1508 rc = cs_ioctl_default(hpriv, chunks, num_chunks,
1509 cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count);
1512 mutex_unlock(&hpriv->restore_phase_mutex);
1516 "Failed to submit restore CS for context %d (%d)\n",
1521 /* Need to wait for restore completion before execution phase */
1523 enum hl_cs_wait_status status;
1525 ret = _hl_cs_wait_ioctl(hdev, ctx,
1526 jiffies_to_usecs(hdev->timeout_jiffies),
1527 *cs_seq, &status, NULL);
1529 if (ret == -ERESTARTSYS) {
1530 usleep_range(100, 200);
1535 "Restore CS for context %d failed to complete %d\n",
1542 ctx->thread_ctx_switch_wait_token = 1;
1544 } else if (!ctx->thread_ctx_switch_wait_token) {
1545 rc = hl_poll_timeout_memory(hdev,
1546 &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
1547 100, jiffies_to_usecs(hdev->timeout_jiffies), false);
1549 if (rc == -ETIMEDOUT) {
1551 "context switch phase timeout (%d)\n", tmp);
1557 if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
1558 hl_device_reset(hdev, 0);
* hl_cs_signal_sob_wraparound_handler: handle SOB value wraparound case.
1565 * if the SOB value reaches the max value move to the other SOB reserved
1567 * @hdev: pointer to device structure
1568 * @q_idx: stream queue index
1569 * @hw_sob: the H/W SOB used in this signal CS.
1570 * @count: signals count
1571 * @encaps_sig: tells whether it's reservation for encaps signals or not.
1573 * Note that this function must be called while hw_queues_lock is taken.
1575 int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
1576 struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig)
1579 struct hl_sync_stream_properties *prop;
1580 struct hl_hw_sob *sob = *hw_sob, *other_sob;
1581 u8 other_sob_offset;
1583 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1587 /* check for wraparound */
1588 if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
1590 * Decrement as we reached the max value.
1591 * The release function won't be called here as we've
1592 * just incremented the refcount right before calling this
1595 hw_sob_put_err(sob);
1598 * check the other sob value, if it still in use then fail
1599 * otherwise make the switch
1601 other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
1602 other_sob = &prop->hw_sob[other_sob_offset];
1604 if (kref_read(&other_sob->kref) != 1) {
1605 dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n",
1611 * next_sob_val always points to the next available signal
1612 * in the sob, so in encaps signals it will be the next one
1613 * after reserving the required amount.
1616 prop->next_sob_val = count + 1;
1618 prop->next_sob_val = count;
1620 /* only two SOBs are currently in use */
1621 prop->curr_sob_offset = other_sob_offset;
1622 *hw_sob = other_sob;
1625 * check if other_sob needs reset, then do it before using it
1626 * for the reservation or the next signal cs.
1627 * we do it here, and for both encaps and regular signal cs
1628 * cases in order to avoid possible races of two kref_put
1629 * of the sob which can occur at the same time if we move the
1630 * sob reset(kref_put) to cs_do_release function.
1631 * in addition, if we have combination of cs signal and
1632 * encaps, and at the point we need to reset the sob there was
1633 * no more reservations and only signal cs keep coming,
1634 * in such case we need signal_cs to put the refcount and
1637 if (other_sob->need_reset)
1638 hw_sob_put(other_sob);
1641 /* set reset indication for the sob */
1642 sob->need_reset = true;
1643 hw_sob_get(other_sob);
1646 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
1647 prop->curr_sob_offset, q_idx);
1649 prop->next_sob_val += count;
1655 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
1656 struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx,
1657 bool encaps_signals)
1659 u64 *signal_seq_arr = NULL;
1660 u32 size_to_copy, signal_seq_arr_len;
1663 if (encaps_signals) {
1664 *signal_seq = chunk->encaps_signal_seq;
1668 signal_seq_arr_len = chunk->num_signal_seq_arr;
1670 /* currently only one signal seq is supported */
1671 if (signal_seq_arr_len != 1) {
1672 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1673 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1675 "Wait for signal CS supports only one signal CS seq\n");
1679 signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1680 sizeof(*signal_seq_arr),
1682 if (!signal_seq_arr)
1683 signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1684 sizeof(*signal_seq_arr),
1686 if (!signal_seq_arr) {
1687 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1688 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1692 size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr);
1693 if (copy_from_user(signal_seq_arr,
1694 u64_to_user_ptr(chunk->signal_seq_arr),
1696 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1697 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1699 "Failed to copy signal seq array from user\n");
1704 /* currently it is guaranteed to have only one signal seq */
1705 *signal_seq = signal_seq_arr[0];
1708 kfree(signal_seq_arr);
1713 static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
1714 struct hl_ctx *ctx, struct hl_cs *cs,
1715 enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset)
1717 struct hl_cs_counters_atomic *cntr;
1718 struct hl_cs_job *job;
1722 cntr = &hdev->aggregated_cs_counters;
1724 job = hl_cs_allocate_job(hdev, q_type, true);
1726 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1727 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1728 dev_err(hdev->dev, "Failed to allocate a new job\n");
1732 if (cs->type == CS_TYPE_WAIT)
1733 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
1735 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
1737 cb = hl_cb_kernel_create(hdev, cb_size,
1738 q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
1740 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1741 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1749 atomic_inc(&job->user_cb->cs_cnt);
1750 job->user_cb_size = cb_size;
1751 job->hw_queue_id = q_idx;
1753 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
1754 && cs->encaps_signals)
1755 job->encaps_sig_wait_offset = encaps_signal_offset;
1757 * No need in parsing, user CB is the patched CB.
1758 * We call hl_cb_destroy() out of two reasons - we don't need the CB in
1759 * the CB idr anymore and to decrement its refcount as it was
1760 * incremented inside hl_cb_kernel_create().
1762 job->patched_cb = job->user_cb;
1763 job->job_cb_size = job->user_cb_size;
1764 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1766 /* increment refcount as for external queues we get completion */
1769 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1771 list_add_tail(&job->cs_node, &cs->job_list);
1773 hl_debugfs_add_job(hdev, job);
1778 static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
1779 u32 q_idx, u32 count,
1780 u32 *handle_id, u32 *sob_addr,
1783 struct hw_queue_properties *hw_queue_prop;
1784 struct hl_sync_stream_properties *prop;
1785 struct hl_device *hdev = hpriv->hdev;
1786 struct hl_cs_encaps_sig_handle *handle;
1787 struct hl_encaps_signals_mgr *mgr;
1788 struct hl_hw_sob *hw_sob;
1792 if (count >= HL_MAX_SOB_VAL) {
1793 dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n",
1799 if (q_idx >= hdev->asic_prop.max_queues) {
1800 dev_err(hdev->dev, "Queue index %d is invalid\n",
1806 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
1808 if (!hw_queue_prop->supports_sync_stream) {
1810 "Queue index %d does not support sync stream operations\n",
1816 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1818 handle = kzalloc(sizeof(*handle), GFP_KERNEL);
1824 handle->count = count;
1826 hl_ctx_get(hdev, hpriv->ctx);
1827 handle->ctx = hpriv->ctx;
1828 mgr = &hpriv->ctx->sig_mgr;
1830 spin_lock(&mgr->lock);
1831 hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC);
1832 spin_unlock(&mgr->lock);
1835 dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
1840 handle->id = hdl_id;
1841 handle->q_idx = q_idx;
1842 handle->hdev = hdev;
1843 kref_init(&handle->refcount);
1845 hdev->asic_funcs->hw_queues_lock(hdev);
1847 hw_sob = &prop->hw_sob[prop->curr_sob_offset];
1850 * Increment the SOB value by count by user request
1851 * to reserve those signals
1852 * check if the signals amount to reserve is not exceeding the max sob
1853 * value, if yes then switch sob.
1855 rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count,
1858 dev_err(hdev->dev, "Failed to switch SOB\n");
1859 hdev->asic_funcs->hw_queues_unlock(hdev);
1863 /* set the hw_sob to the handle after calling the sob wraparound handler
1864 * since sob could have changed.
1866 handle->hw_sob = hw_sob;
1868 /* store the current sob value for unreserve validity check, and
1869 * signal offset support
1871 handle->pre_sob_val = prop->next_sob_val - handle->count;
1873 *signals_count = prop->next_sob_val;
1874 hdev->asic_funcs->hw_queues_unlock(hdev);
1876 *sob_addr = handle->hw_sob->sob_addr;
1877 *handle_id = hdl_id;
1880 "Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n",
1881 hw_sob->sob_id, handle->hw_sob->sob_addr,
1882 prop->next_sob_val - 1, q_idx, hdl_id);
1886 spin_lock(&mgr->lock);
1887 idr_remove(&mgr->handles, hdl_id);
1888 spin_unlock(&mgr->lock);
1891 hl_ctx_put(handle->ctx);
1898 static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
1900 struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
1901 struct hl_sync_stream_properties *prop;
1902 struct hl_device *hdev = hpriv->hdev;
1903 struct hl_encaps_signals_mgr *mgr;
1904 struct hl_hw_sob *hw_sob;
1905 u32 q_idx, sob_addr;
1908 mgr = &hpriv->ctx->sig_mgr;
1910 spin_lock(&mgr->lock);
1911 encaps_sig_hdl = idr_find(&mgr->handles, handle_id);
1912 if (encaps_sig_hdl) {
1913 dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n",
1914 handle_id, encaps_sig_hdl->hw_sob->sob_addr,
1915 encaps_sig_hdl->count);
1917 hdev->asic_funcs->hw_queues_lock(hdev);
1919 q_idx = encaps_sig_hdl->q_idx;
1920 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1921 hw_sob = &prop->hw_sob[prop->curr_sob_offset];
1922 sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
1924 /* Check if sob_val got out of sync due to other
1925 * signal submission requests which were handled
1926 * between the reserve-unreserve calls or SOB switch
1927 * upon reaching SOB max value.
1929 if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count
1930 != prop->next_sob_val ||
1931 sob_addr != encaps_sig_hdl->hw_sob->sob_addr) {
1932 dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n",
1933 encaps_sig_hdl->pre_sob_val,
1934 (prop->next_sob_val - encaps_sig_hdl->count));
1936 hdev->asic_funcs->hw_queues_unlock(hdev);
1942 * Decrement the SOB value by count by user request
1943 * to unreserve those signals
1945 prop->next_sob_val -= encaps_sig_hdl->count;
1947 hdev->asic_funcs->hw_queues_unlock(hdev);
1951 /* Release the id and free allocated memory of the handle */
1952 idr_remove(&mgr->handles, handle_id);
1953 hl_ctx_put(encaps_sig_hdl->ctx);
1954 kfree(encaps_sig_hdl);
1957 dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n");
1960 spin_unlock(&mgr->lock);
1965 static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
1966 void __user *chunks, u32 num_chunks,
1967 u64 *cs_seq, u32 flags, u32 timeout,
1968 u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count)
1970 struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL;
1971 bool handle_found = false, is_wait_cs = false,
1972 wait_cs_submitted = false,
1973 cs_encaps_signals = false;
1974 struct hl_cs_chunk *cs_chunk_array, *chunk;
1975 bool staged_cs_with_encaps_signals = false;
1976 struct hw_queue_properties *hw_queue_prop;
1977 struct hl_device *hdev = hpriv->hdev;
1978 struct hl_cs_compl *sig_waitcs_cmpl;
1979 u32 q_idx, collective_engine_id = 0;
1980 struct hl_cs_counters_atomic *cntr;
1981 struct hl_fence *sig_fence = NULL;
1982 struct hl_ctx *ctx = hpriv->ctx;
1983 enum hl_queue_type q_type;
1988 cntr = &hdev->aggregated_cs_counters;
1989 *cs_seq = ULLONG_MAX;
1991 rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1996 /* currently it is guaranteed to have only one chunk */
1997 chunk = &cs_chunk_array[0];
1999 if (chunk->queue_index >= hdev->asic_prop.max_queues) {
2000 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2001 atomic64_inc(&cntr->validation_drop_cnt);
2002 dev_err(hdev->dev, "Queue index %d is invalid\n",
2003 chunk->queue_index);
2005 goto free_cs_chunk_array;
2008 q_idx = chunk->queue_index;
2009 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
2010 q_type = hw_queue_prop->type;
2012 if (!hw_queue_prop->supports_sync_stream) {
2013 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2014 atomic64_inc(&cntr->validation_drop_cnt);
2016 "Queue index %d does not support sync stream operations\n",
2019 goto free_cs_chunk_array;
2022 if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
2023 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
2024 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2025 atomic64_inc(&cntr->validation_drop_cnt);
2027 "Queue index %d is invalid\n", q_idx);
2029 goto free_cs_chunk_array;
2032 if (!hdev->nic_ports_mask) {
2033 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2034 atomic64_inc(&cntr->validation_drop_cnt);
2036 "Collective operations not supported when NIC ports are disabled");
2038 goto free_cs_chunk_array;
2041 collective_engine_id = chunk->collective_engine_id;
2044 is_wait_cs = !!(cs_type == CS_TYPE_WAIT ||
2045 cs_type == CS_TYPE_COLLECTIVE_WAIT);
2047 cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
2050 rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq,
2051 ctx, cs_encaps_signals);
2053 goto free_cs_chunk_array;
2055 if (cs_encaps_signals) {
2056 /* check if cs sequence has encapsulated
2062 spin_lock(&ctx->sig_mgr.lock);
2063 idp = &ctx->sig_mgr.handles;
2064 idr_for_each_entry(idp, encaps_sig_hdl, id) {
2065 if (encaps_sig_hdl->cs_seq == signal_seq) {
2066 handle_found = true;
2067 /* get refcount to protect removing
2068 * this handle from idr, needed when
2069 * multiple wait cs are used with offset
2070 * to wait on reserved encaps signals.
2072 kref_get(&encaps_sig_hdl->refcount);
2076 spin_unlock(&ctx->sig_mgr.lock);
2078 if (!handle_found) {
2079 /* treat as signal CS already finished */
2080 dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
2083 goto free_cs_chunk_array;
2086 /* validate also the signal offset value */
2087 if (chunk->encaps_signal_offset >
2088 encaps_sig_hdl->count) {
dev_err(hdev->dev, "offset(%u) value exceeds max reserved signals count(%u)!\n",
2090 chunk->encaps_signal_offset,
2091 encaps_sig_hdl->count);
2093 goto free_cs_chunk_array;
2097 sig_fence = hl_ctx_get_fence(ctx, signal_seq);
2098 if (IS_ERR(sig_fence)) {
2099 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2100 atomic64_inc(&cntr->validation_drop_cnt);
2102 "Failed to get signal CS with seq 0x%llx\n",
2104 rc = PTR_ERR(sig_fence);
2105 goto free_cs_chunk_array;
2109 /* signal CS already finished */
2111 goto free_cs_chunk_array;
2115 container_of(sig_fence, struct hl_cs_compl, base_fence);
2117 staged_cs_with_encaps_signals = !!
2118 (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT &&
2119 (flags & HL_CS_FLAGS_ENCAP_SIGNALS));
2121 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL &&
2122 !staged_cs_with_encaps_signals) {
2123 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2124 atomic64_inc(&cntr->validation_drop_cnt);
2126 "CS seq 0x%llx is not of a signal/encaps-signal CS\n",
2128 hl_fence_put(sig_fence);
2130 goto free_cs_chunk_array;
2133 if (completion_done(&sig_fence->completion)) {
2134 /* signal CS already finished */
2135 hl_fence_put(sig_fence);
2137 goto free_cs_chunk_array;
2141 rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
2144 hl_fence_put(sig_fence);
2146 goto free_cs_chunk_array;
2150 * Save the signal CS fence for later initialization right before
2151 * hanging the wait CS on the queue.
2152 * for encaps signals case, we save the cs sequence and handle pointer
2153 * for later initialization.
2156 cs->signal_fence = sig_fence;
2157 /* store the handle pointer, so we don't have to
2158 * look for it again, later on the flow
2159 * when we need to set SOB info in hw_queue.
2161 if (cs->encaps_signals)
2162 cs->encaps_sig_hdl = encaps_sig_hdl;
2165 hl_debugfs_add_cs(cs);
2167 *cs_seq = cs->sequence;
2169 if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
2170 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
2171 q_idx, chunk->encaps_signal_offset);
2172 else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
2173 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
2174 cs, q_idx, collective_engine_id,
2175 chunk->encaps_signal_offset);
2177 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2178 atomic64_inc(&cntr->validation_drop_cnt);
2183 goto free_cs_object;
2185 rc = hl_hw_queue_schedule_cs(cs);
2187 /* In case wait cs failed here, it means the signal cs
* already completed. We want to free all its related objects
2189 * but we don't want to fail the ioctl.
2193 else if (rc != -EAGAIN)
2195 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
2196 ctx->asid, cs->sequence, rc);
2197 goto free_cs_object;
2200 *signal_sob_addr_offset = cs->sob_addr_offset;
2201 *signal_initial_sob_count = cs->initial_sob_count;
2203 rc = HL_CS_STATUS_SUCCESS;
2205 wait_cs_submitted = true;
2209 cs_rollback(hdev, cs);
2210 *cs_seq = ULLONG_MAX;
2211 /* The path below is both for good and erroneous exits */
2213 /* We finished with the CS in this function, so put the ref */
2215 free_cs_chunk_array:
2216 if (!wait_cs_submitted && cs_encaps_signals && handle_found &&
2218 kref_put(&encaps_sig_hdl->refcount,
2219 hl_encaps_handle_do_release);
2220 kfree(cs_chunk_array);
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cs_args *args = data;
	enum hl_cs_type cs_type = 0;
	u64 cs_seq = ULONG_MAX;
	void __user *chunks;
	u32 num_chunks, flags, timeout,
		signals_count = 0, sob_addr = 0, handle_id = 0;
	u16 sob_initial_count = 0;
	int rc;

	rc = hl_cs_sanity_checks(hpriv, args);
	if (rc)
		goto out;

	rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
	if (rc)
		goto out;

	cs_type = hl_cs_get_cs_type(args->in.cs_flags &
					~HL_CS_FLAGS_FORCE_RESTORE);
	chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
	num_chunks = args->in.num_chunks_execute;
	flags = args->in.cs_flags;

	/* In case this is a staged CS, user should supply the CS sequence */
	if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
			!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
		cs_seq = args->in.seq;

	timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
			? msecs_to_jiffies(args->in.timeout * 1000)
			: hpriv->hdev->timeout_jiffies;

	switch (cs_type) {
	case CS_TYPE_SIGNAL:
	case CS_TYPE_WAIT:
	case CS_TYPE_COLLECTIVE_WAIT:
		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
					&cs_seq, args->in.cs_flags, timeout,
					&sob_addr, &sob_initial_count);
		break;
	case CS_RESERVE_SIGNALS:
		rc = cs_ioctl_reserve_signals(hpriv,
					args->in.encaps_signals_q_idx,
					args->in.encaps_signals_count,
					&handle_id, &sob_addr, &signals_count);
		break;
	case CS_UNRESERVE_SIGNALS:
		rc = cs_ioctl_unreserve_signals(hpriv,
					args->in.encaps_sig_handle_id);
		break;
	default:
		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
						args->in.cs_flags,
						args->in.encaps_sig_handle_id,
						timeout, &sob_initial_count);
		break;
	}

out:
	if (rc != -EAGAIN) {
		memset(args, 0, sizeof(*args));

		switch (cs_type) {
		case CS_RESERVE_SIGNALS:
			args->out.handle_id = handle_id;
			args->out.sob_base_addr_offset = sob_addr;
			args->out.count = signals_count;
			break;
		case CS_TYPE_SIGNAL:
			args->out.sob_base_addr_offset = sob_addr;
			args->out.sob_count_before_submission = sob_initial_count;
			args->out.seq = cs_seq;
			break;
		case CS_TYPE_DEFAULT:
			args->out.sob_count_before_submission = sob_initial_count;
			args->out.seq = cs_seq;
			break;
		default:
			args->out.seq = cs_seq;
			break;
		}

		args->out.status = rc;
	}

	return rc;
}
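/*
 * hl_wait_for_fence() - wait for a CS fence with the given timeout.
 * A zero timeout only polls the fence (completion_done()), while a non-zero
 * timeout sleeps interruptibly. On return, @status reflects whether the CS is
 * still busy, completed, or already gone (fence freed), and @timestamp (if
 * provided) is set to the CS completion time.
 */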
static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence,
				enum hl_cs_wait_status *status, u64 timeout_us,
				s64 *timestamp)
{
	struct hl_device *hdev = ctx->hdev;
	long completion_rc;
	int rc = 0;

	if (IS_ERR(fence)) {
		rc = PTR_ERR(fence);
		if (rc == -EINVAL)
			dev_notice_ratelimited(hdev->dev,
				"Can't wait on CS %llu because current CS is at seq %llu\n",
				seq, ctx->cs_sequence);
		return rc;
	}

	if (!fence) {
		dev_dbg(hdev->dev,
			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
			seq, ctx->cs_sequence);

		*status = CS_WAIT_STATUS_GONE;
		return 0;
	}

	if (!timeout_us) {
		completion_rc = completion_done(&fence->completion);
	} else {
		unsigned long timeout;

		timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ?
				timeout_us : usecs_to_jiffies(timeout_us);
		completion_rc =
			wait_for_completion_interruptible_timeout(
				&fence->completion, timeout);
	}

	if (completion_rc > 0) {
		*status = CS_WAIT_STATUS_COMPLETED;
		if (timestamp)
			*timestamp = ktime_to_ns(fence->timestamp);
	} else {
		*status = CS_WAIT_STATUS_BUSY;
	}

	if (fence->error == -ETIMEDOUT)
		rc = -ETIMEDOUT;
	else if (fence->error == -EIO)
		rc = -EIO;

	return rc;
}
/*
 * hl_cs_poll_fences - iterate CS fences to check for CS completion
 *
 * @mcs_data: multi-CS internal data
 * @mcs_compl: multi-CS completion structure
 *
 * @return 0 on success, otherwise non 0 error code
 *
 * The function iterates over all CS sequences in the list and sets a bit in
 * completion_bitmap for each completed CS.
 * While iterating, the function adds the stream map of each fence in the fence
 * array to the completion QID stream map, to be used by CSs to perform
 * completion to the multi-CS context.
 * This function shall be called after taking a context ref.
 */
static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl)
{
	struct hl_fence **fence_ptr = mcs_data->fence_arr;
	struct hl_device *hdev = mcs_data->ctx->hdev;
	int i, rc, arr_len = mcs_data->arr_len;
	u64 *seq_arr = mcs_data->seq_arr;
	ktime_t max_ktime, first_cs_time;
	enum hl_cs_wait_status status;

	memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr));

	/* get all fences under the same lock */
	rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
	if (rc)
		return rc;

	/*
	 * re-initialize the completion here to handle 2 possible cases:
	 * 1. CS will complete the multi-CS prior to clearing the completion. in which
	 *    case the fence iteration is guaranteed to catch the CS completion.
	 * 2. the completion will occur after re-init of the completion.
	 *    in which case we will wake up immediately in wait_for_completion.
	 */
	reinit_completion(&mcs_compl->completion);

	/*
	 * set to maximum time to verify timestamp is valid: if at the end
	 * this value is maintained, no timestamp was updated
	 */
	max_ktime = ktime_set(KTIME_SEC_MAX, 0);
	first_cs_time = max_ktime;

	for (i = 0; i < arr_len; i++, fence_ptr++) {
		struct hl_fence *fence = *fence_ptr;

		/*
		 * In order to prevent a case where we wait until timeout even though a CS
		 * associated with the multi-CS actually completed, we do things in the below order:
		 * 1. for each fence set its QID map in the multi-CS completion QID map. This way
		 *    any CS can, potentially, complete the multi CS for the specific QID (note
		 *    that once completion is initialized, calling complete* and then wait on the
		 *    completion will cause it to return at once)
		 * 2. only after allowing multi-CS completion for the specific QID we check whether
		 *    the specific CS already completed (and thus the wait for completion part will
		 *    be skipped). if the CS is not completed it is guaranteed that the completing
		 *    CS will wake up the completion.
		 */
		if (fence)
			mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map;

		/*
		 * function won't sleep as it is called with timeout 0 (i.e.
		 * poll the fence)
		 */
		rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence,
						&status, 0, NULL);
		if (rc) {
			dev_err(hdev->dev,
				"wait_for_fence error :%d for CS seq %llu\n",
								rc, seq_arr[i]);
			break;
		}

		switch (status) {
		case CS_WAIT_STATUS_BUSY:
			/* CS did not finish, QID to wait on already stored */
			break;
		case CS_WAIT_STATUS_COMPLETED:
			/*
			 * Using mcs_handling_done to avoid the possibility of mcs_data
			 * returning to the user indicating the CS completed before it
			 * finished all of its mcs handling, to avoid a race the next time
			 * the user waits for mcs.
			 * note: when reaching this case fence is definitely not NULL
			 *       but the NULL check was added to overcome static analysis
			 */
			if (fence && !fence->mcs_handling_done) {
				/*
				 * in case multi CS is completed but MCS handling not done
				 * we "complete" the multi CS to prevent it from waiting
				 * until time-out and the "multi-CS handling done" will have
				 * another chance at the next iteration
				 */
				complete_all(&mcs_compl->completion);
				break;
			}

			mcs_data->completion_bitmap |= BIT(i);

			/*
			 * For all completed CSs we take the earliest timestamp.
			 * For this we have to validate that the timestamp is
			 * earliest of all timestamps so far.
			 */
			if (mcs_data->update_ts &&
					(ktime_compare(fence->timestamp, first_cs_time) < 0))
				first_cs_time = fence->timestamp;
			break;
		case CS_WAIT_STATUS_GONE:
			mcs_data->update_ts = false;
			mcs_data->gone_cs = true;
			/*
			 * It is possible to get old sequence numbers from a user
			 * which relate to already completed CSs whose fences are
			 * already gone. In this case, the CS is set as completed but
			 * there is no need to consider its QID for mcs completion.
			 */
			mcs_data->completion_bitmap |= BIT(i);
			break;
		default:
			dev_err(hdev->dev, "Invalid fence status\n");
			return -EINVAL;
		}
	}

	hl_fences_put(mcs_data->fence_arr, arr_len);

	if (mcs_data->update_ts &&
			(ktime_compare(first_cs_time, max_ktime) != 0))
		mcs_data->timestamp = ktime_to_ns(first_cs_time);

	return rc;
}
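/*
 * _hl_cs_wait_ioctl() - wait for a single CS sequence number.
 * Takes a context reference, looks up the fence for @seq and delegates the
 * actual poll/wait to hl_wait_for_fence().
 */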
static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				u64 timeout_us, u64 seq,
				enum hl_cs_wait_status *status, s64 *timestamp)
{
	struct hl_fence *fence;
	int rc = 0;

	if (timestamp)
		*timestamp = 0;

	hl_ctx_get(hdev, ctx);

	fence = hl_ctx_get_fence(ctx, seq);

	rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp);
	hl_fence_put(fence);
	hl_ctx_put(ctx);

	return rc;
}
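/*
 * usecs_to_jiffies() takes an unsigned int, so a 64-bit user timeout is
 * converted through nanoseconds once it no longer fits in 32 bits, clamping
 * to the largest representable value to avoid overflow.
 */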
static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
{
	if (usecs <= U32_MAX)
		return usecs_to_jiffies(usecs);

	/*
	 * If the value in nanoseconds is larger than 64 bit, use the largest
	 * 64 bit value.
	 */
	if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
		return nsecs_to_jiffies(U64_MAX);

	return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
}
/*
 * hl_wait_multi_cs_completion_init - init completion structure
 *
 * @hdev: pointer to habanalabs device structure
 *
 * @return valid completion struct pointer on success, otherwise error pointer
 *
 * up to MULTI_CS_MAX_USER_CTX calls can be made concurrently to the driver.
 * the function gets the first available completion (by marking it "used")
 * and initializes its values.
 */
static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev)
{
	struct multi_cs_completion *mcs_compl;
	int i;

	/* find a free multi_cs completion structure */
	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
		mcs_compl = &hdev->multi_cs_completion[i];
		spin_lock(&mcs_compl->lock);
		if (!mcs_compl->used) {
			mcs_compl->used = 1;
			mcs_compl->timestamp = 0;
			/*
			 * init QID map to 0 to avoid completion by CSs. the actual QID map
			 * to multi-CS CSs will be set incrementally at a later stage
			 */
			mcs_compl->stream_master_qid_map = 0;
			spin_unlock(&mcs_compl->lock);
			break;
		}
		spin_unlock(&mcs_compl->lock);
	}

	if (i == MULTI_CS_MAX_USER_CTX) {
		dev_err(hdev->dev, "no available multi-CS completion structure\n");
		return ERR_PTR(-ENOMEM);
	}
	return mcs_compl;
}
/*
 * hl_wait_multi_cs_completion_fini - return completion structure and set as
 *                                    not in use
 *
 * @mcs_compl: pointer to the completion structure
 */
static void hl_wait_multi_cs_completion_fini(
					struct multi_cs_completion *mcs_compl)
{
	/*
	 * free the completion structure, do it under lock to be in-sync with the
	 * thread that signals completion
	 */
	spin_lock(&mcs_compl->lock);
	mcs_compl->used = 0;
	spin_unlock(&mcs_compl->lock);
}
/*
 * hl_wait_multi_cs_completion - wait for first CS to complete
 *
 * @mcs_data: multi-CS internal data
 * @mcs_compl: multi-CS completion structure
 *
 * @return 0 on success, otherwise non 0 error code
 */
static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
				struct multi_cs_completion *mcs_compl)
{
	long completion_rc;

	completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
									mcs_data->timeout_jiffies);

	/* update timestamp */
	if (completion_rc > 0)
		mcs_data->timestamp = mcs_compl->timestamp;

	mcs_data->wait_status = completion_rc;

	return 0;
}
/*
 * hl_multi_cs_completion_init - init array of multi-CS completion structures
 *
 * @hdev: pointer to habanalabs device structure
 */
void hl_multi_cs_completion_init(struct hl_device *hdev)
{
	struct multi_cs_completion *mcs_cmpl;
	int i;

	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
		mcs_cmpl = &hdev->multi_cs_completion[i];
		mcs_cmpl->used = 0;
		spin_lock_init(&mcs_cmpl->lock);
		init_completion(&mcs_cmpl->completion);
	}
}
/*
 * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
 *
 * @hpriv: pointer to the private data of the fd
 * @data: pointer to multi-CS wait ioctl in/out args
 */
static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct multi_cs_completion *mcs_compl;
	struct hl_device *hdev = hpriv->hdev;
	struct multi_cs_data mcs_data = {0};
	union hl_wait_cs_args *args = data;
	struct hl_ctx *ctx = hpriv->ctx;
	struct hl_fence **fence_arr;
	void __user *seq_arr;
	u32 size_to_copy;
	u64 *cs_seq_arr;
	u8 seq_arr_len;
	int rc;

	if (!hdev->supports_wait_for_multi_cs) {
		dev_err(hdev->dev, "Wait for multi CS is not supported\n");
		return -EPERM;
	}

	seq_arr_len = args->in.seq_arr_len;

	if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) {
		dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n",
				HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len);
		return -EINVAL;
	}

	/* allocate memory for sequence array */
	cs_seq_arr =
		kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL);
	if (!cs_seq_arr)
		return -ENOMEM;

	/* copy CS sequence array from user */
	seq_arr = (void __user *) (uintptr_t) args->in.seq;
	size_to_copy = seq_arr_len * sizeof(*cs_seq_arr);
	if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) {
		dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n");
		rc = -EFAULT;
		goto free_seq_arr;
	}

	/* allocate array for the fences */
	fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL);
	if (!fence_arr) {
		rc = -ENOMEM;
		goto free_seq_arr;
	}

	/* initialize the multi-CS internal data */
	mcs_data.ctx = ctx;
	mcs_data.seq_arr = cs_seq_arr;
	mcs_data.fence_arr = fence_arr;
	mcs_data.arr_len = seq_arr_len;

	hl_ctx_get(hdev, ctx);

	/* wait (with timeout) for the first CS to be completed */
	mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
	mcs_compl = hl_wait_multi_cs_completion_init(hdev);
	if (IS_ERR(mcs_compl)) {
		rc = PTR_ERR(mcs_compl);
		goto put_ctx;
	}

	/* poll all CS fences, extract timestamp */
	mcs_data.update_ts = true;
	rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
	/*
	 * skip wait for CS completion when one of the below is true:
	 * - an error on the poll function
	 * - one or more CS in the list completed
	 * - the user called ioctl with timeout 0
	 */
	if (rc || mcs_data.completion_bitmap || !args->in.timeout_us)
		goto completion_fini;

	while (true) {
		rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl);
		if (rc || (mcs_data.wait_status == 0))
			break;

		/*
		 * poll fences once again to update the CS map.
		 * no timestamp should be updated this time.
		 */
		mcs_data.update_ts = false;
		rc = hl_cs_poll_fences(&mcs_data, mcs_compl);

		if (mcs_data.completion_bitmap)
			break;

		/*
		 * if hl_wait_multi_cs_completion returned before timeout (i.e.
		 * it got a completion) it either got completed by a CS in the multi-CS list
		 * (in which case the indication will be a non empty completion_bitmap) or it
		 * got completed by a CS submitted to one of the shared stream masters but
		 * not in the multi-CS list (in which case we should wait again but modify
		 * the timeout and set the timestamp to zero to let a CS related to the
		 * current multi-CS set a new, relevant, timestamp)
		 */
		mcs_data.timeout_jiffies = mcs_data.wait_status;
		mcs_compl->timestamp = 0;
	}

completion_fini:
	hl_wait_multi_cs_completion_fini(mcs_compl);

put_ctx:
	hl_ctx_put(ctx);
	kfree(fence_arr);

free_seq_arr:
	kfree(cs_seq_arr);

	if (rc)
		return rc;

	if (mcs_data.wait_status == -ERESTARTSYS) {
		dev_err_ratelimited(hdev->dev,
				"user process got signal while waiting for Multi-CS\n");
		return -EINTR;
	}

	/* update output args */
	memset(args, 0, sizeof(*args));

	if (mcs_data.completion_bitmap) {
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
		args->out.cs_completion_map = mcs_data.completion_bitmap;

		/* if timestamp is not 0, it's valid */
		if (mcs_data.timestamp) {
			args->out.timestamp_nsec = mcs_data.timestamp;
			args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
		}

		/* update if some CS was gone */
		if (!mcs_data.timestamp)
			args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
	} else {
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
	}

	return 0;
}
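/*
 * hl_cs_wait_ioctl() - wait on a single CS sequence number.
 * Translates the internal wait result into the uAPI status codes:
 * -ERESTARTSYS becomes -EINTR, -ETIMEDOUT/-EIO are reported as
 * TIMEDOUT/ABORTED, and a gone fence is flagged while still being
 * reported as completed.
 */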
static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_wait_cs_args *args = data;
	enum hl_cs_wait_status status;
	u64 seq = args->in.seq;
	s64 timestamp;
	int rc;

	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq,
				&status, &timestamp);

	if (rc == -ERESTARTSYS) {
		dev_err_ratelimited(hdev->dev,
			"user process got signal while waiting for CS handle %llu\n",
			seq);
		return -EINTR;
	}

	memset(args, 0, sizeof(*args));

	if (rc) {
		if (rc == -ETIMEDOUT) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has timed-out while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
		} else if (rc == -EIO) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has been aborted while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
		}
		return rc;
	}

	if (timestamp) {
		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
		args->out.timestamp_nsec = timestamp;
	}

	switch (status) {
	case CS_WAIT_STATUS_GONE:
		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
		fallthrough;
	case CS_WAIT_STATUS_COMPLETED:
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
		break;
	case CS_WAIT_STATUS_BUSY:
	default:
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
		break;
	}

	return 0;
}
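/*
 * _hl_interrupt_wait_ioctl() - interrupt-based wait on a CQ counter that
 * lives in a kernel-mapped command buffer. The counter is compared against
 * @target_value once before the node is queued, in case the interrupt already
 * arrived; afterwards the pending node is left on the interrupt's wait list
 * for the interrupt handler to monitor and complete.
 */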
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				struct hl_cb_mgr *cb_mgr, u64 timeout_us,
				u64 cq_counters_handle, u64 cq_counters_offset,
				u64 target_value, struct hl_user_interrupt *interrupt,
				u32 *status,
				u64 *timestamp)
{
	struct hl_user_pending_interrupt *pend;
	unsigned long timeout, flags;
	long completion_rc;
	struct hl_cb *cb;
	int rc = 0;
	u32 handle;

	timeout = hl_usecs64_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	cq_counters_handle >>= PAGE_SHIFT;
	handle = (u32) cq_counters_handle;

	cb = hl_cb_get(hdev, cb_mgr, handle);
	if (!cb) {
		hl_ctx_put(ctx);
		return -EINVAL;
	}

	pend = kzalloc(sizeof(*pend), GFP_KERNEL);
	if (!pend) {
		hl_cb_put(cb);
		hl_ctx_put(ctx);
		return -ENOMEM;
	}

	hl_fence_init(&pend->fence, ULONG_MAX);

	pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset;
	pend->cq_target_value = target_value;

	/* We check for completion value as interrupt could have been received
	 * before we added the node to the wait list
	 */
	if (*pend->cq_kernel_addr >= target_value) {
		*status = HL_WAIT_CS_STATUS_COMPLETED;
		/* There was no interrupt, we assume the completion is now. */
		pend->fence.timestamp = ktime_get();
	} else {
		*status = HL_WAIT_CS_STATUS_BUSY;
	}

	if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
		goto set_timestamp;

	/* Add pending user interrupt to relevant list for the interrupt
	 * handler to monitor
	 */
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

	/* Wait for interrupt handler to signal completion */
	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
									timeout);
	if (completion_rc > 0) {
		*status = HL_WAIT_CS_STATUS_COMPLETED;
	} else if (completion_rc == -ERESTARTSYS) {
		dev_err_ratelimited(hdev->dev,
			"user process got signal while waiting for interrupt ID %d\n",
			interrupt->interrupt_id);
		rc = -EINTR;
		*status = HL_WAIT_CS_STATUS_ABORTED;
	} else if (pend->fence.error == -EIO) {
		dev_err_ratelimited(hdev->dev,
			"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
			pend->fence.error);
		rc = -EIO;
		*status = HL_WAIT_CS_STATUS_ABORTED;
	} else {
		dev_err_ratelimited(hdev->dev, "Waiting for interrupt ID %d timedout\n",
			interrupt->interrupt_id);
		rc = -ETIMEDOUT;
		*status = HL_WAIT_CS_STATUS_BUSY;
	}

	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_del(&pend->wait_list_node);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

set_timestamp:
	*timestamp = ktime_to_ns(pend->fence.timestamp);

	kfree(pend);
	hl_cb_put(cb);
	hl_ctx_put(ctx);

	return rc;
}
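/*
 * _hl_interrupt_wait_ioctl_user_addr() - interrupt-based wait on a 64-bit
 * value at a user-space address. The value is re-read with copy_from_user()
 * after every wake-up, and if it has not yet reached @target_value the wait
 * is retried with the remaining timeout.
 */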
static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx,
				u64 timeout_us, u64 user_address,
				u64 target_value, struct hl_user_interrupt *interrupt,
				u32 *status,
				u64 *timestamp)
{
	struct hl_user_pending_interrupt *pend;
	unsigned long timeout, flags;
	u64 completion_value;
	long completion_rc;
	int rc = 0;

	timeout = hl_usecs64_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	pend = kzalloc(sizeof(*pend), GFP_KERNEL);
	if (!pend) {
		hl_ctx_put(ctx);
		return -ENOMEM;
	}

	hl_fence_init(&pend->fence, ULONG_MAX);

	/* Add pending user interrupt to relevant list for the interrupt
	 * handler to monitor
	 */
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

	/* We check for completion value as interrupt could have been received
	 * before we added the node to the wait list
	 */
	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
		dev_err(hdev->dev, "Failed to copy completion value from user\n");
		rc = -EFAULT;
		goto remove_pending_user_interrupt;
	}

	if (completion_value >= target_value) {
		*status = HL_WAIT_CS_STATUS_COMPLETED;
		/* There was no interrupt, we assume the completion is now. */
		pend->fence.timestamp = ktime_get();
	} else {
		*status = HL_WAIT_CS_STATUS_BUSY;
	}

	if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
		goto remove_pending_user_interrupt;

wait_again:
	/* Wait for interrupt handler to signal completion */
	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
									timeout);

	/* If timeout did not expire we need to perform the comparison.
	 * If comparison fails, keep waiting until timeout expires
	 */
	if (completion_rc > 0) {
		spin_lock_irqsave(&interrupt->wait_list_lock, flags);
		/* reinit_completion must be called before we check for user
		 * completion value, otherwise, if interrupt is received after
		 * the comparison and before the next wait_for_completion,
		 * we will reach timeout and fail
		 */
		reinit_completion(&pend->fence.completion);
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
			dev_err(hdev->dev, "Failed to copy completion value from user\n");
			rc = -EFAULT;
			goto remove_pending_user_interrupt;
		}

		if (completion_value >= target_value) {
			*status = HL_WAIT_CS_STATUS_COMPLETED;
		} else if (pend->fence.error) {
			dev_err_ratelimited(hdev->dev,
				"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
				pend->fence.error);
			/* set the command completion status as ABORTED */
			*status = HL_WAIT_CS_STATUS_ABORTED;
		} else {
			timeout = completion_rc;
			goto wait_again;
		}
	} else if (completion_rc == -ERESTARTSYS) {
		dev_err_ratelimited(hdev->dev,
			"user process got signal while waiting for interrupt ID %d\n",
			interrupt->interrupt_id);
		rc = -EINTR;
	} else {
		*status = HL_WAIT_CS_STATUS_BUSY;
	}

remove_pending_user_interrupt:
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_del(&pend->wait_list_node);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

	*timestamp = ktime_to_ns(pend->fence.timestamp);

	hl_ctx_put(ctx);
	kfree(pend);

	return rc;
}
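/*
 * hl_interrupt_wait_ioctl() - validate the requested user interrupt ID and
 * dispatch to the CQ-counter or user-address wait flavor, depending on the
 * HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ flag.
 */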
static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	u16 interrupt_id, first_interrupt, last_interrupt;
	struct hl_device *hdev = hpriv->hdev;
	struct asic_fixed_properties *prop;
	struct hl_user_interrupt *interrupt;
	union hl_wait_cs_args *args = data;
	u32 status = HL_WAIT_CS_STATUS_BUSY;
	u64 timestamp;
	int rc;

	prop = &hdev->asic_prop;

	if (!prop->user_interrupt_count) {
		dev_err(hdev->dev, "no user interrupts allowed");
		return -EPERM;
	}

	interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);

	first_interrupt = prop->first_available_user_msix_interrupt;
	last_interrupt = prop->first_available_user_msix_interrupt +
			prop->user_interrupt_count - 1;

	if ((interrupt_id < first_interrupt || interrupt_id > last_interrupt) &&
			interrupt_id != HL_COMMON_USER_INTERRUPT_ID) {
		dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id);
		return -EINVAL;
	}

	if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID)
		interrupt = &hdev->common_user_interrupt;
	else
		interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];

	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr,
				args->in.interrupt_timeout_us, args->in.cq_counters_handle,
				args->in.cq_counters_offset,
				args->in.target, interrupt, &status,
				&timestamp);
	else
		rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
				args->in.interrupt_timeout_us, args->in.addr,
				args->in.target, interrupt, &status,
				&timestamp);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
			"interrupt_wait_ioctl failed (%d)\n", rc);

		return rc;
	}

	memset(args, 0, sizeof(*args));
	args->out.status = status;

	if (timestamp) {
		args->out.timestamp_nsec = timestamp;
		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
	}

	return 0;
}
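/* Top-level wait ioctl: dispatch to interrupt, multi-CS or single-CS wait */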
int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_wait_cs_args *args = data;
	u32 flags = args->in.flags;
	int rc;

	/* If the device is not operational, no point in waiting for any command submission or
	 * user interrupt
	 */
	if (!hl_device_operational(hpriv->hdev, NULL))
		return -EBUSY;

	if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
		rc = hl_interrupt_wait_ioctl(hpriv, data);
	else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS)
		rc = hl_multi_cs_wait_ioctl(hpriv, data);
	else
		rc = hl_cs_wait_ioctl(hpriv, data);

	return rc;
}