// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2021 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/uaccess.h>
#include <linux/slab.h>
#define HL_CS_FLAGS_TYPE_MASK	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
				HL_CS_FLAGS_COLLECTIVE_WAIT)
/**
 * enum hl_cs_wait_status - cs wait status
 * @CS_WAIT_STATUS_BUSY: cs was not completed yet
 * @CS_WAIT_STATUS_COMPLETED: cs completed
 * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
 */
enum hl_cs_wait_status {
	CS_WAIT_STATUS_BUSY,
	CS_WAIT_STATUS_COMPLETED,
	CS_WAIT_STATUS_GONE
};
static void job_wq_completion(struct work_struct *work);
static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				u64 timeout_us, u64 seq,
				enum hl_cs_wait_status *status, s64 *timestamp);
static void cs_do_release(struct kref *ref);
static void hl_sob_reset(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id);

	hdev->asic_funcs->reset_sob(hdev, hw_sob);

	hw_sob->need_reset = false;
}
void hl_sob_reset_error(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
		hw_sob->q_idx, hw_sob->sob_id);
}
void hw_sob_put(struct hl_hw_sob *hw_sob)
{
	if (hw_sob)
		kref_put(&hw_sob->kref, hl_sob_reset);
}

static void hw_sob_put_err(struct hl_hw_sob *hw_sob)
{
	if (hw_sob)
		kref_put(&hw_sob->kref, hl_sob_reset_error);
}

void hw_sob_get(struct hl_hw_sob *hw_sob)
{
	if (hw_sob)
		kref_get(&hw_sob->kref);
}
/**
 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
 * @sob_base: sob base id
 * @sob_mask: sob user mask, each bit represents a sob offset from sob base
 * @mask: generated mask
 *
 * Return: 0 if given parameters are valid
 */
int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
{
	int i;

	if (sob_mask == 0)
		return -EINVAL;

	if (sob_mask == 0x1) {
		*mask = ~(1 << (sob_base & 0x7));
	} else {
		/* find msb in order to verify sob range is valid */
		for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
			if (BIT(i) & sob_mask)
				break;

		if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
			return -EINVAL;

		*mask = ~sob_mask;
	}

	return 0;
}
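
/*
 * Usage sketch (illustrative, not taken from the original source): a caller
 * might do something like
 *
 *	u8 mask;
 *
 *	if (!hl_gen_sob_mask(sob_base_id, user_sob_mask, &mask))
 *		<write mask into the monitor arm packet>;
 *
 * For example, sob_base = 10 (offset 2 inside its 8-SOB group) with
 * sob_mask = 0x1 yields mask = ~(1 << 2) = 0xfb, while sob_mask = 0x7
 * yields mask = ~0x7 = 0xf8, assuming the three SOBs fit inside the
 * monitor window that starts at the group base.
 */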
static void hl_fence_release(struct kref *kref)
{
	struct hl_fence *fence =
		container_of(kref, struct hl_fence, refcount);
	struct hl_cs_compl *hl_cs_cmpl =
		container_of(fence, struct hl_cs_compl, base_fence);

	kfree(hl_cs_cmpl);
}

void hl_fence_put(struct hl_fence *fence)
{
	if (IS_ERR_OR_NULL(fence))
		return;
	kref_put(&fence->refcount, hl_fence_release);
}
void hl_fences_put(struct hl_fence **fence, int len)
{
	int i;

	for (i = 0; i < len; i++, fence++)
		hl_fence_put(*fence);
}

void hl_fence_get(struct hl_fence *fence)
{
	if (fence)
		kref_get(&fence->refcount);
}

static void hl_fence_init(struct hl_fence *fence, u64 sequence)
{
	kref_init(&fence->refcount);
	fence->cs_sequence = sequence;
	fence->error = 0;
	fence->timestamp = ktime_set(0, 0);
	fence->mcs_handling_done = false;
	init_completion(&fence->completion);
}
void cs_get(struct hl_cs *cs)
{
	kref_get(&cs->refcount);
}

static int cs_get_unless_zero(struct hl_cs *cs)
{
	return kref_get_unless_zero(&cs->refcount);
}

static void cs_put(struct hl_cs *cs)
{
	kref_put(&cs->refcount, cs_do_release);
}

static void cs_job_do_release(struct kref *ref)
{
	struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);

	kfree(job);
}

static void cs_job_put(struct hl_cs_job *job)
{
	kref_put(&job->refcount, cs_job_do_release);
}
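
/*
 * Rough lifetime summary (derived from the code in this file): a CS takes one
 * reference per external/H/W queue job that expects a completion, on top of
 * the reference taken at allocation. When the last reference drops, cs_put()
 * ends up in cs_do_release(), which completes any remaining jobs, signals the
 * fence and handles the SOB/TDR bookkeeping.
 */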
bool cs_needs_completion(struct hl_cs *cs)
{
	/* In case this is a staged CS, only the last CS in sequence should
	 * get a completion, any non staged CS will always get a completion
	 */
	if (cs->staged_cs && !cs->staged_last)
		return false;

	return true;
}

bool cs_needs_timeout(struct hl_cs *cs)
{
	/* In case this is a staged CS, only the first CS in sequence should
	 * get a timeout, any non staged CS will always get a timeout
	 */
	if (cs->staged_cs && !cs->staged_first)
		return false;

	return true;
}
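
/*
 * Quick reference, as implied by the two helpers above:
 *
 *	CS kind				completion	timeout (TDR)
 *	non-staged			yes		yes
 *	staged, first in sequence	no		yes
 *	staged, middle			no		no
 *	staged, last in sequence	yes		no
 *
 * A staged CS that is both first and last behaves like a non-staged CS.
 */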
static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
	/*
	 * Patched CB is created for external queues jobs, and for H/W queues
	 * jobs if the user CB was allocated by driver and MMU is disabled.
	 */
	return (job->queue_type == QUEUE_TYPE_EXT ||
			(job->queue_type == QUEUE_TYPE_HW &&
				job->is_kernel_allocated_cb &&
				!hdev->mmu_enable));
}
/*
 * cs_parser - parse the user command submission
 *
 * @hpriv : pointer to the private data of the fd
 * @job : pointer to the job that holds the command submission info
 *
 * The function parses the command submission of the user. It calls the
 * ASIC specific parser, which returns a list of memory blocks to send
 * to the device as different command buffers
 *
 */
static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_parser parser;
	int rc;

	parser.ctx_id = job->cs->ctx->asid;
	parser.cs_sequence = job->cs->sequence;
	parser.job_id = job->id;

	parser.hw_queue_id = job->hw_queue_id;
	parser.job_userptr_list = &job->userptr_list;
	parser.patched_cb = NULL;
	parser.user_cb = job->user_cb;
	parser.user_cb_size = job->user_cb_size;
	parser.queue_type = job->queue_type;
	parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
	job->patched_cb = NULL;
	parser.completion = cs_needs_completion(job->cs);

	rc = hdev->asic_funcs->cs_parser(hdev, &parser);

	if (is_cb_patched(hdev, job)) {
		if (!rc) {
			job->patched_cb = parser.patched_cb;
			job->job_cb_size = parser.patched_cb_size;
			job->contains_dma_pkt = parser.contains_dma_pkt;
			atomic_inc(&job->patched_cb->cs_cnt);
		}

		/*
		 * Whether the parsing worked or not, we don't need the
		 * original CB anymore because it was already parsed and
		 * won't be accessed again for this CS
		 */
		atomic_dec(&job->user_cb->cs_cnt);
		hl_cb_put(job->user_cb);
		job->user_cb = NULL;
	} else if (!rc) {
		job->job_cb_size = job->user_cb_size;
	}

	return rc;
}
static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct hl_cs *cs = job->cs;

	if (is_cb_patched(hdev, job)) {
		hl_userptr_delete_list(hdev, &job->userptr_list);

		/*
		 * We might arrive here from rollback and patched CB wasn't
		 * created, so we need to check it's not NULL
		 */
		if (job->patched_cb) {
			atomic_dec(&job->patched_cb->cs_cnt);
			hl_cb_put(job->patched_cb);
		}
	}

	/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
	 * enabled, the user CB isn't released in cs_parser() and thus should be
	 * released here.
	 * This is also true for INT queues jobs which were allocated by driver
	 */
	if (job->is_kernel_allocated_cb &&
		((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) ||
				job->queue_type == QUEUE_TYPE_INT)) {
		atomic_dec(&job->user_cb->cs_cnt);
		hl_cb_put(job->user_cb);
	}

	/*
	 * This is the only place where there can be multiple threads
	 * modifying the list at the same time
	 */
	spin_lock(&cs->job_lock);
	list_del(&job->cs_node);
	spin_unlock(&cs->job_lock);

	hl_debugfs_remove_job(hdev, job);

	/* We decrement reference only for a CS that gets completion
	 * because the reference was incremented only for this kind of CS
	 * right before it was scheduled.
	 *
	 * In staged submission, only the last CS marked as 'staged_last'
	 * gets completion, hence its release function will be called from here.
	 * As for all the rest CS's in the staged submission which do not get
	 * completion, their CS reference will be decremented by the
	 * 'staged_last' CS during the CS release flow.
	 * All relevant PQ CI counters will be incremented during the CS release
	 * flow by calling 'hl_hw_queue_update_ci'.
	 */
	if (cs_needs_completion(cs) &&
		(job->queue_type == QUEUE_TYPE_EXT ||
			job->queue_type == QUEUE_TYPE_HW))
		cs_put(cs);

	cs_job_put(job);
}
/*
 * hl_staged_cs_find_first - locate the first CS in this staged submission
 *
 * @hdev: pointer to device structure
 * @cs_seq: staged submission sequence number
 *
 * @note: This function must be called under 'hdev->cs_mirror_lock'
 *
 * Find and return a CS pointer with the given sequence
 */
struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
{
	struct hl_cs *cs;

	list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
		if (cs->staged_cs && cs->staged_first &&
				cs->sequence == cs_seq)
			return cs;

	return NULL;
}

/*
 * is_staged_cs_last_exists - returns true if the last CS in sequence exists
 *
 * @hdev: pointer to device structure
 * @cs: staged submission member
 *
 */
bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs *last_entry;

	last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
							staged_cs_node);

	if (last_entry->staged_last)
		return true;

	return false;
}
/*
 * staged_cs_get - get CS reference if this CS is a part of a staged CS
 *
 * @hdev: pointer to device structure
 * @cs: current CS
 *
 * Increment CS reference for every CS in this staged submission except for
 * the CS which gets completion.
 */
static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
{
	/* Only the last CS in this staged submission will get a completion.
	 * We must increment the reference for all other CS's in this
	 * staged submission.
	 * Once we get a completion we will release the whole staged submission.
	 */
	if (!cs->staged_last)
		cs_get(cs);
}

/*
 * staged_cs_put - put a CS in case it is part of staged submission
 *
 * @hdev: pointer to device structure
 * @cs: CS to put
 *
 * This function decrements a CS reference (for a non completion CS)
 */
static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
{
	/* We release all CS's in a staged submission except the last
	 * CS which we have never incremented its reference.
	 */
	if (!cs_needs_completion(cs))
		cs_put(cs);
}
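
/*
 * Rough staged-submission flow, as implied by the helpers above: for a staged
 * submission CS0 (staged_first) .. CSn (staged_last), staged_cs_get() adds a
 * reference to every CS except CSn; only CSn generates a completion, and its
 * release flow drops the references of the earlier CS's via staged_cs_put().
 * The TDR timer, in contrast, belongs to CS0 (see cs_needs_timeout()).
 */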
static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
{
	bool next_entry_found = false;
	struct hl_cs *next, *first_cs;

	if (!cs_needs_timeout(cs))
		return;

	spin_lock(&hdev->cs_mirror_lock);

	/* We need to handle tdr only once for the complete staged submission.
	 * Hence, we choose the CS that reaches this function first which is
	 * the CS marked as 'staged_last'.
	 * In case a single staged cs was submitted which has both first and last
	 * indications, then "hl_staged_cs_find_first" below will return NULL,
	 * since we removed the cs node from the list before getting here,
	 * in such cases just continue with the cs to cancel its TDR work.
	 */
	if (cs->staged_cs && cs->staged_last) {
		first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
		if (first_cs)
			cs = first_cs;
	}

	spin_unlock(&hdev->cs_mirror_lock);

	/* Don't cancel TDR in case this CS was timedout because we might be
	 * running from the TDR context
	 */
	if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
		return;

	if (cs->tdr_active)
		cancel_delayed_work_sync(&cs->work_tdr);

	spin_lock(&hdev->cs_mirror_lock);

	/* queue TDR for next CS */
	list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
		if (cs_needs_timeout(next)) {
			next_entry_found = true;
			break;
		}

	if (next_entry_found && !next->tdr_active) {
		next->tdr_active = true;
		schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
	}

	spin_unlock(&hdev->cs_mirror_lock);
}
459 * force_complete_multi_cs - complete all contexts that wait on multi-CS
461 * @hdev: pointer to habanalabs device structure
463 static void force_complete_multi_cs(struct hl_device *hdev)
467 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
468 struct multi_cs_completion *mcs_compl;
470 mcs_compl = &hdev->multi_cs_completion[i];
472 spin_lock(&mcs_compl->lock);
474 if (!mcs_compl->used) {
475 spin_unlock(&mcs_compl->lock);
479 /* when calling force complete no context should be waiting on
481 * We are calling the function as a protection for such case
482 * to free any pending context and print error message
485 "multi-CS completion context %d still waiting when calling force completion\n",
487 complete_all(&mcs_compl->completion);
488 spin_unlock(&mcs_compl->lock);
493 * complete_multi_cs - complete all waiting entities on multi-CS
495 * @hdev: pointer to habanalabs device structure
497 * The function signals a waiting entity that has an overlapping stream masters
498 * with the completed CS.
500 * - a completed CS worked on stream master QID 4, multi CS completion
501 * is actively waiting on stream master QIDs 3, 5. don't send signal as no
502 * common stream master QID
503 * - a completed CS worked on stream master QID 4, multi CS completion
504 * is actively waiting on stream master QIDs 3, 4. send signal as stream
505 * master QID 4 is common
507 static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
509 struct hl_fence *fence = cs->fence;
512 /* in case of multi CS check for completion only for the first CS */
513 if (cs->staged_cs && !cs->staged_first)
516 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
517 struct multi_cs_completion *mcs_compl;
519 mcs_compl = &hdev->multi_cs_completion[i];
520 if (!mcs_compl->used)
523 spin_lock(&mcs_compl->lock);
527 * 1. still waiting for completion
528 * 2. the completed CS has at least one overlapping stream
529 * master with the stream masters in the completion
531 if (mcs_compl->used &&
532 (fence->stream_master_qid_map &
533 mcs_compl->stream_master_qid_map)) {
534 /* extract the timestamp only of first completed CS */
535 if (!mcs_compl->timestamp)
536 mcs_compl->timestamp = ktime_to_ns(fence->timestamp);
538 complete_all(&mcs_compl->completion);
541 * Setting mcs_handling_done inside the lock ensures
* at least one fence has mcs_handling_done set to
543 * true before wait for mcs finish. This ensures at
544 * least one CS will be set as completed when polling
547 fence->mcs_handling_done = true;
550 spin_unlock(&mcs_compl->lock);
552 /* In case CS completed without mcs completion initialized */
553 fence->mcs_handling_done = true;
556 static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
558 struct hl_cs_compl *hl_cs_cmpl)
560 /* Skip this handler if the cs wasn't submitted, to avoid putting
561 * the hw_sob twice, since this case already handled at this point,
562 * also skip if the hw_sob pointer wasn't set.
564 if (!hl_cs_cmpl->hw_sob || !cs->submitted)
567 spin_lock(&hl_cs_cmpl->lock);
570 * we get refcount upon reservation of signals or signal/wait cs for the
571 * hw_sob object, and need to put it when the first staged cs
* (which contains the encaps signals) or cs signal/wait is completed.
574 if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
575 (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
576 (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) ||
577 (!!hl_cs_cmpl->encaps_signals)) {
579 "CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n",
582 hl_cs_cmpl->hw_sob->sob_id,
583 hl_cs_cmpl->sob_val);
585 hw_sob_put(hl_cs_cmpl->hw_sob);
587 if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
588 hdev->asic_funcs->reset_sob_group(hdev,
589 hl_cs_cmpl->sob_group);
592 spin_unlock(&hl_cs_cmpl->lock);
595 static void cs_do_release(struct kref *ref)
597 struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
598 struct hl_device *hdev = cs->ctx->hdev;
599 struct hl_cs_job *job, *tmp;
600 struct hl_cs_compl *hl_cs_cmpl =
601 container_of(cs->fence, struct hl_cs_compl, base_fence);
603 cs->completed = true;
606 * Although if we reached here it means that all external jobs have
607 * finished, because each one of them took refcnt to CS, we still
608 * need to go over the internal jobs and complete them. Otherwise, we
609 * will have leaked memory and what's worse, the CS object (and
610 * potentially the CTX object) could be released, while the JOB
611 * still holds a pointer to them (but no reference).
613 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
614 complete_job(hdev, job);
616 if (!cs->submitted) {
618 * In case the wait for signal CS was submitted, the fence put
619 * occurs in init_signal_wait_cs() or collective_wait_init_cs()
620 * right before hanging on the PQ.
622 if (cs->type == CS_TYPE_WAIT ||
623 cs->type == CS_TYPE_COLLECTIVE_WAIT)
624 hl_fence_put(cs->signal_fence);
629 /* Need to update CI for all queue jobs that does not get completion */
630 hl_hw_queue_update_ci(cs);
632 /* remove CS from CS mirror list */
633 spin_lock(&hdev->cs_mirror_lock);
634 list_del_init(&cs->mirror_node);
635 spin_unlock(&hdev->cs_mirror_lock);
637 cs_handle_tdr(hdev, cs);
640 /* the completion CS decrements reference for the entire
643 if (cs->staged_last) {
644 struct hl_cs *staged_cs, *tmp;
646 list_for_each_entry_safe(staged_cs, tmp,
647 &cs->staged_cs_node, staged_cs_node)
648 staged_cs_put(hdev, staged_cs);
651 /* A staged CS will be a member in the list only after it
652 * was submitted. We used 'cs_mirror_lock' when inserting
653 * it to list so we will use it again when removing it
656 spin_lock(&hdev->cs_mirror_lock);
657 list_del(&cs->staged_cs_node);
658 spin_unlock(&hdev->cs_mirror_lock);
661 /* decrement refcount to handle when first staged cs
662 * with encaps signals is completed.
664 if (hl_cs_cmpl->encaps_signals)
665 kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount,
666 hl_encaps_handle_do_release);
669 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
670 && cs->encaps_signals)
671 kref_put(&cs->encaps_sig_hdl->refcount,
672 hl_encaps_handle_do_release);
675 /* Must be called before hl_ctx_put because inside we use ctx to get
678 hl_debugfs_remove_cs(cs);
682 /* We need to mark an error for not submitted because in that case
683 * the hl fence release flow is different. Mainly, we don't need
684 * to handle hw_sob for signal/wait
687 cs->fence->error = -ETIMEDOUT;
688 else if (cs->aborted)
689 cs->fence->error = -EIO;
690 else if (!cs->submitted)
691 cs->fence->error = -EBUSY;
693 if (unlikely(cs->skip_reset_on_timeout)) {
695 "Command submission %llu completed after %llu (s)\n",
697 div_u64(jiffies - cs->submission_time_jiffies, HZ));
701 cs->fence->timestamp = ktime_get();
702 complete_all(&cs->fence->completion);
703 complete_multi_cs(hdev, cs);
705 cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl);
707 hl_fence_put(cs->fence);
709 kfree(cs->jobs_in_queue_cnt);
713 static void cs_timedout(struct work_struct *work)
715 struct hl_device *hdev;
717 struct hl_cs *cs = container_of(work, struct hl_cs,
719 bool skip_reset_on_timeout = cs->skip_reset_on_timeout;
721 rc = cs_get_unless_zero(cs);
725 if ((!cs->submitted) || (cs->completed)) {
730 /* Mark the CS is timed out so we won't try to cancel its TDR */
731 if (likely(!skip_reset_on_timeout))
734 hdev = cs->ctx->hdev;
736 /* Save only the first CS timeout parameters */
737 rc = atomic_cmpxchg(&hdev->last_error.cs_write_disable, 0, 1);
739 hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
740 hdev->last_error.cs_timeout_timestamp = ktime_get();
741 hdev->last_error.cs_timeout_seq = cs->sequence;
747 "Signal command submission %llu has not finished in time!\n",
753 "Wait command submission %llu has not finished in time!\n",
757 case CS_TYPE_COLLECTIVE_WAIT:
759 "Collective Wait command submission %llu has not finished in time!\n",
765 "Command submission %llu has not finished in time!\n",
770 rc = hl_state_dump(hdev);
772 dev_err(hdev->dev, "Error during system state dump %d\n", rc);
776 if (likely(!skip_reset_on_timeout)) {
777 if (hdev->reset_on_lockup)
778 hl_device_reset(hdev, HL_DRV_RESET_TDR);
780 hdev->reset_info.needs_reset = true;
784 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
785 enum hl_cs_type cs_type, u64 user_sequence,
786 struct hl_cs **cs_new, u32 flags, u32 timeout)
788 struct hl_cs_counters_atomic *cntr;
789 struct hl_fence *other = NULL;
790 struct hl_cs_compl *cs_cmpl;
794 cntr = &hdev->aggregated_cs_counters;
796 cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
798 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
801 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
802 atomic64_inc(&cntr->out_of_mem_drop_cnt);
806 /* increment refcnt for context */
807 hl_ctx_get(hdev, ctx);
810 cs->submitted = false;
811 cs->completed = false;
813 cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
814 cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
815 cs->timeout_jiffies = timeout;
816 cs->skip_reset_on_timeout =
817 hdev->reset_info.skip_reset_on_timeout ||
818 !!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
819 cs->submission_time_jiffies = jiffies;
820 INIT_LIST_HEAD(&cs->job_list);
821 INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
822 kref_init(&cs->refcount);
823 spin_lock_init(&cs->job_lock);
825 cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
827 cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL);
830 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
831 atomic64_inc(&cntr->out_of_mem_drop_cnt);
836 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
837 sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
838 if (!cs->jobs_in_queue_cnt)
839 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
840 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
842 if (!cs->jobs_in_queue_cnt) {
843 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
844 atomic64_inc(&cntr->out_of_mem_drop_cnt);
849 cs_cmpl->hdev = hdev;
850 cs_cmpl->type = cs->type;
851 spin_lock_init(&cs_cmpl->lock);
852 cs->fence = &cs_cmpl->base_fence;
854 spin_lock(&ctx->cs_lock);
856 cs_cmpl->cs_seq = ctx->cs_sequence;
857 other = ctx->cs_pending[cs_cmpl->cs_seq &
858 (hdev->asic_prop.max_pending_cs - 1)];
860 if (other && !completion_done(&other->completion)) {
861 /* If the following statement is true, it means we have reached
862 * a point in which only part of the staged submission was
863 * submitted and we don't have enough room in the 'cs_pending'
864 * array for the rest of the submission.
865 * This causes a deadlock because this CS will never be
866 * completed as it depends on future CS's for completion.
868 if (other->cs_sequence == user_sequence)
869 dev_crit_ratelimited(hdev->dev,
870 "Staged CS %llu deadlock due to lack of resources",
873 dev_dbg_ratelimited(hdev->dev,
"Rejecting CS because of too many in-flight CS\n");
875 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
876 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
882 hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
884 cs->sequence = cs_cmpl->cs_seq;
886 ctx->cs_pending[cs_cmpl->cs_seq &
887 (hdev->asic_prop.max_pending_cs - 1)] =
888 &cs_cmpl->base_fence;
891 hl_fence_get(&cs_cmpl->base_fence);
895 spin_unlock(&ctx->cs_lock);
902 spin_unlock(&ctx->cs_lock);
903 kfree(cs->jobs_in_queue_cnt);
912 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
914 struct hl_cs_job *job, *tmp;
916 staged_cs_put(hdev, cs);
918 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
919 complete_job(hdev, job);
922 void hl_cs_rollback_all(struct hl_device *hdev)
925 struct hl_cs *cs, *tmp;
927 flush_workqueue(hdev->sob_reset_wq);
929 /* flush all completions before iterating over the CS mirror list in
930 * order to avoid a race with the release functions
932 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
933 flush_workqueue(hdev->cq_wq[i]);
935 /* Make sure we don't have leftovers in the CS mirror list */
936 list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
939 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
940 cs->ctx->asid, cs->sequence);
941 cs_rollback(hdev, cs);
945 force_complete_multi_cs(hdev);
949 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
951 struct hl_user_pending_interrupt *pend;
954 spin_lock_irqsave(&interrupt->wait_list_lock, flags);
955 list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
956 pend->fence.error = -EIO;
957 complete_all(&pend->fence.completion);
959 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
962 void hl_release_pending_user_interrupts(struct hl_device *hdev)
964 struct asic_fixed_properties *prop = &hdev->asic_prop;
965 struct hl_user_interrupt *interrupt;
968 if (!prop->user_interrupt_count)
/* We iterate through the user interrupt requests and wake up all
972 * user threads waiting for interrupt completion. We iterate the
973 * list under a lock, this is why all user threads, once awake,
974 * will wait on the same lock and will release the waiting object upon
978 for (i = 0 ; i < prop->user_interrupt_count ; i++) {
979 interrupt = &hdev->user_interrupt[i];
980 wake_pending_user_interrupt_threads(interrupt);
983 interrupt = &hdev->common_user_interrupt;
984 wake_pending_user_interrupt_threads(interrupt);
987 static void job_wq_completion(struct work_struct *work)
989 struct hl_cs_job *job = container_of(work, struct hl_cs_job,
991 struct hl_cs *cs = job->cs;
992 struct hl_device *hdev = cs->ctx->hdev;
994 /* job is no longer needed */
995 complete_job(hdev, job);
998 static int validate_queue_index(struct hl_device *hdev,
999 struct hl_cs_chunk *chunk,
1000 enum hl_queue_type *queue_type,
1001 bool *is_kernel_allocated_cb)
1003 struct asic_fixed_properties *asic = &hdev->asic_prop;
1004 struct hw_queue_properties *hw_queue_prop;
1006 /* This must be checked here to prevent out-of-bounds access to
1007 * hw_queues_props array
1009 if (chunk->queue_index >= asic->max_queues) {
1010 dev_err(hdev->dev, "Queue index %d is invalid\n",
1011 chunk->queue_index);
1015 hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
1017 if (hw_queue_prop->type == QUEUE_TYPE_NA) {
1018 dev_err(hdev->dev, "Queue index %d is invalid\n",
1019 chunk->queue_index);
1023 if (hw_queue_prop->driver_only) {
1025 "Queue index %d is restricted for the kernel driver\n",
1026 chunk->queue_index);
1030 /* When hw queue type isn't QUEUE_TYPE_HW,
1031 * USER_ALLOC_CB flag shall be referred as "don't care".
1033 if (hw_queue_prop->type == QUEUE_TYPE_HW) {
1034 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
1035 if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
1037 "Queue index %d doesn't support user CB\n",
1038 chunk->queue_index);
1042 *is_kernel_allocated_cb = false;
1044 if (!(hw_queue_prop->cb_alloc_flags &
1047 "Queue index %d doesn't support kernel CB\n",
1048 chunk->queue_index);
1052 *is_kernel_allocated_cb = true;
1055 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
1059 *queue_type = hw_queue_prop->type;
1063 static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
1064 struct hl_cb_mgr *cb_mgr,
1065 struct hl_cs_chunk *chunk)
1070 cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
1072 cb = hl_cb_get(hdev, cb_mgr, cb_handle);
1074 dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
1078 if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
1079 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
1083 atomic_inc(&cb->cs_cnt);
1092 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
1093 enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
1095 struct hl_cs_job *job;
1097 job = kzalloc(sizeof(*job), GFP_ATOMIC);
1099 job = kzalloc(sizeof(*job), GFP_KERNEL);
1104 kref_init(&job->refcount);
1105 job->queue_type = queue_type;
1106 job->is_kernel_allocated_cb = is_kernel_allocated_cb;
1108 if (is_cb_patched(hdev, job))
1109 INIT_LIST_HEAD(&job->userptr_list);
1111 if (job->queue_type == QUEUE_TYPE_EXT)
1112 INIT_WORK(&job->finish_work, job_wq_completion);
1117 static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
1119 if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
1120 return CS_TYPE_SIGNAL;
1121 else if (cs_type_flags & HL_CS_FLAGS_WAIT)
1122 return CS_TYPE_WAIT;
1123 else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
1124 return CS_TYPE_COLLECTIVE_WAIT;
1125 else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY)
1126 return CS_RESERVE_SIGNALS;
1127 else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
1128 return CS_UNRESERVE_SIGNALS;
1130 return CS_TYPE_DEFAULT;
1133 static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
1135 struct hl_device *hdev = hpriv->hdev;
1136 struct hl_ctx *ctx = hpriv->ctx;
1137 u32 cs_type_flags, num_chunks;
1138 enum hl_device_status status;
1139 enum hl_cs_type cs_type;
1141 if (!hl_device_operational(hdev, &status)) {
1145 if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1146 !hdev->supports_staged_submission) {
1147 dev_err(hdev->dev, "staged submission not supported");
1151 cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
1153 if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
1155 "CS type flags are mutually exclusive, context %d\n",
1160 cs_type = hl_cs_get_cs_type(cs_type_flags);
1161 num_chunks = args->in.num_chunks_execute;
1163 if (unlikely((cs_type != CS_TYPE_DEFAULT) &&
1164 !hdev->supports_sync_stream)) {
1165 dev_err(hdev->dev, "Sync stream CS is not supported\n");
1169 if (cs_type == CS_TYPE_DEFAULT) {
1172 "Got execute CS with 0 chunks, context %d\n",
1176 } else if (num_chunks != 1) {
1178 "Sync stream CS mandates one chunk only, context %d\n",
1186 static int hl_cs_copy_chunk_array(struct hl_device *hdev,
1187 struct hl_cs_chunk **cs_chunk_array,
1188 void __user *chunks, u32 num_chunks,
1193 if (num_chunks > HL_MAX_JOBS_PER_CS) {
1194 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1195 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1197 "Number of chunks can NOT be larger than %d\n",
1198 HL_MAX_JOBS_PER_CS);
1202 *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
1204 if (!*cs_chunk_array)
1205 *cs_chunk_array = kmalloc_array(num_chunks,
1206 sizeof(**cs_chunk_array), GFP_KERNEL);
1207 if (!*cs_chunk_array) {
1208 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1209 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1213 size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
1214 if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
1215 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1216 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1217 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
1218 kfree(*cs_chunk_array);
1225 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
1226 u64 sequence, u32 flags,
1227 u32 encaps_signal_handle)
1229 if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
1232 cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
1233 cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);
1235 if (cs->staged_first) {
1236 /* Staged CS sequence is the first CS sequence */
1237 INIT_LIST_HEAD(&cs->staged_cs_node);
1238 cs->staged_sequence = cs->sequence;
1240 if (cs->encaps_signals)
1241 cs->encaps_sig_hdl_id = encaps_signal_handle;
1243 /* User sequence will be validated in 'hl_hw_queue_schedule_cs'
1244 * under the cs_mirror_lock
1246 cs->staged_sequence = sequence;
1249 /* Increment CS reference if needed */
1250 staged_cs_get(hdev, cs);
1252 cs->staged_cs = true;
1257 static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
1261 for (i = 0; i < hdev->stream_master_qid_arr_size; i++)
1262 if (qid == hdev->stream_master_qid_arr[i])
1268 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
1269 u32 num_chunks, u64 *cs_seq, u32 flags,
1270 u32 encaps_signals_handle, u32 timeout,
1271 u16 *signal_initial_sob_count)
1273 bool staged_mid, int_queues_only = true;
1274 struct hl_device *hdev = hpriv->hdev;
1275 struct hl_cs_chunk *cs_chunk_array;
1276 struct hl_cs_counters_atomic *cntr;
1277 struct hl_ctx *ctx = hpriv->ctx;
1278 struct hl_cs_job *job;
1282 u8 stream_master_qid_map = 0;
1285 cntr = &hdev->aggregated_cs_counters;
1286 user_sequence = *cs_seq;
1287 *cs_seq = ULLONG_MAX;
1289 rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1294 if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1295 !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
1300 rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
1301 staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
1304 goto free_cs_chunk_array;
1306 *cs_seq = cs->sequence;
1308 hl_debugfs_add_cs(cs);
1310 rc = cs_staged_submission(hdev, cs, user_sequence, flags,
1311 encaps_signals_handle);
1313 goto free_cs_object;
1315 /* If this is a staged submission we must return the staged sequence
1316 * rather than the internal CS sequence
1319 *cs_seq = cs->staged_sequence;
1321 /* Validate ALL the CS chunks before submitting the CS */
1322 for (i = 0 ; i < num_chunks ; i++) {
1323 struct hl_cs_chunk *chunk = &cs_chunk_array[i];
1324 enum hl_queue_type queue_type;
1325 bool is_kernel_allocated_cb;
1327 rc = validate_queue_index(hdev, chunk, &queue_type,
1328 &is_kernel_allocated_cb);
1330 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1331 atomic64_inc(&cntr->validation_drop_cnt);
1332 goto free_cs_object;
1335 if (is_kernel_allocated_cb) {
1336 cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
1339 &ctx->cs_counters.validation_drop_cnt);
1340 atomic64_inc(&cntr->validation_drop_cnt);
1342 goto free_cs_object;
1345 cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
1348 if (queue_type == QUEUE_TYPE_EXT ||
1349 queue_type == QUEUE_TYPE_HW) {
1350 int_queues_only = false;
1353 * store which stream are being used for external/HW
1356 if (hdev->supports_wait_for_multi_cs)
1357 stream_master_qid_map |=
1358 get_stream_master_qid_mask(hdev,
1359 chunk->queue_index);
1362 job = hl_cs_allocate_job(hdev, queue_type,
1363 is_kernel_allocated_cb);
1365 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1366 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1367 dev_err(hdev->dev, "Failed to allocate a new job\n");
1369 if (is_kernel_allocated_cb)
1372 goto free_cs_object;
1378 job->user_cb_size = chunk->cb_size;
1379 job->hw_queue_id = chunk->queue_index;
1381 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1383 list_add_tail(&job->cs_node, &cs->job_list);
1386 * Increment CS reference. When CS reference is 0, CS is
1387 * done and can be signaled to user and free all its resources
1388 * Only increment for JOB on external or H/W queues, because
1389 * only for those JOBs we get completion
1391 if (cs_needs_completion(cs) &&
1392 (job->queue_type == QUEUE_TYPE_EXT ||
1393 job->queue_type == QUEUE_TYPE_HW))
1396 hl_debugfs_add_job(hdev, job);
1398 rc = cs_parser(hpriv, job);
1400 atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
1401 atomic64_inc(&cntr->parsing_drop_cnt);
1403 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
1404 cs->ctx->asid, cs->sequence, job->id, rc);
1405 goto free_cs_object;
1409 /* We allow a CS with any queue type combination as long as it does
1410 * not get a completion
1412 if (int_queues_only && cs_needs_completion(cs)) {
1413 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1414 atomic64_inc(&cntr->validation_drop_cnt);
1416 "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
1417 cs->ctx->asid, cs->sequence);
1419 goto free_cs_object;
1423 * store the (external/HW queues) streams used by the CS in the
1424 * fence object for multi-CS completion
1426 if (hdev->supports_wait_for_multi_cs)
1427 cs->fence->stream_master_qid_map = stream_master_qid_map;
1429 rc = hl_hw_queue_schedule_cs(cs);
1433 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
1434 cs->ctx->asid, cs->sequence, rc);
1435 goto free_cs_object;
1438 *signal_initial_sob_count = cs->initial_sob_count;
1440 rc = HL_CS_STATUS_SUCCESS;
1444 atomic_dec(&cb->cs_cnt);
1447 cs_rollback(hdev, cs);
1448 *cs_seq = ULLONG_MAX;
1449 /* The path below is both for good and erroneous exits */
1451 /* We finished with the CS in this function, so put the ref */
1453 free_cs_chunk_array:
1454 kfree(cs_chunk_array);
1459 static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
1462 struct hl_device *hdev = hpriv->hdev;
1463 struct hl_ctx *ctx = hpriv->ctx;
1464 bool need_soft_reset = false;
1465 int rc = 0, do_ctx_switch;
1466 void __user *chunks;
1467 u32 num_chunks, tmp;
1471 do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
1473 if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
1474 mutex_lock(&hpriv->restore_phase_mutex);
1476 if (do_ctx_switch) {
1477 rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
1479 dev_err_ratelimited(hdev->dev,
1480 "Failed to switch to context %d, rejecting CS! %d\n",
1483 * If we timedout, or if the device is not IDLE
1484 * while we want to do context-switch (-EBUSY),
1485 * we need to soft-reset because QMAN is
1486 * probably stuck. However, we can't call to
1487 * reset here directly because of deadlock, so
1488 * need to do it at the very end of this
1491 if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
1492 need_soft_reset = true;
1493 mutex_unlock(&hpriv->restore_phase_mutex);
1498 hdev->asic_funcs->restore_phase_topology(hdev);
1500 chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
1501 num_chunks = args->in.num_chunks_restore;
1505 "Need to run restore phase but restore CS is empty\n");
1508 rc = cs_ioctl_default(hpriv, chunks, num_chunks,
1509 cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count);
1512 mutex_unlock(&hpriv->restore_phase_mutex);
1516 "Failed to submit restore CS for context %d (%d)\n",
1521 /* Need to wait for restore completion before execution phase */
1523 enum hl_cs_wait_status status;
1525 ret = _hl_cs_wait_ioctl(hdev, ctx,
1526 jiffies_to_usecs(hdev->timeout_jiffies),
1527 *cs_seq, &status, NULL);
1529 if (ret == -ERESTARTSYS) {
1530 usleep_range(100, 200);
1535 "Restore CS for context %d failed to complete %d\n",
1542 ctx->thread_ctx_switch_wait_token = 1;
1544 } else if (!ctx->thread_ctx_switch_wait_token) {
1545 rc = hl_poll_timeout_memory(hdev,
1546 &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
1547 100, jiffies_to_usecs(hdev->timeout_jiffies), false);
1549 if (rc == -ETIMEDOUT) {
1551 "context switch phase timeout (%d)\n", tmp);
1557 if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
1558 hl_device_reset(hdev, 0);
* hl_cs_signal_sob_wraparound_handler: handle SOB value wraparound case.
1565 * if the SOB value reaches the max value move to the other SOB reserved
1567 * @hdev: pointer to device structure
1568 * @q_idx: stream queue index
1569 * @hw_sob: the H/W SOB used in this signal CS.
1570 * @count: signals count
1571 * @encaps_sig: tells whether it's reservation for encaps signals or not.
1573 * Note that this function must be called while hw_queues_lock is taken.
1575 int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
1576 struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig)
1579 struct hl_sync_stream_properties *prop;
1580 struct hl_hw_sob *sob = *hw_sob, *other_sob;
1581 u8 other_sob_offset;
1583 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1587 /* check for wraparound */
1588 if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
1590 * Decrement as we reached the max value.
1591 * The release function won't be called here as we've
1592 * just incremented the refcount right before calling this
1595 hw_sob_put_err(sob);
1598 * check the other sob value, if it still in use then fail
1599 * otherwise make the switch
1601 other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
1602 other_sob = &prop->hw_sob[other_sob_offset];
1604 if (kref_read(&other_sob->kref) != 1) {
1605 dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n",
1611 * next_sob_val always points to the next available signal
1612 * in the sob, so in encaps signals it will be the next one
1613 * after reserving the required amount.
1616 prop->next_sob_val = count + 1;
1618 prop->next_sob_val = count;
1620 /* only two SOBs are currently in use */
1621 prop->curr_sob_offset = other_sob_offset;
1622 *hw_sob = other_sob;
1625 * check if other_sob needs reset, then do it before using it
1626 * for the reservation or the next signal cs.
1627 * we do it here, and for both encaps and regular signal cs
1628 * cases in order to avoid possible races of two kref_put
1629 * of the sob which can occur at the same time if we move the
1630 * sob reset(kref_put) to cs_do_release function.
1631 * in addition, if we have combination of cs signal and
1632 * encaps, and at the point we need to reset the sob there was
1633 * no more reservations and only signal cs keep coming,
1634 * in such case we need signal_cs to put the refcount and
1637 if (other_sob->need_reset)
1638 hw_sob_put(other_sob);
1641 /* set reset indication for the sob */
1642 sob->need_reset = true;
1643 hw_sob_get(other_sob);
1646 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
1647 prop->curr_sob_offset, q_idx);
1649 prop->next_sob_val += count;
1655 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
1656 struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx,
1657 bool encaps_signals)
1659 u64 *signal_seq_arr = NULL;
1660 u32 size_to_copy, signal_seq_arr_len;
1663 if (encaps_signals) {
1664 *signal_seq = chunk->encaps_signal_seq;
1668 signal_seq_arr_len = chunk->num_signal_seq_arr;
1670 /* currently only one signal seq is supported */
1671 if (signal_seq_arr_len != 1) {
1672 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1673 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1675 "Wait for signal CS supports only one signal CS seq\n");
1679 signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1680 sizeof(*signal_seq_arr),
1682 if (!signal_seq_arr)
1683 signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1684 sizeof(*signal_seq_arr),
1686 if (!signal_seq_arr) {
1687 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1688 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1692 size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr);
1693 if (copy_from_user(signal_seq_arr,
1694 u64_to_user_ptr(chunk->signal_seq_arr),
1696 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1697 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1699 "Failed to copy signal seq array from user\n");
1704 /* currently it is guaranteed to have only one signal seq */
1705 *signal_seq = signal_seq_arr[0];
1708 kfree(signal_seq_arr);
1713 static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
1714 struct hl_ctx *ctx, struct hl_cs *cs,
1715 enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset)
1717 struct hl_cs_counters_atomic *cntr;
1718 struct hl_cs_job *job;
1722 cntr = &hdev->aggregated_cs_counters;
1724 job = hl_cs_allocate_job(hdev, q_type, true);
1726 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1727 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1728 dev_err(hdev->dev, "Failed to allocate a new job\n");
1732 if (cs->type == CS_TYPE_WAIT)
1733 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
1735 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
1737 cb = hl_cb_kernel_create(hdev, cb_size,
1738 q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
1740 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1741 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1749 atomic_inc(&job->user_cb->cs_cnt);
1750 job->user_cb_size = cb_size;
1751 job->hw_queue_id = q_idx;
1753 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
1754 && cs->encaps_signals)
1755 job->encaps_sig_wait_offset = encaps_signal_offset;
1757 * No need in parsing, user CB is the patched CB.
1758 * We call hl_cb_destroy() out of two reasons - we don't need the CB in
1759 * the CB idr anymore and to decrement its refcount as it was
1760 * incremented inside hl_cb_kernel_create().
1762 job->patched_cb = job->user_cb;
1763 job->job_cb_size = job->user_cb_size;
1764 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1766 /* increment refcount as for external queues we get completion */
1769 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1771 list_add_tail(&job->cs_node, &cs->job_list);
1773 hl_debugfs_add_job(hdev, job);
1778 static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
1779 u32 q_idx, u32 count,
1780 u32 *handle_id, u32 *sob_addr,
1783 struct hw_queue_properties *hw_queue_prop;
1784 struct hl_sync_stream_properties *prop;
1785 struct hl_device *hdev = hpriv->hdev;
1786 struct hl_cs_encaps_sig_handle *handle;
1787 struct hl_encaps_signals_mgr *mgr;
1788 struct hl_hw_sob *hw_sob;
1792 if (count >= HL_MAX_SOB_VAL) {
1793 dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n",
1799 if (q_idx >= hdev->asic_prop.max_queues) {
1800 dev_err(hdev->dev, "Queue index %d is invalid\n",
1806 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
1808 if (!hw_queue_prop->supports_sync_stream) {
1810 "Queue index %d does not support sync stream operations\n",
1816 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1818 handle = kzalloc(sizeof(*handle), GFP_KERNEL);
1824 handle->count = count;
1826 hl_ctx_get(hdev, hpriv->ctx);
1827 handle->ctx = hpriv->ctx;
1828 mgr = &hpriv->ctx->sig_mgr;
1830 spin_lock(&mgr->lock);
1831 hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC);
1832 spin_unlock(&mgr->lock);
1835 dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
1840 handle->id = hdl_id;
1841 handle->q_idx = q_idx;
1842 handle->hdev = hdev;
1843 kref_init(&handle->refcount);
1845 hdev->asic_funcs->hw_queues_lock(hdev);
1847 hw_sob = &prop->hw_sob[prop->curr_sob_offset];
1850 * Increment the SOB value by count by user request
1851 * to reserve those signals
1852 * check if the signals amount to reserve is not exceeding the max sob
1853 * value, if yes then switch sob.
1855 rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count,
1858 dev_err(hdev->dev, "Failed to switch SOB\n");
1859 hdev->asic_funcs->hw_queues_unlock(hdev);
1863 /* set the hw_sob to the handle after calling the sob wraparound handler
1864 * since sob could have changed.
1866 handle->hw_sob = hw_sob;
1868 /* store the current sob value for unreserve validity check, and
1869 * signal offset support
1871 handle->pre_sob_val = prop->next_sob_val - handle->count;
1873 *signals_count = prop->next_sob_val;
1874 hdev->asic_funcs->hw_queues_unlock(hdev);
1876 *sob_addr = handle->hw_sob->sob_addr;
1877 *handle_id = hdl_id;
1880 "Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n",
1881 hw_sob->sob_id, handle->hw_sob->sob_addr,
1882 prop->next_sob_val - 1, q_idx, hdl_id);
1886 spin_lock(&mgr->lock);
1887 idr_remove(&mgr->handles, hdl_id);
1888 spin_unlock(&mgr->lock);
1891 hl_ctx_put(handle->ctx);
1898 static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
1900 struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
1901 struct hl_sync_stream_properties *prop;
1902 struct hl_device *hdev = hpriv->hdev;
1903 struct hl_encaps_signals_mgr *mgr;
1904 struct hl_hw_sob *hw_sob;
1905 u32 q_idx, sob_addr;
1908 mgr = &hpriv->ctx->sig_mgr;
1910 spin_lock(&mgr->lock);
1911 encaps_sig_hdl = idr_find(&mgr->handles, handle_id);
1912 if (encaps_sig_hdl) {
1913 dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n",
1914 handle_id, encaps_sig_hdl->hw_sob->sob_addr,
1915 encaps_sig_hdl->count);
1917 hdev->asic_funcs->hw_queues_lock(hdev);
1919 q_idx = encaps_sig_hdl->q_idx;
1920 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1921 hw_sob = &prop->hw_sob[prop->curr_sob_offset];
1922 sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
1924 /* Check if sob_val got out of sync due to other
1925 * signal submission requests which were handled
1926 * between the reserve-unreserve calls or SOB switch
1927 * upon reaching SOB max value.
1929 if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count
1930 != prop->next_sob_val ||
1931 sob_addr != encaps_sig_hdl->hw_sob->sob_addr) {
1932 dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n",
1933 encaps_sig_hdl->pre_sob_val,
1934 (prop->next_sob_val - encaps_sig_hdl->count));
1936 hdev->asic_funcs->hw_queues_unlock(hdev);
1942 * Decrement the SOB value by count by user request
1943 * to unreserve those signals
1945 prop->next_sob_val -= encaps_sig_hdl->count;
1947 hdev->asic_funcs->hw_queues_unlock(hdev);
1951 /* Release the id and free allocated memory of the handle */
1952 idr_remove(&mgr->handles, handle_id);
1953 hl_ctx_put(encaps_sig_hdl->ctx);
1954 kfree(encaps_sig_hdl);
1957 dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n");
1960 spin_unlock(&mgr->lock);
1965 static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
1966 void __user *chunks, u32 num_chunks,
1967 u64 *cs_seq, u32 flags, u32 timeout,
1968 u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count)
1970 struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL;
1971 bool handle_found = false, is_wait_cs = false,
1972 wait_cs_submitted = false,
1973 cs_encaps_signals = false;
1974 struct hl_cs_chunk *cs_chunk_array, *chunk;
1975 bool staged_cs_with_encaps_signals = false;
1976 struct hw_queue_properties *hw_queue_prop;
1977 struct hl_device *hdev = hpriv->hdev;
1978 struct hl_cs_compl *sig_waitcs_cmpl;
1979 u32 q_idx, collective_engine_id = 0;
1980 struct hl_cs_counters_atomic *cntr;
1981 struct hl_fence *sig_fence = NULL;
1982 struct hl_ctx *ctx = hpriv->ctx;
1983 enum hl_queue_type q_type;
1988 cntr = &hdev->aggregated_cs_counters;
1989 *cs_seq = ULLONG_MAX;
1991 rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1996 /* currently it is guaranteed to have only one chunk */
1997 chunk = &cs_chunk_array[0];
1999 if (chunk->queue_index >= hdev->asic_prop.max_queues) {
2000 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2001 atomic64_inc(&cntr->validation_drop_cnt);
2002 dev_err(hdev->dev, "Queue index %d is invalid\n",
2003 chunk->queue_index);
2005 goto free_cs_chunk_array;
2008 q_idx = chunk->queue_index;
2009 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
2010 q_type = hw_queue_prop->type;
2012 if (!hw_queue_prop->supports_sync_stream) {
2013 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2014 atomic64_inc(&cntr->validation_drop_cnt);
2016 "Queue index %d does not support sync stream operations\n",
2019 goto free_cs_chunk_array;
2022 if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
2023 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
2024 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2025 atomic64_inc(&cntr->validation_drop_cnt);
2027 "Queue index %d is invalid\n", q_idx);
2029 goto free_cs_chunk_array;
2032 if (!hdev->nic_ports_mask) {
2033 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2034 atomic64_inc(&cntr->validation_drop_cnt);
2036 "Collective operations not supported when NIC ports are disabled");
2038 goto free_cs_chunk_array;
2041 collective_engine_id = chunk->collective_engine_id;
2044 is_wait_cs = !!(cs_type == CS_TYPE_WAIT ||
2045 cs_type == CS_TYPE_COLLECTIVE_WAIT);
2047 cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
2050 rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq,
2051 ctx, cs_encaps_signals);
2053 goto free_cs_chunk_array;
2055 if (cs_encaps_signals) {
2056 /* check if cs sequence has encapsulated
2062 spin_lock(&ctx->sig_mgr.lock);
2063 idp = &ctx->sig_mgr.handles;
2064 idr_for_each_entry(idp, encaps_sig_hdl, id) {
2065 if (encaps_sig_hdl->cs_seq == signal_seq) {
2066 handle_found = true;
2067 /* get refcount to protect removing
2068 * this handle from idr, needed when
2069 * multiple wait cs are used with offset
2070 * to wait on reserved encaps signals.
2072 kref_get(&encaps_sig_hdl->refcount);
2076 spin_unlock(&ctx->sig_mgr.lock);
2078 if (!handle_found) {
2079 /* treat as signal CS already finished */
2080 dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
2083 goto free_cs_chunk_array;
2086 /* validate also the signal offset value */
2087 if (chunk->encaps_signal_offset >
2088 encaps_sig_hdl->count) {
dev_err(hdev->dev, "offset(%u) value exceeds max reserved signals count(%u)!\n",
2090 chunk->encaps_signal_offset,
2091 encaps_sig_hdl->count);
2093 goto free_cs_chunk_array;
2097 sig_fence = hl_ctx_get_fence(ctx, signal_seq);
2098 if (IS_ERR(sig_fence)) {
2099 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2100 atomic64_inc(&cntr->validation_drop_cnt);
2102 "Failed to get signal CS with seq 0x%llx\n",
2104 rc = PTR_ERR(sig_fence);
2105 goto free_cs_chunk_array;
2109 /* signal CS already finished */
2111 goto free_cs_chunk_array;
2115 container_of(sig_fence, struct hl_cs_compl, base_fence);
2117 staged_cs_with_encaps_signals = !!
2118 (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT &&
2119 (flags & HL_CS_FLAGS_ENCAP_SIGNALS));
2121 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL &&
2122 !staged_cs_with_encaps_signals) {
2123 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2124 atomic64_inc(&cntr->validation_drop_cnt);
2126 "CS seq 0x%llx is not of a signal/encaps-signal CS\n",
2128 hl_fence_put(sig_fence);
2130 goto free_cs_chunk_array;
2133 if (completion_done(&sig_fence->completion)) {
2134 /* signal CS already finished */
2135 hl_fence_put(sig_fence);
2137 goto free_cs_chunk_array;
2141 rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
2144 hl_fence_put(sig_fence);
2146 goto free_cs_chunk_array;
2150 * Save the signal CS fence for later initialization right before
2151 * hanging the wait CS on the queue.
2152 * for encaps signals case, we save the cs sequence and handle pointer
2153 * for later initialization.
2156 cs->signal_fence = sig_fence;
2157 /* store the handle pointer, so we don't have to
2158 * look for it again, later on the flow
2159 * when we need to set SOB info in hw_queue.
2161 if (cs->encaps_signals)
2162 cs->encaps_sig_hdl = encaps_sig_hdl;
2165 hl_debugfs_add_cs(cs);
2167 *cs_seq = cs->sequence;
2169 if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
2170 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
2171 q_idx, chunk->encaps_signal_offset);
2172 else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
2173 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
2174 cs, q_idx, collective_engine_id,
2175 chunk->encaps_signal_offset);
2177 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2178 atomic64_inc(&cntr->validation_drop_cnt);
2183 goto free_cs_object;
2185 rc = hl_hw_queue_schedule_cs(cs);
2187 /* In case wait cs failed here, it means the signal cs
* already completed. We want to free all its related objects
2189 * but we don't want to fail the ioctl.
2193 else if (rc != -EAGAIN)
2195 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
2196 ctx->asid, cs->sequence, rc);
2197 goto free_cs_object;
2200 *signal_sob_addr_offset = cs->sob_addr_offset;
2201 *signal_initial_sob_count = cs->initial_sob_count;
2203 rc = HL_CS_STATUS_SUCCESS;
2205 wait_cs_submitted = true;
2209 cs_rollback(hdev, cs);
2210 *cs_seq = ULLONG_MAX;
2211 /* The path below is both for good and erroneous exits */
2213 /* We finished with the CS in this function, so put the ref */
2215 free_cs_chunk_array:
2216 if (!wait_cs_submitted && cs_encaps_signals && handle_found &&
2218 kref_put(&encaps_sig_hdl->refcount,
2219 hl_encaps_handle_do_release);
2220 kfree(cs_chunk_array);
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cs_args *args = data;
	enum hl_cs_type cs_type = 0;
	u64 cs_seq = ULONG_MAX;
	void __user *chunks;
	u32 num_chunks, flags, timeout,
		signals_count = 0, sob_addr = 0, handle_id = 0;
	u16 sob_initial_count = 0;
	int rc;

	rc = hl_cs_sanity_checks(hpriv, args);
	if (rc)
		goto out;

	rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
	if (rc)
		goto out;

	cs_type = hl_cs_get_cs_type(args->in.cs_flags &
					~HL_CS_FLAGS_FORCE_RESTORE);
	chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
	num_chunks = args->in.num_chunks_execute;
	flags = args->in.cs_flags;

	/* In case this is a staged CS, user should supply the CS sequence */
	if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
			!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
		cs_seq = args->in.seq;

	timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
			? msecs_to_jiffies(args->in.timeout * 1000)
			: hpriv->hdev->timeout_jiffies;

	switch (cs_type) {
	case CS_TYPE_SIGNAL:
	case CS_TYPE_WAIT:
	case CS_TYPE_COLLECTIVE_WAIT:
		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
					&cs_seq, args->in.cs_flags, timeout,
					&sob_addr, &sob_initial_count);
		break;
	case CS_RESERVE_SIGNALS:
		rc = cs_ioctl_reserve_signals(hpriv,
					args->in.encaps_signals_q_idx,
					args->in.encaps_signals_count,
					&handle_id, &sob_addr, &signals_count);
		break;
	case CS_UNRESERVE_SIGNALS:
		rc = cs_ioctl_unreserve_signals(hpriv,
					args->in.encaps_sig_handle_id);
		break;
	default:
		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
						args->in.cs_flags,
						args->in.encaps_sig_handle_id,
						timeout, &sob_initial_count);
		break;
	}

out:
	if (rc != -EAGAIN) {
		memset(args, 0, sizeof(*args));

		switch (cs_type) {
		case CS_RESERVE_SIGNALS:
			args->out.handle_id = handle_id;
			args->out.sob_base_addr_offset = sob_addr;
			args->out.count = signals_count;
			break;
		case CS_TYPE_SIGNAL:
			args->out.sob_base_addr_offset = sob_addr;
			args->out.sob_count_before_submission = sob_initial_count;
			args->out.seq = cs_seq;
			break;
		case CS_TYPE_DEFAULT:
			args->out.sob_count_before_submission = sob_initial_count;
			args->out.seq = cs_seq;
			break;
		default:
			args->out.seq = cs_seq;
			break;
		}

		args->out.status = rc;
	}

	return rc;
}
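/*
 * hl_wait_for_fence() - wait for a CS fence with the given timeout.
 * A zero timeout only polls the fence (completion_done()), while a non-zero
 * timeout sleeps interruptibly. On return, @status reflects whether the CS is
 * still busy, completed, or already gone (fence freed), and @timestamp (if
 * provided) is set to the CS completion time.
 */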
static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence,
				enum hl_cs_wait_status *status, u64 timeout_us,
				s64 *timestamp)
{
	struct hl_device *hdev = ctx->hdev;
	long completion_rc;
	int rc = 0;

	if (IS_ERR(fence)) {
		rc = PTR_ERR(fence);
		if (rc == -EINVAL)
			dev_notice_ratelimited(hdev->dev,
				"Can't wait on CS %llu because current CS is at seq %llu\n",
				seq, ctx->cs_sequence);
		return rc;
	}

	if (!fence) {
		dev_dbg(hdev->dev,
			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
			seq, ctx->cs_sequence);

		*status = CS_WAIT_STATUS_GONE;
		return 0;
	}

	if (!timeout_us) {
		completion_rc = completion_done(&fence->completion);
	} else {
		unsigned long timeout;

		timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ?
				timeout_us : usecs_to_jiffies(timeout_us);
		completion_rc =
			wait_for_completion_interruptible_timeout(
				&fence->completion, timeout);
	}

	if (completion_rc > 0) {
		*status = CS_WAIT_STATUS_COMPLETED;
		if (timestamp)
			*timestamp = ktime_to_ns(fence->timestamp);
	} else {
		*status = CS_WAIT_STATUS_BUSY;
	}

	if (fence->error == -ETIMEDOUT)
		rc = -ETIMEDOUT;
	else if (fence->error == -EIO)
		rc = -EIO;

	return rc;
}
/*
 * hl_cs_poll_fences - iterate CS fences to check for CS completion
 *
 * @mcs_data: multi-CS internal data
 * @mcs_compl: multi-CS completion structure
 *
 * @return 0 on success, otherwise non 0 error code
 *
 * The function iterates over all CS sequences in the list and sets a bit in
 * completion_bitmap for each completed CS.
 * While iterating, the function adds the stream map of each fence in the fence
 * array to the completion QID stream map, to be used by CSs to perform
 * completion to the multi-CS context.
 * This function shall be called after taking a context ref.
 */
static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl)
{
	struct hl_fence **fence_ptr = mcs_data->fence_arr;
	struct hl_device *hdev = mcs_data->ctx->hdev;
	int i, rc, arr_len = mcs_data->arr_len;
	u64 *seq_arr = mcs_data->seq_arr;
	ktime_t max_ktime, first_cs_time;
	enum hl_cs_wait_status status;

	memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr));

	/* get all fences under the same lock */
	rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
	if (rc)
		return rc;

	/*
	 * re-initialize the completion here to handle 2 possible cases:
	 * 1. CS will complete the multi-CS prior to clearing the completion. in which
	 *    case the fence iteration is guaranteed to catch the CS completion.
	 * 2. the completion will occur after re-init of the completion.
	 *    in which case we will wake up immediately in wait_for_completion.
	 */
	reinit_completion(&mcs_compl->completion);

	/*
	 * set to maximum time to verify timestamp is valid: if at the end
	 * this value is maintained, no timestamp was updated
	 */
	max_ktime = ktime_set(KTIME_SEC_MAX, 0);
	first_cs_time = max_ktime;

	for (i = 0; i < arr_len; i++, fence_ptr++) {
		struct hl_fence *fence = *fence_ptr;

		/*
		 * In order to prevent a case where we wait until timeout even though a CS
		 * associated with the multi-CS actually completed, we do things in the below order:
		 * 1. for each fence set its QID map in the multi-CS completion QID map. This way
		 *    any CS can, potentially, complete the multi CS for the specific QID (note
		 *    that once completion is initialized, calling complete* and then wait on the
		 *    completion will cause it to return at once)
		 * 2. only after allowing multi-CS completion for the specific QID we check whether
		 *    the specific CS already completed (and thus the wait for completion part will
		 *    be skipped). if the CS is not completed it is guaranteed that the completing
		 *    CS will wake up the completion.
		 */
		if (fence)
			mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map;

		/*
		 * function won't sleep as it is called with timeout 0 (i.e.
		 * poll the fence)
		 */
		rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence,
						&status, 0, NULL);
		if (rc) {
			dev_err(hdev->dev,
				"wait_for_fence error :%d for CS seq %llu\n",
								rc, seq_arr[i]);
			break;
		}

		switch (status) {
		case CS_WAIT_STATUS_BUSY:
			/* CS did not finish, QID to wait on already stored */
			break;
		case CS_WAIT_STATUS_COMPLETED:
			/*
			 * Using mcs_handling_done to avoid the possibility of mcs_data
			 * returning to the user indicating the CS completed before it
			 * finished all of its mcs handling, to avoid a race the next time
			 * the user waits for mcs.
			 * note: when reaching this case fence is definitely not NULL
			 *       but the NULL check was added to overcome static analysis
			 */
			if (fence && !fence->mcs_handling_done) {
				/*
				 * in case multi CS is completed but MCS handling not done
				 * we "complete" the multi CS to prevent it from waiting
				 * until time-out and the "multi-CS handling done" will have
				 * another chance at the next iteration
				 */
				complete_all(&mcs_compl->completion);
				break;
			}

			mcs_data->completion_bitmap |= BIT(i);

			/*
			 * For all completed CSs we take the earliest timestamp.
			 * For this we have to validate that the timestamp is
			 * earliest of all timestamps so far.
			 */
			if (mcs_data->update_ts &&
					(ktime_compare(fence->timestamp, first_cs_time) < 0))
				first_cs_time = fence->timestamp;
			break;
		case CS_WAIT_STATUS_GONE:
			mcs_data->update_ts = false;
			mcs_data->gone_cs = true;
			/*
			 * It is possible to get old sequence numbers from a user
			 * which relate to already completed CSs whose fences are
			 * already gone. In this case, the CS is set as completed but
			 * there is no need to consider its QID for mcs completion.
			 */
			mcs_data->completion_bitmap |= BIT(i);
			break;
		default:
			dev_err(hdev->dev, "Invalid fence status\n");
			return -EINVAL;
		}
	}

	hl_fences_put(mcs_data->fence_arr, arr_len);

	if (mcs_data->update_ts &&
			(ktime_compare(first_cs_time, max_ktime) != 0))
		mcs_data->timestamp = ktime_to_ns(first_cs_time);

	return rc;
}
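/*
 * _hl_cs_wait_ioctl() - wait for a single CS sequence number.
 * Takes a context reference, looks up the fence for @seq and delegates the
 * actual poll/wait to hl_wait_for_fence().
 */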
static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				u64 timeout_us, u64 seq,
				enum hl_cs_wait_status *status, s64 *timestamp)
{
	struct hl_fence *fence;
	int rc = 0;

	if (timestamp)
		*timestamp = 0;

	hl_ctx_get(hdev, ctx);

	fence = hl_ctx_get_fence(ctx, seq);

	rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp);
	hl_fence_put(fence);
	hl_ctx_put(ctx);

	return rc;
}
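/*
 * usecs_to_jiffies() takes an unsigned int, so a 64-bit user timeout is
 * converted through nanoseconds once it no longer fits in 32 bits, clamping
 * to the largest representable value to avoid overflow.
 */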
static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
{
	if (usecs <= U32_MAX)
		return usecs_to_jiffies(usecs);

	/*
	 * If the value in nanoseconds is larger than 64 bit, use the largest
	 * 64 bit value.
	 */
	if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
		return nsecs_to_jiffies(U64_MAX);

	return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
}
/*
 * hl_wait_multi_cs_completion_init - init completion structure
 *
 * @hdev: pointer to habanalabs device structure
 *
 * @return valid completion struct pointer on success, otherwise error pointer
 *
 * up to MULTI_CS_MAX_USER_CTX calls can be made concurrently to the driver.
 * the function gets the first available completion (by marking it "used")
 * and initializes its values.
 */
static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev)
{
	struct multi_cs_completion *mcs_compl;
	int i;

	/* find a free multi_cs completion structure */
	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
		mcs_compl = &hdev->multi_cs_completion[i];
		spin_lock(&mcs_compl->lock);
		if (!mcs_compl->used) {
			mcs_compl->used = 1;
			mcs_compl->timestamp = 0;
			/*
			 * init QID map to 0 to avoid completion by CSs. the actual QID map
			 * to multi-CS CSs will be set incrementally at a later stage
			 */
			mcs_compl->stream_master_qid_map = 0;
			spin_unlock(&mcs_compl->lock);
			break;
		}
		spin_unlock(&mcs_compl->lock);
	}

	if (i == MULTI_CS_MAX_USER_CTX) {
		dev_err(hdev->dev, "no available multi-CS completion structure\n");
		return ERR_PTR(-ENOMEM);
	}
	return mcs_compl;
}
/*
 * hl_wait_multi_cs_completion_fini - return completion structure and set as
 *                                    not in use
 *
 * @mcs_compl: pointer to the completion structure
 */
static void hl_wait_multi_cs_completion_fini(
					struct multi_cs_completion *mcs_compl)
{
	/*
	 * free the completion structure, do it under lock to be in-sync with the
	 * thread that signals completion
	 */
	spin_lock(&mcs_compl->lock);
	mcs_compl->used = 0;
	spin_unlock(&mcs_compl->lock);
}
/*
 * hl_wait_multi_cs_completion - wait for first CS to complete
 *
 * @mcs_data: multi-CS internal data
 * @mcs_compl: multi-CS completion structure
 *
 * @return 0 on success, otherwise non 0 error code
 */
static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
				struct multi_cs_completion *mcs_compl)
{
	long completion_rc;

	completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
									mcs_data->timeout_jiffies);

	/* update timestamp */
	if (completion_rc > 0)
		mcs_data->timestamp = mcs_compl->timestamp;

	mcs_data->wait_status = completion_rc;

	return 0;
}
/*
 * hl_multi_cs_completion_init - init array of multi-CS completion structures
 *
 * @hdev: pointer to habanalabs device structure
 */
void hl_multi_cs_completion_init(struct hl_device *hdev)
{
	struct multi_cs_completion *mcs_cmpl;
	int i;

	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
		mcs_cmpl = &hdev->multi_cs_completion[i];
		mcs_cmpl->used = 0;
		spin_lock_init(&mcs_cmpl->lock);
		init_completion(&mcs_cmpl->completion);
	}
}
/*
 * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
 *
 * @hpriv: pointer to the private data of the fd
 * @data: pointer to multi-CS wait ioctl in/out args
 */
static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct multi_cs_completion *mcs_compl;
	struct hl_device *hdev = hpriv->hdev;
	struct multi_cs_data mcs_data = {0};
	union hl_wait_cs_args *args = data;
	struct hl_ctx *ctx = hpriv->ctx;
	struct hl_fence **fence_arr;
	void __user *seq_arr;
	u32 size_to_copy;
	u64 *cs_seq_arr;
	u8 seq_arr_len;
	int rc;

	if (!hdev->supports_wait_for_multi_cs) {
		dev_err(hdev->dev, "Wait for multi CS is not supported\n");
		return -EPERM;
	}

	seq_arr_len = args->in.seq_arr_len;

	if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) {
		dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n",
				HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len);
		return -EINVAL;
	}

	/* allocate memory for sequence array */
	cs_seq_arr =
		kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL);
	if (!cs_seq_arr)
		return -ENOMEM;

	/* copy CS sequence array from user */
	seq_arr = (void __user *) (uintptr_t) args->in.seq;
	size_to_copy = seq_arr_len * sizeof(*cs_seq_arr);
	if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) {
		dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n");
		rc = -EFAULT;
		goto free_seq_arr;
	}

	/* allocate array for the fences */
	fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL);
	if (!fence_arr) {
		rc = -ENOMEM;
		goto free_seq_arr;
	}

	/* initialize the multi-CS internal data */
	mcs_data.ctx = ctx;
	mcs_data.seq_arr = cs_seq_arr;
	mcs_data.fence_arr = fence_arr;
	mcs_data.arr_len = seq_arr_len;

	hl_ctx_get(hdev, ctx);

	/* wait (with timeout) for the first CS to be completed */
	mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
	mcs_compl = hl_wait_multi_cs_completion_init(hdev);
	if (IS_ERR(mcs_compl)) {
		rc = PTR_ERR(mcs_compl);
		goto put_ctx;
	}

	/* poll all CS fences, extract timestamp */
	mcs_data.update_ts = true;
	rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
	/*
	 * skip wait for CS completion when one of the below is true:
	 * - an error on the poll function
	 * - one or more CS in the list completed
	 * - the user called ioctl with timeout 0
	 */
	if (rc || mcs_data.completion_bitmap || !args->in.timeout_us)
		goto completion_fini;

	while (true) {
		rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl);
		if (rc || (mcs_data.wait_status == 0))
			break;

		/*
		 * poll fences once again to update the CS map.
		 * no timestamp should be updated this time.
		 */
		mcs_data.update_ts = false;
		rc = hl_cs_poll_fences(&mcs_data, mcs_compl);

		if (mcs_data.completion_bitmap)
			break;

		/*
		 * if hl_wait_multi_cs_completion returned before timeout (i.e.
		 * it got a completion) it either got completed by a CS in the multi-CS list
		 * (in which case the indication will be a non empty completion_bitmap) or it
		 * got completed by a CS submitted to one of the shared stream masters but
		 * not in the multi-CS list (in which case we should wait again but modify
		 * the timeout and set the timestamp to zero to let a CS related to the
		 * current multi-CS set a new, relevant, timestamp)
		 */
		mcs_data.timeout_jiffies = mcs_data.wait_status;
		mcs_compl->timestamp = 0;
	}

completion_fini:
	hl_wait_multi_cs_completion_fini(mcs_compl);

put_ctx:
	hl_ctx_put(ctx);
	kfree(fence_arr);

free_seq_arr:
	kfree(cs_seq_arr);

	if (rc)
		return rc;

	if (mcs_data.wait_status == -ERESTARTSYS) {
		dev_err_ratelimited(hdev->dev,
				"user process got signal while waiting for Multi-CS\n");
		return -EINTR;
	}

	/* update output args */
	memset(args, 0, sizeof(*args));

	if (mcs_data.completion_bitmap) {
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
		args->out.cs_completion_map = mcs_data.completion_bitmap;

		/* if timestamp is not 0, it's valid */
		if (mcs_data.timestamp) {
			args->out.timestamp_nsec = mcs_data.timestamp;
			args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
		}

		/* update if some CS was gone */
		if (!mcs_data.timestamp)
			args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
	} else {
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
	}

	return 0;
}
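/*
 * hl_cs_wait_ioctl() - wait on a single CS sequence number.
 * Translates the internal wait result into the uAPI status codes:
 * -ERESTARTSYS becomes -EINTR, -ETIMEDOUT/-EIO are reported as
 * TIMEDOUT/ABORTED, and a gone fence is flagged while still being
 * reported as completed.
 */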
static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_wait_cs_args *args = data;
	enum hl_cs_wait_status status;
	u64 seq = args->in.seq;
	s64 timestamp;
	int rc;

	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq,
				&status, &timestamp);

	if (rc == -ERESTARTSYS) {
		dev_err_ratelimited(hdev->dev,
			"user process got signal while waiting for CS handle %llu\n",
			seq);
		return -EINTR;
	}

	memset(args, 0, sizeof(*args));

	if (rc) {
		if (rc == -ETIMEDOUT) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has timed-out while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
		} else if (rc == -EIO) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has been aborted while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
		}
		return rc;
	}

	if (timestamp) {
		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
		args->out.timestamp_nsec = timestamp;
	}

	switch (status) {
	case CS_WAIT_STATUS_GONE:
		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
		fallthrough;
	case CS_WAIT_STATUS_COMPLETED:
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
		break;
	case CS_WAIT_STATUS_BUSY:
	default:
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
		break;
	}

	return 0;
}
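/*
 * _hl_interrupt_wait_ioctl() - interrupt-based wait on a CQ counter that
 * lives in a kernel-mapped command buffer. The counter is compared against
 * @target_value once before the node is queued, in case the interrupt already
 * arrived; afterwards the pending node is left on the interrupt's wait list
 * for the interrupt handler to monitor and complete.
 */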
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				struct hl_cb_mgr *cb_mgr, u64 timeout_us,
				u64 cq_counters_handle, u64 cq_counters_offset,
				u64 target_value, struct hl_user_interrupt *interrupt,
				u32 *status,
				u64 *timestamp)
{
	struct hl_user_pending_interrupt *pend;
	unsigned long timeout, flags;
	long completion_rc;
	struct hl_cb *cb;
	int rc = 0;
	u32 handle;

	timeout = hl_usecs64_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	cq_counters_handle >>= PAGE_SHIFT;
	handle = (u32) cq_counters_handle;

	cb = hl_cb_get(hdev, cb_mgr, handle);
	if (!cb) {
		hl_ctx_put(ctx);
		return -EINVAL;
	}

	pend = kzalloc(sizeof(*pend), GFP_KERNEL);
	if (!pend) {
		hl_cb_put(cb);
		hl_ctx_put(ctx);
		return -ENOMEM;
	}

	hl_fence_init(&pend->fence, ULONG_MAX);

	pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset;
	pend->cq_target_value = target_value;

	/* We check for completion value as interrupt could have been received
	 * before we added the node to the wait list
	 */
	if (*pend->cq_kernel_addr >= target_value) {
		*status = HL_WAIT_CS_STATUS_COMPLETED;
		/* There was no interrupt, we assume the completion is now. */
		pend->fence.timestamp = ktime_get();
	} else {
		*status = HL_WAIT_CS_STATUS_BUSY;
	}

	if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
		goto set_timestamp;

	/* Add pending user interrupt to relevant list for the interrupt
	 * handler to monitor
	 */
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

	/* Wait for interrupt handler to signal completion */
	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
									timeout);
	if (completion_rc > 0) {
		*status = HL_WAIT_CS_STATUS_COMPLETED;
	} else if (completion_rc == -ERESTARTSYS) {
		dev_err_ratelimited(hdev->dev,
			"user process got signal while waiting for interrupt ID %d\n",
			interrupt->interrupt_id);
		rc = -EINTR;
		*status = HL_WAIT_CS_STATUS_ABORTED;
	} else if (pend->fence.error == -EIO) {
		dev_err_ratelimited(hdev->dev,
			"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
			pend->fence.error);
		rc = -EIO;
		*status = HL_WAIT_CS_STATUS_ABORTED;
	} else {
		dev_err_ratelimited(hdev->dev, "Waiting for interrupt ID %d timedout\n",
			interrupt->interrupt_id);
		rc = -ETIMEDOUT;
		*status = HL_WAIT_CS_STATUS_BUSY;
	}

	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_del(&pend->wait_list_node);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

set_timestamp:
	*timestamp = ktime_to_ns(pend->fence.timestamp);

	kfree(pend);
	hl_cb_put(cb);
	hl_ctx_put(ctx);

	return rc;
}
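/*
 * _hl_interrupt_wait_ioctl_user_addr() - interrupt-based wait on a 64-bit
 * value at a user-space address. The value is re-read with copy_from_user()
 * after every wake-up, and if it has not yet reached @target_value the wait
 * is retried with the remaining timeout.
 */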
static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx,
				u64 timeout_us, u64 user_address,
				u64 target_value, struct hl_user_interrupt *interrupt,
				u32 *status,
				u64 *timestamp)
{
	struct hl_user_pending_interrupt *pend;
	unsigned long timeout, flags;
	u64 completion_value;
	long completion_rc;
	int rc = 0;

	timeout = hl_usecs64_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	pend = kzalloc(sizeof(*pend), GFP_KERNEL);
	if (!pend) {
		hl_ctx_put(ctx);
		return -ENOMEM;
	}

	hl_fence_init(&pend->fence, ULONG_MAX);

	/* Add pending user interrupt to relevant list for the interrupt
	 * handler to monitor
	 */
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

	/* We check for completion value as interrupt could have been received
	 * before we added the node to the wait list
	 */
	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
		dev_err(hdev->dev, "Failed to copy completion value from user\n");
		rc = -EFAULT;
		goto remove_pending_user_interrupt;
	}

	if (completion_value >= target_value) {
		*status = HL_WAIT_CS_STATUS_COMPLETED;
		/* There was no interrupt, we assume the completion is now. */
		pend->fence.timestamp = ktime_get();
	} else {
		*status = HL_WAIT_CS_STATUS_BUSY;
	}

	if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
		goto remove_pending_user_interrupt;

wait_again:
	/* Wait for interrupt handler to signal completion */
	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
									timeout);

	/* If timeout did not expire we need to perform the comparison.
	 * If comparison fails, keep waiting until timeout expires
	 */
	if (completion_rc > 0) {
		spin_lock_irqsave(&interrupt->wait_list_lock, flags);
		/* reinit_completion must be called before we check for user
		 * completion value, otherwise, if interrupt is received after
		 * the comparison and before the next wait_for_completion,
		 * we will reach timeout and fail
		 */
		reinit_completion(&pend->fence.completion);
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
			dev_err(hdev->dev, "Failed to copy completion value from user\n");
			rc = -EFAULT;
			goto remove_pending_user_interrupt;
		}

		if (completion_value >= target_value) {
			*status = HL_WAIT_CS_STATUS_COMPLETED;
		} else if (pend->fence.error) {
			dev_err_ratelimited(hdev->dev,
				"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
				pend->fence.error);
			/* set the command completion status as ABORTED */
			*status = HL_WAIT_CS_STATUS_ABORTED;
		} else {
			timeout = completion_rc;
			goto wait_again;
		}
	} else if (completion_rc == -ERESTARTSYS) {
		dev_err_ratelimited(hdev->dev,
			"user process got signal while waiting for interrupt ID %d\n",
			interrupt->interrupt_id);
		rc = -EINTR;
	} else {
		*status = HL_WAIT_CS_STATUS_BUSY;
	}

remove_pending_user_interrupt:
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_del(&pend->wait_list_node);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

	*timestamp = ktime_to_ns(pend->fence.timestamp);

	hl_ctx_put(ctx);
	kfree(pend);

	return rc;
}
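/*
 * hl_interrupt_wait_ioctl() - validate the requested user interrupt ID and
 * dispatch to the CQ-counter or user-address wait flavor, depending on the
 * HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ flag.
 */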
static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	u16 interrupt_id, first_interrupt, last_interrupt;
	struct hl_device *hdev = hpriv->hdev;
	struct asic_fixed_properties *prop;
	struct hl_user_interrupt *interrupt;
	union hl_wait_cs_args *args = data;
	u32 status = HL_WAIT_CS_STATUS_BUSY;
	u64 timestamp;
	int rc;

	prop = &hdev->asic_prop;

	if (!prop->user_interrupt_count) {
		dev_err(hdev->dev, "no user interrupts allowed");
		return -EPERM;
	}

	interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);

	first_interrupt = prop->first_available_user_msix_interrupt;
	last_interrupt = prop->first_available_user_msix_interrupt +
			prop->user_interrupt_count - 1;

	if ((interrupt_id < first_interrupt || interrupt_id > last_interrupt) &&
			interrupt_id != HL_COMMON_USER_INTERRUPT_ID) {
		dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id);
		return -EINVAL;
	}

	if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID)
		interrupt = &hdev->common_user_interrupt;
	else
		interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];

	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr,
				args->in.interrupt_timeout_us, args->in.cq_counters_handle,
				args->in.cq_counters_offset,
				args->in.target, interrupt, &status,
				&timestamp);
	else
		rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
				args->in.interrupt_timeout_us, args->in.addr,
				args->in.target, interrupt, &status,
				&timestamp);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
			"interrupt_wait_ioctl failed (%d)\n", rc);

		return rc;
	}

	memset(args, 0, sizeof(*args));
	args->out.status = status;

	if (timestamp) {
		args->out.timestamp_nsec = timestamp;
		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
	}

	return 0;
}
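/* Top-level wait ioctl: dispatch to interrupt, multi-CS or single-CS wait */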
int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_wait_cs_args *args = data;
	u32 flags = args->in.flags;
	int rc;

	/* If the device is not operational, no point in waiting for any command submission or
	 * user interrupt
	 */
	if (!hl_device_operational(hpriv->hdev, NULL))
		return -EBUSY;

	if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
		rc = hl_interrupt_wait_ioctl(hpriv, data);
	else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS)
		rc = hl_multi_cs_wait_ioctl(hpriv, data);
	else
		rc = hl_cs_wait_ioctl(hpriv, data);

	return rc;
}