drivers/misc/habanalabs/common/command_submission.c

   1 // SPDX-License-Identifier: GPL-2.0
   2
   3 /*
   4  * Copyright 2016-2021 HabanaLabs, Ltd.
   5  * All Rights Reserved.
   6  */
   7
   8 #include <uapi/misc/habanalabs.h>
   9 #include "habanalabs.h"
  10
  11 #include <linux/uaccess.h>
  12 #include <linux/slab.h>
  13
  14 #define HL_CS_FLAGS_TYPE_MASK   (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
  15                                 HL_CS_FLAGS_COLLECTIVE_WAIT)
  16
  17 /**
  18  * enum hl_cs_wait_status - cs wait status
  19  * @CS_WAIT_STATUS_BUSY: cs was not completed yet
  20  * @CS_WAIT_STATUS_COMPLETED: cs completed
  21  * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
  22  */
  23 enum hl_cs_wait_status {
  24         CS_WAIT_STATUS_BUSY,
  25         CS_WAIT_STATUS_COMPLETED,
  26         CS_WAIT_STATUS_GONE
  27 };
  28
  29 static void job_wq_completion(struct work_struct *work);
  30 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
  31                                 u64 timeout_us, u64 seq,
  32                                 enum hl_cs_wait_status *status, s64 *timestamp);
  33 static void cs_do_release(struct kref *ref);
  34
  35 static void hl_sob_reset(struct kref *ref)
  36 {
  37         struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
  38                                                         kref);
  39         struct hl_device *hdev = hw_sob->hdev;
  40
  41         dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id);
  42
  43         hdev->asic_funcs->reset_sob(hdev, hw_sob);
  44
  45         hw_sob->need_reset = false;
  46 }
  47
  48 void hl_sob_reset_error(struct kref *ref)
  49 {
  50         struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
  51                                                         kref);
  52         struct hl_device *hdev = hw_sob->hdev;
  53
  54         dev_crit(hdev->dev,
  55                 "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
  56                 hw_sob->q_idx, hw_sob->sob_id);
  57 }
  58
  59 void hw_sob_put(struct hl_hw_sob *hw_sob)
  60 {
  61         if (hw_sob)
  62                 kref_put(&hw_sob->kref, hl_sob_reset);
  63 }
  64
  65 static void hw_sob_put_err(struct hl_hw_sob *hw_sob)
  66 {
  67         if (hw_sob)
  68                 kref_put(&hw_sob->kref, hl_sob_reset_error);
  69 }
  70
  71 void hw_sob_get(struct hl_hw_sob *hw_sob)
  72 {
  73         if (hw_sob)
  74                 kref_get(&hw_sob->kref);
  75 }
  76
  77 /**
  78  * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
  79  * @sob_base: sob base id
  80  * @sob_mask: sob user mask, each bit represents a sob offset from sob base
  81  * @mask: generated mask
  82  *
  83  * Return: 0 if given parameters are valid
  84  */
  85 int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
  86 {
  87         int i;
  88
  89         if (sob_mask == 0)
  90                 return -EINVAL;
  91
  92         if (sob_mask == 0x1) {
  93                 *mask = ~(1 << (sob_base & 0x7));
  94         } else {
  95                 /* find msb in order to verify sob range is valid */
  96                 for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
  97                         if (BIT(i) & sob_mask)
  98                                 break;
  99
 100                 if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
 101                         return -EINVAL;
 102
 103                 *mask = ~sob_mask;
 104         }
 105
 106         return 0;
 107 }
 108
 109 static void hl_fence_release(struct kref *kref)
 110 {
 111         struct hl_fence *fence =
 112                 container_of(kref, struct hl_fence, refcount);
 113         struct hl_cs_compl *hl_cs_cmpl =
 114                 container_of(fence, struct hl_cs_compl, base_fence);
 115
 116         kfree(hl_cs_cmpl);
 117 }
 118
 119 void hl_fence_put(struct hl_fence *fence)
 120 {
 121         if (IS_ERR_OR_NULL(fence))
 122                 return;
 123         kref_put(&fence->refcount, hl_fence_release);
 124 }
 125
 126 void hl_fences_put(struct hl_fence **fence, int len)
 127 {
 128         int i;
 129
 130         for (i = 0; i < len; i++, fence++)
 131                 hl_fence_put(*fence);
 132 }
 133
 134 void hl_fence_get(struct hl_fence *fence)
 135 {
 136         if (fence)
 137                 kref_get(&fence->refcount);
 138 }
 139
 140 static void hl_fence_init(struct hl_fence *fence, u64 sequence)
 141 {
 142         kref_init(&fence->refcount);
 143         fence->cs_sequence = sequence;
 144         fence->error = 0;
 145         fence->timestamp = ktime_set(0, 0);
 146         fence->mcs_handling_done = false;
 147         init_completion(&fence->completion);
 148 }
 149
 150 void cs_get(struct hl_cs *cs)
 151 {
 152         kref_get(&cs->refcount);
 153 }
 154
 155 static int cs_get_unless_zero(struct hl_cs *cs)
 156 {
 157         return kref_get_unless_zero(&cs->refcount);
 158 }
 159
 160 static void cs_put(struct hl_cs *cs)
 161 {
 162         kref_put(&cs->refcount, cs_do_release);
 163 }
 164
 165 static void cs_job_do_release(struct kref *ref)
 166 {
 167         struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);
 168
 169         kfree(job);
 170 }
 171
 172 static void cs_job_put(struct hl_cs_job *job)
 173 {
 174         kref_put(&job->refcount, cs_job_do_release);
 175 }
 176
 177 bool cs_needs_completion(struct hl_cs *cs)
 178 {
 179         /* In case this is a staged CS, only the last CS in sequence should
 180          * get a completion, any non staged CS will always get a completion
 181          */
 182         if (cs->staged_cs && !cs->staged_last)
 183                 return false;
 184
 185         return true;
 186 }
 187
 188 bool cs_needs_timeout(struct hl_cs *cs)
 189 {
 190         /* In case this is a staged CS, only the first CS in sequence should
 191          * get a timeout, any non staged CS will always get a timeout
 192          */
 193         if (cs->staged_cs && !cs->staged_first)
 194                 return false;
 195
 196         return true;
 197 }
 198
 199 static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
 200 {
 201         /*
 202          * Patched CB is created for external queues jobs, and for H/W queues
 203          * jobs if the user CB was allocated by driver and MMU is disabled.
 204          */
 205         return (job->queue_type == QUEUE_TYPE_EXT ||
 206                         (job->queue_type == QUEUE_TYPE_HW &&
 207                                         job->is_kernel_allocated_cb &&
 208                                         !hdev->mmu_enable));
 209 }
 210
 211 /*
 212  * cs_parser - parse the user command submission
 213  *
 214  * @hpriv       : pointer to the private data of the fd
 215  * @job        : pointer to the job that holds the command submission info
 216  *
 217  * The function parses the command submission of the user. It calls the
 218  * ASIC specific parser, which returns a list of memory blocks to send
 219  * to the device as different command buffers
 220  *
 221  */
 222 static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
 223 {
 224         struct hl_device *hdev = hpriv->hdev;
 225         struct hl_cs_parser parser;
 226         int rc;
 227
 228         parser.ctx_id = job->cs->ctx->asid;
 229         parser.cs_sequence = job->cs->sequence;
 230         parser.job_id = job->id;
 231
 232         parser.hw_queue_id = job->hw_queue_id;
 233         parser.job_userptr_list = &job->userptr_list;
 234         parser.patched_cb = NULL;
 235         parser.user_cb = job->user_cb;
 236         parser.user_cb_size = job->user_cb_size;
 237         parser.queue_type = job->queue_type;
 238         parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
 239         job->patched_cb = NULL;
 240         parser.completion = cs_needs_completion(job->cs);
 241
 242         rc = hdev->asic_funcs->cs_parser(hdev, &parser);
 243
 244         if (is_cb_patched(hdev, job)) {
 245                 if (!rc) {
 246                         job->patched_cb = parser.patched_cb;
 247                         job->job_cb_size = parser.patched_cb_size;
 248                         job->contains_dma_pkt = parser.contains_dma_pkt;
 249                         atomic_inc(&job->patched_cb->cs_cnt);
 250                 }
 251
 252                 /*
 253                  * Whether the parsing worked or not, we don't need the
 254                  * original CB anymore because it was already parsed and
 255                  * won't be accessed again for this CS
 256                  */
 257                 atomic_dec(&job->user_cb->cs_cnt);
 258                 hl_cb_put(job->user_cb);
 259                 job->user_cb = NULL;
 260         } else if (!rc) {
 261                 job->job_cb_size = job->user_cb_size;
 262         }
 263
 264         return rc;
 265 }
 266
 267 static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
 268 {
 269         struct hl_cs *cs = job->cs;
 270
 271         if (is_cb_patched(hdev, job)) {
 272                 hl_userptr_delete_list(hdev, &job->userptr_list);
 273
 274                 /*
 275                  * We might arrive here from rollback and patched CB wasn't
 276                  * created, so we need to check it's not NULL
 277                  */
 278                 if (job->patched_cb) {
 279                         atomic_dec(&job->patched_cb->cs_cnt);
 280                         hl_cb_put(job->patched_cb);
 281                 }
 282         }
 283
 284         /* For H/W queue jobs, if a user CB was allocated by driver and MMU is
 285          * enabled, the user CB isn't released in cs_parser() and thus should be
 286          * released here.
 287          * This is also true for INT queues jobs which were allocated by driver
 288          */
 289         if (job->is_kernel_allocated_cb &&
 290                 ((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) ||
 291                                 job->queue_type == QUEUE_TYPE_INT)) {
 292                 atomic_dec(&job->user_cb->cs_cnt);
 293                 hl_cb_put(job->user_cb);
 294         }
 295
 296         /*
 297          * This is the only place where there can be multiple threads
 298          * modifying the list at the same time
 299          */
 300         spin_lock(&cs->job_lock);
 301         list_del(&job->cs_node);
 302         spin_unlock(&cs->job_lock);
 303
 304         hl_debugfs_remove_job(hdev, job);
 305
 306         /* We decrement reference only for a CS that gets completion
 307          * because the reference was incremented only for this kind of CS
 308          * right before it was scheduled.
 309          *
 310          * In staged submission, only the last CS marked as 'staged_last'
 311          * gets completion, hence its release function will be called from here.
 312          * As for all the rest CS's in the staged submission which do not get
 313          * completion, their CS reference will be decremented by the
 314          * 'staged_last' CS during the CS release flow.
 315          * All relevant PQ CI counters will be incremented during the CS release
 316          * flow by calling 'hl_hw_queue_update_ci'.
 317          */
 318         if (cs_needs_completion(cs) &&
 319                 (job->queue_type == QUEUE_TYPE_EXT ||
 320                         job->queue_type == QUEUE_TYPE_HW))
 321                 cs_put(cs);
 322
 323         cs_job_put(job);
 324 }
 325
 326 /*
 327  * hl_staged_cs_find_first - locate the first CS in this staged submission
 328  *
 329  * @hdev: pointer to device structure
 330  * @cs_seq: staged submission sequence number
 331  *
 332  * @note: This function must be called under 'hdev->cs_mirror_lock'
 333  *
 334  * Find and return a CS pointer with the given sequence
 335  */
 336 struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
 337 {
 338         struct hl_cs *cs;
 339
 340         list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
 341                 if (cs->staged_cs && cs->staged_first &&
 342                                 cs->sequence == cs_seq)
 343                         return cs;
 344
 345         return NULL;
 346 }
 347
 348 /*
 349  * is_staged_cs_last_exists - returns true if the last CS in sequence exists
 350  *
 351  * @hdev: pointer to device structure
 352  * @cs: staged submission member
 353  *
 354  */
 355 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
 356 {
 357         struct hl_cs *last_entry;
 358
 359         last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
 360                                                                 staged_cs_node);
 361
 362         if (last_entry->staged_last)
 363                 return true;
 364
 365         return false;
 366 }
 367
 368 /*
 369  * staged_cs_get - get CS reference if this CS is a part of a staged CS
 370  *
 371  * @hdev: pointer to device structure
 372  * @cs: current CS
 373  * @cs_seq: staged submission sequence number
 374  *
 375  * Increment CS reference for every CS in this staged submission except for
 376  * the CS which get completion.
 377  */
 378 static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
 379 {
 380         /* Only the last CS in this staged submission will get a completion.
 381          * We must increment the reference for all other CS's in this
 382          * staged submission.
 383          * Once we get a completion we will release the whole staged submission.
 384          */
 385         if (!cs->staged_last)
 386                 cs_get(cs);
 387 }
 388
 389 /*
 390  * staged_cs_put - put a CS in case it is part of staged submission
 391  *
 392  * @hdev: pointer to device structure
 393  * @cs: CS to put
 394  *
 395  * This function decrements a CS reference (for a non completion CS)
 396  */
 397 static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
 398 {
 399         /* We release all CS's in a staged submission except the last
 400          * CS which we have never incremented its reference.
 401          */
 402         if (!cs_needs_completion(cs))
 403                 cs_put(cs);
 404 }
 405
 406 static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
 407 {
 408         bool next_entry_found = false;
 409         struct hl_cs *next, *first_cs;
 410
 411         if (!cs_needs_timeout(cs))
 412                 return;
 413
 414         spin_lock(&hdev->cs_mirror_lock);
 415
 416         /* We need to handle tdr only once for the complete staged submission.
 417          * Hence, we choose the CS that reaches this function first which is
 418          * the CS marked as 'staged_last'.
 419          * In case single staged cs was submitted which has both first and last
 420          * indications, then "cs_find_first" below will return NULL, since we
 421          * removed the cs node from the list before getting here,
 422          * in such cases just continue with the cs to cancel it's TDR work.
 423          */
 424         if (cs->staged_cs && cs->staged_last) {
 425                 first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
 426                 if (first_cs)
 427                         cs = first_cs;
 428         }
 429
 430         spin_unlock(&hdev->cs_mirror_lock);
 431
 432         /* Don't cancel TDR in case this CS was timedout because we might be
 433          * running from the TDR context
 434          */
 435         if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
 436                 return;
 437
 438         if (cs->tdr_active)
 439                 cancel_delayed_work_sync(&cs->work_tdr);
 440
 441         spin_lock(&hdev->cs_mirror_lock);
 442
 443         /* queue TDR for next CS */
 444         list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
 445                 if (cs_needs_timeout(next)) {
 446                         next_entry_found = true;
 447                         break;
 448                 }
 449
 450         if (next_entry_found && !next->tdr_active) {
 451                 next->tdr_active = true;
 452                 schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
 453         }
 454
 455         spin_unlock(&hdev->cs_mirror_lock);
 456 }
 457
 458 /*
 459  * force_complete_multi_cs - complete all contexts that wait on multi-CS
 460  *
 461  * @hdev: pointer to habanalabs device structure
 462  */
 463 static void force_complete_multi_cs(struct hl_device *hdev)
 464 {
 465         int i;
 466
 467         for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
 468                 struct multi_cs_completion *mcs_compl;
 469
 470                 mcs_compl = &hdev->multi_cs_completion[i];
 471
 472                 spin_lock(&mcs_compl->lock);
 473
 474                 if (!mcs_compl->used) {
 475                         spin_unlock(&mcs_compl->lock);
 476                         continue;
 477                 }
 478
 479                 /* when calling force complete no context should be waiting on
 480                  * multi-cS.
 481                  * We are calling the function as a protection for such case
 482                  * to free any pending context and print error message
 483                  */
 484                 dev_err(hdev->dev,
 485                                 "multi-CS completion context %d still waiting when calling force completion\n",
 486                                 i);
 487                 complete_all(&mcs_compl->completion);
 488                 spin_unlock(&mcs_compl->lock);
 489         }
 490 }
 491
 492 /*
 493  * complete_multi_cs - complete all waiting entities on multi-CS
 494  *
 495  * @hdev: pointer to habanalabs device structure
 496  * @cs: CS structure
 497  * The function signals a waiting entity that has an overlapping stream masters
 498  * with the completed CS.
 499  * For example:
 500  * - a completed CS worked on stream master QID 4, multi CS completion
 501  *   is actively waiting on stream master QIDs 3, 5. don't send signal as no
 502  *   common stream master QID
 503  * - a completed CS worked on stream master QID 4, multi CS completion
 504  *   is actively waiting on stream master QIDs 3, 4. send signal as stream
 505  *   master QID 4 is common
 506  */
 507 static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
 508 {
 509         struct hl_fence *fence = cs->fence;
 510         int i;
 511
 512         /* in case of multi CS check for completion only for the first CS */
 513         if (cs->staged_cs && !cs->staged_first)
 514                 return;
 515
 516         for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
 517                 struct multi_cs_completion *mcs_compl;
 518
 519                 mcs_compl = &hdev->multi_cs_completion[i];
 520                 if (!mcs_compl->used)
 521                         continue;
 522
 523                 spin_lock(&mcs_compl->lock);
 524
 525                 /*
 526                  * complete if:
 527                  * 1. still waiting for completion
 528                  * 2. the completed CS has at least one overlapping stream
 529                  *    master with the stream masters in the completion
 530                  */
 531                 if (mcs_compl->used &&
 532                                 (fence->stream_master_qid_map &
 533                                         mcs_compl->stream_master_qid_map)) {
 534                         /* extract the timestamp only of first completed CS */
 535                         if (!mcs_compl->timestamp)
 536                                 mcs_compl->timestamp = ktime_to_ns(fence->timestamp);
 537
 538                         complete_all(&mcs_compl->completion);
 539
 540                         /*
 541                          * Setting mcs_handling_done inside the lock ensures
 542                          * at least one fence have mcs_handling_done set to
 543                          * true before wait for mcs finish. This ensures at
 544                          * least one CS will be set as completed when polling
 545                          * mcs fences.
 546                          */
 547                         fence->mcs_handling_done = true;
 548                 }
 549
 550                 spin_unlock(&mcs_compl->lock);
 551         }
 552         /* In case CS completed without mcs completion initialized */
 553         fence->mcs_handling_done = true;
 554 }
 555
 556 static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
 557                                         struct hl_cs *cs,
 558                                         struct hl_cs_compl *hl_cs_cmpl)
 559 {
 560         /* Skip this handler if the cs wasn't submitted, to avoid putting
 561          * the hw_sob twice, since this case already handled at this point,
 562          * also skip if the hw_sob pointer wasn't set.
 563          */
 564         if (!hl_cs_cmpl->hw_sob || !cs->submitted)
 565                 return;
 566
 567         spin_lock(&hl_cs_cmpl->lock);
 568
 569         /*
 570          * we get refcount upon reservation of signals or signal/wait cs for the
 571          * hw_sob object, and need to put it when the first staged cs
 572          * (which cotains the encaps signals) or cs signal/wait is completed.
 573          */
 574         if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
 575                         (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
 576                         (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) ||
 577                         (!!hl_cs_cmpl->encaps_signals)) {
 578                 dev_dbg(hdev->dev,
 579                                 "CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n",
 580                                 hl_cs_cmpl->cs_seq,
 581                                 hl_cs_cmpl->type,
 582                                 hl_cs_cmpl->hw_sob->sob_id,
 583                                 hl_cs_cmpl->sob_val);
 584
 585                 hw_sob_put(hl_cs_cmpl->hw_sob);
 586
 587                 if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
 588                         hdev->asic_funcs->reset_sob_group(hdev,
 589                                         hl_cs_cmpl->sob_group);
 590         }
 591
 592         spin_unlock(&hl_cs_cmpl->lock);
 593 }
 594
 595 static void cs_do_release(struct kref *ref)
 596 {
 597         struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
 598         struct hl_device *hdev = cs->ctx->hdev;
 599         struct hl_cs_job *job, *tmp;
 600         struct hl_cs_compl *hl_cs_cmpl =
 601                         container_of(cs->fence, struct hl_cs_compl, base_fence);
 602
 603         cs->completed = true;
 604
 605         /*
 606          * Although if we reached here it means that all external jobs have
 607          * finished, because each one of them took refcnt to CS, we still
 608          * need to go over the internal jobs and complete them. Otherwise, we
 609          * will have leaked memory and what's worse, the CS object (and
 610          * potentially the CTX object) could be released, while the JOB
 611          * still holds a pointer to them (but no reference).
 612          */
 613         list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
 614                 complete_job(hdev, job);
 615
 616         if (!cs->submitted) {
 617                 /*
 618                  * In case the wait for signal CS was submitted, the fence put
 619                  * occurs in init_signal_wait_cs() or collective_wait_init_cs()
 620                  * right before hanging on the PQ.
 621                  */
 622                 if (cs->type == CS_TYPE_WAIT ||
 623                                 cs->type == CS_TYPE_COLLECTIVE_WAIT)
 624                         hl_fence_put(cs->signal_fence);
 625
 626                 goto out;
 627         }
 628
 629         /* Need to update CI for all queue jobs that does not get completion */
 630         hl_hw_queue_update_ci(cs);
 631
 632         /* remove CS from CS mirror list */
 633         spin_lock(&hdev->cs_mirror_lock);
 634         list_del_init(&cs->mirror_node);
 635         spin_unlock(&hdev->cs_mirror_lock);
 636
 637         cs_handle_tdr(hdev, cs);
 638
 639         if (cs->staged_cs) {
 640                 /* the completion CS decrements reference for the entire
 641                  * staged submission
 642                  */
 643                 if (cs->staged_last) {
 644                         struct hl_cs *staged_cs, *tmp;
 645
 646                         list_for_each_entry_safe(staged_cs, tmp,
 647                                         &cs->staged_cs_node, staged_cs_node)
 648                                 staged_cs_put(hdev, staged_cs);
 649                 }
 650
 651                 /* A staged CS will be a member in the list only after it
 652                  * was submitted. We used 'cs_mirror_lock' when inserting
 653                  * it to list so we will use it again when removing it
 654                  */
 655                 if (cs->submitted) {
 656                         spin_lock(&hdev->cs_mirror_lock);
 657                         list_del(&cs->staged_cs_node);
 658                         spin_unlock(&hdev->cs_mirror_lock);
 659                 }
 660
 661                 /* decrement refcount to handle when first staged cs
 662                  * with encaps signals is completed.
 663                  */
 664                 if (hl_cs_cmpl->encaps_signals)
 665                         kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount,
 666                                                 hl_encaps_handle_do_release);
 667         }
 668
 669         if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
 670                         && cs->encaps_signals)
 671                 kref_put(&cs->encaps_sig_hdl->refcount,
 672                                         hl_encaps_handle_do_release);
 673
 674 out:
 675         /* Must be called before hl_ctx_put because inside we use ctx to get
 676          * the device
 677          */
 678         hl_debugfs_remove_cs(cs);
 679
 680         hl_ctx_put(cs->ctx);
 681
 682         /* We need to mark an error for not submitted because in that case
 683          * the hl fence release flow is different. Mainly, we don't need
 684          * to handle hw_sob for signal/wait
 685          */
 686         if (cs->timedout)
 687                 cs->fence->error = -ETIMEDOUT;
 688         else if (cs->aborted)
 689                 cs->fence->error = -EIO;
 690         else if (!cs->submitted)
 691                 cs->fence->error = -EBUSY;
 692
 693         if (unlikely(cs->skip_reset_on_timeout)) {
 694                 dev_err(hdev->dev,
 695                         "Command submission %llu completed after %llu (s)\n",
 696                         cs->sequence,
 697                         div_u64(jiffies - cs->submission_time_jiffies, HZ));
 698         }
 699
 700         if (cs->timestamp)
 701                 cs->fence->timestamp = ktime_get();
 702         complete_all(&cs->fence->completion);
 703         complete_multi_cs(hdev, cs);
 704
 705         cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl);
 706
 707         hl_fence_put(cs->fence);
 708
 709         kfree(cs->jobs_in_queue_cnt);
 710         kfree(cs);
 711 }
 712
 713 static void cs_timedout(struct work_struct *work)
 714 {
 715         struct hl_device *hdev;
 716         int rc;
 717         struct hl_cs *cs = container_of(work, struct hl_cs,
 718                                                  work_tdr.work);
 719         bool skip_reset_on_timeout = cs->skip_reset_on_timeout;
 720
 721         rc = cs_get_unless_zero(cs);
 722         if (!rc)
 723                 return;
 724
 725         if ((!cs->submitted) || (cs->completed)) {
 726                 cs_put(cs);
 727                 return;
 728         }
 729
 730         /* Mark the CS is timed out so we won't try to cancel its TDR */
 731         if (likely(!skip_reset_on_timeout))
 732                 cs->timedout = true;
 733
 734         hdev = cs->ctx->hdev;
 735
 736         /* Save only the first CS timeout parameters */
 737         rc = atomic_cmpxchg(&hdev->last_error.cs_write_disable, 0, 1);
 738         if (!rc) {
 739                 hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
 740                 hdev->last_error.cs_timeout_timestamp = ktime_get();
 741                 hdev->last_error.cs_timeout_seq = cs->sequence;
 742         }
 743
 744         switch (cs->type) {
 745         case CS_TYPE_SIGNAL:
 746                 dev_err(hdev->dev,
 747                         "Signal command submission %llu has not finished in time!\n",
 748                         cs->sequence);
 749                 break;
 750
 751         case CS_TYPE_WAIT:
 752                 dev_err(hdev->dev,
 753                         "Wait command submission %llu has not finished in time!\n",
 754                         cs->sequence);
 755                 break;
 756
 757         case CS_TYPE_COLLECTIVE_WAIT:
 758                 dev_err(hdev->dev,
 759                         "Collective Wait command submission %llu has not finished in time!\n",
 760                         cs->sequence);
 761                 break;
 762
 763         default:
 764                 dev_err(hdev->dev,
 765                         "Command submission %llu has not finished in time!\n",
 766                         cs->sequence);
 767                 break;
 768         }
 769
 770         rc = hl_state_dump(hdev);
 771         if (rc)
 772                 dev_err(hdev->dev, "Error during system state dump %d\n", rc);
 773
 774         cs_put(cs);
 775
 776         if (likely(!skip_reset_on_timeout)) {
 777                 if (hdev->reset_on_lockup)
 778                         hl_device_reset(hdev, HL_DRV_RESET_TDR);
 779                 else
 780                         hdev->reset_info.needs_reset = true;
 781         }
 782 }
 783
 784 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 785                         enum hl_cs_type cs_type, u64 user_sequence,
 786                         struct hl_cs **cs_new, u32 flags, u32 timeout)
 787 {
 788         struct hl_cs_counters_atomic *cntr;
 789         struct hl_fence *other = NULL;
 790         struct hl_cs_compl *cs_cmpl;
 791         struct hl_cs *cs;
 792         int rc;
 793
 794         cntr = &hdev->aggregated_cs_counters;
 795
 796         cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
 797         if (!cs)
 798                 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
 799
 800         if (!cs) {
 801                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 802                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
 803                 return -ENOMEM;
 804         }
 805
 806         /* increment refcnt for context */
 807         hl_ctx_get(hdev, ctx);
 808
 809         cs->ctx = ctx;
 810         cs->submitted = false;
 811         cs->completed = false;
 812         cs->type = cs_type;
 813         cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
 814         cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
 815         cs->timeout_jiffies = timeout;
 816         cs->skip_reset_on_timeout =
 817                 hdev->reset_info.skip_reset_on_timeout ||
 818                 !!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
 819         cs->submission_time_jiffies = jiffies;
 820         INIT_LIST_HEAD(&cs->job_list);
 821         INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
 822         kref_init(&cs->refcount);
 823         spin_lock_init(&cs->job_lock);
 824
 825         cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
 826         if (!cs_cmpl)
 827                 cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL);
 828
 829         if (!cs_cmpl) {
 830                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 831                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
 832                 rc = -ENOMEM;
 833                 goto free_cs;
 834         }
 835
 836         cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
 837                         sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
 838         if (!cs->jobs_in_queue_cnt)
 839                 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
 840                                 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
 841
 842         if (!cs->jobs_in_queue_cnt) {
 843                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 844                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
 845                 rc = -ENOMEM;
 846                 goto free_cs_cmpl;
 847         }
 848
 849         cs_cmpl->hdev = hdev;
 850         cs_cmpl->type = cs->type;
 851         spin_lock_init(&cs_cmpl->lock);
 852         cs->fence = &cs_cmpl->base_fence;
 853
 854         spin_lock(&ctx->cs_lock);
 855
 856         cs_cmpl->cs_seq = ctx->cs_sequence;
 857         other = ctx->cs_pending[cs_cmpl->cs_seq &
 858                                 (hdev->asic_prop.max_pending_cs - 1)];
 859
 860         if (other && !completion_done(&other->completion)) {
 861                 /* If the following statement is true, it means we have reached
 862                  * a point in which only part of the staged submission was
 863                  * submitted and we don't have enough room in the 'cs_pending'
 864                  * array for the rest of the submission.
 865                  * This causes a deadlock because this CS will never be
 866                  * completed as it depends on future CS's for completion.
 867                  */
 868                 if (other->cs_sequence == user_sequence)
 869                         dev_crit_ratelimited(hdev->dev,
 870                                 "Staged CS %llu deadlock due to lack of resources",
 871                                 user_sequence);
 872
 873                 dev_dbg_ratelimited(hdev->dev,
 874                         "Rejecting CS because of too many in-flights CS\n");
 875                 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
 876                 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
 877                 rc = -EAGAIN;
 878                 goto free_fence;
 879         }
 880
 881         /* init hl_fence */
 882         hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
 883
 884         cs->sequence = cs_cmpl->cs_seq;
 885
 886         ctx->cs_pending[cs_cmpl->cs_seq &
 887                         (hdev->asic_prop.max_pending_cs - 1)] =
 888                                                         &cs_cmpl->base_fence;
 889         ctx->cs_sequence++;
 890
 891         hl_fence_get(&cs_cmpl->base_fence);
 892
 893         hl_fence_put(other);
 894
 895         spin_unlock(&ctx->cs_lock);
 896
 897         *cs_new = cs;
 898
 899         return 0;
 900
 901 free_fence:
 902         spin_unlock(&ctx->cs_lock);
 903         kfree(cs->jobs_in_queue_cnt);
 904 free_cs_cmpl:
 905         kfree(cs_cmpl);
 906 free_cs:
 907         kfree(cs);
 908         hl_ctx_put(ctx);
 909         return rc;
 910 }
 911
 912 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
 913 {
 914         struct hl_cs_job *job, *tmp;
 915
 916         staged_cs_put(hdev, cs);
 917
 918         list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
 919                 complete_job(hdev, job);
 920 }
 921
 922 void hl_cs_rollback_all(struct hl_device *hdev)
 923 {
 924         int i;
 925         struct hl_cs *cs, *tmp;
 926
 927         flush_workqueue(hdev->sob_reset_wq);
 928
 929         /* flush all completions before iterating over the CS mirror list in
 930          * order to avoid a race with the release functions
 931          */
 932         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 933                 flush_workqueue(hdev->cq_wq[i]);
 934
 935         /* Make sure we don't have leftovers in the CS mirror list */
 936         list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
 937                 cs_get(cs);
 938                 cs->aborted = true;
 939                 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
 940                                 cs->ctx->asid, cs->sequence);
 941                 cs_rollback(hdev, cs);
 942                 cs_put(cs);
 943         }
 944
 945         force_complete_multi_cs(hdev);
 946 }
 947
 948 static void
 949 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
 950 {
 951         struct hl_user_pending_interrupt *pend;
 952         unsigned long flags;
 953
 954         spin_lock_irqsave(&interrupt->wait_list_lock, flags);
 955         list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
 956                 pend->fence.error = -EIO;
 957                 complete_all(&pend->fence.completion);
 958         }
 959         spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 960 }
 961
 962 void hl_release_pending_user_interrupts(struct hl_device *hdev)
 963 {
 964         struct asic_fixed_properties *prop = &hdev->asic_prop;
 965         struct hl_user_interrupt *interrupt;
 966         int i;
 967
 968         if (!prop->user_interrupt_count)
 969                 return;
 970
 971         /* We iterate through the user interrupt requests and waking up all
 972          * user threads waiting for interrupt completion. We iterate the
 973          * list under a lock, this is why all user threads, once awake,
 974          * will wait on the same lock and will release the waiting object upon
 975          * unlock.
 976          */
 977
 978         for (i = 0 ; i < prop->user_interrupt_count ; i++) {
 979                 interrupt = &hdev->user_interrupt[i];
 980                 wake_pending_user_interrupt_threads(interrupt);
 981         }
 982
 983         interrupt = &hdev->common_user_interrupt;
 984         wake_pending_user_interrupt_threads(interrupt);
 985 }
 986
 987 static void job_wq_completion(struct work_struct *work)
 988 {
 989         struct hl_cs_job *job = container_of(work, struct hl_cs_job,
 990                                                 finish_work);
 991         struct hl_cs *cs = job->cs;
 992         struct hl_device *hdev = cs->ctx->hdev;
 993
 994         /* job is no longer needed */
 995         complete_job(hdev, job);
 996 }
 997
 998 static int validate_queue_index(struct hl_device *hdev,
 999                                 struct hl_cs_chunk *chunk,
1000                                 enum hl_queue_type *queue_type,
1001                                 bool *is_kernel_allocated_cb)
1002 {
1003         struct asic_fixed_properties *asic = &hdev->asic_prop;
1004         struct hw_queue_properties *hw_queue_prop;
1005
1006         /* This must be checked here to prevent out-of-bounds access to
1007          * hw_queues_props array
1008          */
1009         if (chunk->queue_index >= asic->max_queues) {
1010                 dev_err(hdev->dev, "Queue index %d is invalid\n",
1011                         chunk->queue_index);
1012                 return -EINVAL;
1013         }
1014
1015         hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
1016
1017         if (hw_queue_prop->type == QUEUE_TYPE_NA) {
1018                 dev_err(hdev->dev, "Queue index %d is invalid\n",
1019                         chunk->queue_index);
1020                 return -EINVAL;
1021         }
1022
1023         if (hw_queue_prop->driver_only) {
1024                 dev_err(hdev->dev,
1025                         "Queue index %d is restricted for the kernel driver\n",
1026                         chunk->queue_index);
1027                 return -EINVAL;
1028         }
1029
1030         /* When hw queue type isn't QUEUE_TYPE_HW,
1031          * USER_ALLOC_CB flag shall be referred as "don't care".
1032          */
1033         if (hw_queue_prop->type == QUEUE_TYPE_HW) {
1034                 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
1035                         if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
1036                                 dev_err(hdev->dev,
1037                                         "Queue index %d doesn't support user CB\n",
1038                                         chunk->queue_index);
1039                                 return -EINVAL;
1040                         }
1041
1042                         *is_kernel_allocated_cb = false;
1043                 } else {
1044                         if (!(hw_queue_prop->cb_alloc_flags &
1045                                         CB_ALLOC_KERNEL)) {
1046                                 dev_err(hdev->dev,
1047                                         "Queue index %d doesn't support kernel CB\n",
1048                                         chunk->queue_index);
1049                                 return -EINVAL;
1050                         }
1051
1052                         *is_kernel_allocated_cb = true;
1053                 }
1054         } else {
1055                 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
1056                                                 & CB_ALLOC_KERNEL);
1057         }
1058
1059         *queue_type = hw_queue_prop->type;
1060         return 0;
1061 }
1062
1063 static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
1064                                         struct hl_cb_mgr *cb_mgr,
1065                                         struct hl_cs_chunk *chunk)
1066 {
1067         struct hl_cb *cb;
1068         u32 cb_handle;
1069
1070         cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
1071
1072         cb = hl_cb_get(hdev, cb_mgr, cb_handle);
1073         if (!cb) {
1074                 dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
1075                 return NULL;
1076         }
1077
1078         if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
1079                 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
1080                 goto release_cb;
1081         }
1082
1083         atomic_inc(&cb->cs_cnt);
1084
1085         return cb;
1086
1087 release_cb:
1088         hl_cb_put(cb);
1089         return NULL;
1090 }
1091
1092 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
1093                 enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
1094 {
1095         struct hl_cs_job *job;
1096
1097         job = kzalloc(sizeof(*job), GFP_ATOMIC);
1098         if (!job)
1099                 job = kzalloc(sizeof(*job), GFP_KERNEL);
1100
1101         if (!job)
1102                 return NULL;
1103
1104         kref_init(&job->refcount);
1105         job->queue_type = queue_type;
1106         job->is_kernel_allocated_cb = is_kernel_allocated_cb;
1107
1108         if (is_cb_patched(hdev, job))
1109                 INIT_LIST_HEAD(&job->userptr_list);
1110
1111         if (job->queue_type == QUEUE_TYPE_EXT)
1112                 INIT_WORK(&job->finish_work, job_wq_completion);
1113
1114         return job;
1115 }
1116
1117 static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
1118 {
1119         if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
1120                 return CS_TYPE_SIGNAL;
1121         else if (cs_type_flags & HL_CS_FLAGS_WAIT)
1122                 return CS_TYPE_WAIT;
1123         else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
1124                 return CS_TYPE_COLLECTIVE_WAIT;
1125         else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY)
1126                 return CS_RESERVE_SIGNALS;
1127         else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
1128                 return CS_UNRESERVE_SIGNALS;
1129         else
1130                 return CS_TYPE_DEFAULT;
1131 }
1132
1133 static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
1134 {
1135         struct hl_device *hdev = hpriv->hdev;
1136         struct hl_ctx *ctx = hpriv->ctx;
1137         u32 cs_type_flags, num_chunks;
1138         enum hl_device_status status;
1139         enum hl_cs_type cs_type;
1140
1141         if (!hl_device_operational(hdev, &status)) {
1142                 return -EBUSY;
1143         }
1144
1145         if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1146                         !hdev->supports_staged_submission) {
1147                 dev_err(hdev->dev, "staged submission not supported");
1148                 return -EPERM;
1149         }
1150
1151         cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
1152
1153         if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
1154                 dev_err(hdev->dev,
1155                         "CS type flags are mutually exclusive, context %d\n",
1156                         ctx->asid);
1157                 return -EINVAL;
1158         }
1159
1160         cs_type = hl_cs_get_cs_type(cs_type_flags);
1161         num_chunks = args->in.num_chunks_execute;
1162
1163         if (unlikely((cs_type != CS_TYPE_DEFAULT) &&
1164                                         !hdev->supports_sync_stream)) {
1165                 dev_err(hdev->dev, "Sync stream CS is not supported\n");
1166                 return -EINVAL;
1167         }
1168
1169         if (cs_type == CS_TYPE_DEFAULT) {
1170                 if (!num_chunks) {
1171                         dev_err(hdev->dev,
1172                                 "Got execute CS with 0 chunks, context %d\n",
1173                                 ctx->asid);
1174                         return -EINVAL;
1175                 }
1176         } else if (num_chunks != 1) {
1177                 dev_err(hdev->dev,
1178                         "Sync stream CS mandates one chunk only, context %d\n",
1179                         ctx->asid);
1180                 return -EINVAL;
1181         }
1182
1183         return 0;
1184 }
1185
1186 static int hl_cs_copy_chunk_array(struct hl_device *hdev,
1187                                         struct hl_cs_chunk **cs_chunk_array,
1188                                         void __user *chunks, u32 num_chunks,
1189                                         struct hl_ctx *ctx)
1190 {
1191         u32 size_to_copy;
1192
1193         if (num_chunks > HL_MAX_JOBS_PER_CS) {
1194                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1195                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1196                 dev_err(hdev->dev,
1197                         "Number of chunks can NOT be larger than %d\n",
1198                         HL_MAX_JOBS_PER_CS);
1199                 return -EINVAL;
1200         }
1201
1202         *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
1203                                         GFP_ATOMIC);
1204         if (!*cs_chunk_array)
1205                 *cs_chunk_array = kmalloc_array(num_chunks,
1206                                         sizeof(**cs_chunk_array), GFP_KERNEL);
1207         if (!*cs_chunk_array) {
1208                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1209                 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1210                 return -ENOMEM;
1211         }
1212
1213         size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
1214         if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
1215                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1216                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1217                 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
1218                 kfree(*cs_chunk_array);
1219                 return -EFAULT;
1220         }
1221
1222         return 0;
1223 }
1224
1225 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
1226                                 u64 sequence, u32 flags,
1227                                 u32 encaps_signal_handle)
1228 {
1229         if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
1230                 return 0;
1231
1232         cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
1233         cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);
1234
1235         if (cs->staged_first) {
1236                 /* Staged CS sequence is the first CS sequence */
1237                 INIT_LIST_HEAD(&cs->staged_cs_node);
1238                 cs->staged_sequence = cs->sequence;
1239
1240                 if (cs->encaps_signals)
1241                         cs->encaps_sig_hdl_id = encaps_signal_handle;
1242         } else {
1243                 /* User sequence will be validated in 'hl_hw_queue_schedule_cs'
1244                  * under the cs_mirror_lock
1245                  */
1246                 cs->staged_sequence = sequence;
1247         }
1248
1249         /* Increment CS reference if needed */
1250         staged_cs_get(hdev, cs);
1251
1252         cs->staged_cs = true;
1253
1254         return 0;
1255 }
1256
1257 static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
1258 {
1259         int i;
1260
1261         for (i = 0; i < hdev->stream_master_qid_arr_size; i++)
1262                 if (qid == hdev->stream_master_qid_arr[i])
1263                         return BIT(i);
1264
1265         return 0;
1266 }
1267
1268 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
1269                                 u32 num_chunks, u64 *cs_seq, u32 flags,
1270                                 u32 encaps_signals_handle, u32 timeout,
1271                                 u16 *signal_initial_sob_count)
1272 {
1273         bool staged_mid, int_queues_only = true;
1274         struct hl_device *hdev = hpriv->hdev;
1275         struct hl_cs_chunk *cs_chunk_array;
1276         struct hl_cs_counters_atomic *cntr;
1277         struct hl_ctx *ctx = hpriv->ctx;
1278         struct hl_cs_job *job;
1279         struct hl_cs *cs;
1280         struct hl_cb *cb;
1281         u64 user_sequence;
1282         u8 stream_master_qid_map = 0;
1283         int rc, i;
1284
1285         cntr = &hdev->aggregated_cs_counters;
1286         user_sequence = *cs_seq;
1287         *cs_seq = ULLONG_MAX;
1288
1289         rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1290                         hpriv->ctx);
1291         if (rc)
1292                 goto out;
1293
1294         if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1295                         !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
1296                 staged_mid = true;
1297         else
1298                 staged_mid = false;
1299
1300         rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
1301                         staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
1302                         timeout);
1303         if (rc)
1304                 goto free_cs_chunk_array;
1305
1306         *cs_seq = cs->sequence;
1307
1308         hl_debugfs_add_cs(cs);
1309
1310         rc = cs_staged_submission(hdev, cs, user_sequence, flags,
1311                                                 encaps_signals_handle);
1312         if (rc)
1313                 goto free_cs_object;
1314
1315         /* If this is a staged submission we must return the staged sequence
1316          * rather than the internal CS sequence
1317          */
1318         if (cs->staged_cs)
1319                 *cs_seq = cs->staged_sequence;
1320
1321         /* Validate ALL the CS chunks before submitting the CS */
1322         for (i = 0 ; i < num_chunks ; i++) {
1323                 struct hl_cs_chunk *chunk = &cs_chunk_array[i];
1324                 enum hl_queue_type queue_type;
1325                 bool is_kernel_allocated_cb;
1326
1327                 rc = validate_queue_index(hdev, chunk, &queue_type,
1328                                                 &is_kernel_allocated_cb);
1329                 if (rc) {
1330                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1331                         atomic64_inc(&cntr->validation_drop_cnt);
1332                         goto free_cs_object;
1333                 }
1334
1335                 if (is_kernel_allocated_cb) {
1336                         cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
1337                         if (!cb) {
1338                                 atomic64_inc(
1339                                         &ctx->cs_counters.validation_drop_cnt);
1340                                 atomic64_inc(&cntr->validation_drop_cnt);
1341                                 rc = -EINVAL;
1342                                 goto free_cs_object;
1343                         }
1344                 } else {
1345                         cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
1346                 }
1347
1348                 if (queue_type == QUEUE_TYPE_EXT ||
1349                                                 queue_type == QUEUE_TYPE_HW) {
1350                         int_queues_only = false;
1351
1352                         /*
1353                          * store which stream are being used for external/HW
1354                          * queues of this CS
1355                          */
1356                         if (hdev->supports_wait_for_multi_cs)
1357                                 stream_master_qid_map |=
1358                                         get_stream_master_qid_mask(hdev,
1359                                                         chunk->queue_index);
1360                 }
1361
1362                 job = hl_cs_allocate_job(hdev, queue_type,
1363                                                 is_kernel_allocated_cb);
1364                 if (!job) {
1365                         atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1366                         atomic64_inc(&cntr->out_of_mem_drop_cnt);
1367                         dev_err(hdev->dev, "Failed to allocate a new job\n");
1368                         rc = -ENOMEM;
1369                         if (is_kernel_allocated_cb)
1370                                 goto release_cb;
1371
1372                         goto free_cs_object;
1373                 }
1374
1375                 job->id = i + 1;
1376                 job->cs = cs;
1377                 job->user_cb = cb;
1378                 job->user_cb_size = chunk->cb_size;
1379                 job->hw_queue_id = chunk->queue_index;
1380
1381                 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1382
1383                 list_add_tail(&job->cs_node, &cs->job_list);
1384
1385                 /*
1386                  * Increment CS reference. When CS reference is 0, CS is
1387                  * done and can be signaled to user and free all its resources
1388                  * Only increment for JOB on external or H/W queues, because
1389                  * only for those JOBs we get completion
1390                  */
1391                 if (cs_needs_completion(cs) &&
1392                         (job->queue_type == QUEUE_TYPE_EXT ||
1393                                 job->queue_type == QUEUE_TYPE_HW))
1394                         cs_get(cs);
1395
1396                 hl_debugfs_add_job(hdev, job);
1397
1398                 rc = cs_parser(hpriv, job);
1399                 if (rc) {
1400                         atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
1401                         atomic64_inc(&cntr->parsing_drop_cnt);
1402                         dev_err(hdev->dev,
1403                                 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
1404                                 cs->ctx->asid, cs->sequence, job->id, rc);
1405                         goto free_cs_object;
1406                 }
1407         }
1408
1409         /* We allow a CS with any queue type combination as long as it does
1410          * not get a completion
1411          */
1412         if (int_queues_only && cs_needs_completion(cs)) {
1413                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1414                 atomic64_inc(&cntr->validation_drop_cnt);
1415                 dev_err(hdev->dev,
1416                         "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
1417                         cs->ctx->asid, cs->sequence);
1418                 rc = -EINVAL;
1419                 goto free_cs_object;
1420         }
1421
1422         /*
1423          * store the (external/HW queues) streams used by the CS in the
1424          * fence object for multi-CS completion
1425          */
1426         if (hdev->supports_wait_for_multi_cs)
1427                 cs->fence->stream_master_qid_map = stream_master_qid_map;
1428
1429         rc = hl_hw_queue_schedule_cs(cs);
1430         if (rc) {
1431                 if (rc != -EAGAIN)
1432                         dev_err(hdev->dev,
1433                                 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
1434                                 cs->ctx->asid, cs->sequence, rc);
1435                 goto free_cs_object;
1436         }
1437
1438         *signal_initial_sob_count = cs->initial_sob_count;
1439
1440         rc = HL_CS_STATUS_SUCCESS;
1441         goto put_cs;
1442
1443 release_cb:
1444         atomic_dec(&cb->cs_cnt);
1445         hl_cb_put(cb);
1446 free_cs_object:
1447         cs_rollback(hdev, cs);
1448         *cs_seq = ULLONG_MAX;
1449         /* The path below is both for good and erroneous exits */
1450 put_cs:
1451         /* We finished with the CS in this function, so put the ref */
1452         cs_put(cs);
1453 free_cs_chunk_array:
1454         kfree(cs_chunk_array);
1455 out:
1456         return rc;
1457 }
1458
1459 static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
1460                                 u64 *cs_seq)
1461 {
1462         struct hl_device *hdev = hpriv->hdev;
1463         struct hl_ctx *ctx = hpriv->ctx;
1464         bool need_soft_reset = false;
1465         int rc = 0, do_ctx_switch;
1466         void __user *chunks;
1467         u32 num_chunks, tmp;
1468         u16 sob_count;
1469         int ret;
1470
1471         do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
1472
1473         if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
1474                 mutex_lock(&hpriv->restore_phase_mutex);
1475
1476                 if (do_ctx_switch) {
1477                         rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
1478                         if (rc) {
1479                                 dev_err_ratelimited(hdev->dev,
1480                                         "Failed to switch to context %d, rejecting CS! %d\n",
1481                                         ctx->asid, rc);
1482                                 /*
1483                                  * If we timedout, or if the device is not IDLE
1484                                  * while we want to do context-switch (-EBUSY),
1485                                  * we need to soft-reset because QMAN is
1486                                  * probably stuck. However, we can't call to
1487                                  * reset here directly because of deadlock, so
1488                                  * need to do it at the very end of this
1489                                  * function
1490                                  */
1491                                 if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
1492                                         need_soft_reset = true;
1493                                 mutex_unlock(&hpriv->restore_phase_mutex);
1494                                 goto out;
1495                         }
1496                 }
1497
1498                 hdev->asic_funcs->restore_phase_topology(hdev);
1499
1500                 chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
1501                 num_chunks = args->in.num_chunks_restore;
1502
1503                 if (!num_chunks) {
1504                         dev_dbg(hdev->dev,
1505                                 "Need to run restore phase but restore CS is empty\n");
1506                         rc = 0;
1507                 } else {
1508                         rc = cs_ioctl_default(hpriv, chunks, num_chunks,
1509                                         cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count);
1510                 }
1511
1512                 mutex_unlock(&hpriv->restore_phase_mutex);
1513
1514                 if (rc) {
1515                         dev_err(hdev->dev,
1516                                 "Failed to submit restore CS for context %d (%d)\n",
1517                                 ctx->asid, rc);
1518                         goto out;
1519                 }
1520
1521                 /* Need to wait for restore completion before execution phase */
1522                 if (num_chunks) {
1523                         enum hl_cs_wait_status status;
1524 wait_again:
1525                         ret = _hl_cs_wait_ioctl(hdev, ctx,
1526                                         jiffies_to_usecs(hdev->timeout_jiffies),
1527                                         *cs_seq, &status, NULL);
1528                         if (ret) {
1529                                 if (ret == -ERESTARTSYS) {
1530                                         usleep_range(100, 200);
1531                                         goto wait_again;
1532                                 }
1533
1534                                 dev_err(hdev->dev,
1535                                         "Restore CS for context %d failed to complete %d\n",
1536                                         ctx->asid, ret);
1537                                 rc = -ENOEXEC;
1538                                 goto out;
1539                         }
1540                 }
1541
1542                 ctx->thread_ctx_switch_wait_token = 1;
1543
1544         } else if (!ctx->thread_ctx_switch_wait_token) {
1545                 rc = hl_poll_timeout_memory(hdev,
1546                         &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
1547                         100, jiffies_to_usecs(hdev->timeout_jiffies), false);
1548
1549                 if (rc == -ETIMEDOUT) {
1550                         dev_err(hdev->dev,
1551                                 "context switch phase timeout (%d)\n", tmp);
1552                         goto out;
1553                 }
1554         }
1555
1556 out:
1557         if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
1558                 hl_device_reset(hdev, 0);
1559
1560         return rc;
1561 }
1562
1563 /*
1564  * hl_cs_signal_sob_wraparound_handler: handle SOB value wrapaound case.
1565  * if the SOB value reaches the max value move to the other SOB reserved
1566  * to the queue.
1567  * @hdev: pointer to device structure
1568  * @q_idx: stream queue index
1569  * @hw_sob: the H/W SOB used in this signal CS.
1570  * @count: signals count
1571  * @encaps_sig: tells whether it's reservation for encaps signals or not.
1572  *
1573  * Note that this function must be called while hw_queues_lock is taken.
1574  */
1575 int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
1576                         struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig)
1577
1578 {
1579         struct hl_sync_stream_properties *prop;
1580         struct hl_hw_sob *sob = *hw_sob, *other_sob;
1581         u8 other_sob_offset;
1582
1583         prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1584
1585         hw_sob_get(sob);
1586
1587         /* check for wraparound */
1588         if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
1589                 /*
1590                  * Decrement as we reached the max value.
1591                  * The release function won't be called here as we've
1592                  * just incremented the refcount right before calling this
1593                  * function.
1594                  */
1595                 hw_sob_put_err(sob);
1596
1597                 /*
1598                  * check the other sob value, if it still in use then fail
1599                  * otherwise make the switch
1600                  */
1601                 other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
1602                 other_sob = &prop->hw_sob[other_sob_offset];
1603
1604                 if (kref_read(&other_sob->kref) != 1) {
1605                         dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n",
1606                                                                 q_idx);
1607                         return -EINVAL;
1608                 }
1609
1610                 /*
1611                  * next_sob_val always points to the next available signal
1612                  * in the sob, so in encaps signals it will be the next one
1613                  * after reserving the required amount.
1614                  */
1615                 if (encaps_sig)
1616                         prop->next_sob_val = count + 1;
1617                 else
1618                         prop->next_sob_val = count;
1619
1620                 /* only two SOBs are currently in use */
1621                 prop->curr_sob_offset = other_sob_offset;
1622                 *hw_sob = other_sob;
1623
1624                 /*
1625                  * check if other_sob needs reset, then do it before using it
1626                  * for the reservation or the next signal cs.
1627                  * we do it here, and for both encaps and regular signal cs
1628                  * cases in order to avoid possible races of two kref_put
1629                  * of the sob which can occur at the same time if we move the
1630                  * sob reset(kref_put) to cs_do_release function.
1631                  * in addition, if we have combination of cs signal and
1632                  * encaps, and at the point we need to reset the sob there was
1633                  * no more reservations and only signal cs keep coming,
1634                  * in such case we need signal_cs to put the refcount and
1635                  * reset the sob.
1636                  */
1637                 if (other_sob->need_reset)
1638                         hw_sob_put(other_sob);
1639
1640                 if (encaps_sig) {
1641                         /* set reset indication for the sob */
1642                         sob->need_reset = true;
1643                         hw_sob_get(other_sob);
1644                 }
1645
1646                 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
1647                                 prop->curr_sob_offset, q_idx);
1648         } else {
1649                 prop->next_sob_val += count;
1650         }
1651
1652         return 0;
1653 }
1654
1655 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
1656                 struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx,
1657                 bool encaps_signals)
1658 {
1659         u64 *signal_seq_arr = NULL;
1660         u32 size_to_copy, signal_seq_arr_len;
1661         int rc = 0;
1662
1663         if (encaps_signals) {
1664                 *signal_seq = chunk->encaps_signal_seq;
1665                 return 0;
1666         }
1667
1668         signal_seq_arr_len = chunk->num_signal_seq_arr;
1669
1670         /* currently only one signal seq is supported */
1671         if (signal_seq_arr_len != 1) {
1672                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1673                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1674                 dev_err(hdev->dev,
1675                         "Wait for signal CS supports only one signal CS seq\n");
1676                 return -EINVAL;
1677         }
1678
1679         signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1680                                         sizeof(*signal_seq_arr),
1681                                         GFP_ATOMIC);
1682         if (!signal_seq_arr)
1683                 signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1684                                         sizeof(*signal_seq_arr),
1685                                         GFP_KERNEL);
1686         if (!signal_seq_arr) {
1687                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1688                 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1689                 return -ENOMEM;
1690         }
1691
1692         size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr);
1693         if (copy_from_user(signal_seq_arr,
1694                                 u64_to_user_ptr(chunk->signal_seq_arr),
1695                                 size_to_copy)) {
1696                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1697                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1698                 dev_err(hdev->dev,
1699                         "Failed to copy signal seq array from user\n");
1700                 rc = -EFAULT;
1701                 goto out;
1702         }
1703
1704         /* currently it is guaranteed to have only one signal seq */
1705         *signal_seq = signal_seq_arr[0];
1706
1707 out:
1708         kfree(signal_seq_arr);
1709
1710         return rc;
1711 }
1712
1713 static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
1714                 struct hl_ctx *ctx, struct hl_cs *cs,
1715                 enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset)
1716 {
1717         struct hl_cs_counters_atomic *cntr;
1718         struct hl_cs_job *job;
1719         struct hl_cb *cb;
1720         u32 cb_size;
1721
1722         cntr = &hdev->aggregated_cs_counters;
1723
1724         job = hl_cs_allocate_job(hdev, q_type, true);
1725         if (!job) {
1726                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1727                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1728                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1729                 return -ENOMEM;
1730         }
1731
1732         if (cs->type == CS_TYPE_WAIT)
1733                 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
1734         else
1735                 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
1736
1737         cb = hl_cb_kernel_create(hdev, cb_size,
1738                                 q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
1739         if (!cb) {
1740                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1741                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1742                 kfree(job);
1743                 return -EFAULT;
1744         }
1745
1746         job->id = 0;
1747         job->cs = cs;
1748         job->user_cb = cb;
1749         atomic_inc(&job->user_cb->cs_cnt);
1750         job->user_cb_size = cb_size;
1751         job->hw_queue_id = q_idx;
1752
1753         if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
1754                         && cs->encaps_signals)
1755                 job->encaps_sig_wait_offset = encaps_signal_offset;
1756         /*
1757          * No need in parsing, user CB is the patched CB.
1758          * We call hl_cb_destroy() out of two reasons - we don't need the CB in
1759          * the CB idr anymore and to decrement its refcount as it was
1760          * incremented inside hl_cb_kernel_create().
1761          */
1762         job->patched_cb = job->user_cb;
1763         job->job_cb_size = job->user_cb_size;
1764         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1765
1766         /* increment refcount as for external queues we get completion */
1767         cs_get(cs);
1768
1769         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1770
1771         list_add_tail(&job->cs_node, &cs->job_list);
1772
1773         hl_debugfs_add_job(hdev, job);
1774
1775         return 0;
1776 }
1777
1778 static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
1779                                 u32 q_idx, u32 count,
1780                                 u32 *handle_id, u32 *sob_addr,
1781                                 u32 *signals_count)
1782 {
1783         struct hw_queue_properties *hw_queue_prop;
1784         struct hl_sync_stream_properties *prop;
1785         struct hl_device *hdev = hpriv->hdev;
1786         struct hl_cs_encaps_sig_handle *handle;
1787         struct hl_encaps_signals_mgr *mgr;
1788         struct hl_hw_sob *hw_sob;
1789         int hdl_id;
1790         int rc = 0;
1791
1792         if (count >= HL_MAX_SOB_VAL) {
1793                 dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n",
1794                                                 count);
1795                 rc = -EINVAL;
1796                 goto out;
1797         }
1798
1799         if (q_idx >= hdev->asic_prop.max_queues) {
1800                 dev_err(hdev->dev, "Queue index %d is invalid\n",
1801                         q_idx);
1802                 rc = -EINVAL;
1803                 goto out;
1804         }
1805
1806         hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
1807
1808         if (!hw_queue_prop->supports_sync_stream) {
1809                 dev_err(hdev->dev,
1810                         "Queue index %d does not support sync stream operations\n",
1811                                                                         q_idx);
1812                 rc = -EINVAL;
1813                 goto out;
1814         }
1815
1816         prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1817
1818         handle = kzalloc(sizeof(*handle), GFP_KERNEL);
1819         if (!handle) {
1820                 rc = -ENOMEM;
1821                 goto out;
1822         }
1823
1824         handle->count = count;
1825
1826         hl_ctx_get(hdev, hpriv->ctx);
1827         handle->ctx = hpriv->ctx;
1828         mgr = &hpriv->ctx->sig_mgr;
1829
1830         spin_lock(&mgr->lock);
1831         hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC);
1832         spin_unlock(&mgr->lock);
1833
1834         if (hdl_id < 0) {
1835                 dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
1836                 rc = -EINVAL;
1837                 goto put_ctx;
1838         }
1839
1840         handle->id = hdl_id;
1841         handle->q_idx = q_idx;
1842         handle->hdev = hdev;
1843         kref_init(&handle->refcount);
1844
1845         hdev->asic_funcs->hw_queues_lock(hdev);
1846
1847         hw_sob = &prop->hw_sob[prop->curr_sob_offset];
1848
1849         /*
1850          * Increment the SOB value by count by user request
1851          * to reserve those signals
1852          * check if the signals amount to reserve is not exceeding the max sob
1853          * value, if yes then switch sob.
1854          */
1855         rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count,
1856                                                                 true);
1857         if (rc) {
1858                 dev_err(hdev->dev, "Failed to switch SOB\n");
1859                 hdev->asic_funcs->hw_queues_unlock(hdev);
1860                 rc = -EINVAL;
1861                 goto remove_idr;
1862         }
1863         /* set the hw_sob to the handle after calling the sob wraparound handler
1864          * since sob could have changed.
1865          */
1866         handle->hw_sob = hw_sob;
1867
1868         /* store the current sob value for unreserve validity check, and
1869          * signal offset support
1870          */
1871         handle->pre_sob_val = prop->next_sob_val - handle->count;
1872
1873         *signals_count = prop->next_sob_val;
1874         hdev->asic_funcs->hw_queues_unlock(hdev);
1875
1876         *sob_addr = handle->hw_sob->sob_addr;
1877         *handle_id = hdl_id;
1878
1879         dev_dbg(hdev->dev,
1880                 "Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n",
1881                         hw_sob->sob_id, handle->hw_sob->sob_addr,
1882                         prop->next_sob_val - 1, q_idx, hdl_id);
1883         goto out;
1884
1885 remove_idr:
1886         spin_lock(&mgr->lock);
1887         idr_remove(&mgr->handles, hdl_id);
1888         spin_unlock(&mgr->lock);
1889
1890 put_ctx:
1891         hl_ctx_put(handle->ctx);
1892         kfree(handle);
1893
1894 out:
1895         return rc;
1896 }
1897
1898 static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
1899 {
1900         struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
1901         struct hl_sync_stream_properties *prop;
1902         struct hl_device *hdev = hpriv->hdev;
1903         struct hl_encaps_signals_mgr *mgr;
1904         struct hl_hw_sob *hw_sob;
1905         u32 q_idx, sob_addr;
1906         int rc = 0;
1907
1908         mgr = &hpriv->ctx->sig_mgr;
1909
1910         spin_lock(&mgr->lock);
1911         encaps_sig_hdl = idr_find(&mgr->handles, handle_id);
1912         if (encaps_sig_hdl) {
1913                 dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n",
1914                                 handle_id, encaps_sig_hdl->hw_sob->sob_addr,
1915                                         encaps_sig_hdl->count);
1916
1917                 hdev->asic_funcs->hw_queues_lock(hdev);
1918
1919                 q_idx = encaps_sig_hdl->q_idx;
1920                 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1921                 hw_sob = &prop->hw_sob[prop->curr_sob_offset];
1922                 sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
1923
1924                 /* Check if sob_val got out of sync due to other
1925                  * signal submission requests which were handled
1926                  * between the reserve-unreserve calls or SOB switch
1927                  * upon reaching SOB max value.
1928                  */
1929                 if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count
1930                                 != prop->next_sob_val ||
1931                                 sob_addr != encaps_sig_hdl->hw_sob->sob_addr) {
1932                         dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n",
1933                                 encaps_sig_hdl->pre_sob_val,
1934                                 (prop->next_sob_val - encaps_sig_hdl->count));
1935
1936                         hdev->asic_funcs->hw_queues_unlock(hdev);
1937                         rc = -EINVAL;
1938                         goto out;
1939                 }
1940
1941                 /*
1942                  * Decrement the SOB value by count by user request
1943                  * to unreserve those signals
1944                  */
1945                 prop->next_sob_val -= encaps_sig_hdl->count;
1946
1947                 hdev->asic_funcs->hw_queues_unlock(hdev);
1948
1949                 hw_sob_put(hw_sob);
1950
1951                 /* Release the id and free allocated memory of the handle */
1952                 idr_remove(&mgr->handles, handle_id);
1953                 hl_ctx_put(encaps_sig_hdl->ctx);
1954                 kfree(encaps_sig_hdl);
1955         } else {
1956                 rc = -EINVAL;
1957                 dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n");
1958         }
1959 out:
1960         spin_unlock(&mgr->lock);
1961
1962         return rc;
1963 }
1964
1965 static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
1966                                 void __user *chunks, u32 num_chunks,
1967                                 u64 *cs_seq, u32 flags, u32 timeout,
1968                                 u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count)
1969 {
1970         struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL;
1971         bool handle_found = false, is_wait_cs = false,
1972                         wait_cs_submitted = false,
1973                         cs_encaps_signals = false;
1974         struct hl_cs_chunk *cs_chunk_array, *chunk;
1975         bool staged_cs_with_encaps_signals = false;
1976         struct hw_queue_properties *hw_queue_prop;
1977         struct hl_device *hdev = hpriv->hdev;
1978         struct hl_cs_compl *sig_waitcs_cmpl;
1979         u32 q_idx, collective_engine_id = 0;
1980         struct hl_cs_counters_atomic *cntr;
1981         struct hl_fence *sig_fence = NULL;
1982         struct hl_ctx *ctx = hpriv->ctx;
1983         enum hl_queue_type q_type;
1984         struct hl_cs *cs;
1985         u64 signal_seq;
1986         int rc;
1987
1988         cntr = &hdev->aggregated_cs_counters;
1989         *cs_seq = ULLONG_MAX;
1990
1991         rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1992                         ctx);
1993         if (rc)
1994                 goto out;
1995
1996         /* currently it is guaranteed to have only one chunk */
1997         chunk = &cs_chunk_array[0];
1998
1999         if (chunk->queue_index >= hdev->asic_prop.max_queues) {
2000                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2001                 atomic64_inc(&cntr->validation_drop_cnt);
2002                 dev_err(hdev->dev, "Queue index %d is invalid\n",
2003                         chunk->queue_index);
2004                 rc = -EINVAL;
2005                 goto free_cs_chunk_array;
2006         }
2007
2008         q_idx = chunk->queue_index;
2009         hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
2010         q_type = hw_queue_prop->type;
2011
2012         if (!hw_queue_prop->supports_sync_stream) {
2013                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2014                 atomic64_inc(&cntr->validation_drop_cnt);
2015                 dev_err(hdev->dev,
2016                         "Queue index %d does not support sync stream operations\n",
2017                         q_idx);
2018                 rc = -EINVAL;
2019                 goto free_cs_chunk_array;
2020         }
2021
2022         if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
2023                 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
2024                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2025                         atomic64_inc(&cntr->validation_drop_cnt);
2026                         dev_err(hdev->dev,
2027                                 "Queue index %d is invalid\n", q_idx);
2028                         rc = -EINVAL;
2029                         goto free_cs_chunk_array;
2030                 }
2031
2032                 if (!hdev->nic_ports_mask) {
2033                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2034                         atomic64_inc(&cntr->validation_drop_cnt);
2035                         dev_err(hdev->dev,
2036                                 "Collective operations not supported when NIC ports are disabled");
2037                         rc = -EINVAL;
2038                         goto free_cs_chunk_array;
2039                 }
2040
2041                 collective_engine_id = chunk->collective_engine_id;
2042         }
2043
2044         is_wait_cs = !!(cs_type == CS_TYPE_WAIT ||
2045                         cs_type == CS_TYPE_COLLECTIVE_WAIT);
2046
2047         cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
2048
2049         if (is_wait_cs) {
2050                 rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq,
2051                                 ctx, cs_encaps_signals);
2052                 if (rc)
2053                         goto free_cs_chunk_array;
2054
2055                 if (cs_encaps_signals) {
2056                         /* check if cs sequence has encapsulated
2057                          * signals handle
2058                          */
2059                         struct idr *idp;
2060                         u32 id;
2061
2062                         spin_lock(&ctx->sig_mgr.lock);
2063                         idp = &ctx->sig_mgr.handles;
2064                         idr_for_each_entry(idp, encaps_sig_hdl, id) {
2065                                 if (encaps_sig_hdl->cs_seq == signal_seq) {
2066                                         /* get refcount to protect removing this handle from idr,
2067                                          * needed when multiple wait cs are used with offset
2068                                          * to wait on reserved encaps signals.
2069                                          * Since kref_put of this handle is executed outside the
2070                                          * current lock, it is possible that the handle refcount
2071                                          * is 0 but it yet to be removed from the list. In this
2072                                          * case need to consider the handle as not valid.
2073                                          */
2074                                         if (kref_get_unless_zero(&encaps_sig_hdl->refcount))
2075                                                 handle_found = true;
2076                                         break;
2077                                 }
2078                         }
2079                         spin_unlock(&ctx->sig_mgr.lock);
2080
2081                         if (!handle_found) {
2082                                 /* treat as signal CS already finished */
2083                                 dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
2084                                                 signal_seq);
2085                                 rc = 0;
2086                                 goto free_cs_chunk_array;
2087                         }
2088
2089                         /* validate also the signal offset value */
2090                         if (chunk->encaps_signal_offset >
2091                                         encaps_sig_hdl->count) {
2092                                 dev_err(hdev->dev, "offset(%u) value exceed max reserved signals count(%u)!\n",
2093                                                 chunk->encaps_signal_offset,
2094                                                 encaps_sig_hdl->count);
2095                                 rc = -EINVAL;
2096                                 goto free_cs_chunk_array;
2097                         }
2098                 }
2099
2100                 sig_fence = hl_ctx_get_fence(ctx, signal_seq);
2101                 if (IS_ERR(sig_fence)) {
2102                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2103                         atomic64_inc(&cntr->validation_drop_cnt);
2104                         dev_err(hdev->dev,
2105                                 "Failed to get signal CS with seq 0x%llx\n",
2106                                 signal_seq);
2107                         rc = PTR_ERR(sig_fence);
2108                         goto free_cs_chunk_array;
2109                 }
2110
2111                 if (!sig_fence) {
2112                         /* signal CS already finished */
2113                         rc = 0;
2114                         goto free_cs_chunk_array;
2115                 }
2116
2117                 sig_waitcs_cmpl =
2118                         container_of(sig_fence, struct hl_cs_compl, base_fence);
2119
2120                 staged_cs_with_encaps_signals = !!
2121                                 (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT &&
2122                                 (flags & HL_CS_FLAGS_ENCAP_SIGNALS));
2123
2124                 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL &&
2125                                 !staged_cs_with_encaps_signals) {
2126                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2127                         atomic64_inc(&cntr->validation_drop_cnt);
2128                         dev_err(hdev->dev,
2129                                 "CS seq 0x%llx is not of a signal/encaps-signal CS\n",
2130                                 signal_seq);
2131                         hl_fence_put(sig_fence);
2132                         rc = -EINVAL;
2133                         goto free_cs_chunk_array;
2134                 }
2135
2136                 if (completion_done(&sig_fence->completion)) {
2137                         /* signal CS already finished */
2138                         hl_fence_put(sig_fence);
2139                         rc = 0;
2140                         goto free_cs_chunk_array;
2141                 }
2142         }
2143
2144         rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
2145         if (rc) {
2146                 if (is_wait_cs)
2147                         hl_fence_put(sig_fence);
2148
2149                 goto free_cs_chunk_array;
2150         }
2151
2152         /*
2153          * Save the signal CS fence for later initialization right before
2154          * hanging the wait CS on the queue.
2155          * for encaps signals case, we save the cs sequence and handle pointer
2156          * for later initialization.
2157          */
2158         if (is_wait_cs) {
2159                 cs->signal_fence = sig_fence;
2160                 /* store the handle pointer, so we don't have to
2161                  * look for it again, later on the flow
2162                  * when we need to set SOB info in hw_queue.
2163                  */
2164                 if (cs->encaps_signals)
2165                         cs->encaps_sig_hdl = encaps_sig_hdl;
2166         }
2167
2168         hl_debugfs_add_cs(cs);
2169
2170         *cs_seq = cs->sequence;
2171
2172         if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
2173                 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
2174                                 q_idx, chunk->encaps_signal_offset);
2175         else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
2176                 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
2177                                 cs, q_idx, collective_engine_id,
2178                                 chunk->encaps_signal_offset);
2179         else {
2180                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2181                 atomic64_inc(&cntr->validation_drop_cnt);
2182                 rc = -EINVAL;
2183         }
2184
2185         if (rc)
2186                 goto free_cs_object;
2187
2188         rc = hl_hw_queue_schedule_cs(cs);
2189         if (rc) {
2190                 /* In case wait cs failed here, it means the signal cs
2191                  * already completed. we want to free all it's related objects
2192                  * but we don't want to fail the ioctl.
2193                  */
2194                 if (is_wait_cs)
2195                         rc = 0;
2196                 else if (rc != -EAGAIN)
2197                         dev_err(hdev->dev,
2198                                 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
2199                                 ctx->asid, cs->sequence, rc);
2200                 goto free_cs_object;
2201         }
2202
2203         *signal_sob_addr_offset = cs->sob_addr_offset;
2204         *signal_initial_sob_count = cs->initial_sob_count;
2205
2206         rc = HL_CS_STATUS_SUCCESS;
2207         if (is_wait_cs)
2208                 wait_cs_submitted = true;
2209         goto put_cs;
2210
2211 free_cs_object:
2212         cs_rollback(hdev, cs);
2213         *cs_seq = ULLONG_MAX;
2214         /* The path below is both for good and erroneous exits */
2215 put_cs:
2216         /* We finished with the CS in this function, so put the ref */
2217         cs_put(cs);
2218 free_cs_chunk_array:
2219         if (!wait_cs_submitted && cs_encaps_signals && handle_found &&
2220                                                         is_wait_cs)
2221                 kref_put(&encaps_sig_hdl->refcount,
2222                                 hl_encaps_handle_do_release);
2223         kfree(cs_chunk_array);
2224 out:
2225         return rc;
2226 }
2227
2228 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
2229 {
2230         union hl_cs_args *args = data;
2231         enum hl_cs_type cs_type = 0;
2232         u64 cs_seq = ULONG_MAX;
2233         void __user *chunks;
2234         u32 num_chunks, flags, timeout,
2235                 signals_count = 0, sob_addr = 0, handle_id = 0;
2236         u16 sob_initial_count = 0;
2237         int rc;
2238
2239         rc = hl_cs_sanity_checks(hpriv, args);
2240         if (rc)
2241                 goto out;
2242
2243         rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
2244         if (rc)
2245                 goto out;
2246
2247         cs_type = hl_cs_get_cs_type(args->in.cs_flags &
2248                                         ~HL_CS_FLAGS_FORCE_RESTORE);
2249         chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
2250         num_chunks = args->in.num_chunks_execute;
2251         flags = args->in.cs_flags;
2252
2253         /* In case this is a staged CS, user should supply the CS sequence */
2254         if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
2255                         !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
2256                 cs_seq = args->in.seq;
2257
2258         timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
2259                         ? msecs_to_jiffies(args->in.timeout * 1000)
2260                         : hpriv->hdev->timeout_jiffies;
2261
2262         switch (cs_type) {
2263         case CS_TYPE_SIGNAL:
2264         case CS_TYPE_WAIT:
2265         case CS_TYPE_COLLECTIVE_WAIT:
2266                 rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
2267                                         &cs_seq, args->in.cs_flags, timeout,
2268                                         &sob_addr, &sob_initial_count);
2269                 break;
2270         case CS_RESERVE_SIGNALS:
2271                 rc = cs_ioctl_reserve_signals(hpriv,
2272                                         args->in.encaps_signals_q_idx,
2273                                         args->in.encaps_signals_count,
2274                                         &handle_id, &sob_addr, &signals_count);
2275                 break;
2276         case CS_UNRESERVE_SIGNALS:
2277                 rc = cs_ioctl_unreserve_signals(hpriv,
2278                                         args->in.encaps_sig_handle_id);
2279                 break;
2280         default:
2281                 rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
2282                                                 args->in.cs_flags,
2283                                                 args->in.encaps_sig_handle_id,
2284                                                 timeout, &sob_initial_count);
2285                 break;
2286         }
2287 out:
2288         if (rc != -EAGAIN) {
2289                 memset(args, 0, sizeof(*args));
2290
2291                 switch (cs_type) {
2292                 case CS_RESERVE_SIGNALS:
2293                         args->out.handle_id = handle_id;
2294                         args->out.sob_base_addr_offset = sob_addr;
2295                         args->out.count = signals_count;
2296                         break;
2297                 case CS_TYPE_SIGNAL:
2298                         args->out.sob_base_addr_offset = sob_addr;
2299                         args->out.sob_count_before_submission = sob_initial_count;
2300                         args->out.seq = cs_seq;
2301                         break;
2302                 case CS_TYPE_DEFAULT:
2303                         args->out.sob_count_before_submission = sob_initial_count;
2304                         args->out.seq = cs_seq;
2305                         break;
2306                 default:
2307                         args->out.seq = cs_seq;
2308                         break;
2309                 }
2310
2311                 args->out.status = rc;
2312         }
2313
2314         return rc;
2315 }
2316
2317 static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence,
2318                                 enum hl_cs_wait_status *status, u64 timeout_us,
2319                                 s64 *timestamp)
2320 {
2321         struct hl_device *hdev = ctx->hdev;
2322         long completion_rc;
2323         int rc = 0;
2324
2325         if (IS_ERR(fence)) {
2326                 rc = PTR_ERR(fence);
2327                 if (rc == -EINVAL)
2328                         dev_notice_ratelimited(hdev->dev,
2329                                 "Can't wait on CS %llu because current CS is at seq %llu\n",
2330                                 seq, ctx->cs_sequence);
2331                 return rc;
2332         }
2333
2334         if (!fence) {
2335                 dev_dbg(hdev->dev,
2336                         "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
2337                                 seq, ctx->cs_sequence);
2338
2339                 *status = CS_WAIT_STATUS_GONE;
2340                 return 0;
2341         }
2342
2343         if (!timeout_us) {
2344                 completion_rc = completion_done(&fence->completion);
2345         } else {
2346                 unsigned long timeout;
2347
2348                 timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ?
2349                                 timeout_us : usecs_to_jiffies(timeout_us);
2350                 completion_rc =
2351                         wait_for_completion_interruptible_timeout(
2352                                 &fence->completion, timeout);
2353         }
2354
2355         if (completion_rc > 0) {
2356                 *status = CS_WAIT_STATUS_COMPLETED;
2357                 if (timestamp)
2358                         *timestamp = ktime_to_ns(fence->timestamp);
2359         } else {
2360                 *status = CS_WAIT_STATUS_BUSY;
2361         }
2362
2363         if (fence->error == -ETIMEDOUT)
2364                 rc = -ETIMEDOUT;
2365         else if (fence->error == -EIO)
2366                 rc = -EIO;
2367
2368         return rc;
2369 }
2370
2371 /*
2372  * hl_cs_poll_fences - iterate CS fences to check for CS completion
2373  *
2374  * @mcs_data: multi-CS internal data
2375  * @mcs_compl: multi-CS completion structure
2376  *
2377  * @return 0 on success, otherwise non 0 error code
2378  *
2379  * The function iterates on all CS sequence in the list and set bit in
2380  * completion_bitmap for each completed CS.
2381  * While iterating, the function sets the stream map of each fence in the fence
2382  * array in the completion QID stream map to be used by CSs to perform
2383  * completion to the multi-CS context.
2384  * This function shall be called after taking context ref
2385  */
2386 static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl)
2387 {
2388         struct hl_fence **fence_ptr = mcs_data->fence_arr;
2389         struct hl_device *hdev = mcs_data->ctx->hdev;
2390         int i, rc, arr_len = mcs_data->arr_len;
2391         u64 *seq_arr = mcs_data->seq_arr;
2392         ktime_t max_ktime, first_cs_time;
2393         enum hl_cs_wait_status status;
2394
2395         memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr));
2396
2397         /* get all fences under the same lock */
2398         rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
2399         if (rc)
2400                 return rc;
2401
2402         /*
2403          * re-initialize the completion here to handle 2 possible cases:
2404          * 1. CS will complete the multi-CS prior clearing the completion. in which
2405          *    case the fence iteration is guaranteed to catch the CS completion.
2406          * 2. the completion will occur after re-init of the completion.
2407          *    in which case we will wake up immediately in wait_for_completion.
2408          */
2409         reinit_completion(&mcs_compl->completion);
2410
2411         /*
2412          * set to maximum time to verify timestamp is valid: if at the end
2413          * this value is maintained- no timestamp was updated
2414          */
2415         max_ktime = ktime_set(KTIME_SEC_MAX, 0);
2416         first_cs_time = max_ktime;
2417
2418         for (i = 0; i < arr_len; i++, fence_ptr++) {
2419                 struct hl_fence *fence = *fence_ptr;
2420
2421                 /*
2422                  * In order to prevent case where we wait until timeout even though a CS associated
2423                  * with the multi-CS actually completed we do things in the below order:
2424                  * 1. for each fence set it's QID map in the multi-CS completion QID map. This way
2425                  *    any CS can, potentially, complete the multi CS for the specific QID (note
2426                  *    that once completion is initialized, calling complete* and then wait on the
2427                  *    completion will cause it to return at once)
2428                  * 2. only after allowing multi-CS completion for the specific QID we check whether
2429                  *    the specific CS already completed (and thus the wait for completion part will
2430                  *    be skipped). if the CS not completed it is guaranteed that completing CS will
2431                  *    wake up the completion.
2432                  */
2433                 if (fence)
2434                         mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map;
2435
2436                 /*
2437                  * function won't sleep as it is called with timeout 0 (i.e.
2438                  * poll the fence)
2439                  */
2440                 rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence,
2441                                                 &status, 0, NULL);
2442                 if (rc) {
2443                         dev_err(hdev->dev,
2444                                 "wait_for_fence error :%d for CS seq %llu\n",
2445                                                                 rc, seq_arr[i]);
2446                         break;
2447                 }
2448
2449                 switch (status) {
2450                 case CS_WAIT_STATUS_BUSY:
2451                         /* CS did not finished, QID to wait on already stored */
2452                         break;
2453                 case CS_WAIT_STATUS_COMPLETED:
2454                         /*
2455                          * Using mcs_handling_done to avoid possibility of mcs_data
2456                          * returns to user indicating CS completed before it finished
2457                          * all of its mcs handling, to avoid race the next time the
2458                          * user waits for mcs.
2459                          * note: when reaching this case fence is definitely not NULL
2460                          *       but NULL check was added to overcome static analysis
2461                          */
2462                         if (fence && !fence->mcs_handling_done) {
2463                                 /*
2464                                  * in case multi CS is completed but MCS handling not done
2465                                  * we "complete" the multi CS to prevent it from waiting
2466                                  * until time-out and the "multi-CS handling done" will have
2467                                  * another chance at the next iteration
2468                                  */
2469                                 complete_all(&mcs_compl->completion);
2470                                 break;
2471                         }
2472
2473                         mcs_data->completion_bitmap |= BIT(i);
2474                         /*
2475                          * For all completed CSs we take the earliest timestamp.
2476                          * For this we have to validate that the timestamp is
2477                          * earliest of all timestamps so far.
2478                          */
2479                         if (mcs_data->update_ts &&
2480                                         (ktime_compare(fence->timestamp, first_cs_time) < 0))
2481                                 first_cs_time = fence->timestamp;
2482                         break;
2483                 case CS_WAIT_STATUS_GONE:
2484                         mcs_data->update_ts = false;
2485                         mcs_data->gone_cs = true;
2486                         /*
2487                          * It is possible to get an old sequence numbers from user
2488                          * which related to already completed CSs and their fences
2489                          * already gone. In this case, CS set as completed but
2490                          * no need to consider its QID for mcs completion.
2491                          */
2492                         mcs_data->completion_bitmap |= BIT(i);
2493                         break;
2494                 default:
2495                         dev_err(hdev->dev, "Invalid fence status\n");
2496                         return -EINVAL;
2497                 }
2498
2499         }
2500
2501         hl_fences_put(mcs_data->fence_arr, arr_len);
2502
2503         if (mcs_data->update_ts &&
2504                         (ktime_compare(first_cs_time, max_ktime) != 0))
2505                 mcs_data->timestamp = ktime_to_ns(first_cs_time);
2506
2507         return rc;
2508 }
2509
2510 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
2511                                 u64 timeout_us, u64 seq,
2512                                 enum hl_cs_wait_status *status, s64 *timestamp)
2513 {
2514         struct hl_fence *fence;
2515         int rc = 0;
2516
2517         if (timestamp)
2518                 *timestamp = 0;
2519
2520         hl_ctx_get(hdev, ctx);
2521
2522         fence = hl_ctx_get_fence(ctx, seq);
2523
2524         rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp);
2525         hl_fence_put(fence);
2526         hl_ctx_put(ctx);
2527
2528         return rc;
2529 }
2530
2531 static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
2532 {
2533         if (usecs <= U32_MAX)
2534                 return usecs_to_jiffies(usecs);
2535
2536         /*
2537          * If the value in nanoseconds is larger than 64 bit, use the largest
2538          * 64 bit value.
2539          */
2540         if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
2541                 return nsecs_to_jiffies(U64_MAX);
2542
2543         return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
2544 }
2545
2546 /*
2547  * hl_wait_multi_cs_completion_init - init completion structure
2548  *
2549  * @hdev: pointer to habanalabs device structure
2550  * @stream_master_bitmap: stream master QIDs map, set bit indicates stream
2551  *                        master QID to wait on
2552  *
2553  * @return valid completion struct pointer on success, otherwise error pointer
2554  *
2555  * up to MULTI_CS_MAX_USER_CTX calls can be done concurrently to the driver.
2556  * the function gets the first available completion (by marking it "used")
2557  * and initialize its values.
2558  */
2559 static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev)
2560 {
2561         struct multi_cs_completion *mcs_compl;
2562         int i;
2563
2564         /* find free multi_cs completion structure */
2565         for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
2566                 mcs_compl = &hdev->multi_cs_completion[i];
2567                 spin_lock(&mcs_compl->lock);
2568                 if (!mcs_compl->used) {
2569                         mcs_compl->used = 1;
2570                         mcs_compl->timestamp = 0;
2571                         /*
2572                          * init QID map to 0 to avoid completion by CSs. the actual QID map
2573                          * to multi-CS CSs will be set incrementally at a later stage
2574                          */
2575                         mcs_compl->stream_master_qid_map = 0;
2576                         spin_unlock(&mcs_compl->lock);
2577                         break;
2578                 }
2579                 spin_unlock(&mcs_compl->lock);
2580         }
2581
2582         if (i == MULTI_CS_MAX_USER_CTX) {
2583                 dev_err(hdev->dev, "no available multi-CS completion structure\n");
2584                 return ERR_PTR(-ENOMEM);
2585         }
2586         return mcs_compl;
2587 }
2588
2589 /*
2590  * hl_wait_multi_cs_completion_fini - return completion structure and set as
2591  *                                    unused
2592  *
2593  * @mcs_compl: pointer to the completion structure
2594  */
2595 static void hl_wait_multi_cs_completion_fini(
2596                                         struct multi_cs_completion *mcs_compl)
2597 {
2598         /*
2599          * free completion structure, do it under lock to be in-sync with the
2600          * thread that signals completion
2601          */
2602         spin_lock(&mcs_compl->lock);
2603         mcs_compl->used = 0;
2604         spin_unlock(&mcs_compl->lock);
2605 }
2606
2607 /*
2608  * hl_wait_multi_cs_completion - wait for first CS to complete
2609  *
2610  * @mcs_data: multi-CS internal data
2611  *
2612  * @return 0 on success, otherwise non 0 error code
2613  */
2614 static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
2615                                                 struct multi_cs_completion *mcs_compl)
2616 {
2617         long completion_rc;
2618
2619         completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
2620                                                                         mcs_data->timeout_jiffies);
2621
2622         /* update timestamp */
2623         if (completion_rc > 0)
2624                 mcs_data->timestamp = mcs_compl->timestamp;
2625
2626         mcs_data->wait_status = completion_rc;
2627
2628         return 0;
2629 }
2630
2631 /*
2632  * hl_multi_cs_completion_init - init array of multi-CS completion structures
2633  *
2634  * @hdev: pointer to habanalabs device structure
2635  */
2636 void hl_multi_cs_completion_init(struct hl_device *hdev)
2637 {
2638         struct multi_cs_completion *mcs_cmpl;
2639         int i;
2640
2641         for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
2642                 mcs_cmpl = &hdev->multi_cs_completion[i];
2643                 mcs_cmpl->used = 0;
2644                 spin_lock_init(&mcs_cmpl->lock);
2645                 init_completion(&mcs_cmpl->completion);
2646         }
2647 }
2648
2649 /*
2650  * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
2651  *
2652  * @hpriv: pointer to the private data of the fd
2653  * @data: pointer to multi-CS wait ioctl in/out args
2654  *
2655  */
2656 static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
2657 {
2658         struct multi_cs_completion *mcs_compl;
2659         struct hl_device *hdev = hpriv->hdev;
2660         struct multi_cs_data mcs_data = {0};
2661         union hl_wait_cs_args *args = data;
2662         struct hl_ctx *ctx = hpriv->ctx;
2663         struct hl_fence **fence_arr;
2664         void __user *seq_arr;
2665         u32 size_to_copy;
2666         u64 *cs_seq_arr;
2667         u8 seq_arr_len;
2668         int rc;
2669
2670         if (!hdev->supports_wait_for_multi_cs) {
2671                 dev_err(hdev->dev, "Wait for multi CS is not supported\n");
2672                 return -EPERM;
2673         }
2674
2675         seq_arr_len = args->in.seq_arr_len;
2676
2677         if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) {
2678                 dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n",
2679                                 HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len);
2680                 return -EINVAL;
2681         }
2682
2683         /* allocate memory for sequence array */
2684         cs_seq_arr =
2685                 kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL);
2686         if (!cs_seq_arr)
2687                 return -ENOMEM;
2688
2689         /* copy CS sequence array from user */
2690         seq_arr = (void __user *) (uintptr_t) args->in.seq;
2691         size_to_copy = seq_arr_len * sizeof(*cs_seq_arr);
2692         if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) {
2693                 dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n");
2694                 rc = -EFAULT;
2695                 goto free_seq_arr;
2696         }
2697
2698         /* allocate array for the fences */
2699         fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL);
2700         if (!fence_arr) {
2701                 rc = -ENOMEM;
2702                 goto free_seq_arr;
2703         }
2704
2705         /* initialize the multi-CS internal data */
2706         mcs_data.ctx = ctx;
2707         mcs_data.seq_arr = cs_seq_arr;
2708         mcs_data.fence_arr = fence_arr;
2709         mcs_data.arr_len = seq_arr_len;
2710
2711         hl_ctx_get(hdev, ctx);
2712
2713         /* wait (with timeout) for the first CS to be completed */
2714         mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
2715         mcs_compl = hl_wait_multi_cs_completion_init(hdev);
2716         if (IS_ERR(mcs_compl)) {
2717                 rc = PTR_ERR(mcs_compl);
2718                 goto put_ctx;
2719         }
2720
2721         /* poll all CS fences, extract timestamp */
2722         mcs_data.update_ts = true;
2723         rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
2724         /*
2725          * skip wait for CS completion when one of the below is true:
2726          * - an error on the poll function
2727          * - one or more CS in the list completed
2728          * - the user called ioctl with timeout 0
2729          */
2730         if (rc || mcs_data.completion_bitmap || !args->in.timeout_us)
2731                 goto completion_fini;
2732
2733         while (true) {
2734                 rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl);
2735                 if (rc || (mcs_data.wait_status == 0))
2736                         break;
2737
2738                 /*
2739                  * poll fences once again to update the CS map.
2740                  * no timestamp should be updated this time.
2741                  */
2742                 mcs_data.update_ts = false;
2743                 rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
2744
2745                 if (rc || mcs_data.completion_bitmap)
2746                         break;
2747
2748                 /*
2749                  * if hl_wait_multi_cs_completion returned before timeout (i.e.
2750                  * it got a completion) it either got completed by CS in the multi CS list
2751                  * (in which case the indication will be non empty completion_bitmap) or it
2752                  * got completed by CS submitted to one of the shared stream master but
2753                  * not in the multi CS list (in which case we should wait again but modify
2754                  * the timeout and set timestamp as zero to let a CS related to the current
2755                  * multi-CS set a new, relevant, timestamp)
2756                  */
2757                 mcs_data.timeout_jiffies = mcs_data.wait_status;
2758                 mcs_compl->timestamp = 0;
2759         }
2760
2761 completion_fini:
2762         hl_wait_multi_cs_completion_fini(mcs_compl);
2763
2764 put_ctx:
2765         hl_ctx_put(ctx);
2766         kfree(fence_arr);
2767
2768 free_seq_arr:
2769         kfree(cs_seq_arr);
2770
2771         if (rc)
2772                 return rc;
2773
2774         if (mcs_data.wait_status == -ERESTARTSYS) {
2775                 dev_err_ratelimited(hdev->dev,
2776                                 "user process got signal while waiting for Multi-CS\n");
2777                 return -EINTR;
2778         }
2779
2780         /* update output args */
2781         memset(args, 0, sizeof(*args));
2782
2783         if (mcs_data.completion_bitmap) {
2784                 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
2785                 args->out.cs_completion_map = mcs_data.completion_bitmap;
2786
2787                 /* if timestamp not 0- it's valid */
2788                 if (mcs_data.timestamp) {
2789                         args->out.timestamp_nsec = mcs_data.timestamp;
2790                         args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
2791                 }
2792
2793                 /* update if some CS was gone */
2794                 if (!mcs_data.timestamp)
2795                         args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
2796         } else {
2797                 args->out.status = HL_WAIT_CS_STATUS_BUSY;
2798         }
2799
2800         return 0;
2801 }
2802
2803 static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
2804 {
2805         struct hl_device *hdev = hpriv->hdev;
2806         union hl_wait_cs_args *args = data;
2807         enum hl_cs_wait_status status;
2808         u64 seq = args->in.seq;
2809         s64 timestamp;
2810         int rc;
2811
2812         rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq,
2813                                 &status, &timestamp);
2814
2815         if (rc == -ERESTARTSYS) {
2816                 dev_err_ratelimited(hdev->dev,
2817                         "user process got signal while waiting for CS handle %llu\n",
2818                         seq);
2819                 return -EINTR;
2820         }
2821
2822         memset(args, 0, sizeof(*args));
2823
2824         if (rc) {
2825                 if (rc == -ETIMEDOUT) {
2826                         dev_err_ratelimited(hdev->dev,
2827                                 "CS %llu has timed-out while user process is waiting for it\n",
2828                                 seq);
2829                         args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
2830                 } else if (rc == -EIO) {
2831                         dev_err_ratelimited(hdev->dev,
2832                                 "CS %llu has been aborted while user process is waiting for it\n",
2833                                 seq);
2834                         args->out.status = HL_WAIT_CS_STATUS_ABORTED;
2835                 }
2836                 return rc;
2837         }
2838
2839         if (timestamp) {
2840                 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
2841                 args->out.timestamp_nsec = timestamp;
2842         }
2843
2844         switch (status) {
2845         case CS_WAIT_STATUS_GONE:
2846                 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
2847                 fallthrough;
2848         case CS_WAIT_STATUS_COMPLETED:
2849                 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
2850                 break;
2851         case CS_WAIT_STATUS_BUSY:
2852         default:
2853                 args->out.status = HL_WAIT_CS_STATUS_BUSY;
2854                 break;
2855         }
2856
2857         return 0;
2858 }
2859
2860 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
2861                                 struct hl_cb_mgr *cb_mgr, u64 timeout_us,
2862                                 u64 cq_counters_handle, u64 cq_counters_offset,
2863                                 u64 target_value, struct hl_user_interrupt *interrupt,
2864                                 u32 *status, u64 *timestamp)
2865 {
2866         struct hl_user_pending_interrupt *pend;
2867         unsigned long timeout, flags;
2868         long completion_rc;
2869         struct hl_cb *cb;
2870         int rc = 0;
2871         u32 handle;
2872
2873         timeout = hl_usecs64_to_jiffies(timeout_us);
2874
2875         hl_ctx_get(hdev, ctx);
2876
2877         cq_counters_handle >>= PAGE_SHIFT;
2878         handle = (u32) cq_counters_handle;
2879
2880         cb = hl_cb_get(hdev, cb_mgr, handle);
2881         if (!cb) {
2882                 hl_ctx_put(ctx);
2883                 return -EINVAL;
2884         }
2885
2886         pend = kzalloc(sizeof(*pend), GFP_KERNEL);
2887         if (!pend) {
2888                 hl_cb_put(cb);
2889                 hl_ctx_put(ctx);
2890                 return -ENOMEM;
2891         }
2892
2893         hl_fence_init(&pend->fence, ULONG_MAX);
2894
2895         pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset;
2896         pend->cq_target_value = target_value;
2897
2898         spin_lock_irqsave(&interrupt->wait_list_lock, flags);
2899
2900         /* We check for completion value as interrupt could have been received
2901          * before we added the node to the wait list
2902          */
2903         if (*pend->cq_kernel_addr >= target_value) {
2904                 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
2905
2906                 *status = HL_WAIT_CS_STATUS_COMPLETED;
2907                 /* There was no interrupt, we assume the completion is now. */
2908                 pend->fence.timestamp = ktime_get();
2909                 goto set_timestamp;
2910
2911         } else if (!timeout_us) {
2912                 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
2913                 *status = HL_WAIT_CS_STATUS_BUSY;
2914                 pend->fence.timestamp = ktime_get();
2915                 goto set_timestamp;
2916         }
2917
2918         /* Add pending user interrupt to relevant list for the interrupt
2919          * handler to monitor
2920          */
2921         list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
2922         spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
2923
2924         /* Wait for interrupt handler to signal completion */
2925         completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
2926                                                                 timeout);
2927         if (completion_rc > 0) {
2928                 *status = HL_WAIT_CS_STATUS_COMPLETED;
2929         } else {
2930                 if (completion_rc == -ERESTARTSYS) {
2931                         dev_err_ratelimited(hdev->dev,
2932                                         "user process got signal while waiting for interrupt ID %d\n",
2933                                         interrupt->interrupt_id);
2934                         rc = -EINTR;
2935                         *status = HL_WAIT_CS_STATUS_ABORTED;
2936                 } else {
2937                         if (pend->fence.error == -EIO) {
2938                                 dev_err_ratelimited(hdev->dev,
2939                                                 "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
2940                                                 pend->fence.error);
2941                                 rc = -EIO;
2942                                 *status = HL_WAIT_CS_STATUS_ABORTED;
2943                         } else {
2944                                 /* The wait has timed-out. We don't know anything beyond that
2945                                  * because the workload wasn't submitted through the driver.
2946                                  * Therefore, from driver's perspective, the workload is still
2947                                  * executing.
2948                                  */
2949                                 rc = 0;
2950                                 *status = HL_WAIT_CS_STATUS_BUSY;
2951                         }
2952                 }
2953         }
2954
2955         spin_lock_irqsave(&interrupt->wait_list_lock, flags);
2956         list_del(&pend->wait_list_node);
2957         spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
2958
2959 set_timestamp:
2960         *timestamp = ktime_to_ns(pend->fence.timestamp);
2961
2962         kfree(pend);
2963         hl_cb_put(cb);
2964         hl_ctx_put(ctx);
2965
2966         return rc;
2967 }
2968
2969 static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx,
2970                                 u64 timeout_us, u64 user_address,
2971                                 u64 target_value, struct hl_user_interrupt *interrupt,
2972
2973                                 u32 *status,
2974                                 u64 *timestamp)
2975 {
2976         struct hl_user_pending_interrupt *pend;
2977         unsigned long timeout, flags;
2978         u64 completion_value;
2979         long completion_rc;
2980         int rc = 0;
2981
2982         timeout = hl_usecs64_to_jiffies(timeout_us);
2983
2984         hl_ctx_get(hdev, ctx);
2985
2986         pend = kzalloc(sizeof(*pend), GFP_KERNEL);
2987         if (!pend) {
2988                 hl_ctx_put(ctx);
2989                 return -ENOMEM;
2990         }
2991
2992         hl_fence_init(&pend->fence, ULONG_MAX);
2993
2994         /* Add pending user interrupt to relevant list for the interrupt
2995          * handler to monitor
2996          */
2997         spin_lock_irqsave(&interrupt->wait_list_lock, flags);
2998         list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
2999         spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3000
3001         /* We check for completion value as interrupt could have been received
3002          * before we added the node to the wait list
3003          */
3004         if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
3005                 dev_err(hdev->dev, "Failed to copy completion value from user\n");
3006                 rc = -EFAULT;
3007                 goto remove_pending_user_interrupt;
3008         }
3009
3010         if (completion_value >= target_value) {
3011                 *status = HL_WAIT_CS_STATUS_COMPLETED;
3012                 /* There was no interrupt, we assume the completion is now. */
3013                 pend->fence.timestamp = ktime_get();
3014         } else {
3015                 *status = HL_WAIT_CS_STATUS_BUSY;
3016         }
3017
3018         if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
3019                 goto remove_pending_user_interrupt;
3020
3021 wait_again:
3022         /* Wait for interrupt handler to signal completion */
3023         completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
3024                                                                                 timeout);
3025
3026         /* If timeout did not expire we need to perform the comparison.
3027          * If comparison fails, keep waiting until timeout expires
3028          */
3029         if (completion_rc > 0) {
3030                 spin_lock_irqsave(&interrupt->wait_list_lock, flags);
3031                 /* reinit_completion must be called before we check for user
3032                  * completion value, otherwise, if interrupt is received after
3033                  * the comparison and before the next wait_for_completion,
3034                  * we will reach timeout and fail
3035                  */
3036                 reinit_completion(&pend->fence.completion);
3037                 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3038
3039                 if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
3040                         dev_err(hdev->dev, "Failed to copy completion value from user\n");
3041                         rc = -EFAULT;
3042
3043                         goto remove_pending_user_interrupt;
3044                 }
3045
3046                 if (completion_value >= target_value) {
3047                         *status = HL_WAIT_CS_STATUS_COMPLETED;
3048                 } else if (pend->fence.error) {
3049                         dev_err_ratelimited(hdev->dev,
3050                                 "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
3051                                 pend->fence.error);
3052                         /* set the command completion status as ABORTED */
3053                         *status = HL_WAIT_CS_STATUS_ABORTED;
3054                 } else {
3055                         timeout = completion_rc;
3056                         goto wait_again;
3057                 }
3058         } else if (completion_rc == -ERESTARTSYS) {
3059                 dev_err_ratelimited(hdev->dev,
3060                         "user process got signal while waiting for interrupt ID %d\n",
3061                         interrupt->interrupt_id);
3062                 rc = -EINTR;
3063         } else {
3064                 /* The wait has timed-out. We don't know anything beyond that
3065                  * because the workload wasn't submitted through the driver.
3066                  * Therefore, from driver's perspective, the workload is still
3067                  * executing.
3068                  */
3069                 rc = 0;
3070                 *status = HL_WAIT_CS_STATUS_BUSY;
3071         }
3072
3073 remove_pending_user_interrupt:
3074         spin_lock_irqsave(&interrupt->wait_list_lock, flags);
3075         list_del(&pend->wait_list_node);
3076         spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3077
3078         *timestamp = ktime_to_ns(pend->fence.timestamp);
3079
3080         kfree(pend);
3081         hl_ctx_put(ctx);
3082
3083         return rc;
3084 }
3085
3086 static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
3087 {
3088         u16 interrupt_id, first_interrupt, last_interrupt;
3089         struct hl_device *hdev = hpriv->hdev;
3090         struct asic_fixed_properties *prop;
3091         struct hl_user_interrupt *interrupt;
3092         union hl_wait_cs_args *args = data;
3093         u32 status = HL_WAIT_CS_STATUS_BUSY;
3094         u64 timestamp;
3095         int rc;
3096
3097         prop = &hdev->asic_prop;
3098
3099         if (!prop->user_interrupt_count) {
3100                 dev_err(hdev->dev, "no user interrupts allowed");
3101                 return -EPERM;
3102         }
3103
3104         interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);
3105
3106         first_interrupt = prop->first_available_user_msix_interrupt;
3107         last_interrupt = prop->first_available_user_msix_interrupt +
3108                                                 prop->user_interrupt_count - 1;
3109
3110         if ((interrupt_id < first_interrupt || interrupt_id > last_interrupt) &&
3111                         interrupt_id != HL_COMMON_USER_INTERRUPT_ID) {
3112                 dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id);
3113                 return -EINVAL;
3114         }
3115
3116         if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID)
3117                 interrupt = &hdev->common_user_interrupt;
3118         else
3119                 interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];
3120
3121         if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
3122                 rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr,
3123                                 args->in.interrupt_timeout_us, args->in.cq_counters_handle,
3124                                 args->in.cq_counters_offset,
3125                                 args->in.target, interrupt, &status,
3126                                 &timestamp);
3127         else
3128                 rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
3129                                 args->in.interrupt_timeout_us, args->in.addr,
3130                                 args->in.target, interrupt, &status,
3131                                 &timestamp);
3132         if (rc)
3133                 return rc;
3134
3135         memset(args, 0, sizeof(*args));
3136         args->out.status = status;
3137
3138         if (timestamp) {
3139                 args->out.timestamp_nsec = timestamp;
3140                 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
3141         }
3142
3143         return 0;
3144 }
3145
3146 int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
3147 {
3148         union hl_wait_cs_args *args = data;
3149         u32 flags = args->in.flags;
3150         int rc;
3151
3152         /* If the device is not operational, no point in waiting for any command submission or
3153          * user interrupt
3154          */
3155         if (!hl_device_operational(hpriv->hdev, NULL))
3156                 return -EBUSY;
3157
3158         if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
3159                 rc = hl_interrupt_wait_ioctl(hpriv, data);
3160         else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS)
3161                 rc = hl_multi_cs_wait_ioctl(hpriv, data);
3162         else
3163                 rc = hl_cs_wait_ioctl(hpriv, data);
3164
3165         return rc;
3166 }