drivers/misc/habanalabs/common/command_submission.c

   1 // SPDX-License-Identifier: GPL-2.0
   2
   3 /*
   4  * Copyright 2016-2019 HabanaLabs, Ltd.
   5  * All Rights Reserved.
   6  */
   7
   8 #include <uapi/misc/habanalabs.h>
   9 #include "habanalabs.h"
  10
  11 #include <linux/uaccess.h>
  12 #include <linux/slab.h>
  13
  14 #define HL_CS_FLAGS_TYPE_MASK   (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
  15                                 HL_CS_FLAGS_COLLECTIVE_WAIT)
  16
  17 /**
  18  * enum hl_cs_wait_status - cs wait status
  19  * @CS_WAIT_STATUS_BUSY: cs was not completed yet
  20  * @CS_WAIT_STATUS_COMPLETED: cs completed
  21  * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
  22  */
  23 enum hl_cs_wait_status {
  24         CS_WAIT_STATUS_BUSY,
  25         CS_WAIT_STATUS_COMPLETED,
  26         CS_WAIT_STATUS_GONE
  27 };
  28
  29 static void job_wq_completion(struct work_struct *work);
  30 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
  31                                 u64 timeout_us, u64 seq,
  32                                 enum hl_cs_wait_status *status, s64 *timestamp);
  33 static void cs_do_release(struct kref *ref);
  34
  35 static void hl_sob_reset(struct kref *ref)
  36 {
  37         struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
  38                                                         kref);
  39         struct hl_device *hdev = hw_sob->hdev;
  40
  41         hdev->asic_funcs->reset_sob(hdev, hw_sob);
  42 }
  43
  44 void hl_sob_reset_error(struct kref *ref)
  45 {
  46         struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
  47                                                         kref);
  48         struct hl_device *hdev = hw_sob->hdev;
  49
  50         dev_crit(hdev->dev,
  51                 "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
  52                 hw_sob->q_idx, hw_sob->sob_id);
  53 }
  54
  55 /**
  56  * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
  57  * @sob_base: sob base id
  58  * @sob_mask: sob user mask, each bit represents a sob offset from sob base
  59  * @mask: generated mask
  60  *
  61  * Return: 0 if given parameters are valid
  62  */
  63 int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
  64 {
  65         int i;
  66
  67         if (sob_mask == 0)
  68                 return -EINVAL;
  69
  70         if (sob_mask == 0x1) {
  71                 *mask = ~(1 << (sob_base & 0x7));
  72         } else {
  73                 /* find msb in order to verify sob range is valid */
  74                 for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
  75                         if (BIT(i) & sob_mask)
  76                                 break;
  77
  78                 if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
  79                         return -EINVAL;
  80
  81                 *mask = ~sob_mask;
  82         }
  83
  84         return 0;
  85 }
  86
  87 static void sob_reset_work(struct work_struct *work)
  88 {
  89         struct hl_cs_compl *hl_cs_cmpl =
  90                 container_of(work, struct hl_cs_compl, sob_reset_work);
  91         struct hl_device *hdev = hl_cs_cmpl->hdev;
  92
  93         /*
  94          * A signal CS can get completion while the corresponding wait
  95          * for signal CS is on its way to the PQ. The wait for signal CS
  96          * will get stuck if the signal CS incremented the SOB to its
  97          * max value and there are no pending (submitted) waits on this
  98          * SOB.
  99          * We do the following to void this situation:
 100          * 1. The wait for signal CS must get a ref for the signal CS as
 101          *    soon as possible in cs_ioctl_signal_wait() and put it
 102          *    before being submitted to the PQ but after it incremented
 103          *    the SOB refcnt in init_signal_wait_cs().
 104          * 2. Signal/Wait for signal CS will decrement the SOB refcnt
 105          *    here.
 106          * These two measures guarantee that the wait for signal CS will
 107          * reset the SOB upon completion rather than the signal CS and
 108          * hence the above scenario is avoided.
 109          */
 110         kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
 111
 112         if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
 113                 hdev->asic_funcs->reset_sob_group(hdev,
 114                                 hl_cs_cmpl->sob_group);
 115
 116         kfree(hl_cs_cmpl);
 117 }
 118
 119 static void hl_fence_release(struct kref *kref)
 120 {
 121         struct hl_fence *fence =
 122                 container_of(kref, struct hl_fence, refcount);
 123         struct hl_cs_compl *hl_cs_cmpl =
 124                 container_of(fence, struct hl_cs_compl, base_fence);
 125         struct hl_device *hdev = hl_cs_cmpl->hdev;
 126
 127         /* EBUSY means the CS was never submitted and hence we don't have
 128          * an attached hw_sob object that we should handle here
 129          */
 130         if (fence->error == -EBUSY)
 131                 goto free;
 132
 133         if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
 134                 (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
 135                 (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {
 136
 137                 dev_dbg(hdev->dev,
 138                         "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
 139                         hl_cs_cmpl->cs_seq,
 140                         hl_cs_cmpl->type,
 141                         hl_cs_cmpl->hw_sob->sob_id,
 142                         hl_cs_cmpl->sob_val);
 143
 144                 queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work);
 145
 146                 return;
 147         }
 148
 149 free:
 150         kfree(hl_cs_cmpl);
 151 }
 152
 153 void hl_fence_put(struct hl_fence *fence)
 154 {
 155         if (fence)
 156                 kref_put(&fence->refcount, hl_fence_release);
 157 }
 158
 159 void hl_fence_get(struct hl_fence *fence)
 160 {
 161         if (fence)
 162                 kref_get(&fence->refcount);
 163 }
 164
 165 static void hl_fence_init(struct hl_fence *fence, u64 sequence)
 166 {
 167         kref_init(&fence->refcount);
 168         fence->cs_sequence = sequence;
 169         fence->error = 0;
 170         fence->timestamp = ktime_set(0, 0);
 171         init_completion(&fence->completion);
 172 }
 173
 174 void cs_get(struct hl_cs *cs)
 175 {
 176         kref_get(&cs->refcount);
 177 }
 178
 179 static int cs_get_unless_zero(struct hl_cs *cs)
 180 {
 181         return kref_get_unless_zero(&cs->refcount);
 182 }
 183
 184 static void cs_put(struct hl_cs *cs)
 185 {
 186         kref_put(&cs->refcount, cs_do_release);
 187 }
 188
 189 static void cs_job_do_release(struct kref *ref)
 190 {
 191         struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);
 192
 193         kfree(job);
 194 }
 195
 196 static void cs_job_put(struct hl_cs_job *job)
 197 {
 198         kref_put(&job->refcount, cs_job_do_release);
 199 }
 200
 201 bool cs_needs_completion(struct hl_cs *cs)
 202 {
 203         /* In case this is a staged CS, only the last CS in sequence should
 204          * get a completion, any non staged CS will always get a completion
 205          */
 206         if (cs->staged_cs && !cs->staged_last)
 207                 return false;
 208
 209         return true;
 210 }
 211
 212 bool cs_needs_timeout(struct hl_cs *cs)
 213 {
 214         /* In case this is a staged CS, only the first CS in sequence should
 215          * get a timeout, any non staged CS will always get a timeout
 216          */
 217         if (cs->staged_cs && !cs->staged_first)
 218                 return false;
 219
 220         return true;
 221 }
 222
 223 static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
 224 {
 225         /*
 226          * Patched CB is created for external queues jobs, and for H/W queues
 227          * jobs if the user CB was allocated by driver and MMU is disabled.
 228          */
 229         return (job->queue_type == QUEUE_TYPE_EXT ||
 230                         (job->queue_type == QUEUE_TYPE_HW &&
 231                                         job->is_kernel_allocated_cb &&
 232                                         !hdev->mmu_enable));
 233 }
 234
 235 /*
 236  * cs_parser - parse the user command submission
 237  *
 238  * @hpriv       : pointer to the private data of the fd
 239  * @job        : pointer to the job that holds the command submission info
 240  *
 241  * The function parses the command submission of the user. It calls the
 242  * ASIC specific parser, which returns a list of memory blocks to send
 243  * to the device as different command buffers
 244  *
 245  */
 246 static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
 247 {
 248         struct hl_device *hdev = hpriv->hdev;
 249         struct hl_cs_parser parser;
 250         int rc;
 251
 252         parser.ctx_id = job->cs->ctx->asid;
 253         parser.cs_sequence = job->cs->sequence;
 254         parser.job_id = job->id;
 255
 256         parser.hw_queue_id = job->hw_queue_id;
 257         parser.job_userptr_list = &job->userptr_list;
 258         parser.patched_cb = NULL;
 259         parser.user_cb = job->user_cb;
 260         parser.user_cb_size = job->user_cb_size;
 261         parser.queue_type = job->queue_type;
 262         parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
 263         job->patched_cb = NULL;
 264         parser.completion = cs_needs_completion(job->cs);
 265
 266         rc = hdev->asic_funcs->cs_parser(hdev, &parser);
 267
 268         if (is_cb_patched(hdev, job)) {
 269                 if (!rc) {
 270                         job->patched_cb = parser.patched_cb;
 271                         job->job_cb_size = parser.patched_cb_size;
 272                         job->contains_dma_pkt = parser.contains_dma_pkt;
 273                         atomic_inc(&job->patched_cb->cs_cnt);
 274                 }
 275
 276                 /*
 277                  * Whether the parsing worked or not, we don't need the
 278                  * original CB anymore because it was already parsed and
 279                  * won't be accessed again for this CS
 280                  */
 281                 atomic_dec(&job->user_cb->cs_cnt);
 282                 hl_cb_put(job->user_cb);
 283                 job->user_cb = NULL;
 284         } else if (!rc) {
 285                 job->job_cb_size = job->user_cb_size;
 286         }
 287
 288         return rc;
 289 }
 290
 291 static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
 292 {
 293         struct hl_cs *cs = job->cs;
 294
 295         if (is_cb_patched(hdev, job)) {
 296                 hl_userptr_delete_list(hdev, &job->userptr_list);
 297
 298                 /*
 299                  * We might arrive here from rollback and patched CB wasn't
 300                  * created, so we need to check it's not NULL
 301                  */
 302                 if (job->patched_cb) {
 303                         atomic_dec(&job->patched_cb->cs_cnt);
 304                         hl_cb_put(job->patched_cb);
 305                 }
 306         }
 307
 308         /* For H/W queue jobs, if a user CB was allocated by driver and MMU is
 309          * enabled, the user CB isn't released in cs_parser() and thus should be
 310          * released here.
 311          * This is also true for INT queues jobs which were allocated by driver
 312          */
 313         if (job->is_kernel_allocated_cb &&
 314                 ((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) ||
 315                                 job->queue_type == QUEUE_TYPE_INT)) {
 316                 atomic_dec(&job->user_cb->cs_cnt);
 317                 hl_cb_put(job->user_cb);
 318         }
 319
 320         /*
 321          * This is the only place where there can be multiple threads
 322          * modifying the list at the same time
 323          */
 324         spin_lock(&cs->job_lock);
 325         list_del(&job->cs_node);
 326         spin_unlock(&cs->job_lock);
 327
 328         hl_debugfs_remove_job(hdev, job);
 329
 330         /* We decrement reference only for a CS that gets completion
 331          * because the reference was incremented only for this kind of CS
 332          * right before it was scheduled.
 333          *
 334          * In staged submission, only the last CS marked as 'staged_last'
 335          * gets completion, hence its release function will be called from here.
 336          * As for all the rest CS's in the staged submission which do not get
 337          * completion, their CS reference will be decremented by the
 338          * 'staged_last' CS during the CS release flow.
 339          * All relevant PQ CI counters will be incremented during the CS release
 340          * flow by calling 'hl_hw_queue_update_ci'.
 341          */
 342         if (cs_needs_completion(cs) &&
 343                 (job->queue_type == QUEUE_TYPE_EXT ||
 344                         job->queue_type == QUEUE_TYPE_HW))
 345                 cs_put(cs);
 346
 347         cs_job_put(job);
 348 }
 349
 350 /*
 351  * hl_staged_cs_find_first - locate the first CS in this staged submission
 352  *
 353  * @hdev: pointer to device structure
 354  * @cs_seq: staged submission sequence number
 355  *
 356  * @note: This function must be called under 'hdev->cs_mirror_lock'
 357  *
 358  * Find and return a CS pointer with the given sequence
 359  */
 360 struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
 361 {
 362         struct hl_cs *cs;
 363
 364         list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
 365                 if (cs->staged_cs && cs->staged_first &&
 366                                 cs->sequence == cs_seq)
 367                         return cs;
 368
 369         return NULL;
 370 }
 371
 372 /*
 373  * is_staged_cs_last_exists - returns true if the last CS in sequence exists
 374  *
 375  * @hdev: pointer to device structure
 376  * @cs: staged submission member
 377  *
 378  */
 379 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
 380 {
 381         struct hl_cs *last_entry;
 382
 383         last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
 384                                                                 staged_cs_node);
 385
 386         if (last_entry->staged_last)
 387                 return true;
 388
 389         return false;
 390 }
 391
 392 /*
 393  * staged_cs_get - get CS reference if this CS is a part of a staged CS
 394  *
 395  * @hdev: pointer to device structure
 396  * @cs: current CS
 397  * @cs_seq: staged submission sequence number
 398  *
 399  * Increment CS reference for every CS in this staged submission except for
 400  * the CS which get completion.
 401  */
 402 static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
 403 {
 404         /* Only the last CS in this staged submission will get a completion.
 405          * We must increment the reference for all other CS's in this
 406          * staged submission.
 407          * Once we get a completion we will release the whole staged submission.
 408          */
 409         if (!cs->staged_last)
 410                 cs_get(cs);
 411 }
 412
 413 /*
 414  * staged_cs_put - put a CS in case it is part of staged submission
 415  *
 416  * @hdev: pointer to device structure
 417  * @cs: CS to put
 418  *
 419  * This function decrements a CS reference (for a non completion CS)
 420  */
 421 static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
 422 {
 423         /* We release all CS's in a staged submission except the last
 424          * CS which we have never incremented its reference.
 425          */
 426         if (!cs_needs_completion(cs))
 427                 cs_put(cs);
 428 }
 429
 430 static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
 431 {
 432         bool next_entry_found = false;
 433         struct hl_cs *next;
 434
 435         if (!cs_needs_timeout(cs))
 436                 return;
 437
 438         spin_lock(&hdev->cs_mirror_lock);
 439
 440         /* We need to handle tdr only once for the complete staged submission.
 441          * Hence, we choose the CS that reaches this function first which is
 442          * the CS marked as 'staged_last'.
 443          */
 444         if (cs->staged_cs && cs->staged_last)
 445                 cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
 446
 447         spin_unlock(&hdev->cs_mirror_lock);
 448
 449         /* Don't cancel TDR in case this CS was timedout because we might be
 450          * running from the TDR context
 451          */
 452         if (cs && (cs->timedout ||
 453                         hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
 454                 return;
 455
 456         if (cs && cs->tdr_active)
 457                 cancel_delayed_work_sync(&cs->work_tdr);
 458
 459         spin_lock(&hdev->cs_mirror_lock);
 460
 461         /* queue TDR for next CS */
 462         list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
 463                 if (cs_needs_timeout(next)) {
 464                         next_entry_found = true;
 465                         break;
 466                 }
 467
 468         if (next_entry_found && !next->tdr_active) {
 469                 next->tdr_active = true;
 470                 schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
 471         }
 472
 473         spin_unlock(&hdev->cs_mirror_lock);
 474 }
 475
 476 static void cs_do_release(struct kref *ref)
 477 {
 478         struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
 479         struct hl_device *hdev = cs->ctx->hdev;
 480         struct hl_cs_job *job, *tmp;
 481
 482         cs->completed = true;
 483
 484         /*
 485          * Although if we reached here it means that all external jobs have
 486          * finished, because each one of them took refcnt to CS, we still
 487          * need to go over the internal jobs and complete them. Otherwise, we
 488          * will have leaked memory and what's worse, the CS object (and
 489          * potentially the CTX object) could be released, while the JOB
 490          * still holds a pointer to them (but no reference).
 491          */
 492         list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
 493                 complete_job(hdev, job);
 494
 495         if (!cs->submitted) {
 496                 /* In case the wait for signal CS was submitted, the put occurs
 497                  * in init_signal_wait_cs() or collective_wait_init_cs()
 498                  * right before hanging on the PQ.
 499                  */
 500                 if (cs->type == CS_TYPE_WAIT ||
 501                                 cs->type == CS_TYPE_COLLECTIVE_WAIT)
 502                         hl_fence_put(cs->signal_fence);
 503
 504                 goto out;
 505         }
 506
 507         /* Need to update CI for all queue jobs that does not get completion */
 508         hl_hw_queue_update_ci(cs);
 509
 510         /* remove CS from CS mirror list */
 511         spin_lock(&hdev->cs_mirror_lock);
 512         list_del_init(&cs->mirror_node);
 513         spin_unlock(&hdev->cs_mirror_lock);
 514
 515         cs_handle_tdr(hdev, cs);
 516
 517         if (cs->staged_cs) {
 518                 /* the completion CS decrements reference for the entire
 519                  * staged submission
 520                  */
 521                 if (cs->staged_last) {
 522                         struct hl_cs *staged_cs, *tmp;
 523
 524                         list_for_each_entry_safe(staged_cs, tmp,
 525                                         &cs->staged_cs_node, staged_cs_node)
 526                                 staged_cs_put(hdev, staged_cs);
 527                 }
 528
 529                 /* A staged CS will be a member in the list only after it
 530                  * was submitted. We used 'cs_mirror_lock' when inserting
 531                  * it to list so we will use it again when removing it
 532                  */
 533                 if (cs->submitted) {
 534                         spin_lock(&hdev->cs_mirror_lock);
 535                         list_del(&cs->staged_cs_node);
 536                         spin_unlock(&hdev->cs_mirror_lock);
 537                 }
 538         }
 539
 540 out:
 541         /* Must be called before hl_ctx_put because inside we use ctx to get
 542          * the device
 543          */
 544         hl_debugfs_remove_cs(cs);
 545
 546         hl_ctx_put(cs->ctx);
 547
 548         /* We need to mark an error for not submitted because in that case
 549          * the hl fence release flow is different. Mainly, we don't need
 550          * to handle hw_sob for signal/wait
 551          */
 552         if (cs->timedout)
 553                 cs->fence->error = -ETIMEDOUT;
 554         else if (cs->aborted)
 555                 cs->fence->error = -EIO;
 556         else if (!cs->submitted)
 557                 cs->fence->error = -EBUSY;
 558
 559         if (cs->timestamp)
 560                 cs->fence->timestamp = ktime_get();
 561         complete_all(&cs->fence->completion);
 562         hl_fence_put(cs->fence);
 563
 564         kfree(cs->jobs_in_queue_cnt);
 565         kfree(cs);
 566 }
 567
 568 static void cs_timedout(struct work_struct *work)
 569 {
 570         struct hl_device *hdev;
 571         int rc;
 572         struct hl_cs *cs = container_of(work, struct hl_cs,
 573                                                  work_tdr.work);
 574         rc = cs_get_unless_zero(cs);
 575         if (!rc)
 576                 return;
 577
 578         if ((!cs->submitted) || (cs->completed)) {
 579                 cs_put(cs);
 580                 return;
 581         }
 582
 583         /* Mark the CS is timed out so we won't try to cancel its TDR */
 584         cs->timedout = true;
 585
 586         hdev = cs->ctx->hdev;
 587
 588         switch (cs->type) {
 589         case CS_TYPE_SIGNAL:
 590                 dev_err(hdev->dev,
 591                         "Signal command submission %llu has not finished in time!\n",
 592                         cs->sequence);
 593                 break;
 594
 595         case CS_TYPE_WAIT:
 596                 dev_err(hdev->dev,
 597                         "Wait command submission %llu has not finished in time!\n",
 598                         cs->sequence);
 599                 break;
 600
 601         case CS_TYPE_COLLECTIVE_WAIT:
 602                 dev_err(hdev->dev,
 603                         "Collective Wait command submission %llu has not finished in time!\n",
 604                         cs->sequence);
 605                 break;
 606
 607         default:
 608                 dev_err(hdev->dev,
 609                         "Command submission %llu has not finished in time!\n",
 610                         cs->sequence);
 611                 break;
 612         }
 613
 614         cs_put(cs);
 615
 616         if (hdev->reset_on_lockup)
 617                 hl_device_reset(hdev, 0);
 618         else
 619                 hdev->needs_reset = true;
 620 }
 621
 622 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 623                         enum hl_cs_type cs_type, u64 user_sequence,
 624                         struct hl_cs **cs_new, u32 flags, u32 timeout)
 625 {
 626         struct hl_cs_counters_atomic *cntr;
 627         struct hl_fence *other = NULL;
 628         struct hl_cs_compl *cs_cmpl;
 629         struct hl_cs *cs;
 630         int rc;
 631
 632         cntr = &hdev->aggregated_cs_counters;
 633
 634         cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
 635         if (!cs)
 636                 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
 637
 638         if (!cs) {
 639                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 640                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
 641                 return -ENOMEM;
 642         }
 643
 644         /* increment refcnt for context */
 645         hl_ctx_get(hdev, ctx);
 646
 647         cs->ctx = ctx;
 648         cs->submitted = false;
 649         cs->completed = false;
 650         cs->type = cs_type;
 651         cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
 652         cs->timeout_jiffies = timeout;
 653         INIT_LIST_HEAD(&cs->job_list);
 654         INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
 655         kref_init(&cs->refcount);
 656         spin_lock_init(&cs->job_lock);
 657
 658         cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
 659         if (!cs_cmpl)
 660                 cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL);
 661
 662         if (!cs_cmpl) {
 663                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 664                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
 665                 rc = -ENOMEM;
 666                 goto free_cs;
 667         }
 668
 669         cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
 670                         sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
 671         if (!cs->jobs_in_queue_cnt)
 672                 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
 673                                 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
 674
 675         if (!cs->jobs_in_queue_cnt) {
 676                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 677                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
 678                 rc = -ENOMEM;
 679                 goto free_cs_cmpl;
 680         }
 681
 682         cs_cmpl->hdev = hdev;
 683         cs_cmpl->type = cs->type;
 684         spin_lock_init(&cs_cmpl->lock);
 685         INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work);
 686         cs->fence = &cs_cmpl->base_fence;
 687
 688         spin_lock(&ctx->cs_lock);
 689
 690         cs_cmpl->cs_seq = ctx->cs_sequence;
 691         other = ctx->cs_pending[cs_cmpl->cs_seq &
 692                                 (hdev->asic_prop.max_pending_cs - 1)];
 693
 694         if (other && !completion_done(&other->completion)) {
 695                 /* If the following statement is true, it means we have reached
 696                  * a point in which only part of the staged submission was
 697                  * submitted and we don't have enough room in the 'cs_pending'
 698                  * array for the rest of the submission.
 699                  * This causes a deadlock because this CS will never be
 700                  * completed as it depends on future CS's for completion.
 701                  */
 702                 if (other->cs_sequence == user_sequence)
 703                         dev_crit_ratelimited(hdev->dev,
 704                                 "Staged CS %llu deadlock due to lack of resources",
 705                                 user_sequence);
 706
 707                 dev_dbg_ratelimited(hdev->dev,
 708                         "Rejecting CS because of too many in-flights CS\n");
 709                 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
 710                 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
 711                 rc = -EAGAIN;
 712                 goto free_fence;
 713         }
 714
 715         /* init hl_fence */
 716         hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
 717
 718         cs->sequence = cs_cmpl->cs_seq;
 719
 720         ctx->cs_pending[cs_cmpl->cs_seq &
 721                         (hdev->asic_prop.max_pending_cs - 1)] =
 722                                                         &cs_cmpl->base_fence;
 723         ctx->cs_sequence++;
 724
 725         hl_fence_get(&cs_cmpl->base_fence);
 726
 727         hl_fence_put(other);
 728
 729         spin_unlock(&ctx->cs_lock);
 730
 731         *cs_new = cs;
 732
 733         return 0;
 734
 735 free_fence:
 736         spin_unlock(&ctx->cs_lock);
 737         kfree(cs->jobs_in_queue_cnt);
 738 free_cs_cmpl:
 739         kfree(cs_cmpl);
 740 free_cs:
 741         kfree(cs);
 742         hl_ctx_put(ctx);
 743         return rc;
 744 }
 745
 746 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
 747 {
 748         struct hl_cs_job *job, *tmp;
 749
 750         staged_cs_put(hdev, cs);
 751
 752         list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
 753                 complete_job(hdev, job);
 754 }
 755
 756 void hl_cs_rollback_all(struct hl_device *hdev)
 757 {
 758         int i;
 759         struct hl_cs *cs, *tmp;
 760
 761         flush_workqueue(hdev->sob_reset_wq);
 762
 763         /* flush all completions before iterating over the CS mirror list in
 764          * order to avoid a race with the release functions
 765          */
 766         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 767                 flush_workqueue(hdev->cq_wq[i]);
 768
 769         /* Make sure we don't have leftovers in the CS mirror list */
 770         list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
 771                 cs_get(cs);
 772                 cs->aborted = true;
 773                 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
 774                                 cs->ctx->asid, cs->sequence);
 775                 cs_rollback(hdev, cs);
 776                 cs_put(cs);
 777         }
 778 }
 779
 780 void hl_pending_cb_list_flush(struct hl_ctx *ctx)
 781 {
 782         struct hl_pending_cb *pending_cb, *tmp;
 783
 784         list_for_each_entry_safe(pending_cb, tmp,
 785                         &ctx->pending_cb_list, cb_node) {
 786                 list_del(&pending_cb->cb_node);
 787                 hl_cb_put(pending_cb->cb);
 788                 kfree(pending_cb);
 789         }
 790 }
 791
 792 static void
 793 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
 794 {
 795         struct hl_user_pending_interrupt *pend;
 796
 797         spin_lock(&interrupt->wait_list_lock);
 798         list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
 799                 pend->fence.error = -EIO;
 800                 complete_all(&pend->fence.completion);
 801         }
 802         spin_unlock(&interrupt->wait_list_lock);
 803 }
 804
 805 void hl_release_pending_user_interrupts(struct hl_device *hdev)
 806 {
 807         struct asic_fixed_properties *prop = &hdev->asic_prop;
 808         struct hl_user_interrupt *interrupt;
 809         int i;
 810
 811         if (!prop->user_interrupt_count)
 812                 return;
 813
 814         /* We iterate through the user interrupt requests and waking up all
 815          * user threads waiting for interrupt completion. We iterate the
 816          * list under a lock, this is why all user threads, once awake,
 817          * will wait on the same lock and will release the waiting object upon
 818          * unlock.
 819          */
 820
 821         for (i = 0 ; i < prop->user_interrupt_count ; i++) {
 822                 interrupt = &hdev->user_interrupt[i];
 823                 wake_pending_user_interrupt_threads(interrupt);
 824         }
 825
 826         interrupt = &hdev->common_user_interrupt;
 827         wake_pending_user_interrupt_threads(interrupt);
 828 }
 829
 830 static void job_wq_completion(struct work_struct *work)
 831 {
 832         struct hl_cs_job *job = container_of(work, struct hl_cs_job,
 833                                                 finish_work);
 834         struct hl_cs *cs = job->cs;
 835         struct hl_device *hdev = cs->ctx->hdev;
 836
 837         /* job is no longer needed */
 838         complete_job(hdev, job);
 839 }
 840
 841 static int validate_queue_index(struct hl_device *hdev,
 842                                 struct hl_cs_chunk *chunk,
 843                                 enum hl_queue_type *queue_type,
 844                                 bool *is_kernel_allocated_cb)
 845 {
 846         struct asic_fixed_properties *asic = &hdev->asic_prop;
 847         struct hw_queue_properties *hw_queue_prop;
 848
 849         /* This must be checked here to prevent out-of-bounds access to
 850          * hw_queues_props array
 851          */
 852         if (chunk->queue_index >= asic->max_queues) {
 853                 dev_err(hdev->dev, "Queue index %d is invalid\n",
 854                         chunk->queue_index);
 855                 return -EINVAL;
 856         }
 857
 858         hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
 859
 860         if (hw_queue_prop->type == QUEUE_TYPE_NA) {
 861                 dev_err(hdev->dev, "Queue index %d is invalid\n",
 862                         chunk->queue_index);
 863                 return -EINVAL;
 864         }
 865
 866         if (hw_queue_prop->driver_only) {
 867                 dev_err(hdev->dev,
 868                         "Queue index %d is restricted for the kernel driver\n",
 869                         chunk->queue_index);
 870                 return -EINVAL;
 871         }
 872
 873         /* When hw queue type isn't QUEUE_TYPE_HW,
 874          * USER_ALLOC_CB flag shall be referred as "don't care".
 875          */
 876         if (hw_queue_prop->type == QUEUE_TYPE_HW) {
 877                 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
 878                         if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
 879                                 dev_err(hdev->dev,
 880                                         "Queue index %d doesn't support user CB\n",
 881                                         chunk->queue_index);
 882                                 return -EINVAL;
 883                         }
 884
 885                         *is_kernel_allocated_cb = false;
 886                 } else {
 887                         if (!(hw_queue_prop->cb_alloc_flags &
 888                                         CB_ALLOC_KERNEL)) {
 889                                 dev_err(hdev->dev,
 890                                         "Queue index %d doesn't support kernel CB\n",
 891                                         chunk->queue_index);
 892                                 return -EINVAL;
 893                         }
 894
 895                         *is_kernel_allocated_cb = true;
 896                 }
 897         } else {
 898                 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
 899                                                 & CB_ALLOC_KERNEL);
 900         }
 901
 902         *queue_type = hw_queue_prop->type;
 903         return 0;
 904 }
 905
 906 static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
 907                                         struct hl_cb_mgr *cb_mgr,
 908                                         struct hl_cs_chunk *chunk)
 909 {
 910         struct hl_cb *cb;
 911         u32 cb_handle;
 912
 913         cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
 914
 915         cb = hl_cb_get(hdev, cb_mgr, cb_handle);
 916         if (!cb) {
 917                 dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
 918                 return NULL;
 919         }
 920
 921         if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
 922                 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
 923                 goto release_cb;
 924         }
 925
 926         atomic_inc(&cb->cs_cnt);
 927
 928         return cb;
 929
 930 release_cb:
 931         hl_cb_put(cb);
 932         return NULL;
 933 }
 934
 935 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 936                 enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
 937 {
 938         struct hl_cs_job *job;
 939
 940         job = kzalloc(sizeof(*job), GFP_ATOMIC);
 941         if (!job)
 942                 job = kzalloc(sizeof(*job), GFP_KERNEL);
 943
 944         if (!job)
 945                 return NULL;
 946
 947         kref_init(&job->refcount);
 948         job->queue_type = queue_type;
 949         job->is_kernel_allocated_cb = is_kernel_allocated_cb;
 950
 951         if (is_cb_patched(hdev, job))
 952                 INIT_LIST_HEAD(&job->userptr_list);
 953
 954         if (job->queue_type == QUEUE_TYPE_EXT)
 955                 INIT_WORK(&job->finish_work, job_wq_completion);
 956
 957         return job;
 958 }
 959
 960 static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
 961 {
 962         if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
 963                 return CS_TYPE_SIGNAL;
 964         else if (cs_type_flags & HL_CS_FLAGS_WAIT)
 965                 return CS_TYPE_WAIT;
 966         else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
 967                 return CS_TYPE_COLLECTIVE_WAIT;
 968         else
 969                 return CS_TYPE_DEFAULT;
 970 }
 971
 972 static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 973 {
 974         struct hl_device *hdev = hpriv->hdev;
 975         struct hl_ctx *ctx = hpriv->ctx;
 976         u32 cs_type_flags, num_chunks;
 977         enum hl_device_status status;
 978         enum hl_cs_type cs_type;
 979
 980         if (!hl_device_operational(hdev, &status)) {
 981                 dev_warn_ratelimited(hdev->dev,
 982                         "Device is %s. Can't submit new CS\n",
 983                         hdev->status[status]);
 984                 return -EBUSY;
 985         }
 986
 987         if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
 988                         !hdev->supports_staged_submission) {
 989                 dev_err(hdev->dev, "staged submission not supported");
 990                 return -EPERM;
 991         }
 992
 993         cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
 994
 995         if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
 996                 dev_err(hdev->dev,
 997                         "CS type flags are mutually exclusive, context %d\n",
 998                         ctx->asid);
 999                 return -EINVAL;
1000         }
1001
1002         cs_type = hl_cs_get_cs_type(cs_type_flags);
1003         num_chunks = args->in.num_chunks_execute;
1004
1005         if (unlikely((cs_type != CS_TYPE_DEFAULT) &&
1006                                         !hdev->supports_sync_stream)) {
1007                 dev_err(hdev->dev, "Sync stream CS is not supported\n");
1008                 return -EINVAL;
1009         }
1010
1011         if (cs_type == CS_TYPE_DEFAULT) {
1012                 if (!num_chunks) {
1013                         dev_err(hdev->dev,
1014                                 "Got execute CS with 0 chunks, context %d\n",
1015                                 ctx->asid);
1016                         return -EINVAL;
1017                 }
1018         } else if (num_chunks != 1) {
1019                 dev_err(hdev->dev,
1020                         "Sync stream CS mandates one chunk only, context %d\n",
1021                         ctx->asid);
1022                 return -EINVAL;
1023         }
1024
1025         return 0;
1026 }
1027
1028 static int hl_cs_copy_chunk_array(struct hl_device *hdev,
1029                                         struct hl_cs_chunk **cs_chunk_array,
1030                                         void __user *chunks, u32 num_chunks,
1031                                         struct hl_ctx *ctx)
1032 {
1033         u32 size_to_copy;
1034
1035         if (num_chunks > HL_MAX_JOBS_PER_CS) {
1036                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1037                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1038                 dev_err(hdev->dev,
1039                         "Number of chunks can NOT be larger than %d\n",
1040                         HL_MAX_JOBS_PER_CS);
1041                 return -EINVAL;
1042         }
1043
1044         *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
1045                                         GFP_ATOMIC);
1046         if (!*cs_chunk_array)
1047                 *cs_chunk_array = kmalloc_array(num_chunks,
1048                                         sizeof(**cs_chunk_array), GFP_KERNEL);
1049         if (!*cs_chunk_array) {
1050                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1051                 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1052                 return -ENOMEM;
1053         }
1054
1055         size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
1056         if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
1057                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1058                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1059                 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
1060                 kfree(*cs_chunk_array);
1061                 return -EFAULT;
1062         }
1063
1064         return 0;
1065 }
1066
1067 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
1068                                 u64 sequence, u32 flags)
1069 {
1070         if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
1071                 return 0;
1072
1073         cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
1074         cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);
1075
1076         if (cs->staged_first) {
1077                 /* Staged CS sequence is the first CS sequence */
1078                 INIT_LIST_HEAD(&cs->staged_cs_node);
1079                 cs->staged_sequence = cs->sequence;
1080         } else {
1081                 /* User sequence will be validated in 'hl_hw_queue_schedule_cs'
1082                  * under the cs_mirror_lock
1083                  */
1084                 cs->staged_sequence = sequence;
1085         }
1086
1087         /* Increment CS reference if needed */
1088         staged_cs_get(hdev, cs);
1089
1090         cs->staged_cs = true;
1091
1092         return 0;
1093 }
1094
1095 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
1096                                 u32 num_chunks, u64 *cs_seq, u32 flags,
1097                                 u32 timeout)
1098 {
1099         bool staged_mid, int_queues_only = true;
1100         struct hl_device *hdev = hpriv->hdev;
1101         struct hl_cs_chunk *cs_chunk_array;
1102         struct hl_cs_counters_atomic *cntr;
1103         struct hl_ctx *ctx = hpriv->ctx;
1104         struct hl_cs_job *job;
1105         struct hl_cs *cs;
1106         struct hl_cb *cb;
1107         u64 user_sequence;
1108         int rc, i;
1109
1110         cntr = &hdev->aggregated_cs_counters;
1111         user_sequence = *cs_seq;
1112         *cs_seq = ULLONG_MAX;
1113
1114         rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1115                         hpriv->ctx);
1116         if (rc)
1117                 goto out;
1118
1119         if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1120                         !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
1121                 staged_mid = true;
1122         else
1123                 staged_mid = false;
1124
1125         rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
1126                         staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
1127                         timeout);
1128         if (rc)
1129                 goto free_cs_chunk_array;
1130
1131         *cs_seq = cs->sequence;
1132
1133         hl_debugfs_add_cs(cs);
1134
1135         rc = cs_staged_submission(hdev, cs, user_sequence, flags);
1136         if (rc)
1137                 goto free_cs_object;
1138
1139         /* Validate ALL the CS chunks before submitting the CS */
1140         for (i = 0 ; i < num_chunks ; i++) {
1141                 struct hl_cs_chunk *chunk = &cs_chunk_array[i];
1142                 enum hl_queue_type queue_type;
1143                 bool is_kernel_allocated_cb;
1144
1145                 rc = validate_queue_index(hdev, chunk, &queue_type,
1146                                                 &is_kernel_allocated_cb);
1147                 if (rc) {
1148                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1149                         atomic64_inc(&cntr->validation_drop_cnt);
1150                         goto free_cs_object;
1151                 }
1152
1153                 if (is_kernel_allocated_cb) {
1154                         cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
1155                         if (!cb) {
1156                                 atomic64_inc(
1157                                         &ctx->cs_counters.validation_drop_cnt);
1158                                 atomic64_inc(&cntr->validation_drop_cnt);
1159                                 rc = -EINVAL;
1160                                 goto free_cs_object;
1161                         }
1162                 } else {
1163                         cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
1164                 }
1165
1166                 if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
1167                         int_queues_only = false;
1168
1169                 job = hl_cs_allocate_job(hdev, queue_type,
1170                                                 is_kernel_allocated_cb);
1171                 if (!job) {
1172                         atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1173                         atomic64_inc(&cntr->out_of_mem_drop_cnt);
1174                         dev_err(hdev->dev, "Failed to allocate a new job\n");
1175                         rc = -ENOMEM;
1176                         if (is_kernel_allocated_cb)
1177                                 goto release_cb;
1178
1179                         goto free_cs_object;
1180                 }
1181
1182                 job->id = i + 1;
1183                 job->cs = cs;
1184                 job->user_cb = cb;
1185                 job->user_cb_size = chunk->cb_size;
1186                 job->hw_queue_id = chunk->queue_index;
1187
1188                 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1189
1190                 list_add_tail(&job->cs_node, &cs->job_list);
1191
1192                 /*
1193                  * Increment CS reference. When CS reference is 0, CS is
1194                  * done and can be signaled to user and free all its resources
1195                  * Only increment for JOB on external or H/W queues, because
1196                  * only for those JOBs we get completion
1197                  */
1198                 if (cs_needs_completion(cs) &&
1199                         (job->queue_type == QUEUE_TYPE_EXT ||
1200                                 job->queue_type == QUEUE_TYPE_HW))
1201                         cs_get(cs);
1202
1203                 hl_debugfs_add_job(hdev, job);
1204
1205                 rc = cs_parser(hpriv, job);
1206                 if (rc) {
1207                         atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
1208                         atomic64_inc(&cntr->parsing_drop_cnt);
1209                         dev_err(hdev->dev,
1210                                 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
1211                                 cs->ctx->asid, cs->sequence, job->id, rc);
1212                         goto free_cs_object;
1213                 }
1214         }
1215
1216         /* We allow a CS with any queue type combination as long as it does
1217          * not get a completion
1218          */
1219         if (int_queues_only && cs_needs_completion(cs)) {
1220                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1221                 atomic64_inc(&cntr->validation_drop_cnt);
1222                 dev_err(hdev->dev,
1223                         "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
1224                         cs->ctx->asid, cs->sequence);
1225                 rc = -EINVAL;
1226                 goto free_cs_object;
1227         }
1228
1229         rc = hl_hw_queue_schedule_cs(cs);
1230         if (rc) {
1231                 if (rc != -EAGAIN)
1232                         dev_err(hdev->dev,
1233                                 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
1234                                 cs->ctx->asid, cs->sequence, rc);
1235                 goto free_cs_object;
1236         }
1237
1238         rc = HL_CS_STATUS_SUCCESS;
1239         goto put_cs;
1240
1241 release_cb:
1242         atomic_dec(&cb->cs_cnt);
1243         hl_cb_put(cb);
1244 free_cs_object:
1245         cs_rollback(hdev, cs);
1246         *cs_seq = ULLONG_MAX;
1247         /* The path below is both for good and erroneous exits */
1248 put_cs:
1249         /* We finished with the CS in this function, so put the ref */
1250         cs_put(cs);
1251 free_cs_chunk_array:
1252         kfree(cs_chunk_array);
1253 out:
1254         return rc;
1255 }
1256
1257 static int pending_cb_create_job(struct hl_device *hdev, struct hl_ctx *ctx,
1258                 struct hl_cs *cs, struct hl_cb *cb, u32 size, u32 hw_queue_id)
1259 {
1260         struct hw_queue_properties *hw_queue_prop;
1261         struct hl_cs_counters_atomic *cntr;
1262         struct hl_cs_job *job;
1263
1264         hw_queue_prop = &hdev->asic_prop.hw_queues_props[hw_queue_id];
1265         cntr = &hdev->aggregated_cs_counters;
1266
1267         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1268         if (!job) {
1269                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1270                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1271                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1272                 return -ENOMEM;
1273         }
1274
1275         job->id = 0;
1276         job->cs = cs;
1277         job->user_cb = cb;
1278         atomic_inc(&job->user_cb->cs_cnt);
1279         job->user_cb_size = size;
1280         job->hw_queue_id = hw_queue_id;
1281         job->patched_cb = job->user_cb;
1282         job->job_cb_size = job->user_cb_size;
1283
1284         /* increment refcount as for external queues we get completion */
1285         cs_get(cs);
1286
1287         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1288
1289         list_add_tail(&job->cs_node, &cs->job_list);
1290
1291         hl_debugfs_add_job(hdev, job);
1292
1293         return 0;
1294 }
1295
1296 static int hl_submit_pending_cb(struct hl_fpriv *hpriv)
1297 {
1298         struct hl_device *hdev = hpriv->hdev;
1299         struct hl_ctx *ctx = hpriv->ctx;
1300         struct hl_pending_cb *pending_cb, *tmp;
1301         struct list_head local_cb_list;
1302         struct hl_cs *cs;
1303         struct hl_cb *cb;
1304         u32 hw_queue_id;
1305         u32 cb_size;
1306         int process_list, rc = 0;
1307
1308         if (list_empty(&ctx->pending_cb_list))
1309                 return 0;
1310
1311         process_list = atomic_cmpxchg(&ctx->thread_pending_cb_token, 1, 0);
1312
1313         /* Only a single thread is allowed to process the list */
1314         if (!process_list)
1315                 return 0;
1316
1317         if (list_empty(&ctx->pending_cb_list))
1318                 goto free_pending_cb_token;
1319
1320         /* move all list elements to a local list */
1321         INIT_LIST_HEAD(&local_cb_list);
1322         spin_lock(&ctx->pending_cb_lock);
1323         list_for_each_entry_safe(pending_cb, tmp, &ctx->pending_cb_list,
1324                                                                 cb_node)
1325                 list_move_tail(&pending_cb->cb_node, &local_cb_list);
1326         spin_unlock(&ctx->pending_cb_lock);
1327
1328         rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs, 0,
1329                                 hdev->timeout_jiffies);
1330         if (rc)
1331                 goto add_list_elements;
1332
1333         hl_debugfs_add_cs(cs);
1334
1335         /* Iterate through pending cb list, create jobs and add to CS */
1336         list_for_each_entry(pending_cb, &local_cb_list, cb_node) {
1337                 cb = pending_cb->cb;
1338                 cb_size = pending_cb->cb_size;
1339                 hw_queue_id = pending_cb->hw_queue_id;
1340
1341                 rc = pending_cb_create_job(hdev, ctx, cs, cb, cb_size,
1342                                                                 hw_queue_id);
1343                 if (rc)
1344                         goto free_cs_object;
1345         }
1346
1347         rc = hl_hw_queue_schedule_cs(cs);
1348         if (rc) {
1349                 if (rc != -EAGAIN)
1350                         dev_err(hdev->dev,
1351                                 "Failed to submit CS %d.%llu (%d)\n",
1352                                 ctx->asid, cs->sequence, rc);
1353                 goto free_cs_object;
1354         }
1355
1356         /* pending cb was scheduled successfully */
1357         list_for_each_entry_safe(pending_cb, tmp, &local_cb_list, cb_node) {
1358                 list_del(&pending_cb->cb_node);
1359                 kfree(pending_cb);
1360         }
1361
1362         cs_put(cs);
1363
1364         goto free_pending_cb_token;
1365
1366 free_cs_object:
1367         cs_rollback(hdev, cs);
1368         cs_put(cs);
1369 add_list_elements:
1370         spin_lock(&ctx->pending_cb_lock);
1371         list_for_each_entry_safe_reverse(pending_cb, tmp, &local_cb_list,
1372                                                                 cb_node)
1373                 list_move(&pending_cb->cb_node, &ctx->pending_cb_list);
1374         spin_unlock(&ctx->pending_cb_lock);
1375 free_pending_cb_token:
1376         atomic_set(&ctx->thread_pending_cb_token, 1);
1377
1378         return rc;
1379 }
1380
1381 static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
1382                                 u64 *cs_seq)
1383 {
1384         struct hl_device *hdev = hpriv->hdev;
1385         struct hl_ctx *ctx = hpriv->ctx;
1386         bool need_soft_reset = false;
1387         int rc = 0, do_ctx_switch;
1388         void __user *chunks;
1389         u32 num_chunks, tmp;
1390         int ret;
1391
1392         do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
1393
1394         if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
1395                 mutex_lock(&hpriv->restore_phase_mutex);
1396
1397                 if (do_ctx_switch) {
1398                         rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
1399                         if (rc) {
1400                                 dev_err_ratelimited(hdev->dev,
1401                                         "Failed to switch to context %d, rejecting CS! %d\n",
1402                                         ctx->asid, rc);
1403                                 /*
1404                                  * If we timedout, or if the device is not IDLE
1405                                  * while we want to do context-switch (-EBUSY),
1406                                  * we need to soft-reset because QMAN is
1407                                  * probably stuck. However, we can't call to
1408                                  * reset here directly because of deadlock, so
1409                                  * need to do it at the very end of this
1410                                  * function
1411                                  */
1412                                 if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
1413                                         need_soft_reset = true;
1414                                 mutex_unlock(&hpriv->restore_phase_mutex);
1415                                 goto out;
1416                         }
1417                 }
1418
1419                 hdev->asic_funcs->restore_phase_topology(hdev);
1420
1421                 chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
1422                 num_chunks = args->in.num_chunks_restore;
1423
1424                 if (!num_chunks) {
1425                         dev_dbg(hdev->dev,
1426                                 "Need to run restore phase but restore CS is empty\n");
1427                         rc = 0;
1428                 } else {
1429                         rc = cs_ioctl_default(hpriv, chunks, num_chunks,
1430                                         cs_seq, 0, hdev->timeout_jiffies);
1431                 }
1432
1433                 mutex_unlock(&hpriv->restore_phase_mutex);
1434
1435                 if (rc) {
1436                         dev_err(hdev->dev,
1437                                 "Failed to submit restore CS for context %d (%d)\n",
1438                                 ctx->asid, rc);
1439                         goto out;
1440                 }
1441
1442                 /* Need to wait for restore completion before execution phase */
1443                 if (num_chunks) {
1444                         enum hl_cs_wait_status status;
1445 wait_again:
1446                         ret = _hl_cs_wait_ioctl(hdev, ctx,
1447                                         jiffies_to_usecs(hdev->timeout_jiffies),
1448                                         *cs_seq, &status, NULL);
1449                         if (ret) {
1450                                 if (ret == -ERESTARTSYS) {
1451                                         usleep_range(100, 200);
1452                                         goto wait_again;
1453                                 }
1454
1455                                 dev_err(hdev->dev,
1456                                         "Restore CS for context %d failed to complete %d\n",
1457                                         ctx->asid, ret);
1458                                 rc = -ENOEXEC;
1459                                 goto out;
1460                         }
1461                 }
1462
1463                 ctx->thread_ctx_switch_wait_token = 1;
1464
1465         } else if (!ctx->thread_ctx_switch_wait_token) {
1466                 rc = hl_poll_timeout_memory(hdev,
1467                         &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
1468                         100, jiffies_to_usecs(hdev->timeout_jiffies), false);
1469
1470                 if (rc == -ETIMEDOUT) {
1471                         dev_err(hdev->dev,
1472                                 "context switch phase timeout (%d)\n", tmp);
1473                         goto out;
1474                 }
1475         }
1476
1477 out:
1478         if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
1479                 hl_device_reset(hdev, 0);
1480
1481         return rc;
1482 }
1483
1484 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
1485                 struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
1486 {
1487         u64 *signal_seq_arr = NULL;
1488         u32 size_to_copy, signal_seq_arr_len;
1489         int rc = 0;
1490
1491         signal_seq_arr_len = chunk->num_signal_seq_arr;
1492
1493         /* currently only one signal seq is supported */
1494         if (signal_seq_arr_len != 1) {
1495                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1496                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1497                 dev_err(hdev->dev,
1498                         "Wait for signal CS supports only one signal CS seq\n");
1499                 return -EINVAL;
1500         }
1501
1502         signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1503                                         sizeof(*signal_seq_arr),
1504                                         GFP_ATOMIC);
1505         if (!signal_seq_arr)
1506                 signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1507                                         sizeof(*signal_seq_arr),
1508                                         GFP_KERNEL);
1509         if (!signal_seq_arr) {
1510                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1511                 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1512                 return -ENOMEM;
1513         }
1514
1515         size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
1516         if (copy_from_user(signal_seq_arr,
1517                                 u64_to_user_ptr(chunk->signal_seq_arr),
1518                                 size_to_copy)) {
1519                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1520                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1521                 dev_err(hdev->dev,
1522                         "Failed to copy signal seq array from user\n");
1523                 rc = -EFAULT;
1524                 goto out;
1525         }
1526
1527         /* currently it is guaranteed to have only one signal seq */
1528         *signal_seq = signal_seq_arr[0];
1529
1530 out:
1531         kfree(signal_seq_arr);
1532
1533         return rc;
1534 }
1535
1536 static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
1537                 struct hl_ctx *ctx, struct hl_cs *cs, enum hl_queue_type q_type,
1538                 u32 q_idx)
1539 {
1540         struct hl_cs_counters_atomic *cntr;
1541         struct hl_cs_job *job;
1542         struct hl_cb *cb;
1543         u32 cb_size;
1544
1545         cntr = &hdev->aggregated_cs_counters;
1546
1547         job = hl_cs_allocate_job(hdev, q_type, true);
1548         if (!job) {
1549                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1550                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1551                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1552                 return -ENOMEM;
1553         }
1554
1555         if (cs->type == CS_TYPE_WAIT)
1556                 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
1557         else
1558                 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
1559
1560         cb = hl_cb_kernel_create(hdev, cb_size,
1561                                 q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
1562         if (!cb) {
1563                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1564                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1565                 kfree(job);
1566                 return -EFAULT;
1567         }
1568
1569         job->id = 0;
1570         job->cs = cs;
1571         job->user_cb = cb;
1572         atomic_inc(&job->user_cb->cs_cnt);
1573         job->user_cb_size = cb_size;
1574         job->hw_queue_id = q_idx;
1575
1576         /*
1577          * No need in parsing, user CB is the patched CB.
1578          * We call hl_cb_destroy() out of two reasons - we don't need the CB in
1579          * the CB idr anymore and to decrement its refcount as it was
1580          * incremented inside hl_cb_kernel_create().
1581          */
1582         job->patched_cb = job->user_cb;
1583         job->job_cb_size = job->user_cb_size;
1584         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1585
1586         /* increment refcount as for external queues we get completion */
1587         cs_get(cs);
1588
1589         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1590
1591         list_add_tail(&job->cs_node, &cs->job_list);
1592
1593         hl_debugfs_add_job(hdev, job);
1594
1595         return 0;
1596 }
1597
1598 static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
1599                                 void __user *chunks, u32 num_chunks,
1600                                 u64 *cs_seq, u32 flags, u32 timeout)
1601 {
1602         struct hl_cs_chunk *cs_chunk_array, *chunk;
1603         struct hw_queue_properties *hw_queue_prop;
1604         struct hl_device *hdev = hpriv->hdev;
1605         struct hl_cs_compl *sig_waitcs_cmpl;
1606         u32 q_idx, collective_engine_id = 0;
1607         struct hl_cs_counters_atomic *cntr;
1608         struct hl_fence *sig_fence = NULL;
1609         struct hl_ctx *ctx = hpriv->ctx;
1610         enum hl_queue_type q_type;
1611         struct hl_cs *cs;
1612         u64 signal_seq;
1613         int rc;
1614
1615         cntr = &hdev->aggregated_cs_counters;
1616         *cs_seq = ULLONG_MAX;
1617
1618         rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1619                         ctx);
1620         if (rc)
1621                 goto out;
1622
1623         /* currently it is guaranteed to have only one chunk */
1624         chunk = &cs_chunk_array[0];
1625
1626         if (chunk->queue_index >= hdev->asic_prop.max_queues) {
1627                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1628                 atomic64_inc(&cntr->validation_drop_cnt);
1629                 dev_err(hdev->dev, "Queue index %d is invalid\n",
1630                         chunk->queue_index);
1631                 rc = -EINVAL;
1632                 goto free_cs_chunk_array;
1633         }
1634
1635         q_idx = chunk->queue_index;
1636         hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
1637         q_type = hw_queue_prop->type;
1638
1639         if (!hw_queue_prop->supports_sync_stream) {
1640                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1641                 atomic64_inc(&cntr->validation_drop_cnt);
1642                 dev_err(hdev->dev,
1643                         "Queue index %d does not support sync stream operations\n",
1644                         q_idx);
1645                 rc = -EINVAL;
1646                 goto free_cs_chunk_array;
1647         }
1648
1649         if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
1650                 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1651                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1652                         atomic64_inc(&cntr->validation_drop_cnt);
1653                         dev_err(hdev->dev,
1654                                 "Queue index %d is invalid\n", q_idx);
1655                         rc = -EINVAL;
1656                         goto free_cs_chunk_array;
1657                 }
1658
1659                 collective_engine_id = chunk->collective_engine_id;
1660         }
1661
1662         if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
1663                 rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx);
1664                 if (rc)
1665                         goto free_cs_chunk_array;
1666
1667                 sig_fence = hl_ctx_get_fence(ctx, signal_seq);
1668                 if (IS_ERR(sig_fence)) {
1669                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1670                         atomic64_inc(&cntr->validation_drop_cnt);
1671                         dev_err(hdev->dev,
1672                                 "Failed to get signal CS with seq 0x%llx\n",
1673                                 signal_seq);
1674                         rc = PTR_ERR(sig_fence);
1675                         goto free_cs_chunk_array;
1676                 }
1677
1678                 if (!sig_fence) {
1679                         /* signal CS already finished */
1680                         rc = 0;
1681                         goto free_cs_chunk_array;
1682                 }
1683
1684                 sig_waitcs_cmpl =
1685                         container_of(sig_fence, struct hl_cs_compl, base_fence);
1686
1687                 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
1688                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1689                         atomic64_inc(&cntr->validation_drop_cnt);
1690                         dev_err(hdev->dev,
1691                                 "CS seq 0x%llx is not of a signal CS\n",
1692                                 signal_seq);
1693                         hl_fence_put(sig_fence);
1694                         rc = -EINVAL;
1695                         goto free_cs_chunk_array;
1696                 }
1697
1698                 if (completion_done(&sig_fence->completion)) {
1699                         /* signal CS already finished */
1700                         hl_fence_put(sig_fence);
1701                         rc = 0;
1702                         goto free_cs_chunk_array;
1703                 }
1704         }
1705
1706         rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
1707         if (rc) {
1708                 if (cs_type == CS_TYPE_WAIT ||
1709                         cs_type == CS_TYPE_COLLECTIVE_WAIT)
1710                         hl_fence_put(sig_fence);
1711                 goto free_cs_chunk_array;
1712         }
1713
1714         /*
1715          * Save the signal CS fence for later initialization right before
1716          * hanging the wait CS on the queue.
1717          */
1718         if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT)
1719                 cs->signal_fence = sig_fence;
1720
1721         hl_debugfs_add_cs(cs);
1722
1723         *cs_seq = cs->sequence;
1724
1725         if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
1726                 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
1727                                 q_idx);
1728         else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
1729                 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
1730                                 cs, q_idx, collective_engine_id);
1731         else {
1732                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1733                 atomic64_inc(&cntr->validation_drop_cnt);
1734                 rc = -EINVAL;
1735         }
1736
1737         if (rc)
1738                 goto free_cs_object;
1739
1740         rc = hl_hw_queue_schedule_cs(cs);
1741         if (rc) {
1742                 if (rc != -EAGAIN)
1743                         dev_err(hdev->dev,
1744                                 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
1745                                 ctx->asid, cs->sequence, rc);
1746                 goto free_cs_object;
1747         }
1748
1749         rc = HL_CS_STATUS_SUCCESS;
1750         goto put_cs;
1751
1752 free_cs_object:
1753         cs_rollback(hdev, cs);
1754         *cs_seq = ULLONG_MAX;
1755         /* The path below is both for good and erroneous exits */
1756 put_cs:
1757         /* We finished with the CS in this function, so put the ref */
1758         cs_put(cs);
1759 free_cs_chunk_array:
1760         kfree(cs_chunk_array);
1761 out:
1762         return rc;
1763 }
1764
1765 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
1766 {
1767         union hl_cs_args *args = data;
1768         enum hl_cs_type cs_type;
1769         u64 cs_seq = ULONG_MAX;
1770         void __user *chunks;
1771         u32 num_chunks, flags, timeout;
1772         int rc;
1773
1774         rc = hl_cs_sanity_checks(hpriv, args);
1775         if (rc)
1776                 goto out;
1777
1778         rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
1779         if (rc)
1780                 goto out;
1781
1782         rc = hl_submit_pending_cb(hpriv);
1783         if (rc)
1784                 goto out;
1785
1786         cs_type = hl_cs_get_cs_type(args->in.cs_flags &
1787                                         ~HL_CS_FLAGS_FORCE_RESTORE);
1788         chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
1789         num_chunks = args->in.num_chunks_execute;
1790         flags = args->in.cs_flags;
1791
1792         /* In case this is a staged CS, user should supply the CS sequence */
1793         if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1794                         !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
1795                 cs_seq = args->in.seq;
1796
1797         timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
1798                         ? msecs_to_jiffies(args->in.timeout * 1000)
1799                         : hpriv->hdev->timeout_jiffies;
1800
1801         switch (cs_type) {
1802         case CS_TYPE_SIGNAL:
1803         case CS_TYPE_WAIT:
1804         case CS_TYPE_COLLECTIVE_WAIT:
1805                 rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
1806                                         &cs_seq, args->in.cs_flags, timeout);
1807                 break;
1808         default:
1809                 rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
1810                                                 args->in.cs_flags, timeout);
1811                 break;
1812         }
1813
1814 out:
1815         if (rc != -EAGAIN) {
1816                 memset(args, 0, sizeof(*args));
1817                 args->out.status = rc;
1818                 args->out.seq = cs_seq;
1819         }
1820
1821         return rc;
1822 }
1823
1824 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
1825                                 u64 timeout_us, u64 seq,
1826                                 enum hl_cs_wait_status *status, s64 *timestamp)
1827 {
1828         struct hl_fence *fence;
1829         unsigned long timeout;
1830         int rc = 0;
1831         long completion_rc;
1832
1833         if (timestamp)
1834                 *timestamp = 0;
1835
1836         if (timeout_us == MAX_SCHEDULE_TIMEOUT)
1837                 timeout = timeout_us;
1838         else
1839                 timeout = usecs_to_jiffies(timeout_us);
1840
1841         hl_ctx_get(hdev, ctx);
1842
1843         fence = hl_ctx_get_fence(ctx, seq);
1844         if (IS_ERR(fence)) {
1845                 rc = PTR_ERR(fence);
1846                 if (rc == -EINVAL)
1847                         dev_notice_ratelimited(hdev->dev,
1848                                 "Can't wait on CS %llu because current CS is at seq %llu\n",
1849                                 seq, ctx->cs_sequence);
1850         } else if (fence) {
1851                 if (!timeout_us)
1852                         completion_rc = completion_done(&fence->completion);
1853                 else
1854                         completion_rc =
1855                                 wait_for_completion_interruptible_timeout(
1856                                         &fence->completion, timeout);
1857
1858                 if (completion_rc > 0) {
1859                         *status = CS_WAIT_STATUS_COMPLETED;
1860                         if (timestamp)
1861                                 *timestamp = ktime_to_ns(fence->timestamp);
1862                 } else {
1863                         *status = CS_WAIT_STATUS_BUSY;
1864                 }
1865
1866                 if (fence->error == -ETIMEDOUT)
1867                         rc = -ETIMEDOUT;
1868                 else if (fence->error == -EIO)
1869                         rc = -EIO;
1870
1871                 hl_fence_put(fence);
1872         } else {
1873                 dev_dbg(hdev->dev,
1874                         "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
1875                         seq, ctx->cs_sequence);
1876                 *status = CS_WAIT_STATUS_GONE;
1877         }
1878
1879         hl_ctx_put(ctx);
1880
1881         return rc;
1882 }
1883
1884 static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
1885 {
1886         struct hl_device *hdev = hpriv->hdev;
1887         union hl_wait_cs_args *args = data;
1888         enum hl_cs_wait_status status;
1889         u64 seq = args->in.seq;
1890         s64 timestamp;
1891         int rc;
1892
1893         rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq,
1894                                 &status, &timestamp);
1895
1896         memset(args, 0, sizeof(*args));
1897
1898         if (rc) {
1899                 if (rc == -ERESTARTSYS) {
1900                         dev_err_ratelimited(hdev->dev,
1901                                 "user process got signal while waiting for CS handle %llu\n",
1902                                 seq);
1903                         args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
1904                         rc = -EINTR;
1905                 } else if (rc == -ETIMEDOUT) {
1906                         dev_err_ratelimited(hdev->dev,
1907                                 "CS %llu has timed-out while user process is waiting for it\n",
1908                                 seq);
1909                         args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
1910                 } else if (rc == -EIO) {
1911                         dev_err_ratelimited(hdev->dev,
1912                                 "CS %llu has been aborted while user process is waiting for it\n",
1913                                 seq);
1914                         args->out.status = HL_WAIT_CS_STATUS_ABORTED;
1915                 }
1916                 return rc;
1917         }
1918
1919         if (timestamp) {
1920                 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
1921                 args->out.timestamp_nsec = timestamp;
1922         }
1923
1924         switch (status) {
1925         case CS_WAIT_STATUS_GONE:
1926                 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
1927                 fallthrough;
1928         case CS_WAIT_STATUS_COMPLETED:
1929                 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
1930                 break;
1931         case CS_WAIT_STATUS_BUSY:
1932         default:
1933                 args->out.status = HL_WAIT_CS_STATUS_BUSY;
1934                 break;
1935         }
1936
1937         return 0;
1938 }
1939
1940 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
1941                                 u32 timeout_us, u64 user_address,
1942                                 u32 target_value, u16 interrupt_offset,
1943                                 enum hl_cs_wait_status *status)
1944 {
1945         struct hl_user_pending_interrupt *pend;
1946         struct hl_user_interrupt *interrupt;
1947         unsigned long timeout;
1948         long completion_rc;
1949         u32 completion_value;
1950         int rc = 0;
1951
1952         if (timeout_us == U32_MAX)
1953                 timeout = timeout_us;
1954         else
1955                 timeout = usecs_to_jiffies(timeout_us);
1956
1957         hl_ctx_get(hdev, ctx);
1958
1959         pend = kmalloc(sizeof(*pend), GFP_KERNEL);
1960         if (!pend) {
1961                 hl_ctx_put(ctx);
1962                 return -ENOMEM;
1963         }
1964
1965         hl_fence_init(&pend->fence, ULONG_MAX);
1966
1967         if (interrupt_offset == HL_COMMON_USER_INTERRUPT_ID)
1968                 interrupt = &hdev->common_user_interrupt;
1969         else
1970                 interrupt = &hdev->user_interrupt[interrupt_offset];
1971
1972         spin_lock(&interrupt->wait_list_lock);
1973         if (!hl_device_operational(hdev, NULL)) {
1974                 rc = -EPERM;
1975                 goto unlock_and_free_fence;
1976         }
1977
1978         if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
1979                 dev_err(hdev->dev,
1980                         "Failed to copy completion value from user\n");
1981                 rc = -EFAULT;
1982                 goto unlock_and_free_fence;
1983         }
1984
1985         if (completion_value >= target_value)
1986                 *status = CS_WAIT_STATUS_COMPLETED;
1987         else
1988                 *status = CS_WAIT_STATUS_BUSY;
1989
1990         if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
1991                 goto unlock_and_free_fence;
1992
1993         /* Add pending user interrupt to relevant list for the interrupt
1994          * handler to monitor
1995          */
1996         list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
1997         spin_unlock(&interrupt->wait_list_lock);
1998
1999 wait_again:
2000         /* Wait for interrupt handler to signal completion */
2001         completion_rc =
2002                 wait_for_completion_interruptible_timeout(
2003                                 &pend->fence.completion, timeout);
2004
2005         /* If timeout did not expire we need to perform the comparison.
2006          * If comparison fails, keep waiting until timeout expires
2007          */
2008         if (completion_rc > 0) {
2009                 if (copy_from_user(&completion_value,
2010                                 u64_to_user_ptr(user_address), 4)) {
2011                         dev_err(hdev->dev,
2012                                 "Failed to copy completion value from user\n");
2013                         rc = -EFAULT;
2014                         goto remove_pending_user_interrupt;
2015                 }
2016
2017                 if (completion_value >= target_value) {
2018                         *status = CS_WAIT_STATUS_COMPLETED;
2019                 } else {
2020                         timeout = completion_rc;
2021                         goto wait_again;
2022                 }
2023         } else {
2024                 *status = CS_WAIT_STATUS_BUSY;
2025         }
2026
2027 remove_pending_user_interrupt:
2028         spin_lock(&interrupt->wait_list_lock);
2029         list_del(&pend->wait_list_node);
2030
2031 unlock_and_free_fence:
2032         spin_unlock(&interrupt->wait_list_lock);
2033         kfree(pend);
2034         hl_ctx_put(ctx);
2035
2036         return rc;
2037 }
2038
2039 static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
2040 {
2041         u16 interrupt_id, interrupt_offset, first_interrupt, last_interrupt;
2042         struct hl_device *hdev = hpriv->hdev;
2043         struct asic_fixed_properties *prop;
2044         union hl_wait_cs_args *args = data;
2045         enum hl_cs_wait_status status;
2046         int rc;
2047
2048         prop = &hdev->asic_prop;
2049
2050         if (!prop->user_interrupt_count) {
2051                 dev_err(hdev->dev, "no user interrupts allowed");
2052                 return -EPERM;
2053         }
2054
2055         interrupt_id =
2056                 FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);
2057
2058         first_interrupt = prop->first_available_user_msix_interrupt;
2059         last_interrupt = prop->first_available_user_msix_interrupt +
2060                                                 prop->user_interrupt_count - 1;
2061
2062         if ((interrupt_id < first_interrupt || interrupt_id > last_interrupt) &&
2063                         interrupt_id != HL_COMMON_USER_INTERRUPT_ID) {
2064                 dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id);
2065                 return -EINVAL;
2066         }
2067
2068         if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID)
2069                 interrupt_offset = HL_COMMON_USER_INTERRUPT_ID;
2070         else
2071                 interrupt_offset = interrupt_id - first_interrupt;
2072
2073         rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
2074                                 args->in.interrupt_timeout_us, args->in.addr,
2075                                 args->in.target, interrupt_offset, &status);
2076
2077         memset(args, 0, sizeof(*args));
2078
2079         if (rc) {
2080                 dev_err_ratelimited(hdev->dev,
2081                         "interrupt_wait_ioctl failed (%d)\n", rc);
2082
2083                 return rc;
2084         }
2085
2086         switch (status) {
2087         case CS_WAIT_STATUS_COMPLETED:
2088                 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
2089                 break;
2090         case CS_WAIT_STATUS_BUSY:
2091         default:
2092                 args->out.status = HL_WAIT_CS_STATUS_BUSY;
2093                 break;
2094         }
2095
2096         return 0;
2097 }
2098
2099 int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
2100 {
2101         union hl_wait_cs_args *args = data;
2102         u32 flags = args->in.flags;
2103         int rc;
2104
2105         if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
2106                 rc = hl_interrupt_wait_ioctl(hpriv, data);
2107         else
2108                 rc = hl_cs_wait_ioctl(hpriv, data);
2109
2110         return rc;
2111 }