drivers/misc/habanalabs/common/command_submission.c

   1 // SPDX-License-Identifier: GPL-2.0
   2
   3 /*
   4  * Copyright 2016-2019 HabanaLabs, Ltd.
   5  * All Rights Reserved.
   6  */
   7
   8 #include <uapi/misc/habanalabs.h>
   9 #include "habanalabs.h"
  10
  11 #include <linux/uaccess.h>
  12 #include <linux/slab.h>
  13
  14 #define HL_CS_FLAGS_TYPE_MASK   (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
  15                                 HL_CS_FLAGS_COLLECTIVE_WAIT)
  16
  17 /**
  18  * enum hl_cs_wait_status - cs wait status
  19  * @CS_WAIT_STATUS_BUSY: cs was not completed yet
  20  * @CS_WAIT_STATUS_COMPLETED: cs completed
  21  * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
  22  */
  23 enum hl_cs_wait_status {
  24         CS_WAIT_STATUS_BUSY,
  25         CS_WAIT_STATUS_COMPLETED,
  26         CS_WAIT_STATUS_GONE
  27 };
  28
  29 static void job_wq_completion(struct work_struct *work);
  30 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
  31                                 u64 timeout_us, u64 seq,
  32                                 enum hl_cs_wait_status *status, s64 *timestamp);
  33 static void cs_do_release(struct kref *ref);
  34
  35 static void hl_sob_reset(struct kref *ref)
  36 {
  37         struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
  38                                                         kref);
  39         struct hl_device *hdev = hw_sob->hdev;
  40
  41         hdev->asic_funcs->reset_sob(hdev, hw_sob);
  42 }
  43
  44 void hl_sob_reset_error(struct kref *ref)
  45 {
  46         struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
  47                                                         kref);
  48         struct hl_device *hdev = hw_sob->hdev;
  49
  50         dev_crit(hdev->dev,
  51                 "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
  52                 hw_sob->q_idx, hw_sob->sob_id);
  53 }
  54
  55 /**
  56  * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
  57  * @sob_base: sob base id
  58  * @sob_mask: sob user mask, each bit represents a sob offset from sob base
  59  * @mask: generated mask
  60  *
  61  * Return: 0 if given parameters are valid
  62  */
  63 int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
  64 {
  65         int i;
  66
  67         if (sob_mask == 0)
  68                 return -EINVAL;
  69
  70         if (sob_mask == 0x1) {
  71                 *mask = ~(1 << (sob_base & 0x7));
  72         } else {
  73                 /* find msb in order to verify sob range is valid */
  74                 for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
  75                         if (BIT(i) & sob_mask)
  76                                 break;
  77
  78                 if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
  79                         return -EINVAL;
  80
  81                 *mask = ~sob_mask;
  82         }
  83
  84         return 0;
  85 }
  86
  87 static void sob_reset_work(struct work_struct *work)
  88 {
  89         struct hl_cs_compl *hl_cs_cmpl =
  90                 container_of(work, struct hl_cs_compl, sob_reset_work);
  91         struct hl_device *hdev = hl_cs_cmpl->hdev;
  92
  93         /*
  94          * A signal CS can get completion while the corresponding wait
  95          * for signal CS is on its way to the PQ. The wait for signal CS
  96          * will get stuck if the signal CS incremented the SOB to its
  97          * max value and there are no pending (submitted) waits on this
  98          * SOB.
  99          * We do the following to void this situation:
 100          * 1. The wait for signal CS must get a ref for the signal CS as
 101          *    soon as possible in cs_ioctl_signal_wait() and put it
 102          *    before being submitted to the PQ but after it incremented
 103          *    the SOB refcnt in init_signal_wait_cs().
 104          * 2. Signal/Wait for signal CS will decrement the SOB refcnt
 105          *    here.
 106          * These two measures guarantee that the wait for signal CS will
 107          * reset the SOB upon completion rather than the signal CS and
 108          * hence the above scenario is avoided.
 109          */
 110         kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
 111
 112         if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
 113                 hdev->asic_funcs->reset_sob_group(hdev,
 114                                 hl_cs_cmpl->sob_group);
 115
 116         kfree(hl_cs_cmpl);
 117 }
 118
 119 static void hl_fence_release(struct kref *kref)
 120 {
 121         struct hl_fence *fence =
 122                 container_of(kref, struct hl_fence, refcount);
 123         struct hl_cs_compl *hl_cs_cmpl =
 124                 container_of(fence, struct hl_cs_compl, base_fence);
 125         struct hl_device *hdev = hl_cs_cmpl->hdev;
 126
 127         /* EBUSY means the CS was never submitted and hence we don't have
 128          * an attached hw_sob object that we should handle here
 129          */
 130         if (fence->error == -EBUSY)
 131                 goto free;
 132
 133         if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
 134                 (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
 135                 (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {
 136
 137                 dev_dbg(hdev->dev,
 138                         "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
 139                         hl_cs_cmpl->cs_seq,
 140                         hl_cs_cmpl->type,
 141                         hl_cs_cmpl->hw_sob->sob_id,
 142                         hl_cs_cmpl->sob_val);
 143
 144                 queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work);
 145
 146                 return;
 147         }
 148
 149 free:
 150         kfree(hl_cs_cmpl);
 151 }
 152
 153 void hl_fence_put(struct hl_fence *fence)
 154 {
 155         if (fence)
 156                 kref_put(&fence->refcount, hl_fence_release);
 157 }
 158
 159 void hl_fence_get(struct hl_fence *fence)
 160 {
 161         if (fence)
 162                 kref_get(&fence->refcount);
 163 }
 164
 165 static void hl_fence_init(struct hl_fence *fence, u64 sequence)
 166 {
 167         kref_init(&fence->refcount);
 168         fence->cs_sequence = sequence;
 169         fence->error = 0;
 170         fence->timestamp = ktime_set(0, 0);
 171         init_completion(&fence->completion);
 172 }
 173
 174 void cs_get(struct hl_cs *cs)
 175 {
 176         kref_get(&cs->refcount);
 177 }
 178
 179 static int cs_get_unless_zero(struct hl_cs *cs)
 180 {
 181         return kref_get_unless_zero(&cs->refcount);
 182 }
 183
 184 static void cs_put(struct hl_cs *cs)
 185 {
 186         kref_put(&cs->refcount, cs_do_release);
 187 }
 188
 189 static void cs_job_do_release(struct kref *ref)
 190 {
 191         struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);
 192
 193         kfree(job);
 194 }
 195
 196 static void cs_job_put(struct hl_cs_job *job)
 197 {
 198         kref_put(&job->refcount, cs_job_do_release);
 199 }
 200
 201 bool cs_needs_completion(struct hl_cs *cs)
 202 {
 203         /* In case this is a staged CS, only the last CS in sequence should
 204          * get a completion, any non staged CS will always get a completion
 205          */
 206         if (cs->staged_cs && !cs->staged_last)
 207                 return false;
 208
 209         return true;
 210 }
 211
 212 bool cs_needs_timeout(struct hl_cs *cs)
 213 {
 214         /* In case this is a staged CS, only the first CS in sequence should
 215          * get a timeout, any non staged CS will always get a timeout
 216          */
 217         if (cs->staged_cs && !cs->staged_first)
 218                 return false;
 219
 220         return true;
 221 }
 222
 223 static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
 224 {
 225         /*
 226          * Patched CB is created for external queues jobs, and for H/W queues
 227          * jobs if the user CB was allocated by driver and MMU is disabled.
 228          */
 229         return (job->queue_type == QUEUE_TYPE_EXT ||
 230                         (job->queue_type == QUEUE_TYPE_HW &&
 231                                         job->is_kernel_allocated_cb &&
 232                                         !hdev->mmu_enable));
 233 }
 234
 235 /*
 236  * cs_parser - parse the user command submission
 237  *
 238  * @hpriv       : pointer to the private data of the fd
 239  * @job        : pointer to the job that holds the command submission info
 240  *
 241  * The function parses the command submission of the user. It calls the
 242  * ASIC specific parser, which returns a list of memory blocks to send
 243  * to the device as different command buffers
 244  *
 245  */
 246 static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
 247 {
 248         struct hl_device *hdev = hpriv->hdev;
 249         struct hl_cs_parser parser;
 250         int rc;
 251
 252         parser.ctx_id = job->cs->ctx->asid;
 253         parser.cs_sequence = job->cs->sequence;
 254         parser.job_id = job->id;
 255
 256         parser.hw_queue_id = job->hw_queue_id;
 257         parser.job_userptr_list = &job->userptr_list;
 258         parser.patched_cb = NULL;
 259         parser.user_cb = job->user_cb;
 260         parser.user_cb_size = job->user_cb_size;
 261         parser.queue_type = job->queue_type;
 262         parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
 263         job->patched_cb = NULL;
 264         parser.completion = cs_needs_completion(job->cs);
 265
 266         rc = hdev->asic_funcs->cs_parser(hdev, &parser);
 267
 268         if (is_cb_patched(hdev, job)) {
 269                 if (!rc) {
 270                         job->patched_cb = parser.patched_cb;
 271                         job->job_cb_size = parser.patched_cb_size;
 272                         job->contains_dma_pkt = parser.contains_dma_pkt;
 273                         atomic_inc(&job->patched_cb->cs_cnt);
 274                 }
 275
 276                 /*
 277                  * Whether the parsing worked or not, we don't need the
 278                  * original CB anymore because it was already parsed and
 279                  * won't be accessed again for this CS
 280                  */
 281                 atomic_dec(&job->user_cb->cs_cnt);
 282                 hl_cb_put(job->user_cb);
 283                 job->user_cb = NULL;
 284         } else if (!rc) {
 285                 job->job_cb_size = job->user_cb_size;
 286         }
 287
 288         return rc;
 289 }
 290
 291 static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
 292 {
 293         struct hl_cs *cs = job->cs;
 294
 295         if (is_cb_patched(hdev, job)) {
 296                 hl_userptr_delete_list(hdev, &job->userptr_list);
 297
 298                 /*
 299                  * We might arrive here from rollback and patched CB wasn't
 300                  * created, so we need to check it's not NULL
 301                  */
 302                 if (job->patched_cb) {
 303                         atomic_dec(&job->patched_cb->cs_cnt);
 304                         hl_cb_put(job->patched_cb);
 305                 }
 306         }
 307
 308         /* For H/W queue jobs, if a user CB was allocated by driver and MMU is
 309          * enabled, the user CB isn't released in cs_parser() and thus should be
 310          * released here.
 311          * This is also true for INT queues jobs which were allocated by driver
 312          */
 313         if (job->is_kernel_allocated_cb &&
 314                 ((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) ||
 315                                 job->queue_type == QUEUE_TYPE_INT)) {
 316                 atomic_dec(&job->user_cb->cs_cnt);
 317                 hl_cb_put(job->user_cb);
 318         }
 319
 320         /*
 321          * This is the only place where there can be multiple threads
 322          * modifying the list at the same time
 323          */
 324         spin_lock(&cs->job_lock);
 325         list_del(&job->cs_node);
 326         spin_unlock(&cs->job_lock);
 327
 328         hl_debugfs_remove_job(hdev, job);
 329
 330         /* We decrement reference only for a CS that gets completion
 331          * because the reference was incremented only for this kind of CS
 332          * right before it was scheduled.
 333          *
 334          * In staged submission, only the last CS marked as 'staged_last'
 335          * gets completion, hence its release function will be called from here.
 336          * As for all the rest CS's in the staged submission which do not get
 337          * completion, their CS reference will be decremented by the
 338          * 'staged_last' CS during the CS release flow.
 339          * All relevant PQ CI counters will be incremented during the CS release
 340          * flow by calling 'hl_hw_queue_update_ci'.
 341          */
 342         if (cs_needs_completion(cs) &&
 343                 (job->queue_type == QUEUE_TYPE_EXT ||
 344                         job->queue_type == QUEUE_TYPE_HW))
 345                 cs_put(cs);
 346
 347         cs_job_put(job);
 348 }
 349
 350 /*
 351  * hl_staged_cs_find_first - locate the first CS in this staged submission
 352  *
 353  * @hdev: pointer to device structure
 354  * @cs_seq: staged submission sequence number
 355  *
 356  * @note: This function must be called under 'hdev->cs_mirror_lock'
 357  *
 358  * Find and return a CS pointer with the given sequence
 359  */
 360 struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
 361 {
 362         struct hl_cs *cs;
 363
 364         list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
 365                 if (cs->staged_cs && cs->staged_first &&
 366                                 cs->sequence == cs_seq)
 367                         return cs;
 368
 369         return NULL;
 370 }
 371
 372 /*
 373  * is_staged_cs_last_exists - returns true if the last CS in sequence exists
 374  *
 375  * @hdev: pointer to device structure
 376  * @cs: staged submission member
 377  *
 378  */
 379 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
 380 {
 381         struct hl_cs *last_entry;
 382
 383         last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
 384                                                                 staged_cs_node);
 385
 386         if (last_entry->staged_last)
 387                 return true;
 388
 389         return false;
 390 }
 391
 392 /*
 393  * staged_cs_get - get CS reference if this CS is a part of a staged CS
 394  *
 395  * @hdev: pointer to device structure
 396  * @cs: current CS
 397  * @cs_seq: staged submission sequence number
 398  *
 399  * Increment CS reference for every CS in this staged submission except for
 400  * the CS which get completion.
 401  */
 402 static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
 403 {
 404         /* Only the last CS in this staged submission will get a completion.
 405          * We must increment the reference for all other CS's in this
 406          * staged submission.
 407          * Once we get a completion we will release the whole staged submission.
 408          */
 409         if (!cs->staged_last)
 410                 cs_get(cs);
 411 }
 412
 413 /*
 414  * staged_cs_put - put a CS in case it is part of staged submission
 415  *
 416  * @hdev: pointer to device structure
 417  * @cs: CS to put
 418  *
 419  * This function decrements a CS reference (for a non completion CS)
 420  */
 421 static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
 422 {
 423         /* We release all CS's in a staged submission except the last
 424          * CS which we have never incremented its reference.
 425          */
 426         if (!cs_needs_completion(cs))
 427                 cs_put(cs);
 428 }
 429
 430 static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
 431 {
 432         bool next_entry_found = false;
 433         struct hl_cs *next;
 434
 435         if (!cs_needs_timeout(cs))
 436                 return;
 437
 438         spin_lock(&hdev->cs_mirror_lock);
 439
 440         /* We need to handle tdr only once for the complete staged submission.
 441          * Hence, we choose the CS that reaches this function first which is
 442          * the CS marked as 'staged_last'.
 443          */
 444         if (cs->staged_cs && cs->staged_last)
 445                 cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
 446
 447         spin_unlock(&hdev->cs_mirror_lock);
 448
 449         /* Don't cancel TDR in case this CS was timedout because we might be
 450          * running from the TDR context
 451          */
 452         if (cs && (cs->timedout ||
 453                         hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
 454                 return;
 455
 456         if (cs && cs->tdr_active)
 457                 cancel_delayed_work_sync(&cs->work_tdr);
 458
 459         spin_lock(&hdev->cs_mirror_lock);
 460
 461         /* queue TDR for next CS */
 462         list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
 463                 if (cs_needs_timeout(next)) {
 464                         next_entry_found = true;
 465                         break;
 466                 }
 467
 468         if (next_entry_found && !next->tdr_active) {
 469                 next->tdr_active = true;
 470                 schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
 471         }
 472
 473         spin_unlock(&hdev->cs_mirror_lock);
 474 }
 475
 476 static void cs_do_release(struct kref *ref)
 477 {
 478         struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
 479         struct hl_device *hdev = cs->ctx->hdev;
 480         struct hl_cs_job *job, *tmp;
 481
 482         cs->completed = true;
 483
 484         /*
 485          * Although if we reached here it means that all external jobs have
 486          * finished, because each one of them took refcnt to CS, we still
 487          * need to go over the internal jobs and complete them. Otherwise, we
 488          * will have leaked memory and what's worse, the CS object (and
 489          * potentially the CTX object) could be released, while the JOB
 490          * still holds a pointer to them (but no reference).
 491          */
 492         list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
 493                 complete_job(hdev, job);
 494
 495         if (!cs->submitted) {
 496                 /* In case the wait for signal CS was submitted, the put occurs
 497                  * in init_signal_wait_cs() or collective_wait_init_cs()
 498                  * right before hanging on the PQ.
 499                  */
 500                 if (cs->type == CS_TYPE_WAIT ||
 501                                 cs->type == CS_TYPE_COLLECTIVE_WAIT)
 502                         hl_fence_put(cs->signal_fence);
 503
 504                 goto out;
 505         }
 506
 507         /* Need to update CI for all queue jobs that does not get completion */
 508         hl_hw_queue_update_ci(cs);
 509
 510         /* remove CS from CS mirror list */
 511         spin_lock(&hdev->cs_mirror_lock);
 512         list_del_init(&cs->mirror_node);
 513         spin_unlock(&hdev->cs_mirror_lock);
 514
 515         cs_handle_tdr(hdev, cs);
 516
 517         if (cs->staged_cs) {
 518                 /* the completion CS decrements reference for the entire
 519                  * staged submission
 520                  */
 521                 if (cs->staged_last) {
 522                         struct hl_cs *staged_cs, *tmp;
 523
 524                         list_for_each_entry_safe(staged_cs, tmp,
 525                                         &cs->staged_cs_node, staged_cs_node)
 526                                 staged_cs_put(hdev, staged_cs);
 527                 }
 528
 529                 /* A staged CS will be a member in the list only after it
 530                  * was submitted. We used 'cs_mirror_lock' when inserting
 531                  * it to list so we will use it again when removing it
 532                  */
 533                 if (cs->submitted) {
 534                         spin_lock(&hdev->cs_mirror_lock);
 535                         list_del(&cs->staged_cs_node);
 536                         spin_unlock(&hdev->cs_mirror_lock);
 537                 }
 538         }
 539
 540 out:
 541         /* Must be called before hl_ctx_put because inside we use ctx to get
 542          * the device
 543          */
 544         hl_debugfs_remove_cs(cs);
 545
 546         hl_ctx_put(cs->ctx);
 547
 548         /* We need to mark an error for not submitted because in that case
 549          * the hl fence release flow is different. Mainly, we don't need
 550          * to handle hw_sob for signal/wait
 551          */
 552         if (cs->timedout)
 553                 cs->fence->error = -ETIMEDOUT;
 554         else if (cs->aborted)
 555                 cs->fence->error = -EIO;
 556         else if (!cs->submitted)
 557                 cs->fence->error = -EBUSY;
 558
 559         if (unlikely(cs->skip_reset_on_timeout)) {
 560                 dev_err(hdev->dev,
 561                         "Command submission %llu completed after %llu (s)\n",
 562                         cs->sequence,
 563                         div_u64(jiffies - cs->submission_time_jiffies, HZ));
 564         }
 565
 566         if (cs->timestamp)
 567                 cs->fence->timestamp = ktime_get();
 568         complete_all(&cs->fence->completion);
 569         hl_fence_put(cs->fence);
 570
 571         kfree(cs->jobs_in_queue_cnt);
 572         kfree(cs);
 573 }
 574
 575 static void cs_timedout(struct work_struct *work)
 576 {
 577         struct hl_device *hdev;
 578         int rc;
 579         struct hl_cs *cs = container_of(work, struct hl_cs,
 580                                                  work_tdr.work);
 581         bool skip_reset_on_timeout = cs->skip_reset_on_timeout;
 582
 583         rc = cs_get_unless_zero(cs);
 584         if (!rc)
 585                 return;
 586
 587         if ((!cs->submitted) || (cs->completed)) {
 588                 cs_put(cs);
 589                 return;
 590         }
 591
 592         /* Mark the CS is timed out so we won't try to cancel its TDR */
 593         if (likely(!skip_reset_on_timeout))
 594                 cs->timedout = true;
 595
 596         hdev = cs->ctx->hdev;
 597
 598         switch (cs->type) {
 599         case CS_TYPE_SIGNAL:
 600                 dev_err(hdev->dev,
 601                         "Signal command submission %llu has not finished in time!\n",
 602                         cs->sequence);
 603                 break;
 604
 605         case CS_TYPE_WAIT:
 606                 dev_err(hdev->dev,
 607                         "Wait command submission %llu has not finished in time!\n",
 608                         cs->sequence);
 609                 break;
 610
 611         case CS_TYPE_COLLECTIVE_WAIT:
 612                 dev_err(hdev->dev,
 613                         "Collective Wait command submission %llu has not finished in time!\n",
 614                         cs->sequence);
 615                 break;
 616
 617         default:
 618                 dev_err(hdev->dev,
 619                         "Command submission %llu has not finished in time!\n",
 620                         cs->sequence);
 621                 break;
 622         }
 623
 624         cs_put(cs);
 625
 626         if (likely(!skip_reset_on_timeout)) {
 627                 if (hdev->reset_on_lockup)
 628                         hl_device_reset(hdev, HL_RESET_TDR);
 629                 else
 630                         hdev->needs_reset = true;
 631         }
 632 }
 633
 634 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 635                         enum hl_cs_type cs_type, u64 user_sequence,
 636                         struct hl_cs **cs_new, u32 flags, u32 timeout)
 637 {
 638         struct hl_cs_counters_atomic *cntr;
 639         struct hl_fence *other = NULL;
 640         struct hl_cs_compl *cs_cmpl;
 641         struct hl_cs *cs;
 642         int rc;
 643
 644         cntr = &hdev->aggregated_cs_counters;
 645
 646         cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
 647         if (!cs)
 648                 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
 649
 650         if (!cs) {
 651                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 652                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
 653                 return -ENOMEM;
 654         }
 655
 656         /* increment refcnt for context */
 657         hl_ctx_get(hdev, ctx);
 658
 659         cs->ctx = ctx;
 660         cs->submitted = false;
 661         cs->completed = false;
 662         cs->type = cs_type;
 663         cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
 664         cs->timeout_jiffies = timeout;
 665         cs->skip_reset_on_timeout =
 666                 hdev->skip_reset_on_timeout ||
 667                 !!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
 668         cs->submission_time_jiffies = jiffies;
 669         INIT_LIST_HEAD(&cs->job_list);
 670         INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
 671         kref_init(&cs->refcount);
 672         spin_lock_init(&cs->job_lock);
 673
 674         cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
 675         if (!cs_cmpl)
 676                 cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL);
 677
 678         if (!cs_cmpl) {
 679                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 680                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
 681                 rc = -ENOMEM;
 682                 goto free_cs;
 683         }
 684
 685         cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
 686                         sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
 687         if (!cs->jobs_in_queue_cnt)
 688                 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
 689                                 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
 690
 691         if (!cs->jobs_in_queue_cnt) {
 692                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 693                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
 694                 rc = -ENOMEM;
 695                 goto free_cs_cmpl;
 696         }
 697
 698         cs_cmpl->hdev = hdev;
 699         cs_cmpl->type = cs->type;
 700         spin_lock_init(&cs_cmpl->lock);
 701         INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work);
 702         cs->fence = &cs_cmpl->base_fence;
 703
 704         spin_lock(&ctx->cs_lock);
 705
 706         cs_cmpl->cs_seq = ctx->cs_sequence;
 707         other = ctx->cs_pending[cs_cmpl->cs_seq &
 708                                 (hdev->asic_prop.max_pending_cs - 1)];
 709
 710         if (other && !completion_done(&other->completion)) {
 711                 /* If the following statement is true, it means we have reached
 712                  * a point in which only part of the staged submission was
 713                  * submitted and we don't have enough room in the 'cs_pending'
 714                  * array for the rest of the submission.
 715                  * This causes a deadlock because this CS will never be
 716                  * completed as it depends on future CS's for completion.
 717                  */
 718                 if (other->cs_sequence == user_sequence)
 719                         dev_crit_ratelimited(hdev->dev,
 720                                 "Staged CS %llu deadlock due to lack of resources",
 721                                 user_sequence);
 722
 723                 dev_dbg_ratelimited(hdev->dev,
 724                         "Rejecting CS because of too many in-flights CS\n");
 725                 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
 726                 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
 727                 rc = -EAGAIN;
 728                 goto free_fence;
 729         }
 730
 731         /* init hl_fence */
 732         hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
 733
 734         cs->sequence = cs_cmpl->cs_seq;
 735
 736         ctx->cs_pending[cs_cmpl->cs_seq &
 737                         (hdev->asic_prop.max_pending_cs - 1)] =
 738                                                         &cs_cmpl->base_fence;
 739         ctx->cs_sequence++;
 740
 741         hl_fence_get(&cs_cmpl->base_fence);
 742
 743         hl_fence_put(other);
 744
 745         spin_unlock(&ctx->cs_lock);
 746
 747         *cs_new = cs;
 748
 749         return 0;
 750
 751 free_fence:
 752         spin_unlock(&ctx->cs_lock);
 753         kfree(cs->jobs_in_queue_cnt);
 754 free_cs_cmpl:
 755         kfree(cs_cmpl);
 756 free_cs:
 757         kfree(cs);
 758         hl_ctx_put(ctx);
 759         return rc;
 760 }
 761
 762 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
 763 {
 764         struct hl_cs_job *job, *tmp;
 765
 766         staged_cs_put(hdev, cs);
 767
 768         list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
 769                 complete_job(hdev, job);
 770 }
 771
 772 void hl_cs_rollback_all(struct hl_device *hdev)
 773 {
 774         int i;
 775         struct hl_cs *cs, *tmp;
 776
 777         flush_workqueue(hdev->sob_reset_wq);
 778
 779         /* flush all completions before iterating over the CS mirror list in
 780          * order to avoid a race with the release functions
 781          */
 782         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 783                 flush_workqueue(hdev->cq_wq[i]);
 784
 785         /* Make sure we don't have leftovers in the CS mirror list */
 786         list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
 787                 cs_get(cs);
 788                 cs->aborted = true;
 789                 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
 790                                 cs->ctx->asid, cs->sequence);
 791                 cs_rollback(hdev, cs);
 792                 cs_put(cs);
 793         }
 794 }
 795
 796 void hl_pending_cb_list_flush(struct hl_ctx *ctx)
 797 {
 798         struct hl_pending_cb *pending_cb, *tmp;
 799
 800         list_for_each_entry_safe(pending_cb, tmp,
 801                         &ctx->pending_cb_list, cb_node) {
 802                 list_del(&pending_cb->cb_node);
 803                 hl_cb_put(pending_cb->cb);
 804                 kfree(pending_cb);
 805         }
 806 }
 807
 808 static void
 809 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
 810 {
 811         struct hl_user_pending_interrupt *pend;
 812
 813         spin_lock(&interrupt->wait_list_lock);
 814         list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
 815                 pend->fence.error = -EIO;
 816                 complete_all(&pend->fence.completion);
 817         }
 818         spin_unlock(&interrupt->wait_list_lock);
 819 }
 820
 821 void hl_release_pending_user_interrupts(struct hl_device *hdev)
 822 {
 823         struct asic_fixed_properties *prop = &hdev->asic_prop;
 824         struct hl_user_interrupt *interrupt;
 825         int i;
 826
 827         if (!prop->user_interrupt_count)
 828                 return;
 829
 830         /* We iterate through the user interrupt requests and waking up all
 831          * user threads waiting for interrupt completion. We iterate the
 832          * list under a lock, this is why all user threads, once awake,
 833          * will wait on the same lock and will release the waiting object upon
 834          * unlock.
 835          */
 836
 837         for (i = 0 ; i < prop->user_interrupt_count ; i++) {
 838                 interrupt = &hdev->user_interrupt[i];
 839                 wake_pending_user_interrupt_threads(interrupt);
 840         }
 841
 842         interrupt = &hdev->common_user_interrupt;
 843         wake_pending_user_interrupt_threads(interrupt);
 844 }
 845
 846 static void job_wq_completion(struct work_struct *work)
 847 {
 848         struct hl_cs_job *job = container_of(work, struct hl_cs_job,
 849                                                 finish_work);
 850         struct hl_cs *cs = job->cs;
 851         struct hl_device *hdev = cs->ctx->hdev;
 852
 853         /* job is no longer needed */
 854         complete_job(hdev, job);
 855 }
 856
 857 static int validate_queue_index(struct hl_device *hdev,
 858                                 struct hl_cs_chunk *chunk,
 859                                 enum hl_queue_type *queue_type,
 860                                 bool *is_kernel_allocated_cb)
 861 {
 862         struct asic_fixed_properties *asic = &hdev->asic_prop;
 863         struct hw_queue_properties *hw_queue_prop;
 864
 865         /* This must be checked here to prevent out-of-bounds access to
 866          * hw_queues_props array
 867          */
 868         if (chunk->queue_index >= asic->max_queues) {
 869                 dev_err(hdev->dev, "Queue index %d is invalid\n",
 870                         chunk->queue_index);
 871                 return -EINVAL;
 872         }
 873
 874         hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
 875
 876         if (hw_queue_prop->type == QUEUE_TYPE_NA) {
 877                 dev_err(hdev->dev, "Queue index %d is invalid\n",
 878                         chunk->queue_index);
 879                 return -EINVAL;
 880         }
 881
 882         if (hw_queue_prop->driver_only) {
 883                 dev_err(hdev->dev,
 884                         "Queue index %d is restricted for the kernel driver\n",
 885                         chunk->queue_index);
 886                 return -EINVAL;
 887         }
 888
 889         /* When hw queue type isn't QUEUE_TYPE_HW,
 890          * USER_ALLOC_CB flag shall be referred as "don't care".
 891          */
 892         if (hw_queue_prop->type == QUEUE_TYPE_HW) {
 893                 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
 894                         if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
 895                                 dev_err(hdev->dev,
 896                                         "Queue index %d doesn't support user CB\n",
 897                                         chunk->queue_index);
 898                                 return -EINVAL;
 899                         }
 900
 901                         *is_kernel_allocated_cb = false;
 902                 } else {
 903                         if (!(hw_queue_prop->cb_alloc_flags &
 904                                         CB_ALLOC_KERNEL)) {
 905                                 dev_err(hdev->dev,
 906                                         "Queue index %d doesn't support kernel CB\n",
 907                                         chunk->queue_index);
 908                                 return -EINVAL;
 909                         }
 910
 911                         *is_kernel_allocated_cb = true;
 912                 }
 913         } else {
 914                 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
 915                                                 & CB_ALLOC_KERNEL);
 916         }
 917
 918         *queue_type = hw_queue_prop->type;
 919         return 0;
 920 }
 921
 922 static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
 923                                         struct hl_cb_mgr *cb_mgr,
 924                                         struct hl_cs_chunk *chunk)
 925 {
 926         struct hl_cb *cb;
 927         u32 cb_handle;
 928
 929         cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
 930
 931         cb = hl_cb_get(hdev, cb_mgr, cb_handle);
 932         if (!cb) {
 933                 dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
 934                 return NULL;
 935         }
 936
 937         if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
 938                 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
 939                 goto release_cb;
 940         }
 941
 942         atomic_inc(&cb->cs_cnt);
 943
 944         return cb;
 945
 946 release_cb:
 947         hl_cb_put(cb);
 948         return NULL;
 949 }
 950
 951 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 952                 enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
 953 {
 954         struct hl_cs_job *job;
 955
 956         job = kzalloc(sizeof(*job), GFP_ATOMIC);
 957         if (!job)
 958                 job = kzalloc(sizeof(*job), GFP_KERNEL);
 959
 960         if (!job)
 961                 return NULL;
 962
 963         kref_init(&job->refcount);
 964         job->queue_type = queue_type;
 965         job->is_kernel_allocated_cb = is_kernel_allocated_cb;
 966
 967         if (is_cb_patched(hdev, job))
 968                 INIT_LIST_HEAD(&job->userptr_list);
 969
 970         if (job->queue_type == QUEUE_TYPE_EXT)
 971                 INIT_WORK(&job->finish_work, job_wq_completion);
 972
 973         return job;
 974 }
 975
 976 static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
 977 {
 978         if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
 979                 return CS_TYPE_SIGNAL;
 980         else if (cs_type_flags & HL_CS_FLAGS_WAIT)
 981                 return CS_TYPE_WAIT;
 982         else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
 983                 return CS_TYPE_COLLECTIVE_WAIT;
 984         else
 985                 return CS_TYPE_DEFAULT;
 986 }
 987
 988 static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 989 {
 990         struct hl_device *hdev = hpriv->hdev;
 991         struct hl_ctx *ctx = hpriv->ctx;
 992         u32 cs_type_flags, num_chunks;
 993         enum hl_device_status status;
 994         enum hl_cs_type cs_type;
 995
 996         if (!hl_device_operational(hdev, &status)) {
 997                 dev_warn_ratelimited(hdev->dev,
 998                         "Device is %s. Can't submit new CS\n",
 999                         hdev->status[status]);
1000                 return -EBUSY;
1001         }
1002
1003         if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1004                         !hdev->supports_staged_submission) {
1005                 dev_err(hdev->dev, "staged submission not supported");
1006                 return -EPERM;
1007         }
1008
1009         cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
1010
1011         if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
1012                 dev_err(hdev->dev,
1013                         "CS type flags are mutually exclusive, context %d\n",
1014                         ctx->asid);
1015                 return -EINVAL;
1016         }
1017
1018         cs_type = hl_cs_get_cs_type(cs_type_flags);
1019         num_chunks = args->in.num_chunks_execute;
1020
1021         if (unlikely((cs_type != CS_TYPE_DEFAULT) &&
1022                                         !hdev->supports_sync_stream)) {
1023                 dev_err(hdev->dev, "Sync stream CS is not supported\n");
1024                 return -EINVAL;
1025         }
1026
1027         if (cs_type == CS_TYPE_DEFAULT) {
1028                 if (!num_chunks) {
1029                         dev_err(hdev->dev,
1030                                 "Got execute CS with 0 chunks, context %d\n",
1031                                 ctx->asid);
1032                         return -EINVAL;
1033                 }
1034         } else if (num_chunks != 1) {
1035                 dev_err(hdev->dev,
1036                         "Sync stream CS mandates one chunk only, context %d\n",
1037                         ctx->asid);
1038                 return -EINVAL;
1039         }
1040
1041         return 0;
1042 }
1043
1044 static int hl_cs_copy_chunk_array(struct hl_device *hdev,
1045                                         struct hl_cs_chunk **cs_chunk_array,
1046                                         void __user *chunks, u32 num_chunks,
1047                                         struct hl_ctx *ctx)
1048 {
1049         u32 size_to_copy;
1050
1051         if (num_chunks > HL_MAX_JOBS_PER_CS) {
1052                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1053                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1054                 dev_err(hdev->dev,
1055                         "Number of chunks can NOT be larger than %d\n",
1056                         HL_MAX_JOBS_PER_CS);
1057                 return -EINVAL;
1058         }
1059
1060         *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
1061                                         GFP_ATOMIC);
1062         if (!*cs_chunk_array)
1063                 *cs_chunk_array = kmalloc_array(num_chunks,
1064                                         sizeof(**cs_chunk_array), GFP_KERNEL);
1065         if (!*cs_chunk_array) {
1066                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1067                 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1068                 return -ENOMEM;
1069         }
1070
1071         size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
1072         if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
1073                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1074                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1075                 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
1076                 kfree(*cs_chunk_array);
1077                 return -EFAULT;
1078         }
1079
1080         return 0;
1081 }
1082
1083 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
1084                                 u64 sequence, u32 flags)
1085 {
1086         if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
1087                 return 0;
1088
1089         cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
1090         cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);
1091
1092         if (cs->staged_first) {
1093                 /* Staged CS sequence is the first CS sequence */
1094                 INIT_LIST_HEAD(&cs->staged_cs_node);
1095                 cs->staged_sequence = cs->sequence;
1096         } else {
1097                 /* User sequence will be validated in 'hl_hw_queue_schedule_cs'
1098                  * under the cs_mirror_lock
1099                  */
1100                 cs->staged_sequence = sequence;
1101         }
1102
1103         /* Increment CS reference if needed */
1104         staged_cs_get(hdev, cs);
1105
1106         cs->staged_cs = true;
1107
1108         return 0;
1109 }
1110
1111 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
1112                                 u32 num_chunks, u64 *cs_seq, u32 flags,
1113                                 u32 timeout)
1114 {
1115         bool staged_mid, int_queues_only = true;
1116         struct hl_device *hdev = hpriv->hdev;
1117         struct hl_cs_chunk *cs_chunk_array;
1118         struct hl_cs_counters_atomic *cntr;
1119         struct hl_ctx *ctx = hpriv->ctx;
1120         struct hl_cs_job *job;
1121         struct hl_cs *cs;
1122         struct hl_cb *cb;
1123         u64 user_sequence;
1124         int rc, i;
1125
1126         cntr = &hdev->aggregated_cs_counters;
1127         user_sequence = *cs_seq;
1128         *cs_seq = ULLONG_MAX;
1129
1130         rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1131                         hpriv->ctx);
1132         if (rc)
1133                 goto out;
1134
1135         if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1136                         !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
1137                 staged_mid = true;
1138         else
1139                 staged_mid = false;
1140
1141         rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
1142                         staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
1143                         timeout);
1144         if (rc)
1145                 goto free_cs_chunk_array;
1146
1147         *cs_seq = cs->sequence;
1148
1149         hl_debugfs_add_cs(cs);
1150
1151         rc = cs_staged_submission(hdev, cs, user_sequence, flags);
1152         if (rc)
1153                 goto free_cs_object;
1154
1155         /* Validate ALL the CS chunks before submitting the CS */
1156         for (i = 0 ; i < num_chunks ; i++) {
1157                 struct hl_cs_chunk *chunk = &cs_chunk_array[i];
1158                 enum hl_queue_type queue_type;
1159                 bool is_kernel_allocated_cb;
1160
1161                 rc = validate_queue_index(hdev, chunk, &queue_type,
1162                                                 &is_kernel_allocated_cb);
1163                 if (rc) {
1164                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1165                         atomic64_inc(&cntr->validation_drop_cnt);
1166                         goto free_cs_object;
1167                 }
1168
1169                 if (is_kernel_allocated_cb) {
1170                         cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
1171                         if (!cb) {
1172                                 atomic64_inc(
1173                                         &ctx->cs_counters.validation_drop_cnt);
1174                                 atomic64_inc(&cntr->validation_drop_cnt);
1175                                 rc = -EINVAL;
1176                                 goto free_cs_object;
1177                         }
1178                 } else {
1179                         cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
1180                 }
1181
1182                 if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
1183                         int_queues_only = false;
1184
1185                 job = hl_cs_allocate_job(hdev, queue_type,
1186                                                 is_kernel_allocated_cb);
1187                 if (!job) {
1188                         atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1189                         atomic64_inc(&cntr->out_of_mem_drop_cnt);
1190                         dev_err(hdev->dev, "Failed to allocate a new job\n");
1191                         rc = -ENOMEM;
1192                         if (is_kernel_allocated_cb)
1193                                 goto release_cb;
1194
1195                         goto free_cs_object;
1196                 }
1197
1198                 job->id = i + 1;
1199                 job->cs = cs;
1200                 job->user_cb = cb;
1201                 job->user_cb_size = chunk->cb_size;
1202                 job->hw_queue_id = chunk->queue_index;
1203
1204                 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1205
1206                 list_add_tail(&job->cs_node, &cs->job_list);
1207
1208                 /*
1209                  * Increment CS reference. When CS reference is 0, CS is
1210                  * done and can be signaled to user and free all its resources
1211                  * Only increment for JOB on external or H/W queues, because
1212                  * only for those JOBs we get completion
1213                  */
1214                 if (cs_needs_completion(cs) &&
1215                         (job->queue_type == QUEUE_TYPE_EXT ||
1216                                 job->queue_type == QUEUE_TYPE_HW))
1217                         cs_get(cs);
1218
1219                 hl_debugfs_add_job(hdev, job);
1220
1221                 rc = cs_parser(hpriv, job);
1222                 if (rc) {
1223                         atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
1224                         atomic64_inc(&cntr->parsing_drop_cnt);
1225                         dev_err(hdev->dev,
1226                                 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
1227                                 cs->ctx->asid, cs->sequence, job->id, rc);
1228                         goto free_cs_object;
1229                 }
1230         }
1231
1232         /* We allow a CS with any queue type combination as long as it does
1233          * not get a completion
1234          */
1235         if (int_queues_only && cs_needs_completion(cs)) {
1236                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1237                 atomic64_inc(&cntr->validation_drop_cnt);
1238                 dev_err(hdev->dev,
1239                         "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
1240                         cs->ctx->asid, cs->sequence);
1241                 rc = -EINVAL;
1242                 goto free_cs_object;
1243         }
1244
1245         rc = hl_hw_queue_schedule_cs(cs);
1246         if (rc) {
1247                 if (rc != -EAGAIN)
1248                         dev_err(hdev->dev,
1249                                 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
1250                                 cs->ctx->asid, cs->sequence, rc);
1251                 goto free_cs_object;
1252         }
1253
1254         rc = HL_CS_STATUS_SUCCESS;
1255         goto put_cs;
1256
1257 release_cb:
1258         atomic_dec(&cb->cs_cnt);
1259         hl_cb_put(cb);
1260 free_cs_object:
1261         cs_rollback(hdev, cs);
1262         *cs_seq = ULLONG_MAX;
1263         /* The path below is both for good and erroneous exits */
1264 put_cs:
1265         /* We finished with the CS in this function, so put the ref */
1266         cs_put(cs);
1267 free_cs_chunk_array:
1268         kfree(cs_chunk_array);
1269 out:
1270         return rc;
1271 }
1272
1273 static int pending_cb_create_job(struct hl_device *hdev, struct hl_ctx *ctx,
1274                 struct hl_cs *cs, struct hl_cb *cb, u32 size, u32 hw_queue_id)
1275 {
1276         struct hw_queue_properties *hw_queue_prop;
1277         struct hl_cs_counters_atomic *cntr;
1278         struct hl_cs_job *job;
1279
1280         hw_queue_prop = &hdev->asic_prop.hw_queues_props[hw_queue_id];
1281         cntr = &hdev->aggregated_cs_counters;
1282
1283         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1284         if (!job) {
1285                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1286                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1287                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1288                 return -ENOMEM;
1289         }
1290
1291         job->id = 0;
1292         job->cs = cs;
1293         job->user_cb = cb;
1294         atomic_inc(&job->user_cb->cs_cnt);
1295         job->user_cb_size = size;
1296         job->hw_queue_id = hw_queue_id;
1297         job->patched_cb = job->user_cb;
1298         job->job_cb_size = job->user_cb_size;
1299
1300         /* increment refcount as for external queues we get completion */
1301         cs_get(cs);
1302
1303         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1304
1305         list_add_tail(&job->cs_node, &cs->job_list);
1306
1307         hl_debugfs_add_job(hdev, job);
1308
1309         return 0;
1310 }
1311
1312 static int hl_submit_pending_cb(struct hl_fpriv *hpriv)
1313 {
1314         struct hl_device *hdev = hpriv->hdev;
1315         struct hl_ctx *ctx = hpriv->ctx;
1316         struct hl_pending_cb *pending_cb, *tmp;
1317         struct list_head local_cb_list;
1318         struct hl_cs *cs;
1319         struct hl_cb *cb;
1320         u32 hw_queue_id;
1321         u32 cb_size;
1322         int process_list, rc = 0;
1323
1324         if (list_empty(&ctx->pending_cb_list))
1325                 return 0;
1326
1327         process_list = atomic_cmpxchg(&ctx->thread_pending_cb_token, 1, 0);
1328
1329         /* Only a single thread is allowed to process the list */
1330         if (!process_list)
1331                 return 0;
1332
1333         if (list_empty(&ctx->pending_cb_list))
1334                 goto free_pending_cb_token;
1335
1336         /* move all list elements to a local list */
1337         INIT_LIST_HEAD(&local_cb_list);
1338         spin_lock(&ctx->pending_cb_lock);
1339         list_for_each_entry_safe(pending_cb, tmp, &ctx->pending_cb_list,
1340                                                                 cb_node)
1341                 list_move_tail(&pending_cb->cb_node, &local_cb_list);
1342         spin_unlock(&ctx->pending_cb_lock);
1343
1344         rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs, 0,
1345                                 hdev->timeout_jiffies);
1346         if (rc)
1347                 goto add_list_elements;
1348
1349         hl_debugfs_add_cs(cs);
1350
1351         /* Iterate through pending cb list, create jobs and add to CS */
1352         list_for_each_entry(pending_cb, &local_cb_list, cb_node) {
1353                 cb = pending_cb->cb;
1354                 cb_size = pending_cb->cb_size;
1355                 hw_queue_id = pending_cb->hw_queue_id;
1356
1357                 rc = pending_cb_create_job(hdev, ctx, cs, cb, cb_size,
1358                                                                 hw_queue_id);
1359                 if (rc)
1360                         goto free_cs_object;
1361         }
1362
1363         rc = hl_hw_queue_schedule_cs(cs);
1364         if (rc) {
1365                 if (rc != -EAGAIN)
1366                         dev_err(hdev->dev,
1367                                 "Failed to submit CS %d.%llu (%d)\n",
1368                                 ctx->asid, cs->sequence, rc);
1369                 goto free_cs_object;
1370         }
1371
1372         /* pending cb was scheduled successfully */
1373         list_for_each_entry_safe(pending_cb, tmp, &local_cb_list, cb_node) {
1374                 list_del(&pending_cb->cb_node);
1375                 kfree(pending_cb);
1376         }
1377
1378         cs_put(cs);
1379
1380         goto free_pending_cb_token;
1381
1382 free_cs_object:
1383         cs_rollback(hdev, cs);
1384         cs_put(cs);
1385 add_list_elements:
1386         spin_lock(&ctx->pending_cb_lock);
1387         list_for_each_entry_safe_reverse(pending_cb, tmp, &local_cb_list,
1388                                                                 cb_node)
1389                 list_move(&pending_cb->cb_node, &ctx->pending_cb_list);
1390         spin_unlock(&ctx->pending_cb_lock);
1391 free_pending_cb_token:
1392         atomic_set(&ctx->thread_pending_cb_token, 1);
1393
1394         return rc;
1395 }
1396
1397 static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
1398                                 u64 *cs_seq)
1399 {
1400         struct hl_device *hdev = hpriv->hdev;
1401         struct hl_ctx *ctx = hpriv->ctx;
1402         bool need_soft_reset = false;
1403         int rc = 0, do_ctx_switch;
1404         void __user *chunks;
1405         u32 num_chunks, tmp;
1406         int ret;
1407
1408         do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
1409
1410         if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
1411                 mutex_lock(&hpriv->restore_phase_mutex);
1412
1413                 if (do_ctx_switch) {
1414                         rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
1415                         if (rc) {
1416                                 dev_err_ratelimited(hdev->dev,
1417                                         "Failed to switch to context %d, rejecting CS! %d\n",
1418                                         ctx->asid, rc);
1419                                 /*
1420                                  * If we timedout, or if the device is not IDLE
1421                                  * while we want to do context-switch (-EBUSY),
1422                                  * we need to soft-reset because QMAN is
1423                                  * probably stuck. However, we can't call to
1424                                  * reset here directly because of deadlock, so
1425                                  * need to do it at the very end of this
1426                                  * function
1427                                  */
1428                                 if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
1429                                         need_soft_reset = true;
1430                                 mutex_unlock(&hpriv->restore_phase_mutex);
1431                                 goto out;
1432                         }
1433                 }
1434
1435                 hdev->asic_funcs->restore_phase_topology(hdev);
1436
1437                 chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
1438                 num_chunks = args->in.num_chunks_restore;
1439
1440                 if (!num_chunks) {
1441                         dev_dbg(hdev->dev,
1442                                 "Need to run restore phase but restore CS is empty\n");
1443                         rc = 0;
1444                 } else {
1445                         rc = cs_ioctl_default(hpriv, chunks, num_chunks,
1446                                         cs_seq, 0, hdev->timeout_jiffies);
1447                 }
1448
1449                 mutex_unlock(&hpriv->restore_phase_mutex);
1450
1451                 if (rc) {
1452                         dev_err(hdev->dev,
1453                                 "Failed to submit restore CS for context %d (%d)\n",
1454                                 ctx->asid, rc);
1455                         goto out;
1456                 }
1457
1458                 /* Need to wait for restore completion before execution phase */
1459                 if (num_chunks) {
1460                         enum hl_cs_wait_status status;
1461 wait_again:
1462                         ret = _hl_cs_wait_ioctl(hdev, ctx,
1463                                         jiffies_to_usecs(hdev->timeout_jiffies),
1464                                         *cs_seq, &status, NULL);
1465                         if (ret) {
1466                                 if (ret == -ERESTARTSYS) {
1467                                         usleep_range(100, 200);
1468                                         goto wait_again;
1469                                 }
1470
1471                                 dev_err(hdev->dev,
1472                                         "Restore CS for context %d failed to complete %d\n",
1473                                         ctx->asid, ret);
1474                                 rc = -ENOEXEC;
1475                                 goto out;
1476                         }
1477                 }
1478
1479                 ctx->thread_ctx_switch_wait_token = 1;
1480
1481         } else if (!ctx->thread_ctx_switch_wait_token) {
1482                 rc = hl_poll_timeout_memory(hdev,
1483                         &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
1484                         100, jiffies_to_usecs(hdev->timeout_jiffies), false);
1485
1486                 if (rc == -ETIMEDOUT) {
1487                         dev_err(hdev->dev,
1488                                 "context switch phase timeout (%d)\n", tmp);
1489                         goto out;
1490                 }
1491         }
1492
1493 out:
1494         if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
1495                 hl_device_reset(hdev, 0);
1496
1497         return rc;
1498 }
1499
1500 /*
1501  * hl_cs_signal_sob_wraparound_handler: handle SOB value wrapaound case.
1502  * if the SOB value reaches the max value move to the other SOB reserved
1503  * to the queue.
1504  * Note that this function must be called while hw_queues_lock is taken.
1505  */
1506 int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
1507                         struct hl_hw_sob **hw_sob, u32 count)
1508 {
1509         struct hl_sync_stream_properties *prop;
1510         struct hl_hw_sob *sob = *hw_sob, *other_sob;
1511         u8 other_sob_offset;
1512
1513         prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1514
1515         kref_get(&sob->kref);
1516
1517         /* check for wraparound */
1518         if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
1519                 /*
1520                  * Decrement as we reached the max value.
1521                  * The release function won't be called here as we've
1522                  * just incremented the refcount right before calling this
1523                  * function.
1524                  */
1525                 kref_put(&sob->kref, hl_sob_reset_error);
1526
1527                 /*
1528                  * check the other sob value, if it still in use then fail
1529                  * otherwise make the switch
1530                  */
1531                 other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
1532                 other_sob = &prop->hw_sob[other_sob_offset];
1533
1534                 if (kref_read(&other_sob->kref) != 1) {
1535                         dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n",
1536                                                                 q_idx);
1537                         return -EINVAL;
1538                 }
1539
1540                 prop->next_sob_val = 1;
1541
1542                 /* only two SOBs are currently in use */
1543                 prop->curr_sob_offset = other_sob_offset;
1544                 *hw_sob = other_sob;
1545
1546                 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
1547                                 prop->curr_sob_offset, q_idx);
1548         } else {
1549                 prop->next_sob_val += count;
1550         }
1551
1552         return 0;
1553 }
1554
1555 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
1556                 struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
1557 {
1558         u64 *signal_seq_arr = NULL;
1559         u32 size_to_copy, signal_seq_arr_len;
1560         int rc = 0;
1561
1562         signal_seq_arr_len = chunk->num_signal_seq_arr;
1563
1564         /* currently only one signal seq is supported */
1565         if (signal_seq_arr_len != 1) {
1566                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1567                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1568                 dev_err(hdev->dev,
1569                         "Wait for signal CS supports only one signal CS seq\n");
1570                 return -EINVAL;
1571         }
1572
1573         signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1574                                         sizeof(*signal_seq_arr),
1575                                         GFP_ATOMIC);
1576         if (!signal_seq_arr)
1577                 signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1578                                         sizeof(*signal_seq_arr),
1579                                         GFP_KERNEL);
1580         if (!signal_seq_arr) {
1581                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1582                 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1583                 return -ENOMEM;
1584         }
1585
1586         size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
1587         if (copy_from_user(signal_seq_arr,
1588                                 u64_to_user_ptr(chunk->signal_seq_arr),
1589                                 size_to_copy)) {
1590                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1591                 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1592                 dev_err(hdev->dev,
1593                         "Failed to copy signal seq array from user\n");
1594                 rc = -EFAULT;
1595                 goto out;
1596         }
1597
1598         /* currently it is guaranteed to have only one signal seq */
1599         *signal_seq = signal_seq_arr[0];
1600
1601 out:
1602         kfree(signal_seq_arr);
1603
1604         return rc;
1605 }
1606
1607 static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
1608                 struct hl_ctx *ctx, struct hl_cs *cs, enum hl_queue_type q_type,
1609                 u32 q_idx)
1610 {
1611         struct hl_cs_counters_atomic *cntr;
1612         struct hl_cs_job *job;
1613         struct hl_cb *cb;
1614         u32 cb_size;
1615
1616         cntr = &hdev->aggregated_cs_counters;
1617
1618         job = hl_cs_allocate_job(hdev, q_type, true);
1619         if (!job) {
1620                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1621                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1622                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1623                 return -ENOMEM;
1624         }
1625
1626         if (cs->type == CS_TYPE_WAIT)
1627                 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
1628         else
1629                 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
1630
1631         cb = hl_cb_kernel_create(hdev, cb_size,
1632                                 q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
1633         if (!cb) {
1634                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1635                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1636                 kfree(job);
1637                 return -EFAULT;
1638         }
1639
1640         job->id = 0;
1641         job->cs = cs;
1642         job->user_cb = cb;
1643         atomic_inc(&job->user_cb->cs_cnt);
1644         job->user_cb_size = cb_size;
1645         job->hw_queue_id = q_idx;
1646
1647         /*
1648          * No need in parsing, user CB is the patched CB.
1649          * We call hl_cb_destroy() out of two reasons - we don't need the CB in
1650          * the CB idr anymore and to decrement its refcount as it was
1651          * incremented inside hl_cb_kernel_create().
1652          */
1653         job->patched_cb = job->user_cb;
1654         job->job_cb_size = job->user_cb_size;
1655         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1656
1657         /* increment refcount as for external queues we get completion */
1658         cs_get(cs);
1659
1660         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1661
1662         list_add_tail(&job->cs_node, &cs->job_list);
1663
1664         hl_debugfs_add_job(hdev, job);
1665
1666         return 0;
1667 }
1668
1669 static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
1670                                 void __user *chunks, u32 num_chunks,
1671                                 u64 *cs_seq, u32 flags, u32 timeout)
1672 {
1673         struct hl_cs_chunk *cs_chunk_array, *chunk;
1674         struct hw_queue_properties *hw_queue_prop;
1675         struct hl_device *hdev = hpriv->hdev;
1676         struct hl_cs_compl *sig_waitcs_cmpl;
1677         u32 q_idx, collective_engine_id = 0;
1678         struct hl_cs_counters_atomic *cntr;
1679         struct hl_fence *sig_fence = NULL;
1680         struct hl_ctx *ctx = hpriv->ctx;
1681         enum hl_queue_type q_type;
1682         struct hl_cs *cs;
1683         u64 signal_seq;
1684         int rc;
1685
1686         cntr = &hdev->aggregated_cs_counters;
1687         *cs_seq = ULLONG_MAX;
1688
1689         rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1690                         ctx);
1691         if (rc)
1692                 goto out;
1693
1694         /* currently it is guaranteed to have only one chunk */
1695         chunk = &cs_chunk_array[0];
1696
1697         if (chunk->queue_index >= hdev->asic_prop.max_queues) {
1698                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1699                 atomic64_inc(&cntr->validation_drop_cnt);
1700                 dev_err(hdev->dev, "Queue index %d is invalid\n",
1701                         chunk->queue_index);
1702                 rc = -EINVAL;
1703                 goto free_cs_chunk_array;
1704         }
1705
1706         q_idx = chunk->queue_index;
1707         hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
1708         q_type = hw_queue_prop->type;
1709
1710         if (!hw_queue_prop->supports_sync_stream) {
1711                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1712                 atomic64_inc(&cntr->validation_drop_cnt);
1713                 dev_err(hdev->dev,
1714                         "Queue index %d does not support sync stream operations\n",
1715                         q_idx);
1716                 rc = -EINVAL;
1717                 goto free_cs_chunk_array;
1718         }
1719
1720         if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
1721                 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1722                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1723                         atomic64_inc(&cntr->validation_drop_cnt);
1724                         dev_err(hdev->dev,
1725                                 "Queue index %d is invalid\n", q_idx);
1726                         rc = -EINVAL;
1727                         goto free_cs_chunk_array;
1728                 }
1729
1730                 collective_engine_id = chunk->collective_engine_id;
1731         }
1732
1733         if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
1734                 rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx);
1735                 if (rc)
1736                         goto free_cs_chunk_array;
1737
1738                 sig_fence = hl_ctx_get_fence(ctx, signal_seq);
1739                 if (IS_ERR(sig_fence)) {
1740                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1741                         atomic64_inc(&cntr->validation_drop_cnt);
1742                         dev_err(hdev->dev,
1743                                 "Failed to get signal CS with seq 0x%llx\n",
1744                                 signal_seq);
1745                         rc = PTR_ERR(sig_fence);
1746                         goto free_cs_chunk_array;
1747                 }
1748
1749                 if (!sig_fence) {
1750                         /* signal CS already finished */
1751                         rc = 0;
1752                         goto free_cs_chunk_array;
1753                 }
1754
1755                 sig_waitcs_cmpl =
1756                         container_of(sig_fence, struct hl_cs_compl, base_fence);
1757
1758                 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
1759                         atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1760                         atomic64_inc(&cntr->validation_drop_cnt);
1761                         dev_err(hdev->dev,
1762                                 "CS seq 0x%llx is not of a signal CS\n",
1763                                 signal_seq);
1764                         hl_fence_put(sig_fence);
1765                         rc = -EINVAL;
1766                         goto free_cs_chunk_array;
1767                 }
1768
1769                 if (completion_done(&sig_fence->completion)) {
1770                         /* signal CS already finished */
1771                         hl_fence_put(sig_fence);
1772                         rc = 0;
1773                         goto free_cs_chunk_array;
1774                 }
1775         }
1776
1777         rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
1778         if (rc) {
1779                 if (cs_type == CS_TYPE_WAIT ||
1780                         cs_type == CS_TYPE_COLLECTIVE_WAIT)
1781                         hl_fence_put(sig_fence);
1782                 goto free_cs_chunk_array;
1783         }
1784
1785         /*
1786          * Save the signal CS fence for later initialization right before
1787          * hanging the wait CS on the queue.
1788          */
1789         if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT)
1790                 cs->signal_fence = sig_fence;
1791
1792         hl_debugfs_add_cs(cs);
1793
1794         *cs_seq = cs->sequence;
1795
1796         if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
1797                 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
1798                                 q_idx);
1799         else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
1800                 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
1801                                 cs, q_idx, collective_engine_id);
1802         else {
1803                 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1804                 atomic64_inc(&cntr->validation_drop_cnt);
1805                 rc = -EINVAL;
1806         }
1807
1808         if (rc)
1809                 goto free_cs_object;
1810
1811         rc = hl_hw_queue_schedule_cs(cs);
1812         if (rc) {
1813                 if (rc != -EAGAIN)
1814                         dev_err(hdev->dev,
1815                                 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
1816                                 ctx->asid, cs->sequence, rc);
1817                 goto free_cs_object;
1818         }
1819
1820         rc = HL_CS_STATUS_SUCCESS;
1821         goto put_cs;
1822
1823 free_cs_object:
1824         cs_rollback(hdev, cs);
1825         *cs_seq = ULLONG_MAX;
1826         /* The path below is both for good and erroneous exits */
1827 put_cs:
1828         /* We finished with the CS in this function, so put the ref */
1829         cs_put(cs);
1830 free_cs_chunk_array:
1831         kfree(cs_chunk_array);
1832 out:
1833         return rc;
1834 }
1835
1836 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
1837 {
1838         union hl_cs_args *args = data;
1839         enum hl_cs_type cs_type;
1840         u64 cs_seq = ULONG_MAX;
1841         void __user *chunks;
1842         u32 num_chunks, flags, timeout;
1843         int rc;
1844
1845         rc = hl_cs_sanity_checks(hpriv, args);
1846         if (rc)
1847                 goto out;
1848
1849         rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
1850         if (rc)
1851                 goto out;
1852
1853         rc = hl_submit_pending_cb(hpriv);
1854         if (rc)
1855                 goto out;
1856
1857         cs_type = hl_cs_get_cs_type(args->in.cs_flags &
1858                                         ~HL_CS_FLAGS_FORCE_RESTORE);
1859         chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
1860         num_chunks = args->in.num_chunks_execute;
1861         flags = args->in.cs_flags;
1862
1863         /* In case this is a staged CS, user should supply the CS sequence */
1864         if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1865                         !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
1866                 cs_seq = args->in.seq;
1867
1868         timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
1869                         ? msecs_to_jiffies(args->in.timeout * 1000)
1870                         : hpriv->hdev->timeout_jiffies;
1871
1872         switch (cs_type) {
1873         case CS_TYPE_SIGNAL:
1874         case CS_TYPE_WAIT:
1875         case CS_TYPE_COLLECTIVE_WAIT:
1876                 rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
1877                                         &cs_seq, args->in.cs_flags, timeout);
1878                 break;
1879         default:
1880                 rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
1881                                                 args->in.cs_flags, timeout);
1882                 break;
1883         }
1884
1885 out:
1886         if (rc != -EAGAIN) {
1887                 memset(args, 0, sizeof(*args));
1888                 args->out.status = rc;
1889                 args->out.seq = cs_seq;
1890         }
1891
1892         return rc;
1893 }
1894
1895 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
1896                                 u64 timeout_us, u64 seq,
1897                                 enum hl_cs_wait_status *status, s64 *timestamp)
1898 {
1899         struct hl_fence *fence;
1900         unsigned long timeout;
1901         int rc = 0;
1902         long completion_rc;
1903
1904         if (timestamp)
1905                 *timestamp = 0;
1906
1907         if (timeout_us == MAX_SCHEDULE_TIMEOUT)
1908                 timeout = timeout_us;
1909         else
1910                 timeout = usecs_to_jiffies(timeout_us);
1911
1912         hl_ctx_get(hdev, ctx);
1913
1914         fence = hl_ctx_get_fence(ctx, seq);
1915         if (IS_ERR(fence)) {
1916                 rc = PTR_ERR(fence);
1917                 if (rc == -EINVAL)
1918                         dev_notice_ratelimited(hdev->dev,
1919                                 "Can't wait on CS %llu because current CS is at seq %llu\n",
1920                                 seq, ctx->cs_sequence);
1921         } else if (fence) {
1922                 if (!timeout_us)
1923                         completion_rc = completion_done(&fence->completion);
1924                 else
1925                         completion_rc =
1926                                 wait_for_completion_interruptible_timeout(
1927                                         &fence->completion, timeout);
1928
1929                 if (completion_rc > 0) {
1930                         *status = CS_WAIT_STATUS_COMPLETED;
1931                         if (timestamp)
1932                                 *timestamp = ktime_to_ns(fence->timestamp);
1933                 } else {
1934                         *status = CS_WAIT_STATUS_BUSY;
1935                 }
1936
1937                 if (fence->error == -ETIMEDOUT)
1938                         rc = -ETIMEDOUT;
1939                 else if (fence->error == -EIO)
1940                         rc = -EIO;
1941
1942                 hl_fence_put(fence);
1943         } else {
1944                 dev_dbg(hdev->dev,
1945                         "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
1946                         seq, ctx->cs_sequence);
1947                 *status = CS_WAIT_STATUS_GONE;
1948         }
1949
1950         hl_ctx_put(ctx);
1951
1952         return rc;
1953 }
1954
1955 static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
1956 {
1957         struct hl_device *hdev = hpriv->hdev;
1958         union hl_wait_cs_args *args = data;
1959         enum hl_cs_wait_status status;
1960         u64 seq = args->in.seq;
1961         s64 timestamp;
1962         int rc;
1963
1964         rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq,
1965                                 &status, &timestamp);
1966
1967         memset(args, 0, sizeof(*args));
1968
1969         if (rc) {
1970                 if (rc == -ERESTARTSYS) {
1971                         dev_err_ratelimited(hdev->dev,
1972                                 "user process got signal while waiting for CS handle %llu\n",
1973                                 seq);
1974                         args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
1975                         rc = -EINTR;
1976                 } else if (rc == -ETIMEDOUT) {
1977                         dev_err_ratelimited(hdev->dev,
1978                                 "CS %llu has timed-out while user process is waiting for it\n",
1979                                 seq);
1980                         args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
1981                 } else if (rc == -EIO) {
1982                         dev_err_ratelimited(hdev->dev,
1983                                 "CS %llu has been aborted while user process is waiting for it\n",
1984                                 seq);
1985                         args->out.status = HL_WAIT_CS_STATUS_ABORTED;
1986                 }
1987                 return rc;
1988         }
1989
1990         if (timestamp) {
1991                 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
1992                 args->out.timestamp_nsec = timestamp;
1993         }
1994
1995         switch (status) {
1996         case CS_WAIT_STATUS_GONE:
1997                 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
1998                 fallthrough;
1999         case CS_WAIT_STATUS_COMPLETED:
2000                 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
2001                 break;
2002         case CS_WAIT_STATUS_BUSY:
2003         default:
2004                 args->out.status = HL_WAIT_CS_STATUS_BUSY;
2005                 break;
2006         }
2007
2008         return 0;
2009 }
2010
2011 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
2012                                 u32 timeout_us, u64 user_address,
2013                                 u32 target_value, u16 interrupt_offset,
2014                                 enum hl_cs_wait_status *status)
2015 {
2016         struct hl_user_pending_interrupt *pend;
2017         struct hl_user_interrupt *interrupt;
2018         unsigned long timeout;
2019         long completion_rc;
2020         u32 completion_value;
2021         int rc = 0;
2022
2023         if (timeout_us == U32_MAX)
2024                 timeout = timeout_us;
2025         else
2026                 timeout = usecs_to_jiffies(timeout_us);
2027
2028         hl_ctx_get(hdev, ctx);
2029
2030         pend = kmalloc(sizeof(*pend), GFP_KERNEL);
2031         if (!pend) {
2032                 hl_ctx_put(ctx);
2033                 return -ENOMEM;
2034         }
2035
2036         hl_fence_init(&pend->fence, ULONG_MAX);
2037
2038         if (interrupt_offset == HL_COMMON_USER_INTERRUPT_ID)
2039                 interrupt = &hdev->common_user_interrupt;
2040         else
2041                 interrupt = &hdev->user_interrupt[interrupt_offset];
2042
2043         spin_lock(&interrupt->wait_list_lock);
2044         if (!hl_device_operational(hdev, NULL)) {
2045                 rc = -EPERM;
2046                 goto unlock_and_free_fence;
2047         }
2048
2049         if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
2050                 dev_err(hdev->dev,
2051                         "Failed to copy completion value from user\n");
2052                 rc = -EFAULT;
2053                 goto unlock_and_free_fence;
2054         }
2055
2056         if (completion_value >= target_value)
2057                 *status = CS_WAIT_STATUS_COMPLETED;
2058         else
2059                 *status = CS_WAIT_STATUS_BUSY;
2060
2061         if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
2062                 goto unlock_and_free_fence;
2063
2064         /* Add pending user interrupt to relevant list for the interrupt
2065          * handler to monitor
2066          */
2067         list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
2068         spin_unlock(&interrupt->wait_list_lock);
2069
2070 wait_again:
2071         /* Wait for interrupt handler to signal completion */
2072         completion_rc =
2073                 wait_for_completion_interruptible_timeout(
2074                                 &pend->fence.completion, timeout);
2075
2076         /* If timeout did not expire we need to perform the comparison.
2077          * If comparison fails, keep waiting until timeout expires
2078          */
2079         if (completion_rc > 0) {
2080                 if (copy_from_user(&completion_value,
2081                                 u64_to_user_ptr(user_address), 4)) {
2082                         dev_err(hdev->dev,
2083                                 "Failed to copy completion value from user\n");
2084                         rc = -EFAULT;
2085                         goto remove_pending_user_interrupt;
2086                 }
2087
2088                 if (completion_value >= target_value) {
2089                         *status = CS_WAIT_STATUS_COMPLETED;
2090                 } else {
2091                         timeout = completion_rc;
2092                         goto wait_again;
2093                 }
2094         } else {
2095                 *status = CS_WAIT_STATUS_BUSY;
2096         }
2097
2098 remove_pending_user_interrupt:
2099         spin_lock(&interrupt->wait_list_lock);
2100         list_del(&pend->wait_list_node);
2101
2102 unlock_and_free_fence:
2103         spin_unlock(&interrupt->wait_list_lock);
2104         kfree(pend);
2105         hl_ctx_put(ctx);
2106
2107         return rc;
2108 }
2109
2110 static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
2111 {
2112         u16 interrupt_id, interrupt_offset, first_interrupt, last_interrupt;
2113         struct hl_device *hdev = hpriv->hdev;
2114         struct asic_fixed_properties *prop;
2115         union hl_wait_cs_args *args = data;
2116         enum hl_cs_wait_status status;
2117         int rc;
2118
2119         prop = &hdev->asic_prop;
2120
2121         if (!prop->user_interrupt_count) {
2122                 dev_err(hdev->dev, "no user interrupts allowed");
2123                 return -EPERM;
2124         }
2125
2126         interrupt_id =
2127                 FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);
2128
2129         first_interrupt = prop->first_available_user_msix_interrupt;
2130         last_interrupt = prop->first_available_user_msix_interrupt +
2131                                                 prop->user_interrupt_count - 1;
2132
2133         if ((interrupt_id < first_interrupt || interrupt_id > last_interrupt) &&
2134                         interrupt_id != HL_COMMON_USER_INTERRUPT_ID) {
2135                 dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id);
2136                 return -EINVAL;
2137         }
2138
2139         if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID)
2140                 interrupt_offset = HL_COMMON_USER_INTERRUPT_ID;
2141         else
2142                 interrupt_offset = interrupt_id - first_interrupt;
2143
2144         rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
2145                                 args->in.interrupt_timeout_us, args->in.addr,
2146                                 args->in.target, interrupt_offset, &status);
2147
2148         memset(args, 0, sizeof(*args));
2149
2150         if (rc) {
2151                 dev_err_ratelimited(hdev->dev,
2152                         "interrupt_wait_ioctl failed (%d)\n", rc);
2153
2154                 return rc;
2155         }
2156
2157         switch (status) {
2158         case CS_WAIT_STATUS_COMPLETED:
2159                 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
2160                 break;
2161         case CS_WAIT_STATUS_BUSY:
2162         default:
2163                 args->out.status = HL_WAIT_CS_STATUS_BUSY;
2164                 break;
2165         }
2166
2167         return 0;
2168 }
2169
2170 int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
2171 {
2172         union hl_wait_cs_args *args = data;
2173         u32 flags = args->in.flags;
2174         int rc;
2175
2176         if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
2177                 rc = hl_interrupt_wait_ioctl(hpriv, data);
2178         else
2179                 rc = hl_cs_wait_ioctl(hpriv, data);
2180
2181         return rc;
2182 }