habanalabs: create common folder
authorOded Gabbay <oded.gabbay@gmail.com>
Mon, 13 Jul 2020 09:21:04 +0000 (12:21 +0300)
committerOded Gabbay <oded.gabbay@gmail.com>
Fri, 24 Jul 2020 17:31:37 +0000 (20:31 +0300)
For the internal needs of our CI, we need to move all the common code
into a common folder instead of keeping it in the root folder of the
driver.

The same applies to the common header files under include/.
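
As a result, code that includes the common driver headers has to point at
the new locations. The ASIC-specific headers (gaudiP.h and goyaP.h, listed
among the changed files below) are expected to carry includes along these
lines (an illustrative sketch only; the exact lines are in the per-file
diffs):

  #include "../common/habanalabs.h"
  #include "../include/common/hl_boot_if.h"
  #include "../include/common/armcp_if.h"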

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Reviewed-by: Omer Shpigelman <oshpigelman@habana.ai>
45 files changed:
drivers/misc/habanalabs/Makefile
drivers/misc/habanalabs/asid.c [deleted file]
drivers/misc/habanalabs/command_buffer.c [deleted file]
drivers/misc/habanalabs/command_submission.c [deleted file]
drivers/misc/habanalabs/common/Makefile [new file with mode: 0644]
drivers/misc/habanalabs/common/asid.c [new file with mode: 0644]
drivers/misc/habanalabs/common/command_buffer.c [new file with mode: 0644]
drivers/misc/habanalabs/common/command_submission.c [new file with mode: 0644]
drivers/misc/habanalabs/common/context.c [new file with mode: 0644]
drivers/misc/habanalabs/common/debugfs.c [new file with mode: 0644]
drivers/misc/habanalabs/common/device.c [new file with mode: 0644]
drivers/misc/habanalabs/common/firmware_if.c [new file with mode: 0644]
drivers/misc/habanalabs/common/habanalabs.h [new file with mode: 0644]
drivers/misc/habanalabs/common/habanalabs_drv.c [new file with mode: 0644]
drivers/misc/habanalabs/common/habanalabs_ioctl.c [new file with mode: 0644]
drivers/misc/habanalabs/common/hw_queue.c [new file with mode: 0644]
drivers/misc/habanalabs/common/hwmon.c [new file with mode: 0644]
drivers/misc/habanalabs/common/irq.c [new file with mode: 0644]
drivers/misc/habanalabs/common/memory.c [new file with mode: 0644]
drivers/misc/habanalabs/common/mmu.c [new file with mode: 0644]
drivers/misc/habanalabs/common/pci.c [new file with mode: 0644]
drivers/misc/habanalabs/common/sysfs.c [new file with mode: 0644]
drivers/misc/habanalabs/context.c [deleted file]
drivers/misc/habanalabs/debugfs.c [deleted file]
drivers/misc/habanalabs/device.c [deleted file]
drivers/misc/habanalabs/firmware_if.c [deleted file]
drivers/misc/habanalabs/gaudi/Makefile
drivers/misc/habanalabs/gaudi/gaudiP.h
drivers/misc/habanalabs/goya/goyaP.h
drivers/misc/habanalabs/habanalabs.h [deleted file]
drivers/misc/habanalabs/habanalabs_drv.c [deleted file]
drivers/misc/habanalabs/habanalabs_ioctl.c [deleted file]
drivers/misc/habanalabs/hw_queue.c [deleted file]
drivers/misc/habanalabs/hwmon.c [deleted file]
drivers/misc/habanalabs/include/armcp_if.h [deleted file]
drivers/misc/habanalabs/include/common/armcp_if.h [new file with mode: 0644]
drivers/misc/habanalabs/include/common/hl_boot_if.h [new file with mode: 0644]
drivers/misc/habanalabs/include/common/qman_if.h [new file with mode: 0644]
drivers/misc/habanalabs/include/hl_boot_if.h [deleted file]
drivers/misc/habanalabs/include/qman_if.h [deleted file]
drivers/misc/habanalabs/irq.c [deleted file]
drivers/misc/habanalabs/memory.c [deleted file]
drivers/misc/habanalabs/mmu.c [deleted file]
drivers/misc/habanalabs/pci.c [deleted file]
drivers/misc/habanalabs/sysfs.c [deleted file]

index 421ebd9..a786c0a 100644 (file)
@@ -3,16 +3,15 @@
 # Makefile for HabanaLabs AI accelerators driver
 #
 
-obj- := habanalabs.o
+obj-$(CONFIG_HABANA_AI) := habanalabs.o
 
-habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
-               command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \
-               command_submission.o mmu.o firmware_if.o pci.o
-
-habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o
+include $(src)/common/Makefile
+habanalabs-y += $(HL_COMMON_FILES)
 
 include $(src)/goya/Makefile
 habanalabs-y += $(HL_GOYA_FILES)
 
 include $(src)/gaudi/Makefile
 habanalabs-y += $(HL_GAUDI_FILES)
+
+habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o
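
The new common/Makefile (added by this patch; its contents are not part of
this hunk) presumably mirrors the goya/gaudi sub-Makefiles by defining
HL_COMMON_FILES with the relocated objects. A sketch of what it likely
contains, inferred from the habanalabs-y list removed above:

  # drivers/misc/habanalabs/common/Makefile (inferred sketch, not part of this diff)
  HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
  		common/asid.o common/habanalabs_ioctl.o common/command_buffer.o \
  		common/hw_queue.o common/irq.o common/sysfs.o common/hwmon.o \
  		common/memory.o common/command_submission.o common/mmu.o \
  		common/firmware_if.o common/pci.o

common/debugfs.o is intentionally not in that list; as the hunk above shows,
it is still selected at the top level, guarded by CONFIG_DEBUG_FS.
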
diff --git a/drivers/misc/habanalabs/asid.c b/drivers/misc/habanalabs/asid.c
deleted file mode 100644 (file)
index a2fdf31..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-
-#include <linux/slab.h>
-
-int hl_asid_init(struct hl_device *hdev)
-{
-       hdev->asid_bitmap = kcalloc(BITS_TO_LONGS(hdev->asic_prop.max_asid),
-                                       sizeof(*hdev->asid_bitmap), GFP_KERNEL);
-       if (!hdev->asid_bitmap)
-               return -ENOMEM;
-
-       mutex_init(&hdev->asid_mutex);
-
-       /* ASID 0 is reserved for the kernel driver and device CPU */
-       set_bit(0, hdev->asid_bitmap);
-
-       return 0;
-}
-
-void hl_asid_fini(struct hl_device *hdev)
-{
-       mutex_destroy(&hdev->asid_mutex);
-       kfree(hdev->asid_bitmap);
-}
-
-unsigned long hl_asid_alloc(struct hl_device *hdev)
-{
-       unsigned long found;
-
-       mutex_lock(&hdev->asid_mutex);
-
-       found = find_first_zero_bit(hdev->asid_bitmap,
-                                       hdev->asic_prop.max_asid);
-       if (found == hdev->asic_prop.max_asid)
-               found = 0;
-       else
-               set_bit(found, hdev->asid_bitmap);
-
-       mutex_unlock(&hdev->asid_mutex);
-
-       return found;
-}
-
-void hl_asid_free(struct hl_device *hdev, unsigned long asid)
-{
-       if (WARN((asid == 0 || asid >= hdev->asic_prop.max_asid),
-                                               "Invalid ASID %lu", asid))
-               return;
-       clear_bit(asid, hdev->asid_bitmap);
-}
diff --git a/drivers/misc/habanalabs/command_buffer.c b/drivers/misc/habanalabs/command_buffer.c
deleted file mode 100644 (file)
index 02d13f7..0000000
+++ /dev/null
@@ -1,463 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include <uapi/misc/habanalabs.h>
-#include "habanalabs.h"
-
-#include <linux/mm.h>
-#include <linux/slab.h>
-
-static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
-{
-       hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
-                       (void *) (uintptr_t) cb->kernel_address,
-                       cb->bus_address);
-       kfree(cb);
-}
-
-static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb)
-{
-       if (cb->is_pool) {
-               spin_lock(&hdev->cb_pool_lock);
-               list_add(&cb->pool_list, &hdev->cb_pool);
-               spin_unlock(&hdev->cb_pool_lock);
-       } else {
-               cb_fini(hdev, cb);
-       }
-}
-
-static void cb_release(struct kref *ref)
-{
-       struct hl_device *hdev;
-       struct hl_cb *cb;
-
-       cb = container_of(ref, struct hl_cb, refcount);
-       hdev = cb->hdev;
-
-       hl_debugfs_remove_cb(cb);
-
-       cb_do_release(hdev, cb);
-}
-
-static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
-                                       int ctx_id)
-{
-       struct hl_cb *cb;
-       void *p;
-
-       /*
-        * We use GFP_ATOMIC here because this function can be called from
-        * the latency-sensitive code path for command submission. Due to H/W
-        * limitations in some of the ASICs, the kernel must copy the user CB
-        * that is designated for an external queue and actually enqueue
-        * the kernel's copy. Hence, we must never sleep in this code section
-        * and must use GFP_ATOMIC for all memory allocations.
-        */
-       if (ctx_id == HL_KERNEL_ASID_ID)
-               cb = kzalloc(sizeof(*cb), GFP_ATOMIC);
-       else
-               cb = kzalloc(sizeof(*cb), GFP_KERNEL);
-
-       if (!cb)
-               return NULL;
-
-       if (ctx_id == HL_KERNEL_ASID_ID)
-               p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
-                                               &cb->bus_address, GFP_ATOMIC);
-       else
-               p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
-                                               &cb->bus_address,
-                                               GFP_USER | __GFP_ZERO);
-       if (!p) {
-               dev_err(hdev->dev,
-                       "failed to allocate %d of dma memory for CB\n",
-                       cb_size);
-               kfree(cb);
-               return NULL;
-       }
-
-       cb->kernel_address = (u64) (uintptr_t) p;
-       cb->size = cb_size;
-
-       return cb;
-}
-
-int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
-                       u32 cb_size, u64 *handle, int ctx_id)
-{
-       struct hl_cb *cb;
-       bool alloc_new_cb = true;
-       int rc;
-
-       /*
-        * Can't use generic function to check this because of special case
-        * where we create a CB as part of the reset process
-        */
-       if ((hdev->disabled) || ((atomic_read(&hdev->in_reset)) &&
-                                       (ctx_id != HL_KERNEL_ASID_ID))) {
-               dev_warn_ratelimited(hdev->dev,
-                       "Device is disabled or in reset. Can't create new CBs\n");
-               rc = -EBUSY;
-               goto out_err;
-       }
-
-       if (cb_size > SZ_2M) {
-               dev_err(hdev->dev, "CB size %d must be less than %d\n",
-                       cb_size, SZ_2M);
-               rc = -EINVAL;
-               goto out_err;
-       }
-
-       /* Minimum allocation must be PAGE SIZE */
-       if (cb_size < PAGE_SIZE)
-               cb_size = PAGE_SIZE;
-
-       if (ctx_id == HL_KERNEL_ASID_ID &&
-                       cb_size <= hdev->asic_prop.cb_pool_cb_size) {
-
-               spin_lock(&hdev->cb_pool_lock);
-               if (!list_empty(&hdev->cb_pool)) {
-                       cb = list_first_entry(&hdev->cb_pool, typeof(*cb),
-                                       pool_list);
-                       list_del(&cb->pool_list);
-                       spin_unlock(&hdev->cb_pool_lock);
-                       alloc_new_cb = false;
-               } else {
-                       spin_unlock(&hdev->cb_pool_lock);
-                       dev_dbg(hdev->dev, "CB pool is empty\n");
-               }
-       }
-
-       if (alloc_new_cb) {
-               cb = hl_cb_alloc(hdev, cb_size, ctx_id);
-               if (!cb) {
-                       rc = -ENOMEM;
-                       goto out_err;
-               }
-       }
-
-       cb->hdev = hdev;
-       cb->ctx_id = ctx_id;
-
-       spin_lock(&mgr->cb_lock);
-       rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
-       spin_unlock(&mgr->cb_lock);
-
-       if (rc < 0) {
-               dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n");
-               goto release_cb;
-       }
-
-       cb->id = rc;
-
-       kref_init(&cb->refcount);
-       spin_lock_init(&cb->lock);
-
-       /*
-        * idr is 32-bit so we can safely OR it with a mask that is above
-        * 32 bit
-        */
-       *handle = cb->id | HL_MMAP_CB_MASK;
-       *handle <<= PAGE_SHIFT;
-
-       hl_debugfs_add_cb(cb);
-
-       return 0;
-
-release_cb:
-       cb_do_release(hdev, cb);
-out_err:
-       *handle = 0;
-
-       return rc;
-}
-
-int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle)
-{
-       struct hl_cb *cb;
-       u32 handle;
-       int rc = 0;
-
-       /*
-        * handle was given to user to do mmap, I need to shift it back to
-        * how the idr module gave it to me
-        */
-       cb_handle >>= PAGE_SHIFT;
-       handle = (u32) cb_handle;
-
-       spin_lock(&mgr->cb_lock);
-
-       cb = idr_find(&mgr->cb_handles, handle);
-       if (cb) {
-               idr_remove(&mgr->cb_handles, handle);
-               spin_unlock(&mgr->cb_lock);
-               kref_put(&cb->refcount, cb_release);
-       } else {
-               spin_unlock(&mgr->cb_lock);
-               dev_err(hdev->dev,
-                       "CB destroy failed, no match to handle 0x%x\n", handle);
-               rc = -EINVAL;
-       }
-
-       return rc;
-}
-
-int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
-{
-       union hl_cb_args *args = data;
-       struct hl_device *hdev = hpriv->hdev;
-       u64 handle = 0;
-       int rc;
-
-       if (hl_device_disabled_or_in_reset(hdev)) {
-               dev_warn_ratelimited(hdev->dev,
-                       "Device is %s. Can't execute CB IOCTL\n",
-                       atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
-               return -EBUSY;
-       }
-
-       switch (args->in.op) {
-       case HL_CB_OP_CREATE:
-               if (args->in.cb_size > HL_MAX_CB_SIZE) {
-                       dev_err(hdev->dev,
-                               "User requested CB size %d must be less than %d\n",
-                               args->in.cb_size, HL_MAX_CB_SIZE);
-                       rc = -EINVAL;
-               } else {
-                       rc = hl_cb_create(hdev, &hpriv->cb_mgr,
-                                               args->in.cb_size, &handle,
-                                               hpriv->ctx->asid);
-               }
-
-               memset(args, 0, sizeof(*args));
-               args->out.cb_handle = handle;
-               break;
-
-       case HL_CB_OP_DESTROY:
-               rc = hl_cb_destroy(hdev, &hpriv->cb_mgr,
-                                       args->in.cb_handle);
-               break;
-
-       default:
-               rc = -ENOTTY;
-               break;
-       }
-
-       return rc;
-}
-
-static void cb_vm_close(struct vm_area_struct *vma)
-{
-       struct hl_cb *cb = (struct hl_cb *) vma->vm_private_data;
-       long new_mmap_size;
-
-       new_mmap_size = cb->mmap_size - (vma->vm_end - vma->vm_start);
-
-       if (new_mmap_size > 0) {
-               cb->mmap_size = new_mmap_size;
-               return;
-       }
-
-       spin_lock(&cb->lock);
-       cb->mmap = false;
-       spin_unlock(&cb->lock);
-
-       hl_cb_put(cb);
-       vma->vm_private_data = NULL;
-}
-
-static const struct vm_operations_struct cb_vm_ops = {
-       .close = cb_vm_close
-};
-
-int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
-{
-       struct hl_device *hdev = hpriv->hdev;
-       struct hl_cb *cb;
-       phys_addr_t address;
-       u32 handle;
-       int rc;
-
-       handle = vma->vm_pgoff;
-
-       /* reference was taken here */
-       cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle);
-       if (!cb) {
-               dev_err(hdev->dev,
-                       "CB mmap failed, no match to handle 0x%x\n", handle);
-               return -EINVAL;
-       }
-
-       /* Validation check */
-       if ((vma->vm_end - vma->vm_start) != ALIGN(cb->size, PAGE_SIZE)) {
-               dev_err(hdev->dev,
-                       "CB mmap failed, mmap size 0x%lx != 0x%x cb size\n",
-                       vma->vm_end - vma->vm_start, cb->size);
-               rc = -EINVAL;
-               goto put_cb;
-       }
-
-       spin_lock(&cb->lock);
-
-       if (cb->mmap) {
-               dev_err(hdev->dev,
-                       "CB mmap failed, CB already mmaped to user\n");
-               rc = -EINVAL;
-               goto release_lock;
-       }
-
-       cb->mmap = true;
-
-       spin_unlock(&cb->lock);
-
-       vma->vm_ops = &cb_vm_ops;
-
-       /*
-        * Note: We're transferring the cb reference to
-        * vma->vm_private_data here.
-        */
-
-       vma->vm_private_data = cb;
-
-       /* Calculate address for CB */
-       address = virt_to_phys((void *) (uintptr_t) cb->kernel_address);
-
-       rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
-                                       address, cb->size);
-
-       if (rc) {
-               spin_lock(&cb->lock);
-               cb->mmap = false;
-               goto release_lock;
-       }
-
-       cb->mmap_size = cb->size;
-
-       return 0;
-
-release_lock:
-       spin_unlock(&cb->lock);
-put_cb:
-       hl_cb_put(cb);
-       return rc;
-}
-
-struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
-                       u32 handle)
-{
-       struct hl_cb *cb;
-
-       spin_lock(&mgr->cb_lock);
-       cb = idr_find(&mgr->cb_handles, handle);
-
-       if (!cb) {
-               spin_unlock(&mgr->cb_lock);
-               dev_warn(hdev->dev,
-                       "CB get failed, no match to handle 0x%x\n", handle);
-               return NULL;
-       }
-
-       kref_get(&cb->refcount);
-
-       spin_unlock(&mgr->cb_lock);
-
-       return cb;
-
-}
-
-void hl_cb_put(struct hl_cb *cb)
-{
-       kref_put(&cb->refcount, cb_release);
-}
-
-void hl_cb_mgr_init(struct hl_cb_mgr *mgr)
-{
-       spin_lock_init(&mgr->cb_lock);
-       idr_init(&mgr->cb_handles);
-}
-
-void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
-{
-       struct hl_cb *cb;
-       struct idr *idp;
-       u32 id;
-
-       idp = &mgr->cb_handles;
-
-       idr_for_each_entry(idp, cb, id) {
-               if (kref_put(&cb->refcount, cb_release) != 1)
-                       dev_err(hdev->dev,
-                               "CB %d for CTX ID %d is still alive\n",
-                               id, cb->ctx_id);
-       }
-
-       idr_destroy(&mgr->cb_handles);
-}
-
-struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
-{
-       u64 cb_handle;
-       struct hl_cb *cb;
-       int rc;
-
-       rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
-                       HL_KERNEL_ASID_ID);
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to allocate CB for the kernel driver %d\n", rc);
-               return NULL;
-       }
-
-       cb_handle >>= PAGE_SHIFT;
-       cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle);
-       /* hl_cb_get should never fail here so use kernel WARN */
-       WARN(!cb, "Kernel CB handle invalid 0x%x\n", (u32) cb_handle);
-       if (!cb)
-               goto destroy_cb;
-
-       return cb;
-
-destroy_cb:
-       hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb_handle << PAGE_SHIFT);
-
-       return NULL;
-}
-
-int hl_cb_pool_init(struct hl_device *hdev)
-{
-       struct hl_cb *cb;
-       int i;
-
-       INIT_LIST_HEAD(&hdev->cb_pool);
-       spin_lock_init(&hdev->cb_pool_lock);
-
-       for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
-               cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
-                               HL_KERNEL_ASID_ID);
-               if (cb) {
-                       cb->is_pool = true;
-                       list_add(&cb->pool_list, &hdev->cb_pool);
-               } else {
-                       hl_cb_pool_fini(hdev);
-                       return -ENOMEM;
-               }
-       }
-
-       return 0;
-}
-
-int hl_cb_pool_fini(struct hl_device *hdev)
-{
-       struct hl_cb *cb, *tmp;
-
-       list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) {
-               list_del(&cb->pool_list);
-               cb_fini(hdev, cb);
-       }
-
-       return 0;
-}
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
deleted file mode 100644 (file)
index c605be8..0000000
+++ /dev/null
@@ -1,1232 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include <uapi/misc/habanalabs.h>
-#include "habanalabs.h"
-
-#include <linux/uaccess.h>
-#include <linux/slab.h>
-
-#define HL_CS_FLAGS_SIG_WAIT   (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT)
-
-static void job_wq_completion(struct work_struct *work);
-static long _hl_cs_wait_ioctl(struct hl_device *hdev,
-               struct hl_ctx *ctx, u64 timeout_us, u64 seq);
-static void cs_do_release(struct kref *ref);
-
-static void hl_sob_reset(struct kref *ref)
-{
-       struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
-                                                       kref);
-       struct hl_device *hdev = hw_sob->hdev;
-
-       hdev->asic_funcs->reset_sob(hdev, hw_sob);
-}
-
-void hl_sob_reset_error(struct kref *ref)
-{
-       struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
-                                                       kref);
-       struct hl_device *hdev = hw_sob->hdev;
-
-       dev_crit(hdev->dev,
-                       "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
-                       hw_sob->q_idx, hw_sob->sob_id);
-}
-
-static const char *hl_fence_get_driver_name(struct dma_fence *fence)
-{
-       return "HabanaLabs";
-}
-
-static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
-{
-       struct hl_cs_compl *hl_cs_compl =
-               container_of(fence, struct hl_cs_compl, base_fence);
-
-       return dev_name(hl_cs_compl->hdev->dev);
-}
-
-static bool hl_fence_enable_signaling(struct dma_fence *fence)
-{
-       return true;
-}
-
-static void hl_fence_release(struct dma_fence *fence)
-{
-       struct hl_cs_compl *hl_cs_cmpl =
-               container_of(fence, struct hl_cs_compl, base_fence);
-       struct hl_device *hdev = hl_cs_cmpl->hdev;
-
-       /* EBUSY means the CS was never submitted and hence we don't have
-        * an attached hw_sob object that we should handle here
-        */
-       if (fence->error == -EBUSY)
-               goto free;
-
-       if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
-                       (hl_cs_cmpl->type == CS_TYPE_WAIT)) {
-
-               dev_dbg(hdev->dev,
-                       "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
-                       hl_cs_cmpl->cs_seq,
-                       hl_cs_cmpl->type,
-                       hl_cs_cmpl->hw_sob->sob_id,
-                       hl_cs_cmpl->sob_val);
-
-               /*
-                * A signal CS can get completion while the corresponding wait
-                * for signal CS is on its way to the PQ. The wait for signal CS
-                * will get stuck if the signal CS incremented the SOB to its
-                * max value and there are no pending (submitted) waits on this
-                * SOB.
-                * We do the following to avoid this situation:
-                * 1. The wait for signal CS must get a ref for the signal CS as
-                *    soon as possible in cs_ioctl_signal_wait() and put it
-                *    before being submitted to the PQ but after it incremented
-                *    the SOB refcnt in init_signal_wait_cs().
-                * 2. Signal/Wait for signal CS will decrement the SOB refcnt
-                *    here.
-                * These two measures guarantee that the wait for signal CS will
-                * reset the SOB upon completion rather than the signal CS and
-                * hence the above scenario is avoided.
-                */
-               kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
-       }
-
-free:
-       kfree_rcu(hl_cs_cmpl, base_fence.rcu);
-}
-
-static const struct dma_fence_ops hl_fence_ops = {
-       .get_driver_name = hl_fence_get_driver_name,
-       .get_timeline_name = hl_fence_get_timeline_name,
-       .enable_signaling = hl_fence_enable_signaling,
-       .release = hl_fence_release
-};
-
-static void cs_get(struct hl_cs *cs)
-{
-       kref_get(&cs->refcount);
-}
-
-static int cs_get_unless_zero(struct hl_cs *cs)
-{
-       return kref_get_unless_zero(&cs->refcount);
-}
-
-static void cs_put(struct hl_cs *cs)
-{
-       kref_put(&cs->refcount, cs_do_release);
-}
-
-static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
-{
-       /*
-        * Patched CB is created for external queues jobs, and for H/W queues
-        * jobs if the user CB was allocated by driver and MMU is disabled.
-        */
-       return (job->queue_type == QUEUE_TYPE_EXT ||
-                       (job->queue_type == QUEUE_TYPE_HW &&
-                                       job->is_kernel_allocated_cb &&
-                                       !hdev->mmu_enable));
-}
-
-/*
- * cs_parser - parse the user command submission
- *
- * @hpriv      : pointer to the private data of the fd
- * @job        : pointer to the job that holds the command submission info
- *
- * The function parses the command submission of the user. It calls the
- * ASIC specific parser, which returns a list of memory blocks to send
- * to the device as different command buffers
- *
- */
-static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
-{
-       struct hl_device *hdev = hpriv->hdev;
-       struct hl_cs_parser parser;
-       int rc;
-
-       parser.ctx_id = job->cs->ctx->asid;
-       parser.cs_sequence = job->cs->sequence;
-       parser.job_id = job->id;
-
-       parser.hw_queue_id = job->hw_queue_id;
-       parser.job_userptr_list = &job->userptr_list;
-       parser.patched_cb = NULL;
-       parser.user_cb = job->user_cb;
-       parser.user_cb_size = job->user_cb_size;
-       parser.queue_type = job->queue_type;
-       parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
-       job->patched_cb = NULL;
-
-       rc = hdev->asic_funcs->cs_parser(hdev, &parser);
-
-       if (is_cb_patched(hdev, job)) {
-               if (!rc) {
-                       job->patched_cb = parser.patched_cb;
-                       job->job_cb_size = parser.patched_cb_size;
-                       job->contains_dma_pkt = parser.contains_dma_pkt;
-
-                       spin_lock(&job->patched_cb->lock);
-                       job->patched_cb->cs_cnt++;
-                       spin_unlock(&job->patched_cb->lock);
-               }
-
-               /*
-                * Whether the parsing worked or not, we don't need the
-                * original CB anymore because it was already parsed and
-                * won't be accessed again for this CS
-                */
-               spin_lock(&job->user_cb->lock);
-               job->user_cb->cs_cnt--;
-               spin_unlock(&job->user_cb->lock);
-               hl_cb_put(job->user_cb);
-               job->user_cb = NULL;
-       } else if (!rc) {
-               job->job_cb_size = job->user_cb_size;
-       }
-
-       return rc;
-}
-
-static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
-{
-       struct hl_cs *cs = job->cs;
-
-       if (is_cb_patched(hdev, job)) {
-               hl_userptr_delete_list(hdev, &job->userptr_list);
-
-               /*
-                * We might arrive here from rollback and patched CB wasn't
-                * created, so we need to check it's not NULL
-                */
-               if (job->patched_cb) {
-                       spin_lock(&job->patched_cb->lock);
-                       job->patched_cb->cs_cnt--;
-                       spin_unlock(&job->patched_cb->lock);
-
-                       hl_cb_put(job->patched_cb);
-               }
-       }
-
-       /* For H/W queue jobs, if a user CB was allocated by driver and MMU is
-        * enabled, the user CB isn't released in cs_parser() and thus should be
-        * released here.
-        */
-       if (job->queue_type == QUEUE_TYPE_HW &&
-                       job->is_kernel_allocated_cb && hdev->mmu_enable) {
-               spin_lock(&job->user_cb->lock);
-               job->user_cb->cs_cnt--;
-               spin_unlock(&job->user_cb->lock);
-
-               hl_cb_put(job->user_cb);
-       }
-
-       /*
-        * This is the only place where there can be multiple threads
-        * modifying the list at the same time
-        */
-       spin_lock(&cs->job_lock);
-       list_del(&job->cs_node);
-       spin_unlock(&cs->job_lock);
-
-       hl_debugfs_remove_job(hdev, job);
-
-       if (job->queue_type == QUEUE_TYPE_EXT ||
-                       job->queue_type == QUEUE_TYPE_HW)
-               cs_put(cs);
-
-       kfree(job);
-}
-
-static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
-{
-       hdev->aggregated_cs_counters.device_in_reset_drop_cnt +=
-                       ctx->cs_counters.device_in_reset_drop_cnt;
-       hdev->aggregated_cs_counters.out_of_mem_drop_cnt +=
-                       ctx->cs_counters.out_of_mem_drop_cnt;
-       hdev->aggregated_cs_counters.parsing_drop_cnt +=
-                       ctx->cs_counters.parsing_drop_cnt;
-       hdev->aggregated_cs_counters.queue_full_drop_cnt +=
-                       ctx->cs_counters.queue_full_drop_cnt;
-}
-
-static void cs_do_release(struct kref *ref)
-{
-       struct hl_cs *cs = container_of(ref, struct hl_cs,
-                                               refcount);
-       struct hl_device *hdev = cs->ctx->hdev;
-       struct hl_cs_job *job, *tmp;
-
-       cs->completed = true;
-
-       /*
-        * Although if we reached here it means that all external jobs have
-        * finished, because each one of them took refcnt to CS, we still
-        * need to go over the internal jobs and free them. Otherwise, we
-        * will have leaked memory and what's worse, the CS object (and
-        * potentially the CTX object) could be released, while the JOB
-        * still holds a pointer to them (but no reference).
-        */
-       list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
-               free_job(hdev, job);
-
-       /* We also need to update CI for internal queues */
-       if (cs->submitted) {
-               hdev->asic_funcs->hw_queues_lock(hdev);
-
-               hdev->cs_active_cnt--;
-               if (!hdev->cs_active_cnt) {
-                       struct hl_device_idle_busy_ts *ts;
-
-                       ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
-                       ts->busy_to_idle_ts = ktime_get();
-
-                       if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
-                               hdev->idle_busy_ts_idx = 0;
-               } else if (hdev->cs_active_cnt < 0) {
-                       dev_crit(hdev->dev, "CS active cnt %d is negative\n",
-                               hdev->cs_active_cnt);
-               }
-
-               hdev->asic_funcs->hw_queues_unlock(hdev);
-
-               hl_int_hw_queue_update_ci(cs);
-
-               spin_lock(&hdev->hw_queues_mirror_lock);
-               /* remove CS from hw_queues mirror list */
-               list_del_init(&cs->mirror_node);
-               spin_unlock(&hdev->hw_queues_mirror_lock);
-
-               /*
-                * Don't cancel TDR in case this CS was timedout because we
-                * might be running from the TDR context
-                */
-               if ((!cs->timedout) &&
-                       (hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) {
-                       struct hl_cs *next;
-
-                       if (cs->tdr_active)
-                               cancel_delayed_work_sync(&cs->work_tdr);
-
-                       spin_lock(&hdev->hw_queues_mirror_lock);
-
-                       /* queue TDR for next CS */
-                       next = list_first_entry_or_null(
-                                       &hdev->hw_queues_mirror_list,
-                                       struct hl_cs, mirror_node);
-
-                       if ((next) && (!next->tdr_active)) {
-                               next->tdr_active = true;
-                               schedule_delayed_work(&next->work_tdr,
-                                                       hdev->timeout_jiffies);
-                       }
-
-                       spin_unlock(&hdev->hw_queues_mirror_lock);
-               }
-       } else if (cs->type == CS_TYPE_WAIT) {
-               /*
-                * In case the wait for signal CS was submitted, the put occurs
-                * in init_signal_wait_cs() right before hanging on the PQ.
-                */
-               dma_fence_put(cs->signal_fence);
-       }
-
-       /*
-        * Must be called before hl_ctx_put because inside we use ctx to get
-        * the device
-        */
-       hl_debugfs_remove_cs(cs);
-
-       hl_ctx_put(cs->ctx);
-
-       /* We need to mark an error for not submitted because in that case
-        * the dma fence release flow is different. Mainly, we don't need
-        * to handle hw_sob for signal/wait
-        */
-       if (cs->timedout)
-               dma_fence_set_error(cs->fence, -ETIMEDOUT);
-       else if (cs->aborted)
-               dma_fence_set_error(cs->fence, -EIO);
-       else if (!cs->submitted)
-               dma_fence_set_error(cs->fence, -EBUSY);
-
-       dma_fence_signal(cs->fence);
-       dma_fence_put(cs->fence);
-
-       cs_counters_aggregate(hdev, cs->ctx);
-
-       kfree(cs->jobs_in_queue_cnt);
-       kfree(cs);
-}
-
-static void cs_timedout(struct work_struct *work)
-{
-       struct hl_device *hdev;
-       int ctx_asid, rc;
-       struct hl_cs *cs = container_of(work, struct hl_cs,
-                                                work_tdr.work);
-       rc = cs_get_unless_zero(cs);
-       if (!rc)
-               return;
-
-       if ((!cs->submitted) || (cs->completed)) {
-               cs_put(cs);
-               return;
-       }
-
-       /* Mark the CS is timed out so we won't try to cancel its TDR */
-       cs->timedout = true;
-
-       hdev = cs->ctx->hdev;
-       ctx_asid = cs->ctx->asid;
-
-       dev_err(hdev->dev,
-               "Command submission %llu has not finished in time!\n",
-               cs->sequence);
-
-       cs_put(cs);
-
-       if (hdev->reset_on_lockup)
-               hl_device_reset(hdev, false, false);
-}
-
-static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
-                       enum hl_cs_type cs_type, struct hl_cs **cs_new)
-{
-       struct hl_cs_compl *cs_cmpl;
-       struct dma_fence *other = NULL;
-       struct hl_cs *cs;
-       int rc;
-
-       cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
-       if (!cs)
-               return -ENOMEM;
-
-       cs->ctx = ctx;
-       cs->submitted = false;
-       cs->completed = false;
-       cs->type = cs_type;
-       INIT_LIST_HEAD(&cs->job_list);
-       INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
-       kref_init(&cs->refcount);
-       spin_lock_init(&cs->job_lock);
-
-       cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
-       if (!cs_cmpl) {
-               rc = -ENOMEM;
-               goto free_cs;
-       }
-
-       cs_cmpl->hdev = hdev;
-       cs_cmpl->type = cs->type;
-       spin_lock_init(&cs_cmpl->lock);
-       cs->fence = &cs_cmpl->base_fence;
-
-       spin_lock(&ctx->cs_lock);
-
-       cs_cmpl->cs_seq = ctx->cs_sequence;
-       other = ctx->cs_pending[cs_cmpl->cs_seq &
-                               (hdev->asic_prop.max_pending_cs - 1)];
-       if ((other) && (!dma_fence_is_signaled(other))) {
-               dev_dbg(hdev->dev,
-                       "Rejecting CS because of too many in-flights CS\n");
-               rc = -EAGAIN;
-               goto free_fence;
-       }
-
-       cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
-                       sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
-       if (!cs->jobs_in_queue_cnt) {
-               rc = -ENOMEM;
-               goto free_fence;
-       }
-
-       dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
-                       ctx->asid, ctx->cs_sequence);
-
-       cs->sequence = cs_cmpl->cs_seq;
-
-       ctx->cs_pending[cs_cmpl->cs_seq &
-                       (hdev->asic_prop.max_pending_cs - 1)] =
-                                                       &cs_cmpl->base_fence;
-       ctx->cs_sequence++;
-
-       dma_fence_get(&cs_cmpl->base_fence);
-
-       dma_fence_put(other);
-
-       spin_unlock(&ctx->cs_lock);
-
-       *cs_new = cs;
-
-       return 0;
-
-free_fence:
-       spin_unlock(&ctx->cs_lock);
-       kfree(cs_cmpl);
-free_cs:
-       kfree(cs);
-       return rc;
-}
-
-static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
-{
-       struct hl_cs_job *job, *tmp;
-
-       list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
-               free_job(hdev, job);
-}
-
-void hl_cs_rollback_all(struct hl_device *hdev)
-{
-       int i;
-       struct hl_cs *cs, *tmp;
-
-       /* flush all completions */
-       for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
-               flush_workqueue(hdev->cq_wq[i]);
-
-       /* Make sure we don't have leftovers in the H/W queues mirror list */
-       list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
-                               mirror_node) {
-               cs_get(cs);
-               cs->aborted = true;
-               dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
-                                       cs->ctx->asid, cs->sequence);
-               cs_rollback(hdev, cs);
-               cs_put(cs);
-       }
-}
-
-static void job_wq_completion(struct work_struct *work)
-{
-       struct hl_cs_job *job = container_of(work, struct hl_cs_job,
-                                               finish_work);
-       struct hl_cs *cs = job->cs;
-       struct hl_device *hdev = cs->ctx->hdev;
-
-       /* job is no longer needed */
-       free_job(hdev, job);
-}
-
-static int validate_queue_index(struct hl_device *hdev,
-                               struct hl_cs_chunk *chunk,
-                               enum hl_queue_type *queue_type,
-                               bool *is_kernel_allocated_cb)
-{
-       struct asic_fixed_properties *asic = &hdev->asic_prop;
-       struct hw_queue_properties *hw_queue_prop;
-
-       /* This must be checked here to prevent out-of-bounds access to
-        * hw_queues_props array
-        */
-       if (chunk->queue_index >= asic->max_queues) {
-               dev_err(hdev->dev, "Queue index %d is invalid\n",
-                       chunk->queue_index);
-               return -EINVAL;
-       }
-
-       hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
-
-       if (hw_queue_prop->type == QUEUE_TYPE_NA) {
-               dev_err(hdev->dev, "Queue index %d is invalid\n",
-                       chunk->queue_index);
-               return -EINVAL;
-       }
-
-       if (hw_queue_prop->driver_only) {
-               dev_err(hdev->dev,
-                       "Queue index %d is restricted for the kernel driver\n",
-                       chunk->queue_index);
-               return -EINVAL;
-       }
-
-       *queue_type = hw_queue_prop->type;
-       *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;
-
-       return 0;
-}
-
-static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
-                                       struct hl_cb_mgr *cb_mgr,
-                                       struct hl_cs_chunk *chunk)
-{
-       struct hl_cb *cb;
-       u32 cb_handle;
-
-       cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
-
-       cb = hl_cb_get(hdev, cb_mgr, cb_handle);
-       if (!cb) {
-               dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
-               return NULL;
-       }
-
-       if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
-               dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
-               goto release_cb;
-       }
-
-       spin_lock(&cb->lock);
-       cb->cs_cnt++;
-       spin_unlock(&cb->lock);
-
-       return cb;
-
-release_cb:
-       hl_cb_put(cb);
-       return NULL;
-}
-
-struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
-               enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
-{
-       struct hl_cs_job *job;
-
-       job = kzalloc(sizeof(*job), GFP_ATOMIC);
-       if (!job)
-               return NULL;
-
-       job->queue_type = queue_type;
-       job->is_kernel_allocated_cb = is_kernel_allocated_cb;
-
-       if (is_cb_patched(hdev, job))
-               INIT_LIST_HEAD(&job->userptr_list);
-
-       if (job->queue_type == QUEUE_TYPE_EXT)
-               INIT_WORK(&job->finish_work, job_wq_completion);
-
-       return job;
-}
-
-static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
-                               u32 num_chunks, u64 *cs_seq)
-{
-       struct hl_device *hdev = hpriv->hdev;
-       struct hl_cs_chunk *cs_chunk_array;
-       struct hl_cs_job *job;
-       struct hl_cs *cs;
-       struct hl_cb *cb;
-       bool int_queues_only = true;
-       u32 size_to_copy;
-       int rc, i;
-
-       *cs_seq = ULLONG_MAX;
-
-       if (num_chunks > HL_MAX_JOBS_PER_CS) {
-               dev_err(hdev->dev,
-                       "Number of chunks can NOT be larger than %d\n",
-                       HL_MAX_JOBS_PER_CS);
-               rc = -EINVAL;
-               goto out;
-       }
-
-       cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
-                                       GFP_ATOMIC);
-       if (!cs_chunk_array) {
-               rc = -ENOMEM;
-               goto out;
-       }
-
-       size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
-       if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
-               dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
-               rc = -EFAULT;
-               goto free_cs_chunk_array;
-       }
-
-       /* increment refcnt for context */
-       hl_ctx_get(hdev, hpriv->ctx);
-
-       rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs);
-       if (rc) {
-               hl_ctx_put(hpriv->ctx);
-               goto free_cs_chunk_array;
-       }
-
-       *cs_seq = cs->sequence;
-
-       hl_debugfs_add_cs(cs);
-
-       /* Validate ALL the CS chunks before submitting the CS */
-       for (i = 0 ; i < num_chunks ; i++) {
-               struct hl_cs_chunk *chunk = &cs_chunk_array[i];
-               enum hl_queue_type queue_type;
-               bool is_kernel_allocated_cb;
-
-               rc = validate_queue_index(hdev, chunk, &queue_type,
-                                               &is_kernel_allocated_cb);
-               if (rc) {
-                       hpriv->ctx->cs_counters.parsing_drop_cnt++;
-                       goto free_cs_object;
-               }
-
-               if (is_kernel_allocated_cb) {
-                       cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
-                       if (!cb) {
-                               hpriv->ctx->cs_counters.parsing_drop_cnt++;
-                               rc = -EINVAL;
-                               goto free_cs_object;
-                       }
-               } else {
-                       cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
-               }
-
-               if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
-                       int_queues_only = false;
-
-               job = hl_cs_allocate_job(hdev, queue_type,
-                                               is_kernel_allocated_cb);
-               if (!job) {
-                       hpriv->ctx->cs_counters.out_of_mem_drop_cnt++;
-                       dev_err(hdev->dev, "Failed to allocate a new job\n");
-                       rc = -ENOMEM;
-                       if (is_kernel_allocated_cb)
-                               goto release_cb;
-                       else
-                               goto free_cs_object;
-               }
-
-               job->id = i + 1;
-               job->cs = cs;
-               job->user_cb = cb;
-               job->user_cb_size = chunk->cb_size;
-               job->hw_queue_id = chunk->queue_index;
-
-               cs->jobs_in_queue_cnt[job->hw_queue_id]++;
-
-               list_add_tail(&job->cs_node, &cs->job_list);
-
-               /*
-                * Increment CS reference. When CS reference is 0, CS is
-                * done and can be signaled to user and free all its resources
-                * Only increment for JOB on external or H/W queues, because
-                * only for those JOBs we get completion
-                */
-               if (job->queue_type == QUEUE_TYPE_EXT ||
-                               job->queue_type == QUEUE_TYPE_HW)
-                       cs_get(cs);
-
-               hl_debugfs_add_job(hdev, job);
-
-               rc = cs_parser(hpriv, job);
-               if (rc) {
-                       hpriv->ctx->cs_counters.parsing_drop_cnt++;
-                       dev_err(hdev->dev,
-                               "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
-                               cs->ctx->asid, cs->sequence, job->id, rc);
-                       goto free_cs_object;
-               }
-       }
-
-       if (int_queues_only) {
-               hpriv->ctx->cs_counters.parsing_drop_cnt++;
-               dev_err(hdev->dev,
-                       "Reject CS %d.%llu because only internal queues jobs are present\n",
-                       cs->ctx->asid, cs->sequence);
-               rc = -EINVAL;
-               goto free_cs_object;
-       }
-
-       rc = hl_hw_queue_schedule_cs(cs);
-       if (rc) {
-               if (rc != -EAGAIN)
-                       dev_err(hdev->dev,
-                               "Failed to submit CS %d.%llu to H/W queues, error %d\n",
-                               cs->ctx->asid, cs->sequence, rc);
-               goto free_cs_object;
-       }
-
-       rc = HL_CS_STATUS_SUCCESS;
-       goto put_cs;
-
-release_cb:
-       spin_lock(&cb->lock);
-       cb->cs_cnt--;
-       spin_unlock(&cb->lock);
-       hl_cb_put(cb);
-free_cs_object:
-       cs_rollback(hdev, cs);
-       *cs_seq = ULLONG_MAX;
-       /* The path below is both for good and erroneous exits */
-put_cs:
-       /* We finished with the CS in this function, so put the ref */
-       cs_put(cs);
-free_cs_chunk_array:
-       kfree(cs_chunk_array);
-out:
-       return rc;
-}
-
-static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
-                               void __user *chunks, u32 num_chunks,
-                               u64 *cs_seq)
-{
-       struct hl_device *hdev = hpriv->hdev;
-       struct hl_ctx *ctx = hpriv->ctx;
-       struct hl_cs_chunk *cs_chunk_array, *chunk;
-       struct hw_queue_properties *hw_queue_prop;
-       struct dma_fence *sig_fence = NULL;
-       struct hl_cs_job *job;
-       struct hl_cs *cs;
-       struct hl_cb *cb;
-       enum hl_queue_type q_type;
-       u64 *signal_seq_arr = NULL, signal_seq;
-       u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
-       int rc;
-
-       *cs_seq = ULLONG_MAX;
-
-       if (num_chunks > HL_MAX_JOBS_PER_CS) {
-               dev_err(hdev->dev,
-                       "Number of chunks can NOT be larger than %d\n",
-                       HL_MAX_JOBS_PER_CS);
-               rc = -EINVAL;
-               goto out;
-       }
-
-       cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
-                                       GFP_ATOMIC);
-       if (!cs_chunk_array) {
-               rc = -ENOMEM;
-               goto out;
-       }
-
-       size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
-       if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
-               dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
-               rc = -EFAULT;
-               goto free_cs_chunk_array;
-       }
-
-       /* currently it is guaranteed to have only one chunk */
-       chunk = &cs_chunk_array[0];
-       q_idx = chunk->queue_index;
-       hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
-       q_type = hw_queue_prop->type;
-
-       if ((q_idx >= hdev->asic_prop.max_queues) ||
-                       (!hw_queue_prop->supports_sync_stream)) {
-               dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
-               rc = -EINVAL;
-               goto free_cs_chunk_array;
-       }
-
-       if (cs_type == CS_TYPE_WAIT) {
-               struct hl_cs_compl *sig_waitcs_cmpl;
-
-               signal_seq_arr_len = chunk->num_signal_seq_arr;
-
-               /* currently only one signal seq is supported */
-               if (signal_seq_arr_len != 1) {
-                       dev_err(hdev->dev,
-                               "Wait for signal CS supports only one signal CS seq\n");
-                       rc = -EINVAL;
-                       goto free_cs_chunk_array;
-               }
-
-               signal_seq_arr = kmalloc_array(signal_seq_arr_len,
-                                               sizeof(*signal_seq_arr),
-                                               GFP_ATOMIC);
-               if (!signal_seq_arr) {
-                       rc = -ENOMEM;
-                       goto free_cs_chunk_array;
-               }
-
-               size_to_copy = chunk->num_signal_seq_arr *
-                               sizeof(*signal_seq_arr);
-               if (copy_from_user(signal_seq_arr,
-                                       u64_to_user_ptr(chunk->signal_seq_arr),
-                                       size_to_copy)) {
-                       dev_err(hdev->dev,
-                               "Failed to copy signal seq array from user\n");
-                       rc = -EFAULT;
-                       goto free_signal_seq_array;
-               }
-
-               /* currently it is guaranteed to have only one signal seq */
-               signal_seq = signal_seq_arr[0];
-               sig_fence = hl_ctx_get_fence(ctx, signal_seq);
-               if (IS_ERR(sig_fence)) {
-                       dev_err(hdev->dev,
-                               "Failed to get signal CS with seq 0x%llx\n",
-                               signal_seq);
-                       rc = PTR_ERR(sig_fence);
-                       goto free_signal_seq_array;
-               }
-
-               if (!sig_fence) {
-                       /* signal CS already finished */
-                       rc = 0;
-                       goto free_signal_seq_array;
-               }
-
-               sig_waitcs_cmpl =
-                       container_of(sig_fence, struct hl_cs_compl, base_fence);
-
-               if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
-                       dev_err(hdev->dev,
-                               "CS seq 0x%llx is not of a signal CS\n",
-                               signal_seq);
-                       dma_fence_put(sig_fence);
-                       rc = -EINVAL;
-                       goto free_signal_seq_array;
-               }
-
-               if (dma_fence_is_signaled(sig_fence)) {
-                       /* signal CS already finished */
-                       dma_fence_put(sig_fence);
-                       rc = 0;
-                       goto free_signal_seq_array;
-               }
-       }
-
-       /* increment refcnt for context */
-       hl_ctx_get(hdev, ctx);
-
-       rc = allocate_cs(hdev, ctx, cs_type, &cs);
-       if (rc) {
-               if (cs_type == CS_TYPE_WAIT)
-                       dma_fence_put(sig_fence);
-               hl_ctx_put(ctx);
-               goto free_signal_seq_array;
-       }
-
-       /*
-        * Save the signal CS fence for later initialization right before
-        * hanging the wait CS on the queue.
-        */
-       if (cs->type == CS_TYPE_WAIT)
-               cs->signal_fence = sig_fence;
-
-       hl_debugfs_add_cs(cs);
-
-       *cs_seq = cs->sequence;
-
-       job = hl_cs_allocate_job(hdev, q_type, true);
-       if (!job) {
-               ctx->cs_counters.out_of_mem_drop_cnt++;
-               dev_err(hdev->dev, "Failed to allocate a new job\n");
-               rc = -ENOMEM;
-               goto put_cs;
-       }
-
-       cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
-       if (!cb) {
-               ctx->cs_counters.out_of_mem_drop_cnt++;
-               kfree(job);
-               rc = -EFAULT;
-               goto put_cs;
-       }
-
-       if (cs->type == CS_TYPE_WAIT)
-               cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
-       else
-               cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
-
-       job->id = 0;
-       job->cs = cs;
-       job->user_cb = cb;
-       job->user_cb->cs_cnt++;
-       job->user_cb_size = cb_size;
-       job->hw_queue_id = q_idx;
-
-       /*
-        * No need for parsing, the user CB is the patched CB.
-        * We call hl_cb_destroy() for two reasons - we don't need the CB in
-        * the CB idr anymore and to decrement its refcount as it was
-        * incremented inside hl_cb_kernel_create().
-        */
-       job->patched_cb = job->user_cb;
-       job->job_cb_size = job->user_cb_size;
-       hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
-
-       cs->jobs_in_queue_cnt[job->hw_queue_id]++;
-
-       list_add_tail(&job->cs_node, &cs->job_list);
-
-       /* increment refcount as for external queues we get completion */
-       cs_get(cs);
-
-       hl_debugfs_add_job(hdev, job);
-
-       rc = hl_hw_queue_schedule_cs(cs);
-       if (rc) {
-               if (rc != -EAGAIN)
-                       dev_err(hdev->dev,
-                               "Failed to submit CS %d.%llu to H/W queues, error %d\n",
-                               ctx->asid, cs->sequence, rc);
-               goto free_cs_object;
-       }
-
-       rc = HL_CS_STATUS_SUCCESS;
-       goto put_cs;
-
-free_cs_object:
-       cs_rollback(hdev, cs);
-       *cs_seq = ULLONG_MAX;
-       /* The path below is both for good and erroneous exits */
-put_cs:
-       /* We finished with the CS in this function, so put the ref */
-       cs_put(cs);
-free_signal_seq_array:
-       if (cs_type == CS_TYPE_WAIT)
-               kfree(signal_seq_arr);
-free_cs_chunk_array:
-       kfree(cs_chunk_array);
-out:
-       return rc;
-}
-
-int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
-{
-       struct hl_device *hdev = hpriv->hdev;
-       union hl_cs_args *args = data;
-       struct hl_ctx *ctx = hpriv->ctx;
-       void __user *chunks_execute, *chunks_restore;
-       enum hl_cs_type cs_type;
-       u32 num_chunks_execute, num_chunks_restore, sig_wait_flags;
-       u64 cs_seq = ULONG_MAX;
-       int rc, do_ctx_switch;
-       bool need_soft_reset = false;
-
-       if (hl_device_disabled_or_in_reset(hdev)) {
-               dev_warn_ratelimited(hdev->dev,
-                       "Device is %s. Can't submit new CS\n",
-                       atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
-               rc = -EBUSY;
-               goto out;
-       }
-
-       sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT;
-
-       if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) {
-               dev_err(hdev->dev,
-                       "Signal and wait CS flags are mutually exclusive, context %d\n",
-               ctx->asid);
-               rc = -EINVAL;
-               goto out;
-       }
-
-       if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) &&
-                       (!hdev->supports_sync_stream))) {
-               dev_err(hdev->dev, "Sync stream CS is not supported\n");
-               rc = -EINVAL;
-               goto out;
-       }
-
-       if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL)
-               cs_type = CS_TYPE_SIGNAL;
-       else if (args->in.cs_flags & HL_CS_FLAGS_WAIT)
-               cs_type = CS_TYPE_WAIT;
-       else
-               cs_type = CS_TYPE_DEFAULT;
-
-       chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
-       num_chunks_execute = args->in.num_chunks_execute;
-
-       if (cs_type == CS_TYPE_DEFAULT) {
-               if (!num_chunks_execute) {
-                       dev_err(hdev->dev,
-                               "Got execute CS with 0 chunks, context %d\n",
-                               ctx->asid);
-                       rc = -EINVAL;
-                       goto out;
-               }
-       } else if (num_chunks_execute != 1) {
-               dev_err(hdev->dev,
-                       "Sync stream CS mandates one chunk only, context %d\n",
-                       ctx->asid);
-               rc = -EINVAL;
-               goto out;
-       }
-
-       do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
-
-       if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
-               long ret;
-
-               chunks_restore =
-                       (void __user *) (uintptr_t) args->in.chunks_restore;
-               num_chunks_restore = args->in.num_chunks_restore;
-
-               mutex_lock(&hpriv->restore_phase_mutex);
-
-               if (do_ctx_switch) {
-                       rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
-                       if (rc) {
-                               dev_err_ratelimited(hdev->dev,
-                                       "Failed to switch to context %d, rejecting CS! %d\n",
-                                       ctx->asid, rc);
-                               /*
-                                * If we timedout, or if the device is not IDLE
-                                * while we want to do context-switch (-EBUSY),
-                                * we need to soft-reset because QMAN is
-                                * probably stuck. However, we can't call to
-                                * reset here directly because of deadlock, so
-                                * need to do it at the very end of this
-                                * function
-                                */
-                               if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
-                                       need_soft_reset = true;
-                               mutex_unlock(&hpriv->restore_phase_mutex);
-                               goto out;
-                       }
-               }
-
-               hdev->asic_funcs->restore_phase_topology(hdev);
-
-               if (!num_chunks_restore) {
-                       dev_dbg(hdev->dev,
-                       "Need to run restore phase but restore CS is empty\n");
-                       rc = 0;
-               } else {
-                       rc = cs_ioctl_default(hpriv, chunks_restore,
-                                               num_chunks_restore, &cs_seq);
-               }
-
-               mutex_unlock(&hpriv->restore_phase_mutex);
-
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "Failed to submit restore CS for context %d (%d)\n",
-                               ctx->asid, rc);
-                       goto out;
-               }
-
-               /* Need to wait for restore completion before execution phase */
-               if (num_chunks_restore) {
-                       ret = _hl_cs_wait_ioctl(hdev, ctx,
-                                       jiffies_to_usecs(hdev->timeout_jiffies),
-                                       cs_seq);
-                       if (ret <= 0) {
-                               dev_err(hdev->dev,
-                                       "Restore CS for context %d failed to complete %ld\n",
-                                       ctx->asid, ret);
-                               rc = -ENOEXEC;
-                               goto out;
-                       }
-               }
-
-               ctx->thread_ctx_switch_wait_token = 1;
-       } else if (!ctx->thread_ctx_switch_wait_token) {
-               u32 tmp;
-
-               rc = hl_poll_timeout_memory(hdev,
-                       &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
-                       100, jiffies_to_usecs(hdev->timeout_jiffies), false);
-
-               if (rc == -ETIMEDOUT) {
-                       dev_err(hdev->dev,
-                               "context switch phase timeout (%d)\n", tmp);
-                       goto out;
-               }
-       }
-
-       if (cs_type == CS_TYPE_DEFAULT)
-               rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute,
-                                       &cs_seq);
-       else
-               rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute,
-                                               num_chunks_execute, &cs_seq);
-
-out:
-       if (rc != -EAGAIN) {
-               memset(args, 0, sizeof(*args));
-               args->out.status = rc;
-               args->out.seq = cs_seq;
-       }
-
-       if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset))
-               hl_device_reset(hdev, false, false);
-
-       return rc;
-}
-
-static long _hl_cs_wait_ioctl(struct hl_device *hdev,
-               struct hl_ctx *ctx, u64 timeout_us, u64 seq)
-{
-       struct dma_fence *fence;
-       unsigned long timeout;
-       long rc;
-
-       if (timeout_us == MAX_SCHEDULE_TIMEOUT)
-               timeout = timeout_us;
-       else
-               timeout = usecs_to_jiffies(timeout_us);
-
-       hl_ctx_get(hdev, ctx);
-
-       fence = hl_ctx_get_fence(ctx, seq);
-       if (IS_ERR(fence)) {
-               rc = PTR_ERR(fence);
-               if (rc == -EINVAL)
-                       dev_notice_ratelimited(hdev->dev,
-                               "Can't wait on CS %llu because current CS is at seq %llu\n",
-                               seq, ctx->cs_sequence);
-       } else if (fence) {
-               rc = dma_fence_wait_timeout(fence, true, timeout);
-               if (fence->error == -ETIMEDOUT)
-                       rc = -ETIMEDOUT;
-               else if (fence->error == -EIO)
-                       rc = -EIO;
-               dma_fence_put(fence);
-       } else {
-               dev_dbg(hdev->dev,
-                       "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
-                       seq, ctx->cs_sequence);
-               rc = 1;
-       }
-
-       hl_ctx_put(ctx);
-
-       return rc;
-}
-
-int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
-{
-       struct hl_device *hdev = hpriv->hdev;
-       union hl_wait_cs_args *args = data;
-       u64 seq = args->in.seq;
-       long rc;
-
-       rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq);
-
-       memset(args, 0, sizeof(*args));
-
-       if (rc < 0) {
-               if (rc == -ERESTARTSYS) {
-                       dev_err_ratelimited(hdev->dev,
-                               "user process got signal while waiting for CS handle %llu\n",
-                               seq);
-                       args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
-                       rc = -EINTR;
-               } else if (rc == -ETIMEDOUT) {
-                       dev_err_ratelimited(hdev->dev,
-                               "CS %llu has timed-out while user process is waiting for it\n",
-                               seq);
-                       args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
-               } else if (rc == -EIO) {
-                       dev_err_ratelimited(hdev->dev,
-                               "CS %llu has been aborted while user process is waiting for it\n",
-                               seq);
-                       args->out.status = HL_WAIT_CS_STATUS_ABORTED;
-               }
-               return rc;
-       }
-
-       if (rc == 0)
-               args->out.status = HL_WAIT_CS_STATUS_BUSY;
-       else
-               args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
-
-       return 0;
-}
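
The wait ioctl above maps the kernel-internal wait result onto the uapi status codes: a negative return becomes INTERRUPTED/TIMEDOUT/ABORTED, zero becomes BUSY (the CS did not complete within the timeout) and a positive value becomes COMPLETED. Below is a minimal userspace-side sketch of consuming that mapping, assuming only the definitions exported in uapi/misc/habanalabs.h (HL_IOCTL_WAIT_CS, union hl_wait_cs_args and the HL_WAIT_CS_STATUS_* values); the helper name, include path and error handling are illustrative, not part of this patch.

/* Hypothetical helper: wait for a previously submitted CS sequence number. */
#include <errno.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* installed uapi header; path may differ */

static int example_wait_for_cs(int fd, __u64 seq, __u64 timeout_us)
{
	union hl_wait_cs_args args = {0};

	args.in.seq = seq;
	args.in.timeout_us = timeout_us;

	if (ioctl(fd, HL_IOCTL_WAIT_CS, &args))
		return -errno;	/* EINTR, ETIMEDOUT or EIO, as mapped above */

	switch (args.out.status) {
	case HL_WAIT_CS_STATUS_COMPLETED:
		return 0;
	case HL_WAIT_CS_STATUS_BUSY:
		return -EBUSY;	/* CS has not completed within timeout_us */
	default:
		return -EIO;
	}
}
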
diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile
new file mode 100644 (file)
index 0000000..97d03b5
--- /dev/null
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+subdir-ccflags-y += -I$(src)/common
+
+HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
+               common/asid.o common/habanalabs_ioctl.o \
+               common/command_buffer.o common/hw_queue.o common/irq.o \
+               common/sysfs.o common/hwmon.o common/memory.o \
+               common/command_submission.o common/mmu.o common/firmware_if.o \
+               common/pci.o
diff --git a/drivers/misc/habanalabs/common/asid.c b/drivers/misc/habanalabs/common/asid.c
new file mode 100644 (file)
index 0000000..a2fdf31
--- /dev/null
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/slab.h>
+
+int hl_asid_init(struct hl_device *hdev)
+{
+       hdev->asid_bitmap = kcalloc(BITS_TO_LONGS(hdev->asic_prop.max_asid),
+                                       sizeof(*hdev->asid_bitmap), GFP_KERNEL);
+       if (!hdev->asid_bitmap)
+               return -ENOMEM;
+
+       mutex_init(&hdev->asid_mutex);
+
+       /* ASID 0 is reserved for the kernel driver and device CPU */
+       set_bit(0, hdev->asid_bitmap);
+
+       return 0;
+}
+
+void hl_asid_fini(struct hl_device *hdev)
+{
+       mutex_destroy(&hdev->asid_mutex);
+       kfree(hdev->asid_bitmap);
+}
+
+unsigned long hl_asid_alloc(struct hl_device *hdev)
+{
+       unsigned long found;
+
+       mutex_lock(&hdev->asid_mutex);
+
+       found = find_first_zero_bit(hdev->asid_bitmap,
+                                       hdev->asic_prop.max_asid);
+       if (found == hdev->asic_prop.max_asid)
+               found = 0;
+       else
+               set_bit(found, hdev->asid_bitmap);
+
+       mutex_unlock(&hdev->asid_mutex);
+
+       return found;
+}
+
+void hl_asid_free(struct hl_device *hdev, unsigned long asid)
+{
+       if (WARN((asid == 0 || asid >= hdev->asic_prop.max_asid),
+                                               "Invalid ASID %lu", asid))
+               return;
+       clear_bit(asid, hdev->asid_bitmap);
+}
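
hl_asid_alloc() above returns 0 when the bitmap is exhausted, which is safe to treat as failure because ASID 0 is permanently reserved for the kernel driver and the device CPU. A brief caller sketch follows; it assumes only the hl_device/hl_ctx fields and HL_KERNEL_ASID_ID from the common habanalabs.h, and the example_* function names are hypothetical, loosely mirroring what the context code does rather than reproducing it.

/* Hypothetical caller: assign an ASID when creating a user context. */
static int example_ctx_assign_asid(struct hl_device *hdev, struct hl_ctx *ctx)
{
	ctx->asid = hl_asid_alloc(hdev);
	if (!ctx->asid) {
		/* 0 is reserved, so a 0 return means the pool is exhausted */
		dev_err(hdev->dev, "No free ASID for a new context\n");
		return -ENOMEM;
	}

	return 0;
}

/* ...and release it again on context teardown */
static void example_ctx_release_asid(struct hl_device *hdev, struct hl_ctx *ctx)
{
	if (ctx->asid != HL_KERNEL_ASID_ID)
		hl_asid_free(hdev, ctx->asid);
}
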
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
new file mode 100644 (file)
index 0000000..02d13f7
--- /dev/null
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include <uapi/misc/habanalabs.h>
+#include "habanalabs.h"
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
+{
+       hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
+                       (void *) (uintptr_t) cb->kernel_address,
+                       cb->bus_address);
+       kfree(cb);
+}
+
+static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb)
+{
+       if (cb->is_pool) {
+               spin_lock(&hdev->cb_pool_lock);
+               list_add(&cb->pool_list, &hdev->cb_pool);
+               spin_unlock(&hdev->cb_pool_lock);
+       } else {
+               cb_fini(hdev, cb);
+       }
+}
+
+static void cb_release(struct kref *ref)
+{
+       struct hl_device *hdev;
+       struct hl_cb *cb;
+
+       cb = container_of(ref, struct hl_cb, refcount);
+       hdev = cb->hdev;
+
+       hl_debugfs_remove_cb(cb);
+
+       cb_do_release(hdev, cb);
+}
+
+static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
+                                       int ctx_id)
+{
+       struct hl_cb *cb;
+       void *p;
+
+       /*
+        * We use GFP_ATOMIC here because this function can be called from
+        * the latency-sensitive code path for command submission. Due to H/W
+        * limitations in some of the ASICs, the kernel must copy the user CB
+        * that is designated for an external queue and actually enqueue
+        * the kernel's copy. Hence, we must never sleep in this code section
+        * and must use GFP_ATOMIC for all memory allocations.
+        */
+       if (ctx_id == HL_KERNEL_ASID_ID)
+               cb = kzalloc(sizeof(*cb), GFP_ATOMIC);
+       else
+               cb = kzalloc(sizeof(*cb), GFP_KERNEL);
+
+       if (!cb)
+               return NULL;
+
+       if (ctx_id == HL_KERNEL_ASID_ID)
+               p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
+                                               &cb->bus_address, GFP_ATOMIC);
+       else
+               p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
+                                               &cb->bus_address,
+                                               GFP_USER | __GFP_ZERO);
+       if (!p) {
+               dev_err(hdev->dev,
+                       "failed to allocate %d of dma memory for CB\n",
+                       cb_size);
+               kfree(cb);
+               return NULL;
+       }
+
+       cb->kernel_address = (u64) (uintptr_t) p;
+       cb->size = cb_size;
+
+       return cb;
+}
+
+int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
+                       u32 cb_size, u64 *handle, int ctx_id)
+{
+       struct hl_cb *cb;
+       bool alloc_new_cb = true;
+       int rc;
+
+       /*
+        * Can't use generic function to check this because of special case
+        * where we create a CB as part of the reset process
+        */
+       if ((hdev->disabled) || ((atomic_read(&hdev->in_reset)) &&
+                                       (ctx_id != HL_KERNEL_ASID_ID))) {
+               dev_warn_ratelimited(hdev->dev,
+                       "Device is disabled or in reset. Can't create new CBs\n");
+               rc = -EBUSY;
+               goto out_err;
+       }
+
+       if (cb_size > SZ_2M) {
+               dev_err(hdev->dev, "CB size %d must be less than %d\n",
+                       cb_size, SZ_2M);
+               rc = -EINVAL;
+               goto out_err;
+       }
+
+       /* Minimum allocation must be PAGE SIZE */
+       if (cb_size < PAGE_SIZE)
+               cb_size = PAGE_SIZE;
+
+       if (ctx_id == HL_KERNEL_ASID_ID &&
+                       cb_size <= hdev->asic_prop.cb_pool_cb_size) {
+
+               spin_lock(&hdev->cb_pool_lock);
+               if (!list_empty(&hdev->cb_pool)) {
+                       cb = list_first_entry(&hdev->cb_pool, typeof(*cb),
+                                       pool_list);
+                       list_del(&cb->pool_list);
+                       spin_unlock(&hdev->cb_pool_lock);
+                       alloc_new_cb = false;
+               } else {
+                       spin_unlock(&hdev->cb_pool_lock);
+                       dev_dbg(hdev->dev, "CB pool is empty\n");
+               }
+       }
+
+       if (alloc_new_cb) {
+               cb = hl_cb_alloc(hdev, cb_size, ctx_id);
+               if (!cb) {
+                       rc = -ENOMEM;
+                       goto out_err;
+               }
+       }
+
+       cb->hdev = hdev;
+       cb->ctx_id = ctx_id;
+
+       spin_lock(&mgr->cb_lock);
+       rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
+       spin_unlock(&mgr->cb_lock);
+
+       if (rc < 0) {
+               dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n");
+               goto release_cb;
+       }
+
+       cb->id = rc;
+
+       kref_init(&cb->refcount);
+       spin_lock_init(&cb->lock);
+
+       /*
+        * idr is 32-bit so we can safely OR it with a mask that is above
+        * 32 bit
+        */
+       *handle = cb->id | HL_MMAP_CB_MASK;
+       *handle <<= PAGE_SHIFT;
+
+       hl_debugfs_add_cb(cb);
+
+       return 0;
+
+release_cb:
+       cb_do_release(hdev, cb);
+out_err:
+       *handle = 0;
+
+       return rc;
+}
+
+int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle)
+{
+       struct hl_cb *cb;
+       u32 handle;
+       int rc = 0;
+
+       /*
+        * The handle was given to the user for mmap, so shift it back to
+        * the value that the IDR module originally returned
+        */
+       cb_handle >>= PAGE_SHIFT;
+       handle = (u32) cb_handle;
+
+       spin_lock(&mgr->cb_lock);
+
+       cb = idr_find(&mgr->cb_handles, handle);
+       if (cb) {
+               idr_remove(&mgr->cb_handles, handle);
+               spin_unlock(&mgr->cb_lock);
+               kref_put(&cb->refcount, cb_release);
+       } else {
+               spin_unlock(&mgr->cb_lock);
+               dev_err(hdev->dev,
+                       "CB destroy failed, no match to handle 0x%x\n", handle);
+               rc = -EINVAL;
+       }
+
+       return rc;
+}
+
+int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+       union hl_cb_args *args = data;
+       struct hl_device *hdev = hpriv->hdev;
+       u64 handle = 0;
+       int rc;
+
+       if (hl_device_disabled_or_in_reset(hdev)) {
+               dev_warn_ratelimited(hdev->dev,
+                       "Device is %s. Can't execute CB IOCTL\n",
+                       atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+               return -EBUSY;
+       }
+
+       switch (args->in.op) {
+       case HL_CB_OP_CREATE:
+               if (args->in.cb_size > HL_MAX_CB_SIZE) {
+                       dev_err(hdev->dev,
+                               "User requested CB size %d must be less than %d\n",
+                               args->in.cb_size, HL_MAX_CB_SIZE);
+                       rc = -EINVAL;
+               } else {
+                       rc = hl_cb_create(hdev, &hpriv->cb_mgr,
+                                               args->in.cb_size, &handle,
+                                               hpriv->ctx->asid);
+               }
+
+               memset(args, 0, sizeof(*args));
+               args->out.cb_handle = handle;
+               break;
+
+       case HL_CB_OP_DESTROY:
+               rc = hl_cb_destroy(hdev, &hpriv->cb_mgr,
+                                       args->in.cb_handle);
+               break;
+
+       default:
+               rc = -ENOTTY;
+               break;
+       }
+
+       return rc;
+}
+
+static void cb_vm_close(struct vm_area_struct *vma)
+{
+       struct hl_cb *cb = (struct hl_cb *) vma->vm_private_data;
+       long new_mmap_size;
+
+       new_mmap_size = cb->mmap_size - (vma->vm_end - vma->vm_start);
+
+       if (new_mmap_size > 0) {
+               cb->mmap_size = new_mmap_size;
+               return;
+       }
+
+       spin_lock(&cb->lock);
+       cb->mmap = false;
+       spin_unlock(&cb->lock);
+
+       hl_cb_put(cb);
+       vma->vm_private_data = NULL;
+}
+
+static const struct vm_operations_struct cb_vm_ops = {
+       .close = cb_vm_close
+};
+
+int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       struct hl_cb *cb;
+       phys_addr_t address;
+       u32 handle;
+       int rc;
+
+       handle = vma->vm_pgoff;
+
+       /* reference was taken here */
+       cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle);
+       if (!cb) {
+               dev_err(hdev->dev,
+                       "CB mmap failed, no match to handle 0x%x\n", handle);
+               return -EINVAL;
+       }
+
+       /* Validation check */
+       if ((vma->vm_end - vma->vm_start) != ALIGN(cb->size, PAGE_SIZE)) {
+               dev_err(hdev->dev,
+                       "CB mmap failed, mmap size 0x%lx != 0x%x cb size\n",
+                       vma->vm_end - vma->vm_start, cb->size);
+               rc = -EINVAL;
+               goto put_cb;
+       }
+
+       spin_lock(&cb->lock);
+
+       if (cb->mmap) {
+               dev_err(hdev->dev,
+                       "CB mmap failed, CB already mmaped to user\n");
+               rc = -EINVAL;
+               goto release_lock;
+       }
+
+       cb->mmap = true;
+
+       spin_unlock(&cb->lock);
+
+       vma->vm_ops = &cb_vm_ops;
+
+       /*
+        * Note: We're transferring the cb reference to
+        * vma->vm_private_data here.
+        */
+
+       vma->vm_private_data = cb;
+
+       /* Calculate address for CB */
+       address = virt_to_phys((void *) (uintptr_t) cb->kernel_address);
+
+       rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
+                                       address, cb->size);
+
+       if (rc) {
+               spin_lock(&cb->lock);
+               cb->mmap = false;
+               goto release_lock;
+       }
+
+       cb->mmap_size = cb->size;
+
+       return 0;
+
+release_lock:
+       spin_unlock(&cb->lock);
+put_cb:
+       hl_cb_put(cb);
+       return rc;
+}
+
+struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
+                       u32 handle)
+{
+       struct hl_cb *cb;
+
+       spin_lock(&mgr->cb_lock);
+       cb = idr_find(&mgr->cb_handles, handle);
+
+       if (!cb) {
+               spin_unlock(&mgr->cb_lock);
+               dev_warn(hdev->dev,
+                       "CB get failed, no match to handle 0x%x\n", handle);
+               return NULL;
+       }
+
+       kref_get(&cb->refcount);
+
+       spin_unlock(&mgr->cb_lock);
+
+       return cb;
+
+}
+
+void hl_cb_put(struct hl_cb *cb)
+{
+       kref_put(&cb->refcount, cb_release);
+}
+
+void hl_cb_mgr_init(struct hl_cb_mgr *mgr)
+{
+       spin_lock_init(&mgr->cb_lock);
+       idr_init(&mgr->cb_handles);
+}
+
+void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
+{
+       struct hl_cb *cb;
+       struct idr *idp;
+       u32 id;
+
+       idp = &mgr->cb_handles;
+
+       idr_for_each_entry(idp, cb, id) {
+               if (kref_put(&cb->refcount, cb_release) != 1)
+                       dev_err(hdev->dev,
+                               "CB %d for CTX ID %d is still alive\n",
+                               id, cb->ctx_id);
+       }
+
+       idr_destroy(&mgr->cb_handles);
+}
+
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
+{
+       u64 cb_handle;
+       struct hl_cb *cb;
+       int rc;
+
+       rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
+                       HL_KERNEL_ASID_ID);
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to allocate CB for the kernel driver %d\n", rc);
+               return NULL;
+       }
+
+       cb_handle >>= PAGE_SHIFT;
+       cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle);
+       /* hl_cb_get should never fail here so use kernel WARN */
+       WARN(!cb, "Kernel CB handle invalid 0x%x\n", (u32) cb_handle);
+       if (!cb)
+               goto destroy_cb;
+
+       return cb;
+
+destroy_cb:
+       hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb_handle << PAGE_SHIFT);
+
+       return NULL;
+}
+
+int hl_cb_pool_init(struct hl_device *hdev)
+{
+       struct hl_cb *cb;
+       int i;
+
+       INIT_LIST_HEAD(&hdev->cb_pool);
+       spin_lock_init(&hdev->cb_pool_lock);
+
+       for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
+               cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
+                               HL_KERNEL_ASID_ID);
+               if (cb) {
+                       cb->is_pool = true;
+                       list_add(&cb->pool_list, &hdev->cb_pool);
+               } else {
+                       hl_cb_pool_fini(hdev);
+                       return -ENOMEM;
+               }
+       }
+
+       return 0;
+}
+
+int hl_cb_pool_fini(struct hl_device *hdev)
+{
+       struct hl_cb *cb, *tmp;
+
+       list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) {
+               list_del(&cb->pool_list);
+               cb_fini(hdev, cb);
+       }
+
+       return 0;
+}
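
The handle that hl_cb_create() hands back is the IDR id OR'd with HL_MMAP_CB_MASK and shifted left by PAGE_SHIFT, which is why hl_cb_destroy() shifts it back down before the IDR lookup and why userspace can pass the handle straight to mmap() as the file offset (the driver recovers the id from vm_pgoff in the mmap path). A userspace sketch of that round trip, assuming only the uapi names from uapi/misc/habanalabs.h (HL_IOCTL_CB, union hl_cb_args, HL_CB_OP_CREATE); the helper name, include path and error handling are illustrative, not part of this patch.

/* Hypothetical helper: create a command buffer and map it into userspace. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <misc/habanalabs.h>	/* installed uapi header; path may differ */

static void *example_create_and_map_cb(int fd, uint32_t size, uint64_t *handle)
{
	union hl_cb_args args = {0};
	void *p;

	args.in.op = HL_CB_OP_CREATE;
	args.in.cb_size = size;

	if (ioctl(fd, HL_IOCTL_CB, &args))
		return NULL;

	*handle = args.out.cb_handle;

	/* the handle already encodes (id | mask) << PAGE_SHIFT, so it is the offset */
	p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
		 (off_t) *handle);

	return p == MAP_FAILED ? NULL : p;
}
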
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
new file mode 100644 (file)
index 0000000..c605be8
--- /dev/null
@@ -0,0 +1,1232 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include <uapi/misc/habanalabs.h>
+#include "habanalabs.h"
+
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+
+#define HL_CS_FLAGS_SIG_WAIT   (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT)
+
+static void job_wq_completion(struct work_struct *work);
+static long _hl_cs_wait_ioctl(struct hl_device *hdev,
+               struct hl_ctx *ctx, u64 timeout_us, u64 seq);
+static void cs_do_release(struct kref *ref);
+
+static void hl_sob_reset(struct kref *ref)
+{
+       struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
+                                                       kref);
+       struct hl_device *hdev = hw_sob->hdev;
+
+       hdev->asic_funcs->reset_sob(hdev, hw_sob);
+}
+
+void hl_sob_reset_error(struct kref *ref)
+{
+       struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
+                                                       kref);
+       struct hl_device *hdev = hw_sob->hdev;
+
+       dev_crit(hdev->dev,
+                       "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
+                       hw_sob->q_idx, hw_sob->sob_id);
+}
+
+static const char *hl_fence_get_driver_name(struct dma_fence *fence)
+{
+       return "HabanaLabs";
+}
+
+static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
+{
+       struct hl_cs_compl *hl_cs_compl =
+               container_of(fence, struct hl_cs_compl, base_fence);
+
+       return dev_name(hl_cs_compl->hdev->dev);
+}
+
+static bool hl_fence_enable_signaling(struct dma_fence *fence)
+{
+       return true;
+}
+
+static void hl_fence_release(struct dma_fence *fence)
+{
+       struct hl_cs_compl *hl_cs_cmpl =
+               container_of(fence, struct hl_cs_compl, base_fence);
+       struct hl_device *hdev = hl_cs_cmpl->hdev;
+
+       /* EBUSY means the CS was never submitted and hence we don't have
+        * an attached hw_sob object that we should handle here
+        */
+       if (fence->error == -EBUSY)
+               goto free;
+
+       if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
+                       (hl_cs_cmpl->type == CS_TYPE_WAIT)) {
+
+               dev_dbg(hdev->dev,
+                       "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
+                       hl_cs_cmpl->cs_seq,
+                       hl_cs_cmpl->type,
+                       hl_cs_cmpl->hw_sob->sob_id,
+                       hl_cs_cmpl->sob_val);
+
+               /*
+                * A signal CS can get completion while the corresponding wait
+                * for signal CS is on its way to the PQ. The wait for signal CS
+                * will get stuck if the signal CS incremented the SOB to its
+                * max value and there are no pending (submitted) waits on this
+                * SOB.
+                * We do the following to avoid this situation:
+                * 1. The wait for signal CS must get a ref for the signal CS as
+                *    soon as possible in cs_ioctl_signal_wait() and put it
+                *    before being submitted to the PQ but after it incremented
+                *    the SOB refcnt in init_signal_wait_cs().
+                * 2. Signal/Wait for signal CS will decrement the SOB refcnt
+                *    here.
+                * These two measures guarantee that the wait for signal CS will
+                * reset the SOB upon completion rather than the signal CS and
+                * hence the above scenario is avoided.
+                */
+               kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
+       }
+
+free:
+       kfree_rcu(hl_cs_cmpl, base_fence.rcu);
+}
+
+static const struct dma_fence_ops hl_fence_ops = {
+       .get_driver_name = hl_fence_get_driver_name,
+       .get_timeline_name = hl_fence_get_timeline_name,
+       .enable_signaling = hl_fence_enable_signaling,
+       .release = hl_fence_release
+};
+
+static void cs_get(struct hl_cs *cs)
+{
+       kref_get(&cs->refcount);
+}
+
+static int cs_get_unless_zero(struct hl_cs *cs)
+{
+       return kref_get_unless_zero(&cs->refcount);
+}
+
+static void cs_put(struct hl_cs *cs)
+{
+       kref_put(&cs->refcount, cs_do_release);
+}
+
+static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
+{
+       /*
+        * Patched CB is created for external queues jobs, and for H/W queues
+        * jobs if the user CB was allocated by driver and MMU is disabled.
+        */
+       return (job->queue_type == QUEUE_TYPE_EXT ||
+                       (job->queue_type == QUEUE_TYPE_HW &&
+                                       job->is_kernel_allocated_cb &&
+                                       !hdev->mmu_enable));
+}
+
+/*
+ * cs_parser - parse the user command submission
+ *
+ * @hpriv      : pointer to the private data of the fd
+ * @job        : pointer to the job that holds the command submission info
+ *
+ * The function parses the command submission of the user. It calls the
+ * ASIC specific parser, which returns a list of memory blocks to send
+ * to the device as different command buffers
+ *
+ */
+static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       struct hl_cs_parser parser;
+       int rc;
+
+       parser.ctx_id = job->cs->ctx->asid;
+       parser.cs_sequence = job->cs->sequence;
+       parser.job_id = job->id;
+
+       parser.hw_queue_id = job->hw_queue_id;
+       parser.job_userptr_list = &job->userptr_list;
+       parser.patched_cb = NULL;
+       parser.user_cb = job->user_cb;
+       parser.user_cb_size = job->user_cb_size;
+       parser.queue_type = job->queue_type;
+       parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
+       job->patched_cb = NULL;
+
+       rc = hdev->asic_funcs->cs_parser(hdev, &parser);
+
+       if (is_cb_patched(hdev, job)) {
+               if (!rc) {
+                       job->patched_cb = parser.patched_cb;
+                       job->job_cb_size = parser.patched_cb_size;
+                       job->contains_dma_pkt = parser.contains_dma_pkt;
+
+                       spin_lock(&job->patched_cb->lock);
+                       job->patched_cb->cs_cnt++;
+                       spin_unlock(&job->patched_cb->lock);
+               }
+
+               /*
+                * Whether the parsing worked or not, we don't need the
+                * original CB anymore because it was already parsed and
+                * won't be accessed again for this CS
+                */
+               spin_lock(&job->user_cb->lock);
+               job->user_cb->cs_cnt--;
+               spin_unlock(&job->user_cb->lock);
+               hl_cb_put(job->user_cb);
+               job->user_cb = NULL;
+       } else if (!rc) {
+               job->job_cb_size = job->user_cb_size;
+       }
+
+       return rc;
+}
+
+static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
+{
+       struct hl_cs *cs = job->cs;
+
+       if (is_cb_patched(hdev, job)) {
+               hl_userptr_delete_list(hdev, &job->userptr_list);
+
+               /*
+                * We might arrive here from rollback and patched CB wasn't
+                * created, so we need to check it's not NULL
+                */
+               if (job->patched_cb) {
+                       spin_lock(&job->patched_cb->lock);
+                       job->patched_cb->cs_cnt--;
+                       spin_unlock(&job->patched_cb->lock);
+
+                       hl_cb_put(job->patched_cb);
+               }
+       }
+
+       /* For H/W queue jobs, if a user CB was allocated by driver and MMU is
+        * enabled, the user CB isn't released in cs_parser() and thus should be
+        * released here.
+        */
+       if (job->queue_type == QUEUE_TYPE_HW &&
+                       job->is_kernel_allocated_cb && hdev->mmu_enable) {
+               spin_lock(&job->user_cb->lock);
+               job->user_cb->cs_cnt--;
+               spin_unlock(&job->user_cb->lock);
+
+               hl_cb_put(job->user_cb);
+       }
+
+       /*
+        * This is the only place where there can be multiple threads
+        * modifying the list at the same time
+        */
+       spin_lock(&cs->job_lock);
+       list_del(&job->cs_node);
+       spin_unlock(&cs->job_lock);
+
+       hl_debugfs_remove_job(hdev, job);
+
+       if (job->queue_type == QUEUE_TYPE_EXT ||
+                       job->queue_type == QUEUE_TYPE_HW)
+               cs_put(cs);
+
+       kfree(job);
+}
+
+static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
+{
+       hdev->aggregated_cs_counters.device_in_reset_drop_cnt +=
+                       ctx->cs_counters.device_in_reset_drop_cnt;
+       hdev->aggregated_cs_counters.out_of_mem_drop_cnt +=
+                       ctx->cs_counters.out_of_mem_drop_cnt;
+       hdev->aggregated_cs_counters.parsing_drop_cnt +=
+                       ctx->cs_counters.parsing_drop_cnt;
+       hdev->aggregated_cs_counters.queue_full_drop_cnt +=
+                       ctx->cs_counters.queue_full_drop_cnt;
+}
+
+static void cs_do_release(struct kref *ref)
+{
+       struct hl_cs *cs = container_of(ref, struct hl_cs,
+                                               refcount);
+       struct hl_device *hdev = cs->ctx->hdev;
+       struct hl_cs_job *job, *tmp;
+
+       cs->completed = true;
+
+       /*
+        * Although reaching here means that all external jobs have finished
+        * (each of them took a refcount on the CS), we still need to go over
+        * the internal jobs and free them. Otherwise we would leak memory
+        * and, worse, the CS object (and potentially the CTX object) could
+        * be released while a JOB still holds a pointer to them (but no
+        * reference).
+        */
+       list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
+               free_job(hdev, job);
+
+       /* We also need to update CI for internal queues */
+       if (cs->submitted) {
+               hdev->asic_funcs->hw_queues_lock(hdev);
+
+               hdev->cs_active_cnt--;
+               if (!hdev->cs_active_cnt) {
+                       struct hl_device_idle_busy_ts *ts;
+
+                       ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
+                       ts->busy_to_idle_ts = ktime_get();
+
+                       if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
+                               hdev->idle_busy_ts_idx = 0;
+               } else if (hdev->cs_active_cnt < 0) {
+                       dev_crit(hdev->dev, "CS active cnt %d is negative\n",
+                               hdev->cs_active_cnt);
+               }
+
+               hdev->asic_funcs->hw_queues_unlock(hdev);
+
+               hl_int_hw_queue_update_ci(cs);
+
+               spin_lock(&hdev->hw_queues_mirror_lock);
+               /* remove CS from hw_queues mirror list */
+               list_del_init(&cs->mirror_node);
+               spin_unlock(&hdev->hw_queues_mirror_lock);
+
+               /*
+                * Don't cancel the TDR in case this CS has timed out, as we
+                * might be running from the TDR context
+                */
+               if ((!cs->timedout) &&
+                       (hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) {
+                       struct hl_cs *next;
+
+                       if (cs->tdr_active)
+                               cancel_delayed_work_sync(&cs->work_tdr);
+
+                       spin_lock(&hdev->hw_queues_mirror_lock);
+
+                       /* queue TDR for next CS */
+                       next = list_first_entry_or_null(
+                                       &hdev->hw_queues_mirror_list,
+                                       struct hl_cs, mirror_node);
+
+                       if ((next) && (!next->tdr_active)) {
+                               next->tdr_active = true;
+                               schedule_delayed_work(&next->work_tdr,
+                                                       hdev->timeout_jiffies);
+                       }
+
+                       spin_unlock(&hdev->hw_queues_mirror_lock);
+               }
+       } else if (cs->type == CS_TYPE_WAIT) {
+               /*
+                * In case the wait for signal CS was submitted, the put occurs
+                * in init_signal_wait_cs() right before hanging on the PQ.
+                */
+               dma_fence_put(cs->signal_fence);
+       }
+
+       /*
+        * Must be called before hl_ctx_put because inside we use ctx to get
+        * the device
+        */
+       hl_debugfs_remove_cs(cs);
+
+       hl_ctx_put(cs->ctx);
+
+       /* We need to mark an error for not submitted because in that case
+        * the dma fence release flow is different. Mainly, we don't need
+        * to handle hw_sob for signal/wait
+        */
+       if (cs->timedout)
+               dma_fence_set_error(cs->fence, -ETIMEDOUT);
+       else if (cs->aborted)
+               dma_fence_set_error(cs->fence, -EIO);
+       else if (!cs->submitted)
+               dma_fence_set_error(cs->fence, -EBUSY);
+
+       dma_fence_signal(cs->fence);
+       dma_fence_put(cs->fence);
+
+       cs_counters_aggregate(hdev, cs->ctx);
+
+       kfree(cs->jobs_in_queue_cnt);
+       kfree(cs);
+}
+
+static void cs_timedout(struct work_struct *work)
+{
+       struct hl_device *hdev;
+       int ctx_asid, rc;
+       struct hl_cs *cs = container_of(work, struct hl_cs,
+                                                work_tdr.work);
+       rc = cs_get_unless_zero(cs);
+       if (!rc)
+               return;
+
+       if ((!cs->submitted) || (cs->completed)) {
+               cs_put(cs);
+               return;
+       }
+
+       /* Mark the CS as timed out so we won't try to cancel its TDR */
+       cs->timedout = true;
+
+       hdev = cs->ctx->hdev;
+       ctx_asid = cs->ctx->asid;
+
+       dev_err(hdev->dev,
+               "Command submission %llu has not finished in time!\n",
+               cs->sequence);
+
+       cs_put(cs);
+
+       if (hdev->reset_on_lockup)
+               hl_device_reset(hdev, false, false);
+}
+
+static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
+                       enum hl_cs_type cs_type, struct hl_cs **cs_new)
+{
+       struct hl_cs_compl *cs_cmpl;
+       struct dma_fence *other = NULL;
+       struct hl_cs *cs;
+       int rc;
+
+       cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
+       if (!cs)
+               return -ENOMEM;
+
+       cs->ctx = ctx;
+       cs->submitted = false;
+       cs->completed = false;
+       cs->type = cs_type;
+       INIT_LIST_HEAD(&cs->job_list);
+       INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
+       kref_init(&cs->refcount);
+       spin_lock_init(&cs->job_lock);
+
+       cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
+       if (!cs_cmpl) {
+               rc = -ENOMEM;
+               goto free_cs;
+       }
+
+       cs_cmpl->hdev = hdev;
+       cs_cmpl->type = cs->type;
+       spin_lock_init(&cs_cmpl->lock);
+       cs->fence = &cs_cmpl->base_fence;
+
+       spin_lock(&ctx->cs_lock);
+
+       cs_cmpl->cs_seq = ctx->cs_sequence;
+       other = ctx->cs_pending[cs_cmpl->cs_seq &
+                               (hdev->asic_prop.max_pending_cs - 1)];
+       if ((other) && (!dma_fence_is_signaled(other))) {
+               dev_dbg(hdev->dev,
+                       "Rejecting CS because of too many in-flights CS\n");
+               rc = -EAGAIN;
+               goto free_fence;
+       }
+
+       cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+                       sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
+       if (!cs->jobs_in_queue_cnt) {
+               rc = -ENOMEM;
+               goto free_fence;
+       }
+
+       dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
+                       ctx->asid, ctx->cs_sequence);
+
+       cs->sequence = cs_cmpl->cs_seq;
+
+       ctx->cs_pending[cs_cmpl->cs_seq &
+                       (hdev->asic_prop.max_pending_cs - 1)] =
+                                                       &cs_cmpl->base_fence;
+       ctx->cs_sequence++;
+
+       dma_fence_get(&cs_cmpl->base_fence);
+
+       dma_fence_put(other);
+
+       spin_unlock(&ctx->cs_lock);
+
+       *cs_new = cs;
+
+       return 0;
+
+free_fence:
+       spin_unlock(&ctx->cs_lock);
+       kfree(cs_cmpl);
+free_cs:
+       kfree(cs);
+       return rc;
+}
+
+static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
+{
+       struct hl_cs_job *job, *tmp;
+
+       list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
+               free_job(hdev, job);
+}
+
+void hl_cs_rollback_all(struct hl_device *hdev)
+{
+       int i;
+       struct hl_cs *cs, *tmp;
+
+       /* flush all completions */
+       for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+               flush_workqueue(hdev->cq_wq[i]);
+
+       /* Make sure we don't have leftovers in the H/W queues mirror list */
+       list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
+                               mirror_node) {
+               cs_get(cs);
+               cs->aborted = true;
+               dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
+                                       cs->ctx->asid, cs->sequence);
+               cs_rollback(hdev, cs);
+               cs_put(cs);
+       }
+}
+
+static void job_wq_completion(struct work_struct *work)
+{
+       struct hl_cs_job *job = container_of(work, struct hl_cs_job,
+                                               finish_work);
+       struct hl_cs *cs = job->cs;
+       struct hl_device *hdev = cs->ctx->hdev;
+
+       /* job is no longer needed */
+       free_job(hdev, job);
+}
+
+static int validate_queue_index(struct hl_device *hdev,
+                               struct hl_cs_chunk *chunk,
+                               enum hl_queue_type *queue_type,
+                               bool *is_kernel_allocated_cb)
+{
+       struct asic_fixed_properties *asic = &hdev->asic_prop;
+       struct hw_queue_properties *hw_queue_prop;
+
+       /* This must be checked here to prevent out-of-bounds access to
+        * hw_queues_props array
+        */
+       if (chunk->queue_index >= asic->max_queues) {
+               dev_err(hdev->dev, "Queue index %d is invalid\n",
+                       chunk->queue_index);
+               return -EINVAL;
+       }
+
+       hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
+
+       if (hw_queue_prop->type == QUEUE_TYPE_NA) {
+               dev_err(hdev->dev, "Queue index %d is invalid\n",
+                       chunk->queue_index);
+               return -EINVAL;
+       }
+
+       if (hw_queue_prop->driver_only) {
+               dev_err(hdev->dev,
+                       "Queue index %d is restricted for the kernel driver\n",
+                       chunk->queue_index);
+               return -EINVAL;
+       }
+
+       *queue_type = hw_queue_prop->type;
+       *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;
+
+       return 0;
+}
+
+static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
+                                       struct hl_cb_mgr *cb_mgr,
+                                       struct hl_cs_chunk *chunk)
+{
+       struct hl_cb *cb;
+       u32 cb_handle;
+
+       cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
+
+       cb = hl_cb_get(hdev, cb_mgr, cb_handle);
+       if (!cb) {
+               dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
+               return NULL;
+       }
+
+       if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
+               dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
+               goto release_cb;
+       }
+
+       spin_lock(&cb->lock);
+       cb->cs_cnt++;
+       spin_unlock(&cb->lock);
+
+       return cb;
+
+release_cb:
+       hl_cb_put(cb);
+       return NULL;
+}
+
+struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
+               enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
+{
+       struct hl_cs_job *job;
+
+       job = kzalloc(sizeof(*job), GFP_ATOMIC);
+       if (!job)
+               return NULL;
+
+       job->queue_type = queue_type;
+       job->is_kernel_allocated_cb = is_kernel_allocated_cb;
+
+       if (is_cb_patched(hdev, job))
+               INIT_LIST_HEAD(&job->userptr_list);
+
+       if (job->queue_type == QUEUE_TYPE_EXT)
+               INIT_WORK(&job->finish_work, job_wq_completion);
+
+       return job;
+}
+
+static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
+                               u32 num_chunks, u64 *cs_seq)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       struct hl_cs_chunk *cs_chunk_array;
+       struct hl_cs_job *job;
+       struct hl_cs *cs;
+       struct hl_cb *cb;
+       bool int_queues_only = true;
+       u32 size_to_copy;
+       int rc, i;
+
+       *cs_seq = ULLONG_MAX;
+
+       if (num_chunks > HL_MAX_JOBS_PER_CS) {
+               dev_err(hdev->dev,
+                       "Number of chunks can NOT be larger than %d\n",
+                       HL_MAX_JOBS_PER_CS);
+               rc = -EINVAL;
+               goto out;
+       }
+
+       cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
+                                       GFP_ATOMIC);
+       if (!cs_chunk_array) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
+       if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
+               dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
+               rc = -EFAULT;
+               goto free_cs_chunk_array;
+       }
+
+       /* increment refcnt for context */
+       hl_ctx_get(hdev, hpriv->ctx);
+
+       rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs);
+       if (rc) {
+               hl_ctx_put(hpriv->ctx);
+               goto free_cs_chunk_array;
+       }
+
+       *cs_seq = cs->sequence;
+
+       hl_debugfs_add_cs(cs);
+
+       /* Validate ALL the CS chunks before submitting the CS */
+       for (i = 0 ; i < num_chunks ; i++) {
+               struct hl_cs_chunk *chunk = &cs_chunk_array[i];
+               enum hl_queue_type queue_type;
+               bool is_kernel_allocated_cb;
+
+               rc = validate_queue_index(hdev, chunk, &queue_type,
+                                               &is_kernel_allocated_cb);
+               if (rc) {
+                       hpriv->ctx->cs_counters.parsing_drop_cnt++;
+                       goto free_cs_object;
+               }
+
+               if (is_kernel_allocated_cb) {
+                       cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
+                       if (!cb) {
+                               hpriv->ctx->cs_counters.parsing_drop_cnt++;
+                               rc = -EINVAL;
+                               goto free_cs_object;
+                       }
+               } else {
+                       cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
+               }
+
+               if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
+                       int_queues_only = false;
+
+               job = hl_cs_allocate_job(hdev, queue_type,
+                                               is_kernel_allocated_cb);
+               if (!job) {
+                       hpriv->ctx->cs_counters.out_of_mem_drop_cnt++;
+                       dev_err(hdev->dev, "Failed to allocate a new job\n");
+                       rc = -ENOMEM;
+                       if (is_kernel_allocated_cb)
+                               goto release_cb;
+                       else
+                               goto free_cs_object;
+               }
+
+               job->id = i + 1;
+               job->cs = cs;
+               job->user_cb = cb;
+               job->user_cb_size = chunk->cb_size;
+               job->hw_queue_id = chunk->queue_index;
+
+               cs->jobs_in_queue_cnt[job->hw_queue_id]++;
+
+               list_add_tail(&job->cs_node, &cs->job_list);
+
+               /*
+                * Increment the CS reference. When the CS reference reaches
+                * 0, the CS is done and can be signaled to the user and all
+                * its resources can be freed. Only increment for JOBs on
+                * external or H/W queues, because only those JOBs get a
+                * completion
+                */
+               if (job->queue_type == QUEUE_TYPE_EXT ||
+                               job->queue_type == QUEUE_TYPE_HW)
+                       cs_get(cs);
+
+               hl_debugfs_add_job(hdev, job);
+
+               rc = cs_parser(hpriv, job);
+               if (rc) {
+                       hpriv->ctx->cs_counters.parsing_drop_cnt++;
+                       dev_err(hdev->dev,
+                               "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
+                               cs->ctx->asid, cs->sequence, job->id, rc);
+                       goto free_cs_object;
+               }
+       }
+
+       if (int_queues_only) {
+               hpriv->ctx->cs_counters.parsing_drop_cnt++;
+               dev_err(hdev->dev,
+                       "Reject CS %d.%llu because only internal queues jobs are present\n",
+                       cs->ctx->asid, cs->sequence);
+               rc = -EINVAL;
+               goto free_cs_object;
+       }
+
+       rc = hl_hw_queue_schedule_cs(cs);
+       if (rc) {
+               if (rc != -EAGAIN)
+                       dev_err(hdev->dev,
+                               "Failed to submit CS %d.%llu to H/W queues, error %d\n",
+                               cs->ctx->asid, cs->sequence, rc);
+               goto free_cs_object;
+       }
+
+       rc = HL_CS_STATUS_SUCCESS;
+       goto put_cs;
+
+release_cb:
+       spin_lock(&cb->lock);
+       cb->cs_cnt--;
+       spin_unlock(&cb->lock);
+       hl_cb_put(cb);
+free_cs_object:
+       cs_rollback(hdev, cs);
+       *cs_seq = ULLONG_MAX;
+       /* The path below is both for good and erroneous exits */
+put_cs:
+       /* We finished with the CS in this function, so put the ref */
+       cs_put(cs);
+free_cs_chunk_array:
+       kfree(cs_chunk_array);
+out:
+       return rc;
+}
+
+static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
+                               void __user *chunks, u32 num_chunks,
+                               u64 *cs_seq)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       struct hl_ctx *ctx = hpriv->ctx;
+       struct hl_cs_chunk *cs_chunk_array, *chunk;
+       struct hw_queue_properties *hw_queue_prop;
+       struct dma_fence *sig_fence = NULL;
+       struct hl_cs_job *job;
+       struct hl_cs *cs;
+       struct hl_cb *cb;
+       enum hl_queue_type q_type;
+       u64 *signal_seq_arr = NULL, signal_seq;
+       u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
+       int rc;
+
+       *cs_seq = ULLONG_MAX;
+
+       if (num_chunks > HL_MAX_JOBS_PER_CS) {
+               dev_err(hdev->dev,
+                       "Number of chunks can NOT be larger than %d\n",
+                       HL_MAX_JOBS_PER_CS);
+               rc = -EINVAL;
+               goto out;
+       }
+
+       cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
+                                       GFP_ATOMIC);
+       if (!cs_chunk_array) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
+       if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
+               dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
+               rc = -EFAULT;
+               goto free_cs_chunk_array;
+       }
+
+       /* currently it is guaranteed to have only one chunk */
+       chunk = &cs_chunk_array[0];
+       q_idx = chunk->queue_index;
+       hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
+       q_type = hw_queue_prop->type;
+
+       if ((q_idx >= hdev->asic_prop.max_queues) ||
+                       (!hw_queue_prop->supports_sync_stream)) {
+               dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
+               rc = -EINVAL;
+               goto free_cs_chunk_array;
+       }
+
+       if (cs_type == CS_TYPE_WAIT) {
+               struct hl_cs_compl *sig_waitcs_cmpl;
+
+               signal_seq_arr_len = chunk->num_signal_seq_arr;
+
+               /* currently only one signal seq is supported */
+               if (signal_seq_arr_len != 1) {
+                       dev_err(hdev->dev,
+                               "Wait for signal CS supports only one signal CS seq\n");
+                       rc = -EINVAL;
+                       goto free_cs_chunk_array;
+               }
+
+               signal_seq_arr = kmalloc_array(signal_seq_arr_len,
+                                               sizeof(*signal_seq_arr),
+                                               GFP_ATOMIC);
+               if (!signal_seq_arr) {
+                       rc = -ENOMEM;
+                       goto free_cs_chunk_array;
+               }
+
+               size_to_copy = chunk->num_signal_seq_arr *
+                               sizeof(*signal_seq_arr);
+               if (copy_from_user(signal_seq_arr,
+                                       u64_to_user_ptr(chunk->signal_seq_arr),
+                                       size_to_copy)) {
+                       dev_err(hdev->dev,
+                               "Failed to copy signal seq array from user\n");
+                       rc = -EFAULT;
+                       goto free_signal_seq_array;
+               }
+
+               /* currently it is guaranteed to have only one signal seq */
+               signal_seq = signal_seq_arr[0];
+               sig_fence = hl_ctx_get_fence(ctx, signal_seq);
+               if (IS_ERR(sig_fence)) {
+                       dev_err(hdev->dev,
+                               "Failed to get signal CS with seq 0x%llx\n",
+                               signal_seq);
+                       rc = PTR_ERR(sig_fence);
+                       goto free_signal_seq_array;
+               }
+
+               if (!sig_fence) {
+                       /* signal CS already finished */
+                       rc = 0;
+                       goto free_signal_seq_array;
+               }
+
+               sig_waitcs_cmpl =
+                       container_of(sig_fence, struct hl_cs_compl, base_fence);
+
+               if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
+                       dev_err(hdev->dev,
+                               "CS seq 0x%llx is not of a signal CS\n",
+                               signal_seq);
+                       dma_fence_put(sig_fence);
+                       rc = -EINVAL;
+                       goto free_signal_seq_array;
+               }
+
+               if (dma_fence_is_signaled(sig_fence)) {
+                       /* signal CS already finished */
+                       dma_fence_put(sig_fence);
+                       rc = 0;
+                       goto free_signal_seq_array;
+               }
+       }
+
+       /* increment refcnt for context */
+       hl_ctx_get(hdev, ctx);
+
+       rc = allocate_cs(hdev, ctx, cs_type, &cs);
+       if (rc) {
+               if (cs_type == CS_TYPE_WAIT)
+                       dma_fence_put(sig_fence);
+               hl_ctx_put(ctx);
+               goto free_signal_seq_array;
+       }
+
+       /*
+        * Save the signal CS fence for later initialization right before
+        * hanging the wait CS on the queue.
+        */
+       if (cs->type == CS_TYPE_WAIT)
+               cs->signal_fence = sig_fence;
+
+       hl_debugfs_add_cs(cs);
+
+       *cs_seq = cs->sequence;
+
+       job = hl_cs_allocate_job(hdev, q_type, true);
+       if (!job) {
+               ctx->cs_counters.out_of_mem_drop_cnt++;
+               dev_err(hdev->dev, "Failed to allocate a new job\n");
+               rc = -ENOMEM;
+               goto put_cs;
+       }
+
+       cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+       if (!cb) {
+               ctx->cs_counters.out_of_mem_drop_cnt++;
+               kfree(job);
+               rc = -EFAULT;
+               goto put_cs;
+       }
+
+       if (cs->type == CS_TYPE_WAIT)
+               cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
+       else
+               cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
+
+       job->id = 0;
+       job->cs = cs;
+       job->user_cb = cb;
+       job->user_cb->cs_cnt++;
+       job->user_cb_size = cb_size;
+       job->hw_queue_id = q_idx;
+
+       /*
+        * No need for parsing; the user CB is already the patched CB.
+        * We call hl_cb_destroy() for two reasons: we don't need the CB in
+        * the CB IDR anymore, and we must decrement its refcount as it was
+        * incremented inside hl_cb_kernel_create().
+        */
+       job->patched_cb = job->user_cb;
+       job->job_cb_size = job->user_cb_size;
+       hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+
+       cs->jobs_in_queue_cnt[job->hw_queue_id]++;
+
+       list_add_tail(&job->cs_node, &cs->job_list);
+
+       /* increment refcount as for external queues we get completion */
+       cs_get(cs);
+
+       hl_debugfs_add_job(hdev, job);
+
+       rc = hl_hw_queue_schedule_cs(cs);
+       if (rc) {
+               if (rc != -EAGAIN)
+                       dev_err(hdev->dev,
+                               "Failed to submit CS %d.%llu to H/W queues, error %d\n",
+                               ctx->asid, cs->sequence, rc);
+               goto free_cs_object;
+       }
+
+       rc = HL_CS_STATUS_SUCCESS;
+       goto put_cs;
+
+free_cs_object:
+       cs_rollback(hdev, cs);
+       *cs_seq = ULLONG_MAX;
+       /* The path below is both for good and erroneous exits */
+put_cs:
+       /* We finished with the CS in this function, so put the ref */
+       cs_put(cs);
+free_signal_seq_array:
+       if (cs_type == CS_TYPE_WAIT)
+               kfree(signal_seq_arr);
+free_cs_chunk_array:
+       kfree(cs_chunk_array);
+out:
+       return rc;
+}
+
+int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       union hl_cs_args *args = data;
+       struct hl_ctx *ctx = hpriv->ctx;
+       void __user *chunks_execute, *chunks_restore;
+       enum hl_cs_type cs_type;
+       u32 num_chunks_execute, num_chunks_restore, sig_wait_flags;
+       u64 cs_seq = ULLONG_MAX;
+       int rc, do_ctx_switch;
+       bool need_soft_reset = false;
+
+       if (hl_device_disabled_or_in_reset(hdev)) {
+               dev_warn_ratelimited(hdev->dev,
+                       "Device is %s. Can't submit new CS\n",
+                       atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+               rc = -EBUSY;
+               goto out;
+       }
+
+       sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT;
+
+       if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) {
+               dev_err(hdev->dev,
+                       "Signal and wait CS flags are mutually exclusive, context %d\n",
+                       ctx->asid);
+               rc = -EINVAL;
+               goto out;
+       }
+
+       if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) &&
+                       (!hdev->supports_sync_stream))) {
+               dev_err(hdev->dev, "Sync stream CS is not supported\n");
+               rc = -EINVAL;
+               goto out;
+       }
+
+       if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL)
+               cs_type = CS_TYPE_SIGNAL;
+       else if (args->in.cs_flags & HL_CS_FLAGS_WAIT)
+               cs_type = CS_TYPE_WAIT;
+       else
+               cs_type = CS_TYPE_DEFAULT;
+
+       chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
+       num_chunks_execute = args->in.num_chunks_execute;
+
+       if (cs_type == CS_TYPE_DEFAULT) {
+               if (!num_chunks_execute) {
+                       dev_err(hdev->dev,
+                               "Got execute CS with 0 chunks, context %d\n",
+                               ctx->asid);
+                       rc = -EINVAL;
+                       goto out;
+               }
+       } else if (num_chunks_execute != 1) {
+               dev_err(hdev->dev,
+                       "Sync stream CS mandates one chunk only, context %d\n",
+                       ctx->asid);
+               rc = -EINVAL;
+               goto out;
+       }
+
+       do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
+
+       if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
+               long ret;
+
+               chunks_restore =
+                       (void __user *) (uintptr_t) args->in.chunks_restore;
+               num_chunks_restore = args->in.num_chunks_restore;
+
+               mutex_lock(&hpriv->restore_phase_mutex);
+
+               if (do_ctx_switch) {
+                       rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
+                       if (rc) {
+                               dev_err_ratelimited(hdev->dev,
+                                       "Failed to switch to context %d, rejecting CS! %d\n",
+                                       ctx->asid, rc);
+                               /*
+                                * If we timed out, or if the device is not
+                                * IDLE while we want to do a context switch
+                                * (-EBUSY), we need to soft-reset because the
+                                * QMAN is probably stuck. However, we can't
+                                * call reset here directly because of a
+                                * deadlock, so we need to do it at the very
+                                * end of this function.
+                                */
+                               if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
+                                       need_soft_reset = true;
+                               mutex_unlock(&hpriv->restore_phase_mutex);
+                               goto out;
+                       }
+               }
+
+               hdev->asic_funcs->restore_phase_topology(hdev);
+
+               if (!num_chunks_restore) {
+                       dev_dbg(hdev->dev,
+                       "Need to run restore phase but restore CS is empty\n");
+                       rc = 0;
+               } else {
+                       rc = cs_ioctl_default(hpriv, chunks_restore,
+                                               num_chunks_restore, &cs_seq);
+               }
+
+               mutex_unlock(&hpriv->restore_phase_mutex);
+
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "Failed to submit restore CS for context %d (%d)\n",
+                               ctx->asid, rc);
+                       goto out;
+               }
+
+               /* Need to wait for restore completion before execution phase */
+               if (num_chunks_restore) {
+                       ret = _hl_cs_wait_ioctl(hdev, ctx,
+                                       jiffies_to_usecs(hdev->timeout_jiffies),
+                                       cs_seq);
+                       if (ret <= 0) {
+                               dev_err(hdev->dev,
+                                       "Restore CS for context %d failed to complete %ld\n",
+                                       ctx->asid, ret);
+                               rc = -ENOEXEC;
+                               goto out;
+                       }
+               }
+
+               ctx->thread_ctx_switch_wait_token = 1;
+       } else if (!ctx->thread_ctx_switch_wait_token) {
+               u32 tmp;
+
+               rc = hl_poll_timeout_memory(hdev,
+                       &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
+                       100, jiffies_to_usecs(hdev->timeout_jiffies), false);
+
+               if (rc == -ETIMEDOUT) {
+                       dev_err(hdev->dev,
+                               "context switch phase timeout (%d)\n", tmp);
+                       goto out;
+               }
+       }
+
+       if (cs_type == CS_TYPE_DEFAULT)
+               rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute,
+                                       &cs_seq);
+       else
+               rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute,
+                                               num_chunks_execute, &cs_seq);
+
+out:
+       if (rc != -EAGAIN) {
+               memset(args, 0, sizeof(*args));
+               args->out.status = rc;
+               args->out.seq = cs_seq;
+       }
+
+       if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset))
+               hl_device_reset(hdev, false, false);
+
+       return rc;
+}
+
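+/*
+ * _hl_cs_wait_ioctl - wait for the CS identified by @seq to complete.
+ *
+ * Returns a negative error code on failure or interruption, 0 if the wait
+ * timed out, or a positive value if the CS completed (including the case
+ * where its fence is already gone because the CS is old).
+ */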
+static long _hl_cs_wait_ioctl(struct hl_device *hdev,
+               struct hl_ctx *ctx, u64 timeout_us, u64 seq)
+{
+       struct dma_fence *fence;
+       unsigned long timeout;
+       long rc;
+
+       if (timeout_us == MAX_SCHEDULE_TIMEOUT)
+               timeout = timeout_us;
+       else
+               timeout = usecs_to_jiffies(timeout_us);
+
+       hl_ctx_get(hdev, ctx);
+
+       fence = hl_ctx_get_fence(ctx, seq);
+       if (IS_ERR(fence)) {
+               rc = PTR_ERR(fence);
+               if (rc == -EINVAL)
+                       dev_notice_ratelimited(hdev->dev,
+                               "Can't wait on CS %llu because current CS is at seq %llu\n",
+                               seq, ctx->cs_sequence);
+       } else if (fence) {
+               rc = dma_fence_wait_timeout(fence, true, timeout);
+               if (fence->error == -ETIMEDOUT)
+                       rc = -ETIMEDOUT;
+               else if (fence->error == -EIO)
+                       rc = -EIO;
+               dma_fence_put(fence);
+       } else {
+               dev_dbg(hdev->dev,
+                       "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
+                       seq, ctx->cs_sequence);
+               rc = 1;
+       }
+
+       hl_ctx_put(ctx);
+
+       return rc;
+}
+
+int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       union hl_wait_cs_args *args = data;
+       u64 seq = args->in.seq;
+       long rc;
+
+       rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq);
+
+       memset(args, 0, sizeof(*args));
+
+       if (rc < 0) {
+               if (rc == -ERESTARTSYS) {
+                       dev_err_ratelimited(hdev->dev,
+                               "user process got signal while waiting for CS handle %llu\n",
+                               seq);
+                       args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
+                       rc = -EINTR;
+               } else if (rc == -ETIMEDOUT) {
+                       dev_err_ratelimited(hdev->dev,
+                               "CS %llu has timed-out while user process is waiting for it\n",
+                               seq);
+                       args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
+               } else if (rc == -EIO) {
+                       dev_err_ratelimited(hdev->dev,
+                               "CS %llu has been aborted while user process is waiting for it\n",
+                               seq);
+                       args->out.status = HL_WAIT_CS_STATUS_ABORTED;
+               }
+               return rc;
+       }
+
+       if (rc == 0)
+               args->out.status = HL_WAIT_CS_STATUS_BUSY;
+       else
+               args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
+
+       return 0;
+}
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
new file mode 100644 (file)
index 0000000..1e3e5b1
--- /dev/null
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/slab.h>
+
+static void hl_ctx_fini(struct hl_ctx *ctx)
+{
+       struct hl_device *hdev = ctx->hdev;
+       int i;
+
+       /*
+        * If we arrived here, there are no jobs waiting for this context
+        * on its queues so we can safely remove it.
+        * This is because for each CS we increment the ref count, and for
+        * every CS that finishes we decrement it, so we won't arrive at
+        * this function unless the ref count is 0.
+        */
+
+       for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
+               dma_fence_put(ctx->cs_pending[i]);
+
+       kfree(ctx->cs_pending);
+
+       if (ctx->asid != HL_KERNEL_ASID_ID) {
+               /* The engines are stopped as there is no executing CS, but
+                * Coresight might still be working by accessing addresses
+                * related to the stopped engines. Hence stop it explicitly.
+                * Stop only if this is the compute context, as there can be
+                * only one compute context.
+                */
+               if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
+                       hl_device_set_debug_mode(hdev, false);
+
+               hl_vm_ctx_fini(ctx);
+               hl_asid_free(hdev, ctx->asid);
+       } else {
+               hl_mmu_ctx_fini(ctx);
+       }
+}
+
+void hl_ctx_do_release(struct kref *ref)
+{
+       struct hl_ctx *ctx;
+
+       ctx = container_of(ref, struct hl_ctx, refcount);
+
+       hl_ctx_fini(ctx);
+
+       if (ctx->hpriv)
+               hl_hpriv_put(ctx->hpriv);
+
+       kfree(ctx);
+}
+
+int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
+{
+       struct hl_ctx_mgr *mgr = &hpriv->ctx_mgr;
+       struct hl_ctx *ctx;
+       int rc;
+
+       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+       if (!ctx) {
+               rc = -ENOMEM;
+               goto out_err;
+       }
+
+       mutex_lock(&mgr->ctx_lock);
+       rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
+       mutex_unlock(&mgr->ctx_lock);
+
+       if (rc < 0) {
+               dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n");
+               goto free_ctx;
+       }
+
+       ctx->handle = rc;
+
+       rc = hl_ctx_init(hdev, ctx, false);
+       if (rc)
+               goto remove_from_idr;
+
+       hl_hpriv_get(hpriv);
+       ctx->hpriv = hpriv;
+
+       /* TODO: remove for multiple contexts per process */
+       hpriv->ctx = ctx;
+
+       /* TODO: remove the following line for multiple process support */
+       hdev->compute_ctx = ctx;
+
+       return 0;
+
+remove_from_idr:
+       mutex_lock(&mgr->ctx_lock);
+       idr_remove(&mgr->ctx_handles, ctx->handle);
+       mutex_unlock(&mgr->ctx_lock);
+free_ctx:
+       kfree(ctx);
+out_err:
+       return rc;
+}
+
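+/*
+ * hl_ctx_free - drop a reference on the context. If command submissions of
+ * this context are still in flight, the context stays alive (its refcount
+ * has not yet reached zero) and a warning is printed.
+ */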
+void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
+{
+       if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
+               return;
+
+       dev_warn(hdev->dev,
+               "user process released device but its command submissions are still executing\n");
+}
+
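+/*
+ * hl_ctx_init - initialize a context object: reset the CS bookkeeping,
+ * allocate the pending-fences array and then, for the kernel context, take
+ * the reserved ASID and init the MMU module, or, for a user context,
+ * allocate an ASID and init the VM (memory) module.
+ */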
+int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
+{
+       int rc = 0;
+
+       ctx->hdev = hdev;
+
+       kref_init(&ctx->refcount);
+
+       ctx->cs_sequence = 1;
+       spin_lock_init(&ctx->cs_lock);
+       atomic_set(&ctx->thread_ctx_switch_token, 1);
+       ctx->thread_ctx_switch_wait_token = 0;
+       ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
+                               sizeof(struct dma_fence *),
+                               GFP_KERNEL);
+       if (!ctx->cs_pending)
+               return -ENOMEM;
+
+       if (is_kernel_ctx) {
+               ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
+               rc = hl_mmu_ctx_init(ctx);
+               if (rc) {
+                       dev_err(hdev->dev, "Failed to init mmu ctx module\n");
+                       goto mem_ctx_err;
+               }
+       } else {
+               ctx->asid = hl_asid_alloc(hdev);
+               if (!ctx->asid) {
+                       dev_err(hdev->dev, "No free ASID, failed to create context\n");
+                       return -ENOMEM;
+               }
+
+               rc = hl_vm_ctx_init(ctx);
+               if (rc) {
+                       dev_err(hdev->dev, "Failed to init mem ctx module\n");
+                       rc = -ENOMEM;
+                       goto mem_ctx_err;
+               }
+       }
+
+       return 0;
+
+mem_ctx_err:
+       if (ctx->asid != HL_KERNEL_ASID_ID)
+               hl_asid_free(hdev, ctx->asid);
+
+       return rc;
+}
+
+void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx)
+{
+       kref_get(&ctx->refcount);
+}
+
+int hl_ctx_put(struct hl_ctx *ctx)
+{
+       return kref_put(&ctx->refcount, hl_ctx_do_release);
+}
+
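+/*
+ * hl_ctx_get_fence - look up the fence of a CS in this context by sequence
+ * number. Returns ERR_PTR(-EINVAL) if @seq was not submitted yet, NULL if
+ * the CS is so old that its fence was already released, or a referenced
+ * fence that the caller must release with dma_fence_put().
+ */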
+struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
+{
+       struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
+       struct dma_fence *fence;
+
+       spin_lock(&ctx->cs_lock);
+
+       if (seq >= ctx->cs_sequence) {
+               spin_unlock(&ctx->cs_lock);
+               return ERR_PTR(-EINVAL);
+       }
+
+       if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
+               spin_unlock(&ctx->cs_lock);
+               return NULL;
+       }
+
+       fence = dma_fence_get(
+                       ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
+       spin_unlock(&ctx->cs_lock);
+
+       return fence;
+}
+
+/*
+ * hl_ctx_mgr_init - initialize the context manager
+ *
+ * @mgr: pointer to context manager structure
+ *
+ * This manager is an object inside the hpriv object of the user process.
+ * The function is called when a user process opens the FD.
+ */
+void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr)
+{
+       mutex_init(&mgr->ctx_lock);
+       idr_init(&mgr->ctx_handles);
+}
+
+/*
+ * hl_ctx_mgr_fini - finalize the context manager
+ *
+ * @hdev: pointer to device structure
+ * @mgr: pointer to context manager structure
+ *
+ * This function goes over all the contexts in the manager and frees them.
+ * It is called when a process closes the FD.
+ */
+void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr)
+{
+       struct hl_ctx *ctx;
+       struct idr *idp;
+       u32 id;
+
+       idp = &mgr->ctx_handles;
+
+       idr_for_each_entry(idp, ctx, id)
+               hl_ctx_free(hdev, ctx);
+
+       idr_destroy(&mgr->ctx_handles);
+       mutex_destroy(&mgr->ctx_lock);
+}
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
new file mode 100644 (file)
index 0000000..fc4372c
--- /dev/null
@@ -0,0 +1,1411 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+#include "include/hw_ip/mmu/mmu_general.h"
+
+#include <linux/pci.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#define MMU_ADDR_BUF_SIZE      40
+#define MMU_ASID_BUF_SIZE      10
+#define MMU_KBUF_SIZE          (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
+
+static struct dentry *hl_debug_root;
+
+static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
+                               u8 i2c_reg, u32 *val)
+{
+       struct armcp_packet pkt;
+       int rc;
+
+       if (hl_device_disabled_or_in_reset(hdev))
+               return -EBUSY;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.i2c_bus = i2c_bus;
+       pkt.i2c_addr = i2c_addr;
+       pkt.i2c_reg = i2c_reg;
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                       HL_DEVICE_TIMEOUT_USEC, (long *) val);
+
+       if (rc)
+               dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc);
+
+       return rc;
+}
+
+static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
+                               u8 i2c_reg, u32 val)
+{
+       struct armcp_packet pkt;
+       int rc;
+
+       if (hl_device_disabled_or_in_reset(hdev))
+               return -EBUSY;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.i2c_bus = i2c_bus;
+       pkt.i2c_addr = i2c_addr;
+       pkt.i2c_reg = i2c_reg;
+       pkt.value = cpu_to_le64(val);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                       HL_DEVICE_TIMEOUT_USEC, NULL);
+
+       if (rc)
+               dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc);
+
+       return rc;
+}
+
+static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
+{
+       struct armcp_packet pkt;
+       int rc;
+
+       if (hl_device_disabled_or_in_reset(hdev))
+               return;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.led_index = cpu_to_le32(led);
+       pkt.value = cpu_to_le64(state);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                               HL_DEVICE_TIMEOUT_USEC, NULL);
+
+       if (rc)
+               dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc);
+}
+
+static int command_buffers_show(struct seq_file *s, void *data)
+{
+       struct hl_debugfs_entry *entry = s->private;
+       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+       struct hl_cb *cb;
+       bool first = true;
+
+       spin_lock(&dev_entry->cb_spinlock);
+
+       list_for_each_entry(cb, &dev_entry->cb_list, debugfs_list) {
+               if (first) {
+                       first = false;
+                       seq_puts(s, "\n");
+                       seq_puts(s, " CB ID   CTX ID   CB size    CB RefCnt    mmap?   CS counter\n");
+                       seq_puts(s, "---------------------------------------------------------------\n");
+               }
+               seq_printf(s,
+                       "   %03d        %d    0x%08x      %d          %d          %d\n",
+                       cb->id, cb->ctx_id, cb->size,
+                       kref_read(&cb->refcount),
+                       cb->mmap, cb->cs_cnt);
+       }
+
+       spin_unlock(&dev_entry->cb_spinlock);
+
+       if (!first)
+               seq_puts(s, "\n");
+
+       return 0;
+}
+
+static int command_submission_show(struct seq_file *s, void *data)
+{
+       struct hl_debugfs_entry *entry = s->private;
+       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+       struct hl_cs *cs;
+       bool first = true;
+
+       spin_lock(&dev_entry->cs_spinlock);
+
+       list_for_each_entry(cs, &dev_entry->cs_list, debugfs_list) {
+               if (first) {
+                       first = false;
+                       seq_puts(s, "\n");
+                       seq_puts(s, " CS ID   CTX ASID   CS RefCnt   Submitted    Completed\n");
+                       seq_puts(s, "------------------------------------------------------\n");
+               }
+               seq_printf(s,
+                       "   %llu       %d          %d           %d            %d\n",
+                       cs->sequence, cs->ctx->asid,
+                       kref_read(&cs->refcount),
+                       cs->submitted, cs->completed);
+       }
+
+       spin_unlock(&dev_entry->cs_spinlock);
+
+       if (!first)
+               seq_puts(s, "\n");
+
+       return 0;
+}
+
+static int command_submission_jobs_show(struct seq_file *s, void *data)
+{
+       struct hl_debugfs_entry *entry = s->private;
+       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+       struct hl_cs_job *job;
+       bool first = true;
+
+       spin_lock(&dev_entry->cs_job_spinlock);
+
+       list_for_each_entry(job, &dev_entry->cs_job_list, debugfs_list) {
+               if (first) {
+                       first = false;
+                       seq_puts(s, "\n");
+                       seq_puts(s, " JOB ID   CS ID    CTX ASID   H/W Queue\n");
+                       seq_puts(s, "---------------------------------------\n");
+               }
+               if (job->cs)
+                       seq_printf(s,
+                               "    %02d       %llu         %d         %d\n",
+                               job->id, job->cs->sequence, job->cs->ctx->asid,
+                               job->hw_queue_id);
+               else
+                       seq_printf(s,
+                               "    %02d       0         %d         %d\n",
+                               job->id, HL_KERNEL_ASID_ID, job->hw_queue_id);
+       }
+
+       spin_unlock(&dev_entry->cs_job_spinlock);
+
+       if (!first)
+               seq_puts(s, "\n");
+
+       return 0;
+}
+
+static int userptr_show(struct seq_file *s, void *data)
+{
+       struct hl_debugfs_entry *entry = s->private;
+       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+       struct hl_userptr *userptr;
+       char dma_dir[4][30] = {"DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
+                               "DMA_FROM_DEVICE", "DMA_NONE"};
+       bool first = true;
+
+       spin_lock(&dev_entry->userptr_spinlock);
+
+       list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {
+               if (first) {
+                       first = false;
+                       seq_puts(s, "\n");
+                       seq_puts(s, " user virtual address     size             dma dir\n");
+                       seq_puts(s, "----------------------------------------------------------\n");
+               }
+               seq_printf(s,
+                       "    0x%-14llx      %-10u    %-30s\n",
+                       userptr->addr, userptr->size, dma_dir[userptr->dir]);
+       }
+
+       spin_unlock(&dev_entry->userptr_spinlock);
+
+       if (!first)
+               seq_puts(s, "\n");
+
+       return 0;
+}
+
+static int vm_show(struct seq_file *s, void *data)
+{
+       struct hl_debugfs_entry *entry = s->private;
+       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+       struct hl_ctx *ctx;
+       struct hl_vm *vm;
+       struct hl_vm_hash_node *hnode;
+       struct hl_userptr *userptr;
+       struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
+       enum vm_type_t *vm_type;
+       bool once = true;
+       u64 j;
+       int i;
+
+       if (!dev_entry->hdev->mmu_enable)
+               return 0;
+
+       spin_lock(&dev_entry->ctx_mem_hash_spinlock);
+
+       list_for_each_entry(ctx, &dev_entry->ctx_mem_hash_list, debugfs_list) {
+               once = false;
+               seq_puts(s, "\n\n----------------------------------------------------");
+               seq_puts(s, "\n----------------------------------------------------\n\n");
+               seq_printf(s, "ctx asid: %u\n", ctx->asid);
+
+               seq_puts(s, "\nmappings:\n\n");
+               seq_puts(s, "    virtual address        size          handle\n");
+               seq_puts(s, "----------------------------------------------------\n");
+               mutex_lock(&ctx->mem_hash_lock);
+               hash_for_each(ctx->mem_hash, i, hnode, node) {
+                       vm_type = hnode->ptr;
+
+                       if (*vm_type == VM_TYPE_USERPTR) {
+                               userptr = hnode->ptr;
+                               seq_printf(s,
+                                       "    0x%-14llx      %-10u\n",
+                                       hnode->vaddr, userptr->size);
+                       } else {
+                               phys_pg_pack = hnode->ptr;
+                               seq_printf(s,
+                                       "    0x%-14llx      %-10llu       %-4u\n",
+                                       hnode->vaddr, phys_pg_pack->total_size,
+                                       phys_pg_pack->handle);
+                       }
+               }
+               mutex_unlock(&ctx->mem_hash_lock);
+
+               vm = &ctx->hdev->vm;
+               spin_lock(&vm->idr_lock);
+
+               if (!idr_is_empty(&vm->phys_pg_pack_handles))
+                       seq_puts(s, "\n\nallocations:\n");
+
+               idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_pack, i) {
+                       if (phys_pg_pack->asid != ctx->asid)
+                               continue;
+
+                       seq_printf(s, "\nhandle: %u\n", phys_pg_pack->handle);
+                       seq_printf(s, "page size: %u\n\n",
+                                               phys_pg_pack->page_size);
+                       seq_puts(s, "   physical address\n");
+                       seq_puts(s, "---------------------\n");
+                       for (j = 0 ; j < phys_pg_pack->npages ; j++) {
+                               seq_printf(s, "    0x%-14llx\n",
+                                               phys_pg_pack->pages[j]);
+                       }
+               }
+               spin_unlock(&vm->idr_lock);
+
+       }
+
+       spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
+
+       if (!once)
+               seq_puts(s, "\n");
+
+       return 0;
+}
+
+/* these inline functions are copied from mmu.c */
+static inline u64 get_hop0_addr(struct hl_ctx *ctx)
+{
+       return ctx->hdev->asic_prop.mmu_pgt_addr +
+                       (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
+}
+
+static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
+                                       u64 virt_addr, u64 mask, u64 shift)
+{
+       return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
+                       ((virt_addr & mask) >> shift);
+}
+
+static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_specs,
+                                       u64 hop_addr, u64 vaddr)
+{
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask,
+                                       mmu_specs->hop0_shift);
+}
+
+static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_specs,
+                                       u64 hop_addr, u64 vaddr)
+{
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask,
+                                       mmu_specs->hop1_shift);
+}
+
+static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_specs,
+                                       u64 hop_addr, u64 vaddr)
+{
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask,
+                                       mmu_specs->hop2_shift);
+}
+
+static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_specs,
+                                       u64 hop_addr, u64 vaddr)
+{
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask,
+                                       mmu_specs->hop3_shift);
+}
+
+static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_specs,
+                                       u64 hop_addr, u64 vaddr)
+{
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask,
+                                       mmu_specs->hop4_shift);
+}
+
+static inline u64 get_next_hop_addr(u64 curr_pte)
+{
+       if (curr_pte & PAGE_PRESENT_MASK)
+               return curr_pte & HOP_PHYS_ADDR_MASK;
+       else
+               return ULLONG_MAX;
+}
+
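+/*
+ * mmu_show - walk the page-table hops for the ASID and virtual address that
+ * were set through the "mmu" debugfs node and print the address and PTE of
+ * every hop, or an error if the address is not mapped.
+ */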
+static int mmu_show(struct seq_file *s, void *data)
+{
+       struct hl_debugfs_entry *entry = s->private;
+       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+       struct hl_device *hdev = dev_entry->hdev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct hl_mmu_properties *mmu_prop;
+       struct hl_ctx *ctx;
+       bool is_dram_addr;
+
+       u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
+               hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
+               hop2_addr = 0, hop2_pte_addr = 0, hop2_pte = 0,
+               hop3_addr = 0, hop3_pte_addr = 0, hop3_pte = 0,
+               hop4_addr = 0, hop4_pte_addr = 0, hop4_pte = 0,
+               virt_addr = dev_entry->mmu_addr;
+
+       if (!hdev->mmu_enable)
+               return 0;
+
+       if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID)
+               ctx = hdev->kernel_ctx;
+       else
+               ctx = hdev->compute_ctx;
+
+       if (!ctx) {
+               dev_err(hdev->dev, "no ctx available\n");
+               return 0;
+       }
+
+       is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+                                               prop->dmmu.start_addr,
+                                               prop->dmmu.end_addr);
+
+       /* shifts and masks are the same in PMMU and HPMMU, use one of them */
+       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
+       mutex_lock(&ctx->mmu_lock);
+
+       /* the following lookup is copied from unmap() in mmu.c */
+
+       hop0_addr = get_hop0_addr(ctx);
+       hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
+       hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
+       hop1_addr = get_next_hop_addr(hop0_pte);
+
+       if (hop1_addr == ULLONG_MAX)
+               goto not_mapped;
+
+       hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
+       hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
+       hop2_addr = get_next_hop_addr(hop1_pte);
+
+       if (hop2_addr == ULLONG_MAX)
+               goto not_mapped;
+
+       hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
+       hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
+       hop3_addr = get_next_hop_addr(hop2_pte);
+
+       if (hop3_addr == ULLONG_MAX)
+               goto not_mapped;
+
+       hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
+       hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
+
+       if (!(hop3_pte & LAST_MASK)) {
+               hop4_addr = get_next_hop_addr(hop3_pte);
+
+               if (hop4_addr == ULLONG_MAX)
+                       goto not_mapped;
+
+               hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+                                                       virt_addr);
+               hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
+               if (!(hop4_pte & PAGE_PRESENT_MASK))
+                       goto not_mapped;
+       } else {
+               if (!(hop3_pte & PAGE_PRESENT_MASK))
+                       goto not_mapped;
+       }
+
+       seq_printf(s, "asid: %u, virt_addr: 0x%llx\n",
+                       dev_entry->mmu_asid, dev_entry->mmu_addr);
+
+       seq_printf(s, "hop0_addr: 0x%llx\n", hop0_addr);
+       seq_printf(s, "hop0_pte_addr: 0x%llx\n", hop0_pte_addr);
+       seq_printf(s, "hop0_pte: 0x%llx\n", hop0_pte);
+
+       seq_printf(s, "hop1_addr: 0x%llx\n", hop1_addr);
+       seq_printf(s, "hop1_pte_addr: 0x%llx\n", hop1_pte_addr);
+       seq_printf(s, "hop1_pte: 0x%llx\n", hop1_pte);
+
+       seq_printf(s, "hop2_addr: 0x%llx\n", hop2_addr);
+       seq_printf(s, "hop2_pte_addr: 0x%llx\n", hop2_pte_addr);
+       seq_printf(s, "hop2_pte: 0x%llx\n", hop2_pte);
+
+       seq_printf(s, "hop3_addr: 0x%llx\n", hop3_addr);
+       seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr);
+       seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte);
+
+       if (!(hop3_pte & LAST_MASK)) {
+               seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr);
+               seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr);
+               seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte);
+       }
+
+       goto out;
+
+not_mapped:
+       dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+                       virt_addr);
+out:
+       mutex_unlock(&ctx->mmu_lock);
+
+       return 0;
+}
+
+static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,
+               size_t count, loff_t *f_pos)
+{
+       struct seq_file *s = file->private_data;
+       struct hl_debugfs_entry *entry = s->private;
+       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+       struct hl_device *hdev = dev_entry->hdev;
+       char kbuf[MMU_KBUF_SIZE];
+       char *c;
+       ssize_t rc;
+
+       if (!hdev->mmu_enable)
+               return count;
+
+       if (count > sizeof(kbuf) - 1)
+               goto err;
+       if (copy_from_user(kbuf, buf, count))
+               goto err;
+       kbuf[count] = 0;
+
+       c = strchr(kbuf, ' ');
+       if (!c)
+               goto err;
+       *c = '\0';
+
+       rc = kstrtouint(kbuf, 10, &dev_entry->mmu_asid);
+       if (rc)
+               goto err;
+
+       if (strncmp(c+1, "0x", 2))
+               goto err;
+       rc = kstrtoull(c+3, 16, &dev_entry->mmu_addr);
+       if (rc)
+               goto err;
+
+       return count;
+
+err:
+       dev_err(hdev->dev, "usage: echo <asid> <0xaddr> > mmu\n");
+
+       return -EINVAL;
+}
+
+static int engines_show(struct seq_file *s, void *data)
+{
+       struct hl_debugfs_entry *entry = s->private;
+       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+       struct hl_device *hdev = dev_entry->hdev;
+
+       if (atomic_read(&hdev->in_reset)) {
+               dev_warn_ratelimited(hdev->dev,
+                               "Can't check device idle during reset\n");
+               return 0;
+       }
+
+       hdev->asic_funcs->is_device_idle(hdev, NULL, s);
+
+       return 0;
+}
+
+static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
+{
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+       if (!hdev->mmu_enable)
+               goto out;
+
+       if (hdev->dram_supports_virtual_memory &&
+               (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
+               return true;
+
+       if (addr >= prop->pmmu.start_addr &&
+               addr < prop->pmmu.end_addr)
+               return true;
+
+       if (addr >= prop->pmmu_huge.start_addr &&
+               addr < prop->pmmu_huge.end_addr)
+               return true;
+out:
+       return false;
+}
+
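+/*
+ * device_va_to_pa - translate a device virtual address to a physical address
+ * by walking the page-table hops of the compute context in software.
+ * Returns -EINVAL if the address is not mapped.
+ */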
+static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
+                               u64 *phys_addr)
+{
+       struct hl_ctx *ctx = hdev->compute_ctx;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct hl_mmu_properties *mmu_prop;
+       u64 hop_addr, hop_pte_addr, hop_pte;
+       u64 offset_mask = HOP4_MASK | FLAGS_MASK;
+       int rc = 0;
+       bool is_dram_addr;
+
+       if (!ctx) {
+               dev_err(hdev->dev, "no ctx available\n");
+               return -EINVAL;
+       }
+
+       is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+                                               prop->dmmu.start_addr,
+                                               prop->dmmu.end_addr);
+
+       /* shifts and masks are the same in PMMU and HPMMU, use one of them */
+       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
+       mutex_lock(&ctx->mmu_lock);
+
+       /* hop 0 */
+       hop_addr = get_hop0_addr(ctx);
+       hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
+       hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
+
+       /* hop 1 */
+       hop_addr = get_next_hop_addr(hop_pte);
+       if (hop_addr == ULLONG_MAX)
+               goto not_mapped;
+       hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
+       hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
+
+       /* hop 2 */
+       hop_addr = get_next_hop_addr(hop_pte);
+       if (hop_addr == ULLONG_MAX)
+               goto not_mapped;
+       hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
+       hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
+
+       /* hop 3 */
+       hop_addr = get_next_hop_addr(hop_pte);
+       if (hop_addr == ULLONG_MAX)
+               goto not_mapped;
+       hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
+       hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
+
+       if (!(hop_pte & LAST_MASK)) {
+               /* hop 4 */
+               hop_addr = get_next_hop_addr(hop_pte);
+               if (hop_addr == ULLONG_MAX)
+                       goto not_mapped;
+               hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr,
+                                                       virt_addr);
+               hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
+
+               offset_mask = FLAGS_MASK;
+       }
+
+       if (!(hop_pte & PAGE_PRESENT_MASK))
+               goto not_mapped;
+
+       *phys_addr = (hop_pte & ~offset_mask) | (virt_addr & offset_mask);
+
+       goto out;
+
+not_mapped:
+       dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+                       virt_addr);
+       rc = -EINVAL;
+out:
+       mutex_unlock(&ctx->mmu_lock);
+       return rc;
+}
+
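+/*
+ * The data32/data64 nodes read or write the device address that was last
+ * stored in the "addr" debugfs node; device virtual addresses are first
+ * translated to physical addresses via device_va_to_pa().
+ */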
+static ssize_t hl_data_read32(struct file *f, char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       char tmp_buf[32];
+       u64 addr = entry->addr;
+       u32 val;
+       ssize_t rc;
+
+       if (atomic_read(&hdev->in_reset)) {
+               dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
+               return 0;
+       }
+
+       if (*ppos)
+               return 0;
+
+       if (hl_is_device_va(hdev, addr)) {
+               rc = device_va_to_pa(hdev, addr, &addr);
+               if (rc)
+                       return rc;
+       }
+
+       rc = hdev->asic_funcs->debugfs_read32(hdev, addr, &val);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
+               return rc;
+       }
+
+       sprintf(tmp_buf, "0x%08x\n", val);
+       return simple_read_from_buffer(buf, count, ppos, tmp_buf,
+                       strlen(tmp_buf));
+}
+
+static ssize_t hl_data_write32(struct file *f, const char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       u64 addr = entry->addr;
+       u32 value;
+       ssize_t rc;
+
+       if (atomic_read(&hdev->in_reset)) {
+               dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
+               return 0;
+       }
+
+       rc = kstrtouint_from_user(buf, count, 16, &value);
+       if (rc)
+               return rc;
+
+       if (hl_is_device_va(hdev, addr)) {
+               rc = device_va_to_pa(hdev, addr, &addr);
+               if (rc)
+                       return rc;
+       }
+
+       rc = hdev->asic_funcs->debugfs_write32(hdev, addr, value);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n",
+                       value, addr);
+               return rc;
+       }
+
+       return count;
+}
+
+static ssize_t hl_data_read64(struct file *f, char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       char tmp_buf[32];
+       u64 addr = entry->addr;
+       u64 val;
+       ssize_t rc;
+
+       if (*ppos)
+               return 0;
+
+       if (hl_is_device_va(hdev, addr)) {
+               rc = device_va_to_pa(hdev, addr, &addr);
+               if (rc)
+                       return rc;
+       }
+
+       rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
+               return rc;
+       }
+
+       sprintf(tmp_buf, "0x%016llx\n", val);
+       return simple_read_from_buffer(buf, count, ppos, tmp_buf,
+                       strlen(tmp_buf));
+}
+
+static ssize_t hl_data_write64(struct file *f, const char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       u64 addr = entry->addr;
+       u64 value;
+       ssize_t rc;
+
+       rc = kstrtoull_from_user(buf, count, 16, &value);
+       if (rc)
+               return rc;
+
+       if (hl_is_device_va(hdev, addr)) {
+               rc = device_va_to_pa(hdev, addr, &addr);
+               if (rc)
+                       return rc;
+       }
+
+       rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n",
+                       value, addr);
+               return rc;
+       }
+
+       return count;
+}
+
+static ssize_t hl_get_power_state(struct file *f, char __user *buf,
+               size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       char tmp_buf[200];
+       int i;
+
+       if (*ppos)
+               return 0;
+
+       if (hdev->pdev->current_state == PCI_D0)
+               i = 1;
+       else if (hdev->pdev->current_state == PCI_D3hot)
+               i = 2;
+       else
+               i = 3;
+
+       sprintf(tmp_buf,
+               "current power state: %d\n1 - D0\n2 - D3hot\n3 - Unknown\n", i);
+       return simple_read_from_buffer(buf, count, ppos, tmp_buf,
+                       strlen(tmp_buf));
+}
+
+static ssize_t hl_set_power_state(struct file *f, const char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       u32 value;
+       ssize_t rc;
+
+       rc = kstrtouint_from_user(buf, count, 10, &value);
+       if (rc)
+               return rc;
+
+       if (value == 1) {
+               pci_set_power_state(hdev->pdev, PCI_D0);
+               pci_restore_state(hdev->pdev);
+               rc = pci_enable_device(hdev->pdev);
+       } else if (value == 2) {
+               pci_save_state(hdev->pdev);
+               pci_disable_device(hdev->pdev);
+               pci_set_power_state(hdev->pdev, PCI_D3hot);
+       } else {
+               dev_dbg(hdev->dev, "invalid power state value %u\n", value);
+               return -EINVAL;
+       }
+
+       return count;
+}
+
+static ssize_t hl_i2c_data_read(struct file *f, char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       char tmp_buf[32];
+       u32 val;
+       ssize_t rc;
+
+       if (*ppos)
+               return 0;
+
+       rc = hl_debugfs_i2c_read(hdev, entry->i2c_bus, entry->i2c_addr,
+                       entry->i2c_reg, &val);
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to read from I2C bus %d, addr %d, reg %d\n",
+                       entry->i2c_bus, entry->i2c_addr, entry->i2c_reg);
+               return rc;
+       }
+
+       sprintf(tmp_buf, "0x%02x\n", val);
+       rc = simple_read_from_buffer(buf, count, ppos, tmp_buf,
+                       strlen(tmp_buf));
+
+       return rc;
+}
+
+static ssize_t hl_i2c_data_write(struct file *f, const char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       u32 value;
+       ssize_t rc;
+
+       rc = kstrtouint_from_user(buf, count, 16, &value);
+       if (rc)
+               return rc;
+
+       rc = hl_debugfs_i2c_write(hdev, entry->i2c_bus, entry->i2c_addr,
+                       entry->i2c_reg, value);
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to write 0x%02x to I2C bus %d, addr %d, reg %d\n",
+                       value, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg);
+               return rc;
+       }
+
+       return count;
+}
+
+static ssize_t hl_led0_write(struct file *f, const char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       u32 value;
+       ssize_t rc;
+
+       rc = kstrtouint_from_user(buf, count, 10, &value);
+       if (rc)
+               return rc;
+
+       value = value ? 1 : 0;
+
+       hl_debugfs_led_set(hdev, 0, value);
+
+       return count;
+}
+
+static ssize_t hl_led1_write(struct file *f, const char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       u32 value;
+       ssize_t rc;
+
+       rc = kstrtouint_from_user(buf, count, 10, &value);
+       if (rc)
+               return rc;
+
+       value = value ? 1 : 0;
+
+       hl_debugfs_led_set(hdev, 1, value);
+
+       return count;
+}
+
+static ssize_t hl_led2_write(struct file *f, const char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       u32 value;
+       ssize_t rc;
+
+       rc = kstrtouint_from_user(buf, count, 10, &value);
+       if (rc)
+               return rc;
+
+       value = value ? 1 : 0;
+
+       hl_debugfs_led_set(hdev, 2, value);
+
+       return count;
+}
+
+static ssize_t hl_device_read(struct file *f, char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       static const char *help =
+               "Valid values: disable, enable, suspend, resume, cpu_timeout\n";
+       return simple_read_from_buffer(buf, count, ppos, help, strlen(help));
+}
+
+static ssize_t hl_device_write(struct file *f, const char __user *buf,
+                                    size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       char data[30] = {0};
+
+       /* don't allow partial writes */
+       if (*ppos != 0)
+               return 0;
+
+       simple_write_to_buffer(data, 29, ppos, buf, count);
+
+       if (strncmp("disable", data, strlen("disable")) == 0) {
+               hdev->disabled = true;
+       } else if (strncmp("enable", data, strlen("enable")) == 0) {
+               hdev->disabled = false;
+       } else if (strncmp("suspend", data, strlen("suspend")) == 0) {
+               hdev->asic_funcs->suspend(hdev);
+       } else if (strncmp("resume", data, strlen("resume")) == 0) {
+               hdev->asic_funcs->resume(hdev);
+       } else if (strncmp("cpu_timeout", data, strlen("cpu_timeout")) == 0) {
+               hdev->device_cpu_disabled = true;
+       } else {
+               dev_err(hdev->dev,
+                       "Valid values: disable, enable, suspend, resume, cpu_timeout\n");
+               count = -EINVAL;
+       }
+
+       return count;
+}
+
+static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       char tmp_buf[200];
+       ssize_t rc;
+
+       if (*ppos)
+               return 0;
+
+       sprintf(tmp_buf, "%d\n", hdev->clock_gating);
+       rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
+                       strlen(tmp_buf) + 1);
+
+       return rc;
+}
+
+static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
+                                    size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       u32 value;
+       ssize_t rc;
+
+       if (atomic_read(&hdev->in_reset)) {
+               dev_warn_ratelimited(hdev->dev,
+                               "Can't change clock gating during reset\n");
+               return 0;
+       }
+
+       rc = kstrtouint_from_user(buf, count, 10, &value);
+       if (rc)
+               return rc;
+
+       if (value) {
+               hdev->clock_gating = 1;
+               if (hdev->asic_funcs->enable_clock_gating)
+                       hdev->asic_funcs->enable_clock_gating(hdev);
+       } else {
+               if (hdev->asic_funcs->disable_clock_gating)
+                       hdev->asic_funcs->disable_clock_gating(hdev);
+               hdev->clock_gating = 0;
+       }
+
+       return count;
+}
+
+static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       char tmp_buf[200];
+       ssize_t rc;
+
+       if (*ppos)
+               return 0;
+
+       sprintf(tmp_buf, "%d\n", hdev->stop_on_err);
+       rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
+                       strlen(tmp_buf) + 1);
+
+       return rc;
+}
+
+static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
+                                    size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       u32 value;
+       ssize_t rc;
+
+       if (atomic_read(&hdev->in_reset)) {
+               dev_warn_ratelimited(hdev->dev,
+                               "Can't change stop on error during reset\n");
+               return 0;
+       }
+
+       rc = kstrtouint_from_user(buf, count, 10, &value);
+       if (rc)
+               return rc;
+
+       hdev->stop_on_err = value ? 1 : 0;
+
+       hl_device_reset(hdev, false, false);
+
+       return count;
+}
+
+static const struct file_operations hl_data32b_fops = {
+       .owner = THIS_MODULE,
+       .read = hl_data_read32,
+       .write = hl_data_write32
+};
+
+static const struct file_operations hl_data64b_fops = {
+       .owner = THIS_MODULE,
+       .read = hl_data_read64,
+       .write = hl_data_write64
+};
+
+static const struct file_operations hl_i2c_data_fops = {
+       .owner = THIS_MODULE,
+       .read = hl_i2c_data_read,
+       .write = hl_i2c_data_write
+};
+
+static const struct file_operations hl_power_fops = {
+       .owner = THIS_MODULE,
+       .read = hl_get_power_state,
+       .write = hl_set_power_state
+};
+
+static const struct file_operations hl_led0_fops = {
+       .owner = THIS_MODULE,
+       .write = hl_led0_write
+};
+
+static const struct file_operations hl_led1_fops = {
+       .owner = THIS_MODULE,
+       .write = hl_led1_write
+};
+
+static const struct file_operations hl_led2_fops = {
+       .owner = THIS_MODULE,
+       .write = hl_led2_write
+};
+
+static const struct file_operations hl_device_fops = {
+       .owner = THIS_MODULE,
+       .read = hl_device_read,
+       .write = hl_device_write
+};
+
+static const struct file_operations hl_clk_gate_fops = {
+       .owner = THIS_MODULE,
+       .read = hl_clk_gate_read,
+       .write = hl_clk_gate_write
+};
+
+static const struct file_operations hl_stop_on_err_fops = {
+       .owner = THIS_MODULE,
+       .read = hl_stop_on_err_read,
+       .write = hl_stop_on_err_write
+};
+
+static const struct hl_info_list hl_debugfs_list[] = {
+       {"command_buffers", command_buffers_show, NULL},
+       {"command_submission", command_submission_show, NULL},
+       {"command_submission_jobs", command_submission_jobs_show, NULL},
+       {"userptr", userptr_show, NULL},
+       {"vm", vm_show, NULL},
+       {"mmu", mmu_show, mmu_asid_va_write},
+       {"engines", engines_show, NULL}
+};
+
+static int hl_debugfs_open(struct inode *inode, struct file *file)
+{
+       struct hl_debugfs_entry *node = inode->i_private;
+
+       return single_open(file, node->info_ent->show, node);
+}
+
+static ssize_t hl_debugfs_write(struct file *file, const char __user *buf,
+               size_t count, loff_t *f_pos)
+{
+       struct hl_debugfs_entry *node = file->f_inode->i_private;
+
+       if (node->info_ent->write)
+               return node->info_ent->write(file, buf, count, f_pos);
+       else
+               return -EINVAL;
+
+}
+
+static const struct file_operations hl_debugfs_fops = {
+       .owner = THIS_MODULE,
+       .open = hl_debugfs_open,
+       .read = seq_read,
+       .write = hl_debugfs_write,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
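+/*
+ * hl_debugfs_add_device - create the per-device debugfs directory (named
+ * after the device) under the driver's root and populate it with the control
+ * nodes and the entries of hl_debugfs_list.
+ */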
+void hl_debugfs_add_device(struct hl_device *hdev)
+{
+       struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+       int count = ARRAY_SIZE(hl_debugfs_list);
+       struct hl_debugfs_entry *entry;
+       struct dentry *ent;
+       int i;
+
+       dev_entry->hdev = hdev;
+       dev_entry->entry_arr = kmalloc_array(count,
+                                       sizeof(struct hl_debugfs_entry),
+                                       GFP_KERNEL);
+       if (!dev_entry->entry_arr)
+               return;
+
+       INIT_LIST_HEAD(&dev_entry->file_list);
+       INIT_LIST_HEAD(&dev_entry->cb_list);
+       INIT_LIST_HEAD(&dev_entry->cs_list);
+       INIT_LIST_HEAD(&dev_entry->cs_job_list);
+       INIT_LIST_HEAD(&dev_entry->userptr_list);
+       INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);
+       mutex_init(&dev_entry->file_mutex);
+       spin_lock_init(&dev_entry->cb_spinlock);
+       spin_lock_init(&dev_entry->cs_spinlock);
+       spin_lock_init(&dev_entry->cs_job_spinlock);
+       spin_lock_init(&dev_entry->userptr_spinlock);
+       spin_lock_init(&dev_entry->ctx_mem_hash_spinlock);
+
+       dev_entry->root = debugfs_create_dir(dev_name(hdev->dev),
+                                               hl_debug_root);
+
+       debugfs_create_x64("addr",
+                               0644,
+                               dev_entry->root,
+                               &dev_entry->addr);
+
+       debugfs_create_file("data32",
+                               0644,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_data32b_fops);
+
+       debugfs_create_file("data64",
+                               0644,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_data64b_fops);
+
+       debugfs_create_file("set_power_state",
+                               0200,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_power_fops);
+
+       debugfs_create_u8("i2c_bus",
+                               0644,
+                               dev_entry->root,
+                               &dev_entry->i2c_bus);
+
+       debugfs_create_u8("i2c_addr",
+                               0644,
+                               dev_entry->root,
+                               &dev_entry->i2c_addr);
+
+       debugfs_create_u8("i2c_reg",
+                               0644,
+                               dev_entry->root,
+                               &dev_entry->i2c_reg);
+
+       debugfs_create_file("i2c_data",
+                               0644,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_i2c_data_fops);
+
+       debugfs_create_file("led0",
+                               0200,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_led0_fops);
+
+       debugfs_create_file("led1",
+                               0200,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_led1_fops);
+
+       debugfs_create_file("led2",
+                               0200,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_led2_fops);
+
+       debugfs_create_file("device",
+                               0200,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_device_fops);
+
+       debugfs_create_file("clk_gate",
+                               0200,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_clk_gate_fops);
+
+       debugfs_create_file("stop_on_err",
+                               0644,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_stop_on_err_fops);
+
+       for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
+
+               ent = debugfs_create_file(hl_debugfs_list[i].name,
+                                       0444,
+                                       dev_entry->root,
+                                       entry,
+                                       &hl_debugfs_fops);
+               entry->dent = ent;
+               entry->info_ent = &hl_debugfs_list[i];
+               entry->dev_entry = dev_entry;
+       }
+}
+
+void hl_debugfs_remove_device(struct hl_device *hdev)
+{
+       struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
+
+       debugfs_remove_recursive(entry->root);
+
+       mutex_destroy(&entry->file_mutex);
+       kfree(entry->entry_arr);
+}
+
+void hl_debugfs_add_file(struct hl_fpriv *hpriv)
+{
+       struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
+
+       mutex_lock(&dev_entry->file_mutex);
+       list_add(&hpriv->debugfs_list, &dev_entry->file_list);
+       mutex_unlock(&dev_entry->file_mutex);
+}
+
+void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
+{
+       struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
+
+       mutex_lock(&dev_entry->file_mutex);
+       list_del(&hpriv->debugfs_list);
+       mutex_unlock(&dev_entry->file_mutex);
+}
+
+void hl_debugfs_add_cb(struct hl_cb *cb)
+{
+       struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;
+
+       spin_lock(&dev_entry->cb_spinlock);
+       list_add(&cb->debugfs_list, &dev_entry->cb_list);
+       spin_unlock(&dev_entry->cb_spinlock);
+}
+
+void hl_debugfs_remove_cb(struct hl_cb *cb)
+{
+       struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;
+
+       spin_lock(&dev_entry->cb_spinlock);
+       list_del(&cb->debugfs_list);
+       spin_unlock(&dev_entry->cb_spinlock);
+}
+
+void hl_debugfs_add_cs(struct hl_cs *cs)
+{
+       struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;
+
+       spin_lock(&dev_entry->cs_spinlock);
+       list_add(&cs->debugfs_list, &dev_entry->cs_list);
+       spin_unlock(&dev_entry->cs_spinlock);
+}
+
+void hl_debugfs_remove_cs(struct hl_cs *cs)
+{
+       struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;
+
+       spin_lock(&dev_entry->cs_spinlock);
+       list_del(&cs->debugfs_list);
+       spin_unlock(&dev_entry->cs_spinlock);
+}
+
+void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job)
+{
+       struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+
+       spin_lock(&dev_entry->cs_job_spinlock);
+       list_add(&job->debugfs_list, &dev_entry->cs_job_list);
+       spin_unlock(&dev_entry->cs_job_spinlock);
+}
+
+void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job)
+{
+       struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+
+       spin_lock(&dev_entry->cs_job_spinlock);
+       list_del(&job->debugfs_list);
+       spin_unlock(&dev_entry->cs_job_spinlock);
+}
+
+void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr)
+{
+       struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+
+       spin_lock(&dev_entry->userptr_spinlock);
+       list_add(&userptr->debugfs_list, &dev_entry->userptr_list);
+       spin_unlock(&dev_entry->userptr_spinlock);
+}
+
+void hl_debugfs_remove_userptr(struct hl_device *hdev,
+                               struct hl_userptr *userptr)
+{
+       struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+
+       spin_lock(&dev_entry->userptr_spinlock);
+       list_del(&userptr->debugfs_list);
+       spin_unlock(&dev_entry->userptr_spinlock);
+}
+
+void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
+{
+       struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+
+       spin_lock(&dev_entry->ctx_mem_hash_spinlock);
+       list_add(&ctx->debugfs_list, &dev_entry->ctx_mem_hash_list);
+       spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
+}
+
+void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
+{
+       struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+
+       spin_lock(&dev_entry->ctx_mem_hash_spinlock);
+       list_del(&ctx->debugfs_list);
+       spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
+}
+
+void __init hl_debugfs_init(void)
+{
+       hl_debug_root = debugfs_create_dir("habanalabs", NULL);
+}
+
+void hl_debugfs_fini(void)
+{
+       debugfs_remove_recursive(hl_debug_root);
+}
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
new file mode 100644 (file)
index 0000000..84800ef
--- /dev/null
@@ -0,0 +1,1506 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#define pr_fmt(fmt)                    "habanalabs: " fmt
+
+#include "habanalabs.h"
+
+#include <linux/pci.h>
+#include <linux/sched/signal.h>
+#include <linux/hwmon.h>
+#include <uapi/misc/habanalabs.h>
+
+#define HL_PLDM_PENDING_RESET_PER_SEC  (HL_PENDING_RESET_PER_SEC * 10)
+
+bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
+{
+       return hdev->disabled || atomic_read(&hdev->in_reset);
+}
+
+enum hl_device_status hl_device_status(struct hl_device *hdev)
+{
+       enum hl_device_status status;
+
+       if (hdev->disabled)
+               status = HL_DEVICE_STATUS_MALFUNCTION;
+       else if (atomic_read(&hdev->in_reset))
+               status = HL_DEVICE_STATUS_IN_RESET;
+       else
+               status = HL_DEVICE_STATUS_OPERATIONAL;
+
+       return status;
+}
+
+static void hpriv_release(struct kref *ref)
+{
+       struct hl_fpriv *hpriv;
+       struct hl_device *hdev;
+
+       hpriv = container_of(ref, struct hl_fpriv, refcount);
+
+       hdev = hpriv->hdev;
+
+       put_pid(hpriv->taskpid);
+
+       hl_debugfs_remove_file(hpriv);
+
+       mutex_destroy(&hpriv->restore_phase_mutex);
+
+       mutex_lock(&hdev->fpriv_list_lock);
+       list_del(&hpriv->dev_node);
+       hdev->compute_ctx = NULL;
+       mutex_unlock(&hdev->fpriv_list_lock);
+
+       kfree(hpriv);
+}
+
+void hl_hpriv_get(struct hl_fpriv *hpriv)
+{
+       kref_get(&hpriv->refcount);
+}
+
+void hl_hpriv_put(struct hl_fpriv *hpriv)
+{
+       kref_put(&hpriv->refcount, hpriv_release);
+}
+
+/*
+ * hl_device_release - release function for habanalabs device
+ *
+ * @inode: pointer to inode structure
+ * @filp: pointer to file structure
+ *
+ * Called when a process closes a habanalabs device
+ */
+static int hl_device_release(struct inode *inode, struct file *filp)
+{
+       struct hl_fpriv *hpriv = filp->private_data;
+
+       hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
+       hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
+
+       filp->private_data = NULL;
+
+       hl_hpriv_put(hpriv);
+
+       return 0;
+}
+
+static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
+{
+       struct hl_fpriv *hpriv = filp->private_data;
+       struct hl_device *hdev;
+
+       filp->private_data = NULL;
+
+       hdev = hpriv->hdev;
+
+       mutex_lock(&hdev->fpriv_list_lock);
+       list_del(&hpriv->dev_node);
+       mutex_unlock(&hdev->fpriv_list_lock);
+
+       kfree(hpriv);
+
+       return 0;
+}
+
+/*
+ * hl_mmap - mmap function for habanalabs device
+ *
+ * @filp: pointer to file structure
+ * @vma: pointer to vm_area_struct of the process
+ *
+ * Called when a process does an mmap on a habanalabs device. The common code
+ * checks the type encoded in the mmap offset and dispatches the request to
+ * the relevant handler.
+ */
+static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+       struct hl_fpriv *hpriv = filp->private_data;
+
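+       /*
+        * The mmap offset carries a type mask: a command buffer mapping has
+        * HL_MMAP_CB_MASK set, which is cleared here before the offset is
+        * handed to hl_cb_mmap(). Any other offset is rejected.
+        */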
+       if ((vma->vm_pgoff & HL_MMAP_CB_MASK) == HL_MMAP_CB_MASK) {
+               vma->vm_pgoff ^= HL_MMAP_CB_MASK;
+               return hl_cb_mmap(hpriv, vma);
+       }
+
+       return -EINVAL;
+}
+
+static const struct file_operations hl_ops = {
+       .owner = THIS_MODULE,
+       .open = hl_device_open,
+       .release = hl_device_release,
+       .mmap = hl_mmap,
+       .unlocked_ioctl = hl_ioctl,
+       .compat_ioctl = hl_ioctl
+};
+
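+/* The control node exposes only open/release and the control ioctls; it has
+ * no mmap and no command-submission path.
+ */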
+static const struct file_operations hl_ctrl_ops = {
+       .owner = THIS_MODULE,
+       .open = hl_device_open_ctrl,
+       .release = hl_device_release_ctrl,
+       .unlocked_ioctl = hl_ioctl_control,
+       .compat_ioctl = hl_ioctl_control
+};
+
+static void device_release_func(struct device *dev)
+{
+       kfree(dev);
+}
+
+/*
+ * device_init_cdev - Initialize cdev and device for habanalabs device
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @hclass: pointer to the class object of the device
+ * @minor: minor number of the specific device
+ * @fops: file operations to install for this device
+ * @name: name of the device as it will appear in the filesystem
+ * @cdev: pointer to the char device object that will be initialized
+ * @dev: pointer to the device object that will be initialized
+ *
+ * Initialize a cdev and a Linux device for a habanalabs device.
+ */
+static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
+                               int minor, const struct file_operations *fops,
+                               char *name, struct cdev *cdev,
+                               struct device **dev)
+{
+       cdev_init(cdev, fops);
+       cdev->owner = THIS_MODULE;
+
+       *dev = kzalloc(sizeof(**dev), GFP_KERNEL);
+       if (!*dev)
+               return -ENOMEM;
+
+       device_initialize(*dev);
+       (*dev)->devt = MKDEV(hdev->major, minor);
+       (*dev)->class = hclass;
+       (*dev)->release = device_release_func;
+       dev_set_drvdata(*dev, hdev);
+       dev_set_name(*dev, "%s", name);
+
+       return 0;
+}
+
+static int device_cdev_sysfs_add(struct hl_device *hdev)
+{
+       int rc;
+
+       rc = cdev_device_add(&hdev->cdev, hdev->dev);
+       if (rc) {
+               dev_err(hdev->dev,
+                       "failed to add a char device to the system\n");
+               return rc;
+       }
+
+       rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
+       if (rc) {
+               dev_err(hdev->dev,
+                       "failed to add a control char device to the system\n");
+               goto delete_cdev_device;
+       }
+
+       /* hl_sysfs_init() must be done after adding the device to the system */
+       rc = hl_sysfs_init(hdev);
+       if (rc) {
+               dev_err(hdev->dev, "failed to initialize sysfs\n");
+               goto delete_ctrl_cdev_device;
+       }
+
+       hdev->cdev_sysfs_created = true;
+
+       return 0;
+
+delete_ctrl_cdev_device:
+       cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
+delete_cdev_device:
+       cdev_device_del(&hdev->cdev, hdev->dev);
+       return rc;
+}
+
+static void device_cdev_sysfs_del(struct hl_device *hdev)
+{
+       /* device_release() won't be called so must free devices explicitly */
+       if (!hdev->cdev_sysfs_created) {
+               kfree(hdev->dev_ctrl);
+               kfree(hdev->dev);
+               return;
+       }
+
+       hl_sysfs_fini(hdev);
+       cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
+       cdev_device_del(&hdev->cdev, hdev->dev);
+}
+
+/*
+ * device_early_init - do some early initialization for the habanalabs device
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ * Install the relevant function pointers and call the ASIC's early_init
+ * function
+ */
+static int device_early_init(struct hl_device *hdev)
+{
+       int i, rc;
+       char workq_name[32];
+
+       switch (hdev->asic_type) {
+       case ASIC_GOYA:
+               goya_set_asic_funcs(hdev);
+               strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
+               break;
+       case ASIC_GAUDI:
+               gaudi_set_asic_funcs(hdev);
+               strlcpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
+               break;
+       default:
+               dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
+                       hdev->asic_type);
+               return -EINVAL;
+       }
+
+       rc = hdev->asic_funcs->early_init(hdev);
+       if (rc)
+               return rc;
+
+       rc = hl_asid_init(hdev);
+       if (rc)
+               goto early_fini;
+
+       if (hdev->asic_prop.completion_queues_count) {
+               hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
+                               sizeof(*hdev->cq_wq),
+                               GFP_ATOMIC);
+               if (!hdev->cq_wq) {
+                       rc = -ENOMEM;
+                       goto asid_fini;
+               }
+       }
+
+       for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
+               snprintf(workq_name, 32, "hl-free-jobs-%u", i);
+               hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
+               if (hdev->cq_wq[i] == NULL) {
+                       dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
+                       rc = -ENOMEM;
+                       goto free_cq_wq;
+               }
+       }
+
+       hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
+       if (hdev->eq_wq == NULL) {
+               dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
+               rc = -ENOMEM;
+               goto free_cq_wq;
+       }
+
+       hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
+                                       GFP_KERNEL);
+       if (!hdev->hl_chip_info) {
+               rc = -ENOMEM;
+               goto free_eq_wq;
+       }
+
+       hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
+                                       sizeof(struct hl_device_idle_busy_ts),
+                                       (GFP_KERNEL | __GFP_ZERO));
+       if (!hdev->idle_busy_ts_arr) {
+               rc = -ENOMEM;
+               goto free_chip_info;
+       }
+
+       hl_cb_mgr_init(&hdev->kernel_cb_mgr);
+
+       mutex_init(&hdev->send_cpu_message_lock);
+       mutex_init(&hdev->debug_lock);
+       mutex_init(&hdev->mmu_cache_lock);
+       INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
+       spin_lock_init(&hdev->hw_queues_mirror_lock);
+       INIT_LIST_HEAD(&hdev->fpriv_list);
+       mutex_init(&hdev->fpriv_list_lock);
+       atomic_set(&hdev->in_reset, 0);
+
+       return 0;
+
+free_chip_info:
+       kfree(hdev->hl_chip_info);
+free_eq_wq:
+       destroy_workqueue(hdev->eq_wq);
+free_cq_wq:
+       for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+               if (hdev->cq_wq[i])
+                       destroy_workqueue(hdev->cq_wq[i]);
+       kfree(hdev->cq_wq);
+asid_fini:
+       hl_asid_fini(hdev);
+early_fini:
+       if (hdev->asic_funcs->early_fini)
+               hdev->asic_funcs->early_fini(hdev);
+
+       return rc;
+}
+
+/*
+ * device_early_fini - finalize all that was done in device_early_init
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ */
+static void device_early_fini(struct hl_device *hdev)
+{
+       int i;
+
+       mutex_destroy(&hdev->mmu_cache_lock);
+       mutex_destroy(&hdev->debug_lock);
+       mutex_destroy(&hdev->send_cpu_message_lock);
+
+       mutex_destroy(&hdev->fpriv_list_lock);
+
+       hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
+
+       kfree(hdev->idle_busy_ts_arr);
+       kfree(hdev->hl_chip_info);
+
+       destroy_workqueue(hdev->eq_wq);
+
+       for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+               destroy_workqueue(hdev->cq_wq[i]);
+       kfree(hdev->cq_wq);
+
+       hl_asid_fini(hdev);
+
+       if (hdev->asic_funcs->early_fini)
+               hdev->asic_funcs->early_fini(hdev);
+}
+
+static void set_freq_to_low_job(struct work_struct *work)
+{
+       struct hl_device *hdev = container_of(work, struct hl_device,
+                                               work_freq.work);
+
+       mutex_lock(&hdev->fpriv_list_lock);
+
+       if (!hdev->compute_ctx)
+               hl_device_set_frequency(hdev, PLL_LOW);
+
+       mutex_unlock(&hdev->fpriv_list_lock);
+
+       schedule_delayed_work(&hdev->work_freq,
+                       usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
+}
+
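+/*
+ * hl_device_heartbeat - periodic liveness check of the device CPU
+ *
+ * Reschedules itself every HL_HEARTBEAT_PER_USEC as long as the heartbeat
+ * packet is answered; a single failed heartbeat escalates to a hard reset.
+ */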
+static void hl_device_heartbeat(struct work_struct *work)
+{
+       struct hl_device *hdev = container_of(work, struct hl_device,
+                                               work_heartbeat.work);
+
+       if (hl_device_disabled_or_in_reset(hdev))
+               goto reschedule;
+
+       if (!hdev->asic_funcs->send_heartbeat(hdev))
+               goto reschedule;
+
+       dev_err(hdev->dev, "Device heartbeat failed!\n");
+       hl_device_reset(hdev, true, false);
+
+       return;
+
+reschedule:
+       schedule_delayed_work(&hdev->work_heartbeat,
+                       usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
+}
+
+/*
+ * device_late_init - do late initialization for the habanalabs device
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ * Do the initialization steps that either need the device H/W queues to be
+ * active or must happen after all the rest of the initialization is finished
+ */
+static int device_late_init(struct hl_device *hdev)
+{
+       int rc;
+
+       if (hdev->asic_funcs->late_init) {
+               rc = hdev->asic_funcs->late_init(hdev);
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "failed late initialization for the H/W\n");
+                       return rc;
+               }
+       }
+
+       hdev->high_pll = hdev->asic_prop.high_pll;
+
+       /* force setting to low frequency */
+       hdev->curr_pll_profile = PLL_LOW;
+
+       if (hdev->pm_mng_profile == PM_AUTO)
+               hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
+       else
+               hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
+
+       INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
+       schedule_delayed_work(&hdev->work_freq,
+                       usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
+
+       if (hdev->heartbeat) {
+               INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
+               schedule_delayed_work(&hdev->work_heartbeat,
+                               usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
+       }
+
+       hdev->late_init_done = true;
+
+       return 0;
+}
+
+/*
+ * device_late_fini - finalize all that was done in device_late_init
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ */
+static void device_late_fini(struct hl_device *hdev)
+{
+       if (!hdev->late_init_done)
+               return;
+
+       cancel_delayed_work_sync(&hdev->work_freq);
+       if (hdev->heartbeat)
+               cancel_delayed_work_sync(&hdev->work_heartbeat);
+
+       if (hdev->asic_funcs->late_fini)
+               hdev->asic_funcs->late_fini(hdev);
+
+       hdev->late_init_done = false;
+}
+
+uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms)
+{
+       struct hl_device_idle_busy_ts *ts;
+       ktime_t zero_ktime, curr = ktime_get();
+       u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx;
+       s64 period_us, last_start_us, last_end_us, last_busy_time_us,
+               total_busy_time_us = 0, total_busy_time_ms;
+
+       zero_ktime = ktime_set(0, 0);
+       period_us = period_ms * USEC_PER_MSEC;
+       ts = &hdev->idle_busy_ts_arr[last_index];
+
+       /* check case that device is currently in idle */
+       if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) &&
+                       !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) {
+
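+               /* last_index is unsigned, so decrementing it at zero wraps
+                * around; the range check below folds it back to the last
+                * array slot.
+                */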
+               last_index--;
+               /* Handle case idle_busy_ts_idx was 0 */
+               if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
+                       last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
+
+               ts = &hdev->idle_busy_ts_arr[last_index];
+       }
+
+       while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) {
+               /* Check if we are in the last-sample case, i.e. if the sample
+                * began before the sampling period. This could be a real
+                * sample or 0, so both cases need to be handled.
+                */
+               last_start_us = ktime_to_us(
+                               ktime_sub(curr, ts->idle_to_busy_ts));
+
+               if (last_start_us > period_us) {
+
+                       /* First check two cases:
+                        * 1. If the device is currently busy
+                        * 2. If the device was idle during the whole sampling
+                        *    period
+                        */
+
+                       if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) {
+                               /* Check if the device is currently busy */
+                               if (ktime_compare(ts->idle_to_busy_ts,
+                                               zero_ktime))
+                                       return 100;
+
+                               /* We either didn't have any activity or we
+                                * reached an entry which is 0. Either way,
+                                * exit and return what was accumulated so far
+                                */
+                               break;
+                       }
+
+                       /* If sample has finished, check it is relevant */
+                       last_end_us = ktime_to_us(
+                                       ktime_sub(curr, ts->busy_to_idle_ts));
+
+                       if (last_end_us > period_us)
+                               break;
+
+                       /* It is relevant so add it but with adjustment */
+                       last_busy_time_us = ktime_to_us(
+                                               ktime_sub(ts->busy_to_idle_ts,
+                                               ts->idle_to_busy_ts));
+                       total_busy_time_us += last_busy_time_us -
+                                       (last_start_us - period_us);
+                       break;
+               }
+
+               /* Check if the sample is finished or still open */
+               if (ktime_compare(ts->busy_to_idle_ts, zero_ktime))
+                       last_busy_time_us = ktime_to_us(
+                                               ktime_sub(ts->busy_to_idle_ts,
+                                               ts->idle_to_busy_ts));
+               else
+                       last_busy_time_us = ktime_to_us(
+                                       ktime_sub(curr, ts->idle_to_busy_ts));
+
+               total_busy_time_us += last_busy_time_us;
+
+               last_index--;
+               /* Handle case idle_busy_ts_idx was 0 */
+               if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
+                       last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
+
+               ts = &hdev->idle_busy_ts_arr[last_index];
+
+               overlap_cnt++;
+       }
+
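+       /*
+        * Example: with period_ms == 1000 and 250000us of accumulated busy
+        * time, the two divisions below yield DIV_ROUND_UP(250 * 100, 1000),
+        * i.e. 25 percent utilization.
+        */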
+       total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us,
+                                               USEC_PER_MSEC);
+
+       return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms);
+}
+
+/*
+ * hl_device_set_frequency - set the frequency of the device
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @freq: the new frequency value
+ *
+ * Change the frequency if needed. This function has no protection against
+ * concurrency; the caller must ensure it is not invoked from multiple threads
+ * with different values at the same time.
+ *
+ * Returns 0 if no change was done, otherwise returns 1
+ */
+int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
+{
+       if ((hdev->pm_mng_profile == PM_MANUAL) ||
+                       (hdev->curr_pll_profile == freq))
+               return 0;
+
+       dev_dbg(hdev->dev, "Changing device frequency to %s\n",
+               freq == PLL_HIGH ? "high" : "low");
+
+       hdev->asic_funcs->set_pll_profile(hdev, freq);
+
+       hdev->curr_pll_profile = freq;
+
+       return 1;
+}
+
+int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
+{
+       int rc = 0;
+
+       mutex_lock(&hdev->debug_lock);
+
+       if (!enable) {
+               if (!hdev->in_debug) {
+                       dev_err(hdev->dev,
+                               "Failed to disable debug mode because device was not in debug mode\n");
+                       rc = -EFAULT;
+                       goto out;
+               }
+
+               if (!hdev->hard_reset_pending)
+                       hdev->asic_funcs->halt_coresight(hdev);
+
+               hdev->in_debug = 0;
+
+               if (!hdev->hard_reset_pending)
+                       hdev->asic_funcs->enable_clock_gating(hdev);
+
+               goto out;
+       }
+
+       if (hdev->in_debug) {
+               dev_err(hdev->dev,
+                       "Failed to enable debug mode because device is already in debug mode\n");
+               rc = -EFAULT;
+               goto out;
+       }
+
+       hdev->asic_funcs->disable_clock_gating(hdev);
+       hdev->in_debug = 1;
+
+out:
+       mutex_unlock(&hdev->debug_lock);
+
+       return rc;
+}
+
+/*
+ * hl_device_suspend - initiate device suspend
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ * Puts the hw in the suspend state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver suspend.
+ */
+int hl_device_suspend(struct hl_device *hdev)
+{
+       int rc;
+
+       pci_save_state(hdev->pdev);
+
+       /* Block future CS/VM/JOB completion operations */
+       rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
+       if (rc) {
+               dev_err(hdev->dev, "Can't suspend while in reset\n");
+               return -EIO;
+       }
+
+       /* This blocks all other stuff that is not blocked by in_reset */
+       hdev->disabled = true;
+
+       /*
+        * Flush anyone that is inside the critical section of enqueue
+        * jobs to the H/W
+        */
+       hdev->asic_funcs->hw_queues_lock(hdev);
+       hdev->asic_funcs->hw_queues_unlock(hdev);
+
+       /* Flush processes that are sending message to CPU */
+       mutex_lock(&hdev->send_cpu_message_lock);
+       mutex_unlock(&hdev->send_cpu_message_lock);
+
+       rc = hdev->asic_funcs->suspend(hdev);
+       if (rc)
+               dev_err(hdev->dev,
+                       "Failed to disable PCI access of device CPU\n");
+
+       /* Shut down the device */
+       pci_disable_device(hdev->pdev);
+       pci_set_power_state(hdev->pdev, PCI_D3hot);
+
+       return 0;
+}
+
+/*
+ * hl_device_resume - initiate device resume
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ * Bring the hw back to operating state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver resume.
+ */
+int hl_device_resume(struct hl_device *hdev)
+{
+       int rc;
+
+       pci_set_power_state(hdev->pdev, PCI_D0);
+       pci_restore_state(hdev->pdev);
+       rc = pci_enable_device_mem(hdev->pdev);
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to enable PCI device in resume\n");
+               return rc;
+       }
+
+       pci_set_master(hdev->pdev);
+
+       rc = hdev->asic_funcs->resume(hdev);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to resume device after suspend\n");
+               goto disable_device;
+       }
+
+       hdev->disabled = false;
+       atomic_set(&hdev->in_reset, 0);
+
+       rc = hl_device_reset(hdev, true, false);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to reset device during resume\n");
+               goto disable_device;
+       }
+
+       return 0;
+
+disable_device:
+       pci_clear_master(hdev->pdev);
+       pci_disable_device(hdev->pdev);
+
+       return rc;
+}
+
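+/*
+ * device_kill_open_processes - force-close user processes before a hard reset
+ *
+ * Sends SIGKILL to every process that still holds an open file on the device,
+ * then polls for up to HL_PENDING_RESET_PER_SEC seconds (ten times that on
+ * PLDM) for the driver-side cleanup to finish. Returns -EBUSY if some file
+ * objects are still alive at the end of the wait.
+ */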
+static int device_kill_open_processes(struct hl_device *hdev)
+{
+       u16 pending_total, pending_cnt;
+       struct hl_fpriv *hpriv;
+       struct task_struct *task = NULL;
+
+       if (hdev->pldm)
+               pending_total = HL_PLDM_PENDING_RESET_PER_SEC;
+       else
+               pending_total = HL_PENDING_RESET_PER_SEC;
+
+       /* Give time for the user to close the FD and for processes that are
+        * inside hl_device_open to finish
+        */
+       if (!list_empty(&hdev->fpriv_list))
+               ssleep(1);
+
+       mutex_lock(&hdev->fpriv_list_lock);
+
+       /* This section must be protected because we are dereferencing
+        * pointers that are freed if the process exits
+        */
+       list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
+               task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
+               if (task) {
+                       dev_info(hdev->dev, "Killing user process pid=%d\n",
+                               task_pid_nr(task));
+                       send_sig(SIGKILL, task, 1);
+                       usleep_range(1000, 10000);
+
+                       put_task_struct(task);
+               }
+       }
+
+       mutex_unlock(&hdev->fpriv_list_lock);
+
+       /* We killed the open users, but because the driver cleans up after the
+        * user contexts are closed (e.g. mmu mappings), we need to wait again
+        * to make sure the cleaning phase is finished before continuing with
+        * the reset
+        */
+
+       pending_cnt = pending_total;
+
+       while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
+               dev_info(hdev->dev,
+                       "Waiting for all unmap operations to finish before hard reset\n");
+
+               pending_cnt--;
+
+               ssleep(1);
+       }
+
+       return list_empty(&hdev->fpriv_list) ? 0 : -EBUSY;
+}
+
+static void device_hard_reset_pending(struct work_struct *work)
+{
+       struct hl_device_reset_work *device_reset_work =
+               container_of(work, struct hl_device_reset_work, reset_work);
+       struct hl_device *hdev = device_reset_work->hdev;
+
+       hl_device_reset(hdev, true, true);
+
+       kfree(device_reset_work);
+}
+
+/*
+ * hl_device_reset - reset the device
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @hard_reset: should we do hard reset to all engines or just reset the
+ *              compute/dma engines
+ * @from_hard_reset_thread: is the caller the hard-reset thread
+ *
+ * Block future CS and wait for pending CS to be enqueued
+ * Call ASIC H/W fini
+ * Flush all completions
+ * Re-initialize all internal data structures
+ * Call ASIC H/W init, late_init
+ * Test queues
+ * Enable device
+ *
+ * Returns 0 for success or an error on failure.
+ */
+int hl_device_reset(struct hl_device *hdev, bool hard_reset,
+                       bool from_hard_reset_thread)
+{
+       int i, rc;
+
+       if (!hdev->init_done) {
+               dev_err(hdev->dev,
+                       "Can't reset before initialization is done\n");
+               return 0;
+       }
+
+       if ((!hard_reset) && (!hdev->supports_soft_reset)) {
+               dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
+               hard_reset = true;
+       }
+
+       /*
+        * Prevent concurrency in this function - only one reset should be
+        * done at any given time. Only need to perform this if we didn't
+        * get from the dedicated hard reset thread
+        */
+       if (!from_hard_reset_thread) {
+               /* Block future CS/VM/JOB completion operations */
+               rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
+               if (rc)
+                       return 0;
+
+               if (hard_reset) {
+                       /* Disable PCI access from the device F/W so it won't
+                        * send us additional interrupts. We disable MSI/MSI-X
+                        * at the halt_engines function and we can't have the
+                        * F/W sending us interrupts after that. We need to
+                        * disable the access here because if the device is
+                        * marked as disabled, the message won't be sent.
+                        * Also, in case of a heartbeat failure, the device CPU
+                        * is marked as disabled so this message won't be sent
+                        */
+                       if (hl_fw_send_pci_access_msg(hdev,
+                                       ARMCP_PACKET_DISABLE_PCI_ACCESS))
+                               dev_warn(hdev->dev,
+                                       "Failed to disable PCI access by F/W\n");
+               }
+
+               /* This also blocks future CS/VM/JOB completion operations */
+               hdev->disabled = true;
+
+               /* Flush anyone that is inside the critical section of enqueue
+                * jobs to the H/W
+                */
+               hdev->asic_funcs->hw_queues_lock(hdev);
+               hdev->asic_funcs->hw_queues_unlock(hdev);
+
+               /* Flush anyone that is inside device open */
+               mutex_lock(&hdev->fpriv_list_lock);
+               mutex_unlock(&hdev->fpriv_list_lock);
+
+               dev_err(hdev->dev, "Going to RESET device!\n");
+       }
+
+again:
+       if ((hard_reset) && (!from_hard_reset_thread)) {
+               struct hl_device_reset_work *device_reset_work;
+
+               hdev->hard_reset_pending = true;
+
+               device_reset_work = kzalloc(sizeof(*device_reset_work),
+                                               GFP_ATOMIC);
+               if (!device_reset_work) {
+                       rc = -ENOMEM;
+                       goto out_err;
+               }
+
+               /*
+                * Because the reset function can't run from interrupt or
+                * from heartbeat work, we need to call the reset function
+                * from a dedicated work
+                */
+               INIT_WORK(&device_reset_work->reset_work,
+                               device_hard_reset_pending);
+               device_reset_work->hdev = hdev;
+               schedule_work(&device_reset_work->reset_work);
+
+               return 0;
+       }
+
+       if (hard_reset) {
+               device_late_fini(hdev);
+
+               /*
+                * Now that the heartbeat thread is closed, flush processes
+                * which are sending messages to CPU
+                */
+               mutex_lock(&hdev->send_cpu_message_lock);
+               mutex_unlock(&hdev->send_cpu_message_lock);
+       }
+
+       /*
+        * Halt the engines and disable interrupts so we won't get any more
+        * completions from H/W and we won't have any accesses from the
+        * H/W to the host machine
+        */
+       hdev->asic_funcs->halt_engines(hdev, hard_reset);
+
+       /* Go over all the queues, release all CS and their jobs */
+       hl_cs_rollback_all(hdev);
+
+       if (hard_reset) {
+               /* Kill processes here after CS rollback. This is because the
+                * process can't really exit until all its CSs are done, which
+                * is what we do in cs rollback
+                */
+               rc = device_kill_open_processes(hdev);
+               if (rc) {
+                       dev_crit(hdev->dev,
+                               "Failed to kill all open processes, stopping hard reset\n");
+                       goto out_err;
+               }
+
+               /* Flush the Event queue workers to make sure no other thread is
+                * reading or writing to registers during the reset
+                */
+               flush_workqueue(hdev->eq_wq);
+       }
+
+       /* Release kernel context */
+       if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1))
+               hdev->kernel_ctx = NULL;
+
+       /* Reset the H/W. It will be in idle state after this returns */
+       hdev->asic_funcs->hw_fini(hdev, hard_reset);
+
+       if (hard_reset) {
+               hl_vm_fini(hdev);
+               hl_mmu_fini(hdev);
+               hl_eq_reset(hdev, &hdev->event_queue);
+       }
+
+       /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
+       hl_hw_queue_reset(hdev, hard_reset);
+       for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+               hl_cq_reset(hdev, &hdev->completion_queue[i]);
+
+       hdev->idle_busy_ts_idx = 0;
+       hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0);
+       hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0);
+
+       if (hdev->cs_active_cnt)
+               dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n",
+                       hdev->cs_active_cnt);
+
+       mutex_lock(&hdev->fpriv_list_lock);
+
+       /* Make sure the context switch phase will run again */
+       if (hdev->compute_ctx) {
+               atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1);
+               hdev->compute_ctx->thread_ctx_switch_wait_token = 0;
+       }
+
+       mutex_unlock(&hdev->fpriv_list_lock);
+
+       /* Finished tear-down, starting to re-initialize */
+
+       if (hard_reset) {
+               hdev->device_cpu_disabled = false;
+               hdev->hard_reset_pending = false;
+
+               if (hdev->kernel_ctx) {
+                       dev_crit(hdev->dev,
+                               "kernel ctx was alive during hard reset, something is terribly wrong\n");
+                       rc = -EBUSY;
+                       goto out_err;
+               }
+
+               rc = hl_mmu_init(hdev);
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "Failed to initialize MMU S/W after hard reset\n");
+                       goto out_err;
+               }
+
+               /* Allocate the kernel context */
+               hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
+                                               GFP_KERNEL);
+               if (!hdev->kernel_ctx) {
+                       rc = -ENOMEM;
+                       goto out_err;
+               }
+
+               hdev->compute_ctx = NULL;
+
+               rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "failed to init kernel ctx in hard reset\n");
+                       kfree(hdev->kernel_ctx);
+                       hdev->kernel_ctx = NULL;
+                       goto out_err;
+               }
+       }
+
+       rc = hdev->asic_funcs->hw_init(hdev);
+       if (rc) {
+               dev_err(hdev->dev,
+                       "failed to initialize the H/W after reset\n");
+               goto out_err;
+       }
+
+       hdev->disabled = false;
+
+       /* Check that the communication with the device is working */
+       rc = hdev->asic_funcs->test_queues(hdev);
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to detect if device is alive after reset\n");
+               goto out_err;
+       }
+
+       if (hard_reset) {
+               rc = device_late_init(hdev);
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "Failed late init after hard reset\n");
+                       goto out_err;
+               }
+
+               rc = hl_vm_init(hdev);
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "Failed to init memory module after hard reset\n");
+                       goto out_err;
+               }
+
+               hl_set_max_power(hdev, hdev->max_power);
+       } else {
+               rc = hdev->asic_funcs->soft_reset_late_init(hdev);
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "Failed late init after soft reset\n");
+                       goto out_err;
+               }
+       }
+
+       atomic_set(&hdev->in_reset, 0);
+
+       if (hard_reset)
+               hdev->hard_reset_cnt++;
+       else
+               hdev->soft_reset_cnt++;
+
+       dev_warn(hdev->dev, "Successfully finished resetting the device\n");
+
+       return 0;
+
+out_err:
+       hdev->disabled = true;
+
+       if (hard_reset) {
+               dev_err(hdev->dev,
+                       "Failed to reset! Device is NOT usable\n");
+               hdev->hard_reset_cnt++;
+       } else {
+               dev_err(hdev->dev,
+                       "Failed to do soft-reset, trying hard reset\n");
+               hdev->soft_reset_cnt++;
+               hard_reset = true;
+               goto again;
+       }
+
+       atomic_set(&hdev->in_reset, 0);
+
+       return rc;
+}
+
+/*
+ * hl_device_init - main initialization function for habanalabs device
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ * Allocate an id for the device, do early initialization and then call the
+ * ASIC specific initialization functions. Finally, create the cdev and the
+ * Linux device to expose it to the user
+ */
+int hl_device_init(struct hl_device *hdev, struct class *hclass)
+{
+       int i, rc, cq_cnt, cq_ready_cnt;
+       char *name;
+       bool add_cdev_sysfs_on_err = false;
+
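+       /*
+        * The "hl%d" and "hl_controlD%d" names below divide hdev->id by two;
+        * the id space is evidently handed out in pairs (compute minor plus
+        * control minor), so both nodes of one card share the same index.
+        */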
+       name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
+       if (!name) {
+               rc = -ENOMEM;
+               goto out_disabled;
+       }
+
+       /* Initialize cdev and device structures */
+       rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
+                               &hdev->cdev, &hdev->dev);
+
+       kfree(name);
+
+       if (rc)
+               goto out_disabled;
+
+       name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
+       if (!name) {
+               rc = -ENOMEM;
+               goto free_dev;
+       }
+
+       /* Initialize cdev and device structures for control device */
+       rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
+                               name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
+
+       kfree(name);
+
+       if (rc)
+               goto free_dev;
+
+       /* Initialize ASIC function pointers and perform early init */
+       rc = device_early_init(hdev);
+       if (rc)
+               goto free_dev_ctrl;
+
+       /*
+        * Start calling ASIC initialization. First S/W then H/W and finally
+        * late init
+        */
+       rc = hdev->asic_funcs->sw_init(hdev);
+       if (rc)
+               goto early_fini;
+
+       /*
+        * Initialize the H/W queues. Must be done before hw_init, because
+        * there the addresses of the kernel queue are being written to the
+        * registers of the device
+        */
+       rc = hl_hw_queues_create(hdev);
+       if (rc) {
+               dev_err(hdev->dev, "failed to initialize kernel queues\n");
+               goto sw_fini;
+       }
+
+       cq_cnt = hdev->asic_prop.completion_queues_count;
+
+       /*
+        * Initialize the completion queues. Must be done before hw_init,
+        * because there the addresses of the completion queues are being
+        * passed as arguments to request_irq
+        */
+       if (cq_cnt) {
+               hdev->completion_queue = kcalloc(cq_cnt,
+                               sizeof(*hdev->completion_queue),
+                               GFP_KERNEL);
+
+               if (!hdev->completion_queue) {
+                       dev_err(hdev->dev,
+                               "failed to allocate completion queues\n");
+                       rc = -ENOMEM;
+                       goto hw_queues_destroy;
+               }
+       }
+
+       for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
+               rc = hl_cq_init(hdev, &hdev->completion_queue[i],
+                               hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "failed to initialize completion queue\n");
+                       goto cq_fini;
+               }
+               hdev->completion_queue[i].cq_idx = i;
+       }
+
+       /*
+        * Initialize the event queue. Must be done before hw_init,
+        * because there the address of the event queue is being
+        * passed as argument to request_irq
+        */
+       rc = hl_eq_init(hdev, &hdev->event_queue);
+       if (rc) {
+               dev_err(hdev->dev, "failed to initialize event queue\n");
+               goto cq_fini;
+       }
+
+       /* MMU S/W must be initialized before kernel context is created */
+       rc = hl_mmu_init(hdev);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
+               goto eq_fini;
+       }
+
+       /* Allocate the kernel context */
+       hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
+       if (!hdev->kernel_ctx) {
+               rc = -ENOMEM;
+               goto mmu_fini;
+       }
+
+       hdev->compute_ctx = NULL;
+
+       rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
+       if (rc) {
+               dev_err(hdev->dev, "failed to initialize kernel context\n");
+               kfree(hdev->kernel_ctx);
+               goto mmu_fini;
+       }
+
+       rc = hl_cb_pool_init(hdev);
+       if (rc) {
+               dev_err(hdev->dev, "failed to initialize CB pool\n");
+               goto release_ctx;
+       }
+
+       hl_debugfs_add_device(hdev);
+
+       if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
+               dev_info(hdev->dev,
+                       "H/W state is dirty, must reset before initializing\n");
+               hdev->asic_funcs->halt_engines(hdev, true);
+               hdev->asic_funcs->hw_fini(hdev, true);
+       }
+
+       /*
+        * From this point, in case of an error, add char devices and create
+        * sysfs nodes as part of the error flow, to allow debugging.
+        */
+       add_cdev_sysfs_on_err = true;
+
+       rc = hdev->asic_funcs->hw_init(hdev);
+       if (rc) {
+               dev_err(hdev->dev, "failed to initialize the H/W\n");
+               rc = 0;
+               goto out_disabled;
+       }
+
+       hdev->disabled = false;
+
+       /* Check that the communication with the device is working */
+       rc = hdev->asic_funcs->test_queues(hdev);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to detect if device is alive\n");
+               rc = 0;
+               goto out_disabled;
+       }
+
+       rc = device_late_init(hdev);
+       if (rc) {
+               dev_err(hdev->dev, "Failed late initialization\n");
+               rc = 0;
+               goto out_disabled;
+       }
+
+       dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
+               hdev->asic_name,
+               hdev->asic_prop.dram_size / 1024 / 1024 / 1024);
+
+       rc = hl_vm_init(hdev);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to initialize memory module\n");
+               rc = 0;
+               goto out_disabled;
+       }
+
+       /*
+        * Expose devices and sysfs nodes to user.
+        * From here there is no need to add char devices and create sysfs nodes
+        * in case of an error.
+        */
+       add_cdev_sysfs_on_err = false;
+       rc = device_cdev_sysfs_add(hdev);
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to add char devices and sysfs nodes\n");
+               rc = 0;
+               goto out_disabled;
+       }
+
+       /*
+        * hl_hwmon_init() must be called after device_late_init(), because only
+        * then do we get the information from the device about which
+        * hwmon-related sensors the device supports.
+        * Furthermore, it must be done after adding the device to the system.
+        */
+       rc = hl_hwmon_init(hdev);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to initialize hwmon\n");
+               rc = 0;
+               goto out_disabled;
+       }
+
+       dev_notice(hdev->dev,
+               "Successfully added device to habanalabs driver\n");
+
+       hdev->init_done = true;
+
+       return 0;
+
+release_ctx:
+       if (hl_ctx_put(hdev->kernel_ctx) != 1)
+               dev_err(hdev->dev,
+                       "kernel ctx is still alive on initialization failure\n");
+mmu_fini:
+       hl_mmu_fini(hdev);
+eq_fini:
+       hl_eq_fini(hdev, &hdev->event_queue);
+cq_fini:
+       for (i = 0 ; i < cq_ready_cnt ; i++)
+               hl_cq_fini(hdev, &hdev->completion_queue[i]);
+       kfree(hdev->completion_queue);
+hw_queues_destroy:
+       hl_hw_queues_destroy(hdev);
+sw_fini:
+       hdev->asic_funcs->sw_fini(hdev);
+early_fini:
+       device_early_fini(hdev);
+free_dev_ctrl:
+       kfree(hdev->dev_ctrl);
+free_dev:
+       kfree(hdev->dev);
+out_disabled:
+       hdev->disabled = true;
+       if (add_cdev_sysfs_on_err)
+               device_cdev_sysfs_add(hdev);
+       if (hdev->pdev)
+               dev_err(&hdev->pdev->dev,
+                       "Failed to initialize hl%d. Device is NOT usable !\n",
+                       hdev->id / 2);
+       else
+               pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
+                       hdev->id / 2);
+
+       return rc;
+}
+
+/*
+ * hl_device_fini - main tear-down function for habanalabs device
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ * Destroy the device, call ASIC fini functions and release the id
+ */
+void hl_device_fini(struct hl_device *hdev)
+{
+       int i, rc;
+       ktime_t timeout;
+
+       dev_info(hdev->dev, "Removing device\n");
+
+       /*
+        * This function is competing with the reset function, so try to
+        * take the reset atomic and if we are already in middle of reset,
+        * wait until reset function is finished. Reset function is designed
+        * to always finish. However, in Gaudi, because of all the network
+        * ports, the hard reset could take between 10-30 seconds
+        */
+
+       timeout = ktime_add_us(ktime_get(),
+                               HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000);
+       rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
+       while (rc) {
+               usleep_range(50, 200);
+               rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
+               if (ktime_compare(ktime_get(), timeout) > 0) {
+                       WARN(1, "Failed to remove device because reset function did not finish\n");
+                       return;
+               }
+       }
+
+       /* Mark device as disabled */
+       hdev->disabled = true;
+
+       /* Flush anyone that is inside the critical section of enqueue
+        * jobs to the H/W
+        */
+       hdev->asic_funcs->hw_queues_lock(hdev);
+       hdev->asic_funcs->hw_queues_unlock(hdev);
+
+       /* Flush anyone that is inside device open */
+       mutex_lock(&hdev->fpriv_list_lock);
+       mutex_unlock(&hdev->fpriv_list_lock);
+
+       hdev->hard_reset_pending = true;
+
+       hl_hwmon_fini(hdev);
+
+       device_late_fini(hdev);
+
+       hl_debugfs_remove_device(hdev);
+
+       /*
+        * Halt the engines and disable interrupts so we won't get any more
+        * completions from H/W and we won't have any accesses from the
+        * H/W to the host machine
+        */
+       hdev->asic_funcs->halt_engines(hdev, true);
+
+       /* Go over all the queues, release all CS and their jobs */
+       hl_cs_rollback_all(hdev);
+
+       /* Kill processes here after CS rollback. This is because the process
+        * can't really exit until all its CSs are done, which is what we
+        * do in cs rollback
+        */
+       rc = device_kill_open_processes(hdev);
+       if (rc)
+               dev_crit(hdev->dev, "Failed to kill all open processes\n");
+
+       hl_cb_pool_fini(hdev);
+
+       /* Release kernel context */
+       if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
+               dev_err(hdev->dev, "kernel ctx is still alive\n");
+
+       /* Reset the H/W. It will be in idle state after this returns */
+       hdev->asic_funcs->hw_fini(hdev, true);
+
+       hl_vm_fini(hdev);
+
+       hl_mmu_fini(hdev);
+
+       hl_eq_fini(hdev, &hdev->event_queue);
+
+       for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+               hl_cq_fini(hdev, &hdev->completion_queue[i]);
+       kfree(hdev->completion_queue);
+
+       hl_hw_queues_destroy(hdev);
+
+       /* Call ASIC S/W finalize function */
+       hdev->asic_funcs->sw_fini(hdev);
+
+       device_early_fini(hdev);
+
+       /* Hide devices and sysfs nodes from user */
+       device_cdev_sysfs_del(hdev);
+
+       pr_info("removed device successfully\n");
+}
+
+/*
+ * MMIO register access helper functions.
+ */
+
+/*
+ * hl_rreg - Read an MMIO register
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @reg: MMIO register offset (in bytes)
+ *
+ * Returns the value of the MMIO register we are asked to read
+ *
+ */
+inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
+{
+       return readl(hdev->rmmio + reg);
+}
+
+/*
+ * hl_wreg - Write to an MMIO register
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @reg: MMIO register offset (in bytes)
+ * @val: 32-bit value
+ *
+ * Writes the 32-bit value into the MMIO register
+ *
+ */
+inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
+{
+       writel(val, hdev->rmmio + reg);
+}
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
new file mode 100644 (file)
index 0000000..b2b8451
--- /dev/null
@@ -0,0 +1,589 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+#include "include/common/hl_boot_if.h"
+
+#include <linux/firmware.h>
+#include <linux/genalloc.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/slab.h>
+
+/**
+ * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
+ *
+ * @hdev: pointer to hl_device structure.
+ * @fw_name: the firmware image name
+ * @dst: IO memory mapped address space to copy firmware to
+ *
+ * Copy fw code from firmware file to device memory.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
+                               void __iomem *dst)
+{
+       const struct firmware *fw;
+       const u64 *fw_data;
+       size_t fw_size;
+       int rc;
+
+       rc = request_firmware(&fw, fw_name, hdev->dev);
+       if (rc) {
+               dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name);
+               goto out;
+       }
+
+       fw_size = fw->size;
+       if ((fw_size % 4) != 0) {
+               dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
+                       fw_name, fw_size);
+               rc = -EINVAL;
+               goto out;
+       }
+
+       dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
+
+       fw_data = (const u64 *) fw->data;
+
+       memcpy_toio(dst, fw_data, fw_size);
+
+out:
+       release_firmware(fw);
+       return rc;
+}
+
+int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
+{
+       struct armcp_packet pkt = {};
+
+       pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+       return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
+                               sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL);
+}
+
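+/*
+ * hl_fw_send_cpu_message - send a request to the device CPU (ArmCP) and wait
+ *                          for its completion.
+ *
+ * The message is copied into CPU-accessible DMA memory, submitted on the given
+ * H/W queue without a completion entry, and then the packet's fence field is
+ * polled until ArmCP writes the fence value or the timeout expires. The return
+ * code and the optional 64-bit result are then extracted from the packet.
+ */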
+int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
+                               u16 len, u32 timeout, long *result)
+{
+       struct armcp_packet *pkt;
+       dma_addr_t pkt_dma_addr;
+       u32 tmp;
+       int rc = 0;
+
+       pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
+                                                               &pkt_dma_addr);
+       if (!pkt) {
+               dev_err(hdev->dev,
+                       "Failed to allocate DMA memory for packet to CPU\n");
+               return -ENOMEM;
+       }
+
+       memcpy(pkt, msg, len);
+
+       mutex_lock(&hdev->send_cpu_message_lock);
+
+       if (hdev->disabled)
+               goto out;
+
+       if (hdev->device_cpu_disabled) {
+               rc = -EIO;
+               goto out;
+       }
+
+       rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
+               goto out;
+       }
+
+       rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
+                               (tmp == ARMCP_PACKET_FENCE_VAL), 1000,
+                               timeout, true);
+
+       hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
+
+       if (rc == -ETIMEDOUT) {
+               dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp);
+               hdev->device_cpu_disabled = true;
+               goto out;
+       }
+
+       tmp = le32_to_cpu(pkt->ctl);
+
+       rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
+       if (rc) {
+               dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
+                       rc,
+                       (tmp & ARMCP_PKT_CTL_OPCODE_MASK)
+                                               >> ARMCP_PKT_CTL_OPCODE_SHIFT);
+               rc = -EIO;
+       } else if (result) {
+               *result = (long) le64_to_cpu(pkt->result);
+       }
+
+out:
+       mutex_unlock(&hdev->send_cpu_message_lock);
+
+       hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
+
+       return rc;
+}
+
+int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
+{
+       struct armcp_packet pkt;
+       long result;
+       int rc;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.value = cpu_to_le64(event_type);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                       HL_DEVICE_TIMEOUT_USEC, &result);
+
+       if (rc)
+               dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
+
+       return rc;
+}
+
+int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
+               size_t irq_arr_size)
+{
+       struct armcp_unmask_irq_arr_packet *pkt;
+       size_t total_pkt_size;
+       long result;
+       int rc;
+
+       total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
+                       irq_arr_size;
+
+       /* data should be aligned to 8 bytes in order for ArmCP to copy it */
+       total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
+
+       /* total_pkt_size is casted to u16 later on */
+       if (total_pkt_size > USHRT_MAX) {
+               dev_err(hdev->dev, "too many elements in IRQ array\n");
+               return -EINVAL;
+       }
+
+       pkt = kzalloc(total_pkt_size, GFP_KERNEL);
+       if (!pkt)
+               return -ENOMEM;
+
+       pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
+       memcpy(&pkt->irqs, irq_arr, irq_arr_size);
+
+       pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
+                                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
+                       total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result);
+
+       if (rc)
+               dev_err(hdev->dev, "failed to unmask IRQ array\n");
+
+       kfree(pkt);
+
+       return rc;
+}
+
+int hl_fw_test_cpu_queue(struct hl_device *hdev)
+{
+       struct armcp_packet test_pkt = {};
+       long result;
+       int rc;
+
+       test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
+                                       ARMCP_PKT_CTL_OPCODE_SHIFT);
+       test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
+                       sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
+
+       if (!rc) {
+               if (result != ARMCP_PACKET_FENCE_VAL)
+                       dev_err(hdev->dev,
+                               "CPU queue test failed (0x%08lX)\n", result);
+       } else {
+               dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
+       }
+
+       return rc;
+}
+
+void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
+                                               dma_addr_t *dma_handle)
+{
+       u64 kernel_addr;
+
+       kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
+
+       *dma_handle = hdev->cpu_accessible_dma_address +
+               (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
+
+       return (void *) (uintptr_t) kernel_addr;
+}
+
+void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
+                                       void *vaddr)
+{
+       gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
+                       size);
+}
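+
+/*
+ * Note (illustrative): the two helpers above are typically what the ASIC code
+ * plugs into asic_funcs->cpu_accessible_dma_pool_alloc/free. They carve CPU
+ * packets out of the gen_pool backing the CPU-accessible memory region and
+ * translate between kernel virtual addresses and DMA addresses.
+ */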
+
+int hl_fw_send_heartbeat(struct hl_device *hdev)
+{
+       struct armcp_packet hb_pkt = {};
+       long result;
+       int rc;
+
+       hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
+                                       ARMCP_PKT_CTL_OPCODE_SHIFT);
+       hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
+                       sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
+
+       if ((rc) || (result != ARMCP_PACKET_FENCE_VAL))
+               rc = -EIO;
+
+       return rc;
+}
+
+int hl_fw_armcp_info_get(struct hl_device *hdev)
+{
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct armcp_packet pkt = {};
+       void *armcp_info_cpu_addr;
+       dma_addr_t armcp_info_dma_addr;
+       long result;
+       int rc;
+
+       armcp_info_cpu_addr =
+                       hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+                                       sizeof(struct armcp_info),
+                                       &armcp_info_dma_addr);
+       if (!armcp_info_cpu_addr) {
+               dev_err(hdev->dev,
+                       "Failed to allocate DMA memory for ArmCP info packet\n");
+               return -ENOMEM;
+       }
+
+       memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.addr = cpu_to_le64(armcp_info_dma_addr);
+       pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info));
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                       HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to handle ArmCP info pkt, error %d\n", rc);
+               goto out;
+       }
+
+       memcpy(&prop->armcp_info, armcp_info_cpu_addr,
+                       sizeof(prop->armcp_info));
+
+       rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors);
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to build hwmon channel info, error %d\n", rc);
+               rc = -EFAULT;
+               goto out;
+       }
+
+out:
+       hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
+                       sizeof(struct armcp_info), armcp_info_cpu_addr);
+
+       return rc;
+}
+
+int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
+{
+       struct armcp_packet pkt = {};
+       void *eeprom_info_cpu_addr;
+       dma_addr_t eeprom_info_dma_addr;
+       long result;
+       int rc;
+
+       eeprom_info_cpu_addr =
+                       hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+                                       max_size, &eeprom_info_dma_addr);
+       if (!eeprom_info_cpu_addr) {
+               dev_err(hdev->dev,
+                       "Failed to allocate DMA memory for ArmCP EEPROM packet\n");
+               return -ENOMEM;
+       }
+
+       memset(eeprom_info_cpu_addr, 0, max_size);
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
+       pkt.data_max_size = cpu_to_le32(max_size);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                       HL_ARMCP_EEPROM_TIMEOUT_USEC, &result);
+
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to handle ArmCP EEPROM packet, error %d\n", rc);
+               goto out;
+       }
+
+       /* result contains the actual size */
+       memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
+
+out:
+       hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
+                       eeprom_info_cpu_addr);
+
+       return rc;
+}
+
+static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
+{
+       u32 err_val;
+
+       /* Some of the firmware status codes are deprecated in newer f/w
+        * versions. In those versions, the errors are reported
+        * in different registers. Therefore, we need to check those
+        * registers and print the exact errors. Moreover, there
+        * may be multiple errors, so we need to report on each error
+        * separately. Some of the error codes might indicate a state
+        * that is not an error per se, but it is an error in a production
+        * environment.
+        */
+       err_val = RREG32(boot_err0_reg);
+       if (!(err_val & CPU_BOOT_ERR0_ENABLED))
+               return;
+
+       if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
+               dev_err(hdev->dev,
+                       "Device boot error - DRAM initialization failed\n");
+       if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
+               dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
+       if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
+               dev_err(hdev->dev,
+                       "Device boot error - Thermal Sensor initialization failed\n");
+       if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
+               dev_warn(hdev->dev,
+                       "Device boot warning - Skipped DRAM initialization\n");
+       if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED)
+               dev_warn(hdev->dev,
+                       "Device boot warning - Skipped waiting for BMC\n");
+       if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
+               dev_err(hdev->dev,
+                       "Device boot error - Serdes data from BMC not available\n");
+       if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
+               dev_err(hdev->dev,
+                       "Device boot error - NIC F/W initialization failed\n");
+}
+
+static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status)
+{
+       switch (status) {
+       case CPU_BOOT_STATUS_NA:
+               dev_err(hdev->dev,
+                       "Device boot error - BTL did NOT run\n");
+               break;
+       case CPU_BOOT_STATUS_IN_WFE:
+               dev_err(hdev->dev,
+                       "Device boot error - Stuck inside WFE loop\n");
+               break;
+       case CPU_BOOT_STATUS_IN_BTL:
+               dev_err(hdev->dev,
+                       "Device boot error - Stuck in BTL\n");
+               break;
+       case CPU_BOOT_STATUS_IN_PREBOOT:
+               dev_err(hdev->dev,
+                       "Device boot error - Stuck in Preboot\n");
+               break;
+       case CPU_BOOT_STATUS_IN_SPL:
+               dev_err(hdev->dev,
+                       "Device boot error - Stuck in SPL\n");
+               break;
+       case CPU_BOOT_STATUS_IN_UBOOT:
+               dev_err(hdev->dev,
+                       "Device boot error - Stuck in u-boot\n");
+               break;
+       case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
+               dev_err(hdev->dev,
+                       "Device boot error - DRAM initialization failed\n");
+               break;
+       case CPU_BOOT_STATUS_UBOOT_NOT_READY:
+               dev_err(hdev->dev,
+                       "Device boot error - u-boot stopped by user\n");
+               break;
+       case CPU_BOOT_STATUS_TS_INIT_FAIL:
+               dev_err(hdev->dev,
+                       "Device boot error - Thermal Sensor initialization failed\n");
+               break;
+       default:
+               dev_err(hdev->dev,
+                       "Device boot error - Invalid status code %d\n",
+                       status);
+               break;
+       }
+}
+
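+/*
+ * hl_fw_init_cpu - bring up the embedded CPU and (optionally) load firmware.
+ *
+ * Rough sequence, as implemented below: wait for a boot-fit request and, if one
+ * arrives, load the boot fit and acknowledge it through msg_to_cpu_reg; wait
+ * for the CPU boot-loader to become ready; optionally tell it to skip waiting
+ * for the BMC; load the firmware image and wait for the CPU to report
+ * CPU_BOOT_STATUS_SRAM_AVAIL. Boot errors are read from boot_err0_reg on exit.
+ */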
+int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
+                       u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
+                       u32 boot_err0_reg, bool skip_bmc,
+                       u32 cpu_timeout, u32 boot_fit_timeout)
+{
+       u32 status;
+       int rc;
+
+       dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
+               cpu_timeout / USEC_PER_SEC);
+
+       /* Wait for boot FIT request */
+       rc = hl_poll_timeout(
+               hdev,
+               cpu_boot_status_reg,
+               status,
+               status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
+               10000,
+               boot_fit_timeout);
+
+       if (rc) {
+               dev_dbg(hdev->dev,
+                       "No boot fit request received, resuming boot\n");
+       } else {
+               rc = hdev->asic_funcs->load_boot_fit_to_device(hdev);
+               if (rc)
+                       goto out;
+
+               /* Clear device CPU message status */
+               WREG32(cpu_msg_status_reg, CPU_MSG_CLR);
+
+               /* Signal device CPU that boot loader is ready */
+               WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
+
+               /* Poll for CPU device ack */
+               rc = hl_poll_timeout(
+                       hdev,
+                       cpu_msg_status_reg,
+                       status,
+                       status == CPU_MSG_OK,
+                       10000,
+                       boot_fit_timeout);
+
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "Timeout waiting for boot fit load ack\n");
+                       goto out;
+               }
+
+               /* Clear message */
+               WREG32(msg_to_cpu_reg, KMD_MSG_NA);
+       }
+
+       /* Make sure CPU boot-loader is running */
+       rc = hl_poll_timeout(
+               hdev,
+               cpu_boot_status_reg,
+               status,
+               (status == CPU_BOOT_STATUS_DRAM_RDY) ||
+               (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
+               (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
+               (status == CPU_BOOT_STATUS_SRAM_AVAIL),
+               10000,
+               cpu_timeout);
+
+       /* Read U-Boot and preboot versions now, in case we fail later */
+       hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
+       hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
+
+       /* Some of the status codes below are deprecated in newer f/w
+        * versions but we keep them here for backward compatibility
+        */
+       if (rc) {
+               hl_detect_cpu_boot_status(hdev, status);
+               rc = -EIO;
+               goto out;
+       }
+
+       if (!hdev->fw_loading) {
+               dev_info(hdev->dev, "Skip loading FW\n");
+               goto out;
+       }
+
+       if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
+               goto out;
+
+       dev_info(hdev->dev,
+               "Loading firmware to device, may take some time...\n");
+
+       rc = hdev->asic_funcs->load_firmware_to_device(hdev);
+       if (rc)
+               goto out;
+
+       if (skip_bmc) {
+               WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC);
+
+               rc = hl_poll_timeout(
+                       hdev,
+                       cpu_boot_status_reg,
+                       status,
+                       (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
+                       10000,
+                       cpu_timeout);
+
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "Failed to get ACK on skipping BMC, %d\n",
+                               status);
+                       WREG32(msg_to_cpu_reg, KMD_MSG_NA);
+                       rc = -EIO;
+                       goto out;
+               }
+       }
+
+       WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
+
+       rc = hl_poll_timeout(
+               hdev,
+               cpu_boot_status_reg,
+               status,
+               (status == CPU_BOOT_STATUS_SRAM_AVAIL),
+               10000,
+               cpu_timeout);
+
+       /* Clear message */
+       WREG32(msg_to_cpu_reg, KMD_MSG_NA);
+
+       if (rc) {
+               if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
+                       dev_err(hdev->dev,
+                               "Device reports FIT image is corrupted\n");
+               else
+                       dev_err(hdev->dev,
+                               "Failed to load firmware to device, %d\n",
+                               status);
+
+               rc = -EIO;
+               goto out;
+       }
+
+       dev_info(hdev->dev, "Successfully loaded firmware to device\n");
+
+out:
+       fw_read_errors(hdev, boot_err0_reg);
+
+       return rc;
+}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
new file mode 100644 (file)
index 0000000..82532f1
--- /dev/null
@@ -0,0 +1,1948 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef HABANALABSP_H_
+#define HABANALABSP_H_
+
+#include "include/common/armcp_if.h"
+#include "include/common/qman_if.h"
+#include <uapi/misc/habanalabs.h>
+
+#include <linux/cdev.h>
+#include <linux/iopoll.h>
+#include <linux/irqreturn.h>
+#include <linux/dma-fence.h>
+#include <linux/dma-direction.h>
+#include <linux/scatterlist.h>
+#include <linux/hashtable.h>
+
+#define HL_NAME                                "habanalabs"
+
+#define HL_MMAP_CB_MASK                        (0x8000000000000000ull >> PAGE_SHIFT)
+
+#define HL_PENDING_RESET_PER_SEC       30
+
+#define HL_HARD_RESET_MAX_TIMEOUT      120
+
+#define HL_DEVICE_TIMEOUT_USEC         1000000 /* 1 s */
+
+#define HL_HEARTBEAT_PER_USEC          5000000 /* 5 s */
+
+#define HL_PLL_LOW_JOB_FREQ_USEC       5000000 /* 5 s */
+
+#define HL_ARMCP_INFO_TIMEOUT_USEC     10000000 /* 10s */
+#define HL_ARMCP_EEPROM_TIMEOUT_USEC   10000000 /* 10s */
+
+#define HL_PCI_ELBI_TIMEOUT_MSEC       10 /* 10ms */
+
+#define HL_SIM_MAX_TIMEOUT_US          10000000 /* 10s */
+
+#define HL_IDLE_BUSY_TS_ARR_SIZE       4096
+
+/* Memory */
+#define MEM_HASH_TABLE_BITS            7 /* 1 << 7 buckets */
+
+/* MMU */
+#define MMU_HASH_TABLE_BITS            7 /* 1 << 7 buckets */
+
+/*
+ * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
+ * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
+ */
+#define HL_RSVD_SOBS                   4
+#define HL_RSVD_MONS                   2
+
+#define HL_RSVD_SOBS_IN_USE            2
+#define HL_RSVD_MONS_IN_USE            1
+
+#define HL_MAX_SOB_VAL                 (1 << 15)
+
+#define IS_POWER_OF_2(n)               (n != 0 && ((n & (n - 1)) == 0))
+#define IS_MAX_PENDING_CS_VALID(n)     (IS_POWER_OF_2(n) && (n > 1))
+
+#define HL_PCI_NUM_BARS                        6
+
+/**
+ * struct pgt_info - MMU hop page info.
+ * @node: hash linked-list node for the pgts shadow hash of pgts.
+ * @phys_addr: physical address of the pgt.
+ * @shadow_addr: shadow hop in the host.
+ * @ctx: pointer to the owner ctx.
+ * @num_of_ptes: indicates how many ptes are used in the pgt.
+ *
+ * The MMU page tables hierarchy is placed on the DRAM. When a new level (hop)
+ * is needed during mapping, a new page is allocated and this structure holds
+ * its essential information. During unmapping, if no valid PTEs remained in the
+ * page, it is freed with its pgt_info structure.
+ */
+struct pgt_info {
+       struct hlist_node       node;
+       u64                     phys_addr;
+       u64                     shadow_addr;
+       struct hl_ctx           *ctx;
+       int                     num_of_ptes;
+};
+
+struct hl_device;
+struct hl_fpriv;
+
+/**
+ * enum hl_pci_match_mode - pci match mode per region
+ * @PCI_ADDRESS_MATCH_MODE: address match mode
+ * @PCI_BAR_MATCH_MODE: bar match mode
+ */
+enum hl_pci_match_mode {
+       PCI_ADDRESS_MATCH_MODE,
+       PCI_BAR_MATCH_MODE
+};
+
+/**
+ * enum hl_fw_component - F/W components to read version through registers.
+ * @FW_COMP_UBOOT: u-boot.
+ * @FW_COMP_PREBOOT: preboot.
+ */
+enum hl_fw_component {
+       FW_COMP_UBOOT,
+       FW_COMP_PREBOOT
+};
+
+/**
+ * enum hl_queue_type - Supported QUEUE types.
+ * @QUEUE_TYPE_NA: queue is not available.
+ * @QUEUE_TYPE_EXT: external queue which is a DMA channel that may access the
+ *                  host.
+ * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's
+ *                     memories and/or operates the compute engines.
+ * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU.
+ * @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion
+ *                 notifications are sent by H/W.
+ */
+enum hl_queue_type {
+       QUEUE_TYPE_NA,
+       QUEUE_TYPE_EXT,
+       QUEUE_TYPE_INT,
+       QUEUE_TYPE_CPU,
+       QUEUE_TYPE_HW
+};
+
+enum hl_cs_type {
+       CS_TYPE_DEFAULT,
+       CS_TYPE_SIGNAL,
+       CS_TYPE_WAIT
+};
+
+/*
+ * struct hl_inbound_pci_region - inbound region descriptor
+ * @mode: pci match mode for this region
+ * @addr: region target address
+ * @size: region size in bytes
+ * @offset_in_bar: offset within bar (address match mode)
+ * @bar: bar id
+ */
+struct hl_inbound_pci_region {
+       enum hl_pci_match_mode  mode;
+       u64                     addr;
+       u64                     size;
+       u64                     offset_in_bar;
+       u8                      bar;
+};
+
+/*
+ * struct hl_outbound_pci_region - outbound region descriptor
+ * @addr: region target address
+ * @size: region size in bytes
+ */
+struct hl_outbound_pci_region {
+       u64     addr;
+       u64     size;
+};
+
+/*
+ * struct hl_hw_sob - H/W SOB info.
+ * @hdev: habanalabs device structure.
+ * @kref: refcount of this SOB. The SOB will reset once the refcount is zero.
+ * @sob_id: id of this SOB.
+ * @q_idx: the H/W queue that uses this SOB.
+ */
+struct hl_hw_sob {
+       struct hl_device        *hdev;
+       struct kref             kref;
+       u32                     sob_id;
+       u32                     q_idx;
+};
+
+/**
+ * struct hw_queue_properties - queue information.
+ * @type: queue type.
+ * @driver_only: true if only the driver is allowed to send a job to this queue,
+ *               false otherwise.
+ * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
+ *                      queue, false otherwise (a CB address must be provided).
+ * @supports_sync_stream: True if queue supports sync stream
+ */
+struct hw_queue_properties {
+       enum hl_queue_type      type;
+       u8                      driver_only;
+       u8                      requires_kernel_cb;
+       u8                      supports_sync_stream;
+};
+
+/**
+ * enum vm_type_t - virtual memory mapping request information.
+ * @VM_TYPE_USERPTR: mapping of user memory to device virtual address.
+ * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address.
+ */
+enum vm_type_t {
+       VM_TYPE_USERPTR = 0x1,
+       VM_TYPE_PHYS_PACK = 0x2
+};
+
+/**
+ * enum hl_device_hw_state - H/W device state. Use this to decide whether a
+ *                           reset is needed before hw_init.
+ * @HL_DEVICE_HW_STATE_CLEAN: H/W state is clean. i.e. after hard reset
+ * @HL_DEVICE_HW_STATE_DIRTY: H/W state is dirty. i.e. we started to execute
+ *                            hw_init
+ */
+enum hl_device_hw_state {
+       HL_DEVICE_HW_STATE_CLEAN = 0,
+       HL_DEVICE_HW_STATE_DIRTY
+};
+
+/**
+ * struct hl_mmu_properties - ASIC specific MMU address translation properties.
+ * @start_addr: virtual start address of the memory region.
+ * @end_addr: virtual end address of the memory region.
+ * @hop0_shift: shift of hop 0 mask.
+ * @hop1_shift: shift of hop 1 mask.
+ * @hop2_shift: shift of hop 2 mask.
+ * @hop3_shift: shift of hop 3 mask.
+ * @hop4_shift: shift of hop 4 mask.
+ * @hop0_mask: mask to get the PTE address in hop 0.
+ * @hop1_mask: mask to get the PTE address in hop 1.
+ * @hop2_mask: mask to get the PTE address in hop 2.
+ * @hop3_mask: mask to get the PTE address in hop 3.
+ * @hop4_mask: mask to get the PTE address in hop 4.
+ * @page_size: default page size used to allocate memory.
+ */
+struct hl_mmu_properties {
+       u64     start_addr;
+       u64     end_addr;
+       u64     hop0_shift;
+       u64     hop1_shift;
+       u64     hop2_shift;
+       u64     hop3_shift;
+       u64     hop4_shift;
+       u64     hop0_mask;
+       u64     hop1_mask;
+       u64     hop2_mask;
+       u64     hop3_mask;
+       u64     hop4_mask;
+       u32     page_size;
+};
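+
+/*
+ * Example (illustrative only, not part of the API): with the shift/mask pairs
+ * above, the PTE address inside hop N for a given virtual address is derived
+ * roughly as
+ *
+ *     pte_addr = hopN_table_addr +
+ *                     mmu_pte_size * ((virt_addr & hopN_mask) >> hopN_shift)
+ *
+ * where hopN_table_addr is the (physical or shadow) address of that hop's
+ * page table.
+ */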
+
+/**
+ * struct asic_fixed_properties - ASIC specific immutable properties.
+ * @hw_queues_props: H/W queues properties.
+ * @armcp_info: received various information from ArmCP regarding the H/W, e.g.
+ *             available sensors.
+ * @uboot_ver: F/W U-boot version.
+ * @preboot_ver: F/W Preboot version.
+ * @dmmu: DRAM MMU address translation properties.
+ * @pmmu: PCI (host) MMU address translation properties.
+ * @pmmu_huge: PCI (host) MMU address translation properties for memory
+ *              allocated with huge pages.
+ * @sram_base_address: SRAM physical start address.
+ * @sram_end_address: SRAM physical end address.
+ * @sram_user_base_address: SRAM physical start address for user access.
+ * @dram_base_address: DRAM physical start address.
+ * @dram_end_address: DRAM physical end address.
+ * @dram_user_base_address: DRAM physical start address for user access.
+ * @dram_size: DRAM total size.
+ * @dram_pci_bar_size: size of PCI bar towards DRAM.
+ * @max_power_default: max power of the device after reset
+ * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
+ *                                      fault.
+ * @pcie_dbi_base_address: Base address of the PCIE_DBI block.
+ * @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register.
+ * @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
+ * @mmu_dram_default_page_addr: DRAM default page physical address.
+ * @mmu_pgt_size: MMU page tables total size.
+ * @mmu_pte_size: PTE size in MMU page tables.
+ * @mmu_hop_table_size: MMU hop table size.
+ * @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
+ * @dram_page_size: page size for MMU DRAM allocation.
+ * @cfg_size: configuration space size on SRAM.
+ * @sram_size: total size of SRAM.
+ * @max_asid: maximum number of open contexts (ASIDs).
+ * @num_of_events: number of possible internal H/W IRQs.
+ * @psoc_pci_pll_nr: PCI PLL NR value.
+ * @psoc_pci_pll_nf: PCI PLL NF value.
+ * @psoc_pci_pll_od: PCI PLL OD value.
+ * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
+ * @psoc_timestamp_frequency: frequency of the psoc timestamp clock.
+ * @high_pll: high PLL frequency used by the device.
+ * @cb_pool_cb_cnt: number of CBs in the CB pool.
+ * @cb_pool_cb_size: size of each CB in the CB pool.
+ * @max_pending_cs: maximum of concurrent pending command submissions
+ * @max_queues: maximum amount of queues in the system
+ * @sync_stream_first_sob: first sync object available for sync stream use
+ * @sync_stream_first_mon: first monitor available for sync stream use
+ * @tpc_enabled_mask: which TPCs are enabled.
+ * @completion_queues_count: number of completion queues.
+ */
+struct asic_fixed_properties {
+       struct hw_queue_properties      *hw_queues_props;
+       struct armcp_info               armcp_info;
+       char                            uboot_ver[VERSION_MAX_LEN];
+       char                            preboot_ver[VERSION_MAX_LEN];
+       struct hl_mmu_properties        dmmu;
+       struct hl_mmu_properties        pmmu;
+       struct hl_mmu_properties        pmmu_huge;
+       u64                             sram_base_address;
+       u64                             sram_end_address;
+       u64                             sram_user_base_address;
+       u64                             dram_base_address;
+       u64                             dram_end_address;
+       u64                             dram_user_base_address;
+       u64                             dram_size;
+       u64                             dram_pci_bar_size;
+       u64                             max_power_default;
+       u64                             dram_size_for_default_page_mapping;
+       u64                             pcie_dbi_base_address;
+       u64                             pcie_aux_dbi_reg_addr;
+       u64                             mmu_pgt_addr;
+       u64                             mmu_dram_default_page_addr;
+       u32                             mmu_pgt_size;
+       u32                             mmu_pte_size;
+       u32                             mmu_hop_table_size;
+       u32                             mmu_hop0_tables_total_size;
+       u32                             dram_page_size;
+       u32                             cfg_size;
+       u32                             sram_size;
+       u32                             max_asid;
+       u32                             num_of_events;
+       u32                             psoc_pci_pll_nr;
+       u32                             psoc_pci_pll_nf;
+       u32                             psoc_pci_pll_od;
+       u32                             psoc_pci_pll_div_factor;
+       u32                             psoc_timestamp_frequency;
+       u32                             high_pll;
+       u32                             cb_pool_cb_cnt;
+       u32                             cb_pool_cb_size;
+       u32                             max_pending_cs;
+       u32                             max_queues;
+       u16                             sync_stream_first_sob;
+       u16                             sync_stream_first_mon;
+       u8                              tpc_enabled_mask;
+       u8                              completion_queues_count;
+};
+
+/**
+ * struct hl_cs_compl - command submission completion object.
+ * @base_fence: kernel fence object.
+ * @lock: spinlock to protect fence.
+ * @hdev: habanalabs device structure.
+ * @hw_sob: the H/W SOB used in this signal/wait CS.
+ * @cs_seq: command submission sequence number.
+ * @type: type of the CS - signal/wait.
+ * @sob_val: the SOB value that is used in this signal/wait CS.
+ */
+struct hl_cs_compl {
+       struct dma_fence        base_fence;
+       spinlock_t              lock;
+       struct hl_device        *hdev;
+       struct hl_hw_sob        *hw_sob;
+       u64                     cs_seq;
+       enum hl_cs_type         type;
+       u16                     sob_val;
+};
+
+/*
+ * Command Buffers
+ */
+
+/**
+ * struct hl_cb_mgr - describes a Command Buffer Manager.
+ * @cb_lock: protects cb_handles.
+ * @cb_handles: an idr to hold all command buffer handles.
+ */
+struct hl_cb_mgr {
+       spinlock_t              cb_lock;
+       struct idr              cb_handles; /* protected by cb_lock */
+};
+
+/**
+ * struct hl_cb - describes a Command Buffer.
+ * @refcount: reference counter for usage of the CB.
+ * @hdev: pointer to device this CB belongs to.
+ * @lock: spinlock to protect mmap/cs flows.
+ * @debugfs_list: node in debugfs list of command buffers.
+ * @pool_list: node in pool list of command buffers.
+ * @kernel_address: Holds the CB's kernel virtual address.
+ * @bus_address: Holds the CB's DMA address.
+ * @mmap_size: Holds the CB's size that was mmaped.
+ * @size: holds the CB's size.
+ * @id: the CB's ID.
+ * @cs_cnt: holds number of CS that this CB participates in.
+ * @ctx_id: holds the ID of the owner's context.
+ * @mmap: true if the CB is currently mmaped to user.
+ * @is_pool: true if CB was acquired from the pool, false otherwise.
+ */
+struct hl_cb {
+       struct kref             refcount;
+       struct hl_device        *hdev;
+       spinlock_t              lock;
+       struct list_head        debugfs_list;
+       struct list_head        pool_list;
+       u64                     kernel_address;
+       dma_addr_t              bus_address;
+       u32                     mmap_size;
+       u32                     size;
+       u32                     id;
+       u32                     cs_cnt;
+       u32                     ctx_id;
+       u8                      mmap;
+       u8                      is_pool;
+};
+
+
+/*
+ * QUEUES
+ */
+
+struct hl_cs_job;
+
+/* Queue length of external and HW queues */
+#define HL_QUEUE_LENGTH                        4096
+#define HL_QUEUE_SIZE_IN_BYTES         (HL_QUEUE_LENGTH * HL_BD_SIZE)
+
+#if (HL_MAX_JOBS_PER_CS > HL_QUEUE_LENGTH)
+#error "HL_QUEUE_LENGTH must be at least HL_MAX_JOBS_PER_CS"
+#endif
+
+/* HL_CQ_LENGTH is in units of struct hl_cq_entry */
+#define HL_CQ_LENGTH                   HL_QUEUE_LENGTH
+#define HL_CQ_SIZE_IN_BYTES            (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)
+
+/* Must be power of 2 */
+#define HL_EQ_LENGTH                   64
+#define HL_EQ_SIZE_IN_BYTES            (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
+
+/* Host <-> ArmCP shared memory size */
+#define HL_CPU_ACCESSIBLE_MEM_SIZE     SZ_2M
+
+/**
+ * struct hl_hw_queue - describes a H/W transport queue.
+ * @hw_sob: array of the used H/W SOBs by this H/W queue.
+ * @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
+ * @queue_type: type of queue.
+ * @kernel_address: holds the queue's kernel virtual address.
+ * @bus_address: holds the queue's DMA address.
+ * @pi: holds the queue's pi value.
+ * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci).
+ * @hw_queue_id: the id of the H/W queue.
+ * @cq_id: the id for the corresponding CQ for this H/W queue.
+ * @msi_vec: the IRQ number of the H/W queue.
+ * @int_queue_len: length of internal queue (number of entries).
+ * @next_sob_val: the next value to use for the currently used SOB.
+ * @base_sob_id: the base SOB id of the SOBs used by this queue.
+ * @base_mon_id: the base MON id of the MONs used by this queue.
+ * @valid: is the queue valid (we have an array of 32 queues, not all of them
+ *         exist).
+ * @curr_sob_offset: the id offset to the currently used SOB from the
+ *                   HL_RSVD_SOBS that are being used by this queue.
+ * @supports_sync_stream: True if queue supports sync stream
+ */
+struct hl_hw_queue {
+       struct hl_hw_sob        hw_sob[HL_RSVD_SOBS];
+       struct hl_cs_job        **shadow_queue;
+       enum hl_queue_type      queue_type;
+       u64                     kernel_address;
+       dma_addr_t              bus_address;
+       u32                     pi;
+       atomic_t                ci;
+       u32                     hw_queue_id;
+       u32                     cq_id;
+       u32                     msi_vec;
+       u16                     int_queue_len;
+       u16                     next_sob_val;
+       u16                     base_sob_id;
+       u16                     base_mon_id;
+       u8                      valid;
+       u8                      curr_sob_offset;
+       u8                      supports_sync_stream;
+};
+
+/**
+ * struct hl_cq - describes a completion queue
+ * @hdev: pointer to the device structure
+ * @kernel_address: holds the queue's kernel virtual address
+ * @bus_address: holds the queue's DMA address
+ * @cq_idx: completion queue index in array
+ * @hw_queue_id: the id of the matching H/W queue
+ * @ci: ci inside the queue
+ * @pi: pi inside the queue
+ * @free_slots_cnt: counter of free slots in queue
+ */
+struct hl_cq {
+       struct hl_device        *hdev;
+       u64                     kernel_address;
+       dma_addr_t              bus_address;
+       u32                     cq_idx;
+       u32                     hw_queue_id;
+       u32                     ci;
+       u32                     pi;
+       atomic_t                free_slots_cnt;
+};
+
+/**
+ * struct hl_eq - describes the event queue (single one per device)
+ * @hdev: pointer to the device structure
+ * @kernel_address: holds the queue's kernel virtual address
+ * @bus_address: holds the queue's DMA address
+ * @ci: ci inside the queue
+ */
+struct hl_eq {
+       struct hl_device        *hdev;
+       u64                     kernel_address;
+       dma_addr_t              bus_address;
+       u32                     ci;
+};
+
+
+/*
+ * ASICs
+ */
+
+/**
+ * enum hl_asic_type - supported ASIC types.
+ * @ASIC_INVALID: Invalid ASIC type.
+ * @ASIC_GOYA: Goya device.
+ * @ASIC_GAUDI: Gaudi device.
+ */
+enum hl_asic_type {
+       ASIC_INVALID,
+       ASIC_GOYA,
+       ASIC_GAUDI
+};
+
+struct hl_cs_parser;
+
+/**
+ * enum hl_pm_mng_profile - power management profile.
+ * @PM_AUTO: internal clock is set by the Linux driver.
+ * @PM_MANUAL: internal clock is set by the user.
+ * @PM_LAST: last power management type.
+ */
+enum hl_pm_mng_profile {
+       PM_AUTO = 1,
+       PM_MANUAL,
+       PM_LAST
+};
+
+/**
+ * enum hl_pll_frequency - PLL frequency.
+ * @PLL_HIGH: high frequency.
+ * @PLL_LOW: low frequency.
+ * @PLL_LAST: last frequency values that were configured by the user.
+ */
+enum hl_pll_frequency {
+       PLL_HIGH = 1,
+       PLL_LOW,
+       PLL_LAST
+};
+
+#define PLL_REF_CLK 50
+
+enum div_select_defs {
+       DIV_SEL_REF_CLK = 0,
+       DIV_SEL_PLL_CLK = 1,
+       DIV_SEL_DIVIDED_REF = 2,
+       DIV_SEL_DIVIDED_PLL = 3,
+};
+
+/**
+ * struct hl_asic_funcs - ASIC specific functions that can be called from
+ *                        common code.
+ * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W.
+ * @early_fini: tears down what was done in early_init.
+ * @late_init: sets up late driver/hw state (post hw_init) - Optional.
+ * @late_fini: tears down what was done in late_init (pre hw_fini) - Optional.
+ * @sw_init: sets up driver state, does not configure H/W.
+ * @sw_fini: tears down driver state, does not configure H/W.
+ * @hw_init: sets up the H/W state.
+ * @hw_fini: tears down the H/W state.
+ * @halt_engines: halt engines, needed for reset sequence. This also disables
+ *                interrupts from the device. Should be called before
+ *                hw_fini and before CS rollback.
+ * @suspend: handles IP specific H/W or SW changes for suspend.
+ * @resume: handles IP specific H/W or SW changes for resume.
+ * @cb_mmap: maps a CB.
+ * @ring_doorbell: increment PI on a given QMAN.
+ * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
+ *             function because the PQs are located in different memory areas
+ *             per ASIC (SRAM, DRAM, Host memory) and therefore, the method of
+ *             writing the PQE must match the destination memory area
+ *             properties.
+ * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling
+ *                           dma_alloc_coherent(). This is an ASIC function
+ *                           because its implementation is not trivial when the
+ *                           driver is loaded in simulation mode (not upstreamed).
+ * @asic_dma_free_coherent:  Free coherent DMA memory by calling
+ *                           dma_free_coherent(). This is an ASIC function
+ *                           because its implementation is not trivial when the
+ *                           driver is loaded in simulation mode (not upstreamed).
+ * @get_int_queue_base: get the internal queue base address.
+ * @test_queues: run simple test on all queues for sanity check.
+ * @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool.
+ *                        size of allocation is HL_DMA_POOL_BLK_SIZE.
+ * @asic_dma_pool_free: free small DMA allocation from pool.
+ * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
+ * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
+ * @hl_dma_unmap_sg: DMA unmap scatter-gather list.
+ * @cs_parser: parse Command Submission.
+ * @asic_dma_map_sg: DMA map scatter-gather list.
+ * @get_dma_desc_list_size: get number of LIN_DMA packets required for CB.
+ * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
+ * @update_eq_ci: update event queue CI.
+ * @context_switch: called upon ASID context switch.
+ * @restore_phase_topology: clear all SOBs and MONs.
+ * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM.
+ * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM.
+ * @add_device_attr: add ASIC specific device attributes.
+ * @handle_eqe: handle event queue entry (IRQ) from ArmCP.
+ * @set_pll_profile: change PLL profile (manual/automatic).
+ * @get_events_stat: retrieve event queue entries histogram.
+ * @read_pte: read MMU page table entry from DRAM.
+ * @write_pte: write MMU page table entry to DRAM.
+ * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft
+ *                        (L1 only) or hard (L0 & L1) flush.
+ * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
+ *                              ASID-VA-size mask.
+ * @send_heartbeat: send is-alive packet to ArmCP and verify response.
+ * @enable_clock_gating: enable clock gating for reducing power consumption.
+ * @disable_clock_gating: disable clock gating for accessing registers on HBW.
+ * @debug_coresight: perform certain actions on Coresight for debugging.
+ * @is_device_idle: return true if device is idle, false otherwise.
+ * @soft_reset_late_init: perform certain actions needed after soft reset.
+ * @hw_queues_lock: acquire H/W queues lock.
+ * @hw_queues_unlock: release H/W queues lock.
+ * @get_pci_id: retrieve PCI ID.
+ * @get_eeprom_data: retrieve EEPROM data from F/W.
+ * @send_cpu_message: send buffer to ArmCP.
+ * @get_hw_state: retrieve the H/W state
+ * @pci_bars_map: Map PCI BARs.
+ * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns
+ *                     old address the bar pointed to or U64_MAX for failure
+ * @init_iatu: Initialize the iATU unit inside the PCI controller.
+ * @rreg: Read a register. Needed for simulator support.
+ * @wreg: Write a register. Needed for simulator support.
+ * @halt_coresight: stop the ETF and ETR traces.
+ * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
+ * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
+ * @read_device_fw_version: read the device's firmware versions that are
+ *                          contained in registers
+ * @load_firmware_to_device: load the firmware to the device's memory
+ * @load_boot_fit_to_device: load boot fit to device's memory
+ * @get_signal_cb_size: Get signal CB size.
+ * @get_wait_cb_size: Get wait CB size.
+ * @gen_signal_cb: Generate a signal CB.
+ * @gen_wait_cb: Generate a wait CB.
+ * @reset_sob: Reset a SOB.
+ * @set_dma_mask_from_fw: set the DMA mask in the driver according to the
+ *                        firmware configuration
+ * @get_device_time: Get the device time.
+ */
+struct hl_asic_funcs {
+       int (*early_init)(struct hl_device *hdev);
+       int (*early_fini)(struct hl_device *hdev);
+       int (*late_init)(struct hl_device *hdev);
+       void (*late_fini)(struct hl_device *hdev);
+       int (*sw_init)(struct hl_device *hdev);
+       int (*sw_fini)(struct hl_device *hdev);
+       int (*hw_init)(struct hl_device *hdev);
+       void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
+       void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
+       int (*suspend)(struct hl_device *hdev);
+       int (*resume)(struct hl_device *hdev);
+       int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
+                       u64 kaddress, phys_addr_t paddress, u32 size);
+       void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
+       void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
+                       struct hl_bd *bd);
+       void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size,
+                                       dma_addr_t *dma_handle, gfp_t flag);
+       void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size,
+                                       void *cpu_addr, dma_addr_t dma_handle);
+       void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
+                               dma_addr_t *dma_handle, u16 *queue_len);
+       int (*test_queues)(struct hl_device *hdev);
+       void* (*asic_dma_pool_zalloc)(struct hl_device *hdev, size_t size,
+                               gfp_t mem_flags, dma_addr_t *dma_handle);
+       void (*asic_dma_pool_free)(struct hl_device *hdev, void *vaddr,
+                               dma_addr_t dma_addr);
+       void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev,
+                               size_t size, dma_addr_t *dma_handle);
+       void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
+                               size_t size, void *vaddr);
+       void (*hl_dma_unmap_sg)(struct hl_device *hdev,
+                               struct scatterlist *sgl, int nents,
+                               enum dma_data_direction dir);
+       int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
+       int (*asic_dma_map_sg)(struct hl_device *hdev,
+                               struct scatterlist *sgl, int nents,
+                               enum dma_data_direction dir);
+       u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
+                                       struct sg_table *sgt);
+       void (*add_end_of_cb_packets)(struct hl_device *hdev,
+                                       u64 kernel_address, u32 len,
+                                       u64 cq_addr, u32 cq_val, u32 msix_num,
+                                       bool eb);
+       void (*update_eq_ci)(struct hl_device *hdev, u32 val);
+       int (*context_switch)(struct hl_device *hdev, u32 asid);
+       void (*restore_phase_topology)(struct hl_device *hdev);
+       int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val);
+       int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val);
+       int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val);
+       int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val);
+       void (*add_device_attr)(struct hl_device *hdev,
+                               struct attribute_group *dev_attr_grp);
+       void (*handle_eqe)(struct hl_device *hdev,
+                               struct hl_eq_entry *eq_entry);
+       void (*set_pll_profile)(struct hl_device *hdev,
+                       enum hl_pll_frequency freq);
+       void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
+                               u32 *size);
+       u64 (*read_pte)(struct hl_device *hdev, u64 addr);
+       void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
+       int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
+                                       u32 flags);
+       int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
+                       u32 asid, u64 va, u64 size);
+       int (*send_heartbeat)(struct hl_device *hdev);
+       void (*enable_clock_gating)(struct hl_device *hdev);
+       void (*disable_clock_gating)(struct hl_device *hdev);
+       int (*debug_coresight)(struct hl_device *hdev, void *data);
+       bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
+                               struct seq_file *s);
+       int (*soft_reset_late_init)(struct hl_device *hdev);
+       void (*hw_queues_lock)(struct hl_device *hdev);
+       void (*hw_queues_unlock)(struct hl_device *hdev);
+       u32 (*get_pci_id)(struct hl_device *hdev);
+       int (*get_eeprom_data)(struct hl_device *hdev, void *data,
+                               size_t max_size);
+       int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
+                               u16 len, u32 timeout, long *result);
+       enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev);
+       int (*pci_bars_map)(struct hl_device *hdev);
+       u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
+       int (*init_iatu)(struct hl_device *hdev);
+       u32 (*rreg)(struct hl_device *hdev, u32 reg);
+       void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
+       void (*halt_coresight)(struct hl_device *hdev);
+       int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
+       u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
+       void (*read_device_fw_version)(struct hl_device *hdev,
+                                       enum hl_fw_component fwc);
+       int (*load_firmware_to_device)(struct hl_device *hdev);
+       int (*load_boot_fit_to_device)(struct hl_device *hdev);
+       u32 (*get_signal_cb_size)(struct hl_device *hdev);
+       u32 (*get_wait_cb_size)(struct hl_device *hdev);
+       void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
+       void (*gen_wait_cb)(struct hl_device *hdev, void *data, u16 sob_id,
+                               u16 sob_val, u16 mon_id, u32 q_idx);
+       void (*reset_sob)(struct hl_device *hdev, void *data);
+       void (*set_dma_mask_from_fw)(struct hl_device *hdev);
+       u64 (*get_device_time)(struct hl_device *hdev);
+};
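+
+/*
+ * Illustrative usage (not part of the driver): common code drives the ASIC
+ * through this ops table, e.g.
+ *
+ *     rc = hdev->asic_funcs->hw_init(hdev);
+ *
+ * Each supported ASIC (Goya, Gaudi) provides its own instance of
+ * struct hl_asic_funcs with these callbacks filled in.
+ */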
+
+
+/*
+ * CONTEXTS
+ */
+
+#define HL_KERNEL_ASID_ID      0
+
+/**
+ * struct hl_va_range - virtual addresses range.
+ * @lock: protects the virtual addresses list.
+ * @list: list of virtual addresses blocks available for mappings.
+ * @start_addr: range start address.
+ * @end_addr: range end address.
+ */
+struct hl_va_range {
+       struct mutex            lock;
+       struct list_head        list;
+       u64                     start_addr;
+       u64                     end_addr;
+};
+
+/**
+ * struct hl_ctx - user/kernel context.
+ * @mem_hash: holds mapping from virtual address to virtual memory area
+ *             descriptor (hl_vm_phys_pg_list or hl_userptr).
+ * @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure.
+ * @hpriv: pointer to the private (Kernel Driver) data of the process (fd).
+ * @hdev: pointer to the device structure.
+ * @refcount: reference counter for the context. Context is released only when
+ *             this hits 0. It is incremented on CS and CS_WAIT.
+ * @cs_pending: array of DMA fence objects representing pending CS.
+ * @host_va_range: holds available virtual addresses for host mappings.
+ * @host_huge_va_range: holds available virtual addresses for host mappings
+ *                      with huge pages.
+ * @dram_va_range: holds available virtual addresses for DRAM mappings.
+ * @mem_hash_lock: protects the mem_hash.
+ * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
+ *            MMU hash or walking the PGT requires taking this lock.
+ * @debugfs_list: node in debugfs list of contexts.
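+ * @cs_counters: command submission counters for this context.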
+ * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
+ *                     to user so user could inquire about CS. It is used as
+ *                     index to cs_pending array.
+ * @dram_default_hops: array that holds all hops addresses needed for default
+ *                     DRAM mapping.
+ * @cs_lock: spinlock to protect cs_sequence.
+ * @dram_phys_mem: amount of used physical DRAM memory by this context.
+ * @thread_ctx_switch_token: token to prevent multiple threads of the same
+ *                             context from running the context switch phase.
+ *                             Only a single thread should run it.
+ * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
+ *                             the context switch phase from moving to their
+ *                             execution phase before the context switch phase
+ *                             has finished.
+ * @asid: context's unique address space ID in the device's MMU.
+ * @handle: context's opaque handle for user
+ */
+struct hl_ctx {
+       DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
+       DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
+       struct hl_fpriv         *hpriv;
+       struct hl_device        *hdev;
+       struct kref             refcount;
+       struct dma_fence        **cs_pending;
+       struct hl_va_range      *host_va_range;
+       struct hl_va_range      *host_huge_va_range;
+       struct hl_va_range      *dram_va_range;
+       struct mutex            mem_hash_lock;
+       struct mutex            mmu_lock;
+       struct list_head        debugfs_list;
+       struct hl_cs_counters   cs_counters;
+       u64                     cs_sequence;
+       u64                     *dram_default_hops;
+       spinlock_t              cs_lock;
+       atomic64_t              dram_phys_mem;
+       atomic_t                thread_ctx_switch_token;
+       u32                     thread_ctx_switch_wait_token;
+       u32                     asid;
+       u32                     handle;
+};
+
+/**
+ * struct hl_ctx_mgr - for handling multiple contexts.
+ * @ctx_lock: protects ctx_handles.
+ * @ctx_handles: idr to hold all ctx handles.
+ */
+struct hl_ctx_mgr {
+       struct mutex            ctx_lock;
+       struct idr              ctx_handles;
+};
+
+
+
+/*
+ * COMMAND SUBMISSIONS
+ */
+
+/**
+ * struct hl_userptr - memory mapping chunk information
+ * @vm_type: type of the VM.
+ * @job_node: linked-list node for hanging the object on the Job's list.
+ * @vec: pointer to the frame vector.
+ * @sgt: pointer to the scatter-gather table that holds the pages.
+ * @dir: for DMA unmapping, the direction must be supplied, so save it.
+ * @debugfs_list: node in debugfs list of command submissions.
+ * @addr: user-space virtual address of the start of the memory area.
+ * @size: size of the memory area to pin & map.
+ * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
+ */
+struct hl_userptr {
+       enum vm_type_t          vm_type; /* must be first */
+       struct list_head        job_node;
+       struct frame_vector     *vec;
+       struct sg_table         *sgt;
+       enum dma_data_direction dir;
+       struct list_head        debugfs_list;
+       u64                     addr;
+       u32                     size;
+       u8                      dma_mapped;
+};
+
+/**
+ * struct hl_cs - command submission.
+ * @jobs_in_queue_cnt: per each queue, maintain counter of submitted jobs.
+ * @ctx: the context this CS belongs to.
+ * @job_list: list of the CS's jobs in the various queues.
+ * @job_lock: spinlock for the CS's jobs list. Needed for free_job.
+ * @refcount: reference counter for usage of the CS.
+ * @fence: pointer to the fence object of this CS.
+ * @signal_fence: pointer to the fence object of the signal CS (used by wait
+ *                CS only).
+ * @finish_work: workqueue object to run when CS is completed by H/W.
+ * @work_tdr: delayed work node for TDR.
+ * @mirror_node: node in device mirror list of command submissions.
+ * @debugfs_list: node in debugfs list of command submissions.
+ * @sequence: the sequence number of this CS.
+ * @type: CS_TYPE_*.
+ * @submitted: true if CS was submitted to H/W.
+ * @completed: true if CS was completed by device.
+ * @timedout: true if CS timed out.
+ * @tdr_active: true if TDR was activated for this CS (to prevent
+ *             double TDR activation).
+ * @aborted: true if CS was aborted due to some device error.
+ */
+struct hl_cs {
+       u16                     *jobs_in_queue_cnt;
+       struct hl_ctx           *ctx;
+       struct list_head        job_list;
+       spinlock_t              job_lock;
+       struct kref             refcount;
+       struct dma_fence        *fence;
+       struct dma_fence        *signal_fence;
+       struct work_struct      finish_work;
+       struct delayed_work     work_tdr;
+       struct list_head        mirror_node;
+       struct list_head        debugfs_list;
+       u64                     sequence;
+       enum hl_cs_type         type;
+       u8                      submitted;
+       u8                      completed;
+       u8                      timedout;
+       u8                      tdr_active;
+       u8                      aborted;
+};
+
+/**
+ * struct hl_cs_job - command submission job.
+ * @cs_node: the node to hang on the CS jobs list.
+ * @cs: the CS this job belongs to.
+ * @user_cb: the CB we got from the user.
+ * @patched_cb: in case of patching, this is internal CB which is submitted on
+ *             the queue instead of the CB we got from the IOCTL.
+ * @finish_work: workqueue object to run when job is completed.
+ * @userptr_list: linked-list of userptr mappings that belong to this job and
+ *                     wait for completion.
+ * @debugfs_list: node in debugfs list of command submission jobs.
+ * @queue_type: the type of the H/W queue this job is submitted to.
+ * @id: the id of this job inside a CS.
+ * @hw_queue_id: the id of the H/W queue this job is submitted to.
+ * @user_cb_size: the actual size of the CB we got from the user.
+ * @job_cb_size: the actual size of the CB that we put on the queue.
+ * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
+ *                          handle to a kernel-allocated CB object, false
+ *                          otherwise (SRAM/DRAM/host address).
+ * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
+ *                    info is needed later, when adding the 2xMSG_PROT at the
+ *                    end of the JOB, to know which barriers to put in the
+ *                    MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
+ *                    have streams so the engine can't be busy by another
+ *                    stream.
+ */
+struct hl_cs_job {
+       struct list_head        cs_node;
+       struct hl_cs            *cs;
+       struct hl_cb            *user_cb;
+       struct hl_cb            *patched_cb;
+       struct work_struct      finish_work;
+       struct list_head        userptr_list;
+       struct list_head        debugfs_list;
+       enum hl_queue_type      queue_type;
+       u32                     id;
+       u32                     hw_queue_id;
+       u32                     user_cb_size;
+       u32                     job_cb_size;
+       u8                      is_kernel_allocated_cb;
+       u8                      contains_dma_pkt;
+};
+
+/**
+ * struct hl_cs_parser - command submission parser properties.
+ * @user_cb: the CB we got from the user.
+ * @patched_cb: in case of patching, this is internal CB which is submitted on
+ *             the queue instead of the CB we got from the IOCTL.
+ * @job_userptr_list: linked-list of userptr mappings that belong to the related
+ *                     job and wait for completion.
+ * @cs_sequence: the sequence number of the related CS.
+ * @queue_type: the type of the H/W queue this job is submitted to.
+ * @ctx_id: the ID of the context the related CS belongs to.
+ * @hw_queue_id: the id of the H/W queue this job is submitted to.
+ * @user_cb_size: the actual size of the CB we got from the user.
+ * @patched_cb_size: the size of the CB after parsing.
+ * @job_id: the id of the related job inside the related CS.
+ * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
+ *                          handle to a kernel-allocated CB object, false
+ *                          otherwise (SRAM/DRAM/host address).
+ * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
+ *                    info is needed later, when adding the 2xMSG_PROT at the
+ *                    end of the JOB, to know which barriers to put in the
+ *                    MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
+ *                    have streams so the engine can't be busy by another
+ *                    stream.
+ */
+struct hl_cs_parser {
+       struct hl_cb            *user_cb;
+       struct hl_cb            *patched_cb;
+       struct list_head        *job_userptr_list;
+       u64                     cs_sequence;
+       enum hl_queue_type      queue_type;
+       u32                     ctx_id;
+       u32                     hw_queue_id;
+       u32                     user_cb_size;
+       u32                     patched_cb_size;
+       u8                      job_id;
+       u8                      is_kernel_allocated_cb;
+       u8                      contains_dma_pkt;
+};
+
+
+/*
+ * MEMORY STRUCTURE
+ */
+
+/**
+ * struct hl_vm_hash_node - hash element from virtual address to virtual
+ *                             memory area descriptor (hl_vm_phys_pg_pack or
+ *                             hl_userptr).
+ * @node: node to hang on the hash table in context object.
+ * @vaddr: key virtual address.
+ * @ptr: value pointer (hl_vm_phys_pg_pack or hl_userptr).
+ */
+struct hl_vm_hash_node {
+       struct hlist_node       node;
+       u64                     vaddr;
+       void                    *ptr;
+};
+
+/**
+ * struct hl_vm_phys_pg_pack - physical page pack.
+ * @vm_type: describes the type of the virtual area descriptor.
+ * @pages: the physical page array.
+ * @npages: num physical pages in the pack.
+ * @total_size: total size of all the pages in this list.
+ * @mapping_cnt: number of shared mappings.
+ * @asid: the context related to this list.
+ * @page_size: size of each page in the pack.
+ * @flags: HL_MEM_* flags related to this list.
+ * @handle: the provided handle related to this list.
+ * @offset: offset from the first page.
+ * @contiguous: true if the physical memory is contiguous.
+ * @created_from_userptr: true if created from a host virtual address.
+ */
+struct hl_vm_phys_pg_pack {
+       enum vm_type_t          vm_type; /* must be first */
+       u64                     *pages;
+       u64                     npages;
+       u64                     total_size;
+       atomic_t                mapping_cnt;
+       u32                     asid;
+       u32                     page_size;
+       u32                     flags;
+       u32                     handle;
+       u32                     offset;
+       u8                      contiguous;
+       u8                      created_from_userptr;
+};
+
+/**
+ * struct hl_vm_va_block - virtual range block information.
+ * @node: node to hang on the virtual range list in context object.
+ * @start: virtual range start address.
+ * @end: virtual range end address.
+ * @size: virtual range size.
+ */
+struct hl_vm_va_block {
+       struct list_head        node;
+       u64                     start;
+       u64                     end;
+       u64                     size;
+};
+
+/**
+ * struct hl_vm - virtual memory manager for MMU.
+ * @dram_pg_pool: pool for DRAM physical pages of 2MB.
+ * @dram_pg_pool_refcount: reference counter for the pool usage.
+ * @idr_lock: protects phys_pg_pack_handles.
+ * @phys_pg_pack_handles: idr to hold all device allocations handles.
+ * @init_done: whether initialization was done. We need this because VM
+ *             initialization might be skipped during device initialization.
+ */
+struct hl_vm {
+       struct gen_pool         *dram_pg_pool;
+       struct kref             dram_pg_pool_refcount;
+       spinlock_t              idr_lock;
+       struct idr              phys_pg_pack_handles;
+       u8                      init_done;
+};
+
+
+/*
+ * DEBUG, PROFILING STRUCTURE
+ */
+
+/**
+ * struct hl_debug_params - Coresight debug parameters.
+ * @input: pointer to component specific input parameters.
+ * @output: pointer to component specific output parameters.
+ * @output_size: size of output buffer.
+ * @reg_idx: relevant register ID.
+ * @op: component operation to execute.
+ * @enable: true to enable component debugging, false otherwise.
+ */
+struct hl_debug_params {
+       void *input;
+       void *output;
+       u32 output_size;
+       u32 reg_idx;
+       u32 op;
+       bool enable;
+};
+
+/*
+ * FILE PRIVATE STRUCTURE
+ */
+
+/**
+ * struct hl_fpriv - process information stored in FD private data.
+ * @hdev: habanalabs device structure.
+ * @filp: pointer to the given file structure.
+ * @taskpid: current process ID.
+ * @ctx: current executing context. TODO: remove for multiple ctx per process
+ * @ctx_mgr: context manager to handle multiple context for this FD.
+ * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
+ * @debugfs_list: list of relevant ASIC debugfs.
+ * @dev_node: node in the device list of file private data
+ * @refcount: number of related contexts.
+ * @restore_phase_mutex: lock for context switch and restore phase.
+ * @is_control: true for control device, false otherwise
+ */
+struct hl_fpriv {
+       struct hl_device        *hdev;
+       struct file             *filp;
+       struct pid              *taskpid;
+       struct hl_ctx           *ctx;
+       struct hl_ctx_mgr       ctx_mgr;
+       struct hl_cb_mgr        cb_mgr;
+       struct list_head        debugfs_list;
+       struct list_head        dev_node;
+       struct kref             refcount;
+       struct mutex            restore_phase_mutex;
+       u8                      is_control;
+};
+
+
+/*
+ * DebugFS
+ */
+
+/**
+ * struct hl_info_list - debugfs file ops.
+ * @name: file name.
+ * @show: function to output information.
+ * @write: function to write to the file.
+ */
+struct hl_info_list {
+       const char      *name;
+       int             (*show)(struct seq_file *s, void *data);
+       ssize_t         (*write)(struct file *file, const char __user *buf,
+                               size_t count, loff_t *f_pos);
+};
+
+/**
+ * struct hl_debugfs_entry - debugfs dentry wrapper.
+ * @dent: base debugfs entry structure.
+ * @info_ent: dentry related ops.
+ * @dev_entry: ASIC specific debugfs manager.
+ */
+struct hl_debugfs_entry {
+       struct dentry                   *dent;
+       const struct hl_info_list       *info_ent;
+       struct hl_dbg_device_entry      *dev_entry;
+};
+
+/**
+ * struct hl_dbg_device_entry - ASIC specific debugfs manager.
+ * @root: root dentry.
+ * @hdev: habanalabs device structure.
+ * @entry_arr: array of available hl_debugfs_entry.
+ * @file_list: list of available debugfs files.
+ * @file_mutex: protects file_list.
+ * @cb_list: list of available CBs.
+ * @cb_spinlock: protects cb_list.
+ * @cs_list: list of available CSs.
+ * @cs_spinlock: protects cs_list.
+ * @cs_job_list: list of available CB jobs.
+ * @cs_job_spinlock: protects cs_job_list.
+ * @userptr_list: list of available userptrs (virtual memory chunk descriptor).
+ * @userptr_spinlock: protects userptr_list.
+ * @ctx_mem_hash_list: list of available contexts with MMU mappings.
+ * @ctx_mem_hash_spinlock: protects ctx_mem_hash_list.
+ * @addr: next address to read/write from/to in read/write32.
+ * @mmu_addr: next virtual address to translate to physical address in mmu_show.
+ * @mmu_asid: ASID to use while translating in mmu_show.
+ * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read.
+ * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read.
+ * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read.
+ */
+struct hl_dbg_device_entry {
+       struct dentry                   *root;
+       struct hl_device                *hdev;
+       struct hl_debugfs_entry         *entry_arr;
+       struct list_head                file_list;
+       struct mutex                    file_mutex;
+       struct list_head                cb_list;
+       spinlock_t                      cb_spinlock;
+       struct list_head                cs_list;
+       spinlock_t                      cs_spinlock;
+       struct list_head                cs_job_list;
+       spinlock_t                      cs_job_spinlock;
+       struct list_head                userptr_list;
+       spinlock_t                      userptr_spinlock;
+       struct list_head                ctx_mem_hash_list;
+       spinlock_t                      ctx_mem_hash_spinlock;
+       u64                             addr;
+       u64                             mmu_addr;
+       u32                             mmu_asid;
+       u8                              i2c_bus;
+       u8                              i2c_addr;
+       u8                              i2c_reg;
+};
+
+
+/*
+ * DEVICES
+ */
+
+/* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
+ * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
+ */
+#define HL_MAX_MINORS  256
+
+/*
+ * Registers read & write functions.
+ */
+
+u32 hl_rreg(struct hl_device *hdev, u32 reg);
+void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
+
+#define RREG32(reg) hdev->asic_funcs->rreg(hdev, (reg))
+#define WREG32(reg, v) hdev->asic_funcs->wreg(hdev, (reg), (v))
+#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n",   \
+                       hdev->asic_funcs->rreg(hdev, (reg)))
+
+#define WREG32_P(reg, val, mask)                               \
+       do {                                                    \
+               u32 tmp_ = RREG32(reg);                         \
+               tmp_ &= (mask);                                 \
+               tmp_ |= ((val) & ~(mask));                      \
+               WREG32(reg, tmp_);                              \
+       } while (0)
+#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
+#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
+
+#define RMWREG32(reg, val, mask)                               \
+       do {                                                    \
+               u32 tmp_ = RREG32(reg);                         \
+               tmp_ &= ~(mask);                                \
+               tmp_ |= ((val) << __ffs(mask));                 \
+               WREG32(reg, tmp_);                              \
+       } while (0)
+
+#define RREG32_MASK(reg, mask) ((RREG32(reg) & mask) >> __ffs(mask))
+
+#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT
+#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK
+#define WREG32_FIELD(reg, offset, field, val)  \
+       WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \
+                               ~REG_FIELD_MASK(reg, field)) | \
+                               (val) << REG_FIELD_SHIFT(reg, field))
+
+/* Timeout should be longer when working with simulator but cap the
+ * increased timeout to some maximum
+ */
+#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
+({ \
+       ktime_t __timeout; \
+       if (hdev->pdev) \
+               __timeout = ktime_add_us(ktime_get(), timeout_us); \
+       else \
+               __timeout = ktime_add_us(ktime_get(),\
+                               min((u64)(timeout_us * 10), \
+                                       (u64) HL_SIM_MAX_TIMEOUT_US)); \
+       might_sleep_if(sleep_us); \
+       for (;;) { \
+               (val) = RREG32(addr); \
+               if (cond) \
+                       break; \
+               if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
+                       (val) = RREG32(addr); \
+                       break; \
+               } \
+               if (sleep_us) \
+                       usleep_range((sleep_us >> 2) + 1, sleep_us); \
+       } \
+       (cond) ? 0 : -ETIMEDOUT; \
+})
+
+/*
+ * The address in this macro always points to a memory location in the
+ * host's (server's) memory. That location is updated asynchronously
+ * either by the direct access of the device or by another core.
+ *
+ * To work both in LE and BE architectures, we need to distinguish between the
+ * two states (device or another core updates the memory location). Therefore,
+ * if mem_written_by_device is true, the host memory being polled will be
+ * updated directly by the device. If false, the host memory being polled will
+ * be updated by host CPU. Required so host knows whether or not the memory
+ * might need to be byte-swapped before returning value to caller.
+ */
+#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \
+                               mem_written_by_device) \
+({ \
+       ktime_t __timeout; \
+       if (hdev->pdev) \
+               __timeout = ktime_add_us(ktime_get(), timeout_us); \
+       else \
+               __timeout = ktime_add_us(ktime_get(),\
+                               min((u64)(timeout_us * 10), \
+                                       (u64) HL_SIM_MAX_TIMEOUT_US)); \
+       might_sleep_if(sleep_us); \
+       for (;;) { \
+               /* Verify we read updates done by other cores or by device */ \
+               mb(); \
+               (val) = *((u32 *) (uintptr_t) (addr)); \
+               if (mem_written_by_device) \
+                       (val) = le32_to_cpu(*(__le32 *) &(val)); \
+               if (cond) \
+                       break; \
+               if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
+                       (val) = *((u32 *) (uintptr_t) (addr)); \
+                       if (mem_written_by_device) \
+                               (val) = le32_to_cpu(*(__le32 *) &(val)); \
+                       break; \
+               } \
+               if (sleep_us) \
+                       usleep_range((sleep_us >> 2) + 1, sleep_us); \
+       } \
+       (cond) ? 0 : -ETIMEDOUT; \
+})
+
+#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
+                                       timeout_us) \
+({ \
+       ktime_t __timeout; \
+       if (hdev->pdev) \
+               __timeout = ktime_add_us(ktime_get(), timeout_us); \
+       else \
+               __timeout = ktime_add_us(ktime_get(),\
+                               min((u64)(timeout_us * 10), \
+                                       (u64) HL_SIM_MAX_TIMEOUT_US)); \
+       might_sleep_if(sleep_us); \
+       for (;;) { \
+               (val) = readl(addr); \
+               if (cond) \
+                       break; \
+               if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
+                       (val) = readl(addr); \
+                       break; \
+               } \
+               if (sleep_us) \
+                       usleep_range((sleep_us >> 2) + 1, sleep_us); \
+       } \
+       (cond) ? 0 : -ETIMEDOUT; \
+})
+
+struct hwmon_chip_info;
+
+/**
+ * struct hl_device_reset_work - reset workqueue task wrapper.
+ * @reset_work: reset work to be done.
+ * @hdev: habanalabs device structure.
+ */
+struct hl_device_reset_work {
+       struct work_struct              reset_work;
+       struct hl_device                *hdev;
+};
+
+/**
+ * struct hl_device_idle_busy_ts - used for calculating device utilization rate.
+ * @idle_to_busy_ts: timestamp at which the device changed from idle to busy.
+ * @busy_to_idle_ts: timestamp at which the device changed from busy to idle.
+ */
+struct hl_device_idle_busy_ts {
+       ktime_t                         idle_to_busy_ts;
+       ktime_t                         busy_to_idle_ts;
+};
+
+/**
+ * struct hl_device - habanalabs device structure.
+ * @pdev: pointer to PCI device, can be NULL in case of simulator device.
+ * @pcie_bar_phys: array of available PCIe bars physical addresses.
+ *                (required only for PCI address match mode)
+ * @pcie_bar: array of available PCIe bars virtual addresses.
+ * @rmmio: configuration area address on SRAM.
+ * @cdev: related char device.
+ * @cdev_ctrl: char device for control operations only (INFO IOCTL)
+ * @dev: related kernel basic device structure.
+ * @dev_ctrl: related kernel device structure for the control device
+ * @work_freq: delayed work to lower device frequency if possible.
+ * @work_heartbeat: delayed work for ArmCP is-alive check.
+ * @asic_name: ASIC specific name.
+ * @asic_type: ASIC specific type.
+ * @completion_queue: array of hl_cq.
+ * @cq_wq: work queues of completion queues for executing work in process
+ *         context.
+ * @eq_wq: work queue of event queue for executing work in process context.
+ * @kernel_ctx: Kernel driver context structure.
+ * @kernel_queues: array of hl_hw_queue.
+ * @hw_queues_mirror_list: CS mirror list for TDR.
+ * @hw_queues_mirror_lock: protects hw_queues_mirror_list.
+ * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CBs.
+ * @event_queue: event queue for IRQ from ArmCP.
+ * @dma_pool: DMA pool for small allocations.
+ * @cpu_accessible_dma_mem: Host <-> ArmCP shared memory CPU address.
+ * @cpu_accessible_dma_address: Host <-> ArmCP shared memory DMA address.
+ * @cpu_accessible_dma_pool: Host <-> ArmCP shared memory pool.
+ * @asid_bitmap: holds used/available ASIDs.
+ * @asid_mutex: protects asid_bitmap.
+ * @send_cpu_message_lock: enforces only one message in Host <-> ArmCP queue.
+ * @debug_lock: protects critical section of setting debug mode for device
+ * @asic_prop: ASIC specific immutable properties.
+ * @asic_funcs: ASIC specific functions.
+ * @asic_specific: ASIC specific information to use only from ASIC files.
+ * @mmu_pgt_pool: pool of available MMU hops.
+ * @vm: virtual memory manager for MMU.
+ * @mmu_cache_lock: protects MMU cache invalidation, which can serve only one
+ *                  context at a time.
+ * @mmu_shadow_hop0: shadow mapping of the MMU hop 0 zone.
+ * @hwmon_dev: H/W monitor device.
+ * @pm_mng_profile: current power management profile.
+ * @hl_chip_info: ASIC's sensors information.
+ * @hl_debugfs: device's debugfs manager.
+ * @cb_pool: list of preallocated CBs.
+ * @cb_pool_lock: protects the CB pool.
+ * @fpriv_list: list of file private data structures. Each structure is created
+ *              when a user opens the device
+ * @fpriv_list_lock: protects the fpriv_list
+ * @compute_ctx: current compute context executing.
+ * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy
+ *                    and vice-versa
+ * @aggregated_cs_counters: aggregated cs counters among all contexts
+ * @dram_used_mem: current DRAM memory consumption.
+ * @timeout_jiffies: device CS timeout value.
+ * @max_power: the max power of the device, as configured by the sysadmin. This
+ *             value is saved so in case of hard-reset, the driver will restore
+ *             this value and update the F/W after the re-initialization
+ * @in_reset: is device in reset flow.
+ * @curr_pll_profile: current PLL profile.
+ * @cs_active_cnt: number of active command submissions on this device (active
+ *                 means already in H/W queues)
+ * @major: habanalabs kernel driver major.
+ * @high_pll: high PLL profile frequency.
+ * @soft_reset_cnt: number of soft resets since the driver was loaded.
+ * @hard_reset_cnt: number of hard resets since the driver was loaded.
+ * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
+ * @id: device minor.
+ * @id_control: minor of the control device
+ * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
+ *                    addresses.
+ * @disabled: is device disabled.
+ * @late_init_done: was the late init stage done during initialization.
+ * @hwmon_initialized: were the H/W monitor sensors initialized.
+ * @hard_reset_pending: is there a hard reset work pending.
+ * @heartbeat: is heartbeat sanity check towards ArmCP enabled.
+ * @reset_on_lockup: true if a reset should be done in case of stuck CS, false
+ *                   otherwise.
+ * @dram_supports_virtual_memory: is MMU enabled towards DRAM.
+ * @dram_default_page_mapping: is DRAM default page mapping enabled.
+ * @pmmu_huge_range: is a different virtual addresses range used for PMMU with
+ *                   huge pages.
+ * @init_done: is the initialization of the device done.
+ * @mmu_enable: is MMU enabled.
+ * @mmu_huge_page_opt: is MMU huge pages optimization enabled.
+ * @clock_gating: is clock gating enabled.
+ * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
+ * @dma_mask: the dma mask that was set for this device
+ * @in_debug: is device under debug. This, together with fpriv_list, enforces
+ *            that only a single user is configuring the debug infrastructure.
+ * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant
+ *                           only to POWER9 machines.
+ * @cdev_sysfs_created: were char devices and sysfs nodes created.
+ * @stop_on_err: true if engines should stop on error.
+ * @supports_sync_stream: is sync stream supported.
+ * @sync_stream_queue_idx: helper index for sync stream queues initialization.
+ * @supports_coresight: is CoreSight supported.
+ * @supports_soft_reset: is soft reset supported.
+ */
+struct hl_device {
+       struct pci_dev                  *pdev;
+       u64                             pcie_bar_phys[HL_PCI_NUM_BARS];
+       void __iomem                    *pcie_bar[HL_PCI_NUM_BARS];
+       void __iomem                    *rmmio;
+       struct cdev                     cdev;
+       struct cdev                     cdev_ctrl;
+       struct device                   *dev;
+       struct device                   *dev_ctrl;
+       struct delayed_work             work_freq;
+       struct delayed_work             work_heartbeat;
+       char                            asic_name[16];
+       enum hl_asic_type               asic_type;
+       struct hl_cq                    *completion_queue;
+       struct workqueue_struct         **cq_wq;
+       struct workqueue_struct         *eq_wq;
+       struct hl_ctx                   *kernel_ctx;
+       struct hl_hw_queue              *kernel_queues;
+       struct list_head                hw_queues_mirror_list;
+       spinlock_t                      hw_queues_mirror_lock;
+       struct hl_cb_mgr                kernel_cb_mgr;
+       struct hl_eq                    event_queue;
+       struct dma_pool                 *dma_pool;
+       void                            *cpu_accessible_dma_mem;
+       dma_addr_t                      cpu_accessible_dma_address;
+       struct gen_pool                 *cpu_accessible_dma_pool;
+       unsigned long                   *asid_bitmap;
+       struct mutex                    asid_mutex;
+       struct mutex                    send_cpu_message_lock;
+       struct mutex                    debug_lock;
+       struct asic_fixed_properties    asic_prop;
+       const struct hl_asic_funcs      *asic_funcs;
+       void                            *asic_specific;
+       struct gen_pool                 *mmu_pgt_pool;
+       struct hl_vm                    vm;
+       struct mutex                    mmu_cache_lock;
+       void                            *mmu_shadow_hop0;
+       struct device                   *hwmon_dev;
+       enum hl_pm_mng_profile          pm_mng_profile;
+       struct hwmon_chip_info          *hl_chip_info;
+
+       struct hl_dbg_device_entry      hl_debugfs;
+
+       struct list_head                cb_pool;
+       spinlock_t                      cb_pool_lock;
+
+       struct list_head                fpriv_list;
+       struct mutex                    fpriv_list_lock;
+
+       struct hl_ctx                   *compute_ctx;
+
+       struct hl_device_idle_busy_ts   *idle_busy_ts_arr;
+
+       struct hl_cs_counters           aggregated_cs_counters;
+
+       atomic64_t                      dram_used_mem;
+       u64                             timeout_jiffies;
+       u64                             max_power;
+       atomic_t                        in_reset;
+       enum hl_pll_frequency           curr_pll_profile;
+       int                             cs_active_cnt;
+       u32                             major;
+       u32                             high_pll;
+       u32                             soft_reset_cnt;
+       u32                             hard_reset_cnt;
+       u32                             idle_busy_ts_idx;
+       u16                             id;
+       u16                             id_control;
+       u16                             cpu_pci_msb_addr;
+       u8                              disabled;
+       u8                              late_init_done;
+       u8                              hwmon_initialized;
+       u8                              hard_reset_pending;
+       u8                              heartbeat;
+       u8                              reset_on_lockup;
+       u8                              dram_supports_virtual_memory;
+       u8                              dram_default_page_mapping;
+       u8                              pmmu_huge_range;
+       u8                              init_done;
+       u8                              clock_gating;
+       u8                              device_cpu_disabled;
+       u8                              dma_mask;
+       u8                              in_debug;
+       u8                              power9_64bit_dma_enable;
+       u8                              cdev_sysfs_created;
+       u8                              stop_on_err;
+       u8                              supports_sync_stream;
+       u8                              sync_stream_queue_idx;
+       u8                              supports_coresight;
+       u8                              supports_soft_reset;
+
+       /* Parameters for bring-up */
+       u8                              mmu_enable;
+       u8                              mmu_huge_page_opt;
+       u8                              cpu_enable;
+       u8                              reset_pcilink;
+       u8                              cpu_queues_enable;
+       u8                              fw_loading;
+       u8                              pldm;
+       u8                              axi_drain;
+       u8                              sram_scrambler_enable;
+       u8                              dram_scrambler_enable;
+       u8                              hard_reset_on_fw_events;
+       u8                              bmc_enable;
+       u8                              rl_enable;
+};
+
+
+/*
+ * IOCTLs
+ */
+
+/**
+ * typedef hl_ioctl_t - typedef for ioctl function in the driver
+ * @hpriv: pointer to the FD's private data, which contains state of
+ *             user process
+ * @data: pointer to the input/output arguments structure of the IOCTL
+ *
+ * Return: 0 for success, negative value for error
+ */
+typedef int hl_ioctl_t(struct hl_fpriv *hpriv, void *data);
+
+/**
+ * struct hl_ioctl_desc - describes an IOCTL entry of the driver.
+ * @cmd: the IOCTL code as created by the kernel macros.
+ * @func: pointer to the driver's function that should be called for this IOCTL.
+ */
+struct hl_ioctl_desc {
+       unsigned int cmd;
+       hl_ioctl_t *func;
+};
+
+
+/*
+ * Kernel module functions that can be accessed by entire module
+ */
+
+/**
+ * hl_mem_area_inside_range() - Checks whether address+size are inside a range.
+ * @address: The start address of the area we want to validate.
+ * @size: The size in bytes of the area we want to validate.
+ * @range_start_address: The start address of the valid range.
+ * @range_end_address: The end address of the valid range.
+ *
+ * Return: true if the area is inside the valid range, false otherwise.
+ */
+static inline bool hl_mem_area_inside_range(u64 address, u32 size,
+                               u64 range_start_address, u64 range_end_address)
+{
+       u64 end_address = address + size;
+
+       if ((address >= range_start_address) &&
+                       (end_address <= range_end_address) &&
+                       (end_address > address))
+               return true;
+
+       return false;
+}
+
+/**
+ * hl_mem_area_crosses_range() - Checks whether address+size crosses a range.
+ * @address: The start address of the area we want to validate.
+ * @size: The size in bytes of the area we want to validate.
+ * @range_start_address: The start address of the valid range.
+ * @range_end_address: The end address of the valid range.
+ *
+ * Return: true if the area overlaps part or all of the valid range,
+ *             false otherwise.
+ */
+static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
+                               u64 range_start_address, u64 range_end_address)
+{
+       u64 end_address = address + size;
+
+       if ((address >= range_start_address) &&
+                       (address < range_end_address))
+               return true;
+
+       if ((end_address >= range_start_address) &&
+                       (end_address < range_end_address))
+               return true;
+
+       if ((address < range_start_address) &&
+                       (end_address >= range_end_address))
+               return true;
+
+       return false;
+}
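
A short sketch of how these two helpers might be used by a parser, with placeholder function names and window bounds rather than the driver's ASIC-property fields.

static int hypo_validate_buf(u64 addr, u32 size, u64 dram_base, u64 dram_end)
{
	/* the buffer must be fully contained in the DRAM window */
	if (!hl_mem_area_inside_range(addr, size, dram_base, dram_end))
		return -EINVAL;

	return 0;
}

static bool hypo_hits_reserved(u64 addr, u32 size, u64 rsv_start, u64 rsv_end)
{
	/* even a partial overlap with a reserved region counts as a hit */
	return hl_mem_area_crosses_range(addr, size, rsv_start, rsv_end);
}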
+
+int hl_device_open(struct inode *inode, struct file *filp);
+int hl_device_open_ctrl(struct inode *inode, struct file *filp);
+bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
+enum hl_device_status hl_device_status(struct hl_device *hdev);
+int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
+int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
+               enum hl_asic_type asic_type, int minor);
+void destroy_hdev(struct hl_device *hdev);
+int hl_hw_queues_create(struct hl_device *hdev);
+void hl_hw_queues_destroy(struct hl_device *hdev);
+int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
+                               u32 cb_size, u64 cb_ptr);
+int hl_hw_queue_schedule_cs(struct hl_cs *cs);
+u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
+void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
+void hl_int_hw_queue_update_ci(struct hl_cs *cs);
+void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset);
+
+#define hl_queue_inc_ptr(p)            hl_hw_queue_add_ptr(p, 1)
+#define hl_pi_2_offset(pi)             ((pi) & (HL_QUEUE_LENGTH - 1))
+
+int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id);
+void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q);
+int hl_eq_init(struct hl_device *hdev, struct hl_eq *q);
+void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q);
+void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
+void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
+irqreturn_t hl_irq_handler_cq(int irq, void *arg);
+irqreturn_t hl_irq_handler_eq(int irq, void *arg);
+u32 hl_cq_inc_ptr(u32 ptr);
+
+int hl_asid_init(struct hl_device *hdev);
+void hl_asid_fini(struct hl_device *hdev);
+unsigned long hl_asid_alloc(struct hl_device *hdev);
+void hl_asid_free(struct hl_device *hdev, unsigned long asid);
+
+int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv);
+void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx);
+int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
+void hl_ctx_do_release(struct kref *ref);
+void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx);
+int hl_ctx_put(struct hl_ctx *ctx);
+struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
+void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
+void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
+
+int hl_device_init(struct hl_device *hdev, struct class *hclass);
+void hl_device_fini(struct hl_device *hdev);
+int hl_device_suspend(struct hl_device *hdev);
+int hl_device_resume(struct hl_device *hdev);
+int hl_device_reset(struct hl_device *hdev, bool hard_reset,
+                       bool from_hard_reset_thread);
+void hl_hpriv_get(struct hl_fpriv *hpriv);
+void hl_hpriv_put(struct hl_fpriv *hpriv);
+int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
+uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms);
+
+int hl_build_hwmon_channel_info(struct hl_device *hdev,
+               struct armcp_sensor *sensors_arr);
+
+int hl_sysfs_init(struct hl_device *hdev);
+void hl_sysfs_fini(struct hl_device *hdev);
+
+int hl_hwmon_init(struct hl_device *hdev);
+void hl_hwmon_fini(struct hl_device *hdev);
+
+int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
+               u64 *handle, int ctx_id);
+int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
+int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
+struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
+                       u32 handle);
+void hl_cb_put(struct hl_cb *cb);
+void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
+void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size);
+int hl_cb_pool_init(struct hl_device *hdev);
+int hl_cb_pool_fini(struct hl_device *hdev);
+
+void hl_cs_rollback_all(struct hl_device *hdev);
+struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
+               enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
+void hl_sob_reset_error(struct kref *ref);
+
+void goya_set_asic_funcs(struct hl_device *hdev);
+void gaudi_set_asic_funcs(struct hl_device *hdev);
+
+int hl_vm_ctx_init(struct hl_ctx *ctx);
+void hl_vm_ctx_fini(struct hl_ctx *ctx);
+
+int hl_vm_init(struct hl_device *hdev);
+void hl_vm_fini(struct hl_device *hdev);
+
+int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
+                       struct hl_userptr *userptr);
+void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
+void hl_userptr_delete_list(struct hl_device *hdev,
+                               struct list_head *userptr_list);
+bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
+                               struct list_head *userptr_list,
+                               struct hl_userptr **userptr);
+
+int hl_mmu_init(struct hl_device *hdev);
+void hl_mmu_fini(struct hl_device *hdev);
+int hl_mmu_ctx_init(struct hl_ctx *ctx);
+void hl_mmu_ctx_fini(struct hl_ctx *ctx);
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+               u32 page_size, bool flush_pte);
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+               bool flush_pte);
+void hl_mmu_swap_out(struct hl_ctx *ctx);
+void hl_mmu_swap_in(struct hl_ctx *ctx);
+
+int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
+                               void __iomem *dst);
+int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode);
+int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
+                               u16 len, u32 timeout, long *result);
+int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type);
+int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
+               size_t irq_arr_size);
+int hl_fw_test_cpu_queue(struct hl_device *hdev);
+void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
+                                               dma_addr_t *dma_handle);
+void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
+                                       void *vaddr);
+int hl_fw_send_heartbeat(struct hl_device *hdev);
+int hl_fw_armcp_info_get(struct hl_device *hdev);
+int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
+int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
+                       u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
+                       u32 boot_err0_reg, bool skip_bmc,
+                       u32 cpu_timeout, u32 boot_fit_timeout);
+
+int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
+                       bool is_wc[3]);
+int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
+int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
+                               u64 addr);
+int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
+               struct hl_inbound_pci_region *pci_region);
+int hl_pci_set_outbound_region(struct hl_device *hdev,
+               struct hl_outbound_pci_region *pci_region);
+int hl_pci_init(struct hl_device *hdev);
+void hl_pci_fini(struct hl_device *hdev);
+
+long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
+void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
+int hl_get_temperature(struct hl_device *hdev,
+                      int sensor_index, u32 attr, long *value);
+int hl_set_temperature(struct hl_device *hdev,
+                      int sensor_index, u32 attr, long value);
+int hl_get_voltage(struct hl_device *hdev,
+                  int sensor_index, u32 attr, long *value);
+int hl_get_current(struct hl_device *hdev,
+                  int sensor_index, u32 attr, long *value);
+int hl_get_fan_speed(struct hl_device *hdev,
+                    int sensor_index, u32 attr, long *value);
+int hl_get_pwm_info(struct hl_device *hdev,
+                   int sensor_index, u32 attr, long *value);
+void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
+                       long value);
+u64 hl_get_max_power(struct hl_device *hdev);
+void hl_set_max_power(struct hl_device *hdev, u64 value);
+int hl_set_voltage(struct hl_device *hdev,
+                       int sensor_index, u32 attr, long value);
+int hl_set_current(struct hl_device *hdev,
+                       int sensor_index, u32 attr, long value);
+
+#ifdef CONFIG_DEBUG_FS
+
+void hl_debugfs_init(void);
+void hl_debugfs_fini(void);
+void hl_debugfs_add_device(struct hl_device *hdev);
+void hl_debugfs_remove_device(struct hl_device *hdev);
+void hl_debugfs_add_file(struct hl_fpriv *hpriv);
+void hl_debugfs_remove_file(struct hl_fpriv *hpriv);
+void hl_debugfs_add_cb(struct hl_cb *cb);
+void hl_debugfs_remove_cb(struct hl_cb *cb);
+void hl_debugfs_add_cs(struct hl_cs *cs);
+void hl_debugfs_remove_cs(struct hl_cs *cs);
+void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job);
+void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job);
+void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr);
+void hl_debugfs_remove_userptr(struct hl_device *hdev,
+                               struct hl_userptr *userptr);
+void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
+void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
+
+#else
+
+static inline void __init hl_debugfs_init(void)
+{
+}
+
+static inline void hl_debugfs_fini(void)
+{
+}
+
+static inline void hl_debugfs_add_device(struct hl_device *hdev)
+{
+}
+
+static inline void hl_debugfs_remove_device(struct hl_device *hdev)
+{
+}
+
+static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv)
+{
+}
+
+static inline void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
+{
+}
+
+static inline void hl_debugfs_add_cb(struct hl_cb *cb)
+{
+}
+
+static inline void hl_debugfs_remove_cb(struct hl_cb *cb)
+{
+}
+
+static inline void hl_debugfs_add_cs(struct hl_cs *cs)
+{
+}
+
+static inline void hl_debugfs_remove_cs(struct hl_cs *cs)
+{
+}
+
+static inline void hl_debugfs_add_job(struct hl_device *hdev,
+                                       struct hl_cs_job *job)
+{
+}
+
+static inline void hl_debugfs_remove_job(struct hl_device *hdev,
+                                       struct hl_cs_job *job)
+{
+}
+
+static inline void hl_debugfs_add_userptr(struct hl_device *hdev,
+                                       struct hl_userptr *userptr)
+{
+}
+
+static inline void hl_debugfs_remove_userptr(struct hl_device *hdev,
+                                       struct hl_userptr *userptr)
+{
+}
+
+static inline void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev,
+                                       struct hl_ctx *ctx)
+{
+}
+
+static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev,
+                                       struct hl_ctx *ctx)
+{
+}
+
+#endif
+
+/* IOCTLs */
+long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
+long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg);
+int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
+int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
+int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data);
+int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
+
+#endif /* HABANALABSP_H_ */
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
new file mode 100644 (file)
index 0000000..f38664b
--- /dev/null
@@ -0,0 +1,529 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#define pr_fmt(fmt)            "habanalabs: " fmt
+
+#include "habanalabs.h"
+
+#include <linux/pci.h>
+#include <linux/module.h>
+
+#define HL_DRIVER_AUTHOR       "HabanaLabs Kernel Driver Team"
+
+#define HL_DRIVER_DESC         "Driver for HabanaLabs's AI Accelerators"
+
+MODULE_AUTHOR(HL_DRIVER_AUTHOR);
+MODULE_DESCRIPTION(HL_DRIVER_DESC);
+MODULE_LICENSE("GPL v2");
+
+static int hl_major;
+static struct class *hl_class;
+static DEFINE_IDR(hl_devs_idr);
+static DEFINE_MUTEX(hl_devs_idr_lock);
+
+static int timeout_locked = 5;
+static int reset_on_lockup = 1;
+
+module_param(timeout_locked, int, 0444);
+MODULE_PARM_DESC(timeout_locked,
+       "Device lockup timeout in seconds (0 = disabled, default 5s)");
+
+module_param(reset_on_lockup, int, 0444);
+MODULE_PARM_DESC(reset_on_lockup,
+       "Do device reset on lockup (0 = no, 1 = yes, default yes)");
+
+#define PCI_VENDOR_ID_HABANALABS       0x1da3
+
+#define PCI_IDS_GOYA                   0x0001
+#define PCI_IDS_GAUDI                  0x1000
+
+static const struct pci_device_id ids[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
+       { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, ids);
+
+/*
+ * get_asic_type - translate device id to asic type
+ *
+ * @device: id of the PCI device
+ *
+ * Translate device id to asic type.
+ * In case of an unidentified device, return ASIC_INVALID
+ */
+static enum hl_asic_type get_asic_type(u16 device)
+{
+       enum hl_asic_type asic_type;
+
+       switch (device) {
+       case PCI_IDS_GOYA:
+               asic_type = ASIC_GOYA;
+               break;
+       case PCI_IDS_GAUDI:
+               asic_type = ASIC_GAUDI;
+               break;
+       default:
+               asic_type = ASIC_INVALID;
+               break;
+       }
+
+       return asic_type;
+}
+
+/*
+ * hl_device_open - open function for habanalabs device
+ *
+ * @inode: pointer to inode structure
+ * @filp: pointer to file structure
+ *
+ * Called when a process opens a habanalabs device.
+ */
+int hl_device_open(struct inode *inode, struct file *filp)
+{
+       struct hl_device *hdev;
+       struct hl_fpriv *hpriv;
+       int rc;
+
+       mutex_lock(&hl_devs_idr_lock);
+       hdev = idr_find(&hl_devs_idr, iminor(inode));
+       mutex_unlock(&hl_devs_idr_lock);
+
+       if (!hdev) {
+               pr_err("Couldn't find device %d:%d\n",
+                       imajor(inode), iminor(inode));
+               return -ENXIO;
+       }
+
+       hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
+       if (!hpriv)
+               return -ENOMEM;
+
+       hpriv->hdev = hdev;
+       filp->private_data = hpriv;
+       hpriv->filp = filp;
+       mutex_init(&hpriv->restore_phase_mutex);
+       kref_init(&hpriv->refcount);
+       nonseekable_open(inode, filp);
+
+       hl_cb_mgr_init(&hpriv->cb_mgr);
+       hl_ctx_mgr_init(&hpriv->ctx_mgr);
+
+       hpriv->taskpid = find_get_pid(current->pid);
+
+       mutex_lock(&hdev->fpriv_list_lock);
+
+       if (hl_device_disabled_or_in_reset(hdev)) {
+               dev_err_ratelimited(hdev->dev,
+                       "Can't open %s because it is disabled or in reset\n",
+                       dev_name(hdev->dev));
+               rc = -EPERM;
+               goto out_err;
+       }
+
+       if (hdev->in_debug) {
+               dev_err_ratelimited(hdev->dev,
+                       "Can't open %s because it is being debugged by another user\n",
+                       dev_name(hdev->dev));
+               rc = -EPERM;
+               goto out_err;
+       }
+
+       if (hdev->compute_ctx) {
+               dev_dbg_ratelimited(hdev->dev,
+                       "Can't open %s because another user is working on it\n",
+                       dev_name(hdev->dev));
+               rc = -EBUSY;
+               goto out_err;
+       }
+
+       rc = hl_ctx_create(hdev, hpriv);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to create context %d\n", rc);
+               goto out_err;
+       }
+
+       /* Device is IDLE at this point so it is legal to change PLLs.
+        * There is no need to check anything because if the PLL is
+        * already HIGH, the set function will return without doing
+        * anything
+        */
+       hl_device_set_frequency(hdev, PLL_HIGH);
+
+       list_add(&hpriv->dev_node, &hdev->fpriv_list);
+       mutex_unlock(&hdev->fpriv_list_lock);
+
+       hl_debugfs_add_file(hpriv);
+
+       return 0;
+
+out_err:
+       mutex_unlock(&hdev->fpriv_list_lock);
+
+       hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
+       hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
+       filp->private_data = NULL;
+       mutex_destroy(&hpriv->restore_phase_mutex);
+       put_pid(hpriv->taskpid);
+
+       kfree(hpriv);
+
+       return rc;
+}
+
+int hl_device_open_ctrl(struct inode *inode, struct file *filp)
+{
+       struct hl_device *hdev;
+       struct hl_fpriv *hpriv;
+       int rc;
+
+       mutex_lock(&hl_devs_idr_lock);
+       hdev = idr_find(&hl_devs_idr, iminor(inode));
+       mutex_unlock(&hl_devs_idr_lock);
+
+       if (!hdev) {
+               pr_err("Couldn't find device %d:%d\n",
+                       imajor(inode), iminor(inode));
+               return -ENXIO;
+       }
+
+       hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
+       if (!hpriv)
+               return -ENOMEM;
+
+       mutex_lock(&hdev->fpriv_list_lock);
+
+       if (hl_device_disabled_or_in_reset(hdev)) {
+               dev_err_ratelimited(hdev->dev_ctrl,
+                       "Can't open %s because it is disabled or in reset\n",
+                       dev_name(hdev->dev_ctrl));
+               rc = -EPERM;
+               goto out_err;
+       }
+
+       list_add(&hpriv->dev_node, &hdev->fpriv_list);
+       mutex_unlock(&hdev->fpriv_list_lock);
+
+       hpriv->hdev = hdev;
+       filp->private_data = hpriv;
+       hpriv->filp = filp;
+       hpriv->is_control = true;
+       nonseekable_open(inode, filp);
+
+       hpriv->taskpid = find_get_pid(current->pid);
+
+       return 0;
+
+out_err:
+       mutex_unlock(&hdev->fpriv_list_lock);
+       kfree(hpriv);
+       return rc;
+}
+
+static void set_driver_behavior_per_device(struct hl_device *hdev)
+{
+       hdev->mmu_enable = 1;
+       hdev->cpu_enable = 1;
+       hdev->fw_loading = 1;
+       hdev->cpu_queues_enable = 1;
+       hdev->heartbeat = 1;
+       hdev->clock_gating = 1;
+
+       hdev->reset_pcilink = 0;
+       hdev->axi_drain = 0;
+       hdev->sram_scrambler_enable = 1;
+       hdev->dram_scrambler_enable = 1;
+       hdev->bmc_enable = 1;
+       hdev->hard_reset_on_fw_events = 1;
+}
+
+/*
+ * create_hdev - create habanalabs device instance
+ *
+ * @dev: will hold the pointer to the new habanalabs device structure
+ * @pdev: pointer to the pci device
+ * @asic_type: in case of a simulator device, which ASIC type it is
+ * @minor: in case of a simulator device, the minor of the device
+ *
+ * Allocate memory for habanalabs device and initialize basic fields
+ * Identify the ASIC type
+ * Allocate ID (minor) for the device (only for real devices)
+ */
+int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
+               enum hl_asic_type asic_type, int minor)
+{
+       struct hl_device *hdev;
+       int rc, main_id, ctrl_id = 0;
+
+       *dev = NULL;
+
+       hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
+       if (!hdev)
+               return -ENOMEM;
+
+       /* First, we must find out which ASIC we are handling. This is needed
+        * to configure the behavior of the driver (kernel parameters)
+        */
+       if (pdev) {
+               hdev->asic_type = get_asic_type(pdev->device);
+               if (hdev->asic_type == ASIC_INVALID) {
+                       dev_err(&pdev->dev, "Unsupported ASIC\n");
+                       rc = -ENODEV;
+                       goto free_hdev;
+               }
+       } else {
+               hdev->asic_type = asic_type;
+       }
+
+       hdev->major = hl_major;
+       hdev->reset_on_lockup = reset_on_lockup;
+       hdev->pldm = 0;
+
+       set_driver_behavior_per_device(hdev);
+
+       if (timeout_locked)
+               hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
+       else
+               hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
+
+       hdev->disabled = true;
+       hdev->pdev = pdev; /* can be NULL in case of simulator device */
+
+       /* Set default DMA mask to 32 bits */
+       hdev->dma_mask = 32;
+
+       mutex_lock(&hl_devs_idr_lock);
+
+       /* Always save 2 numbers, 1 for main device and 1 for control.
+        * They must be consecutive
+        */
+       main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS,
+                               GFP_KERNEL);
+
+       if (main_id >= 0)
+               ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
+                                       main_id + 2, GFP_KERNEL);
+
+       mutex_unlock(&hl_devs_idr_lock);
+
+       if ((main_id < 0) || (ctrl_id < 0)) {
+               if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
+                       pr_err("too many devices in the system\n");
+
+               if (main_id >= 0) {
+                       mutex_lock(&hl_devs_idr_lock);
+                       idr_remove(&hl_devs_idr, main_id);
+                       mutex_unlock(&hl_devs_idr_lock);
+               }
+
+               rc = -EBUSY;
+               goto free_hdev;
+       }
+
+       hdev->id = main_id;
+       hdev->id_control = ctrl_id;
+
+       *dev = hdev;
+
+       return 0;
+
+free_hdev:
+       kfree(hdev);
+       return rc;
+}
+
+/*
+ * destroy_hdev - destroy habanalabs device instance
+ *
+ * @dev: pointer to the habanalabs device structure
+ *
+ */
+void destroy_hdev(struct hl_device *hdev)
+{
+       /* Remove device from the device list */
+       mutex_lock(&hl_devs_idr_lock);
+       idr_remove(&hl_devs_idr, hdev->id);
+       idr_remove(&hl_devs_idr, hdev->id_control);
+       mutex_unlock(&hl_devs_idr_lock);
+
+       kfree(hdev);
+}
+
+static int hl_pmops_suspend(struct device *dev)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+
+       pr_debug("Going to suspend PCI device\n");
+
+       if (!hdev) {
+               pr_err("device pointer is NULL in suspend\n");
+               return 0;
+       }
+
+       return hl_device_suspend(hdev);
+}
+
+static int hl_pmops_resume(struct device *dev)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+
+       pr_debug("Going to resume PCI device\n");
+
+       if (!hdev) {
+               pr_err("device pointer is NULL in resume\n");
+               return 0;
+       }
+
+       return hl_device_resume(hdev);
+}
+
+/*
+ * hl_pci_probe - probe PCI habanalabs devices
+ *
+ * @pdev: pointer to pci device
+ * @id: pointer to pci device id structure
+ *
+ * Standard PCI probe function for habanalabs device.
+ * Create a new habanalabs device and initialize it according to the
+ * device's type
+ */
+static int hl_pci_probe(struct pci_dev *pdev,
+                               const struct pci_device_id *id)
+{
+       struct hl_device *hdev;
+       int rc;
+
+       dev_info(&pdev->dev, HL_NAME
+                " device found [%04x:%04x] (rev %x)\n",
+                (int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
+
+       rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1);
+       if (rc)
+               return rc;
+
+       pci_set_drvdata(pdev, hdev);
+
+       rc = hl_device_init(hdev, hl_class);
+       if (rc) {
+               dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
+               rc = -ENODEV;
+               goto disable_device;
+       }
+
+       return 0;
+
+disable_device:
+       pci_set_drvdata(pdev, NULL);
+       destroy_hdev(hdev);
+
+       return rc;
+}
+
+/*
+ * hl_pci_remove - remove PCI habanalabs devices
+ *
+ * @pdev: pointer to pci device
+ *
+ * Standard PCI remove function for habanalabs device
+ */
+static void hl_pci_remove(struct pci_dev *pdev)
+{
+       struct hl_device *hdev;
+
+       hdev = pci_get_drvdata(pdev);
+       if (!hdev)
+               return;
+
+       hl_device_fini(hdev);
+       pci_set_drvdata(pdev, NULL);
+
+       destroy_hdev(hdev);
+}
+
+static const struct dev_pm_ops hl_pm_ops = {
+       .suspend = hl_pmops_suspend,
+       .resume = hl_pmops_resume,
+};
+
+static struct pci_driver hl_pci_driver = {
+       .name = HL_NAME,
+       .id_table = ids,
+       .probe = hl_pci_probe,
+       .remove = hl_pci_remove,
+       .driver.pm = &hl_pm_ops,
+};
+
+/*
+ * hl_init - Initialize the habanalabs kernel driver
+ */
+static int __init hl_init(void)
+{
+       int rc;
+       dev_t dev;
+
+       pr_info("loading driver\n");
+
+       rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME);
+       if (rc < 0) {
+               pr_err("unable to get major\n");
+               return rc;
+       }
+
+       hl_major = MAJOR(dev);
+
+       hl_class = class_create(THIS_MODULE, HL_NAME);
+       if (IS_ERR(hl_class)) {
+               pr_err("failed to allocate class\n");
+               rc = PTR_ERR(hl_class);
+               goto remove_major;
+       }
+
+       hl_debugfs_init();
+
+       rc = pci_register_driver(&hl_pci_driver);
+       if (rc) {
+               pr_err("failed to register pci device\n");
+               goto remove_debugfs;
+       }
+
+       pr_debug("driver loaded\n");
+
+       return 0;
+
+remove_debugfs:
+       hl_debugfs_fini();
+       class_destroy(hl_class);
+remove_major:
+       unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
+       return rc;
+}
+
+/*
+ * hl_exit - Release all resources of the habanalabs kernel driver
+ */
+static void __exit hl_exit(void)
+{
+       pci_unregister_driver(&hl_pci_driver);
+
+       /*
+        * Removing debugfs must come after all devices or simulator devices
+        * have been removed, because otherwise the debugfs code would
+        * reference NULL objects
+        */
+       hl_debugfs_fini();
+
+       class_destroy(hl_class);
+       unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
+
+       idr_destroy(&hl_devs_idr);
+
+       pr_debug("driver removed\n");
+}
+
+module_init(hl_init);
+module_exit(hl_exit);
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
new file mode 100644 (file)
index 0000000..5af1c03
--- /dev/null
@@ -0,0 +1,546 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include <uapi/misc/habanalabs.h>
+#include "habanalabs.h"
+
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+
+static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
+       [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
+       [HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
+       [HL_DEBUG_OP_STM] = sizeof(struct hl_debug_params_stm),
+       [HL_DEBUG_OP_FUNNEL] = 0,
+       [HL_DEBUG_OP_BMON] = sizeof(struct hl_debug_params_bmon),
+       [HL_DEBUG_OP_SPMU] = sizeof(struct hl_debug_params_spmu),
+       [HL_DEBUG_OP_TIMESTAMP] = 0
+};
+
+static int device_status_info(struct hl_device *hdev, struct hl_info_args *args)
+{
+       struct hl_info_device_status dev_stat = {0};
+       u32 size = args->return_size;
+       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+       if ((!size) || (!out))
+               return -EINVAL;
+
+       dev_stat.status = hl_device_status(hdev);
+
+       return copy_to_user(out, &dev_stat,
+                       min((size_t)size, sizeof(dev_stat))) ? -EFAULT : 0;
+}
+
+static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
+{
+       struct hl_info_hw_ip_info hw_ip = {0};
+       u32 size = args->return_size;
+       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       u64 sram_kmd_size, dram_kmd_size;
+
+       if ((!size) || (!out))
+               return -EINVAL;
+
+       sram_kmd_size = (prop->sram_user_base_address -
+                               prop->sram_base_address);
+       dram_kmd_size = (prop->dram_user_base_address -
+                               prop->dram_base_address);
+
+       hw_ip.device_id = hdev->asic_funcs->get_pci_id(hdev);
+       hw_ip.sram_base_address = prop->sram_user_base_address;
+       hw_ip.dram_base_address = prop->dram_user_base_address;
+       hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask;
+       hw_ip.sram_size = prop->sram_size - sram_kmd_size;
+       hw_ip.dram_size = prop->dram_size - dram_kmd_size;
+       if (hw_ip.dram_size > PAGE_SIZE)
+               hw_ip.dram_enabled = 1;
+       hw_ip.num_of_events = prop->num_of_events;
+
+       memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version,
+               min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN));
+
+       memcpy(hw_ip.card_name, prop->armcp_info.card_name,
+               min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN));
+
+       hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version);
+       hw_ip.module_id = le32_to_cpu(prop->armcp_info.card_location);
+
+       hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr;
+       hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf;
+       hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
+       hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor;
+
+       return copy_to_user(out, &hw_ip,
+               min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
+}
+
+static int hw_events_info(struct hl_device *hdev, bool aggregate,
+                       struct hl_info_args *args)
+{
+       u32 size, max_size = args->return_size;
+       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+       void *arr;
+
+       if ((!max_size) || (!out))
+               return -EINVAL;
+
+       arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size);
+
+       return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0;
+}
+
+static int dram_usage_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       struct hl_info_dram_usage dram_usage = {0};
+       u32 max_size = args->return_size;
+       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       u64 dram_kmd_size;
+
+       if ((!max_size) || (!out))
+               return -EINVAL;
+
+       dram_kmd_size = (prop->dram_user_base_address -
+                               prop->dram_base_address);
+       dram_usage.dram_free_mem = (prop->dram_size - dram_kmd_size) -
+                                       atomic64_read(&hdev->dram_used_mem);
+       if (hpriv->ctx)
+               dram_usage.ctx_dram_mem =
+                       atomic64_read(&hpriv->ctx->dram_phys_mem);
+
+       return copy_to_user(out, &dram_usage,
+               min((size_t) max_size, sizeof(dram_usage))) ? -EFAULT : 0;
+}
+
+static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
+{
+       struct hl_info_hw_idle hw_idle = {0};
+       u32 max_size = args->return_size;
+       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+       if ((!max_size) || (!out))
+               return -EINVAL;
+
+       hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
+                                       &hw_idle.busy_engines_mask, NULL);
+
+       return copy_to_user(out, &hw_idle,
+               min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
+}
+
+static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args)
+{
+       struct hl_debug_params *params;
+       void *input = NULL, *output = NULL;
+       int rc;
+
+       params = kzalloc(sizeof(*params), GFP_KERNEL);
+       if (!params)
+               return -ENOMEM;
+
+       params->reg_idx = args->reg_idx;
+       params->enable = args->enable;
+       params->op = args->op;
+
+       if (args->input_ptr && args->input_size) {
+               input = kzalloc(hl_debug_struct_size[args->op], GFP_KERNEL);
+               if (!input) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
+
+               if (copy_from_user(input, u64_to_user_ptr(args->input_ptr),
+                                       args->input_size)) {
+                       rc = -EFAULT;
+                       dev_err(hdev->dev, "failed to copy input debug data\n");
+                       goto out;
+               }
+
+               params->input = input;
+       }
+
+       if (args->output_ptr && args->output_size) {
+               output = kzalloc(args->output_size, GFP_KERNEL);
+               if (!output) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
+
+               params->output = output;
+               params->output_size = args->output_size;
+       }
+
+       rc = hdev->asic_funcs->debug_coresight(hdev, params);
+       if (rc) {
+               dev_err(hdev->dev,
+                       "debug coresight operation failed %d\n", rc);
+               goto out;
+       }
+
+       if (output && copy_to_user((void __user *) (uintptr_t) args->output_ptr,
+                                       output, args->output_size)) {
+               dev_err(hdev->dev, "copy to user failed in debug ioctl\n");
+               rc = -EFAULT;
+               goto out;
+       }
+
+out:
+       kfree(params);
+       kfree(output);
+       kfree(input);
+
+       return rc;
+}
+
+static int device_utilization(struct hl_device *hdev, struct hl_info_args *args)
+{
+       struct hl_info_device_utilization device_util = {0};
+       u32 max_size = args->return_size;
+       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+       if ((!max_size) || (!out))
+               return -EINVAL;
+
+       if ((args->period_ms < 100) || (args->period_ms > 1000) ||
+               (args->period_ms % 100)) {
+               dev_err(hdev->dev,
+                       "period %u must be between 100 - 1000 and must be divisible by 100\n",
+                       args->period_ms);
+               return -EINVAL;
+       }
+
+       device_util.utilization = hl_device_utilization(hdev, args->period_ms);
+
+       return copy_to_user(out, &device_util,
+               min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0;
+}
+
+static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args)
+{
+       struct hl_info_clk_rate clk_rate = {0};
+       u32 max_size = args->return_size;
+       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+       int rc;
+
+       if ((!max_size) || (!out))
+               return -EINVAL;
+
+       rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz,
+                                               &clk_rate.max_clk_rate_mhz);
+       if (rc)
+               return rc;
+
+       return copy_to_user(out, &clk_rate,
+               min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0;
+}
+
+static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args)
+{
+       struct hl_info_reset_count reset_count = {0};
+       u32 max_size = args->return_size;
+       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+       if ((!max_size) || (!out))
+               return -EINVAL;
+
+       reset_count.hard_reset_cnt = hdev->hard_reset_cnt;
+       reset_count.soft_reset_cnt = hdev->soft_reset_cnt;
+
+       return copy_to_user(out, &reset_count,
+               min((size_t) max_size, sizeof(reset_count))) ? -EFAULT : 0;
+}
+
+static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
+{
+       struct hl_info_time_sync time_sync = {0};
+       u32 max_size = args->return_size;
+       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+       if ((!max_size) || (!out))
+               return -EINVAL;
+
+       time_sync.device_time = hdev->asic_funcs->get_device_time(hdev);
+       time_sync.host_time = ktime_get_raw_ns();
+
+       return copy_to_user(out, &time_sync,
+               min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
+}
+
+static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       struct hl_info_cs_counters cs_counters = {0};
+       u32 max_size = args->return_size;
+       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+       if ((!max_size) || (!out))
+               return -EINVAL;
+
+       memcpy(&cs_counters.cs_counters, &hdev->aggregated_cs_counters,
+                       sizeof(struct hl_cs_counters));
+
+       if (hpriv->ctx)
+               memcpy(&cs_counters.ctx_cs_counters, &hpriv->ctx->cs_counters,
+                               sizeof(struct hl_cs_counters));
+
+       return copy_to_user(out, &cs_counters,
+               min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0;
+}
+
+static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
+                               struct device *dev)
+{
+       struct hl_info_args *args = data;
+       struct hl_device *hdev = hpriv->hdev;
+       int rc;
+
+       /*
+        * Information is returned for the following opcodes even if the device
+        * is disabled or in reset.
+        */
+       switch (args->op) {
+       case HL_INFO_HW_IP_INFO:
+               return hw_ip_info(hdev, args);
+
+       case HL_INFO_DEVICE_STATUS:
+               return device_status_info(hdev, args);
+
+       case HL_INFO_RESET_COUNT:
+               return get_reset_count(hdev, args);
+
+       default:
+               break;
+       }
+
+       if (hl_device_disabled_or_in_reset(hdev)) {
+               dev_warn_ratelimited(dev,
+                       "Device is %s. Can't execute INFO IOCTL\n",
+                       atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+               return -EBUSY;
+       }
+
+       switch (args->op) {
+       case HL_INFO_HW_EVENTS:
+               rc = hw_events_info(hdev, false, args);
+               break;
+
+       case HL_INFO_DRAM_USAGE:
+               rc = dram_usage_info(hpriv, args);
+               break;
+
+       case HL_INFO_HW_IDLE:
+               rc = hw_idle(hdev, args);
+               break;
+
+       case HL_INFO_DEVICE_UTILIZATION:
+               rc = device_utilization(hdev, args);
+               break;
+
+       case HL_INFO_HW_EVENTS_AGGREGATE:
+               rc = hw_events_info(hdev, true, args);
+               break;
+
+       case HL_INFO_CLK_RATE:
+               rc = get_clk_rate(hdev, args);
+               break;
+
+       case HL_INFO_TIME_SYNC:
+               return time_sync_info(hdev, args);
+
+       case HL_INFO_CS_COUNTERS:
+               return cs_counters_info(hpriv, args);
+
+       default:
+               dev_err(dev, "Invalid request %d\n", args->op);
+               rc = -ENOTTY;
+               break;
+       }
+
+       return rc;
+}
+
+static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+       return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev);
+}
+
+static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data)
+{
+       return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl);
+}
+
+static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+       struct hl_debug_args *args = data;
+       struct hl_device *hdev = hpriv->hdev;
+       int rc = 0;
+
+       if (hl_device_disabled_or_in_reset(hdev)) {
+               dev_warn_ratelimited(hdev->dev,
+                       "Device is %s. Can't execute DEBUG IOCTL\n",
+                       atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+               return -EBUSY;
+       }
+
+       switch (args->op) {
+       case HL_DEBUG_OP_ETR:
+       case HL_DEBUG_OP_ETF:
+       case HL_DEBUG_OP_STM:
+       case HL_DEBUG_OP_FUNNEL:
+       case HL_DEBUG_OP_BMON:
+       case HL_DEBUG_OP_SPMU:
+       case HL_DEBUG_OP_TIMESTAMP:
+               if (!hdev->in_debug) {
+                       dev_err_ratelimited(hdev->dev,
+                               "Rejecting debug configuration request because device not in debug mode\n");
+                       return -EFAULT;
+               }
+               args->input_size =
+                       min(args->input_size, hl_debug_struct_size[args->op]);
+               rc = debug_coresight(hdev, args);
+               break;
+       case HL_DEBUG_OP_SET_MODE:
+               rc = hl_device_set_debug_mode(hdev, (bool) args->enable);
+               break;
+       default:
+               dev_err(hdev->dev, "Invalid request %d\n", args->op);
+               rc = -ENOTTY;
+               break;
+       }
+
+       return rc;
+}
+
+#define HL_IOCTL_DEF(ioctl, _func) \
+       [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func}
+
+static const struct hl_ioctl_desc hl_ioctls[] = {
+       HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl),
+       HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
+       HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
+       HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl),
+       HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl),
+       HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl)
+};
+
+static const struct hl_ioctl_desc hl_ioctls_control[] = {
+       HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control)
+};
+
+static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
+               const struct hl_ioctl_desc *ioctl, struct device *dev)
+{
+       struct hl_fpriv *hpriv = filep->private_data;
+       struct hl_device *hdev = hpriv->hdev;
+       unsigned int nr = _IOC_NR(cmd);
+       char stack_kdata[128] = {0};
+       char *kdata = NULL;
+       unsigned int usize, asize;
+       hl_ioctl_t *func;
+       u32 hl_size;
+       int retcode;
+
+       if (hdev->hard_reset_pending) {
+               dev_crit_ratelimited(hdev->dev_ctrl,
+                       "Device HARD reset pending! Please close FD\n");
+               return -ENODEV;
+       }
+
+       /* Do not trust userspace, use our own definition */
+       func = ioctl->func;
+
+       if (unlikely(!func)) {
+               dev_dbg(dev, "no function\n");
+               retcode = -ENOTTY;
+               goto out_err;
+       }
+
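+       /* Size the kernel copy to the larger of the kernel's definition of
+        * the ioctl struct and the size encoded in the user's command, so the
+        * handler always sees a fully-sized argument structure.
+        */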
+       hl_size = _IOC_SIZE(ioctl->cmd);
+       usize = asize = _IOC_SIZE(cmd);
+       if (hl_size > asize)
+               asize = hl_size;
+
+       cmd = ioctl->cmd;
+
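+       /* Small argument structs fit in the on-stack buffer; larger ones fall
+        * back to a heap allocation.
+        */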
+       if (cmd & (IOC_IN | IOC_OUT)) {
+               if (asize <= sizeof(stack_kdata)) {
+                       kdata = stack_kdata;
+               } else {
+                       kdata = kzalloc(asize, GFP_KERNEL);
+                       if (!kdata) {
+                               retcode = -ENOMEM;
+                               goto out_err;
+                       }
+               }
+       }
+
+       if (cmd & IOC_IN) {
+               if (copy_from_user(kdata, (void __user *)arg, usize)) {
+                       retcode = -EFAULT;
+                       goto out_err;
+               }
+       } else if (cmd & IOC_OUT) {
+               memset(kdata, 0, usize);
+       }
+
+       retcode = func(hpriv, kdata);
+
+       if ((cmd & IOC_OUT) && copy_to_user((void __user *)arg, kdata, usize))
+               retcode = -EFAULT;
+
+out_err:
+       if (retcode)
+               dev_dbg(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
+                         task_pid_nr(current), cmd, nr);
+
+       if (kdata != stack_kdata)
+               kfree(kdata);
+
+       return retcode;
+}
+
+long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+       struct hl_fpriv *hpriv = filep->private_data;
+       struct hl_device *hdev = hpriv->hdev;
+       const struct hl_ioctl_desc *ioctl = NULL;
+       unsigned int nr = _IOC_NR(cmd);
+
+       if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
+               ioctl = &hl_ioctls[nr];
+       } else {
+               dev_err(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n",
+                       task_pid_nr(current), nr);
+               return -ENOTTY;
+       }
+
+       return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev);
+}
+
+long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+       struct hl_fpriv *hpriv = filep->private_data;
+       struct hl_device *hdev = hpriv->hdev;
+       const struct hl_ioctl_desc *ioctl = NULL;
+       unsigned int nr = _IOC_NR(cmd);
+
+       if (nr == _IOC_NR(HL_IOCTL_INFO)) {
+               ioctl = &hl_ioctls_control[nr];
+       } else {
+               dev_err(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n",
+                       task_pid_nr(current), nr);
+               return -ENOTTY;
+       }
+
+       return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl);
+}
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
new file mode 100644 (file)
index 0000000..2876816
--- /dev/null
@@ -0,0 +1,918 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/slab.h>
+
+/*
+ * hl_hw_queue_add_ptr - add to pi or ci and check if it wraps around
+ *
+ * @ptr: the current pi/ci value
+ * @val: the amount to add
+ *
+ * Add val to ptr. The pointer advances modulo twice the queue length.
+ */
+inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
+{
+       ptr += val;
+       ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
+       return ptr;
+}
+
+static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
+{
+       return atomic_read(ci) & ((queue_len << 1) - 1);
+}
+
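+/*
+ * queue_free_slots - return how many entries are free in the given queue
+ *
+ * pi and ci advance modulo twice the queue length, so a negative delta means
+ * pi has wrapped around relative to ci and the free count is derived from the
+ * wrapped distance.
+ */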
+static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
+{
+       int delta = (q->pi - queue_ci_get(&q->ci, queue_len));
+
+       if (delta >= 0)
+               return (queue_len - delta);
+       else
+               return (abs(delta) - queue_len);
+}
+
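+/*
+ * hl_int_hw_queue_update_ci - advance the ci of every internal queue by the
+ *                             number of jobs the given CS placed on it
+ *
+ * @cs: pointer to the command submission
+ */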
+void hl_int_hw_queue_update_ci(struct hl_cs *cs)
+{
+       struct hl_device *hdev = cs->ctx->hdev;
+       struct hl_hw_queue *q;
+       int i;
+
+       if (hdev->disabled)
+               return;
+
+       q = &hdev->kernel_queues[0];
+       for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
+               if (q->queue_type == QUEUE_TYPE_INT)
+                       atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
+       }
+}
+
+/*
+ * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or
+ *                                an H/W queue.
+ * @hdev: pointer to habanalabs device structure
+ * @q: pointer to habanalabs queue structure
+ * @ctl: BD's control word
+ * @len: BD's length
+ * @ptr: BD's pointer
+ *
+ * This function assumes there is enough space on the queue to submit a new
+ * BD to it. It initializes the next BD and calls the device specific
+ * function to set the pi (and doorbell)
+ *
+ * This function must be called when the scheduler mutex is taken
+ *
+ */
+static void ext_and_hw_queue_submit_bd(struct hl_device *hdev,
+                       struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr)
+{
+       struct hl_bd *bd;
+
+       bd = (struct hl_bd *) (uintptr_t) q->kernel_address;
+       bd += hl_pi_2_offset(q->pi);
+       bd->ctl = cpu_to_le32(ctl);
+       bd->len = cpu_to_le32(len);
+       bd->ptr = cpu_to_le64(ptr);
+
+       q->pi = hl_queue_inc_ptr(q->pi);
+       hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
+}
+
+/*
+ * ext_queue_sanity_checks - perform some sanity checks on external queue
+ *
+ * @hdev              : pointer to hl_device structure
+ * @q                 :        pointer to hl_hw_queue structure
+ * @num_of_entries    : how many entries to check for space
+ * @reserve_cq_entry  :        whether to reserve an entry in the cq
+ *
+ * H/W queues spinlock should be taken before calling this function
+ *
+ * Perform the following:
+ * - Make sure we have enough space in the h/w queue
+ * - Make sure we have enough space in the completion queue
+ * - Reserve space in the completion queue (needs to be reversed if there
+ *   is a failure down the road before the actual submission of work). Only
+ *   do this action if reserve_cq_entry is true
+ *
+ */
+static int ext_queue_sanity_checks(struct hl_device *hdev,
+                               struct hl_hw_queue *q, int num_of_entries,
+                               bool reserve_cq_entry)
+{
+       atomic_t *free_slots =
+                       &hdev->completion_queue[q->cq_id].free_slots_cnt;
+       int free_slots_cnt;
+
+       /* Check we have enough space in the queue */
+       free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
+
+       if (free_slots_cnt < num_of_entries) {
+               dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
+                       q->hw_queue_id, num_of_entries);
+               return -EAGAIN;
+       }
+
+       if (reserve_cq_entry) {
+               /*
+                * Check we have enough space in the completion queue
+                * Add -1 to counter (decrement) unless counter was already 0
+                * In that case, CQ is full so we can't submit a new CB because
+                * we won't get ack on its completion
+                * atomic_add_unless will return 0 if counter was already 0
+                */
+               if (atomic_add_negative(num_of_entries * -1, free_slots)) {
+                       dev_dbg(hdev->dev, "No space for %d on CQ %d\n",
+                               num_of_entries, q->hw_queue_id);
+                       atomic_add(num_of_entries, free_slots);
+                       return -EAGAIN;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * int_queue_sanity_checks - perform some sanity checks on internal queue
+ *
+ * @hdev              : pointer to hl_device structure
+ * @q                 :        pointer to hl_hw_queue structure
+ * @num_of_entries    : how many entries to check for space
+ *
+ * H/W queues spinlock should be taken before calling this function
+ *
+ * Perform the following:
+ * - Make sure we have enough space in the h/w queue
+ *
+ */
+static int int_queue_sanity_checks(struct hl_device *hdev,
+                                       struct hl_hw_queue *q,
+                                       int num_of_entries)
+{
+       int free_slots_cnt;
+
+       if (num_of_entries > q->int_queue_len) {
+               dev_err(hdev->dev,
+                       "Cannot populate queue %u with %u jobs\n",
+                       q->hw_queue_id, num_of_entries);
+               return -ENOMEM;
+       }
+
+       /* Check we have enough space in the queue */
+       free_slots_cnt = queue_free_slots(q, q->int_queue_len);
+
+       if (free_slots_cnt < num_of_entries) {
+               dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
+                       q->hw_queue_id, num_of_entries);
+               return -EAGAIN;
+       }
+
+       return 0;
+}
+
+/*
+ * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
+ * @hdev: Pointer to hl_device structure.
+ * @q: Pointer to hl_hw_queue structure.
+ * @num_of_entries: How many entries to check for space.
+ *
+ * Notice: We do not reserve queue entries so this function mustn't be called
+ *         more than once per CS for the same queue
+ *
+ */
+static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
+                                       int num_of_entries)
+{
+       int free_slots_cnt;
+
+       /* Check we have enough space in the queue */
+       free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
+
+       if (free_slots_cnt < num_of_entries) {
+               dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
+                       q->hw_queue_id, num_of_entries);
+               return -EAGAIN;
+       }
+
+       return 0;
+}
+
+/*
+ * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
+ *
+ * @hdev: pointer to hl_device structure
+ * @hw_queue_id: ID of the queue to send the CB to
+ * @cb_size: size of CB
+ * @cb_ptr: pointer to CB location
+ *
+ * This function sends a single CB that must NOT generate a completion entry.
+ *
+ */
+int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
+                               u32 cb_size, u64 cb_ptr)
+{
+       struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
+       int rc = 0;
+
+       /*
+        * The CPU queue is a synchronous queue with an effective depth of
+        * a single entry (although it is allocated with room for multiple
+        * entries). Therefore, there is a different lock, called
+        * send_cpu_message_lock, that serializes accesses to the CPU queue.
+        * As a result, we don't need to lock the access to the entire H/W
+        * queues module when submitting a JOB to the CPU queue
+        */
+       if (q->queue_type != QUEUE_TYPE_CPU)
+               hdev->asic_funcs->hw_queues_lock(hdev);
+
+       if (hdev->disabled) {
+               rc = -EPERM;
+               goto out;
+       }
+
+       /*
+        * hl_hw_queue_send_cb_no_cmpl() is called for queues of the H/W queue
+        * type only during the init phase, when the queues are empty and being
+        * tested, so there is no need for sanity checks.
+        */
+       if (q->queue_type != QUEUE_TYPE_HW) {
+               rc = ext_queue_sanity_checks(hdev, q, 1, false);
+               if (rc)
+                       goto out;
+       }
+
+       ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
+
+out:
+       if (q->queue_type != QUEUE_TYPE_CPU)
+               hdev->asic_funcs->hw_queues_unlock(hdev);
+
+       return rc;
+}
+
+/*
+ * ext_queue_schedule_job - submit a JOB to an external queue
+ *
+ * @job: pointer to the job that needs to be submitted to the queue
+ *
+ * This function must be called when the scheduler mutex is taken
+ *
+ */
+static void ext_queue_schedule_job(struct hl_cs_job *job)
+{
+       struct hl_device *hdev = job->cs->ctx->hdev;
+       struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
+       struct hl_cq_entry cq_pkt;
+       struct hl_cq *cq;
+       u64 cq_addr;
+       struct hl_cb *cb;
+       u32 ctl;
+       u32 len;
+       u64 ptr;
+
+       /*
+        * Update the JOB ID inside the BD CTL so the device would know what
+        * to write in the completion queue
+        */
+       ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK);
+
+       cb = job->patched_cb;
+       len = job->job_cb_size;
+       ptr = cb->bus_address;
+
+       cq_pkt.data = cpu_to_le32(
+                               ((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
+                                       & CQ_ENTRY_SHADOW_INDEX_MASK) |
+                               (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
+                               (1 << CQ_ENTRY_READY_SHIFT));
+
+       /*
+        * No need to protect pi_offset because scheduling to the
+        * H/W queues is done under the scheduler mutex
+        *
+        * No need to check if CQ is full because it was already
+        * checked in ext_queue_sanity_checks
+        */
+       cq = &hdev->completion_queue[q->cq_id];
+       cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);
+
+       hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
+                                               cq_addr,
+                                               le32_to_cpu(cq_pkt.data),
+                                               q->msi_vec,
+                                               job->contains_dma_pkt);
+
+       q->shadow_queue[hl_pi_2_offset(q->pi)] = job;
+
+       cq->pi = hl_cq_inc_ptr(cq->pi);
+
+       ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
+}
+
+/*
+ * int_queue_schedule_job - submit a JOB to an internal queue
+ *
+ * @job: pointer to the job that needs to be submitted to the queue
+ *
+ * This function must be called when the scheduler mutex is taken
+ *
+ */
+static void int_queue_schedule_job(struct hl_cs_job *job)
+{
+       struct hl_device *hdev = job->cs->ctx->hdev;
+       struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
+       struct hl_bd bd;
+       __le64 *pi;
+
+       bd.ctl = 0;
+       bd.len = cpu_to_le32(job->job_cb_size);
+       bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);
+
+       pi = (__le64 *) (uintptr_t) (q->kernel_address +
+               ((q->pi & (q->int_queue_len - 1)) * sizeof(bd)));
+
+       q->pi++;
+       q->pi &= ((q->int_queue_len << 1) - 1);
+
+       hdev->asic_funcs->pqe_write(hdev, pi, &bd);
+
+       hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
+}
+
+/*
+ * hw_queue_schedule_job - submit a JOB to a H/W queue
+ *
+ * @job: pointer to the job that needs to be submitted to the queue
+ *
+ * This function must be called when the scheduler mutex is taken
+ *
+ */
+static void hw_queue_schedule_job(struct hl_cs_job *job)
+{
+       struct hl_device *hdev = job->cs->ctx->hdev;
+       struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
+       u64 ptr;
+       u32 offset, ctl, len;
+
+       /*
+        * Upon PQE completion, COMP_DATA is used as the write data to the
+        * completion queue (QMAN HBW message), and COMP_OFFSET is used as the
+        * write address offset in the SM block (QMAN LBW message).
+        * The write address offset is calculated as "COMP_OFFSET << 2".
+        */
+       offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
+       ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
+               ((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);
+
+       len = job->job_cb_size;
+
+       /*
+        * A patched CB is created only if a user CB was allocated by the
+        * driver and the MMU is disabled. If the MMU is enabled, the user CB
+        * should be used instead. If the user CB wasn't allocated by the
+        * driver, assume that it holds an address.
+        */
+       if (job->patched_cb)
+               ptr = job->patched_cb->bus_address;
+       else if (job->is_kernel_allocated_cb)
+               ptr = job->user_cb->bus_address;
+       else
+               ptr = (u64) (uintptr_t) job->user_cb;
+
+       ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
+}
+
+/*
+ * init_signal_wait_cs - initialize a signal/wait CS
+ * @cs: pointer to the signal/wait CS
+ *
+ * H/W queues spinlock should be taken before calling this function
+ */
+static void init_signal_wait_cs(struct hl_cs *cs)
+{
+       struct hl_ctx *ctx = cs->ctx;
+       struct hl_device *hdev = ctx->hdev;
+       struct hl_hw_queue *hw_queue;
+       struct hl_cs_compl *cs_cmpl =
+                       container_of(cs->fence, struct hl_cs_compl, base_fence);
+
+       struct hl_hw_sob *hw_sob;
+       struct hl_cs_job *job;
+       u32 q_idx;
+
+       /* There is only one job in a signal/wait CS */
+       job = list_first_entry(&cs->job_list, struct hl_cs_job,
+                               cs_node);
+       q_idx = job->hw_queue_id;
+       hw_queue = &hdev->kernel_queues[q_idx];
+
+       if (cs->type & CS_TYPE_SIGNAL) {
+               hw_sob = &hw_queue->hw_sob[hw_queue->curr_sob_offset];
+
+               cs_cmpl->hw_sob = hw_sob;
+               cs_cmpl->sob_val = hw_queue->next_sob_val++;
+
+               dev_dbg(hdev->dev,
+                       "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
+                       cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
+
+               hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
+                                       cs_cmpl->hw_sob->sob_id);
+
+               kref_get(&hw_sob->kref);
+
+               /* check for wraparound */
+               if (hw_queue->next_sob_val == HL_MAX_SOB_VAL) {
+                       /*
+                        * Decrement as we reached the max value.
+                        * The release function won't be called here as we've
+                        * just incremented the refcount.
+                        */
+                       kref_put(&hw_sob->kref, hl_sob_reset_error);
+                       hw_queue->next_sob_val = 1;
+                       /* only two SOBs are currently in use */
+                       hw_queue->curr_sob_offset =
+                                       (hw_queue->curr_sob_offset + 1) %
+                                               HL_RSVD_SOBS_IN_USE;
+
+                       dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
+                                       hw_queue->curr_sob_offset, q_idx);
+               }
+       } else if (cs->type & CS_TYPE_WAIT) {
+               struct hl_cs_compl *signal_cs_cmpl;
+
+               signal_cs_cmpl = container_of(cs->signal_fence,
+                                               struct hl_cs_compl,
+                                               base_fence);
+
+               /* copy the SOB id and value of the signal CS */
+               cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
+               cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+
+               dev_dbg(hdev->dev,
+                       "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
+                       cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
+                       hw_queue->base_mon_id, q_idx);
+
+               hdev->asic_funcs->gen_wait_cb(hdev, job->patched_cb,
+                                               cs_cmpl->hw_sob->sob_id,
+                                               cs_cmpl->sob_val,
+                                               hw_queue->base_mon_id,
+                                               q_idx);
+
+               kref_get(&cs_cmpl->hw_sob->kref);
+               /*
+                * Must put the signal fence after the SOB refcnt increment so
+                * the SOB refcnt won't turn 0 and reset the SOB before the
+                * wait CS was submitted.
+                */
+               mb();
+               dma_fence_put(cs->signal_fence);
+               cs->signal_fence = NULL;
+       }
+}
+
+/*
+ * hl_hw_queue_schedule_cs - schedule a command submission
+ * @cs: pointer to the CS
+ */
+int hl_hw_queue_schedule_cs(struct hl_cs *cs)
+{
+       struct hl_ctx *ctx = cs->ctx;
+       struct hl_device *hdev = ctx->hdev;
+       struct hl_cs_job *job, *tmp;
+       struct hl_hw_queue *q;
+       u32 max_queues;
+       int rc = 0, i, cq_cnt;
+
+       hdev->asic_funcs->hw_queues_lock(hdev);
+
+       if (hl_device_disabled_or_in_reset(hdev)) {
+               ctx->cs_counters.device_in_reset_drop_cnt++;
+               dev_err(hdev->dev,
+                       "device is disabled or in reset, CS rejected!\n");
+               rc = -EPERM;
+               goto out;
+       }
+
+       max_queues = hdev->asic_prop.max_queues;
+
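+       /* First verify that every queue touched by this CS has room for its
+        * jobs (and reserve CQ entries for external queues) before anything
+        * is actually submitted.
+        */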
+       q = &hdev->kernel_queues[0];
+       for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
+               if (cs->jobs_in_queue_cnt[i]) {
+                       switch (q->queue_type) {
+                       case QUEUE_TYPE_EXT:
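+       /* Report DRAM as enabled only if more than a page remains after the
+        * KMD reservation.
+        */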
+                               rc = ext_queue_sanity_checks(hdev, q,
+                                               cs->jobs_in_queue_cnt[i], true);
+                               break;
+                       case QUEUE_TYPE_INT:
+                               rc = int_queue_sanity_checks(hdev, q,
+                                               cs->jobs_in_queue_cnt[i]);
+                               break;
+                       case QUEUE_TYPE_HW:
+                               rc = hw_queue_sanity_checks(hdev, q,
+                                               cs->jobs_in_queue_cnt[i]);
+                               break;
+                       default:
+                               dev_err(hdev->dev, "Queue type %d is invalid\n",
+                                       q->queue_type);
+                               rc = -EINVAL;
+                               break;
+                       }
+
+                       if (rc) {
+                               ctx->cs_counters.queue_full_drop_cnt++;
+                               goto unroll_cq_resv;
+                       }
+
+                       if (q->queue_type == QUEUE_TYPE_EXT)
+                               cq_cnt++;
+               }
+       }
+
+       if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT))
+               init_signal_wait_cs(cs);
+
+       spin_lock(&hdev->hw_queues_mirror_lock);
+       list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list);
+
+       /* Queue TDR if the CS is the first entry and if timeout is wanted */
+       if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
+                       (list_first_entry(&hdev->hw_queues_mirror_list,
+                                       struct hl_cs, mirror_node) == cs)) {
+               cs->tdr_active = true;
+               schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
+               spin_unlock(&hdev->hw_queues_mirror_lock);
+       } else {
+               spin_unlock(&hdev->hw_queues_mirror_lock);
+       }
+
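+       /* First CS going active: stamp the idle-to-busy transition time */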
+       if (!hdev->cs_active_cnt++) {
+               struct hl_device_idle_busy_ts *ts;
+
+               ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx];
+               ts->busy_to_idle_ts = ktime_set(0, 0);
+               ts->idle_to_busy_ts = ktime_get();
+       }
+
+       list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
+               switch (job->queue_type) {
+               case QUEUE_TYPE_EXT:
+                       ext_queue_schedule_job(job);
+                       break;
+               case QUEUE_TYPE_INT:
+                       int_queue_schedule_job(job);
+                       break;
+               case QUEUE_TYPE_HW:
+                       hw_queue_schedule_job(job);
+                       break;
+               default:
+                       break;
+               }
+
+       cs->submitted = true;
+
+       goto out;
+
+unroll_cq_resv:
+       q = &hdev->kernel_queues[0];
+       for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
+               if ((q->queue_type == QUEUE_TYPE_EXT) &&
+                                               (cs->jobs_in_queue_cnt[i])) {
+                       atomic_t *free_slots =
+                               &hdev->completion_queue[i].free_slots_cnt;
+                       atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
+                       cq_cnt--;
+               }
+       }
+
+out:
+       hdev->asic_funcs->hw_queues_unlock(hdev);
+
+       return rc;
+}
+
+/*
+ * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
+ *
+ * @hdev: pointer to hl_device structure
+ * @hw_queue_id: which queue to increment its ci
+ */
+void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
+{
+       struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
+
+       atomic_inc(&q->ci);
+}
+
+static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
+                                       bool is_cpu_queue)
+{
+       void *p;
+       int rc;
+
+       if (is_cpu_queue)
+               p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+                                                       HL_QUEUE_SIZE_IN_BYTES,
+                                                       &q->bus_address);
+       else
+               p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
+                                               HL_QUEUE_SIZE_IN_BYTES,
+                                               &q->bus_address,
+                                               GFP_KERNEL | __GFP_ZERO);
+       if (!p)
+               return -ENOMEM;
+
+       q->kernel_address = (u64) (uintptr_t) p;
+
+       q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
+                                       sizeof(*q->shadow_queue),
+                                       GFP_KERNEL);
+       if (!q->shadow_queue) {
+               dev_err(hdev->dev,
+                       "Failed to allocate shadow queue for H/W queue %d\n",
+                       q->hw_queue_id);
+               rc = -ENOMEM;
+               goto free_queue;
+       }
+
+       /* Make sure read/write pointers are initialized to start of queue */
+       atomic_set(&q->ci, 0);
+       q->pi = 0;
+
+       return 0;
+
+free_queue:
+       if (is_cpu_queue)
+               hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
+                                       HL_QUEUE_SIZE_IN_BYTES,
+                                       (void *) (uintptr_t) q->kernel_address);
+       else
+               hdev->asic_funcs->asic_dma_free_coherent(hdev,
+                                       HL_QUEUE_SIZE_IN_BYTES,
+                                       (void *) (uintptr_t) q->kernel_address,
+                                       q->bus_address);
+
+       return rc;
+}
+
+static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
+{
+       void *p;
+
+       p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id,
+                                       &q->bus_address, &q->int_queue_len);
+       if (!p) {
+               dev_err(hdev->dev,
+                       "Failed to get base address for internal queue %d\n",
+                       q->hw_queue_id);
+               return -EFAULT;
+       }
+
+       q->kernel_address = (u64) (uintptr_t) p;
+       q->pi = 0;
+       atomic_set(&q->ci, 0);
+
+       return 0;
+}
+
+static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
+{
+       return ext_and_cpu_queue_init(hdev, q, true);
+}
+
+static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
+{
+       return ext_and_cpu_queue_init(hdev, q, false);
+}
+
+static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
+{
+       void *p;
+
+       p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
+                                               HL_QUEUE_SIZE_IN_BYTES,
+                                               &q->bus_address,
+                                               GFP_KERNEL | __GFP_ZERO);
+       if (!p)
+               return -ENOMEM;
+
+       q->kernel_address = (u64) (uintptr_t) p;
+
+       /* Make sure read/write pointers are initialized to start of queue */
+       atomic_set(&q->ci, 0);
+       q->pi = 0;
+
+       return 0;
+}
+
+static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
+{
+       struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct hl_hw_sob *hw_sob;
+       int sob, queue_idx = hdev->sync_stream_queue_idx++;
+
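+       /* Each sync-stream capable queue gets its own fixed block of SOBs and
+        * monitors, carved out by the order in which the queues are
+        * initialized.
+        */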
+       hw_queue->base_sob_id =
+               prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS;
+       hw_queue->base_mon_id =
+               prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS;
+       hw_queue->next_sob_val = 1;
+       hw_queue->curr_sob_offset = 0;
+
+       for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
+               hw_sob = &hw_queue->hw_sob[sob];
+               hw_sob->hdev = hdev;
+               hw_sob->sob_id = hw_queue->base_sob_id + sob;
+               hw_sob->q_idx = q_idx;
+               kref_init(&hw_sob->kref);
+       }
+}
+
+static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
+{
+       struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+
+       /*
+        * In case we got here due to a stuck CS, the refcnt might be bigger
+        * than 1 and therefore we reset it.
+        */
+       kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
+       hw_queue->curr_sob_offset = 0;
+       hw_queue->next_sob_val = 1;
+}
+
+/*
+ * queue_init - main initialization function for H/W queue object
+ *
+ * @hdev: pointer to hl_device device structure
+ * @q: pointer to hl_hw_queue queue structure
+ * @hw_queue_id: The id of the H/W queue
+ *
+ * Allocate dma-able memory for the queue and initialize fields
+ * Returns 0 on success
+ */
+static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
+                       u32 hw_queue_id)
+{
+       int rc;
+
+       q->hw_queue_id = hw_queue_id;
+
+       switch (q->queue_type) {
+       case QUEUE_TYPE_EXT:
+               rc = ext_queue_init(hdev, q);
+               break;
+       case QUEUE_TYPE_INT:
+               rc = int_queue_init(hdev, q);
+               break;
+       case QUEUE_TYPE_CPU:
+               rc = cpu_queue_init(hdev, q);
+               break;
+       case QUEUE_TYPE_HW:
+               rc = hw_queue_init(hdev, q);
+               break;
+       case QUEUE_TYPE_NA:
+               q->valid = 0;
+               return 0;
+       default:
+               dev_crit(hdev->dev, "wrong queue type %d during init\n",
+                       q->queue_type);
+               rc = -EINVAL;
+               break;
+       }
+
+       if (q->supports_sync_stream)
+               sync_stream_queue_init(hdev, q->hw_queue_id);
+
+       if (rc)
+               return rc;
+
+       q->valid = 1;
+
+       return 0;
+}
+
+/*
+ * queue_fini - destroy queue
+ *
+ * @hdev: pointer to hl_device device structure
+ * @q: pointer to hl_hw_queue queue structure
+ *
+ * Free the queue memory
+ */
+static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
+{
+       if (!q->valid)
+               return;
+
+       /*
+        * If we arrived here, there are no jobs waiting on this queue
+        * so we can safely remove it.
+        * This is because this function can only be called when:
+        * 1. Either a context is deleted, which only can occur if all its
+        *    jobs were finished
+        * 2. A context wasn't able to be created due to failure or timeout,
+        *    which means there are no jobs on the queue yet
+        *
+        * The only exception are the queues of the kernel context, but
+        * if they are being destroyed, it means that the entire module is
+        * being removed. If the module is removed, it means there is no open
+        * user context. It also means that if a job was submitted by
+        * the kernel driver (e.g. context creation), the job itself was
+        * released by the kernel driver when a timeout occurred on its
+        * completion. Thus, we don't need to release it again.
+        */
+
+       if (q->queue_type == QUEUE_TYPE_INT)
+               return;
+
+       kfree(q->shadow_queue);
+
+       if (q->queue_type == QUEUE_TYPE_CPU)
+               hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
+                                       HL_QUEUE_SIZE_IN_BYTES,
+                                       (void *) (uintptr_t) q->kernel_address);
+       else
+               hdev->asic_funcs->asic_dma_free_coherent(hdev,
+                                       HL_QUEUE_SIZE_IN_BYTES,
+                                       (void *) (uintptr_t) q->kernel_address,
+                                       q->bus_address);
+}
+
+int hl_hw_queues_create(struct hl_device *hdev)
+{
+       struct asic_fixed_properties *asic = &hdev->asic_prop;
+       struct hl_hw_queue *q;
+       int i, rc, q_ready_cnt;
+
+       hdev->kernel_queues = kcalloc(asic->max_queues,
+                               sizeof(*hdev->kernel_queues), GFP_KERNEL);
+
+       if (!hdev->kernel_queues) {
+               dev_err(hdev->dev, "Not enough memory for H/W queues\n");
+               return -ENOMEM;
+       }
+
+       /* Initialize the H/W queues */
+       for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
+                       i < asic->max_queues ; i++, q_ready_cnt++, q++) {
+
+               q->queue_type = asic->hw_queues_props[i].type;
+               q->supports_sync_stream =
+                               asic->hw_queues_props[i].supports_sync_stream;
+               rc = queue_init(hdev, q, i);
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "failed to initialize queue %d\n", i);
+                       goto release_queues;
+               }
+       }
+
+       return 0;
+
+release_queues:
+       for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
+               queue_fini(hdev, q);
+
+       kfree(hdev->kernel_queues);
+
+       return rc;
+}
+
+void hl_hw_queues_destroy(struct hl_device *hdev)
+{
+       struct hl_hw_queue *q;
+       u32 max_queues = hdev->asic_prop.max_queues;
+       int i;
+
+       for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
+               queue_fini(hdev, q);
+
+       kfree(hdev->kernel_queues);
+}
+
+void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
+{
+       struct hl_hw_queue *q;
+       u32 max_queues = hdev->asic_prop.max_queues;
+       int i;
+
+       for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) {
+               if ((!q->valid) ||
+                       ((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
+                       continue;
+               q->pi = 0;
+               atomic_set(&q->ci, 0);
+
+               if (q->supports_sync_stream)
+                       sync_stream_queue_reset(hdev, q->hw_queue_id);
+       }
+}
diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c
new file mode 100644 (file)
index 0000000..8c6cd77
--- /dev/null
@@ -0,0 +1,579 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/pci.h>
+#include <linux/hwmon.h>
+
+#define SENSORS_PKT_TIMEOUT            1000000 /* 1s */
+#define HWMON_NR_SENSOR_TYPES          (hwmon_pwm + 1)
+
+int hl_build_hwmon_channel_info(struct hl_device *hdev,
+                               struct armcp_sensor *sensors_arr)
+{
+       u32 counts[HWMON_NR_SENSOR_TYPES] = {0};
+       u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL};
+       u32 sensors_by_type_next_index[HWMON_NR_SENSOR_TYPES] = {0};
+       struct hwmon_channel_info **channels_info;
+       u32 num_sensors_for_type, num_active_sensor_types = 0,
+                       arr_size = 0, *curr_arr;
+       enum hwmon_sensor_types type;
+       int rc, i, j;
+
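+       /* First pass: count how many sensors of each hwmon type the device
+        * reports; an all-zero entry marks the end of the array.
+        */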
+       for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) {
+               type = le32_to_cpu(sensors_arr[i].type);
+
+               if ((type == 0) && (sensors_arr[i].flags == 0))
+                       break;
+
+               if (type >= HWMON_NR_SENSOR_TYPES) {
+                       dev_err(hdev->dev,
+                               "Got wrong sensor type %d from device\n", type);
+                       return -EINVAL;
+               }
+
+               counts[type]++;
+               arr_size++;
+       }
+
+       for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) {
+               if (counts[i] == 0)
+                       continue;
+
+               num_sensors_for_type = counts[i] + 1;
+               curr_arr = kcalloc(num_sensors_for_type, sizeof(*curr_arr),
+                               GFP_KERNEL);
+               if (!curr_arr) {
+                       rc = -ENOMEM;
+                       goto sensors_type_err;
+               }
+
+               num_active_sensor_types++;
+               sensors_by_type[i] = curr_arr;
+       }
+
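+       /* Second pass: scatter each sensor's flags into the per-type config
+        * array allocated above.
+        */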
+       for (i = 0 ; i < arr_size ; i++) {
+               type = le32_to_cpu(sensors_arr[i].type);
+               curr_arr = sensors_by_type[type];
+               curr_arr[sensors_by_type_next_index[type]++] =
+                               le32_to_cpu(sensors_arr[i].flags);
+       }
+
+       channels_info = kcalloc(num_active_sensor_types + 1,
+                       sizeof(*channels_info), GFP_KERNEL);
+       if (!channels_info) {
+               rc = -ENOMEM;
+               goto channels_info_array_err;
+       }
+
+       for (i = 0 ; i < num_active_sensor_types ; i++) {
+               channels_info[i] = kzalloc(sizeof(*channels_info[i]),
+                               GFP_KERNEL);
+               if (!channels_info[i]) {
+                       rc = -ENOMEM;
+                       goto channel_info_err;
+               }
+       }
+
+       for (i = 0, j = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) {
+               if (!sensors_by_type[i])
+                       continue;
+
+               channels_info[j]->type = i;
+               channels_info[j]->config = sensors_by_type[i];
+               j++;
+       }
+
+       hdev->hl_chip_info->info =
+                       (const struct hwmon_channel_info **)channels_info;
+
+       return 0;
+
+channel_info_err:
+       for (i = 0 ; i < num_active_sensor_types ; i++)
+               if (channels_info[i]) {
+                       kfree(channels_info[i]->config);
+                       kfree(channels_info[i]);
+               }
+       kfree(channels_info);
+channels_info_array_err:
+sensors_type_err:
+       for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++)
+               kfree(sensors_by_type[i]);
+
+       return rc;
+}
+
+static int hl_read(struct device *dev, enum hwmon_sensor_types type,
+                       u32 attr, int channel, long *val)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+       int rc;
+
+       if (hl_device_disabled_or_in_reset(hdev))
+               return -ENODEV;
+
+       switch (type) {
+       case hwmon_temp:
+               switch (attr) {
+               case hwmon_temp_input:
+               case hwmon_temp_max:
+               case hwmon_temp_crit:
+               case hwmon_temp_max_hyst:
+               case hwmon_temp_crit_hyst:
+               case hwmon_temp_offset:
+               case hwmon_temp_highest:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+
+               rc = hl_get_temperature(hdev, channel, attr, val);
+               break;
+       case hwmon_in:
+               switch (attr) {
+               case hwmon_in_input:
+               case hwmon_in_min:
+               case hwmon_in_max:
+               case hwmon_in_highest:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+
+               rc = hl_get_voltage(hdev, channel, attr, val);
+               break;
+       case hwmon_curr:
+               switch (attr) {
+               case hwmon_curr_input:
+               case hwmon_curr_min:
+               case hwmon_curr_max:
+               case hwmon_curr_highest:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+
+               rc = hl_get_current(hdev, channel, attr, val);
+               break;
+       case hwmon_fan:
+               switch (attr) {
+               case hwmon_fan_input:
+               case hwmon_fan_min:
+               case hwmon_fan_max:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               rc = hl_get_fan_speed(hdev, channel, attr, val);
+               break;
+       case hwmon_pwm:
+               switch (attr) {
+               case hwmon_pwm_input:
+               case hwmon_pwm_enable:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               rc = hl_get_pwm_info(hdev, channel, attr, val);
+               break;
+       default:
+               return -EINVAL;
+       }
+       return rc;
+}
+
+static int hl_write(struct device *dev, enum hwmon_sensor_types type,
+                       u32 attr, int channel, long val)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+
+       if (hl_device_disabled_or_in_reset(hdev))
+               return -ENODEV;
+
+       switch (type) {
+       case hwmon_temp:
+               switch (attr) {
+               case hwmon_temp_offset:
+               case hwmon_temp_reset_history:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               hl_set_temperature(hdev, channel, attr, val);
+               break;
+       case hwmon_pwm:
+               switch (attr) {
+               case hwmon_pwm_input:
+               case hwmon_pwm_enable:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               hl_set_pwm_info(hdev, channel, attr, val);
+               break;
+       case hwmon_in:
+               switch (attr) {
+               case hwmon_in_reset_history:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               hl_set_voltage(hdev, channel, attr, val);
+               break;
+       case hwmon_curr:
+               switch (attr) {
+               case hwmon_curr_reset_history:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               hl_set_current(hdev, channel, attr, val);
+               break;
+       default:
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
+                               u32 attr, int channel)
+{
+       switch (type) {
+       case hwmon_temp:
+               switch (attr) {
+               case hwmon_temp_input:
+               case hwmon_temp_max:
+               case hwmon_temp_max_hyst:
+               case hwmon_temp_crit:
+               case hwmon_temp_crit_hyst:
+               case hwmon_temp_highest:
+                       return 0444;
+               case hwmon_temp_offset:
+                       return 0644;
+               case hwmon_temp_reset_history:
+                       return 0200;
+               }
+               break;
+       case hwmon_in:
+               switch (attr) {
+               case hwmon_in_input:
+               case hwmon_in_min:
+               case hwmon_in_max:
+               case hwmon_in_highest:
+                       return 0444;
+               case hwmon_in_reset_history:
+                       return 0200;
+               }
+               break;
+       case hwmon_curr:
+               switch (attr) {
+               case hwmon_curr_input:
+               case hwmon_curr_min:
+               case hwmon_curr_max:
+               case hwmon_curr_highest:
+                       return 0444;
+               case hwmon_curr_reset_history:
+                       return 0200;
+               }
+               break;
+       case hwmon_fan:
+               switch (attr) {
+               case hwmon_fan_input:
+               case hwmon_fan_min:
+               case hwmon_fan_max:
+                       return 0444;
+               }
+               break;
+       case hwmon_pwm:
+               switch (attr) {
+               case hwmon_pwm_input:
+               case hwmon_pwm_enable:
+                       return 0644;
+               }
+               break;
+       default:
+               break;
+       }
+       return 0;
+}
+
+static const struct hwmon_ops hl_hwmon_ops = {
+       .is_visible = hl_is_visible,
+       .read = hl_read,
+       .write = hl_write
+};
+
+int hl_get_temperature(struct hl_device *hdev,
+                       int sensor_index, u32 attr, long *value)
+{
+       struct armcp_packet pkt;
+       int rc;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.sensor_index = __cpu_to_le16(sensor_index);
+       pkt.type = __cpu_to_le16(attr);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                       SENSORS_PKT_TIMEOUT, value);
+
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to get temperature from sensor %d, error %d\n",
+                       sensor_index, rc);
+               *value = 0;
+       }
+
+       return rc;
+}
+
+int hl_set_temperature(struct hl_device *hdev,
+                       int sensor_index, u32 attr, long value)
+{
+       struct armcp_packet pkt;
+       int rc;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.sensor_index = __cpu_to_le16(sensor_index);
+       pkt.type = __cpu_to_le16(attr);
+       pkt.value = __cpu_to_le64(value);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                               SENSORS_PKT_TIMEOUT, NULL);
+
+       if (rc)
+               dev_err(hdev->dev,
+                       "Failed to set temperature of sensor %d, error %d\n",
+                       sensor_index, rc);
+
+       return rc;
+}
+
+int hl_get_voltage(struct hl_device *hdev,
+                       int sensor_index, u32 attr, long *value)
+{
+       struct armcp_packet pkt;
+       int rc;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.sensor_index = __cpu_to_le16(sensor_index);
+       pkt.type = __cpu_to_le16(attr);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                       SENSORS_PKT_TIMEOUT, value);
+
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to get voltage from sensor %d, error %d\n",
+                       sensor_index, rc);
+               *value = 0;
+       }
+
+       return rc;
+}
+
+int hl_get_current(struct hl_device *hdev,
+                       int sensor_index, u32 attr, long *value)
+{
+       struct armcp_packet pkt;
+       int rc;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.sensor_index = __cpu_to_le16(sensor_index);
+       pkt.type = __cpu_to_le16(attr);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                       SENSORS_PKT_TIMEOUT, value);
+
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to get current from sensor %d, error %d\n",
+                       sensor_index, rc);
+               *value = 0;
+       }
+
+       return rc;
+}
+
+int hl_get_fan_speed(struct hl_device *hdev,
+                       int sensor_index, u32 attr, long *value)
+{
+       struct armcp_packet pkt;
+       int rc;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.sensor_index = __cpu_to_le16(sensor_index);
+       pkt.type = __cpu_to_le16(attr);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                       SENSORS_PKT_TIMEOUT, value);
+
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to get fan speed from sensor %d, error %d\n",
+                       sensor_index, rc);
+               *value = 0;
+       }
+
+       return rc;
+}
+
+int hl_get_pwm_info(struct hl_device *hdev,
+                       int sensor_index, u32 attr, long *value)
+{
+       struct armcp_packet pkt;
+       int rc;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.sensor_index = __cpu_to_le16(sensor_index);
+       pkt.type = __cpu_to_le16(attr);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                       SENSORS_PKT_TIMEOUT, value);
+
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to get pwm info from sensor %d, error %d\n",
+                       sensor_index, rc);
+               *value = 0;
+       }
+
+       return rc;
+}
+
+void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
+                       long value)
+{
+       struct armcp_packet pkt;
+       int rc;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.sensor_index = __cpu_to_le16(sensor_index);
+       pkt.type = __cpu_to_le16(attr);
+       pkt.value = cpu_to_le64(value);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                       SENSORS_PKT_TIMEOUT, NULL);
+
+       if (rc)
+               dev_err(hdev->dev,
+                       "Failed to set pwm info to sensor %d, error %d\n",
+                       sensor_index, rc);
+}
+
+int hl_set_voltage(struct hl_device *hdev,
+                       int sensor_index, u32 attr, long value)
+{
+       struct armcp_packet pkt;
+       int rc;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_SET <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.sensor_index = __cpu_to_le16(sensor_index);
+       pkt.type = __cpu_to_le16(attr);
+       pkt.value = __cpu_to_le64(value);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                               SENSORS_PKT_TIMEOUT, NULL);
+
+       if (rc)
+               dev_err(hdev->dev,
+                       "Failed to set voltage of sensor %d, error %d\n",
+                       sensor_index, rc);
+
+       return rc;
+}
+
+int hl_set_current(struct hl_device *hdev,
+                       int sensor_index, u32 attr, long value)
+{
+       struct armcp_packet pkt;
+       int rc;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_SET <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.sensor_index = __cpu_to_le16(sensor_index);
+       pkt.type = __cpu_to_le16(attr);
+       pkt.value = __cpu_to_le64(value);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                               SENSORS_PKT_TIMEOUT, NULL);
+
+       if (rc)
+               dev_err(hdev->dev,
+                       "Failed to set current of sensor %d, error %d\n",
+                       sensor_index, rc);
+
+       return rc;
+}
+
+int hl_hwmon_init(struct hl_device *hdev)
+{
+       struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       int rc;
+
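+       /* Skip registration when FW loading is disabled, as all sensor data
+        * is retrieved from the device CPU (ArmCP).
+        */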
+       if ((hdev->hwmon_initialized) || !(hdev->fw_loading))
+               return 0;
+
+       if (hdev->hl_chip_info->info) {
+               hdev->hl_chip_info->ops = &hl_hwmon_ops;
+
+               hdev->hwmon_dev = hwmon_device_register_with_info(dev,
+                                       prop->armcp_info.card_name, hdev,
+                                       hdev->hl_chip_info, NULL);
+               if (IS_ERR(hdev->hwmon_dev)) {
+                       rc = PTR_ERR(hdev->hwmon_dev);
+                       dev_err(hdev->dev,
+                               "Unable to register hwmon device: %d\n", rc);
+                       return rc;
+               }
+
+               dev_info(hdev->dev, "%s: add sensors information\n",
+                       dev_name(hdev->hwmon_dev));
+
+               hdev->hwmon_initialized = true;
+       } else {
+               dev_info(hdev->dev, "no available sensors\n");
+       }
+
+       return 0;
+}
+
+void hl_hwmon_fini(struct hl_device *hdev)
+{
+       if (!hdev->hwmon_initialized)
+               return;
+
+       hwmon_device_unregister(hdev->hwmon_dev);
+}
diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c
new file mode 100644 (file)
index 0000000..c8db717
--- /dev/null
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/slab.h>
+
+/**
+ * struct hl_eqe_work - This structure is used to schedule work of EQ
+ *                      entry and armcp_reset event
+ *
+ * @eq_work:          workqueue object to run when EQ entry is received
+ * @hdev:             pointer to device structure
+ * @eq_entry:         copy of the EQ entry
+ */
+struct hl_eqe_work {
+       struct work_struct      eq_work;
+       struct hl_device        *hdev;
+       struct hl_eq_entry      eq_entry;
+};
+
+/**
+ * hl_cq_inc_ptr - increment ci or pi of cq
+ *
+ * @ptr: the current ci or pi value of the completion queue
+ *
+ * Increment ptr by 1. If it reaches the number of completion queue
+ * entries, set it to 0
+ */
+inline u32 hl_cq_inc_ptr(u32 ptr)
+{
+       ptr++;
+       if (unlikely(ptr == HL_CQ_LENGTH))
+               ptr = 0;
+       return ptr;
+}
+
+/**
+ * hl_eq_inc_ptr - increment ci of eq
+ *
+ * @ptr: the current ci value of the event queue
+ *
+ * Increment ptr by 1. If it reaches the number of event queue
+ * entries, set it to 0
+ */
+inline u32 hl_eq_inc_ptr(u32 ptr)
+{
+       ptr++;
+       if (unlikely(ptr == HL_EQ_LENGTH))
+               ptr = 0;
+       return ptr;
+}
+
+static void irq_handle_eqe(struct work_struct *work)
+{
+       struct hl_eqe_work *eqe_work = container_of(work, struct hl_eqe_work,
+                                                       eq_work);
+       struct hl_device *hdev = eqe_work->hdev;
+
+       hdev->asic_funcs->handle_eqe(hdev, &eqe_work->eq_entry);
+
+       kfree(eqe_work);
+}
+
+/**
+ * hl_irq_handler_cq - irq handler for completion queue
+ *
+ * @irq: irq number
+ * @arg: pointer to completion queue structure
+ *
+ */
+irqreturn_t hl_irq_handler_cq(int irq, void *arg)
+{
+       struct hl_cq *cq = arg;
+       struct hl_device *hdev = cq->hdev;
+       struct hl_hw_queue *queue;
+       struct hl_cs_job *job;
+       bool shadow_index_valid;
+       u16 shadow_index;
+       struct hl_cq_entry *cq_entry, *cq_base;
+
+       if (hdev->disabled) {
+               dev_dbg(hdev->dev,
+                       "Device disabled but received IRQ %d for CQ %d\n",
+                       irq, cq->hw_queue_id);
+               return IRQ_HANDLED;
+       }
+
+       cq_base = (struct hl_cq_entry *) (uintptr_t) cq->kernel_address;
+
+       while (1) {
+               bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) &
+                                       CQ_ENTRY_READY_MASK)
+                                               >> CQ_ENTRY_READY_SHIFT);
+
+               if (!entry_ready)
+                       break;
+
+               cq_entry = (struct hl_cq_entry *) &cq_base[cq->ci];
+
+               /* Make sure we read CQ entry contents after we've
+                * checked the ownership bit.
+                */
+               dma_rmb();
+
+               shadow_index_valid = ((le32_to_cpu(cq_entry->data) &
+                                       CQ_ENTRY_SHADOW_INDEX_VALID_MASK)
+                                       >> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT);
+
+               shadow_index = (u16) ((le32_to_cpu(cq_entry->data) &
+                                       CQ_ENTRY_SHADOW_INDEX_MASK)
+                                       >> CQ_ENTRY_SHADOW_INDEX_SHIFT);
+
+               queue = &hdev->kernel_queues[cq->hw_queue_id];
+
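+               /* The shadow index points back to the CS job that generated
+                * this completion entry, so its completion work can be
+                * scheduled.
+                */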
+               if ((shadow_index_valid) && (!hdev->disabled)) {
+                       job = queue->shadow_queue[hl_pi_2_offset(shadow_index)];
+                       queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work);
+               }
+
+               atomic_inc(&queue->ci);
+
+               /* Clear CQ entry ready bit */
+               cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) &
+                                               ~CQ_ENTRY_READY_MASK);
+
+               cq->ci = hl_cq_inc_ptr(cq->ci);
+
+               /* Increment free slots */
+               atomic_inc(&cq->free_slots_cnt);
+       }
+
+       return IRQ_HANDLED;
+}
+
+/**
+ * hl_irq_handler_eq - irq handler for event queue
+ *
+ * @irq: irq number
+ * @arg: pointer to event queue structure
+ *
+ */
+irqreturn_t hl_irq_handler_eq(int irq, void *arg)
+{
+       struct hl_eq *eq = arg;
+       struct hl_device *hdev = eq->hdev;
+       struct hl_eq_entry *eq_entry;
+       struct hl_eq_entry *eq_base;
+       struct hl_eqe_work *handle_eqe_work;
+
+       eq_base = (struct hl_eq_entry *) (uintptr_t) eq->kernel_address;
+
+       while (1) {
+               bool entry_ready =
+                       ((le32_to_cpu(eq_base[eq->ci].hdr.ctl) &
+                               EQ_CTL_READY_MASK) >> EQ_CTL_READY_SHIFT);
+
+               if (!entry_ready)
+                       break;
+
+               eq_entry = &eq_base[eq->ci];
+
+               /*
+                * Make sure we read EQ entry contents after we've
+                * checked the ownership bit.
+                */
+               dma_rmb();
+
+               if (hdev->disabled) {
+                       dev_warn(hdev->dev,
+                               "Device disabled but received IRQ %d for EQ\n",
+                                       irq);
+                       goto skip_irq;
+               }
+
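+               /* GFP_ATOMIC because we are in interrupt context. If the
+                * allocation fails, the event is dropped but the EQ pointers
+                * are still advanced below.
+                */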
+               handle_eqe_work = kmalloc(sizeof(*handle_eqe_work), GFP_ATOMIC);
+               if (handle_eqe_work) {
+                       INIT_WORK(&handle_eqe_work->eq_work, irq_handle_eqe);
+                       handle_eqe_work->hdev = hdev;
+
+                       memcpy(&handle_eqe_work->eq_entry, eq_entry,
+                                       sizeof(*eq_entry));
+
+                       queue_work(hdev->eq_wq, &handle_eqe_work->eq_work);
+               }
+skip_irq:
+               /* Clear EQ entry ready bit */
+               eq_entry->hdr.ctl =
+                       cpu_to_le32(le32_to_cpu(eq_entry->hdr.ctl) &
+                                                       ~EQ_CTL_READY_MASK);
+
+               eq->ci = hl_eq_inc_ptr(eq->ci);
+
+               hdev->asic_funcs->update_eq_ci(hdev, eq->ci);
+       }
+
+       return IRQ_HANDLED;
+}
+
+/**
+ * hl_cq_init - main initialization function for a cq object
+ *
+ * @hdev: pointer to device structure
+ * @q: pointer to cq structure
+ * @hw_queue_id: The H/W queue ID this completion queue belongs to
+ *
+ * Allocate dma-able memory for the completion queue and initialize fields
+ * Returns 0 on success
+ */
+int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
+{
+       void *p;
+
+       p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
+                               &q->bus_address, GFP_KERNEL | __GFP_ZERO);
+       if (!p)
+               return -ENOMEM;
+
+       q->hdev = hdev;
+       q->kernel_address = (u64) (uintptr_t) p;
+       q->hw_queue_id = hw_queue_id;
+       q->ci = 0;
+       q->pi = 0;
+
+       atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);
+
+       return 0;
+}
+
+/**
+ * hl_cq_fini - destroy completion queue
+ *
+ * @hdev: pointer to device structure
+ * @q: pointer to cq structure
+ *
+ * Free the completion queue memory
+ */
+void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q)
+{
+       hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
+                       (void *) (uintptr_t) q->kernel_address, q->bus_address);
+}
+
+void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q)
+{
+       q->ci = 0;
+       q->pi = 0;
+
+       atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);
+
+       /*
+        * It's not enough to just reset the PI/CI because the H/W may have
+        * written valid completion entries before it was halted and therefore
+        * we need to clean the actual queues so we won't process old entries
+        * when the device is operational again
+        */
+
+       memset((void *) (uintptr_t) q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES);
+}
+
+/**
+ * hl_eq_init - main initialization function for an event queue object
+ *
+ * @hdev: pointer to device structure
+ * @q: pointer to eq structure
+ *
+ * Allocate dma-able memory for the event queue and initialize fields
+ * Returns 0 on success
+ */
+int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
+{
+       void *p;
+
+       p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+                                                       HL_EQ_SIZE_IN_BYTES,
+                                                       &q->bus_address);
+       if (!p)
+               return -ENOMEM;
+
+       q->hdev = hdev;
+       q->kernel_address = (u64) (uintptr_t) p;
+       q->ci = 0;
+
+       return 0;
+}
+
+/**
+ * hl_eq_fini - destroy event queue
+ *
+ * @hdev: pointer to device structure
+ * @q: pointer to eq structure
+ *
+ * Free the event queue memory
+ */
+void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
+{
+       flush_workqueue(hdev->eq_wq);
+
+       hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
+                                       HL_EQ_SIZE_IN_BYTES,
+                                       (void *) (uintptr_t) q->kernel_address);
+}
+
+void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
+{
+       q->ci = 0;
+
+       /*
+        * It's not enough to just reset the PI/CI because the H/W may have
+        * written valid completion entries before it was halted and therefore
+        * we need to clean the actual queues so we won't process old entries
+        * when the device is operational again
+        */
+
+       memset((void *) (uintptr_t) q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES);
+}
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
new file mode 100644 (file)
index 0000000..e4e1693
--- /dev/null
@@ -0,0 +1,1843 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include <uapi/misc/habanalabs.h>
+#include "habanalabs.h"
+#include "include/hw_ip/mmu/mmu_general.h"
+
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/genalloc.h>
+
+#define HL_MMU_DEBUG   0
+
+/*
+ * The va ranges in context object contain a list with the available chunks of
+ * device virtual memory.
+ * There is one range for host allocations and one for DRAM allocations.
+ *
+ * On initialization each range contains one chunk of all of its available
+ * virtual range which is a half of the total device virtual range.
+ *
+ * On each mapping of physical pages, a suitable virtual range chunk (with a
+ * minimum size) is selected from the list. If the chunk size equals the
+ * requested size, the chunk is returned. Otherwise, the chunk is split into
+ * two chunks - one to return as result and a remainder to stay in the list.
+ *
+ * On each unmapping of a virtual address, the relevant virtual chunk is
+ * returned to the list. The chunk is added to the list and, if its edges match
+ * the edges of the adjacent chunks (i.e. a contiguous chunk can be created),
+ * the chunks are merged.
+ *
+ * On finish, the list is checked to contain only one chunk covering the
+ * relevant virtual range (which is a half of the device's total virtual
+ * range). If not (meaning not all mappings were unmapped), a warning is
+ * printed.
+ */
+
+/*
+ * alloc_device_memory - allocate device memory
+ *
+ * @ctx                 : current context
+ * @args                : host parameters containing the requested size
+ * @ret_handle          : result handle
+ *
+ * This function does the following:
+ * - Allocate the requested size rounded up to 2MB pages
+ * - Return unique handle
+ */
+static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
+                               u32 *ret_handle)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct hl_vm *vm = &hdev->vm;
+       struct hl_vm_phys_pg_pack *phys_pg_pack;
+       u64 paddr = 0, total_size, num_pgs, i;
+       u32 num_curr_pgs, page_size, page_shift;
+       int handle, rc;
+       bool contiguous;
+
+       num_curr_pgs = 0;
+       page_size = hdev->asic_prop.dram_page_size;
+       page_shift = __ffs(page_size);
+       num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
+       total_size = num_pgs << page_shift;
+
+       contiguous = args->flags & HL_MEM_CONTIGUOUS;
+
+       if (contiguous) {
+               paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
+               if (!paddr) {
+                       dev_err(hdev->dev,
+                               "failed to allocate %llu huge contiguous pages\n",
+                               num_pgs);
+                       return -ENOMEM;
+               }
+       }
+
+       phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
+       if (!phys_pg_pack) {
+               rc = -ENOMEM;
+               goto pages_pack_err;
+       }
+
+       phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
+       phys_pg_pack->asid = ctx->asid;
+       phys_pg_pack->npages = num_pgs;
+       phys_pg_pack->page_size = page_size;
+       phys_pg_pack->total_size = total_size;
+       phys_pg_pack->flags = args->flags;
+       phys_pg_pack->contiguous = contiguous;
+
+       phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
+       if (!phys_pg_pack->pages) {
+               rc = -ENOMEM;
+               goto pages_arr_err;
+       }
+
+       if (phys_pg_pack->contiguous) {
+               for (i = 0 ; i < num_pgs ; i++)
+                       phys_pg_pack->pages[i] = paddr + i * page_size;
+       } else {
+               for (i = 0 ; i < num_pgs ; i++) {
+                       phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
+                                                       vm->dram_pg_pool,
+                                                       page_size);
+                       if (!phys_pg_pack->pages[i]) {
+                               dev_err(hdev->dev,
+                                       "Failed to allocate device memory (out of memory)\n");
+                               rc = -ENOMEM;
+                               goto page_err;
+                       }
+
+                       num_curr_pgs++;
+               }
+       }
+
+       spin_lock(&vm->idr_lock);
+       handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
+                               GFP_ATOMIC);
+       spin_unlock(&vm->idr_lock);
+
+       if (handle < 0) {
+               dev_err(hdev->dev, "Failed to get handle for page\n");
+               rc = -EFAULT;
+               goto idr_err;
+       }
+
+       for (i = 0 ; i < num_pgs ; i++)
+               kref_get(&vm->dram_pg_pool_refcount);
+
+       phys_pg_pack->handle = handle;
+
+       atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
+       atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);
+
+       *ret_handle = handle;
+
+       return 0;
+
+idr_err:
+page_err:
+       if (!phys_pg_pack->contiguous)
+               for (i = 0 ; i < num_curr_pgs ; i++)
+                       gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
+                                       page_size);
+
+       kvfree(phys_pg_pack->pages);
+pages_arr_err:
+       kfree(phys_pg_pack);
+pages_pack_err:
+       if (contiguous)
+               gen_pool_free(vm->dram_pg_pool, paddr, total_size);
+
+       return rc;
+}
+
+/*
+ * dma_map_host_va - DMA mapping of the given host virtual address.
+ * @hdev: habanalabs device structure
+ * @addr: the host virtual address of the memory area
+ * @size: the size of the memory area
+ * @p_userptr: pointer to result userptr structure
+ *
+ * This function does the following:
+ * - Allocate userptr structure
+ * - Pin the given host memory using the userptr structure
+ * - Perform DMA mapping to have the DMA addresses of the pages
+ */
+static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
+                               struct hl_userptr **p_userptr)
+{
+       struct hl_userptr *userptr;
+       int rc;
+
+       userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
+       if (!userptr) {
+               rc = -ENOMEM;
+               goto userptr_err;
+       }
+
+       rc = hl_pin_host_memory(hdev, addr, size, userptr);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to pin host memory\n");
+               goto pin_err;
+       }
+
+       rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
+                                       userptr->sgt->nents, DMA_BIDIRECTIONAL);
+       if (rc) {
+               dev_err(hdev->dev, "failed to map sgt with DMA region\n");
+               goto dma_map_err;
+       }
+
+       userptr->dma_mapped = true;
+       userptr->dir = DMA_BIDIRECTIONAL;
+       userptr->vm_type = VM_TYPE_USERPTR;
+
+       *p_userptr = userptr;
+
+       return 0;
+
+dma_map_err:
+       hl_unpin_host_memory(hdev, userptr);
+pin_err:
+       kfree(userptr);
+userptr_err:
+
+       return rc;
+}
+
+/*
+ * dma_unmap_host_va - DMA unmapping of the given host virtual address.
+ * @hdev: habanalabs device structure
+ * @userptr: userptr to free
+ *
+ * This function does the following:
+ * - Unpins the physical pages
+ * - Frees the userptr structure
+ */
+static void dma_unmap_host_va(struct hl_device *hdev,
+                               struct hl_userptr *userptr)
+{
+       hl_unpin_host_memory(hdev, userptr);
+       kfree(userptr);
+}
+
+/*
+ * dram_pg_pool_do_release - free DRAM pages pool
+ *
+ * @ref                 : pointer to reference object
+ *
+ * This function does the following:
+ * - Frees the idr structure of physical pages handles
+ * - Frees the generic pool of DRAM physical pages
+ */
+static void dram_pg_pool_do_release(struct kref *ref)
+{
+       struct hl_vm *vm = container_of(ref, struct hl_vm,
+                       dram_pg_pool_refcount);
+
+       /*
+        * free the idr here as only here we know for sure that there are no
+        * allocated physical pages and hence there are no handles in use
+        */
+       idr_destroy(&vm->phys_pg_pack_handles);
+       gen_pool_destroy(vm->dram_pg_pool);
+}
+
+/*
+ * free_phys_pg_pack - free physical page pack
+ * @hdev: habanalabs device structure
+ * @phys_pg_pack: physical page pack to free
+ *
+ * This function does the following:
+ * - For DRAM memory only, iterate over the pack and free each physical block
+ *   structure by returning it to the general pool
+ * - Free the hl_vm_phys_pg_pack structure
+ */
+static void free_phys_pg_pack(struct hl_device *hdev,
+                               struct hl_vm_phys_pg_pack *phys_pg_pack)
+{
+       struct hl_vm *vm = &hdev->vm;
+       u64 i;
+
+       if (!phys_pg_pack->created_from_userptr) {
+               if (phys_pg_pack->contiguous) {
+                       gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
+                                       phys_pg_pack->total_size);
+
+                       for (i = 0; i < phys_pg_pack->npages ; i++)
+                               kref_put(&vm->dram_pg_pool_refcount,
+                                       dram_pg_pool_do_release);
+               } else {
+                       for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+                               gen_pool_free(vm->dram_pg_pool,
+                                               phys_pg_pack->pages[i],
+                                               phys_pg_pack->page_size);
+                               kref_put(&vm->dram_pg_pool_refcount,
+                                       dram_pg_pool_do_release);
+                       }
+               }
+       }
+
+       kvfree(phys_pg_pack->pages);
+       kfree(phys_pg_pack);
+}
+
+/*
+ * free_device_memory - free device memory
+ *
+ * @ctx                  : current context
+ * @handle              : handle of the memory chunk to free
+ *
+ * This function does the following:
+ * - Free the device memory related to the given handle
+ */
+static int free_device_memory(struct hl_ctx *ctx, u32 handle)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct hl_vm *vm = &hdev->vm;
+       struct hl_vm_phys_pg_pack *phys_pg_pack;
+
+       spin_lock(&vm->idr_lock);
+       phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
+       if (phys_pg_pack) {
+               if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
+                       dev_err(hdev->dev, "handle %u is mapped, cannot free\n",
+                               handle);
+                       spin_unlock(&vm->idr_lock);
+                       return -EINVAL;
+               }
+
+               /*
+                * must remove from idr before the freeing of the physical
+                * pages as the refcount of the pool is also the trigger of the
+                * idr destroy
+                */
+               idr_remove(&vm->phys_pg_pack_handles, handle);
+               spin_unlock(&vm->idr_lock);
+
+               atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
+               atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
+
+               free_phys_pg_pack(hdev, phys_pg_pack);
+       } else {
+               spin_unlock(&vm->idr_lock);
+               dev_err(hdev->dev,
+                       "free device memory failed, no match for handle %u\n",
+                       handle);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * clear_va_list_locked - free virtual addresses list
+ *
+ * @hdev                : habanalabs device structure
+ * @va_list             : list of virtual addresses to free
+ *
+ * This function does the following:
+ * - Iterate over the list and free each virtual addresses block
+ *
+ * This function should be called only when va_list lock is taken
+ */
+static void clear_va_list_locked(struct hl_device *hdev,
+               struct list_head *va_list)
+{
+       struct hl_vm_va_block *va_block, *tmp;
+
+       list_for_each_entry_safe(va_block, tmp, va_list, node) {
+               list_del(&va_block->node);
+               kfree(va_block);
+       }
+}
+
+/*
+ * print_va_list_locked - print virtual addresses list
+ *
+ * @hdev                : habanalabs device structure
+ * @va_list             : list of virtual addresses to print
+ *
+ * This function does the following:
+ * - Iterate over the list and print each virtual addresses block
+ *
+ * This function should be called only when va_list lock is taken
+ */
+static void print_va_list_locked(struct hl_device *hdev,
+               struct list_head *va_list)
+{
+#if HL_MMU_DEBUG
+       struct hl_vm_va_block *va_block;
+
+       dev_dbg(hdev->dev, "print va list:\n");
+
+       list_for_each_entry(va_block, va_list, node)
+               dev_dbg(hdev->dev,
+                       "va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
+                       va_block->start, va_block->end, va_block->size);
+#endif
+}
+
+/*
+ * merge_va_blocks_locked - merge a virtual block if possible
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ * @va_list             : pointer to the virtual addresses block list
+ * @va_block            : virtual block to merge with adjacent blocks
+ *
+ * This function does the following:
+ * - Merge the given blocks with the adjacent blocks if their virtual ranges
+ *   create a contiguous virtual range
+ *
+ * This function should be called only when the va_list lock is taken
+ */
+static void merge_va_blocks_locked(struct hl_device *hdev,
+               struct list_head *va_list, struct hl_vm_va_block *va_block)
+{
+       struct hl_vm_va_block *prev, *next;
+
+       prev = list_prev_entry(va_block, node);
+       if (&prev->node != va_list && prev->end + 1 == va_block->start) {
+               prev->end = va_block->end;
+               prev->size = prev->end - prev->start;
+               list_del(&va_block->node);
+               kfree(va_block);
+               va_block = prev;
+       }
+
+       next = list_next_entry(va_block, node);
+       if (&next->node != va_list && va_block->end + 1 == next->start) {
+               next->start = va_block->start;
+               next->size = next->end - next->start;
+               list_del(&va_block->node);
+               kfree(va_block);
+       }
+}
+
+/*
+ * add_va_block_locked - add a virtual block to the virtual addresses list
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ * @va_list             : pointer to the virtual addresses block list
+ * @start               : start virtual address
+ * @end                 : end virtual address
+ *
+ * This function does the following:
+ * - Add the given block to the virtual blocks list and merge with other
+ * blocks if a contiguous virtual block can be created
+ *
+ * This function should be called only when the va_list lock is taken
+ */
+static int add_va_block_locked(struct hl_device *hdev,
+               struct list_head *va_list, u64 start, u64 end)
+{
+       struct hl_vm_va_block *va_block, *res = NULL;
+       u64 size = end - start;
+
+       print_va_list_locked(hdev, va_list);
+
+       list_for_each_entry(va_block, va_list, node) {
+               /* TODO: remove once the code is mature */
+               if (hl_mem_area_crosses_range(start, size, va_block->start,
+                               va_block->end)) {
+                       dev_err(hdev->dev,
+                               "block crossing ranges at start 0x%llx, end 0x%llx\n",
+                               va_block->start, va_block->end);
+                       return -EINVAL;
+               }
+
+               if (va_block->end < start)
+                       res = va_block;
+       }
+
+       va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
+       if (!va_block)
+               return -ENOMEM;
+
+       va_block->start = start;
+       va_block->end = end;
+       va_block->size = size;
+
+       if (!res)
+               list_add(&va_block->node, va_list);
+       else
+               list_add(&va_block->node, &res->node);
+
+       merge_va_blocks_locked(hdev, va_list, va_block);
+
+       print_va_list_locked(hdev, va_list);
+
+       return 0;
+}
+
+/*
+ * add_va_block - wrapper for add_va_block_locked
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ * @va_range            : pointer to the virtual addresses range
+ * @start               : start virtual address
+ * @end                 : end virtual address
+ *
+ * This function does the following:
+ * - Takes the list lock and calls add_va_block_locked
+ */
+static inline int add_va_block(struct hl_device *hdev,
+               struct hl_va_range *va_range, u64 start, u64 end)
+{
+       int rc;
+
+       mutex_lock(&va_range->lock);
+       rc = add_va_block_locked(hdev, &va_range->list, start, end);
+       mutex_unlock(&va_range->lock);
+
+       return rc;
+}
+
+/*
+ * get_va_block - get a virtual block with the requested size
+ *
+ * @hdev            : pointer to the habanalabs device structure
+ * @va_range        : pointer to the virtual addresses range
+ * @size            : requested block size
+ * @hint_addr       : hint for request address by the user
+ * @is_userptr      : is host or DRAM memory
+ *
+ * This function does the following:
+ * - Iterate on the virtual block list to find a suitable virtual block for the
+ *   requested size
+ * - Reserve the requested block and update the list
+ * - Return the start address of the virtual block
+ */
+static u64 get_va_block(struct hl_device *hdev,
+                       struct hl_va_range *va_range, u64 size, u64 hint_addr,
+                       bool is_userptr)
+{
+       struct hl_vm_va_block *va_block, *new_va_block = NULL;
+       u64 valid_start, valid_size, prev_start, prev_end, page_mask,
+               res_valid_start = 0, res_valid_size = 0;
+       u32 page_size;
+       bool add_prev = false;
+
+       if (is_userptr)
+               /*
+                * We cannot know if the user allocated memory with huge pages
+                * or not, hence we continue with the biggest possible
+                * granularity.
+                */
+               page_size = hdev->asic_prop.pmmu_huge.page_size;
+       else
+               page_size = hdev->asic_prop.dmmu.page_size;
+
+       page_mask = ~((u64)page_size - 1);
+
+       mutex_lock(&va_range->lock);
+
+       print_va_list_locked(hdev, &va_range->list);
+
+       list_for_each_entry(va_block, &va_range->list, node) {
+               /* calc the first possible aligned addr */
+               valid_start = va_block->start;
+
+               if (valid_start & (page_size - 1)) {
+                       valid_start &= page_mask;
+                       valid_start += page_size;
+                       if (valid_start > va_block->end)
+                               continue;
+               }
+
+               valid_size = va_block->end - valid_start;
+
+               if (valid_size >= size &&
+                       (!new_va_block || valid_size < res_valid_size)) {
+                       new_va_block = va_block;
+                       res_valid_start = valid_start;
+                       res_valid_size = valid_size;
+               }
+
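+               /* If the caller's hint address fits inside this block, honor
+                * it and stop searching.
+                */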
+               if (hint_addr && hint_addr >= valid_start &&
+                               ((hint_addr + size) <= va_block->end)) {
+                       new_va_block = va_block;
+                       res_valid_start = hint_addr;
+                       res_valid_size = valid_size;
+                       break;
+               }
+       }
+
+       if (!new_va_block) {
+               dev_err(hdev->dev, "no available va block for size %llu\n",
+                               size);
+               goto out;
+       }
+
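+       /* If the chosen start address falls inside the block, split off the
+        * leading part so it can be returned to the list further below.
+        */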
+       if (res_valid_start > new_va_block->start) {
+               prev_start = new_va_block->start;
+               prev_end = res_valid_start - 1;
+
+               new_va_block->start = res_valid_start;
+               new_va_block->size = res_valid_size;
+
+               add_prev = true;
+       }
+
+       if (new_va_block->size > size) {
+               new_va_block->start += size;
+               new_va_block->size = new_va_block->end - new_va_block->start;
+       } else {
+               list_del(&new_va_block->node);
+               kfree(new_va_block);
+       }
+
+       if (add_prev)
+               add_va_block_locked(hdev, &va_range->list, prev_start,
+                               prev_end);
+
+       print_va_list_locked(hdev, &va_range->list);
+out:
+       mutex_unlock(&va_range->lock);
+
+       return res_valid_start;
+}
+
+/*
+ * get_sg_info - get number of pages and the DMA address from SG list
+ *
+ * @sg                 : the SG list
+ * @dma_addr           : pointer to DMA address to return
+ *
+ * Calculate the number of consecutive pages described by the SG list. Take the
+ * offset of the address within the first page, add the length, and round up to
+ * the number of pages needed.
+ */
+static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
+{
+       *dma_addr = sg_dma_address(sg);
+
+       return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
+                       (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+}
+
+/*
+ * init_phys_pg_pack_from_userptr - initialize physical page pack from host
+ *                                  memory
+ * @ctx: current context
+ * @userptr: userptr to initialize from
+ * @pphys_pg_pack: result pointer
+ *
+ * This function does the following:
+ * - Pin the physical pages related to the given virtual block
+ * - Create a physical page pack from the physical pages related to the given
+ *   virtual block
+ */
+static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
+                               struct hl_userptr *userptr,
+                               struct hl_vm_phys_pg_pack **pphys_pg_pack)
+{
+       struct hl_vm_phys_pg_pack *phys_pg_pack;
+       struct scatterlist *sg;
+       dma_addr_t dma_addr;
+       u64 page_mask, total_npages;
+       u32 npages, page_size = PAGE_SIZE,
+               huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
+       bool first = true, is_huge_page_opt = true;
+       int rc, i, j;
+       u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
+
+       phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
+       if (!phys_pg_pack)
+               return -ENOMEM;
+
+       phys_pg_pack->vm_type = userptr->vm_type;
+       phys_pg_pack->created_from_userptr = true;
+       phys_pg_pack->asid = ctx->asid;
+       atomic_set(&phys_pg_pack->mapping_cnt, 1);
+
+       /* Only if all dma_addrs are aligned to 2MB and their
+        * sizes are at least 2MB, we can use huge page mapping.
+        * We limit the 2MB optimization to this condition,
+        * since later on we acquire the related VA range as one
+        * consecutive block.
+        */
+       total_npages = 0;
+       for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
+               npages = get_sg_info(sg, &dma_addr);
+
+               total_npages += npages;
+
+               if ((npages % pgs_in_huge_page) ||
+                                       (dma_addr & (huge_page_size - 1)))
+                       is_huge_page_opt = false;
+       }
+
+       if (is_huge_page_opt) {
+               page_size = huge_page_size;
+               do_div(total_npages, pgs_in_huge_page);
+       }
+
+       page_mask = ~(((u64) page_size) - 1);
+
+       phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
+                                               GFP_KERNEL);
+       if (!phys_pg_pack->pages) {
+               rc = -ENOMEM;
+               goto page_pack_arr_mem_err;
+       }
+
+       phys_pg_pack->npages = total_npages;
+       phys_pg_pack->page_size = page_size;
+       phys_pg_pack->total_size = total_npages * page_size;
+
+       j = 0;
+       for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
+               npages = get_sg_info(sg, &dma_addr);
+
+               /* align down to physical page size and save the offset */
+               if (first) {
+                       first = false;
+                       phys_pg_pack->offset = dma_addr & (page_size - 1);
+                       dma_addr &= page_mask;
+               }
+
+               while (npages) {
+                       phys_pg_pack->pages[j++] = dma_addr;
+                       dma_addr += page_size;
+
+                       if (is_huge_page_opt)
+                               npages -= pgs_in_huge_page;
+                       else
+                               npages--;
+               }
+       }
+
+       *pphys_pg_pack = phys_pg_pack;
+
+       return 0;
+
+page_pack_arr_mem_err:
+       kfree(phys_pg_pack);
+
+       return rc;
+}
+
+/*
+ * map_phys_pg_pack - maps the physical page pack.
+ * @ctx: current context
+ * @vaddr: start address of the virtual area to map from
+ * @phys_pg_pack: the pack of physical pages to map to
+ *
+ * This function does the following:
+ * - Maps each chunk of virtual memory to its matching physical chunk
+ * - Rolls back (unmaps) any already-mapped pages on failure
+ * - Returns 0 on success, error code otherwise
+ */
+static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
+                               struct hl_vm_phys_pg_pack *phys_pg_pack)
+{
+       struct hl_device *hdev = ctx->hdev;
+       u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
+       u32 page_size = phys_pg_pack->page_size;
+       int rc = 0;
+
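+
+       /* The last hl_mmu_map argument marks the final page of the pack,
+        * letting the MMU code defer its page table flush until then.
+        */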
+       for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+               paddr = phys_pg_pack->pages[i];
+
+               rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
+                               (i + 1) == phys_pg_pack->npages);
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "map failed for handle %u, npages: %llu, mapped: %llu",
+                               phys_pg_pack->handle, phys_pg_pack->npages,
+                               mapped_pg_cnt);
+                       goto err;
+               }
+
+               mapped_pg_cnt++;
+               next_vaddr += page_size;
+       }
+
+       return 0;
+
+err:
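+       /* Roll back: unmap every page that was successfully mapped so far */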
+       next_vaddr = vaddr;
+       for (i = 0 ; i < mapped_pg_cnt ; i++) {
+               if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+                                       (i + 1) == mapped_pg_cnt))
+                       dev_warn_ratelimited(hdev->dev,
+                               "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
+                                       phys_pg_pack->handle, next_vaddr,
+                                       phys_pg_pack->pages[i], page_size);
+
+               next_vaddr += page_size;
+       }
+
+       return rc;
+}
+
+/*
+ * unmap_phys_pg_pack - unmaps the physical page pack
+ * @ctx: current context
+ * @vaddr: start address of the virtual area to unmap
+ * @phys_pg_pack: the pack of physical pages to unmap
+ */
+static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
+                               struct hl_vm_phys_pg_pack *phys_pg_pack)
+{
+       struct hl_device *hdev = ctx->hdev;
+       u64 next_vaddr, i;
+       u32 page_size;
+
+       page_size = phys_pg_pack->page_size;
+       next_vaddr = vaddr;
+
+       for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
+               if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+                                      (i + 1) == phys_pg_pack->npages))
+                       dev_warn_ratelimited(hdev->dev,
+                       "unmap failed for vaddr: 0x%llx\n", next_vaddr);
+
+               /*
+                * unmapping on Palladium can be really long, so avoid a CPU
+                * soft lockup bug by sleeping a little between unmapping pages
+                */
+               if (hdev->pldm)
+                       usleep_range(500, 1000);
+       }
+}
+
+static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
+                               u64 *paddr)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct hl_vm *vm = &hdev->vm;
+       struct hl_vm_phys_pg_pack *phys_pg_pack;
+       u32 handle;
+
+       handle = lower_32_bits(args->map_device.handle);
+       spin_lock(&vm->idr_lock);
+       phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
+       if (!phys_pg_pack) {
+               spin_unlock(&vm->idr_lock);
+               dev_err(hdev->dev, "no match for handle %u\n", handle);
+               return -EINVAL;
+       }
+
+       *paddr = phys_pg_pack->pages[0];
+
+       spin_unlock(&vm->idr_lock);
+
+       return 0;
+}
+
+/*
+ * map_device_va - map the given memory
+ *
+ * @ctx                 : current context
+ * @args         : host parameters with handle/host virtual address
+ * @device_addr         : pointer to result device virtual address
+ *
+ * This function does the following:
+ * - If given a physical device memory handle, map to a device virtual block
+ *   and return the start address of this block
+ * - If given a host virtual address and size, find the related physical pages,
+ *   map a device virtual block to these pages and return the start address of
+ *   this block
+ */
+static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
+               u64 *device_addr)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct hl_vm *vm = &hdev->vm;
+       struct hl_vm_phys_pg_pack *phys_pg_pack;
+       struct hl_userptr *userptr = NULL;
+       struct hl_vm_hash_node *hnode;
+       struct hl_va_range *va_range;
+       enum vm_type_t *vm_type;
+       u64 ret_vaddr, hint_addr;
+       u32 handle = 0;
+       int rc;
+       bool is_userptr = args->flags & HL_MEM_USERPTR;
+
+       /* Assume failure */
+       *device_addr = 0;
+
+       if (is_userptr) {
+               u64 addr = args->map_host.host_virt_addr,
+                       size = args->map_host.mem_size;
+
+               rc = dma_map_host_va(hdev, addr, size, &userptr);
+               if (rc) {
+                       dev_err(hdev->dev, "failed to get userptr from va\n");
+                       return rc;
+               }
+
+               rc = init_phys_pg_pack_from_userptr(ctx, userptr,
+                               &phys_pg_pack);
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "unable to init page pack for vaddr 0x%llx\n",
+                               addr);
+                       goto init_page_pack_err;
+               }
+
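+               /* vm_type is the first field of the object, so the object
+                * pointer can also be dereferenced as its type tag.
+                */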
+               vm_type = (enum vm_type_t *) userptr;
+               hint_addr = args->map_host.hint_addr;
+               handle = phys_pg_pack->handle;
+       } else {
+               handle = lower_32_bits(args->map_device.handle);
+
+               spin_lock(&vm->idr_lock);
+               phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
+               if (!phys_pg_pack) {
+                       spin_unlock(&vm->idr_lock);
+                       dev_err(hdev->dev,
+                               "no match for handle %u\n", handle);
+                       return -EINVAL;
+               }
+
+               /* increment now to avoid freeing device memory while mapping */
+               atomic_inc(&phys_pg_pack->mapping_cnt);
+
+               spin_unlock(&vm->idr_lock);
+
+               vm_type = (enum vm_type_t *) phys_pg_pack;
+
+               hint_addr = args->map_device.hint_addr;
+       }
+
+       /*
+        * relevant for mapping device physical memory only, as host memory is
+        * implicitly shared
+        */
+       if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
+                       phys_pg_pack->asid != ctx->asid) {
+               dev_err(hdev->dev,
+                       "Failed to map memory, handle %u is not shared\n",
+                       handle);
+               rc = -EPERM;
+               goto shared_err;
+       }
+
+       hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
+       if (!hnode) {
+               rc = -ENOMEM;
+               goto hnode_err;
+       }
+
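+       /* pick the VA range that matches the memory type and page size */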
+       if (is_userptr) {
+               if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size)
+                       va_range = ctx->host_va_range;
+               else
+                       va_range = ctx->host_huge_va_range;
+       } else {
+               va_range = ctx->dram_va_range;
+       }
+
+       ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
+                                       hint_addr, is_userptr);
+       if (!ret_vaddr) {
+               dev_err(hdev->dev, "no available va block for handle %u\n",
+                               handle);
+               rc = -ENOMEM;
+               goto va_block_err;
+       }
+
+       mutex_lock(&ctx->mmu_lock);
+
+       rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
+       if (rc) {
+               mutex_unlock(&ctx->mmu_lock);
+               dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
+                               handle);
+               goto map_err;
+       }
+
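+       /* invalidate the MMU cache so the H/W observes the new mapping */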
+       rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type);
+
+       mutex_unlock(&ctx->mmu_lock);
+
+       if (rc) {
+               dev_err(hdev->dev,
+                       "mapping handle %u failed due to MMU cache invalidation\n",
+                       handle);
+               goto map_err;
+       }
+
+       ret_vaddr += phys_pg_pack->offset;
+
+       hnode->ptr = vm_type;
+       hnode->vaddr = ret_vaddr;
+
+       mutex_lock(&ctx->mem_hash_lock);
+       hash_add(ctx->mem_hash, &hnode->node, ret_vaddr);
+       mutex_unlock(&ctx->mem_hash_lock);
+
+       *device_addr = ret_vaddr;
+
+       if (is_userptr)
+               free_phys_pg_pack(hdev, phys_pg_pack);
+
+       return 0;
+
+map_err:
+       if (add_va_block(hdev, va_range, ret_vaddr,
+                               ret_vaddr + phys_pg_pack->total_size - 1))
+               dev_warn(hdev->dev,
+                       "release va block failed for handle 0x%x, vaddr: 0x%llx\n",
+                               handle, ret_vaddr);
+
+va_block_err:
+       kfree(hnode);
+hnode_err:
+shared_err:
+       atomic_dec(&phys_pg_pack->mapping_cnt);
+       if (is_userptr)
+               free_phys_pg_pack(hdev, phys_pg_pack);
+init_page_pack_err:
+       if (is_userptr)
+               dma_unmap_host_va(hdev, userptr);
+
+       return rc;
+}
+
+/*
+ * unmap_device_va      - unmap the given device virtual address
+ *
+ * @ctx                 : current context
+ * @vaddr               : device virtual address to unmap
+ * @ctx_free            : true if in context free flow, false otherwise.
+ *
+ * This function does the following:
+ * - Unmap the physical pages related to the given virtual address
+ * - return the device virtual block to the virtual block list
+ */
+static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
+       struct hl_vm_hash_node *hnode = NULL;
+       struct hl_userptr *userptr = NULL;
+       struct hl_va_range *va_range;
+       enum vm_type_t *vm_type;
+       bool is_userptr;
+       int rc = 0;
+
+       /* protect against a concurrent unmap of the same address */
+       mutex_lock(&ctx->mem_hash_lock);
+       hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
+               if (vaddr == hnode->vaddr)
+                       break;
+
+       if (!hnode) {
+               mutex_unlock(&ctx->mem_hash_lock);
+               dev_err(hdev->dev,
+                       "unmap failed, no mem hnode for vaddr 0x%llx\n",
+                       vaddr);
+               return -EINVAL;
+       }
+
+       hash_del(&hnode->node);
+       mutex_unlock(&ctx->mem_hash_lock);
+
+       vm_type = hnode->ptr;
+
+       if (*vm_type == VM_TYPE_USERPTR) {
+               is_userptr = true;
+               userptr = hnode->ptr;
+               rc = init_phys_pg_pack_from_userptr(ctx, userptr,
+                                                       &phys_pg_pack);
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "unable to init page pack for vaddr 0x%llx\n",
+                               vaddr);
+                       goto vm_type_err;
+               }
+
+               if (phys_pg_pack->page_size ==
+                                       hdev->asic_prop.pmmu.page_size)
+                       va_range = ctx->host_va_range;
+               else
+                       va_range = ctx->host_huge_va_range;
+       } else if (*vm_type == VM_TYPE_PHYS_PACK) {
+               is_userptr = false;
+               va_range = ctx->dram_va_range;
+               phys_pg_pack = hnode->ptr;
+       } else {
+               dev_warn(hdev->dev,
+                       "unmap failed, unknown vm desc for vaddr 0x%llx\n",
+                               vaddr);
+               rc = -EFAULT;
+               goto vm_type_err;
+       }
+
+       if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
+               dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
+               rc = -EINVAL;
+               goto mapping_cnt_err;
+       }
+
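+       /* align the address back down to the start of the mapped block */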
+       vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
+
+       mutex_lock(&ctx->mmu_lock);
+
+       unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);
+
+       /*
+        * During context free this function is called in a loop to clean all
+        * the context mappings. Hence the cache invalidation can be called once
+        * at the loop end rather than for each iteration
+        */
+       if (!ctx_free)
+               rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
+                                                               *vm_type);
+
+       mutex_unlock(&ctx->mmu_lock);
+
+       /*
+        * If the context is closing, we don't need to check the MMU cache
+        * invalidation return code nor update the VA free list: in this flow
+        * we invalidate the MMU cache outside of this unmap function and the
+        * VA free list will be freed anyway.
+        */
+       if (!ctx_free) {
+               int tmp_rc;
+
+               if (rc)
+                       dev_err(hdev->dev,
+                               "unmapping vaddr 0x%llx failed due to MMU cache invalidation\n",
+                               vaddr);
+
+               tmp_rc = add_va_block(hdev, va_range, vaddr,
+                                       vaddr + phys_pg_pack->total_size - 1);
+               if (tmp_rc) {
+                       dev_warn(hdev->dev,
+                                       "add va block failed for vaddr: 0x%llx\n",
+                                       vaddr);
+                       if (!rc)
+                               rc = tmp_rc;
+               }
+       }
+
+       atomic_dec(&phys_pg_pack->mapping_cnt);
+       kfree(hnode);
+
+       if (is_userptr) {
+               free_phys_pg_pack(hdev, phys_pg_pack);
+               dma_unmap_host_va(hdev, userptr);
+       }
+
+       return rc;
+
+mapping_cnt_err:
+       if (is_userptr)
+               free_phys_pg_pack(hdev, phys_pg_pack);
+vm_type_err:
+       mutex_lock(&ctx->mem_hash_lock);
+       hash_add(ctx->mem_hash, &hnode->node, vaddr);
+       mutex_unlock(&ctx->mem_hash_lock);
+
+       return rc;
+}
+
+static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       struct hl_ctx *ctx = hpriv->ctx;
+       u64 device_addr = 0;
+       u32 handle = 0;
+       int rc;
+
+       switch (args->in.op) {
+       case HL_MEM_OP_ALLOC:
+               if (args->in.alloc.mem_size == 0) {
+                       dev_err(hdev->dev,
+                               "alloc size must be larger than 0\n");
+                       rc = -EINVAL;
+                       goto out;
+               }
+
+               /* Force contiguous as there are no real MMU
+                * translations to overcome physical memory gaps
+                */
+               args->in.flags |= HL_MEM_CONTIGUOUS;
+               rc = alloc_device_memory(ctx, &args->in, &handle);
+
+               memset(args, 0, sizeof(*args));
+               args->out.handle = (__u64) handle;
+               break;
+
+       case HL_MEM_OP_FREE:
+               rc = free_device_memory(ctx, args->in.free.handle);
+               break;
+
+       case HL_MEM_OP_MAP:
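+               /*
+                * Without an MMU, host memory is accessed with its own virtual
+                * address while device memory is accessed with its physical
+                * address
+                */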
+               if (args->in.flags & HL_MEM_USERPTR) {
+                       device_addr = args->in.map_host.host_virt_addr;
+                       rc = 0;
+               } else {
+                       rc = get_paddr_from_handle(ctx, &args->in,
+                                       &device_addr);
+               }
+
+               memset(args, 0, sizeof(*args));
+               args->out.device_virt_addr = device_addr;
+               break;
+
+       case HL_MEM_OP_UNMAP:
+               rc = 0;
+               break;
+
+       default:
+               dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
+               rc = -ENOTTY;
+               break;
+       }
+
+out:
+       return rc;
+}
+
+int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+       union hl_mem_args *args = data;
+       struct hl_device *hdev = hpriv->hdev;
+       struct hl_ctx *ctx = hpriv->ctx;
+       u64 device_addr = 0;
+       u32 handle = 0;
+       int rc;
+
+       if (hl_device_disabled_or_in_reset(hdev)) {
+               dev_warn_ratelimited(hdev->dev,
+                       "Device is %s. Can't execute MEMORY IOCTL\n",
+                       atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+               return -EBUSY;
+       }
+
+       if (!hdev->mmu_enable)
+               return mem_ioctl_no_mmu(hpriv, args);
+
+       switch (args->in.op) {
+       case HL_MEM_OP_ALLOC:
+               if (!hdev->dram_supports_virtual_memory) {
+                       dev_err(hdev->dev, "DRAM alloc is not supported\n");
+                       rc = -EINVAL;
+                       goto out;
+               }
+
+               if (args->in.alloc.mem_size == 0) {
+                       dev_err(hdev->dev,
+                               "alloc size must be larger than 0\n");
+                       rc = -EINVAL;
+                       goto out;
+               }
+               rc = alloc_device_memory(ctx, &args->in, &handle);
+
+               memset(args, 0, sizeof(*args));
+               args->out.handle = (__u64) handle;
+               break;
+
+       case HL_MEM_OP_FREE:
+               rc = free_device_memory(ctx, args->in.free.handle);
+               break;
+
+       case HL_MEM_OP_MAP:
+               rc = map_device_va(ctx, &args->in, &device_addr);
+
+               memset(args, 0, sizeof(*args));
+               args->out.device_virt_addr = device_addr;
+               break;
+
+       case HL_MEM_OP_UNMAP:
+               rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr,
+                                       false);
+               break;
+
+       default:
+               dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
+               rc = -ENOTTY;
+               break;
+       }
+
+out:
+       return rc;
+}
+
+static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
+                               u32 npages, u64 start, u32 offset,
+                               struct hl_userptr *userptr)
+{
+       int rc;
+
+       if (!access_ok((void __user *) (uintptr_t) addr, size)) {
+               dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
+               return -EFAULT;
+       }
+
+       userptr->vec = frame_vector_create(npages);
+       if (!userptr->vec) {
+               dev_err(hdev->dev, "Failed to create frame vector\n");
+               return -ENOMEM;
+       }
+
+       rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
+                               userptr->vec);
+
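+       /* get_vaddr_frames() returns the number of frames actually pinned */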
+       if (rc != npages) {
+               dev_err(hdev->dev,
+                       "Failed to map host memory, user ptr probably wrong\n");
+               if (rc < 0)
+                       goto destroy_framevec;
+               rc = -EFAULT;
+               goto put_framevec;
+       }
+
+       if (frame_vector_to_pages(userptr->vec) < 0) {
+               dev_err(hdev->dev,
+                       "Failed to translate frame vector to pages\n");
+               rc = -EFAULT;
+               goto put_framevec;
+       }
+
+       rc = sg_alloc_table_from_pages(userptr->sgt,
+                                       frame_vector_pages(userptr->vec),
+                                       npages, offset, size, GFP_ATOMIC);
+       if (rc < 0) {
+               dev_err(hdev->dev, "failed to create SG table from pages\n");
+               goto put_framevec;
+       }
+
+       return 0;
+
+put_framevec:
+       put_vaddr_frames(userptr->vec);
+destroy_framevec:
+       frame_vector_destroy(userptr->vec);
+       return rc;
+}
+
+/*
+ * hl_pin_host_memory - pins a chunk of host memory.
+ * @hdev: pointer to the habanalabs device structure
+ * @addr: the host virtual address of the memory area
+ * @size: the size of the memory area
+ * @userptr: pointer to hl_userptr structure
+ *
+ * This function does the following:
+ * - Pins the physical pages
+ * - Creates an SG list from those pages
+ */
+int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
+                                       struct hl_userptr *userptr)
+{
+       u64 start, end;
+       u32 npages, offset;
+       int rc;
+
+       if (!size) {
+               dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
+               return -EINVAL;
+       }
+
+       /*
+        * If the combination of the address and size requested for this memory
+        * region causes an integer overflow, return error.
+        */
+       if (((addr + size) < addr) ||
+                       PAGE_ALIGN(addr + size) < (addr + size)) {
+               dev_err(hdev->dev,
+                       "user pointer 0x%llx + %llu causes integer overflow\n",
+                       addr, size);
+               return -EINVAL;
+       }
+
+       /*
+        * This function can also be called from the data path, hence always
+        * use GFP_ATOMIC as this is not a big allocation.
+        */
+       userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
+       if (!userptr->sgt)
+               return -ENOMEM;
+
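+       /* compute the page-aligned start, in-page offset and number of pages */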
+       start = addr & PAGE_MASK;
+       offset = addr & ~PAGE_MASK;
+       end = PAGE_ALIGN(addr + size);
+       npages = (end - start) >> PAGE_SHIFT;
+
+       userptr->size = size;
+       userptr->addr = addr;
+       userptr->dma_mapped = false;
+       INIT_LIST_HEAD(&userptr->job_node);
+
+       rc = get_user_memory(hdev, addr, size, npages, start, offset,
+                               userptr);
+       if (rc) {
+               dev_err(hdev->dev,
+                       "failed to get user memory for address 0x%llx\n",
+                       addr);
+               goto free_sgt;
+       }
+
+       hl_debugfs_add_userptr(hdev, userptr);
+
+       return 0;
+
+free_sgt:
+       kfree(userptr->sgt);
+       return rc;
+}
+
+/*
+ * hl_unpin_host_memory - unpins a chunk of host memory.
+ * @hdev: pointer to the habanalabs device structure
+ * @userptr: pointer to hl_userptr structure
+ *
+ * This function does the following:
+ * - Unpins the physical pages related to the host memory
+ * - Free the SG list
+ */
+void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
+{
+       struct page **pages;
+
+       hl_debugfs_remove_userptr(hdev, userptr);
+
+       if (userptr->dma_mapped)
+               hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl,
+                                                       userptr->sgt->nents,
+                                                       userptr->dir);
+
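+       /*
+        * The device may have written to the pinned pages, so mark them dirty
+        * before unpinning
+        */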
+       pages = frame_vector_pages(userptr->vec);
+       if (!IS_ERR(pages)) {
+               int i;
+
+               for (i = 0; i < frame_vector_count(userptr->vec); i++)
+                       set_page_dirty_lock(pages[i]);
+       }
+       put_vaddr_frames(userptr->vec);
+       frame_vector_destroy(userptr->vec);
+
+       list_del(&userptr->job_node);
+
+       sg_free_table(userptr->sgt);
+       kfree(userptr->sgt);
+}
+
+/*
+ * hl_userptr_delete_list - clear userptr list
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ * @userptr_list        : pointer to the list to clear
+ *
+ * This function does the following:
+ * - Iterates over the list and unpins the host memory and frees the userptr
+ *   structure.
+ */
+void hl_userptr_delete_list(struct hl_device *hdev,
+                               struct list_head *userptr_list)
+{
+       struct hl_userptr *userptr, *tmp;
+
+       list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
+               hl_unpin_host_memory(hdev, userptr);
+               kfree(userptr);
+       }
+
+       INIT_LIST_HEAD(userptr_list);
+}
+
+/*
+ * hl_userptr_is_pinned - returns whether the given userptr is pinned
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ * @addr                : user address to check
+ * @size                : size of the area to check
+ * @userptr_list        : pointer to the list to search
+ * @userptr             : pointer to userptr to check
+ *
+ * This function does the following:
+ * - Iterates over the list and checks whether the given userptr is in it,
+ *   i.e. whether it is pinned. If so, returns true; otherwise returns false.
+ */
+bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
+                               u32 size, struct list_head *userptr_list,
+                               struct hl_userptr **userptr)
+{
+       list_for_each_entry((*userptr), userptr_list, job_node) {
+               if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * va_range_init - initialize virtual addresses range
+ * @hdev: pointer to the habanalabs device structure
+ * @va_range: pointer to the range to initialize
+ * @start: range start address
+ * @end: range end address
+ *
+ * This function does the following:
+ * - Initializes the virtual addresses list of the given range with the given
+ *   addresses.
+ */
+static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
+                               u64 start, u64 end)
+{
+       int rc;
+
+       INIT_LIST_HEAD(&va_range->list);
+
+       /* PAGE_SIZE alignment */
+
+       if (start & (PAGE_SIZE - 1)) {
+               start &= PAGE_MASK;
+               start += PAGE_SIZE;
+       }
+
+       if (end & (PAGE_SIZE - 1))
+               end &= PAGE_MASK;
+
+       if (start >= end) {
+               dev_err(hdev->dev, "too small vm range for va list\n");
+               return -EFAULT;
+       }
+
+       rc = add_va_block(hdev, va_range, start, end);
+
+       if (rc) {
+               dev_err(hdev->dev, "Failed to init host va list\n");
+               return rc;
+       }
+
+       va_range->start_addr = start;
+       va_range->end_addr = end;
+
+       return 0;
+}
+
+/*
+ * va_range_fini() - clear a virtual addresses range
+ * @hdev: pointer to the habanalabs structure
+ * @va_range: pointer to virtual addresses range
+ *
+ * This function does the following:
+ * - Frees the virtual addresses block list and its lock
+ */
+static void va_range_fini(struct hl_device *hdev,
+               struct hl_va_range *va_range)
+{
+       mutex_lock(&va_range->lock);
+       clear_va_list_locked(hdev, &va_range->list);
+       mutex_unlock(&va_range->lock);
+
+       mutex_destroy(&va_range->lock);
+       kfree(va_range);
+}
+
+/*
+ * vm_ctx_init_with_ranges() - initialize virtual memory for context
+ * @ctx: pointer to the habanalabs context structure
+ * @host_range_start: host virtual addresses range start.
+ * @host_range_end: host virtual addresses range end.
+ * @host_huge_range_start: host virtual addresses range start for memory
+ *                          allocated with huge pages.
+ * @host_huge_range_end: host virtual addresses range end for memory allocated
+ *                        with huge pages.
+ * @dram_range_start: dram virtual addresses range start.
+ * @dram_range_end: dram virtual addresses range end.
+ *
+ * This function initializes the following:
+ * - MMU for context
+ * - Virtual address to area descriptor hashtable
+ * - Virtual block list of available virtual memory
+ */
+static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
+                                       u64 host_range_start,
+                                       u64 host_range_end,
+                                       u64 host_huge_range_start,
+                                       u64 host_huge_range_end,
+                                       u64 dram_range_start,
+                                       u64 dram_range_end)
+{
+       struct hl_device *hdev = ctx->hdev;
+       int rc;
+
+       ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
+       if (!ctx->host_va_range)
+               return -ENOMEM;
+
+       ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
+                                               GFP_KERNEL);
+       if (!ctx->host_huge_va_range) {
+               rc =  -ENOMEM;
+               goto host_huge_va_range_err;
+       }
+
+       ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
+       if (!ctx->dram_va_range) {
+               rc = -ENOMEM;
+               goto dram_va_range_err;
+       }
+
+       rc = hl_mmu_ctx_init(ctx);
+       if (rc) {
+               dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
+               goto mmu_ctx_err;
+       }
+
+       mutex_init(&ctx->mem_hash_lock);
+       hash_init(ctx->mem_hash);
+
+       mutex_init(&ctx->host_va_range->lock);
+
+       rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
+                               host_range_end);
+       if (rc) {
+               dev_err(hdev->dev, "failed to init host vm range\n");
+               goto host_page_range_err;
+       }
+
+       if (hdev->pmmu_huge_range) {
+               mutex_init(&ctx->host_huge_va_range->lock);
+
+               rc = va_range_init(hdev, ctx->host_huge_va_range,
+                                       host_huge_range_start,
+                                       host_huge_range_end);
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "failed to init host huge vm range\n");
+                       goto host_hpage_range_err;
+               }
+       } else {
+               ctx->host_huge_va_range = ctx->host_va_range;
+       }
+
+       mutex_init(&ctx->dram_va_range->lock);
+
+       rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
+                       dram_range_end);
+       if (rc) {
+               dev_err(hdev->dev, "failed to init dram vm range\n");
+               goto dram_vm_err;
+       }
+
+       hl_debugfs_add_ctx_mem_hash(hdev, ctx);
+
+       return 0;
+
+dram_vm_err:
+       mutex_destroy(&ctx->dram_va_range->lock);
+
+       if (hdev->pmmu_huge_range) {
+               mutex_lock(&ctx->host_huge_va_range->lock);
+               clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
+               mutex_unlock(&ctx->host_huge_va_range->lock);
+       }
+host_hpage_range_err:
+       if (hdev->pmmu_huge_range)
+               mutex_destroy(&ctx->host_huge_va_range->lock);
+       mutex_lock(&ctx->host_va_range->lock);
+       clear_va_list_locked(hdev, &ctx->host_va_range->list);
+       mutex_unlock(&ctx->host_va_range->lock);
+host_page_range_err:
+       mutex_destroy(&ctx->host_va_range->lock);
+       mutex_destroy(&ctx->mem_hash_lock);
+       hl_mmu_ctx_fini(ctx);
+mmu_ctx_err:
+       kfree(ctx->dram_va_range);
+dram_va_range_err:
+       kfree(ctx->host_huge_va_range);
+host_huge_va_range_err:
+       kfree(ctx->host_va_range);
+
+       return rc;
+}
+
+int hl_vm_ctx_init(struct hl_ctx *ctx)
+{
+       struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
+       u64 host_range_start, host_range_end, host_huge_range_start,
+               host_huge_range_end, dram_range_start, dram_range_end;
+
+       atomic64_set(&ctx->dram_phys_mem, 0);
+
+       /*
+        * - If MMU is enabled, init the ranges as usual.
+        * - If MMU is disabled, in case of host mapping, the returned address
+        *   is the given one.
+        *   In case of DRAM mapping, the returned address is the physical
+        *   address of the memory related to the given handle.
+        */
+       if (ctx->hdev->mmu_enable) {
+               dram_range_start = prop->dmmu.start_addr;
+               dram_range_end = prop->dmmu.end_addr;
+               host_range_start = prop->pmmu.start_addr;
+               host_range_end = prop->pmmu.end_addr;
+               host_huge_range_start = prop->pmmu_huge.start_addr;
+               host_huge_range_end = prop->pmmu_huge.end_addr;
+       } else {
+               dram_range_start = prop->dram_user_base_address;
+               dram_range_end = prop->dram_end_address;
+               host_range_start = prop->dram_user_base_address;
+               host_range_end = prop->dram_end_address;
+               host_huge_range_start = prop->dram_user_base_address;
+               host_huge_range_end = prop->dram_end_address;
+       }
+
+       return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
+                                       host_huge_range_start,
+                                       host_huge_range_end,
+                                       dram_range_start,
+                                       dram_range_end);
+}
+
+/*
+ * hl_vm_ctx_fini       - virtual memory teardown of context
+ *
+ * @ctx                 : pointer to the habanalabs context structure
+ *
+ * This function tears down the following:
+ * - Virtual block list of available virtual memory
+ * - Virtual address to area descriptor hashtable
+ * - MMU for context
+ *
+ * In addition this function does the following:
+ * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
+ *   hashtable should be empty as no valid mappings should exist at this
+ *   point.
+ * - Frees any existing physical page list from the idr which relates to the
+ *   current context asid.
+ * - This function checks the virtual block list for correctness. At this point
+ *   the list should contain one element which describes the whole virtual
+ *   memory range of the context. Otherwise, a warning is printed.
+ */
+void hl_vm_ctx_fini(struct hl_ctx *ctx)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct hl_vm *vm = &hdev->vm;
+       struct hl_vm_phys_pg_pack *phys_pg_list;
+       struct hl_vm_hash_node *hnode;
+       struct hlist_node *tmp_node;
+       int i;
+
+       hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
+
+       /*
+        * Clearly something went wrong on hard reset so no point in printing
+        * another side effect error
+        */
+       if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
+               dev_notice(hdev->dev,
+                       "user released device without removing its memory mappings\n");
+
+       hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
+               dev_dbg(hdev->dev,
+                       "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
+                       hnode->vaddr, ctx->asid);
+               unmap_device_va(ctx, hnode->vaddr, true);
+       }
+
+       /* invalidate the cache once after the unmapping loop */
+       hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
+       hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);
+
+       spin_lock(&vm->idr_lock);
+       idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
+               if (phys_pg_list->asid == ctx->asid) {
+                       dev_dbg(hdev->dev,
+                               "page list 0x%px of asid %d is still alive\n",
+                               phys_pg_list, ctx->asid);
+                       atomic64_sub(phys_pg_list->total_size,
+                                       &hdev->dram_used_mem);
+                       free_phys_pg_pack(hdev, phys_pg_list);
+                       idr_remove(&vm->phys_pg_pack_handles, i);
+               }
+       spin_unlock(&vm->idr_lock);
+
+       va_range_fini(hdev, ctx->dram_va_range);
+       if (hdev->pmmu_huge_range)
+               va_range_fini(hdev, ctx->host_huge_va_range);
+       va_range_fini(hdev, ctx->host_va_range);
+
+       mutex_destroy(&ctx->mem_hash_lock);
+       hl_mmu_ctx_fini(ctx);
+}
+
+/*
+ * hl_vm_init           - initialize virtual memory module
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ *
+ * This function initializes the following:
+ * - MMU module
+ * - DRAM physical pages pool of 2MB
+ * - Idr for device memory allocation handles
+ */
+int hl_vm_init(struct hl_device *hdev)
+{
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct hl_vm *vm = &hdev->vm;
+       int rc;
+
+       vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
+       if (!vm->dram_pg_pool) {
+               dev_err(hdev->dev, "Failed to create dram page pool\n");
+               return -ENOMEM;
+       }
+
+       kref_init(&vm->dram_pg_pool_refcount);
+
+       rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
+                       prop->dram_end_address - prop->dram_user_base_address,
+                       -1);
+
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to add memory to dram page pool %d\n", rc);
+               goto pool_add_err;
+       }
+
+       spin_lock_init(&vm->idr_lock);
+       idr_init(&vm->phys_pg_pack_handles);
+
+       atomic64_set(&hdev->dram_used_mem, 0);
+
+       vm->init_done = true;
+
+       return 0;
+
+pool_add_err:
+       gen_pool_destroy(vm->dram_pg_pool);
+
+       return rc;
+}
+
+/*
+ * hl_vm_fini           - virtual memory module teardown
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ *
+ * This function tears down the following:
+ * - Idr for device memory allocation handles
+ * - DRAM physical pages pool of 2MB
+ * - MMU module
+ */
+void hl_vm_fini(struct hl_device *hdev)
+{
+       struct hl_vm *vm = &hdev->vm;
+
+       if (!vm->init_done)
+               return;
+
+       /*
+        * At this point all the contexts should be freed, so no DRAM memory
+        * should be in use and the DRAM pool can be freed here.
+        */
+       if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
+               dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
+                               __func__);
+
+       vm->init_done = false;
+}
diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c
new file mode 100644 (file)
index 0000000..0430395
--- /dev/null
@@ -0,0 +1,1037 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+#include "include/hw_ip/mmu/mmu_general.h"
+
+#include <linux/genalloc.h>
+#include <linux/slab.h>
+
+static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
+
+static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
+{
+       struct pgt_info *pgt_info = NULL;
+
+       hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
+                               (unsigned long) hop_addr)
+               if (hop_addr == pgt_info->shadow_addr)
+                       break;
+
+       return pgt_info;
+}
+
+static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
+{
+       struct hl_device *hdev = ctx->hdev;
+
+       gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr,
+                       hdev->asic_prop.mmu_hop_table_size);
+       hash_del(&pgt_info->node);
+       kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
+       kfree(pgt_info);
+}
+
+static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
+{
+       struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
+
+       _free_hop(ctx, pgt_info);
+}
+
+static u64 alloc_hop(struct hl_ctx *ctx)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct pgt_info *pgt_info;
+       u64 phys_addr, shadow_addr;
+
+       pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
+       if (!pgt_info)
+               return ULLONG_MAX;
+
+       phys_addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool,
+                                       prop->mmu_hop_table_size);
+       if (!phys_addr) {
+               dev_err(hdev->dev, "failed to allocate page\n");
+               goto pool_add_err;
+       }
+
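+       /*
+        * The hop's shadow copy is kept in host memory so PTEs can be read
+        * without accessing the device
+        */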
+       shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
+                                               GFP_KERNEL);
+       if (!shadow_addr)
+               goto shadow_err;
+
+       pgt_info->phys_addr = phys_addr;
+       pgt_info->shadow_addr = shadow_addr;
+       pgt_info->ctx = ctx;
+       pgt_info->num_of_ptes = 0;
+       hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
+
+       return shadow_addr;
+
+shadow_err:
+       gen_pool_free(hdev->mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size);
+pool_add_err:
+       kfree(pgt_info);
+
+       return ULLONG_MAX;
+}
+
+static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
+{
+       return ctx->hdev->asic_prop.mmu_pgt_addr +
+                       (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
+}
+
+static inline u64 get_hop0_addr(struct hl_ctx *ctx)
+{
+       return (u64) (uintptr_t) ctx->hdev->mmu_shadow_hop0 +
+                       (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
+}
+
+static inline void flush(struct hl_ctx *ctx)
+{
+       /* flush all writes from all cores to reach PCI */
+       mb();
+       ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
+}
+
+/* transform the value to physical address when writing to H/W */
+static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
+{
+       /*
+        * The value to write is actually the address of the next shadow hop +
+        * flags at the 12 LSBs.
+        * Hence in order to get the value to write to the physical PTE, we
+        * clear the 12 LSBs and translate the shadow hop to its associated
+        * physical hop, and add back the original 12 LSBs.
+        */
+       u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
+                               (val & FLAGS_MASK);
+
+       ctx->hdev->asic_funcs->write_pte(ctx->hdev,
+                                       get_phys_addr(ctx, shadow_pte_addr),
+                                       phys_val);
+
+       *(u64 *) (uintptr_t) shadow_pte_addr = val;
+}
+
+/* do not transform the value to physical address when writing to H/W */
+static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
+                                       u64 val)
+{
+       ctx->hdev->asic_funcs->write_pte(ctx->hdev,
+                                       get_phys_addr(ctx, shadow_pte_addr),
+                                       val);
+       *(u64 *) (uintptr_t) shadow_pte_addr = val;
+}
+
+/* clear the last and present bits */
+static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
+{
+       /* no need to transform the value to physical address */
+       write_final_pte(ctx, pte_addr, 0);
+}
+
+static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
+{
+       get_pgt_info(ctx, hop_addr)->num_of_ptes++;
+}
+
+/*
+ * put_pte - decrement the num of ptes and free the hop if possible
+ *
+ * @ctx: pointer to the context structure
+ * @hop_addr: addr of the hop
+ *
+ * This function returns the number of ptes left on this hop. If the number is
+ * 0, it means the hop was freed.
+ */
+static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
+{
+       struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
+       int num_of_ptes_left;
+
+       pgt_info->num_of_ptes--;
+
+       /*
+        * Need to save the number of ptes left because free_hop might free
+        * the pgt_info
+        */
+       num_of_ptes_left = pgt_info->num_of_ptes;
+       if (!num_of_ptes_left)
+               _free_hop(ctx, pgt_info);
+
+       return num_of_ptes_left;
+}
+
+static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
+                                       u64 virt_addr, u64 mask, u64 shift)
+{
+       return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
+                       ((virt_addr & mask) >> shift);
+}
+
+static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_prop,
+                                       u64 hop_addr, u64 vaddr)
+{
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
+                                       mmu_prop->hop0_shift);
+}
+
+static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_prop,
+                                       u64 hop_addr, u64 vaddr)
+{
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
+                                       mmu_prop->hop1_shift);
+}
+
+static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_prop,
+                                       u64 hop_addr, u64 vaddr)
+{
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
+                                       mmu_prop->hop2_shift);
+}
+
+static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_prop,
+                                       u64 hop_addr, u64 vaddr)
+{
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
+                                       mmu_prop->hop3_shift);
+}
+
+static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_prop,
+                                       u64 hop_addr, u64 vaddr)
+{
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
+                                       mmu_prop->hop4_shift);
+}
+
+static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
+{
+       if (curr_pte & PAGE_PRESENT_MASK)
+               return curr_pte & HOP_PHYS_ADDR_MASK;
+       else
+               return ULLONG_MAX;
+}
+
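+/* return the address of the next hop, allocating a new hop if the PTE is not present */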
+static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
+                                               bool *is_new_hop)
+{
+       u64 hop_addr = get_next_hop_addr(ctx, curr_pte);
+
+       if (hop_addr == ULLONG_MAX) {
+               hop_addr = alloc_hop(ctx);
+               *is_new_hop = (hop_addr != ULLONG_MAX);
+       }
+
+       return hop_addr;
+}
+
+/* translates shadow address inside hop to a physical address */
+static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
+{
+       u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
+       u64 shadow_hop_addr = shadow_addr & ~page_mask;
+       u64 pte_offset = shadow_addr & page_mask;
+       u64 phys_hop_addr;
+
+       if (shadow_hop_addr != get_hop0_addr(ctx))
+               phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
+       else
+               phys_hop_addr = get_phys_hop0_addr(ctx);
+
+       return phys_hop_addr + pte_offset;
+}
+
+static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
+{
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+       return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+                                       prop->dmmu.start_addr,
+                                       prop->dmmu.end_addr);
+}
+
+static int dram_default_mapping_init(struct hl_ctx *ctx)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
+               hop2_pte_addr, hop3_pte_addr, pte_val;
+       int rc, i, j, hop3_allocated = 0;
+
+       if ((!hdev->dram_supports_virtual_memory) ||
+                       (!hdev->dram_default_page_mapping) ||
+                       (ctx->asid == HL_KERNEL_ASID_ID))
+               return 0;
+
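+       /* each hop3 table maps PTE_ENTRIES_IN_HOP pages of dram_page_size */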
+       num_of_hop3 = prop->dram_size_for_default_page_mapping;
+       do_div(num_of_hop3, prop->dram_page_size);
+       do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
+
+       /* add hop1 and hop2 */
+       total_hops = num_of_hop3 + 2;
+
+       ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops,  GFP_KERNEL);
+       if (!ctx->dram_default_hops)
+               return -ENOMEM;
+
+       hop0_addr = get_hop0_addr(ctx);
+
+       hop1_addr = alloc_hop(ctx);
+       if (hop1_addr == ULLONG_MAX) {
+               dev_err(hdev->dev, "failed to alloc hop 1\n");
+               rc = -ENOMEM;
+               goto hop1_err;
+       }
+
+       ctx->dram_default_hops[total_hops - 1] = hop1_addr;
+
+       hop2_addr = alloc_hop(ctx);
+       if (hop2_addr == ULLONG_MAX) {
+               dev_err(hdev->dev, "failed to alloc hop 2\n");
+               rc = -ENOMEM;
+               goto hop2_err;
+       }
+
+       ctx->dram_default_hops[total_hops - 2] = hop2_addr;
+
+       for (i = 0 ; i < num_of_hop3 ; i++) {
+               ctx->dram_default_hops[i] = alloc_hop(ctx);
+               if (ctx->dram_default_hops[i] == ULLONG_MAX) {
+                       dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
+                       rc = -ENOMEM;
+                       goto hop3_err;
+               }
+               hop3_allocated++;
+       }
+
+       /* need only pte 0 in hops 0 and 1 */
+       pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+       write_pte(ctx, hop0_addr, pte_val);
+
+       pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+       write_pte(ctx, hop1_addr, pte_val);
+       get_pte(ctx, hop1_addr);
+
+       hop2_pte_addr = hop2_addr;
+       for (i = 0 ; i < num_of_hop3 ; i++) {
+               pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
+                               PAGE_PRESENT_MASK;
+               write_pte(ctx, hop2_pte_addr, pte_val);
+               get_pte(ctx, hop2_addr);
+               hop2_pte_addr += HL_PTE_SIZE;
+       }
+
+       pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
+                       LAST_MASK | PAGE_PRESENT_MASK;
+
+       for (i = 0 ; i < num_of_hop3 ; i++) {
+               hop3_pte_addr = ctx->dram_default_hops[i];
+               for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
+                       write_final_pte(ctx, hop3_pte_addr, pte_val);
+                       get_pte(ctx, ctx->dram_default_hops[i]);
+                       hop3_pte_addr += HL_PTE_SIZE;
+               }
+       }
+
+       flush(ctx);
+
+       return 0;
+
+hop3_err:
+       for (i = 0 ; i < hop3_allocated ; i++)
+               free_hop(ctx, ctx->dram_default_hops[i]);
+
+       free_hop(ctx, hop2_addr);
+hop2_err:
+       free_hop(ctx, hop1_addr);
+hop1_err:
+       kfree(ctx->dram_default_hops);
+
+       return rc;
+}
+
+static void dram_default_mapping_fini(struct hl_ctx *ctx)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
+               hop2_pte_addr, hop3_pte_addr;
+       int i, j;
+
+       if ((!hdev->dram_supports_virtual_memory) ||
+                       (!hdev->dram_default_page_mapping) ||
+                       (ctx->asid == HL_KERNEL_ASID_ID))
+               return;
+
+       num_of_hop3 = prop->dram_size_for_default_page_mapping;
+       do_div(num_of_hop3, prop->dram_page_size);
+       do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
+
+       hop0_addr = get_hop0_addr(ctx);
+       /* add hop1 and hop2 */
+       total_hops = num_of_hop3 + 2;
+       hop1_addr = ctx->dram_default_hops[total_hops - 1];
+       hop2_addr = ctx->dram_default_hops[total_hops - 2];
+
+       for (i = 0 ; i < num_of_hop3 ; i++) {
+               hop3_pte_addr = ctx->dram_default_hops[i];
+               for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
+                       clear_pte(ctx, hop3_pte_addr);
+                       put_pte(ctx, ctx->dram_default_hops[i]);
+                       hop3_pte_addr += HL_PTE_SIZE;
+               }
+       }
+
+       hop2_pte_addr = hop2_addr;
+       for (i = 0 ; i < num_of_hop3 ; i++) {
+               clear_pte(ctx, hop2_pte_addr);
+               put_pte(ctx, hop2_addr);
+               hop2_pte_addr += HL_PTE_SIZE;
+       }
+
+       clear_pte(ctx, hop1_addr);
+       put_pte(ctx, hop1_addr);
+       clear_pte(ctx, hop0_addr);
+
+       kfree(ctx->dram_default_hops);
+
+       flush(ctx);
+}
+
+/**
+ * hl_mmu_init() - initialize the MMU module.
+ * @hdev: habanalabs device structure.
+ *
+ * This function does the following:
+ * - Create a pool of pages for pgt_infos.
+ * - Create a shadow table for pgt
+ *
+ * Return: 0 for success, non-zero for failure.
+ */
+int hl_mmu_init(struct hl_device *hdev)
+{
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       int rc;
+
+       if (!hdev->mmu_enable)
+               return 0;
+
+       hdev->mmu_pgt_pool =
+                       gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
+
+       if (!hdev->mmu_pgt_pool) {
+               dev_err(hdev->dev, "Failed to create page gen pool\n");
+               return -ENOMEM;
+       }
+
+       rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
+                       prop->mmu_hop0_tables_total_size,
+                       prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
+                       -1);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
+               goto err_pool_add;
+       }
+
+       hdev->mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
+                                       prop->mmu_hop_table_size,
+                                       GFP_KERNEL | __GFP_ZERO);
+       if (!hdev->mmu_shadow_hop0) {
+               rc = -ENOMEM;
+               goto err_pool_add;
+       }
+
+       /* MMU H/W init will be done in device hw_init() */
+
+       return 0;
+
+err_pool_add:
+       gen_pool_destroy(hdev->mmu_pgt_pool);
+
+       return rc;
+}
+
+/**
+ * hl_mmu_fini() - release the MMU module.
+ * @hdev: habanalabs device structure.
+ *
+ * This function does the following:
+ * - Disable MMU in H/W.
+ * - Free the pgt_infos pool.
+ *
+ * All contexts should be freed before calling this function.
+ */
+void hl_mmu_fini(struct hl_device *hdev)
+{
+       if (!hdev->mmu_enable)
+               return;
+
+       /* MMU H/W fini was already done in device hw_fini() */
+
+       kvfree(hdev->mmu_shadow_hop0);
+       gen_pool_destroy(hdev->mmu_pgt_pool);
+}
+
+/**
+ * hl_mmu_ctx_init() - initialize a context for using the MMU module.
+ * @ctx: pointer to the context structure to initialize.
+ *
+ * Initialize a mutex to protect the concurrent mapping flow and a hash to hold
+ * all page table hops related to this context.
+ * Return: 0 on success, non-zero otherwise.
+ */
+int hl_mmu_ctx_init(struct hl_ctx *ctx)
+{
+       struct hl_device *hdev = ctx->hdev;
+
+       if (!hdev->mmu_enable)
+               return 0;
+
+       mutex_init(&ctx->mmu_lock);
+       hash_init(ctx->mmu_shadow_hash);
+
+       return dram_default_mapping_init(ctx);
+}
+
+/*
+ * hl_mmu_ctx_fini - disable a ctx from using the mmu module
+ *
+ * @ctx: pointer to the context structure
+ *
+ * This function does the following:
+ * - Free any pgts which were not freed yet
+ * - Free the mutex
+ * - Free DRAM default page mapping hops
+ */
+void hl_mmu_ctx_fini(struct hl_ctx *ctx)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct pgt_info *pgt_info;
+       struct hlist_node *tmp;
+       int i;
+
+       if (!hdev->mmu_enable)
+               return;
+
+       dram_default_mapping_fini(ctx);
+
+       if (!hash_empty(ctx->mmu_shadow_hash))
+               dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
+                       ctx->asid);
+
+       hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
+               dev_err_ratelimited(hdev->dev,
+                       "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
+                       pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
+               _free_hop(ctx, pgt_info);
+       }
+
+       mutex_destroy(&ctx->mmu_lock);
+}
+
+static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct hl_mmu_properties *mmu_prop;
+       u64 hop0_addr = 0, hop0_pte_addr = 0,
+               hop1_addr = 0, hop1_pte_addr = 0,
+               hop2_addr = 0, hop2_pte_addr = 0,
+               hop3_addr = 0, hop3_pte_addr = 0,
+               hop4_addr = 0, hop4_pte_addr = 0,
+               curr_pte;
+       bool is_huge, clear_hop3 = true;
+
+       /* shifts and masks are the same in PMMU and HPMMU, use one of them */
+       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
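+       /* walk the hops top-down; a missing hop means the address is not mapped */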
+       hop0_addr = get_hop0_addr(ctx);
+       hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
+
+       curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
+
+       hop1_addr = get_next_hop_addr(ctx, curr_pte);
+
+       if (hop1_addr == ULLONG_MAX)
+               goto not_mapped;
+
+       hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
+
+       curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
+
+       hop2_addr = get_next_hop_addr(ctx, curr_pte);
+
+       if (hop2_addr == ULLONG_MAX)
+               goto not_mapped;
+
+       hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
+
+       curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
+
+       hop3_addr = get_next_hop_addr(ctx, curr_pte);
+
+       if (hop3_addr == ULLONG_MAX)
+               goto not_mapped;
+
+       hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
+
+       curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
+
+       is_huge = curr_pte & LAST_MASK;
+
+       if (is_dram_addr && !is_huge) {
+               dev_err(hdev->dev,
+                               "DRAM unmapping should use huge pages only\n");
+               return -EFAULT;
+       }
+
+       if (!is_huge) {
+               hop4_addr = get_next_hop_addr(ctx, curr_pte);
+
+               if (hop4_addr == ULLONG_MAX)
+                       goto not_mapped;
+
+               hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+                                                       virt_addr);
+
+               curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
+
+               clear_hop3 = false;
+       }
+
+       if (hdev->dram_default_page_mapping && is_dram_addr) {
+               u64 default_pte = (prop->mmu_dram_default_page_addr &
+                               HOP_PHYS_ADDR_MASK) | LAST_MASK |
+                                       PAGE_PRESENT_MASK;
+               if (curr_pte == default_pte) {
+                       dev_err(hdev->dev,
+                               "DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
+                                       virt_addr);
+                       goto not_mapped;
+               }
+
+               if (!(curr_pte & PAGE_PRESENT_MASK)) {
+                       dev_err(hdev->dev,
+                               "DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
+                                       virt_addr);
+                       goto not_mapped;
+               }
+
+               write_final_pte(ctx, hop3_pte_addr, default_pte);
+               put_pte(ctx, hop3_addr);
+       } else {
+               if (!(curr_pte & PAGE_PRESENT_MASK))
+                       goto not_mapped;
+
+               if (hop4_addr)
+                       clear_pte(ctx, hop4_pte_addr);
+               else
+                       clear_pte(ctx, hop3_pte_addr);
+
+               if (hop4_addr && !put_pte(ctx, hop4_addr))
+                       clear_hop3 = true;
+
+               if (!clear_hop3)
+                       goto mapped;
+
+               clear_pte(ctx, hop3_pte_addr);
+
+               if (put_pte(ctx, hop3_addr))
+                       goto mapped;
+
+               clear_pte(ctx, hop2_pte_addr);
+
+               if (put_pte(ctx, hop2_addr))
+                       goto mapped;
+
+               clear_pte(ctx, hop1_pte_addr);
+
+               if (put_pte(ctx, hop1_addr))
+                       goto mapped;
+
+               clear_pte(ctx, hop0_pte_addr);
+       }
+
+mapped:
+       return 0;
+
+not_mapped:
+       dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+               virt_addr);
+
+       return -EINVAL;
+}
+
+/*
+ * hl_mmu_unmap - unmaps a virtual addr
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to unmap
+ * @page_size: size of the page to unmap
+ * @flush_pte: whether to do a PCI flush
+ *
+ * This function does the following:
+ * - Check that the virt addr is mapped
+ * - Unmap the virt addr and frees pgts if possible
+ * - Returns 0 on success, -EINVAL if the given addr is not mapped
+ *
+ * Because this function changes the page tables in the device and because it
+ * changes the MMU hash, it must be protected by a lock.
+ * However, because it unmaps only a single page, the lock should be taken
+ * at a higher level in order to protect the entire unmapping of the memory area
+ *
+ * For optimization reasons PCI flush may be requested once after unmapping of
+ * large area.
+ */
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+               bool flush_pte)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct hl_mmu_properties *mmu_prop;
+       u64 real_virt_addr;
+       u32 real_page_size, npages;
+       int i, rc = 0;
+       bool is_dram_addr;
+
+       if (!hdev->mmu_enable)
+               return 0;
+
+       is_dram_addr = is_dram_va(hdev, virt_addr);
+
+       if (is_dram_addr)
+               mmu_prop = &prop->dmmu;
+       else if ((page_size % prop->pmmu_huge.page_size) == 0)
+               mmu_prop = &prop->pmmu_huge;
+       else
+               mmu_prop = &prop->pmmu;
+
+       /*
+        * The H/W handles mapping of specific page sizes. Hence if the page
+        * size is bigger, we break it into sub-pages and unmap them separately.
+        */
+       if ((page_size % mmu_prop->page_size) == 0) {
+               real_page_size = mmu_prop->page_size;
+       } else {
+               dev_err(hdev->dev,
+                       "page size of %u is not %uKB aligned, can't unmap\n",
+                       page_size, mmu_prop->page_size >> 10);
+
+               return -EFAULT;
+       }
+
+       npages = page_size / real_page_size;
+       real_virt_addr = virt_addr;
+
+       for (i = 0 ; i < npages ; i++) {
+               rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
+               if (rc)
+                       break;
+
+               real_virt_addr += real_page_size;
+       }
+
+       if (flush_pte)
+               flush(ctx);
+
+       return rc;
+}
+
+static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+                       u32 page_size, bool is_dram_addr)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct hl_mmu_properties *mmu_prop;
+       u64 hop0_addr = 0, hop0_pte_addr = 0,
+               hop1_addr = 0, hop1_pte_addr = 0,
+               hop2_addr = 0, hop2_pte_addr = 0,
+               hop3_addr = 0, hop3_pte_addr = 0,
+               hop4_addr = 0, hop4_pte_addr = 0,
+               curr_pte = 0;
+       bool hop1_new = false, hop2_new = false, hop3_new = false,
+               hop4_new = false, is_huge;
+       int rc = -ENOMEM;
+
+       /*
+        * This mapping function can map a page or a huge page. For huge page
+        * there are only 3 hops rather than 4. Currently the DRAM allocation
+        * uses huge pages only but user memory could have been allocated with
+        * one of the two page sizes. Since this is common code for all three
+        * cases, we need this huge page check.
+        */
+       if (is_dram_addr) {
+               mmu_prop = &prop->dmmu;
+               is_huge = true;
+       } else if (page_size == prop->pmmu_huge.page_size) {
+               mmu_prop = &prop->pmmu_huge;
+               is_huge = true;
+       } else {
+               mmu_prop = &prop->pmmu;
+               is_huge = false;
+       }
+
+       hop0_addr = get_hop0_addr(ctx);
+       hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
+       curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
+
+       hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
+       if (hop1_addr == ULLONG_MAX)
+               goto err;
+
+       hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
+       curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
+
+       hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
+       if (hop2_addr == ULLONG_MAX)
+               goto err;
+
+       hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
+       curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
+
+       hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
+       if (hop3_addr == ULLONG_MAX)
+               goto err;
+
+       hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
+       curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
+
+       if (!is_huge) {
+               hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
+               if (hop4_addr == ULLONG_MAX)
+                       goto err;
+
+               hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+                                                       virt_addr);
+               curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
+       }
+
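+       /*
+        * When the DRAM default page mapping is enabled, the current PTE must
+        * still point to the default page and no new hops may be allocated
+        */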
+       if (hdev->dram_default_page_mapping && is_dram_addr) {
+               u64 default_pte = (prop->mmu_dram_default_page_addr &
+                                       HOP_PHYS_ADDR_MASK) | LAST_MASK |
+                                               PAGE_PRESENT_MASK;
+
+               if (curr_pte != default_pte) {
+                       dev_err(hdev->dev,
+                               "DRAM: mapping already exists for virt_addr 0x%llx\n",
+                                       virt_addr);
+                       rc = -EINVAL;
+                       goto err;
+               }
+
+               if (hop1_new || hop2_new || hop3_new || hop4_new) {
+                       dev_err(hdev->dev,
+                               "DRAM mapping should not allocate more hops\n");
+                       rc = -EFAULT;
+                       goto err;
+               }
+       } else if (curr_pte & PAGE_PRESENT_MASK) {
+               dev_err(hdev->dev,
+                       "mapping already exists for virt_addr 0x%llx\n",
+                               virt_addr);
+
+               dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
+                       *(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
+               dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
+                       *(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
+               dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
+                       *(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
+               dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
+                       *(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);
+
+               if (!is_huge)
+                       dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
+                               *(u64 *) (uintptr_t) hop4_pte_addr,
+                               hop4_pte_addr);
+
+               rc = -EINVAL;
+               goto err;
+       }
+
+       curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
+                       | PAGE_PRESENT_MASK;
+
+       if (is_huge)
+               write_final_pte(ctx, hop3_pte_addr, curr_pte);
+       else
+               write_final_pte(ctx, hop4_pte_addr, curr_pte);
+
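+       /*
+        * Link each newly allocated hop into its parent's PTE and take a
+        * reference on the parent hop
+        */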
+       if (hop1_new) {
+               curr_pte =
+                       (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+               write_pte(ctx, hop0_pte_addr, curr_pte);
+       }
+       if (hop2_new) {
+               curr_pte =
+                       (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+               write_pte(ctx, hop1_pte_addr, curr_pte);
+               get_pte(ctx, hop1_addr);
+       }
+       if (hop3_new) {
+               curr_pte =
+                       (hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+               write_pte(ctx, hop2_pte_addr, curr_pte);
+               get_pte(ctx, hop2_addr);
+       }
+
+       if (!is_huge) {
+               if (hop4_new) {
+                       curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
+                                       PAGE_PRESENT_MASK;
+                       write_pte(ctx, hop3_pte_addr, curr_pte);
+                       get_pte(ctx, hop3_addr);
+               }
+
+               get_pte(ctx, hop4_addr);
+       } else {
+               get_pte(ctx, hop3_addr);
+       }
+
+       return 0;
+
+err:
+       if (hop4_new)
+               free_hop(ctx, hop4_addr);
+       if (hop3_new)
+               free_hop(ctx, hop3_addr);
+       if (hop2_new)
+               free_hop(ctx, hop2_addr);
+       if (hop1_new)
+               free_hop(ctx, hop1_addr);
+
+       return rc;
+}
+
+/*
+ * hl_mmu_map - maps a virtual addr to physical addr
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to map from
+ * @phys_addr: phys addr to map to
+ * @page_size: physical page size
+ * @flush_pte: whether to do a PCI flush
+ *
+ * This function does the following:
+ * - Check that the virt addr is not mapped
+ * - Allocate pgts as necessary in order to map the virt addr to the phys addr
+ * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM.
+ *
+ * Because this function changes the page tables in the device and because it
+ * changes the MMU hash, it must be protected by a lock.
+ * However, because it maps only a single page, the lock should be implemented
+ * at a higher level in order to protect the entire mapping of the memory area.
+ *
+ * For optimization reasons PCI flush may be requested once after mapping of
+ * a large area.
+ */
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
+               bool flush_pte)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct hl_mmu_properties *mmu_prop;
+       u64 real_virt_addr, real_phys_addr;
+       u32 real_page_size, npages;
+       int i, rc, mapped_cnt = 0;
+       bool is_dram_addr;
+
+       if (!hdev->mmu_enable)
+               return 0;
+
+       is_dram_addr = is_dram_va(hdev, virt_addr);
+
+       if (is_dram_addr)
+               mmu_prop = &prop->dmmu;
+       else if ((page_size % prop->pmmu_huge.page_size) == 0)
+               mmu_prop = &prop->pmmu_huge;
+       else
+               mmu_prop = &prop->pmmu;
+
+       /*
+        * The H/W handles mapping of specific page sizes. Hence if the page
+        * size is bigger, we break it into sub-pages and map them separately.
+        */
+       if ((page_size % mmu_prop->page_size) == 0) {
+               real_page_size = mmu_prop->page_size;
+       } else {
+               dev_err(hdev->dev,
+                       "page size of %u is not %uKB aligned, can't map\n",
+                       page_size, mmu_prop->page_size >> 10);
+
+               return -EFAULT;
+       }
+
+       WARN_ONCE((phys_addr & (real_page_size - 1)),
+               "Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
+               phys_addr, real_page_size);
+
+       npages = page_size / real_page_size;
+       real_virt_addr = virt_addr;
+       real_phys_addr = phys_addr;
+
+       for (i = 0 ; i < npages ; i++) {
+               rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
+                               real_page_size, is_dram_addr);
+               if (rc)
+                       goto err;
+
+               real_virt_addr += real_page_size;
+               real_phys_addr += real_page_size;
+               mapped_cnt++;
+       }
+
+       if (flush_pte)
+               flush(ctx);
+
+       return 0;
+
+err:
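+       /* Roll back the pages that were mapped before the failure */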
+       real_virt_addr = virt_addr;
+       for (i = 0 ; i < mapped_cnt ; i++) {
+               if (_hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr))
+                       dev_warn_ratelimited(hdev->dev,
+                               "failed to unmap va: 0x%llx\n", real_virt_addr);
+
+               real_virt_addr += real_page_size;
+       }
+
+       flush(ctx);
+
+       return rc;
+}
+
+/*
+ * hl_mmu_swap_out - marks all mappings of the given ctx as swapped out
+ *
+ * @ctx: pointer to the context structure
+ *
+ */
+void hl_mmu_swap_out(struct hl_ctx *ctx)
+{
+
+}
+
+/*
+ * hl_mmu_swap_in - marks all mappings of the given ctx as swapped in
+ *
+ * @ctx: pointer to the context structure
+ *
+ */
+void hl_mmu_swap_in(struct hl_ctx *ctx)
+{
+
+}
diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci.c
new file mode 100644 (file)
index 0000000..1791f66
--- /dev/null
@@ -0,0 +1,400 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+#include "include/hw_ip/pci/pci_general.h"
+
+#include <linux/pci.h>
+#include <linux/bitfield.h>
+
+#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC  (HL_PCI_ELBI_TIMEOUT_MSEC * 10)
+
+#define IATU_REGION_CTRL_REGION_EN_MASK                BIT(31)
+#define IATU_REGION_CTRL_MATCH_MODE_MASK       BIT(30)
+#define IATU_REGION_CTRL_NUM_MATCH_EN_MASK     BIT(19)
+#define IATU_REGION_CTRL_BAR_NUM_MASK          GENMASK(10, 8)
+
+/**
+ * hl_pci_bars_map() - Map PCI BARs.
+ * @hdev: Pointer to hl_device structure.
+ * @name: Array of BAR names.
+ * @is_wc: Array with flag per BAR whether a write-combined mapping is needed.
+ *
+ * Request PCI regions and map them to kernel virtual addresses.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
+                       bool is_wc[3])
+{
+       struct pci_dev *pdev = hdev->pdev;
+       int rc, i, bar;
+
+       rc = pci_request_regions(pdev, HL_NAME);
+       if (rc) {
+               dev_err(hdev->dev, "Cannot obtain PCI resources\n");
+               return rc;
+       }
+
+       for (i = 0 ; i < 3 ; i++) {
+               bar = i * 2; /* 64-bit BARs */
+               hdev->pcie_bar[bar] = is_wc[i] ?
+                               pci_ioremap_wc_bar(pdev, bar) :
+                               pci_ioremap_bar(pdev, bar);
+               if (!hdev->pcie_bar[bar]) {
+                       dev_err(hdev->dev, "pci_ioremap%s_bar failed for %s\n",
+                                       is_wc[i] ? "_wc" : "", name[i]);
+                       rc = -ENODEV;
+                       goto err;
+               }
+       }
+
+       return 0;
+
+err:
+       for (i = 2 ; i >= 0 ; i--) {
+               bar = i * 2; /* 64-bit BARs */
+               if (hdev->pcie_bar[bar])
+                       iounmap(hdev->pcie_bar[bar]);
+       }
+
+       pci_release_regions(pdev);
+
+       return rc;
+}
+
+/**
+ * hl_pci_bars_unmap() - Unmap PCI BARs.
+ * @hdev: Pointer to hl_device structure.
+ *
+ * Release all PCI BARs and unmap their virtual addresses.
+ */
+static void hl_pci_bars_unmap(struct hl_device *hdev)
+{
+       struct pci_dev *pdev = hdev->pdev;
+       int i, bar;
+
+       for (i = 2 ; i >= 0 ; i--) {
+               bar = i * 2; /* 64-bit BARs */
+               iounmap(hdev->pcie_bar[bar]);
+       }
+
+       pci_release_regions(pdev);
+}
+
+/**
+ * hl_pci_elbi_write() - Write through the ELBI interface.
+ * @hdev: Pointer to hl_device structure.
+ * @addr: Address to write to
+ * @data: Data to write
+ *
+ * Return: 0 on success, negative value for failure.
+ */
+static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
+{
+       struct pci_dev *pdev = hdev->pdev;
+       ktime_t timeout;
+       u64 msec;
+       u32 val;
+
+       if (hdev->pldm)
+               msec = HL_PLDM_PCI_ELBI_TIMEOUT_MSEC;
+       else
+               msec = HL_PCI_ELBI_TIMEOUT_MSEC;
+
+       /* Clear previous status */
+       pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);
+
+       pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr);
+       pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data);
+       pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL,
+                               PCI_CONFIG_ELBI_CTRL_WRITE);
+
+       timeout = ktime_add_ms(ktime_get(), msec);
+       for (;;) {
+               pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
+               if (val & PCI_CONFIG_ELBI_STS_MASK)
+                       break;
+               if (ktime_compare(ktime_get(), timeout) > 0) {
+                       pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS,
+                                               &val);
+                       break;
+               }
+
+               usleep_range(300, 500);
+       }
+
+       if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
+               return 0;
+
+       if (val & PCI_CONFIG_ELBI_STS_ERR) {
+               dev_err(hdev->dev, "Error writing to ELBI\n");
+               return -EIO;
+       }
+
+       if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
+               dev_err(hdev->dev, "ELBI write didn't finish in time\n");
+               return -EIO;
+       }
+
+       dev_err(hdev->dev, "ELBI write has undefined bits in status\n");
+       return -EIO;
+}
+
+/**
+ * hl_pci_iatu_write() - iATU write routine.
+ * @hdev: Pointer to hl_device structure.
+ * @addr: Address to write to
+ * @data: Data to write
+ *
+ * Return: 0 on success, negative value for failure.
+ */
+int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
+{
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       u32 dbi_offset;
+       int rc;
+
+       dbi_offset = addr & 0xFFF;
+
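+       /* Move the DBI window to the iATU registers and write the data */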
+       rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000);
+       rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset,
+                               data);
+
+       if (rc)
+               return -EIO;
+
+       return 0;
+}
+
+/**
+ * hl_pci_reset_link_through_bridge() - Reset PCI link.
+ * @hdev: Pointer to hl_device structure.
+ */
+static void hl_pci_reset_link_through_bridge(struct hl_device *hdev)
+{
+       struct pci_dev *pdev = hdev->pdev;
+       struct pci_dev *parent_port;
+       u16 val;
+
+       parent_port = pdev->bus->self;
+       pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val);
+       val |= PCI_BRIDGE_CTL_BUS_RESET;
+       pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
+       ssleep(1);
+
+       val &= ~(PCI_BRIDGE_CTL_BUS_RESET);
+       pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
+       ssleep(3);
+}
+
+/**
+ * hl_pci_set_inbound_region() - Configure inbound region
+ * @hdev: Pointer to hl_device structure.
+ * @region: Inbound region number.
+ * @pci_region: Inbound region parameters.
+ *
+ * Configure the iATU inbound region.
+ *
+ * Return: 0 on success, negative value for failure.
+ */
+int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
+               struct hl_inbound_pci_region *pci_region)
+{
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       u64 bar_phys_base, region_base, region_end_address;
+       u32 offset, ctrl_reg_val;
+       int rc = 0;
+
+       /* region offset */
+       offset = (0x200 * region) + 0x100;
+
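+       /*
+        * In address match mode, derive the region's base and limit from the
+        * physical address of the matching BAR
+        */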
+       if (pci_region->mode == PCI_ADDRESS_MATCH_MODE) {
+               bar_phys_base = hdev->pcie_bar_phys[pci_region->bar];
+               region_base = bar_phys_base + pci_region->offset_in_bar;
+               region_end_address = region_base + pci_region->size - 1;
+
+               rc |= hl_pci_iatu_write(hdev, offset + 0x8,
+                               lower_32_bits(region_base));
+               rc |= hl_pci_iatu_write(hdev, offset + 0xC,
+                               upper_32_bits(region_base));
+               rc |= hl_pci_iatu_write(hdev, offset + 0x10,
+                               lower_32_bits(region_end_address));
+       }
+
+       /* Point to the specified address */
+       rc = hl_pci_iatu_write(hdev, offset + 0x14,
+                       lower_32_bits(pci_region->addr));
+       rc |= hl_pci_iatu_write(hdev, offset + 0x18,
+                       upper_32_bits(pci_region->addr));
+       rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0);
+
+       /* Enable + bar/address match + match enable + bar number */
+       ctrl_reg_val = FIELD_PREP(IATU_REGION_CTRL_REGION_EN_MASK, 1);
+       ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_MATCH_MODE_MASK,
+                       pci_region->mode);
+       ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_NUM_MATCH_EN_MASK, 1);
+
+       if (pci_region->mode == PCI_BAR_MATCH_MODE)
+               ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_BAR_NUM_MASK,
+                               pci_region->bar);
+
+       rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val);
+
+       /* Return the DBI window to the default location */
+       rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
+       rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
+
+       if (rc)
+               dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n",
+                               pci_region->bar, pci_region->addr);
+
+       return rc;
+}
+
+/**
+ * hl_pci_set_outbound_region() - Configure outbound region 0
+ * @hdev: Pointer to hl_device structure.
+ * @pci_region: Outbound region parameters.
+ *
+ * Configure the iATU outbound region 0.
+ *
+ * Return: 0 on success, negative value for failure.
+ */
+int hl_pci_set_outbound_region(struct hl_device *hdev,
+               struct hl_outbound_pci_region *pci_region)
+{
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       u64 outbound_region_end_address;
+       int rc = 0;
+
+       /* Outbound Region 0 */
+       outbound_region_end_address =
+                       pci_region->addr + pci_region->size - 1;
+       rc |= hl_pci_iatu_write(hdev, 0x008,
+                               lower_32_bits(pci_region->addr));
+       rc |= hl_pci_iatu_write(hdev, 0x00C,
+                               upper_32_bits(pci_region->addr));
+       rc |= hl_pci_iatu_write(hdev, 0x010,
+                               lower_32_bits(outbound_region_end_address));
+       rc |= hl_pci_iatu_write(hdev, 0x014, 0);
+
+       if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64))
+               rc |= hl_pci_iatu_write(hdev, 0x018, 0x08000000);
+       else
+               rc |= hl_pci_iatu_write(hdev, 0x018, 0);
+
+       rc |= hl_pci_iatu_write(hdev, 0x020,
+                               upper_32_bits(outbound_region_end_address));
+       /* Increase region size */
+       rc |= hl_pci_iatu_write(hdev, 0x000, 0x00002000);
+       /* Enable */
+       rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000);
+
+       /* Return the DBI window to the default location */
+       rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
+       rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
+
+       return rc;
+}
+
+/**
+ * hl_pci_set_dma_mask() - Set DMA masks for the device.
+ * @hdev: Pointer to hl_device structure.
+ *
+ * This function sets the DMA masks (regular and consistent) to the value
+ * specified by hdev->dma_mask. If either mask cannot be set, an error is
+ * returned.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+static int hl_pci_set_dma_mask(struct hl_device *hdev)
+{
+       struct pci_dev *pdev = hdev->pdev;
+       int rc;
+
+       /* set DMA mask */
+       rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to set pci dma mask to %d bits, error %d\n",
+                       hdev->dma_mask, rc);
+               return rc;
+       }
+
+       rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to set pci consistent dma mask to %d bits, error %d\n",
+                       hdev->dma_mask, rc);
+               return rc;
+       }
+
+       return 0;
+}
+
+/**
+ * hl_pci_init() - PCI initialization code.
+ * @hdev: Pointer to hl_device structure.
+ *
+ * Set DMA masks, initialize the PCI controller and map the PCI BARs.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+int hl_pci_init(struct hl_device *hdev)
+{
+       struct pci_dev *pdev = hdev->pdev;
+       int rc;
+
+       if (hdev->reset_pcilink)
+               hl_pci_reset_link_through_bridge(hdev);
+
+       rc = pci_enable_device_mem(pdev);
+       if (rc) {
+               dev_err(hdev->dev, "can't enable PCI device\n");
+               return rc;
+       }
+
+       pci_set_master(pdev);
+
+       rc = hdev->asic_funcs->pci_bars_map(hdev);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to initialize PCI BARs\n");
+               goto disable_device;
+       }
+
+       rc = hdev->asic_funcs->init_iatu(hdev);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to initialize iATU\n");
+               goto disable_device;
+       }
+
+       rc = hl_pci_set_dma_mask(hdev);
+       if (rc)
+               goto disable_device;
+
+       return 0;
+
+disable_device:
+       pci_clear_master(pdev);
+       pci_disable_device(pdev);
+
+       return rc;
+}
+
+/**
+ * hl_pci_fini() - PCI finalization code.
+ * @hdev: Pointer to hl_device structure.
+ *
+ * Unmap PCI BARs and disable PCI device.
+ */
+void hl_pci_fini(struct hl_device *hdev)
+{
+       hl_pci_bars_unmap(hdev);
+
+       pci_clear_master(hdev->pdev);
+       pci_disable_device(hdev->pdev);
+}
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
new file mode 100644 (file)
index 0000000..5d78d5e
--- /dev/null
@@ -0,0 +1,442 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/pci.h>
+
+#define SET_CLK_PKT_TIMEOUT    1000000 /* 1s */
+#define SET_PWR_PKT_TIMEOUT    1000000 /* 1s */
+
+long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
+{
+       struct armcp_packet pkt;
+       long result;
+       int rc;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       if (curr)
+               pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET <<
+                                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       else
+               pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET <<
+                                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.pll_index = cpu_to_le32(pll_index);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                               SET_CLK_PKT_TIMEOUT, &result);
+
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to get frequency of PLL %d, error %d\n",
+                       pll_index, rc);
+               result = rc;
+       }
+
+       return result;
+}
+
+void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
+{
+       struct armcp_packet pkt;
+       int rc;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET <<
+                                       ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.pll_index = cpu_to_le32(pll_index);
+       pkt.value = cpu_to_le64(freq);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                       SET_CLK_PKT_TIMEOUT, NULL);
+
+       if (rc)
+               dev_err(hdev->dev,
+                       "Failed to set frequency to PLL %d, error %d\n",
+                       pll_index, rc);
+}
+
+u64 hl_get_max_power(struct hl_device *hdev)
+{
+       struct armcp_packet pkt;
+       long result;
+       int rc;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                               SET_PWR_PKT_TIMEOUT, &result);
+
+       if (rc) {
+               dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
+               result = rc;
+       }
+
+       return result;
+}
+
+void hl_set_max_power(struct hl_device *hdev, u64 value)
+{
+       struct armcp_packet pkt;
+       int rc;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET <<
+                               ARMCP_PKT_CTL_OPCODE_SHIFT);
+       pkt.value = cpu_to_le64(value);
+
+       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+                                       SET_PWR_PKT_TIMEOUT, NULL);
+
+       if (rc)
+               dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
+}
+
+static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr,
+                               char *buf)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%s\n", hdev->asic_prop.uboot_ver);
+}
+
+static ssize_t armcp_kernel_ver_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%s", hdev->asic_prop.armcp_info.kernel_version);
+}
+
+static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr,
+                               char *buf)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.armcp_version);
+}
+
+static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
+                               char *buf)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+
+       return sprintf(buf, "0x%08x\n",
+                       hdev->asic_prop.armcp_info.cpld_version);
+}
+
+static ssize_t infineon_ver_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+
+       return sprintf(buf, "0x%04x\n",
+                       hdev->asic_prop.armcp_info.infineon_version);
+}
+
+static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
+                               char *buf)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.fuse_version);
+}
+
+static ssize_t thermal_ver_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%s", hdev->asic_prop.armcp_info.thermal_version);
+}
+
+static ssize_t preboot_btl_ver_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%s\n", hdev->asic_prop.preboot_ver);
+}
+
+static ssize_t soft_reset_store(struct device *dev,
+                               struct device_attribute *attr, const char *buf,
+                               size_t count)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+       unsigned long value;
+       int rc;
+
+       rc = kstrtoul(buf, 0, &value);
+
+       if (rc) {
+               count = -EINVAL;
+               goto out;
+       }
+
+       if (!hdev->supports_soft_reset) {
+               dev_err(hdev->dev, "Device does not support soft-reset\n");
+               goto out;
+       }
+
+       dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n");
+
+       hl_device_reset(hdev, false, false);
+
+out:
+       return count;
+}
+
+static ssize_t hard_reset_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+       unsigned long value;
+       int rc;
+
+       rc = kstrtoul(buf, 0, &value);
+
+       if (rc) {
+               count = -EINVAL;
+               goto out;
+       }
+
+       dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n");
+
+       hl_device_reset(hdev, true, false);
+
+out:
+       return count;
+}
+
+static ssize_t device_type_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+       char *str;
+
+       switch (hdev->asic_type) {
+       case ASIC_GOYA:
+               str = "GOYA";
+               break;
+       case ASIC_GAUDI:
+               str = "GAUDI";
+               break;
+       default:
+               dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
+                               hdev->asic_type);
+               return -EINVAL;
+       }
+
+       return sprintf(buf, "%s\n", str);
+}
+
+static ssize_t pci_addr_show(struct device *dev, struct device_attribute *attr,
+                               char *buf)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%04x:%02x:%02x.%x\n",
+                       pci_domain_nr(hdev->pdev->bus),
+                       hdev->pdev->bus->number,
+                       PCI_SLOT(hdev->pdev->devfn),
+                       PCI_FUNC(hdev->pdev->devfn));
+}
+
+static ssize_t status_show(struct device *dev, struct device_attribute *attr,
+                               char *buf)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+       char *str;
+
+       if (atomic_read(&hdev->in_reset))
+               str = "In reset";
+       else if (hdev->disabled)
+               str = "Malfunction";
+       else
+               str = "Operational";
+
+       return sprintf(buf, "%s\n", str);
+}
+
+static ssize_t soft_reset_cnt_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%d\n", hdev->soft_reset_cnt);
+}
+
+static ssize_t hard_reset_cnt_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%d\n", hdev->hard_reset_cnt);
+}
+
+static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
+                               char *buf)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+       long val;
+
+       if (hl_device_disabled_or_in_reset(hdev))
+               return -ENODEV;
+
+       val = hl_get_max_power(hdev);
+
+       return sprintf(buf, "%lu\n", val);
+}
+
+static ssize_t max_power_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t count)
+{
+       struct hl_device *hdev = dev_get_drvdata(dev);
+       unsigned long value;
+       int rc;
+
+       if (hl_device_disabled_or_in_reset(hdev)) {
+               count = -ENODEV;
+               goto out;
+       }
+
+       rc = kstrtoul(buf, 0, &value);
+
+       if (rc) {
+               count = -EINVAL;
+               goto out;
+       }
+
+       hdev->max_power = value;
+       hl_set_max_power(hdev, value);
+
+out:
+       return count;
+}
+
+static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
+                       struct bin_attribute *attr, char *buf, loff_t offset,
+                       size_t max_size)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct hl_device *hdev = dev_get_drvdata(dev);
+       char *data;
+       int rc;
+
+       if (!max_size)
+               return -EINVAL;
+
+       data = kzalloc(max_size, GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       rc = hdev->asic_funcs->get_eeprom_data(hdev, data, max_size);
+       if (rc)
+               goto out;
+
+       memcpy(buf, data, max_size);
+
+out:
+       kfree(data);
+
+       return max_size;
+}
+
+static DEVICE_ATTR_RO(armcp_kernel_ver);
+static DEVICE_ATTR_RO(armcp_ver);
+static DEVICE_ATTR_RO(cpld_ver);
+static DEVICE_ATTR_RO(device_type);
+static DEVICE_ATTR_RO(fuse_ver);
+static DEVICE_ATTR_WO(hard_reset);
+static DEVICE_ATTR_RO(hard_reset_cnt);
+static DEVICE_ATTR_RO(infineon_ver);
+static DEVICE_ATTR_RW(max_power);
+static DEVICE_ATTR_RO(pci_addr);
+static DEVICE_ATTR_RO(preboot_btl_ver);
+static DEVICE_ATTR_WO(soft_reset);
+static DEVICE_ATTR_RO(soft_reset_cnt);
+static DEVICE_ATTR_RO(status);
+static DEVICE_ATTR_RO(thermal_ver);
+static DEVICE_ATTR_RO(uboot_ver);
+
+static struct bin_attribute bin_attr_eeprom = {
+       .attr = {.name = "eeprom", .mode = (0444)},
+       .size = PAGE_SIZE,
+       .read = eeprom_read_handler
+};
+
+static struct attribute *hl_dev_attrs[] = {
+       &dev_attr_armcp_kernel_ver.attr,
+       &dev_attr_armcp_ver.attr,
+       &dev_attr_cpld_ver.attr,
+       &dev_attr_device_type.attr,
+       &dev_attr_fuse_ver.attr,
+       &dev_attr_hard_reset.attr,
+       &dev_attr_hard_reset_cnt.attr,
+       &dev_attr_infineon_ver.attr,
+       &dev_attr_max_power.attr,
+       &dev_attr_pci_addr.attr,
+       &dev_attr_preboot_btl_ver.attr,
+       &dev_attr_soft_reset.attr,
+       &dev_attr_soft_reset_cnt.attr,
+       &dev_attr_status.attr,
+       &dev_attr_thermal_ver.attr,
+       &dev_attr_uboot_ver.attr,
+       NULL,
+};
+
+static struct bin_attribute *hl_dev_bin_attrs[] = {
+       &bin_attr_eeprom,
+       NULL
+};
+
+static struct attribute_group hl_dev_attr_group = {
+       .attrs = hl_dev_attrs,
+       .bin_attrs = hl_dev_bin_attrs,
+};
+
+static struct attribute_group hl_dev_clks_attr_group;
+
+static const struct attribute_group *hl_dev_attr_groups[] = {
+       &hl_dev_attr_group,
+       &hl_dev_clks_attr_group,
+       NULL,
+};
+
+int hl_sysfs_init(struct hl_device *hdev)
+{
+       int rc;
+
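+       /* Default PM profile: automatic for GOYA, manual for other ASICs */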
+       if (hdev->asic_type == ASIC_GOYA)
+               hdev->pm_mng_profile = PM_AUTO;
+       else
+               hdev->pm_mng_profile = PM_MANUAL;
+       hdev->max_power = hdev->asic_prop.max_power_default;
+
+       hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group);
+
+       rc = device_add_groups(hdev->dev, hl_dev_attr_groups);
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to add groups to device, error %d\n", rc);
+               return rc;
+       }
+
+       return 0;
+}
+
+void hl_sysfs_fini(struct hl_device *hdev)
+{
+       device_remove_groups(hdev->dev, hl_dev_attr_groups);
+}
diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c
deleted file mode 100644 (file)
index 1e3e5b1..0000000
+++ /dev/null
@@ -1,237 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-
-#include <linux/slab.h>
-
-static void hl_ctx_fini(struct hl_ctx *ctx)
-{
-       struct hl_device *hdev = ctx->hdev;
-       int i;
-
-       /*
-        * If we arrived here, there are no jobs waiting for this context
-        * on its queues so we can safely remove it.
-        * This is because for each CS, we increment the ref count and for
-        * every CS that was finished we decrement it and we won't arrive
-        * to this function unless the ref count is 0
-        */
-
-       for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
-               dma_fence_put(ctx->cs_pending[i]);
-
-       kfree(ctx->cs_pending);
-
-       if (ctx->asid != HL_KERNEL_ASID_ID) {
-               /* The engines are stopped as there is no executing CS, but the
-                * Coresight might be still working by accessing addresses
-                * related to the stopped engines. Hence stop it explicitly.
-                * Stop only if this is the compute context, as there can be
-                * only one compute context
-                */
-               if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
-                       hl_device_set_debug_mode(hdev, false);
-
-               hl_vm_ctx_fini(ctx);
-               hl_asid_free(hdev, ctx->asid);
-       } else {
-               hl_mmu_ctx_fini(ctx);
-       }
-}
-
-void hl_ctx_do_release(struct kref *ref)
-{
-       struct hl_ctx *ctx;
-
-       ctx = container_of(ref, struct hl_ctx, refcount);
-
-       hl_ctx_fini(ctx);
-
-       if (ctx->hpriv)
-               hl_hpriv_put(ctx->hpriv);
-
-       kfree(ctx);
-}
-
-int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
-{
-       struct hl_ctx_mgr *mgr = &hpriv->ctx_mgr;
-       struct hl_ctx *ctx;
-       int rc;
-
-       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
-       if (!ctx) {
-               rc = -ENOMEM;
-               goto out_err;
-       }
-
-       mutex_lock(&mgr->ctx_lock);
-       rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
-       mutex_unlock(&mgr->ctx_lock);
-
-       if (rc < 0) {
-               dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n");
-               goto free_ctx;
-       }
-
-       ctx->handle = rc;
-
-       rc = hl_ctx_init(hdev, ctx, false);
-       if (rc)
-               goto remove_from_idr;
-
-       hl_hpriv_get(hpriv);
-       ctx->hpriv = hpriv;
-
-       /* TODO: remove for multiple contexts per process */
-       hpriv->ctx = ctx;
-
-       /* TODO: remove the following line for multiple process support */
-       hdev->compute_ctx = ctx;
-
-       return 0;
-
-remove_from_idr:
-       mutex_lock(&mgr->ctx_lock);
-       idr_remove(&mgr->ctx_handles, ctx->handle);
-       mutex_unlock(&mgr->ctx_lock);
-free_ctx:
-       kfree(ctx);
-out_err:
-       return rc;
-}
-
-void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
-{
-       if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
-               return;
-
-       dev_warn(hdev->dev,
-               "user process released device but its command submissions are still executing\n");
-}
-
-int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
-{
-       int rc = 0;
-
-       ctx->hdev = hdev;
-
-       kref_init(&ctx->refcount);
-
-       ctx->cs_sequence = 1;
-       spin_lock_init(&ctx->cs_lock);
-       atomic_set(&ctx->thread_ctx_switch_token, 1);
-       ctx->thread_ctx_switch_wait_token = 0;
-       ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
-                               sizeof(struct dma_fence *),
-                               GFP_KERNEL);
-       if (!ctx->cs_pending)
-               return -ENOMEM;
-
-       if (is_kernel_ctx) {
-               ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
-               rc = hl_mmu_ctx_init(ctx);
-               if (rc) {
-                       dev_err(hdev->dev, "Failed to init mmu ctx module\n");
-                       goto mem_ctx_err;
-               }
-       } else {
-               ctx->asid = hl_asid_alloc(hdev);
-               if (!ctx->asid) {
-                       dev_err(hdev->dev, "No free ASID, failed to create context\n");
-                       return -ENOMEM;
-               }
-
-               rc = hl_vm_ctx_init(ctx);
-               if (rc) {
-                       dev_err(hdev->dev, "Failed to init mem ctx module\n");
-                       rc = -ENOMEM;
-                       goto mem_ctx_err;
-               }
-       }
-
-       return 0;
-
-mem_ctx_err:
-       if (ctx->asid != HL_KERNEL_ASID_ID)
-               hl_asid_free(hdev, ctx->asid);
-
-       return rc;
-}
-
-void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx)
-{
-       kref_get(&ctx->refcount);
-}
-
-int hl_ctx_put(struct hl_ctx *ctx)
-{
-       return kref_put(&ctx->refcount, hl_ctx_do_release);
-}
-
-struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
-{
-       struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
-       struct dma_fence *fence;
-
-       spin_lock(&ctx->cs_lock);
-
-       if (seq >= ctx->cs_sequence) {
-               spin_unlock(&ctx->cs_lock);
-               return ERR_PTR(-EINVAL);
-       }
-
-       if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
-               spin_unlock(&ctx->cs_lock);
-               return NULL;
-       }
-
-       fence = dma_fence_get(
-                       ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
-       spin_unlock(&ctx->cs_lock);
-
-       return fence;
-}
-
-/*
- * hl_ctx_mgr_init - initialize the context manager
- *
- * @mgr: pointer to context manager structure
- *
- * This manager is an object inside the hpriv object of the user process.
- * The function is called when a user process opens the FD.
- */
-void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr)
-{
-       mutex_init(&mgr->ctx_lock);
-       idr_init(&mgr->ctx_handles);
-}
-
-/*
- * hl_ctx_mgr_fini - finalize the context manager
- *
- * @hdev: pointer to device structure
- * @mgr: pointer to context manager structure
- *
- * This function goes over all the contexts in the manager and frees them.
- * It is called when a process closes the FD.
- */
-void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr)
-{
-       struct hl_ctx *ctx;
-       struct idr *idp;
-       u32 id;
-
-       idp = &mgr->ctx_handles;
-
-       idr_for_each_entry(idp, ctx, id)
-               hl_ctx_free(hdev, ctx);
-
-       idr_destroy(&mgr->ctx_handles);
-       mutex_destroy(&mgr->ctx_lock);
-}
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
deleted file mode 100644 (file)
index fc4372c..0000000
+++ /dev/null
@@ -1,1411 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-#include "include/hw_ip/mmu/mmu_general.h"
-
-#include <linux/pci.h>
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
-
-#define MMU_ADDR_BUF_SIZE      40
-#define MMU_ASID_BUF_SIZE      10
-#define MMU_KBUF_SIZE          (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
-
-static struct dentry *hl_debug_root;
-
-static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
-                               u8 i2c_reg, u32 *val)
-{
-       struct armcp_packet pkt;
-       int rc;
-
-       if (hl_device_disabled_or_in_reset(hdev))
-               return -EBUSY;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.i2c_bus = i2c_bus;
-       pkt.i2c_addr = i2c_addr;
-       pkt.i2c_reg = i2c_reg;
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       HL_DEVICE_TIMEOUT_USEC, (long *) val);
-
-       if (rc)
-               dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc);
-
-       return rc;
-}
-
-static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
-                               u8 i2c_reg, u32 val)
-{
-       struct armcp_packet pkt;
-       int rc;
-
-       if (hl_device_disabled_or_in_reset(hdev))
-               return -EBUSY;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.i2c_bus = i2c_bus;
-       pkt.i2c_addr = i2c_addr;
-       pkt.i2c_reg = i2c_reg;
-       pkt.value = cpu_to_le64(val);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       HL_DEVICE_TIMEOUT_USEC, NULL);
-
-       if (rc)
-               dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc);
-
-       return rc;
-}
-
-static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
-{
-       struct armcp_packet pkt;
-       int rc;
-
-       if (hl_device_disabled_or_in_reset(hdev))
-               return;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.led_index = cpu_to_le32(led);
-       pkt.value = cpu_to_le64(state);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               HL_DEVICE_TIMEOUT_USEC, NULL);
-
-       if (rc)
-               dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc);
-}
-
-static int command_buffers_show(struct seq_file *s, void *data)
-{
-       struct hl_debugfs_entry *entry = s->private;
-       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
-       struct hl_cb *cb;
-       bool first = true;
-
-       spin_lock(&dev_entry->cb_spinlock);
-
-       list_for_each_entry(cb, &dev_entry->cb_list, debugfs_list) {
-               if (first) {
-                       first = false;
-                       seq_puts(s, "\n");
-                       seq_puts(s, " CB ID   CTX ID   CB size    CB RefCnt    mmap?   CS counter\n");
-                       seq_puts(s, "---------------------------------------------------------------\n");
-               }
-               seq_printf(s,
-                       "   %03d        %d    0x%08x      %d          %d          %d\n",
-                       cb->id, cb->ctx_id, cb->size,
-                       kref_read(&cb->refcount),
-                       cb->mmap, cb->cs_cnt);
-       }
-
-       spin_unlock(&dev_entry->cb_spinlock);
-
-       if (!first)
-               seq_puts(s, "\n");
-
-       return 0;
-}
-
-static int command_submission_show(struct seq_file *s, void *data)
-{
-       struct hl_debugfs_entry *entry = s->private;
-       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
-       struct hl_cs *cs;
-       bool first = true;
-
-       spin_lock(&dev_entry->cs_spinlock);
-
-       list_for_each_entry(cs, &dev_entry->cs_list, debugfs_list) {
-               if (first) {
-                       first = false;
-                       seq_puts(s, "\n");
-                       seq_puts(s, " CS ID   CTX ASID   CS RefCnt   Submitted    Completed\n");
-                       seq_puts(s, "------------------------------------------------------\n");
-               }
-               seq_printf(s,
-                       "   %llu       %d          %d           %d            %d\n",
-                       cs->sequence, cs->ctx->asid,
-                       kref_read(&cs->refcount),
-                       cs->submitted, cs->completed);
-       }
-
-       spin_unlock(&dev_entry->cs_spinlock);
-
-       if (!first)
-               seq_puts(s, "\n");
-
-       return 0;
-}
-
-static int command_submission_jobs_show(struct seq_file *s, void *data)
-{
-       struct hl_debugfs_entry *entry = s->private;
-       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
-       struct hl_cs_job *job;
-       bool first = true;
-
-       spin_lock(&dev_entry->cs_job_spinlock);
-
-       list_for_each_entry(job, &dev_entry->cs_job_list, debugfs_list) {
-               if (first) {
-                       first = false;
-                       seq_puts(s, "\n");
-                       seq_puts(s, " JOB ID   CS ID    CTX ASID   H/W Queue\n");
-                       seq_puts(s, "---------------------------------------\n");
-               }
-               if (job->cs)
-                       seq_printf(s,
-                               "    %02d       %llu         %d         %d\n",
-                               job->id, job->cs->sequence, job->cs->ctx->asid,
-                               job->hw_queue_id);
-               else
-                       seq_printf(s,
-                               "    %02d       0         %d         %d\n",
-                               job->id, HL_KERNEL_ASID_ID, job->hw_queue_id);
-       }
-
-       spin_unlock(&dev_entry->cs_job_spinlock);
-
-       if (!first)
-               seq_puts(s, "\n");
-
-       return 0;
-}
-
-static int userptr_show(struct seq_file *s, void *data)
-{
-       struct hl_debugfs_entry *entry = s->private;
-       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
-       struct hl_userptr *userptr;
-       char dma_dir[4][30] = {"DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
-                               "DMA_FROM_DEVICE", "DMA_NONE"};
-       bool first = true;
-
-       spin_lock(&dev_entry->userptr_spinlock);
-
-       list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {
-               if (first) {
-                       first = false;
-                       seq_puts(s, "\n");
-                       seq_puts(s, " user virtual address     size             dma dir\n");
-                       seq_puts(s, "----------------------------------------------------------\n");
-               }
-               seq_printf(s,
-                       "    0x%-14llx      %-10u    %-30s\n",
-                       userptr->addr, userptr->size, dma_dir[userptr->dir]);
-       }
-
-       spin_unlock(&dev_entry->userptr_spinlock);
-
-       if (!first)
-               seq_puts(s, "\n");
-
-       return 0;
-}
-
-static int vm_show(struct seq_file *s, void *data)
-{
-       struct hl_debugfs_entry *entry = s->private;
-       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
-       struct hl_ctx *ctx;
-       struct hl_vm *vm;
-       struct hl_vm_hash_node *hnode;
-       struct hl_userptr *userptr;
-       struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
-       enum vm_type_t *vm_type;
-       bool once = true;
-       u64 j;
-       int i;
-
-       if (!dev_entry->hdev->mmu_enable)
-               return 0;
-
-       spin_lock(&dev_entry->ctx_mem_hash_spinlock);
-
-       list_for_each_entry(ctx, &dev_entry->ctx_mem_hash_list, debugfs_list) {
-               once = false;
-               seq_puts(s, "\n\n----------------------------------------------------");
-               seq_puts(s, "\n----------------------------------------------------\n\n");
-               seq_printf(s, "ctx asid: %u\n", ctx->asid);
-
-               seq_puts(s, "\nmappings:\n\n");
-               seq_puts(s, "    virtual address        size          handle\n");
-               seq_puts(s, "----------------------------------------------------\n");
-               mutex_lock(&ctx->mem_hash_lock);
-               hash_for_each(ctx->mem_hash, i, hnode, node) {
-                       vm_type = hnode->ptr;
-
-                       if (*vm_type == VM_TYPE_USERPTR) {
-                               userptr = hnode->ptr;
-                               seq_printf(s,
-                                       "    0x%-14llx      %-10u\n",
-                                       hnode->vaddr, userptr->size);
-                       } else {
-                               phys_pg_pack = hnode->ptr;
-                               seq_printf(s,
-                                       "    0x%-14llx      %-10llu       %-4u\n",
-                                       hnode->vaddr, phys_pg_pack->total_size,
-                                       phys_pg_pack->handle);
-                       }
-               }
-               mutex_unlock(&ctx->mem_hash_lock);
-
-               vm = &ctx->hdev->vm;
-               spin_lock(&vm->idr_lock);
-
-               if (!idr_is_empty(&vm->phys_pg_pack_handles))
-                       seq_puts(s, "\n\nallocations:\n");
-
-               idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_pack, i) {
-                       if (phys_pg_pack->asid != ctx->asid)
-                               continue;
-
-                       seq_printf(s, "\nhandle: %u\n", phys_pg_pack->handle);
-                       seq_printf(s, "page size: %u\n\n",
-                                               phys_pg_pack->page_size);
-                       seq_puts(s, "   physical address\n");
-                       seq_puts(s, "---------------------\n");
-                       for (j = 0 ; j < phys_pg_pack->npages ; j++) {
-                               seq_printf(s, "    0x%-14llx\n",
-                                               phys_pg_pack->pages[j]);
-                       }
-               }
-               spin_unlock(&vm->idr_lock);
-
-       }
-
-       spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
-
-       if (!once)
-               seq_puts(s, "\n");
-
-       return 0;
-}
-
-/* these inline functions are copied from mmu.c */
-static inline u64 get_hop0_addr(struct hl_ctx *ctx)
-{
-       return ctx->hdev->asic_prop.mmu_pgt_addr +
-                       (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
-}
-
-static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-                                       u64 virt_addr, u64 mask, u64 shift)
-{
-       return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-                       ((virt_addr & mask) >> shift);
-}
-
-static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
-                                       struct hl_mmu_properties *mmu_specs,
-                                       u64 hop_addr, u64 vaddr)
-{
-       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask,
-                                       mmu_specs->hop0_shift);
-}
-
-static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
-                                       struct hl_mmu_properties *mmu_specs,
-                                       u64 hop_addr, u64 vaddr)
-{
-       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask,
-                                       mmu_specs->hop1_shift);
-}
-
-static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
-                                       struct hl_mmu_properties *mmu_specs,
-                                       u64 hop_addr, u64 vaddr)
-{
-       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask,
-                                       mmu_specs->hop2_shift);
-}
-
-static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
-                                       struct hl_mmu_properties *mmu_specs,
-                                       u64 hop_addr, u64 vaddr)
-{
-       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask,
-                                       mmu_specs->hop3_shift);
-}
-
-static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
-                                       struct hl_mmu_properties *mmu_specs,
-                                       u64 hop_addr, u64 vaddr)
-{
-       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask,
-                                       mmu_specs->hop4_shift);
-}
-
-static inline u64 get_next_hop_addr(u64 curr_pte)
-{
-       if (curr_pte & PAGE_PRESENT_MASK)
-               return curr_pte & HOP_PHYS_ADDR_MASK;
-       else
-               return ULLONG_MAX;
-}
-
-static int mmu_show(struct seq_file *s, void *data)
-{
-       struct hl_debugfs_entry *entry = s->private;
-       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
-       struct hl_device *hdev = dev_entry->hdev;
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       struct hl_mmu_properties *mmu_prop;
-       struct hl_ctx *ctx;
-       bool is_dram_addr;
-
-       u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
-               hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
-               hop2_addr = 0, hop2_pte_addr = 0, hop2_pte = 0,
-               hop3_addr = 0, hop3_pte_addr = 0, hop3_pte = 0,
-               hop4_addr = 0, hop4_pte_addr = 0, hop4_pte = 0,
-               virt_addr = dev_entry->mmu_addr;
-
-       if (!hdev->mmu_enable)
-               return 0;
-
-       if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID)
-               ctx = hdev->kernel_ctx;
-       else
-               ctx = hdev->compute_ctx;
-
-       if (!ctx) {
-               dev_err(hdev->dev, "no ctx available\n");
-               return 0;
-       }
-
-       is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-                                               prop->dmmu.start_addr,
-                                               prop->dmmu.end_addr);
-
-       /* shifts and masks are the same in PMMU and HPMMU, use one of them */
-       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
-
-       mutex_lock(&ctx->mmu_lock);
-
-       /* the following lookup is copied from unmap() in mmu.c */
-
-       hop0_addr = get_hop0_addr(ctx);
-       hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
-       hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
-       hop1_addr = get_next_hop_addr(hop0_pte);
-
-       if (hop1_addr == ULLONG_MAX)
-               goto not_mapped;
-
-       hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
-       hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
-       hop2_addr = get_next_hop_addr(hop1_pte);
-
-       if (hop2_addr == ULLONG_MAX)
-               goto not_mapped;
-
-       hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
-       hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
-       hop3_addr = get_next_hop_addr(hop2_pte);
-
-       if (hop3_addr == ULLONG_MAX)
-               goto not_mapped;
-
-       hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
-       hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
-
-       if (!(hop3_pte & LAST_MASK)) {
-               hop4_addr = get_next_hop_addr(hop3_pte);
-
-               if (hop4_addr == ULLONG_MAX)
-                       goto not_mapped;
-
-               hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
-                                                       virt_addr);
-               hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
-               if (!(hop4_pte & PAGE_PRESENT_MASK))
-                       goto not_mapped;
-       } else {
-               if (!(hop3_pte & PAGE_PRESENT_MASK))
-                       goto not_mapped;
-       }
-
-       seq_printf(s, "asid: %u, virt_addr: 0x%llx\n",
-                       dev_entry->mmu_asid, dev_entry->mmu_addr);
-
-       seq_printf(s, "hop0_addr: 0x%llx\n", hop0_addr);
-       seq_printf(s, "hop0_pte_addr: 0x%llx\n", hop0_pte_addr);
-       seq_printf(s, "hop0_pte: 0x%llx\n", hop0_pte);
-
-       seq_printf(s, "hop1_addr: 0x%llx\n", hop1_addr);
-       seq_printf(s, "hop1_pte_addr: 0x%llx\n", hop1_pte_addr);
-       seq_printf(s, "hop1_pte: 0x%llx\n", hop1_pte);
-
-       seq_printf(s, "hop2_addr: 0x%llx\n", hop2_addr);
-       seq_printf(s, "hop2_pte_addr: 0x%llx\n", hop2_pte_addr);
-       seq_printf(s, "hop2_pte: 0x%llx\n", hop2_pte);
-
-       seq_printf(s, "hop3_addr: 0x%llx\n", hop3_addr);
-       seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr);
-       seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte);
-
-       if (!(hop3_pte & LAST_MASK)) {
-               seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr);
-               seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr);
-               seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte);
-       }
-
-       goto out;
-
-not_mapped:
-       dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
-                       virt_addr);
-out:
-       mutex_unlock(&ctx->mmu_lock);
-
-       return 0;
-}
-
-static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,
-               size_t count, loff_t *f_pos)
-{
-       struct seq_file *s = file->private_data;
-       struct hl_debugfs_entry *entry = s->private;
-       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
-       struct hl_device *hdev = dev_entry->hdev;
-       char kbuf[MMU_KBUF_SIZE];
-       char *c;
-       ssize_t rc;
-
-       if (!hdev->mmu_enable)
-               return count;
-
-       if (count > sizeof(kbuf) - 1)
-               goto err;
-       if (copy_from_user(kbuf, buf, count))
-               goto err;
-       kbuf[count] = 0;
-
-       c = strchr(kbuf, ' ');
-       if (!c)
-               goto err;
-       *c = '\0';
-
-       rc = kstrtouint(kbuf, 10, &dev_entry->mmu_asid);
-       if (rc)
-               goto err;
-
-       if (strncmp(c+1, "0x", 2))
-               goto err;
-       rc = kstrtoull(c+3, 16, &dev_entry->mmu_addr);
-       if (rc)
-               goto err;
-
-       return count;
-
-err:
-       dev_err(hdev->dev, "usage: echo <asid> <0xaddr> > mmu\n");
-
-       return -EINVAL;
-}
-
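For orientation, the sketch below shows one way to drive this node from user space; it is illustrative only, and the /sys/kernel/debug/habanalabs/hl0 path, the ASID value and the address are assumptions rather than something this patch defines (the node is created with mode 0444, so this has to run as root):

/* Hypothetical user-space sketch: write "<asid> 0x<va>", then read back the hop walk. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char cmd[] = "0 0x1000000000000";	/* assumed ASID and device VA */
	char out[512];
	ssize_t n;
	int fd;

	fd = open("/sys/kernel/debug/habanalabs/hl0/mmu", O_RDWR);
	if (fd < 0)
		return 1;
	if (write(fd, cmd, strlen(cmd)) < 0)	/* parsed by mmu_asid_va_write() */
		return 1;
	lseek(fd, 0, SEEK_SET);
	n = read(fd, out, sizeof(out) - 1);	/* rendered by mmu_show() */
	if (n > 0) {
		out[n] = '\0';
		fputs(out, stdout);
	}
	close(fd);
	return 0;
}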
-static int engines_show(struct seq_file *s, void *data)
-{
-       struct hl_debugfs_entry *entry = s->private;
-       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
-       struct hl_device *hdev = dev_entry->hdev;
-
-       if (atomic_read(&hdev->in_reset)) {
-               dev_warn_ratelimited(hdev->dev,
-                               "Can't check device idle during reset\n");
-               return 0;
-       }
-
-       hdev->asic_funcs->is_device_idle(hdev, NULL, s);
-
-       return 0;
-}
-
-static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
-{
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-
-       if (!hdev->mmu_enable)
-               goto out;
-
-       if (hdev->dram_supports_virtual_memory &&
-               (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
-               return true;
-
-       if (addr >= prop->pmmu.start_addr &&
-               addr < prop->pmmu.end_addr)
-               return true;
-
-       if (addr >= prop->pmmu_huge.start_addr &&
-               addr < prop->pmmu_huge.end_addr)
-               return true;
-out:
-       return false;
-}
-
-static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
-                               u64 *phys_addr)
-{
-       struct hl_ctx *ctx = hdev->compute_ctx;
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       struct hl_mmu_properties *mmu_prop;
-       u64 hop_addr, hop_pte_addr, hop_pte;
-       u64 offset_mask = HOP4_MASK | FLAGS_MASK;
-       int rc = 0;
-       bool is_dram_addr;
-
-       if (!ctx) {
-               dev_err(hdev->dev, "no ctx available\n");
-               return -EINVAL;
-       }
-
-       is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-                                               prop->dmmu.start_addr,
-                                               prop->dmmu.end_addr);
-
-       /* shifts and masks are the same in PMMU and HPMMU, use one of them */
-       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
-
-       mutex_lock(&ctx->mmu_lock);
-
-       /* hop 0 */
-       hop_addr = get_hop0_addr(ctx);
-       hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
-       hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
-
-       /* hop 1 */
-       hop_addr = get_next_hop_addr(hop_pte);
-       if (hop_addr == ULLONG_MAX)
-               goto not_mapped;
-       hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
-       hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
-
-       /* hop 2 */
-       hop_addr = get_next_hop_addr(hop_pte);
-       if (hop_addr == ULLONG_MAX)
-               goto not_mapped;
-       hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
-       hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
-
-       /* hop 3 */
-       hop_addr = get_next_hop_addr(hop_pte);
-       if (hop_addr == ULLONG_MAX)
-               goto not_mapped;
-       hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
-       hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
-
-       if (!(hop_pte & LAST_MASK)) {
-               /* hop 4 */
-               hop_addr = get_next_hop_addr(hop_pte);
-               if (hop_addr == ULLONG_MAX)
-                       goto not_mapped;
-               hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr,
-                                                       virt_addr);
-               hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
-
-               offset_mask = FLAGS_MASK;
-       }
-
-       if (!(hop_pte & PAGE_PRESENT_MASK))
-               goto not_mapped;
-
-       *phys_addr = (hop_pte & ~offset_mask) | (virt_addr & offset_mask);
-
-       goto out;
-
-not_mapped:
-       dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
-                       virt_addr);
-       rc = -EINVAL;
-out:
-       mutex_unlock(&ctx->mmu_lock);
-       return rc;
-}
-
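A small worked example of the final composition above may help; the 21-bit offset width (HOP4_MASK | FLAGS_MASK for a huge last-level mapping) is an assumption based on a typical 4KB/2MB hop configuration, not something defined in this patch:

/* Hypothetical values, for illustration only. */
u64 hop_pte     = 0x0000001000000003ULL;	/* present + last, phys base 0x1000000000 */
u64 virt_addr   = 0x0000000000123456ULL;
u64 offset_mask = 0x1FFFFFULL;			/* low 21 bits of the VA are kept */
u64 phys_addr   = (hop_pte & ~offset_mask) | (virt_addr & offset_mask);
/* phys_addr == 0x0000001000123456 */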
-static ssize_t hl_data_read32(struct file *f, char __user *buf,
-                                       size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       char tmp_buf[32];
-       u64 addr = entry->addr;
-       u32 val;
-       ssize_t rc;
-
-       if (atomic_read(&hdev->in_reset)) {
-               dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
-               return 0;
-       }
-
-       if (*ppos)
-               return 0;
-
-       if (hl_is_device_va(hdev, addr)) {
-               rc = device_va_to_pa(hdev, addr, &addr);
-               if (rc)
-                       return rc;
-       }
-
-       rc = hdev->asic_funcs->debugfs_read32(hdev, addr, &val);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
-               return rc;
-       }
-
-       sprintf(tmp_buf, "0x%08x\n", val);
-       return simple_read_from_buffer(buf, count, ppos, tmp_buf,
-                       strlen(tmp_buf));
-}
-
-static ssize_t hl_data_write32(struct file *f, const char __user *buf,
-                                       size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       u64 addr = entry->addr;
-       u32 value;
-       ssize_t rc;
-
-       if (atomic_read(&hdev->in_reset)) {
-               dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
-               return 0;
-       }
-
-       rc = kstrtouint_from_user(buf, count, 16, &value);
-       if (rc)
-               return rc;
-
-       if (hl_is_device_va(hdev, addr)) {
-               rc = device_va_to_pa(hdev, addr, &addr);
-               if (rc)
-                       return rc;
-       }
-
-       rc = hdev->asic_funcs->debugfs_write32(hdev, addr, value);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n",
-                       value, addr);
-               return rc;
-       }
-
-       return count;
-}
-
-static ssize_t hl_data_read64(struct file *f, char __user *buf,
-                                       size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       char tmp_buf[32];
-       u64 addr = entry->addr;
-       u64 val;
-       ssize_t rc;
-
-       if (*ppos)
-               return 0;
-
-       if (hl_is_device_va(hdev, addr)) {
-               rc = device_va_to_pa(hdev, addr, &addr);
-               if (rc)
-                       return rc;
-       }
-
-       rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
-               return rc;
-       }
-
-       sprintf(tmp_buf, "0x%016llx\n", val);
-       return simple_read_from_buffer(buf, count, ppos, tmp_buf,
-                       strlen(tmp_buf));
-}
-
-static ssize_t hl_data_write64(struct file *f, const char __user *buf,
-                                       size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       u64 addr = entry->addr;
-       u64 value;
-       ssize_t rc;
-
-       rc = kstrtoull_from_user(buf, count, 16, &value);
-       if (rc)
-               return rc;
-
-       if (hl_is_device_va(hdev, addr)) {
-               rc = device_va_to_pa(hdev, addr, &addr);
-               if (rc)
-                       return rc;
-       }
-
-       rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n",
-                       value, addr);
-               return rc;
-       }
-
-       return count;
-}
-
-static ssize_t hl_get_power_state(struct file *f, char __user *buf,
-               size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       char tmp_buf[200];
-       int i;
-
-       if (*ppos)
-               return 0;
-
-       if (hdev->pdev->current_state == PCI_D0)
-               i = 1;
-       else if (hdev->pdev->current_state == PCI_D3hot)
-               i = 2;
-       else
-               i = 3;
-
-       sprintf(tmp_buf,
-               "current power state: %d\n1 - D0\n2 - D3hot\n3 - Unknown\n", i);
-       return simple_read_from_buffer(buf, count, ppos, tmp_buf,
-                       strlen(tmp_buf));
-}
-
-static ssize_t hl_set_power_state(struct file *f, const char __user *buf,
-                                       size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       u32 value;
-       ssize_t rc;
-
-       rc = kstrtouint_from_user(buf, count, 10, &value);
-       if (rc)
-               return rc;
-
-       if (value == 1) {
-               pci_set_power_state(hdev->pdev, PCI_D0);
-               pci_restore_state(hdev->pdev);
-               rc = pci_enable_device(hdev->pdev);
-       } else if (value == 2) {
-               pci_save_state(hdev->pdev);
-               pci_disable_device(hdev->pdev);
-               pci_set_power_state(hdev->pdev, PCI_D3hot);
-       } else {
-               dev_dbg(hdev->dev, "invalid power state value %u\n", value);
-               return -EINVAL;
-       }
-
-       return count;
-}
-
-static ssize_t hl_i2c_data_read(struct file *f, char __user *buf,
-                                       size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       char tmp_buf[32];
-       u32 val;
-       ssize_t rc;
-
-       if (*ppos)
-               return 0;
-
-       rc = hl_debugfs_i2c_read(hdev, entry->i2c_bus, entry->i2c_addr,
-                       entry->i2c_reg, &val);
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to read from I2C bus %d, addr %d, reg %d\n",
-                       entry->i2c_bus, entry->i2c_addr, entry->i2c_reg);
-               return rc;
-       }
-
-       sprintf(tmp_buf, "0x%02x\n", val);
-       rc = simple_read_from_buffer(buf, count, ppos, tmp_buf,
-                       strlen(tmp_buf));
-
-       return rc;
-}
-
-static ssize_t hl_i2c_data_write(struct file *f, const char __user *buf,
-                                       size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       u32 value;
-       ssize_t rc;
-
-       rc = kstrtouint_from_user(buf, count, 16, &value);
-       if (rc)
-               return rc;
-
-       rc = hl_debugfs_i2c_write(hdev, entry->i2c_bus, entry->i2c_addr,
-                       entry->i2c_reg, value);
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to write 0x%02x to I2C bus %d, addr %d, reg %d\n",
-                       value, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg);
-               return rc;
-       }
-
-       return count;
-}
-
-static ssize_t hl_led0_write(struct file *f, const char __user *buf,
-                                       size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       u32 value;
-       ssize_t rc;
-
-       rc = kstrtouint_from_user(buf, count, 10, &value);
-       if (rc)
-               return rc;
-
-       value = value ? 1 : 0;
-
-       hl_debugfs_led_set(hdev, 0, value);
-
-       return count;
-}
-
-static ssize_t hl_led1_write(struct file *f, const char __user *buf,
-                                       size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       u32 value;
-       ssize_t rc;
-
-       rc = kstrtouint_from_user(buf, count, 10, &value);
-       if (rc)
-               return rc;
-
-       value = value ? 1 : 0;
-
-       hl_debugfs_led_set(hdev, 1, value);
-
-       return count;
-}
-
-static ssize_t hl_led2_write(struct file *f, const char __user *buf,
-                                       size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       u32 value;
-       ssize_t rc;
-
-       rc = kstrtouint_from_user(buf, count, 10, &value);
-       if (rc)
-               return rc;
-
-       value = value ? 1 : 0;
-
-       hl_debugfs_led_set(hdev, 2, value);
-
-       return count;
-}
-
-static ssize_t hl_device_read(struct file *f, char __user *buf,
-                                       size_t count, loff_t *ppos)
-{
-       static const char *help =
-               "Valid values: disable, enable, suspend, resume, cpu_timeout\n";
-       return simple_read_from_buffer(buf, count, ppos, help, strlen(help));
-}
-
-static ssize_t hl_device_write(struct file *f, const char __user *buf,
-                                    size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       char data[30] = {0};
-
-       /* don't allow partial writes */
-       if (*ppos != 0)
-               return 0;
-
-       simple_write_to_buffer(data, 29, ppos, buf, count);
-
-       if (strncmp("disable", data, strlen("disable")) == 0) {
-               hdev->disabled = true;
-       } else if (strncmp("enable", data, strlen("enable")) == 0) {
-               hdev->disabled = false;
-       } else if (strncmp("suspend", data, strlen("suspend")) == 0) {
-               hdev->asic_funcs->suspend(hdev);
-       } else if (strncmp("resume", data, strlen("resume")) == 0) {
-               hdev->asic_funcs->resume(hdev);
-       } else if (strncmp("cpu_timeout", data, strlen("cpu_timeout")) == 0) {
-               hdev->device_cpu_disabled = true;
-       } else {
-               dev_err(hdev->dev,
-                       "Valid values: disable, enable, suspend, resume, cpu_timeout\n");
-               count = -EINVAL;
-       }
-
-       return count;
-}
-
-static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
-                                       size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       char tmp_buf[200];
-       ssize_t rc;
-
-       if (*ppos)
-               return 0;
-
-       sprintf(tmp_buf, "%d\n", hdev->clock_gating);
-       rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
-                       strlen(tmp_buf) + 1);
-
-       return rc;
-}
-
-static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
-                                    size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       u32 value;
-       ssize_t rc;
-
-       if (atomic_read(&hdev->in_reset)) {
-               dev_warn_ratelimited(hdev->dev,
-                               "Can't change clock gating during reset\n");
-               return 0;
-       }
-
-       rc = kstrtouint_from_user(buf, count, 10, &value);
-       if (rc)
-               return rc;
-
-       if (value) {
-               hdev->clock_gating = 1;
-               if (hdev->asic_funcs->enable_clock_gating)
-                       hdev->asic_funcs->enable_clock_gating(hdev);
-       } else {
-               if (hdev->asic_funcs->disable_clock_gating)
-                       hdev->asic_funcs->disable_clock_gating(hdev);
-               hdev->clock_gating = 0;
-       }
-
-       return count;
-}
-
-static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
-                                       size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       char tmp_buf[200];
-       ssize_t rc;
-
-       if (*ppos)
-               return 0;
-
-       sprintf(tmp_buf, "%d\n", hdev->stop_on_err);
-       rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
-                       strlen(tmp_buf) + 1);
-
-       return rc;
-}
-
-static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
-                                    size_t count, loff_t *ppos)
-{
-       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
-       struct hl_device *hdev = entry->hdev;
-       u32 value;
-       ssize_t rc;
-
-       if (atomic_read(&hdev->in_reset)) {
-               dev_warn_ratelimited(hdev->dev,
-                               "Can't change stop on error during reset\n");
-               return 0;
-       }
-
-       rc = kstrtouint_from_user(buf, count, 10, &value);
-       if (rc)
-               return rc;
-
-       hdev->stop_on_err = value ? 1 : 0;
-
-       hl_device_reset(hdev, false, false);
-
-       return count;
-}
-
-static const struct file_operations hl_data32b_fops = {
-       .owner = THIS_MODULE,
-       .read = hl_data_read32,
-       .write = hl_data_write32
-};
-
-static const struct file_operations hl_data64b_fops = {
-       .owner = THIS_MODULE,
-       .read = hl_data_read64,
-       .write = hl_data_write64
-};
-
-static const struct file_operations hl_i2c_data_fops = {
-       .owner = THIS_MODULE,
-       .read = hl_i2c_data_read,
-       .write = hl_i2c_data_write
-};
-
-static const struct file_operations hl_power_fops = {
-       .owner = THIS_MODULE,
-       .read = hl_get_power_state,
-       .write = hl_set_power_state
-};
-
-static const struct file_operations hl_led0_fops = {
-       .owner = THIS_MODULE,
-       .write = hl_led0_write
-};
-
-static const struct file_operations hl_led1_fops = {
-       .owner = THIS_MODULE,
-       .write = hl_led1_write
-};
-
-static const struct file_operations hl_led2_fops = {
-       .owner = THIS_MODULE,
-       .write = hl_led2_write
-};
-
-static const struct file_operations hl_device_fops = {
-       .owner = THIS_MODULE,
-       .read = hl_device_read,
-       .write = hl_device_write
-};
-
-static const struct file_operations hl_clk_gate_fops = {
-       .owner = THIS_MODULE,
-       .read = hl_clk_gate_read,
-       .write = hl_clk_gate_write
-};
-
-static const struct file_operations hl_stop_on_err_fops = {
-       .owner = THIS_MODULE,
-       .read = hl_stop_on_err_read,
-       .write = hl_stop_on_err_write
-};
-
-static const struct hl_info_list hl_debugfs_list[] = {
-       {"command_buffers", command_buffers_show, NULL},
-       {"command_submission", command_submission_show, NULL},
-       {"command_submission_jobs", command_submission_jobs_show, NULL},
-       {"userptr", userptr_show, NULL},
-       {"vm", vm_show, NULL},
-       {"mmu", mmu_show, mmu_asid_va_write},
-       {"engines", engines_show, NULL}
-};
-
-static int hl_debugfs_open(struct inode *inode, struct file *file)
-{
-       struct hl_debugfs_entry *node = inode->i_private;
-
-       return single_open(file, node->info_ent->show, node);
-}
-
-static ssize_t hl_debugfs_write(struct file *file, const char __user *buf,
-               size_t count, loff_t *f_pos)
-{
-       struct hl_debugfs_entry *node = file->f_inode->i_private;
-
-       if (node->info_ent->write)
-               return node->info_ent->write(file, buf, count, f_pos);
-       else
-               return -EINVAL;
-
-}
-
-static const struct file_operations hl_debugfs_fops = {
-       .owner = THIS_MODULE,
-       .open = hl_debugfs_open,
-       .read = seq_read,
-       .write = hl_debugfs_write,
-       .llseek = seq_lseek,
-       .release = single_release,
-};
-
-void hl_debugfs_add_device(struct hl_device *hdev)
-{
-       struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
-       int count = ARRAY_SIZE(hl_debugfs_list);
-       struct hl_debugfs_entry *entry;
-       struct dentry *ent;
-       int i;
-
-       dev_entry->hdev = hdev;
-       dev_entry->entry_arr = kmalloc_array(count,
-                                       sizeof(struct hl_debugfs_entry),
-                                       GFP_KERNEL);
-       if (!dev_entry->entry_arr)
-               return;
-
-       INIT_LIST_HEAD(&dev_entry->file_list);
-       INIT_LIST_HEAD(&dev_entry->cb_list);
-       INIT_LIST_HEAD(&dev_entry->cs_list);
-       INIT_LIST_HEAD(&dev_entry->cs_job_list);
-       INIT_LIST_HEAD(&dev_entry->userptr_list);
-       INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);
-       mutex_init(&dev_entry->file_mutex);
-       spin_lock_init(&dev_entry->cb_spinlock);
-       spin_lock_init(&dev_entry->cs_spinlock);
-       spin_lock_init(&dev_entry->cs_job_spinlock);
-       spin_lock_init(&dev_entry->userptr_spinlock);
-       spin_lock_init(&dev_entry->ctx_mem_hash_spinlock);
-
-       dev_entry->root = debugfs_create_dir(dev_name(hdev->dev),
-                                               hl_debug_root);
-
-       debugfs_create_x64("addr",
-                               0644,
-                               dev_entry->root,
-                               &dev_entry->addr);
-
-       debugfs_create_file("data32",
-                               0644,
-                               dev_entry->root,
-                               dev_entry,
-                               &hl_data32b_fops);
-
-       debugfs_create_file("data64",
-                               0644,
-                               dev_entry->root,
-                               dev_entry,
-                               &hl_data64b_fops);
-
-       debugfs_create_file("set_power_state",
-                               0200,
-                               dev_entry->root,
-                               dev_entry,
-                               &hl_power_fops);
-
-       debugfs_create_u8("i2c_bus",
-                               0644,
-                               dev_entry->root,
-                               &dev_entry->i2c_bus);
-
-       debugfs_create_u8("i2c_addr",
-                               0644,
-                               dev_entry->root,
-                               &dev_entry->i2c_addr);
-
-       debugfs_create_u8("i2c_reg",
-                               0644,
-                               dev_entry->root,
-                               &dev_entry->i2c_reg);
-
-       debugfs_create_file("i2c_data",
-                               0644,
-                               dev_entry->root,
-                               dev_entry,
-                               &hl_i2c_data_fops);
-
-       debugfs_create_file("led0",
-                               0200,
-                               dev_entry->root,
-                               dev_entry,
-                               &hl_led0_fops);
-
-       debugfs_create_file("led1",
-                               0200,
-                               dev_entry->root,
-                               dev_entry,
-                               &hl_led1_fops);
-
-       debugfs_create_file("led2",
-                               0200,
-                               dev_entry->root,
-                               dev_entry,
-                               &hl_led2_fops);
-
-       debugfs_create_file("device",
-                               0200,
-                               dev_entry->root,
-                               dev_entry,
-                               &hl_device_fops);
-
-       debugfs_create_file("clk_gate",
-                               0200,
-                               dev_entry->root,
-                               dev_entry,
-                               &hl_clk_gate_fops);
-
-       debugfs_create_file("stop_on_err",
-                               0644,
-                               dev_entry->root,
-                               dev_entry,
-                               &hl_stop_on_err_fops);
-
-       for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
-
-               ent = debugfs_create_file(hl_debugfs_list[i].name,
-                                       0444,
-                                       dev_entry->root,
-                                       entry,
-                                       &hl_debugfs_fops);
-               entry->dent = ent;
-               entry->info_ent = &hl_debugfs_list[i];
-               entry->dev_entry = dev_entry;
-       }
-}
-
-void hl_debugfs_remove_device(struct hl_device *hdev)
-{
-       struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
-
-       debugfs_remove_recursive(entry->root);
-
-       mutex_destroy(&entry->file_mutex);
-       kfree(entry->entry_arr);
-}
-
-void hl_debugfs_add_file(struct hl_fpriv *hpriv)
-{
-       struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
-
-       mutex_lock(&dev_entry->file_mutex);
-       list_add(&hpriv->debugfs_list, &dev_entry->file_list);
-       mutex_unlock(&dev_entry->file_mutex);
-}
-
-void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
-{
-       struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
-
-       mutex_lock(&dev_entry->file_mutex);
-       list_del(&hpriv->debugfs_list);
-       mutex_unlock(&dev_entry->file_mutex);
-}
-
-void hl_debugfs_add_cb(struct hl_cb *cb)
-{
-       struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;
-
-       spin_lock(&dev_entry->cb_spinlock);
-       list_add(&cb->debugfs_list, &dev_entry->cb_list);
-       spin_unlock(&dev_entry->cb_spinlock);
-}
-
-void hl_debugfs_remove_cb(struct hl_cb *cb)
-{
-       struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;
-
-       spin_lock(&dev_entry->cb_spinlock);
-       list_del(&cb->debugfs_list);
-       spin_unlock(&dev_entry->cb_spinlock);
-}
-
-void hl_debugfs_add_cs(struct hl_cs *cs)
-{
-       struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;
-
-       spin_lock(&dev_entry->cs_spinlock);
-       list_add(&cs->debugfs_list, &dev_entry->cs_list);
-       spin_unlock(&dev_entry->cs_spinlock);
-}
-
-void hl_debugfs_remove_cs(struct hl_cs *cs)
-{
-       struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;
-
-       spin_lock(&dev_entry->cs_spinlock);
-       list_del(&cs->debugfs_list);
-       spin_unlock(&dev_entry->cs_spinlock);
-}
-
-void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job)
-{
-       struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
-
-       spin_lock(&dev_entry->cs_job_spinlock);
-       list_add(&job->debugfs_list, &dev_entry->cs_job_list);
-       spin_unlock(&dev_entry->cs_job_spinlock);
-}
-
-void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job)
-{
-       struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
-
-       spin_lock(&dev_entry->cs_job_spinlock);
-       list_del(&job->debugfs_list);
-       spin_unlock(&dev_entry->cs_job_spinlock);
-}
-
-void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr)
-{
-       struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
-
-       spin_lock(&dev_entry->userptr_spinlock);
-       list_add(&userptr->debugfs_list, &dev_entry->userptr_list);
-       spin_unlock(&dev_entry->userptr_spinlock);
-}
-
-void hl_debugfs_remove_userptr(struct hl_device *hdev,
-                               struct hl_userptr *userptr)
-{
-       struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
-
-       spin_lock(&dev_entry->userptr_spinlock);
-       list_del(&userptr->debugfs_list);
-       spin_unlock(&dev_entry->userptr_spinlock);
-}
-
-void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
-{
-       struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
-
-       spin_lock(&dev_entry->ctx_mem_hash_spinlock);
-       list_add(&ctx->debugfs_list, &dev_entry->ctx_mem_hash_list);
-       spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
-}
-
-void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
-{
-       struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
-
-       spin_lock(&dev_entry->ctx_mem_hash_spinlock);
-       list_del(&ctx->debugfs_list);
-       spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
-}
-
-void __init hl_debugfs_init(void)
-{
-       hl_debug_root = debugfs_create_dir("habanalabs", NULL);
-}
-
-void hl_debugfs_fini(void)
-{
-       debugfs_remove_recursive(hl_debug_root);
-}
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
deleted file mode 100644 (file)
index 84800ef..0000000
+++ /dev/null
@@ -1,1506 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#define pr_fmt(fmt)                    "habanalabs: " fmt
-
-#include "habanalabs.h"
-
-#include <linux/pci.h>
-#include <linux/sched/signal.h>
-#include <linux/hwmon.h>
-#include <uapi/misc/habanalabs.h>
-
-#define HL_PLDM_PENDING_RESET_PER_SEC  (HL_PENDING_RESET_PER_SEC * 10)
-
-bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
-{
-       if ((hdev->disabled) || (atomic_read(&hdev->in_reset)))
-               return true;
-       else
-               return false;
-}
-
-enum hl_device_status hl_device_status(struct hl_device *hdev)
-{
-       enum hl_device_status status;
-
-       if (hdev->disabled)
-               status = HL_DEVICE_STATUS_MALFUNCTION;
-       else if (atomic_read(&hdev->in_reset))
-               status = HL_DEVICE_STATUS_IN_RESET;
-       else
-               status = HL_DEVICE_STATUS_OPERATIONAL;
-
-       return status;
-}
-
-static void hpriv_release(struct kref *ref)
-{
-       struct hl_fpriv *hpriv;
-       struct hl_device *hdev;
-
-       hpriv = container_of(ref, struct hl_fpriv, refcount);
-
-       hdev = hpriv->hdev;
-
-       put_pid(hpriv->taskpid);
-
-       hl_debugfs_remove_file(hpriv);
-
-       mutex_destroy(&hpriv->restore_phase_mutex);
-
-       mutex_lock(&hdev->fpriv_list_lock);
-       list_del(&hpriv->dev_node);
-       hdev->compute_ctx = NULL;
-       mutex_unlock(&hdev->fpriv_list_lock);
-
-       kfree(hpriv);
-}
-
-void hl_hpriv_get(struct hl_fpriv *hpriv)
-{
-       kref_get(&hpriv->refcount);
-}
-
-void hl_hpriv_put(struct hl_fpriv *hpriv)
-{
-       kref_put(&hpriv->refcount, hpriv_release);
-}
-
-/*
- * hl_device_release - release function for habanalabs device
- *
- * @inode: pointer to inode structure
- * @filp: pointer to file structure
- *
- * Called when a process closes a habanalabs device
- */
-static int hl_device_release(struct inode *inode, struct file *filp)
-{
-       struct hl_fpriv *hpriv = filp->private_data;
-
-       hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
-       hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
-
-       filp->private_data = NULL;
-
-       hl_hpriv_put(hpriv);
-
-       return 0;
-}
-
-static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
-{
-       struct hl_fpriv *hpriv = filp->private_data;
-       struct hl_device *hdev;
-
-       filp->private_data = NULL;
-
-       hdev = hpriv->hdev;
-
-       mutex_lock(&hdev->fpriv_list_lock);
-       list_del(&hpriv->dev_node);
-       mutex_unlock(&hdev->fpriv_list_lock);
-
-       kfree(hpriv);
-
-       return 0;
-}
-
-/*
- * hl_mmap - mmap function for habanalabs device
- *
- * @filp: pointer to file structure
- * @vma: pointer to vm_area_struct of the process
- *
- * Called when a process does an mmap on a habanalabs device. Call the device's
- * mmap function at the end of the common code.
- */
-static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-       struct hl_fpriv *hpriv = filp->private_data;
-
-       if ((vma->vm_pgoff & HL_MMAP_CB_MASK) == HL_MMAP_CB_MASK) {
-               vma->vm_pgoff ^= HL_MMAP_CB_MASK;
-               return hl_cb_mmap(hpriv, vma);
-       }
-
-       return -EINVAL;
-}
-
-static const struct file_operations hl_ops = {
-       .owner = THIS_MODULE,
-       .open = hl_device_open,
-       .release = hl_device_release,
-       .mmap = hl_mmap,
-       .unlocked_ioctl = hl_ioctl,
-       .compat_ioctl = hl_ioctl
-};
-
-static const struct file_operations hl_ctrl_ops = {
-       .owner = THIS_MODULE,
-       .open = hl_device_open_ctrl,
-       .release = hl_device_release_ctrl,
-       .unlocked_ioctl = hl_ioctl_control,
-       .compat_ioctl = hl_ioctl_control
-};
-
-static void device_release_func(struct device *dev)
-{
-       kfree(dev);
-}
-
-/*
- * device_init_cdev - Initialize cdev and device for habanalabs device
- *
- * @hdev: pointer to habanalabs device structure
- * @hclass: pointer to the class object of the device
- * @minor: minor number of the specific device
- * @fops: file operations to install for this device
- * @name: name of the device as it will appear in the filesystem
- * @cdev: pointer to the char device object that will be initialized
- * @dev: pointer to the device object that will be initialized
- *
- * Initialize a cdev and a Linux device for a habanalabs device.
- */
-static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
-                               int minor, const struct file_operations *fops,
-                               char *name, struct cdev *cdev,
-                               struct device **dev)
-{
-       cdev_init(cdev, fops);
-       cdev->owner = THIS_MODULE;
-
-       *dev = kzalloc(sizeof(**dev), GFP_KERNEL);
-       if (!*dev)
-               return -ENOMEM;
-
-       device_initialize(*dev);
-       (*dev)->devt = MKDEV(hdev->major, minor);
-       (*dev)->class = hclass;
-       (*dev)->release = device_release_func;
-       dev_set_drvdata(*dev, hdev);
-       dev_set_name(*dev, "%s", name);
-
-       return 0;
-}
-
-static int device_cdev_sysfs_add(struct hl_device *hdev)
-{
-       int rc;
-
-       rc = cdev_device_add(&hdev->cdev, hdev->dev);
-       if (rc) {
-               dev_err(hdev->dev,
-                       "failed to add a char device to the system\n");
-               return rc;
-       }
-
-       rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
-       if (rc) {
-               dev_err(hdev->dev,
-                       "failed to add a control char device to the system\n");
-               goto delete_cdev_device;
-       }
-
-       /* hl_sysfs_init() must be done after adding the device to the system */
-       rc = hl_sysfs_init(hdev);
-       if (rc) {
-               dev_err(hdev->dev, "failed to initialize sysfs\n");
-               goto delete_ctrl_cdev_device;
-       }
-
-       hdev->cdev_sysfs_created = true;
-
-       return 0;
-
-delete_ctrl_cdev_device:
-       cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
-delete_cdev_device:
-       cdev_device_del(&hdev->cdev, hdev->dev);
-       return rc;
-}
-
-static void device_cdev_sysfs_del(struct hl_device *hdev)
-{
-       /* device_release() won't be called so must free devices explicitly */
-       if (!hdev->cdev_sysfs_created) {
-               kfree(hdev->dev_ctrl);
-               kfree(hdev->dev);
-               return;
-       }
-
-       hl_sysfs_fini(hdev);
-       cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
-       cdev_device_del(&hdev->cdev, hdev->dev);
-}
-
-/*
- * device_early_init - do some early initialization for the habanalabs device
- *
- * @hdev: pointer to habanalabs device structure
- *
- * Install the relevant function pointers and call the early_init function,
- * if such a function exists
- */
-static int device_early_init(struct hl_device *hdev)
-{
-       int i, rc;
-       char workq_name[32];
-
-       switch (hdev->asic_type) {
-       case ASIC_GOYA:
-               goya_set_asic_funcs(hdev);
-               strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
-               break;
-       case ASIC_GAUDI:
-               gaudi_set_asic_funcs(hdev);
-               strlcpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
-               break;
-       default:
-               dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
-                       hdev->asic_type);
-               return -EINVAL;
-       }
-
-       rc = hdev->asic_funcs->early_init(hdev);
-       if (rc)
-               return rc;
-
-       rc = hl_asid_init(hdev);
-       if (rc)
-               goto early_fini;
-
-       if (hdev->asic_prop.completion_queues_count) {
-               hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
-                               sizeof(*hdev->cq_wq),
-                               GFP_ATOMIC);
-               if (!hdev->cq_wq) {
-                       rc = -ENOMEM;
-                       goto asid_fini;
-               }
-       }
-
-       for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
-               snprintf(workq_name, 32, "hl-free-jobs-%u", i);
-               hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
-               if (hdev->cq_wq[i] == NULL) {
-                       dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
-                       rc = -ENOMEM;
-                       goto free_cq_wq;
-               }
-       }
-
-       hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
-       if (hdev->eq_wq == NULL) {
-               dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
-               rc = -ENOMEM;
-               goto free_cq_wq;
-       }
-
-       hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
-                                       GFP_KERNEL);
-       if (!hdev->hl_chip_info) {
-               rc = -ENOMEM;
-               goto free_eq_wq;
-       }
-
-       hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
-                                       sizeof(struct hl_device_idle_busy_ts),
-                                       (GFP_KERNEL | __GFP_ZERO));
-       if (!hdev->idle_busy_ts_arr) {
-               rc = -ENOMEM;
-               goto free_chip_info;
-       }
-
-       hl_cb_mgr_init(&hdev->kernel_cb_mgr);
-
-       mutex_init(&hdev->send_cpu_message_lock);
-       mutex_init(&hdev->debug_lock);
-       mutex_init(&hdev->mmu_cache_lock);
-       INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
-       spin_lock_init(&hdev->hw_queues_mirror_lock);
-       INIT_LIST_HEAD(&hdev->fpriv_list);
-       mutex_init(&hdev->fpriv_list_lock);
-       atomic_set(&hdev->in_reset, 0);
-
-       return 0;
-
-free_chip_info:
-       kfree(hdev->hl_chip_info);
-free_eq_wq:
-       destroy_workqueue(hdev->eq_wq);
-free_cq_wq:
-       for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
-               if (hdev->cq_wq[i])
-                       destroy_workqueue(hdev->cq_wq[i]);
-       kfree(hdev->cq_wq);
-asid_fini:
-       hl_asid_fini(hdev);
-early_fini:
-       if (hdev->asic_funcs->early_fini)
-               hdev->asic_funcs->early_fini(hdev);
-
-       return rc;
-}
-
-/*
- * device_early_fini - finalize all that was done in device_early_init
- *
- * @hdev: pointer to habanalabs device structure
- *
- */
-static void device_early_fini(struct hl_device *hdev)
-{
-       int i;
-
-       mutex_destroy(&hdev->mmu_cache_lock);
-       mutex_destroy(&hdev->debug_lock);
-       mutex_destroy(&hdev->send_cpu_message_lock);
-
-       mutex_destroy(&hdev->fpriv_list_lock);
-
-       hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
-
-       kfree(hdev->idle_busy_ts_arr);
-       kfree(hdev->hl_chip_info);
-
-       destroy_workqueue(hdev->eq_wq);
-
-       for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
-               destroy_workqueue(hdev->cq_wq[i]);
-       kfree(hdev->cq_wq);
-
-       hl_asid_fini(hdev);
-
-       if (hdev->asic_funcs->early_fini)
-               hdev->asic_funcs->early_fini(hdev);
-}
-
-static void set_freq_to_low_job(struct work_struct *work)
-{
-       struct hl_device *hdev = container_of(work, struct hl_device,
-                                               work_freq.work);
-
-       mutex_lock(&hdev->fpriv_list_lock);
-
-       if (!hdev->compute_ctx)
-               hl_device_set_frequency(hdev, PLL_LOW);
-
-       mutex_unlock(&hdev->fpriv_list_lock);
-
-       schedule_delayed_work(&hdev->work_freq,
-                       usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
-}
-
-static void hl_device_heartbeat(struct work_struct *work)
-{
-       struct hl_device *hdev = container_of(work, struct hl_device,
-                                               work_heartbeat.work);
-
-       if (hl_device_disabled_or_in_reset(hdev))
-               goto reschedule;
-
-       if (!hdev->asic_funcs->send_heartbeat(hdev))
-               goto reschedule;
-
-       dev_err(hdev->dev, "Device heartbeat failed!\n");
-       hl_device_reset(hdev, true, false);
-
-       return;
-
-reschedule:
-       schedule_delayed_work(&hdev->work_heartbeat,
-                       usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
-}
-
-/*
- * device_late_init - do late initialization for the habanalabs device
- *
- * @hdev: pointer to habanalabs device structure
- *
- * Do stuff that either needs the device H/W queues to be active or needs
- * to happen after all the rest of the initialization is finished
- */
-static int device_late_init(struct hl_device *hdev)
-{
-       int rc;
-
-       if (hdev->asic_funcs->late_init) {
-               rc = hdev->asic_funcs->late_init(hdev);
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "failed late initialization for the H/W\n");
-                       return rc;
-               }
-       }
-
-       hdev->high_pll = hdev->asic_prop.high_pll;
-
-       /* force setting to low frequency */
-       hdev->curr_pll_profile = PLL_LOW;
-
-       if (hdev->pm_mng_profile == PM_AUTO)
-               hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
-       else
-               hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
-
-       INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
-       schedule_delayed_work(&hdev->work_freq,
-                       usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
-
-       if (hdev->heartbeat) {
-               INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
-               schedule_delayed_work(&hdev->work_heartbeat,
-                               usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
-       }
-
-       hdev->late_init_done = true;
-
-       return 0;
-}
-
-/*
- * device_late_fini - finalize all that was done in device_late_init
- *
- * @hdev: pointer to habanalabs device structure
- *
- */
-static void device_late_fini(struct hl_device *hdev)
-{
-       if (!hdev->late_init_done)
-               return;
-
-       cancel_delayed_work_sync(&hdev->work_freq);
-       if (hdev->heartbeat)
-               cancel_delayed_work_sync(&hdev->work_heartbeat);
-
-       if (hdev->asic_funcs->late_fini)
-               hdev->asic_funcs->late_fini(hdev);
-
-       hdev->late_init_done = false;
-}
-
-uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms)
-{
-       struct hl_device_idle_busy_ts *ts;
-       ktime_t zero_ktime, curr = ktime_get();
-       u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx;
-       s64 period_us, last_start_us, last_end_us, last_busy_time_us,
-               total_busy_time_us = 0, total_busy_time_ms;
-
-       zero_ktime = ktime_set(0, 0);
-       period_us = period_ms * USEC_PER_MSEC;
-       ts = &hdev->idle_busy_ts_arr[last_index];
-
-       /* check case that device is currently in idle */
-       if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) &&
-                       !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) {
-
-               last_index--;
-               /* Handle case idle_busy_ts_idx was 0 */
-               if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
-                       last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
-
-               ts = &hdev->idle_busy_ts_arr[last_index];
-       }
-
-       while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) {
-               /* Check if we are in last sample case. i.e. if the sample
-                * begun before the sampling period. This could be a real
-                * sample or 0 so need to handle both cases
-                */
-               last_start_us = ktime_to_us(
-                               ktime_sub(curr, ts->idle_to_busy_ts));
-
-               if (last_start_us > period_us) {
-
-                       /* First check two cases:
-                        * 1. If the device is currently busy
-                        * 2. If the device was idle during the whole sampling
-                        *    period
-                        */
-
-                       if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) {
-                               /* Check if the device is currently busy */
-                               if (ktime_compare(ts->idle_to_busy_ts,
-                                               zero_ktime))
-                                       return 100;
-
-                               /* We either didn't have any activity or we
-                                * reached an entry which is 0. Either way,
-                                * exit and return what was accumulated so far
-                                */
-                               break;
-                       }
-
-                       /* If sample has finished, check it is relevant */
-                       last_end_us = ktime_to_us(
-                                       ktime_sub(curr, ts->busy_to_idle_ts));
-
-                       if (last_end_us > period_us)
-                               break;
-
-                       /* It is relevant so add it but with adjustment */
-                       last_busy_time_us = ktime_to_us(
-                                               ktime_sub(ts->busy_to_idle_ts,
-                                               ts->idle_to_busy_ts));
-                       total_busy_time_us += last_busy_time_us -
-                                       (last_start_us - period_us);
-                       break;
-               }
-
-               /* Check if the sample is finished or still open */
-               if (ktime_compare(ts->busy_to_idle_ts, zero_ktime))
-                       last_busy_time_us = ktime_to_us(
-                                               ktime_sub(ts->busy_to_idle_ts,
-                                               ts->idle_to_busy_ts));
-               else
-                       last_busy_time_us = ktime_to_us(
-                                       ktime_sub(curr, ts->idle_to_busy_ts));
-
-               total_busy_time_us += last_busy_time_us;
-
-               last_index--;
-               /* Handle case idle_busy_ts_idx was 0 */
-               if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
-                       last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
-
-               ts = &hdev->idle_busy_ts_arr[last_index];
-
-               overlap_cnt++;
-       }
-
-       total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us,
-                                               USEC_PER_MSEC);
-
-       return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms);
-}
-
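As a made-up numeric illustration of this accounting: with period_ms = 1000, one closed interval of 230 ms that lies fully inside the window contributes 230 ms, while an interval that became busy 1100 ms ago and idle 900 ms ago contributes its 200 ms minus the 100 ms that fall before the window, i.e. 100 ms. The function then returns DIV_ROUND_UP_ULL(330 * 100, 1000) = 33, i.e. 33% utilization.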
-/*
- * hl_device_set_frequency - set the frequency of the device
- *
- * @hdev: pointer to habanalabs device structure
- * @freq: the new frequency value
- *
- * Change the frequency if needed. This function has no protection against
- * concurrency, therefore it is assumed that the calling function has protected
- * itself against the case of calling this function from multiple threads with
- * different values
- *
- * Returns 0 if no change was done, otherwise returns 1
- */
-int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
-{
-       if ((hdev->pm_mng_profile == PM_MANUAL) ||
-                       (hdev->curr_pll_profile == freq))
-               return 0;
-
-       dev_dbg(hdev->dev, "Changing device frequency to %s\n",
-               freq == PLL_HIGH ? "high" : "low");
-
-       hdev->asic_funcs->set_pll_profile(hdev, freq);
-
-       hdev->curr_pll_profile = freq;
-
-       return 1;
-}
-
-int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
-{
-       int rc = 0;
-
-       mutex_lock(&hdev->debug_lock);
-
-       if (!enable) {
-               if (!hdev->in_debug) {
-                       dev_err(hdev->dev,
-                               "Failed to disable debug mode because device was not in debug mode\n");
-                       rc = -EFAULT;
-                       goto out;
-               }
-
-               if (!hdev->hard_reset_pending)
-                       hdev->asic_funcs->halt_coresight(hdev);
-
-               hdev->in_debug = 0;
-
-               if (!hdev->hard_reset_pending)
-                       hdev->asic_funcs->enable_clock_gating(hdev);
-
-               goto out;
-       }
-
-       if (hdev->in_debug) {
-               dev_err(hdev->dev,
-                       "Failed to enable debug mode because device is already in debug mode\n");
-               rc = -EFAULT;
-               goto out;
-       }
-
-       hdev->asic_funcs->disable_clock_gating(hdev);
-       hdev->in_debug = 1;
-
-out:
-       mutex_unlock(&hdev->debug_lock);
-
-       return rc;
-}
-
-/*
- * hl_device_suspend - initiate device suspend
- *
- * @hdev: pointer to habanalabs device structure
- *
- * Puts the hw in the suspend state (all asics).
- * Returns 0 for success or an error on failure.
- * Called at driver suspend.
- */
-int hl_device_suspend(struct hl_device *hdev)
-{
-       int rc;
-
-       pci_save_state(hdev->pdev);
-
-       /* Block future CS/VM/JOB completion operations */
-       rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
-       if (rc) {
-               dev_err(hdev->dev, "Can't suspend while in reset\n");
-               return -EIO;
-       }
-
-       /* This blocks all other stuff that is not blocked by in_reset */
-       hdev->disabled = true;
-
-       /*
-        * Flush anyone that is inside the critical section of enqueue
-        * jobs to the H/W
-        */
-       hdev->asic_funcs->hw_queues_lock(hdev);
-       hdev->asic_funcs->hw_queues_unlock(hdev);
-
-       /* Flush processes that are sending message to CPU */
-       mutex_lock(&hdev->send_cpu_message_lock);
-       mutex_unlock(&hdev->send_cpu_message_lock);
-
-       rc = hdev->asic_funcs->suspend(hdev);
-       if (rc)
-               dev_err(hdev->dev,
-                       "Failed to disable PCI access of device CPU\n");
-
-       /* Shut down the device */
-       pci_disable_device(hdev->pdev);
-       pci_set_power_state(hdev->pdev, PCI_D3hot);
-
-       return 0;
-}
-
-/*
- * hl_device_resume - initiate device resume
- *
- * @hdev: pointer to habanalabs device structure
- *
- * Bring the hw back to operating state (all asics).
- * Returns 0 for success or an error on failure.
- * Called at driver resume.
- */
-int hl_device_resume(struct hl_device *hdev)
-{
-       int rc;
-
-       pci_set_power_state(hdev->pdev, PCI_D0);
-       pci_restore_state(hdev->pdev);
-       rc = pci_enable_device_mem(hdev->pdev);
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to enable PCI device in resume\n");
-               return rc;
-       }
-
-       pci_set_master(hdev->pdev);
-
-       rc = hdev->asic_funcs->resume(hdev);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to resume device after suspend\n");
-               goto disable_device;
-       }
-
-
-       hdev->disabled = false;
-       atomic_set(&hdev->in_reset, 0);
-
-       rc = hl_device_reset(hdev, true, false);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to reset device during resume\n");
-               goto disable_device;
-       }
-
-       return 0;
-
-disable_device:
-       pci_clear_master(hdev->pdev);
-       pci_disable_device(hdev->pdev);
-
-       return rc;
-}
-
-static int device_kill_open_processes(struct hl_device *hdev)
-{
-       u16 pending_total, pending_cnt;
-       struct hl_fpriv *hpriv;
-       struct task_struct *task = NULL;
-
-       if (hdev->pldm)
-               pending_total = HL_PLDM_PENDING_RESET_PER_SEC;
-       else
-               pending_total = HL_PENDING_RESET_PER_SEC;
-
-       /* Give time for the user to close the FD, and for processes that are
-        * inside hl_device_open to finish
-        */
-       if (!list_empty(&hdev->fpriv_list))
-               ssleep(1);
-
-       mutex_lock(&hdev->fpriv_list_lock);
-
-       /* This section must be protected because we are dereferencing
-        * pointers that are freed if the process exits
-        */
-       list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
-               task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
-               if (task) {
-                       dev_info(hdev->dev, "Killing user process pid=%d\n",
-                               task_pid_nr(task));
-                       send_sig(SIGKILL, task, 1);
-                       usleep_range(1000, 10000);
-
-                       put_task_struct(task);
-               }
-       }
-
-       mutex_unlock(&hdev->fpriv_list_lock);
-
-       /* We killed the open users, but because the driver cleans up after the
-        * user contexts are closed (e.g. mmu mappings), we need to wait again
-        * to make sure the cleanup phase is finished before continuing with
-        * the reset
-        */
-
-       pending_cnt = pending_total;
-
-       while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
-               dev_info(hdev->dev,
-                       "Waiting for all unmap operations to finish before hard reset\n");
-
-               pending_cnt--;
-
-               ssleep(1);
-       }
-
-       return list_empty(&hdev->fpriv_list) ? 0 : -EBUSY;
-}
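device_kill_open_processes() waits for that cleanup with a bounded sleep-poll loop. A self-contained sketch of the same pattern follows; cleanup_done() is a hypothetical stand-in for list_empty(&hdev->fpriv_list) and the code is illustrative, not driver code.

#include <stdbool.h>
#include <unistd.h>

/*
 * Poll a condition once per second for at most 'budget' seconds and report
 * whether it was met; mirrors the pending_cnt loop above.
 */
static bool wait_for_cleanup(bool (*cleanup_done)(void), unsigned int budget)
{
        while (!cleanup_done() && budget--)
                sleep(1);

        return cleanup_done();
}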
-
-static void device_hard_reset_pending(struct work_struct *work)
-{
-       struct hl_device_reset_work *device_reset_work =
-               container_of(work, struct hl_device_reset_work, reset_work);
-       struct hl_device *hdev = device_reset_work->hdev;
-
-       hl_device_reset(hdev, true, true);
-
-       kfree(device_reset_work);
-}
-
-/*
- * hl_device_reset - reset the device
- *
- * @hdev: pointer to habanalabs device structure
- * @hard_reset: should we do hard reset to all engines or just reset the
- *              compute/dma engines
- * @from_hard_reset_thread: is the caller the hard-reset thread
- *
- * Block future CS and wait for pending CS to be enqueued
- * Call ASIC H/W fini
- * Flush all completions
- * Re-initialize all internal data structures
- * Call ASIC H/W init, late_init
- * Test queues
- * Enable device
- *
- * Returns 0 for success or an error on failure.
- */
-int hl_device_reset(struct hl_device *hdev, bool hard_reset,
-                       bool from_hard_reset_thread)
-{
-       int i, rc;
-
-       if (!hdev->init_done) {
-               dev_err(hdev->dev,
-                       "Can't reset before initialization is done\n");
-               return 0;
-       }
-
-       if ((!hard_reset) && (!hdev->supports_soft_reset)) {
-               dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
-               hard_reset = true;
-       }
-
-       /*
-        * Prevent concurrency in this function - only one reset should be
-        * done at any given time. We only need to do this if we didn't
-        * get here from the dedicated hard-reset thread
-        */
-       if (!from_hard_reset_thread) {
-               /* Block future CS/VM/JOB completion operations */
-               rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
-               if (rc)
-                       return 0;
-
-               if (hard_reset) {
-                       /* Disable PCI access from the device F/W so it won't
-                        * send us additional interrupts. We disable MSI/MSI-X
-                        * in the halt_engines function and we can't have the
-                        * F/W sending us interrupts after that. We need to
-                        * disable the access here because if the device is
-                        * marked as disabled, the message won't be sent. Also,
-                        * in case of a heartbeat failure, the device CPU is
-                        * marked as disabled, so this message won't be sent
-                        */
-                       if (hl_fw_send_pci_access_msg(hdev,
-                                       ARMCP_PACKET_DISABLE_PCI_ACCESS))
-                               dev_warn(hdev->dev,
-                                       "Failed to disable PCI access by F/W\n");
-               }
-
-               /* This also blocks future CS/VM/JOB completion operations */
-               hdev->disabled = true;
-
-               /* Flush anyone that is inside the critical section of
-                * enqueuing jobs to the H/W
-                */
-               hdev->asic_funcs->hw_queues_lock(hdev);
-               hdev->asic_funcs->hw_queues_unlock(hdev);
-
-               /* Flush anyone that is inside device open */
-               mutex_lock(&hdev->fpriv_list_lock);
-               mutex_unlock(&hdev->fpriv_list_lock);
-
-               dev_err(hdev->dev, "Going to RESET device!\n");
-       }
-
-again:
-       if ((hard_reset) && (!from_hard_reset_thread)) {
-               struct hl_device_reset_work *device_reset_work;
-
-               hdev->hard_reset_pending = true;
-
-               device_reset_work = kzalloc(sizeof(*device_reset_work),
-                                               GFP_ATOMIC);
-               if (!device_reset_work) {
-                       rc = -ENOMEM;
-                       goto out_err;
-               }
-
-               /*
-                * Because the reset function can't run from interrupt context
-                * or from the heartbeat work, we need to call the reset
-                * function from a dedicated work item
-                */
-               INIT_WORK(&device_reset_work->reset_work,
-                               device_hard_reset_pending);
-               device_reset_work->hdev = hdev;
-               schedule_work(&device_reset_work->reset_work);
-
-               return 0;
-       }
-
-       if (hard_reset) {
-               device_late_fini(hdev);
-
-               /*
-                * Now that the heartbeat thread is closed, flush processes
-                * that are sending messages to the device CPU
-                */
-               mutex_lock(&hdev->send_cpu_message_lock);
-               mutex_unlock(&hdev->send_cpu_message_lock);
-       }
-
-       /*
-        * Halt the engines and disable interrupts so we won't get any more
-        * completions from H/W and we won't have any accesses from the
-        * H/W to the host machine
-        */
-       hdev->asic_funcs->halt_engines(hdev, hard_reset);
-
-       /* Go over all the queues, release all CS and their jobs */
-       hl_cs_rollback_all(hdev);
-
-       if (hard_reset) {
-               /* Kill processes here after CS rollback. This is because the
-                * process can't really exit until all its CSs are done, which
-                * is what we do in cs rollback
-                */
-               rc = device_kill_open_processes(hdev);
-               if (rc) {
-                       dev_crit(hdev->dev,
-                               "Failed to kill all open processes, stopping hard reset\n");
-                       goto out_err;
-               }
-
-               /* Flush the Event queue workers to make sure no other thread is
-                * reading or writing to registers during the reset
-                */
-               flush_workqueue(hdev->eq_wq);
-       }
-
-       /* Release kernel context */
-       if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1))
-               hdev->kernel_ctx = NULL;
-
-       /* Reset the H/W. It will be in idle state after this returns */
-       hdev->asic_funcs->hw_fini(hdev, hard_reset);
-
-       if (hard_reset) {
-               hl_vm_fini(hdev);
-               hl_mmu_fini(hdev);
-               hl_eq_reset(hdev, &hdev->event_queue);
-       }
-
-       /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
-       hl_hw_queue_reset(hdev, hard_reset);
-       for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
-               hl_cq_reset(hdev, &hdev->completion_queue[i]);
-
-       hdev->idle_busy_ts_idx = 0;
-       hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0);
-       hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0);
-
-       if (hdev->cs_active_cnt)
-               dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n",
-                       hdev->cs_active_cnt);
-
-       mutex_lock(&hdev->fpriv_list_lock);
-
-       /* Make sure the context switch phase will run again */
-       if (hdev->compute_ctx) {
-               atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1);
-               hdev->compute_ctx->thread_ctx_switch_wait_token = 0;
-       }
-
-       mutex_unlock(&hdev->fpriv_list_lock);
-
-       /* Finished tear-down, starting to re-initialize */
-
-       if (hard_reset) {
-               hdev->device_cpu_disabled = false;
-               hdev->hard_reset_pending = false;
-
-               if (hdev->kernel_ctx) {
-                       dev_crit(hdev->dev,
-                               "kernel ctx was alive during hard reset, something is terribly wrong\n");
-                       rc = -EBUSY;
-                       goto out_err;
-               }
-
-               rc = hl_mmu_init(hdev);
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "Failed to initialize MMU S/W after hard reset\n");
-                       goto out_err;
-               }
-
-               /* Allocate the kernel context */
-               hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
-                                               GFP_KERNEL);
-               if (!hdev->kernel_ctx) {
-                       rc = -ENOMEM;
-                       goto out_err;
-               }
-
-               hdev->compute_ctx = NULL;
-
-               rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "failed to init kernel ctx in hard reset\n");
-                       kfree(hdev->kernel_ctx);
-                       hdev->kernel_ctx = NULL;
-                       goto out_err;
-               }
-       }
-
-       rc = hdev->asic_funcs->hw_init(hdev);
-       if (rc) {
-               dev_err(hdev->dev,
-                       "failed to initialize the H/W after reset\n");
-               goto out_err;
-       }
-
-       hdev->disabled = false;
-
-       /* Check that the communication with the device is working */
-       rc = hdev->asic_funcs->test_queues(hdev);
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to detect if device is alive after reset\n");
-               goto out_err;
-       }
-
-       if (hard_reset) {
-               rc = device_late_init(hdev);
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "Failed late init after hard reset\n");
-                       goto out_err;
-               }
-
-               rc = hl_vm_init(hdev);
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "Failed to init memory module after hard reset\n");
-                       goto out_err;
-               }
-
-               hl_set_max_power(hdev, hdev->max_power);
-       } else {
-               rc = hdev->asic_funcs->soft_reset_late_init(hdev);
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "Failed late init after soft reset\n");
-                       goto out_err;
-               }
-       }
-
-       atomic_set(&hdev->in_reset, 0);
-
-       if (hard_reset)
-               hdev->hard_reset_cnt++;
-       else
-               hdev->soft_reset_cnt++;
-
-       dev_warn(hdev->dev, "Successfully finished resetting the device\n");
-
-       return 0;
-
-out_err:
-       hdev->disabled = true;
-
-       if (hard_reset) {
-               dev_err(hdev->dev,
-                       "Failed to reset! Device is NOT usable\n");
-               hdev->hard_reset_cnt++;
-       } else {
-               dev_err(hdev->dev,
-                       "Failed to do soft-reset, trying hard reset\n");
-               hdev->soft_reset_cnt++;
-               hard_reset = true;
-               goto again;
-       }
-
-       atomic_set(&hdev->in_reset, 0);
-
-       return rc;
-}
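Both the suspend and reset paths serialize on hdev->in_reset with an atomic compare-and-swap: only the caller that wins the 0 -> 1 transition proceeds. Here is a minimal C11 sketch of that guard, using stdatomic and invented names rather than the kernel's atomic_t API; it is an illustration, not the driver's implementation.

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int in_reset;

/* Only one caller wins the 0 -> 1 transition and may perform the reset. */
static bool try_enter_reset(void)
{
        int expected = 0;

        return atomic_compare_exchange_strong(&in_reset, &expected, 1);
}

/* Re-arm the guard once the reset (or suspend) flow is done. */
static void leave_reset(void)
{
        atomic_store(&in_reset, 0);
}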
-
-/*
- * hl_device_init - main initialization function for habanalabs device
- *
- * @hdev: pointer to habanalabs device structure
- *
- * Allocate an id for the device, do early initialization and then call the
- * ASIC specific initialization functions. Finally, create the cdev and the
- * Linux device to expose it to the user
- */
-int hl_device_init(struct hl_device *hdev, struct class *hclass)
-{
-       int i, rc, cq_cnt, cq_ready_cnt;
-       char *name;
-       bool add_cdev_sysfs_on_err = false;
-
-       name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
-       if (!name) {
-               rc = -ENOMEM;
-               goto out_disabled;
-       }
-
-       /* Initialize cdev and device structures */
-       rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
-                               &hdev->cdev, &hdev->dev);
-
-       kfree(name);
-
-       if (rc)
-               goto out_disabled;
-
-       name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
-       if (!name) {
-               rc = -ENOMEM;
-               goto free_dev;
-       }
-
-       /* Initialize cdev and device structures for control device */
-       rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
-                               name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
-
-       kfree(name);
-
-       if (rc)
-               goto free_dev;
-
-       /* Initialize ASIC function pointers and perform early init */
-       rc = device_early_init(hdev);
-       if (rc)
-               goto free_dev_ctrl;
-
-       /*
-        * Start calling ASIC initialization. First S/W then H/W and finally
-        * late init
-        */
-       rc = hdev->asic_funcs->sw_init(hdev);
-       if (rc)
-               goto early_fini;
-
-       /*
-        * Initialize the H/W queues. Must be done before hw_init, because
-        * Initialize the H/W queues. Must be done before hw_init, because
-        * hw_init writes the addresses of the kernel queues to the
-        * registers of the device
-       rc = hl_hw_queues_create(hdev);
-       if (rc) {
-               dev_err(hdev->dev, "failed to initialize kernel queues\n");
-               goto sw_fini;
-       }
-
-       cq_cnt = hdev->asic_prop.completion_queues_count;
-
-       /*
-        * Initialize the completion queues. Must be done before hw_init,
-        * because during hw_init the addresses of the completion queues are
-        * passed as arguments to request_irq
-        */
-       if (cq_cnt) {
-               hdev->completion_queue = kcalloc(cq_cnt,
-                               sizeof(*hdev->completion_queue),
-                               GFP_KERNEL);
-
-               if (!hdev->completion_queue) {
-                       dev_err(hdev->dev,
-                               "failed to allocate completion queues\n");
-                       rc = -ENOMEM;
-                       goto hw_queues_destroy;
-               }
-       }
-
-       for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
-               rc = hl_cq_init(hdev, &hdev->completion_queue[i],
-                               hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "failed to initialize completion queue\n");
-                       goto cq_fini;
-               }
-               hdev->completion_queue[i].cq_idx = i;
-       }
-
-       /*
-        * Initialize the event queue. Must be done before hw_init,
-        * because during hw_init the address of the event queue is
-        * passed as an argument to request_irq
-        */
-       rc = hl_eq_init(hdev, &hdev->event_queue);
-       if (rc) {
-               dev_err(hdev->dev, "failed to initialize event queue\n");
-               goto cq_fini;
-       }
-
-       /* MMU S/W must be initialized before kernel context is created */
-       rc = hl_mmu_init(hdev);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
-               goto eq_fini;
-       }
-
-       /* Allocate the kernel context */
-       hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
-       if (!hdev->kernel_ctx) {
-               rc = -ENOMEM;
-               goto mmu_fini;
-       }
-
-       hdev->compute_ctx = NULL;
-
-       rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
-       if (rc) {
-               dev_err(hdev->dev, "failed to initialize kernel context\n");
-               kfree(hdev->kernel_ctx);
-               goto mmu_fini;
-       }
-
-       rc = hl_cb_pool_init(hdev);
-       if (rc) {
-               dev_err(hdev->dev, "failed to initialize CB pool\n");
-               goto release_ctx;
-       }
-
-       hl_debugfs_add_device(hdev);
-
-       if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
-               dev_info(hdev->dev,
-                       "H/W state is dirty, must reset before initializing\n");
-               hdev->asic_funcs->halt_engines(hdev, true);
-               hdev->asic_funcs->hw_fini(hdev, true);
-       }
-
-       /*
-        * From this point, in case of an error, add char devices and create
-        * sysfs nodes as part of the error flow, to allow debugging.
-        */
-       add_cdev_sysfs_on_err = true;
-
-       rc = hdev->asic_funcs->hw_init(hdev);
-       if (rc) {
-               dev_err(hdev->dev, "failed to initialize the H/W\n");
-               rc = 0;
-               goto out_disabled;
-       }
-
-       hdev->disabled = false;
-
-       /* Check that the communication with the device is working */
-       rc = hdev->asic_funcs->test_queues(hdev);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to detect if device is alive\n");
-               rc = 0;
-               goto out_disabled;
-       }
-
-       rc = device_late_init(hdev);
-       if (rc) {
-               dev_err(hdev->dev, "Failed late initialization\n");
-               rc = 0;
-               goto out_disabled;
-       }
-
-       dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
-               hdev->asic_name,
-               hdev->asic_prop.dram_size / 1024 / 1024 / 1024);
-
-       rc = hl_vm_init(hdev);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to initialize memory module\n");
-               rc = 0;
-               goto out_disabled;
-       }
-
-       /*
-        * Expose devices and sysfs nodes to the user.
-        * From this point on, there is no need to add char devices or create
-        * sysfs nodes in case of an error.
-        */
-       add_cdev_sysfs_on_err = false;
-       rc = device_cdev_sysfs_add(hdev);
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to add char devices and sysfs nodes\n");
-               rc = 0;
-               goto out_disabled;
-       }
-
-       /*
-        * hl_hwmon_init() must be called after device_late_init(), because
-        * only then do we get the information from the device about which
-        * hwmon-related sensors it supports.
-        * Furthermore, it must be done after adding the device to the system.
-        */
-       rc = hl_hwmon_init(hdev);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to initialize hwmon\n");
-               rc = 0;
-               goto out_disabled;
-       }
-
-       dev_notice(hdev->dev,
-               "Successfully added device to habanalabs driver\n");
-
-       hdev->init_done = true;
-
-       return 0;
-
-release_ctx:
-       if (hl_ctx_put(hdev->kernel_ctx) != 1)
-               dev_err(hdev->dev,
-                       "kernel ctx is still alive on initialization failure\n");
-mmu_fini:
-       hl_mmu_fini(hdev);
-eq_fini:
-       hl_eq_fini(hdev, &hdev->event_queue);
-cq_fini:
-       for (i = 0 ; i < cq_ready_cnt ; i++)
-               hl_cq_fini(hdev, &hdev->completion_queue[i]);
-       kfree(hdev->completion_queue);
-hw_queues_destroy:
-       hl_hw_queues_destroy(hdev);
-sw_fini:
-       hdev->asic_funcs->sw_fini(hdev);
-early_fini:
-       device_early_fini(hdev);
-free_dev_ctrl:
-       kfree(hdev->dev_ctrl);
-free_dev:
-       kfree(hdev->dev);
-out_disabled:
-       hdev->disabled = true;
-       if (add_cdev_sysfs_on_err)
-               device_cdev_sysfs_add(hdev);
-       if (hdev->pdev)
-               dev_err(&hdev->pdev->dev,
-                       "Failed to initialize hl%d. Device is NOT usable !\n",
-                       hdev->id / 2);
-       else
-               pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
-                       hdev->id / 2);
-
-       return rc;
-}
-
-/*
- * hl_device_fini - main tear-down function for habanalabs device
- *
- * @hdev: pointer to habanalabs device structure
- *
- * Destroy the device, call ASIC fini functions and release the id
- */
-void hl_device_fini(struct hl_device *hdev)
-{
-       int i, rc;
-       ktime_t timeout;
-
-       dev_info(hdev->dev, "Removing device\n");
-
-       /*
-        * This function competes with the reset function, so try to take the
-        * reset atomic and, if we are already in the middle of a reset, wait
-        * until the reset function finishes. The reset function is designed
-        * to always finish. However, in Gaudi, because of all the network
-        * ports, a hard reset can take 10-30 seconds
-        */
-
-       timeout = ktime_add_us(ktime_get(),
-                               HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000);
-       rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
-       while (rc) {
-               usleep_range(50, 200);
-               rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
-               if (ktime_compare(ktime_get(), timeout) > 0) {
-                       WARN(1, "Failed to remove device because reset function did not finish\n");
-                       return;
-               }
-       }
-
-       /* Mark device as disabled */
-       hdev->disabled = true;
-
-       /* Flush anyone that is inside the critical section of
-        * enqueuing jobs to the H/W
-        */
-       hdev->asic_funcs->hw_queues_lock(hdev);
-       hdev->asic_funcs->hw_queues_unlock(hdev);
-
-       /* Flush anyone that is inside device open */
-       mutex_lock(&hdev->fpriv_list_lock);
-       mutex_unlock(&hdev->fpriv_list_lock);
-
-       hdev->hard_reset_pending = true;
-
-       hl_hwmon_fini(hdev);
-
-       device_late_fini(hdev);
-
-       hl_debugfs_remove_device(hdev);
-
-       /*
-        * Halt the engines and disable interrupts so we won't get any more
-        * completions from H/W and we won't have any accesses from the
-        * H/W to the host machine
-        */
-       hdev->asic_funcs->halt_engines(hdev, true);
-
-       /* Go over all the queues, release all CS and their jobs */
-       hl_cs_rollback_all(hdev);
-
-       /* Kill processes here after CS rollback. This is because the process
-        * can't really exit until all its CSs are done, which is what we
-        * do in cs rollback
-        */
-       rc = device_kill_open_processes(hdev);
-       if (rc)
-               dev_crit(hdev->dev, "Failed to kill all open processes\n");
-
-       hl_cb_pool_fini(hdev);
-
-       /* Release kernel context */
-       if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
-               dev_err(hdev->dev, "kernel ctx is still alive\n");
-
-       /* Reset the H/W. It will be in idle state after this returns */
-       hdev->asic_funcs->hw_fini(hdev, true);
-
-       hl_vm_fini(hdev);
-
-       hl_mmu_fini(hdev);
-
-       hl_eq_fini(hdev, &hdev->event_queue);
-
-       for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
-               hl_cq_fini(hdev, &hdev->completion_queue[i]);
-       kfree(hdev->completion_queue);
-
-       hl_hw_queues_destroy(hdev);
-
-       /* Call ASIC S/W finalize function */
-       hdev->asic_funcs->sw_fini(hdev);
-
-       device_early_fini(hdev);
-
-       /* Hide devices and sysfs nodes from user */
-       device_cdev_sysfs_del(hdev);
-
-       pr_info("removed device successfully\n");
-}
-
-/*
- * MMIO register access helper functions.
- */
-
-/*
- * hl_rreg - Read an MMIO register
- *
- * @hdev: pointer to habanalabs device structure
- * @reg: MMIO register offset (in bytes)
- *
- * Returns the value of the MMIO register we are asked to read
- *
- */
-inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
-{
-       return readl(hdev->rmmio + reg);
-}
-
-/*
- * hl_wreg - Write to an MMIO register
- *
- * @hdev: pointer to habanalabs device structure
- * @reg: MMIO register offset (in bytes)
- * @val: 32-bit value
- *
- * Writes the 32-bit value into the MMIO register
- *
- */
-inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
-{
-       writel(val, hdev->rmmio + reg);
-}
diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c
deleted file mode 100644 (file)
index 3be1549..0000000
+++ /dev/null
@@ -1,589 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-#include "include/hl_boot_if.h"
-
-#include <linux/firmware.h>
-#include <linux/genalloc.h>
-#include <linux/io-64-nonatomic-lo-hi.h>
-#include <linux/slab.h>
-
-/**
- * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
- *
- * @hdev: pointer to hl_device structure.
- * @fw_name: the firmware image name
- * @dst: IO memory mapped address space to copy firmware to
- *
- * Copy fw code from firmware file to device memory.
- *
- * Return: 0 on success, non-zero for failure.
- */
-int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
-                               void __iomem *dst)
-{
-       const struct firmware *fw;
-       const u64 *fw_data;
-       size_t fw_size;
-       int rc;
-
-       rc = request_firmware(&fw, fw_name, hdev->dev);
-       if (rc) {
-               dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name);
-               goto out;
-       }
-
-       fw_size = fw->size;
-       if ((fw_size % 4) != 0) {
-               dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
-                       fw_name, fw_size);
-               rc = -EINVAL;
-               goto out;
-       }
-
-       dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
-
-       fw_data = (const u64 *) fw->data;
-
-       memcpy_toio(dst, fw_data, fw_size);
-
-out:
-       release_firmware(fw);
-       return rc;
-}
-
-int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
-{
-       struct armcp_packet pkt = {};
-
-       pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT);
-
-       return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
-                               sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL);
-}
-
-int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
-                               u16 len, u32 timeout, long *result)
-{
-       struct armcp_packet *pkt;
-       dma_addr_t pkt_dma_addr;
-       u32 tmp;
-       int rc = 0;
-
-       pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
-                                                               &pkt_dma_addr);
-       if (!pkt) {
-               dev_err(hdev->dev,
-                       "Failed to allocate DMA memory for packet to CPU\n");
-               return -ENOMEM;
-       }
-
-       memcpy(pkt, msg, len);
-
-       mutex_lock(&hdev->send_cpu_message_lock);
-
-       if (hdev->disabled)
-               goto out;
-
-       if (hdev->device_cpu_disabled) {
-               rc = -EIO;
-               goto out;
-       }
-
-       rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
-               goto out;
-       }
-
-       rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
-                               (tmp == ARMCP_PACKET_FENCE_VAL), 1000,
-                               timeout, true);
-
-       hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
-
-       if (rc == -ETIMEDOUT) {
-               dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp);
-               hdev->device_cpu_disabled = true;
-               goto out;
-       }
-
-       tmp = le32_to_cpu(pkt->ctl);
-
-       rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
-       if (rc) {
-               dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
-                       rc,
-                       (tmp & ARMCP_PKT_CTL_OPCODE_MASK)
-                                               >> ARMCP_PKT_CTL_OPCODE_SHIFT);
-               rc = -EIO;
-       } else if (result) {
-               *result = (long) le64_to_cpu(pkt->result);
-       }
-
-out:
-       mutex_unlock(&hdev->send_cpu_message_lock);
-
-       hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
-
-       return rc;
-}
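hl_fw_send_cpu_message() extracts the return code and the opcode from the packet's ctl word with shift/mask pairs. The sketch below shows that general packing scheme with invented field layouts; the real ARMCP_PKT_CTL_* values live in armcp_if.h and may differ.

#include <stdint.h>
#include <stdio.h>

/* Invented layout for illustration only, not the armcp_if.h definitions. */
#define PKT_CTL_RC_SHIFT        0
#define PKT_CTL_RC_MASK         0x000000ffu
#define PKT_CTL_OPCODE_SHIFT    16
#define PKT_CTL_OPCODE_MASK     0xffff0000u

int main(void)
{
        uint32_t ctl = (7u << PKT_CTL_OPCODE_SHIFT) | (2u << PKT_CTL_RC_SHIFT);

        printf("opcode=%u rc=%u\n",
               (ctl & PKT_CTL_OPCODE_MASK) >> PKT_CTL_OPCODE_SHIFT,
               (ctl & PKT_CTL_RC_MASK) >> PKT_CTL_RC_SHIFT);
        return 0;
}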
-
-int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
-{
-       struct armcp_packet pkt;
-       long result;
-       int rc;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.value = cpu_to_le64(event_type);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                       HL_DEVICE_TIMEOUT_USEC, &result);
-
-       if (rc)
-               dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
-
-       return rc;
-}
-
-int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
-               size_t irq_arr_size)
-{
-       struct armcp_unmask_irq_arr_packet *pkt;
-       size_t total_pkt_size;
-       long result;
-       int rc;
-
-       total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
-                       irq_arr_size;
-
-       /* data should be aligned to 8 bytes in order for ArmCP to copy it */
-       total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
-
-       /* total_pkt_size is cast to u16 later on */
-       if (total_pkt_size > USHRT_MAX) {
-               dev_err(hdev->dev, "too many elements in IRQ array\n");
-               return -EINVAL;
-       }
-
-       pkt = kzalloc(total_pkt_size, GFP_KERNEL);
-       if (!pkt)
-               return -ENOMEM;
-
-       pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
-       memcpy(&pkt->irqs, irq_arr, irq_arr_size);
-
-       pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
-                                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
-                       total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result);
-
-       if (rc)
-               dev_err(hdev->dev, "failed to unmask IRQ array\n");
-
-       kfree(pkt);
-
-       return rc;
-}
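The (total_pkt_size + 0x7) & ~0x7 expression above is the usual power-of-two round-up. A small standalone check of that arithmetic, illustrative only:

#include <stddef.h>
#include <stdio.h>

/* Round n up to the next multiple of 8, as (n + 0x7) & ~0x7 does above. */
static size_t round_up_8(size_t n)
{
        return (n + 7) & ~(size_t)7;
}

int main(void)
{
        printf("%zu %zu %zu\n", round_up_8(1), round_up_8(8), round_up_8(13));
        /* prints: 8 8 16 */
        return 0;
}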
-
-int hl_fw_test_cpu_queue(struct hl_device *hdev)
-{
-       struct armcp_packet test_pkt = {};
-       long result;
-       int rc;
-
-       test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
-                                       ARMCP_PKT_CTL_OPCODE_SHIFT);
-       test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
-                       sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
-
-       if (!rc) {
-               if (result != ARMCP_PACKET_FENCE_VAL)
-                       dev_err(hdev->dev,
-                               "CPU queue test failed (0x%08lX)\n", result);
-       } else {
-               dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
-       }
-
-       return rc;
-}
-
-void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
-                                               dma_addr_t *dma_handle)
-{
-       u64 kernel_addr;
-
-       kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
-
-       *dma_handle = hdev->cpu_accessible_dma_address +
-               (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
-
-       return (void *) (uintptr_t) kernel_addr;
-}
-
-void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
-                                       void *vaddr)
-{
-       gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
-                       size);
-}
-
-int hl_fw_send_heartbeat(struct hl_device *hdev)
-{
-       struct armcp_packet hb_pkt = {};
-       long result;
-       int rc;
-
-       hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
-                                       ARMCP_PKT_CTL_OPCODE_SHIFT);
-       hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
-                       sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
-
-       if ((rc) || (result != ARMCP_PACKET_FENCE_VAL))
-               rc = -EIO;
-
-       return rc;
-}
-
-int hl_fw_armcp_info_get(struct hl_device *hdev)
-{
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       struct armcp_packet pkt = {};
-       void *armcp_info_cpu_addr;
-       dma_addr_t armcp_info_dma_addr;
-       long result;
-       int rc;
-
-       armcp_info_cpu_addr =
-                       hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
-                                       sizeof(struct armcp_info),
-                                       &armcp_info_dma_addr);
-       if (!armcp_info_cpu_addr) {
-               dev_err(hdev->dev,
-                       "Failed to allocate DMA memory for ArmCP info packet\n");
-               return -ENOMEM;
-       }
-
-       memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.addr = cpu_to_le64(armcp_info_dma_addr);
-       pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info));
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       HL_ARMCP_INFO_TIMEOUT_USEC, &result);
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to handle ArmCP info pkt, error %d\n", rc);
-               goto out;
-       }
-
-       memcpy(&prop->armcp_info, armcp_info_cpu_addr,
-                       sizeof(prop->armcp_info));
-
-       rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors);
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to build hwmon channel info, error %d\n", rc);
-               rc = -EFAULT;
-               goto out;
-       }
-
-out:
-       hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
-                       sizeof(struct armcp_info), armcp_info_cpu_addr);
-
-       return rc;
-}
-
-int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
-{
-       struct armcp_packet pkt = {};
-       void *eeprom_info_cpu_addr;
-       dma_addr_t eeprom_info_dma_addr;
-       long result;
-       int rc;
-
-       eeprom_info_cpu_addr =
-                       hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
-                                       max_size, &eeprom_info_dma_addr);
-       if (!eeprom_info_cpu_addr) {
-               dev_err(hdev->dev,
-                       "Failed to allocate DMA memory for ArmCP EEPROM packet\n");
-               return -ENOMEM;
-       }
-
-       memset(eeprom_info_cpu_addr, 0, max_size);
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
-       pkt.data_max_size = cpu_to_le32(max_size);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                       HL_ARMCP_EEPROM_TIMEOUT_USEC, &result);
-
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to handle ArmCP EEPROM packet, error %d\n", rc);
-               goto out;
-       }
-
-       /* result contains the actual size */
-       memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
-
-out:
-       hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
-                       eeprom_info_cpu_addr);
-
-       return rc;
-}
-
-static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
-{
-       u32 err_val;
-
-       /* Some of the firmware status codes are deprecated in newer f/w
-        * versions. In those versions, the errors are reported
-        * in different registers. Therefore, we need to check those
-        * registers and print the exact errors. Moreover, there
-        * may be multiple errors, so we need to report on each error
-        * separately. Some of the error codes might indicate a state
-        * that is not an error per se, but it is an error in a
-        * production environment
-        */
-       err_val = RREG32(boot_err0_reg);
-       if (!(err_val & CPU_BOOT_ERR0_ENABLED))
-               return;
-
-       if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
-               dev_err(hdev->dev,
-                       "Device boot error - DRAM initialization failed\n");
-       if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
-               dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
-       if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
-               dev_err(hdev->dev,
-                       "Device boot error - Thermal Sensor initialization failed\n");
-       if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
-               dev_warn(hdev->dev,
-                       "Device boot warning - Skipped DRAM initialization\n");
-       if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED)
-               dev_warn(hdev->dev,
-                       "Device boot error - Skipped waiting for BMC\n");
-       if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
-               dev_err(hdev->dev,
-                       "Device boot error - Serdes data from BMC not available\n");
-       if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
-               dev_err(hdev->dev,
-                       "Device boot error - NIC F/W initialization failed\n");
-}
-
-static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status)
-{
-       switch (status) {
-       case CPU_BOOT_STATUS_NA:
-               dev_err(hdev->dev,
-                       "Device boot error - BTL did NOT run\n");
-               break;
-       case CPU_BOOT_STATUS_IN_WFE:
-               dev_err(hdev->dev,
-                       "Device boot error - Stuck inside WFE loop\n");
-               break;
-       case CPU_BOOT_STATUS_IN_BTL:
-               dev_err(hdev->dev,
-                       "Device boot error - Stuck in BTL\n");
-               break;
-       case CPU_BOOT_STATUS_IN_PREBOOT:
-               dev_err(hdev->dev,
-                       "Device boot error - Stuck in Preboot\n");
-               break;
-       case CPU_BOOT_STATUS_IN_SPL:
-               dev_err(hdev->dev,
-                       "Device boot error - Stuck in SPL\n");
-               break;
-       case CPU_BOOT_STATUS_IN_UBOOT:
-               dev_err(hdev->dev,
-                       "Device boot error - Stuck in u-boot\n");
-               break;
-       case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
-               dev_err(hdev->dev,
-                       "Device boot error - DRAM initialization failed\n");
-               break;
-       case CPU_BOOT_STATUS_UBOOT_NOT_READY:
-               dev_err(hdev->dev,
-                       "Device boot error - u-boot stopped by user\n");
-               break;
-       case CPU_BOOT_STATUS_TS_INIT_FAIL:
-               dev_err(hdev->dev,
-                       "Device boot error - Thermal Sensor initialization failed\n");
-               break;
-       default:
-               dev_err(hdev->dev,
-                       "Device boot error - Invalid status code %d\n",
-                       status);
-               break;
-       }
-}
-
-int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
-                       u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
-                       u32 boot_err0_reg, bool skip_bmc,
-                       u32 cpu_timeout, u32 boot_fit_timeout)
-{
-       u32 status;
-       int rc;
-
-       dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
-               cpu_timeout / USEC_PER_SEC);
-
-       /* Wait for boot FIT request */
-       rc = hl_poll_timeout(
-               hdev,
-               cpu_boot_status_reg,
-               status,
-               status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
-               10000,
-               boot_fit_timeout);
-
-       if (rc) {
-               dev_dbg(hdev->dev,
-                       "No boot fit request received, resuming boot\n");
-       } else {
-               rc = hdev->asic_funcs->load_boot_fit_to_device(hdev);
-               if (rc)
-                       goto out;
-
-               /* Clear device CPU message status */
-               WREG32(cpu_msg_status_reg, CPU_MSG_CLR);
-
-               /* Signal device CPU that boot loader is ready */
-               WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
-
-               /* Poll for CPU device ack */
-               rc = hl_poll_timeout(
-                       hdev,
-                       cpu_msg_status_reg,
-                       status,
-                       status == CPU_MSG_OK,
-                       10000,
-                       boot_fit_timeout);
-
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "Timeout waiting for boot fit load ack\n");
-                       goto out;
-               }
-
-               /* Clear message */
-               WREG32(msg_to_cpu_reg, KMD_MSG_NA);
-       }
-
-       /* Make sure CPU boot-loader is running */
-       rc = hl_poll_timeout(
-               hdev,
-               cpu_boot_status_reg,
-               status,
-               (status == CPU_BOOT_STATUS_DRAM_RDY) ||
-               (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
-               (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
-               (status == CPU_BOOT_STATUS_SRAM_AVAIL),
-               10000,
-               cpu_timeout);
-
-       /* Read U-Boot and preboot versions now in case we fail later */
-       hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
-       hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
-
-       /* Some of the status codes below are deprecated in newer f/w
-        * versions but we keep them here for backward compatibility
-        */
-       if (rc) {
-               hl_detect_cpu_boot_status(hdev, status);
-               rc = -EIO;
-               goto out;
-       }
-
-       if (!hdev->fw_loading) {
-               dev_info(hdev->dev, "Skip loading FW\n");
-               goto out;
-       }
-
-       if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
-               goto out;
-
-       dev_info(hdev->dev,
-               "Loading firmware to device, may take some time...\n");
-
-       rc = hdev->asic_funcs->load_firmware_to_device(hdev);
-       if (rc)
-               goto out;
-
-       if (skip_bmc) {
-               WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC);
-
-               rc = hl_poll_timeout(
-                       hdev,
-                       cpu_boot_status_reg,
-                       status,
-                       (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
-                       10000,
-                       cpu_timeout);
-
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "Failed to get ACK on skipping BMC, %d\n",
-                               status);
-                       WREG32(msg_to_cpu_reg, KMD_MSG_NA);
-                       rc = -EIO;
-                       goto out;
-               }
-       }
-
-       WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
-
-       rc = hl_poll_timeout(
-               hdev,
-               cpu_boot_status_reg,
-               status,
-               (status == CPU_BOOT_STATUS_SRAM_AVAIL),
-               10000,
-               cpu_timeout);
-
-       /* Clear message */
-       WREG32(msg_to_cpu_reg, KMD_MSG_NA);
-
-       if (rc) {
-               if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
-                       dev_err(hdev->dev,
-                               "Device reports FIT image is corrupted\n");
-               else
-                       dev_err(hdev->dev,
-                               "Failed to load firmware to device, %d\n",
-                               status);
-
-               rc = -EIO;
-               goto out;
-       }
-
-       dev_info(hdev->dev, "Successfully loaded firmware to device\n");
-
-out:
-       fw_read_errors(hdev, boot_err0_reg);
-
-       return rc;
-}
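hl_fw_init_cpu() drives the boot handshake almost entirely with the hl_poll_timeout() pattern: re-read a status register at a fixed interval until a condition holds or a deadline passes. Below is a hedged userspace sketch of that loop; read_status() is a hypothetical stand-in for RREG32(cpu_boot_status_reg) and the function is not part of the driver.

#include <errno.h>
#include <unistd.h>

/*
 * Poll read_status() every interval_us microseconds until it returns 'want'
 * or timeout_us microseconds have elapsed; 0 on success, -ETIMEDOUT on
 * timeout.
 */
static int poll_status(unsigned int (*read_status)(void), unsigned int want,
                       unsigned int interval_us, unsigned int timeout_us)
{
        unsigned int waited = 0;

        for (;;) {
                if (read_status() == want)
                        return 0;
                if (waited >= timeout_us)
                        return -ETIMEDOUT;
                usleep(interval_us);
                waited += interval_us;
        }
}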
index f802cdc..75104ae 100644 (file)
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-subdir-ccflags-y += -I$(src)
+subdir-ccflags-y += -I$(src)/common
 
 HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_hwmgr.o gaudi/gaudi_security.o \
        gaudi/gaudi_coresight.o
index bdc5f96..a94ab6a 100644 (file)
@@ -10,7 +10,7 @@
 
 #include <uapi/misc/habanalabs.h>
 #include "habanalabs.h"
-#include "include/hl_boot_if.h"
+#include "include/common/hl_boot_if.h"
 #include "include/gaudi/gaudi_packets.h"
 #include "include/gaudi/gaudi.h"
 #include "include/gaudi/gaudi_async_events.h"
index 8265cc2..9e674cf 100644 (file)
@@ -10,7 +10,7 @@
 
 #include <uapi/misc/habanalabs.h>
 #include "habanalabs.h"
-#include "include/hl_boot_if.h"
+#include "include/common/hl_boot_if.h"
 #include "include/goya/goya_packets.h"
 #include "include/goya/goya.h"
 #include "include/goya/goya_async_events.h"
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
deleted file mode 100644 (file)
index 14def0d..0000000
+++ /dev/null
@@ -1,1948 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-#ifndef HABANALABSP_H_
-#define HABANALABSP_H_
-
-#include "include/armcp_if.h"
-#include "include/qman_if.h"
-#include <uapi/misc/habanalabs.h>
-
-#include <linux/cdev.h>
-#include <linux/iopoll.h>
-#include <linux/irqreturn.h>
-#include <linux/dma-fence.h>
-#include <linux/dma-direction.h>
-#include <linux/scatterlist.h>
-#include <linux/hashtable.h>
-
-#define HL_NAME                                "habanalabs"
-
-#define HL_MMAP_CB_MASK                        (0x8000000000000000ull >> PAGE_SHIFT)
-
-#define HL_PENDING_RESET_PER_SEC       30
-
-#define HL_HARD_RESET_MAX_TIMEOUT      120
-
-#define HL_DEVICE_TIMEOUT_USEC         1000000 /* 1 s */
-
-#define HL_HEARTBEAT_PER_USEC          5000000 /* 5 s */
-
-#define HL_PLL_LOW_JOB_FREQ_USEC       5000000 /* 5 s */
-
-#define HL_ARMCP_INFO_TIMEOUT_USEC     10000000 /* 10s */
-#define HL_ARMCP_EEPROM_TIMEOUT_USEC   10000000 /* 10s */
-
-#define HL_PCI_ELBI_TIMEOUT_MSEC       10 /* 10ms */
-
-#define HL_SIM_MAX_TIMEOUT_US          10000000 /* 10s */
-
-#define HL_IDLE_BUSY_TS_ARR_SIZE       4096
-
-/* Memory */
-#define MEM_HASH_TABLE_BITS            7 /* 1 << 7 buckets */
-
-/* MMU */
-#define MMU_HASH_TABLE_BITS            7 /* 1 << 7 buckets */
-
-/*
- * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
- * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
- */
-#define HL_RSVD_SOBS                   4
-#define HL_RSVD_MONS                   2
-
-#define HL_RSVD_SOBS_IN_USE            2
-#define HL_RSVD_MONS_IN_USE            1
-
-#define HL_MAX_SOB_VAL                 (1 << 15)
-
-#define IS_POWER_OF_2(n)               (n != 0 && ((n & (n - 1)) == 0))
-#define IS_MAX_PENDING_CS_VALID(n)     (IS_POWER_OF_2(n) && (n > 1))
-
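IS_POWER_OF_2 above uses the classic n & (n - 1) trick: clearing the lowest set bit leaves zero only when exactly one bit was set. A quick standalone check, illustrative only:

#include <stdbool.h>
#include <stdio.h>

static bool is_power_of_2(unsigned int n)
{
        return n != 0 && (n & (n - 1)) == 0;
}

int main(void)
{
        printf("%d %d %d\n", is_power_of_2(1), is_power_of_2(6),
               is_power_of_2(64));
        /* prints: 1 0 1 */
        return 0;
}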
-#define HL_PCI_NUM_BARS                        6
-
-/**
- * struct pgt_info - MMU hop page info.
- * @node: hash linked-list node for the pgts shadow hash of pgts.
- * @phys_addr: physical address of the pgt.
- * @shadow_addr: shadow hop in the host.
- * @ctx: pointer to the owner ctx.
- * @num_of_ptes: indicates how many ptes are used in the pgt.
- *
- * The MMU page tables hierarchy is placed on the DRAM. When a new level (hop)
- * is needed during mapping, a new page is allocated and this structure holds
- * its essential information. During unmapping, if no valid PTEs remained in the
- * page, it is freed with its pgt_info structure.
- */
-struct pgt_info {
-       struct hlist_node       node;
-       u64                     phys_addr;
-       u64                     shadow_addr;
-       struct hl_ctx           *ctx;
-       int                     num_of_ptes;
-};
-
-struct hl_device;
-struct hl_fpriv;
-
-/**
- * enum hl_pci_match_mode - pci match mode per region
- * @PCI_ADDRESS_MATCH_MODE: address match mode
- * @PCI_BAR_MATCH_MODE: bar match mode
- */
-enum hl_pci_match_mode {
-       PCI_ADDRESS_MATCH_MODE,
-       PCI_BAR_MATCH_MODE
-};
-
-/**
- * enum hl_fw_component - F/W components to read version through registers.
- * @FW_COMP_UBOOT: u-boot.
- * @FW_COMP_PREBOOT: preboot.
- */
-enum hl_fw_component {
-       FW_COMP_UBOOT,
-       FW_COMP_PREBOOT
-};
-
-/**
- * enum hl_queue_type - Supported QUEUE types.
- * @QUEUE_TYPE_NA: queue is not available.
- * @QUEUE_TYPE_EXT: external queue which is a DMA channel that may access the
- *                  host.
- * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's
- *                     memories and/or operates the compute engines.
- * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU.
- * @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion
- *                 notifications are sent by H/W.
- */
-enum hl_queue_type {
-       QUEUE_TYPE_NA,
-       QUEUE_TYPE_EXT,
-       QUEUE_TYPE_INT,
-       QUEUE_TYPE_CPU,
-       QUEUE_TYPE_HW
-};
-
-enum hl_cs_type {
-       CS_TYPE_DEFAULT,
-       CS_TYPE_SIGNAL,
-       CS_TYPE_WAIT
-};
-
-/*
- * struct hl_inbound_pci_region - inbound region descriptor
- * @mode: pci match mode for this region
- * @addr: region target address
- * @size: region size in bytes
- * @offset_in_bar: offset within bar (address match mode)
- * @bar: bar id
- */
-struct hl_inbound_pci_region {
-       enum hl_pci_match_mode  mode;
-       u64                     addr;
-       u64                     size;
-       u64                     offset_in_bar;
-       u8                      bar;
-};
-
-/*
- * struct hl_outbound_pci_region - outbound region descriptor
- * @addr: region target address
- * @size: region size in bytes
- */
-struct hl_outbound_pci_region {
-       u64     addr;
-       u64     size;
-};
-
-/*
- * struct hl_hw_sob - H/W SOB info.
- * @hdev: habanalabs device structure.
- * @kref: refcount of this SOB. The SOB will reset once the refcount is zero.
- * @sob_id: id of this SOB.
- * @q_idx: the H/W queue that uses this SOB.
- */
-struct hl_hw_sob {
-       struct hl_device        *hdev;
-       struct kref             kref;
-       u32                     sob_id;
-       u32                     q_idx;
-};
-
-/**
- * struct hw_queue_properties - queue information.
- * @type: queue type.
- * @driver_only: true if only the driver is allowed to send a job to this queue,
- *               false otherwise.
- * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
- *                      queue, false otherwise (a CB address must be provided).
- * @supports_sync_stream: True if queue supports sync stream
- */
-struct hw_queue_properties {
-       enum hl_queue_type      type;
-       u8                      driver_only;
-       u8                      requires_kernel_cb;
-       u8                      supports_sync_stream;
-};
-
-/**
- * enum vm_type_t - virtual memory mapping request information.
- * @VM_TYPE_USERPTR: mapping of user memory to device virtual address.
- * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address.
- */
-enum vm_type_t {
-       VM_TYPE_USERPTR = 0x1,
-       VM_TYPE_PHYS_PACK = 0x2
-};
-
-/**
- * enum hl_device_hw_state - H/W device state. Use this to determine whether
- *                           a reset is needed before hw_init
- * @HL_DEVICE_HW_STATE_CLEAN: H/W state is clean. i.e. after hard reset
- * @HL_DEVICE_HW_STATE_DIRTY: H/W state is dirty. i.e. we started to execute
- *                            hw_init
- */
-enum hl_device_hw_state {
-       HL_DEVICE_HW_STATE_CLEAN = 0,
-       HL_DEVICE_HW_STATE_DIRTY
-};
-
-/**
- * struct hl_mmu_properties - ASIC specific MMU address translation properties.
- * @start_addr: virtual start address of the memory region.
- * @end_addr: virtual end address of the memory region.
- * @hop0_shift: shift of hop 0 mask.
- * @hop1_shift: shift of hop 1 mask.
- * @hop2_shift: shift of hop 2 mask.
- * @hop3_shift: shift of hop 3 mask.
- * @hop4_shift: shift of hop 4 mask.
- * @hop0_mask: mask to get the PTE address in hop 0.
- * @hop1_mask: mask to get the PTE address in hop 1.
- * @hop2_mask: mask to get the PTE address in hop 2.
- * @hop3_mask: mask to get the PTE address in hop 3.
- * @hop4_mask: mask to get the PTE address in hop 4.
- * @page_size: default page size used to allocate memory.
- */
-struct hl_mmu_properties {
-       u64     start_addr;
-       u64     end_addr;
-       u64     hop0_shift;
-       u64     hop1_shift;
-       u64     hop2_shift;
-       u64     hop3_shift;
-       u64     hop4_shift;
-       u64     hop0_mask;
-       u64     hop1_mask;
-       u64     hop2_mask;
-       u64     hop3_mask;
-       u64     hop4_mask;
-       u32     page_size;
-};
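Each hopN_shift/hopN_mask pair in hl_mmu_properties describes how to carve a PTE index out of a virtual address. The sketch below uses invented values for a 4 KB page, 9-bit-per-hop layout; the real Goya/Gaudi hop geometry is ASIC-specific and not reproduced here.

#include <stdint.h>
#include <stdio.h>

/* Invented example layout, not the ASIC's real hop geometry. */
#define HOP2_SHIFT      21
#define HOP2_MASK       0x000000003fe00000ull
#define HOP3_SHIFT      12
#define HOP3_MASK       0x00000000001ff000ull

int main(void)
{
        uint64_t va = 0x123456000ull;

        printf("hop2 index=%llu, hop3 index=%llu\n",
               (unsigned long long)((va & HOP2_MASK) >> HOP2_SHIFT),
               (unsigned long long)((va & HOP3_MASK) >> HOP3_SHIFT));
        return 0;
}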
-
-/**
- * struct asic_fixed_properties - ASIC specific immutable properties.
- * @hw_queues_props: H/W queues properties.
- * @armcp_info: various information received from ArmCP regarding the H/W,
- *             e.g. available sensors.
- * @uboot_ver: F/W U-boot version.
- * @preboot_ver: F/W Preboot version.
- * @dmmu: DRAM MMU address translation properties.
- * @pmmu: PCI (host) MMU address translation properties.
- * @pmmu_huge: PCI (host) MMU address translation properties for memory
- *              allocated with huge pages.
- * @sram_base_address: SRAM physical start address.
- * @sram_end_address: SRAM physical end address.
- * @sram_user_base_address: SRAM physical start address for user access.
- * @dram_base_address: DRAM physical start address.
- * @dram_end_address: DRAM physical end address.
- * @dram_user_base_address: DRAM physical start address for user access.
- * @dram_size: DRAM total size.
- * @dram_pci_bar_size: size of PCI bar towards DRAM.
- * @max_power_default: max power of the device after reset
- * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
- *                                      fault.
- * @pcie_dbi_base_address: Base address of the PCIE_DBI block.
- * @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register.
- * @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
- * @mmu_dram_default_page_addr: DRAM default page physical address.
- * @mmu_pgt_size: MMU page tables total size.
- * @mmu_pte_size: PTE size in MMU page tables.
- * @mmu_hop_table_size: MMU hop table size.
- * @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
- * @dram_page_size: page size for MMU DRAM allocation.
- * @cfg_size: configuration space size on SRAM.
- * @sram_size: total size of SRAM.
- * @max_asid: maximum number of open contexts (ASIDs).
- * @num_of_events: number of possible internal H/W IRQs.
- * @psoc_pci_pll_nr: PCI PLL NR value.
- * @psoc_pci_pll_nf: PCI PLL NF value.
- * @psoc_pci_pll_od: PCI PLL OD value.
- * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
- * @psoc_timestamp_frequency: frequency of the psoc timestamp clock.
- * @high_pll: high PLL frequency used by the device.
- * @cb_pool_cb_cnt: number of CBs in the CB pool.
- * @cb_pool_cb_size: size of each CB in the CB pool.
- * @max_pending_cs: maximum number of concurrent pending command submissions
- * @max_queues: maximum number of queues in the system
- * @sync_stream_first_sob: first sync object available for sync stream use
- * @sync_stream_first_mon: first monitor available for sync stream use
- * @tpc_enabled_mask: which TPCs are enabled.
- * @completion_queues_count: number of completion queues.
- */
-struct asic_fixed_properties {
-       struct hw_queue_properties      *hw_queues_props;
-       struct armcp_info               armcp_info;
-       char                            uboot_ver[VERSION_MAX_LEN];
-       char                            preboot_ver[VERSION_MAX_LEN];
-       struct hl_mmu_properties        dmmu;
-       struct hl_mmu_properties        pmmu;
-       struct hl_mmu_properties        pmmu_huge;
-       u64                             sram_base_address;
-       u64                             sram_end_address;
-       u64                             sram_user_base_address;
-       u64                             dram_base_address;
-       u64                             dram_end_address;
-       u64                             dram_user_base_address;
-       u64                             dram_size;
-       u64                             dram_pci_bar_size;
-       u64                             max_power_default;
-       u64                             dram_size_for_default_page_mapping;
-       u64                             pcie_dbi_base_address;
-       u64                             pcie_aux_dbi_reg_addr;
-       u64                             mmu_pgt_addr;
-       u64                             mmu_dram_default_page_addr;
-       u32                             mmu_pgt_size;
-       u32                             mmu_pte_size;
-       u32                             mmu_hop_table_size;
-       u32                             mmu_hop0_tables_total_size;
-       u32                             dram_page_size;
-       u32                             cfg_size;
-       u32                             sram_size;
-       u32                             max_asid;
-       u32                             num_of_events;
-       u32                             psoc_pci_pll_nr;
-       u32                             psoc_pci_pll_nf;
-       u32                             psoc_pci_pll_od;
-       u32                             psoc_pci_pll_div_factor;
-       u32                             psoc_timestamp_frequency;
-       u32                             high_pll;
-       u32                             cb_pool_cb_cnt;
-       u32                             cb_pool_cb_size;
-       u32                             max_pending_cs;
-       u32                             max_queues;
-       u16                             sync_stream_first_sob;
-       u16                             sync_stream_first_mon;
-       u8                              tpc_enabled_mask;
-       u8                              completion_queues_count;
-};
-
-/**
- * struct hl_cs_compl - command submission completion object.
- * @base_fence: kernel fence object.
- * @lock: spinlock to protect fence.
- * @hdev: habanalabs device structure.
- * @hw_sob: the H/W SOB used in this signal/wait CS.
- * @cs_seq: command submission sequence number.
- * @type: type of the CS - signal/wait.
- * @sob_val: the SOB value that is used in this signal/wait CS.
- */
-struct hl_cs_compl {
-       struct dma_fence        base_fence;
-       spinlock_t              lock;
-       struct hl_device        *hdev;
-       struct hl_hw_sob        *hw_sob;
-       u64                     cs_seq;
-       enum hl_cs_type         type;
-       u16                     sob_val;
-};
-
-/*
- * Command Buffers
- */
-
-/**
- * struct hl_cb_mgr - describes a Command Buffer Manager.
- * @cb_lock: protects cb_handles.
- * @cb_handles: an idr to hold all command buffer handles.
- */
-struct hl_cb_mgr {
-       spinlock_t              cb_lock;
-       struct idr              cb_handles; /* protected by cb_lock */
-};
-
-/**
- * struct hl_cb - describes a Command Buffer.
- * @refcount: reference counter for usage of the CB.
- * @hdev: pointer to device this CB belongs to.
- * @lock: spinlock to protect mmap/cs flows.
- * @debugfs_list: node in debugfs list of command buffers.
- * @pool_list: node in pool list of command buffers.
- * @kernel_address: Holds the CB's kernel virtual address.
- * @bus_address: Holds the CB's DMA address.
- * @mmap_size: Holds the CB's size that was mmaped.
- * @size: holds the CB's size.
- * @id: the CB's ID.
- * @cs_cnt: holds number of CS that this CB participates in.
- * @ctx_id: holds the ID of the owner's context.
- * @mmap: true if the CB is currently mmaped to user.
- * @is_pool: true if CB was acquired from the pool, false otherwise.
- */
-struct hl_cb {
-       struct kref             refcount;
-       struct hl_device        *hdev;
-       spinlock_t              lock;
-       struct list_head        debugfs_list;
-       struct list_head        pool_list;
-       u64                     kernel_address;
-       dma_addr_t              bus_address;
-       u32                     mmap_size;
-       u32                     size;
-       u32                     id;
-       u32                     cs_cnt;
-       u32                     ctx_id;
-       u8                      mmap;
-       u8                      is_pool;
-};
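
The CB manager above hands out CB IDs through a plain IDR guarded by a spinlock. A minimal sketch of that allocation pattern, assuming the header's usual linux/idr.h include; example_cb_assign_handle() is a hypothetical helper, not a driver function:

static int example_cb_assign_handle(struct hl_cb_mgr *mgr, struct hl_cb *cb)
{
        int rc;

        spin_lock(&mgr->cb_lock);
        /* GFP_ATOMIC because the IDR is populated under a spinlock */
        rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
        spin_unlock(&mgr->cb_lock);

        if (rc < 0)
                return rc;

        cb->id = rc;
        return 0;
}
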
-
-
-/*
- * QUEUES
- */
-
-struct hl_cs_job;
-
-/* Queue length of external and HW queues */
-#define HL_QUEUE_LENGTH                        4096
-#define HL_QUEUE_SIZE_IN_BYTES         (HL_QUEUE_LENGTH * HL_BD_SIZE)
-
-#if (HL_MAX_JOBS_PER_CS > HL_QUEUE_LENGTH)
-#error "HL_QUEUE_LENGTH must be greater than HL_MAX_JOBS_PER_CS"
-#endif
-
-/* HL_CQ_LENGTH is in units of struct hl_cq_entry */
-#define HL_CQ_LENGTH                   HL_QUEUE_LENGTH
-#define HL_CQ_SIZE_IN_BYTES            (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)
-
-/* Must be power of 2 */
-#define HL_EQ_LENGTH                   64
-#define HL_EQ_SIZE_IN_BYTES            (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
-
-/* Host <-> ArmCP shared memory size */
-#define HL_CPU_ACCESSIBLE_MEM_SIZE     SZ_2M
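
One common reason for requiring HL_EQ_LENGTH to be a power of 2 is that wrap-around can then be done with a mask instead of a modulo; an illustrative helper written under that assumption (not the driver's actual increment logic):

static inline u32 example_next_eq_ci(u32 ci)
{
        /* Valid only because HL_EQ_LENGTH is a power of 2 */
        return (ci + 1) & (HL_EQ_LENGTH - 1);
}
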
-
-/**
- * struct hl_hw_queue - describes a H/W transport queue.
- * @hw_sob: array of the used H/W SOBs by this H/W queue.
- * @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
- * @queue_type: type of queue.
- * @kernel_address: holds the queue's kernel virtual address.
- * @bus_address: holds the queue's DMA address.
- * @pi: holds the queue's pi value.
- * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci).
- * @hw_queue_id: the id of the H/W queue.
- * @cq_id: the id for the corresponding CQ for this H/W queue.
- * @msi_vec: the IRQ number of the H/W queue.
- * @int_queue_len: length of internal queue (number of entries).
- * @next_sob_val: the next value to use for the currently used SOB.
- * @base_sob_id: the base SOB id of the SOBs used by this queue.
- * @base_mon_id: the base MON id of the MONs used by this queue.
- * @valid: is the queue valid (we have an array of 32 queues, not all of
- *         them exist).
- * @curr_sob_offset: the id offset to the currently used SOB from the
- *                   HL_RSVD_SOBS that are being used by this queue.
- * @supports_sync_stream: True if queue supports sync stream
- */
-struct hl_hw_queue {
-       struct hl_hw_sob        hw_sob[HL_RSVD_SOBS];
-       struct hl_cs_job        **shadow_queue;
-       enum hl_queue_type      queue_type;
-       u64                     kernel_address;
-       dma_addr_t              bus_address;
-       u32                     pi;
-       atomic_t                ci;
-       u32                     hw_queue_id;
-       u32                     cq_id;
-       u32                     msi_vec;
-       u16                     int_queue_len;
-       u16                     next_sob_val;
-       u16                     base_sob_id;
-       u16                     base_mon_id;
-       u8                      valid;
-       u8                      curr_sob_offset;
-       u8                      supports_sync_stream;
-};
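
Because @ci is the driver-side consumer index (see the note above) and @pi the producer index, the number of occupied slots is their unsigned distance. A hedged sketch of how free space could be derived; example_queue_free_slots() is hypothetical and assumes queue_len matches the ring size used for pi/ci:

static inline u32 example_queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
{
        /* Unsigned arithmetic handles pi/ci wrap-around naturally */
        u32 occupied = q->pi - (u32) atomic_read(&q->ci);

        return queue_len - occupied;
}
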
-
-/**
- * struct hl_cq - describes a completion queue
- * @hdev: pointer to the device structure
- * @kernel_address: holds the queue's kernel virtual address
- * @bus_address: holds the queue's DMA address
- * @cq_idx: completion queue index in array
- * @hw_queue_id: the id of the matching H/W queue
- * @ci: ci inside the queue
- * @pi: pi inside the queue
- * @free_slots_cnt: counter of free slots in queue
- */
-struct hl_cq {
-       struct hl_device        *hdev;
-       u64                     kernel_address;
-       dma_addr_t              bus_address;
-       u32                     cq_idx;
-       u32                     hw_queue_id;
-       u32                     ci;
-       u32                     pi;
-       atomic_t                free_slots_cnt;
-};
-
-/**
- * struct hl_eq - describes the event queue (single one per device)
- * @hdev: pointer to the device structure
- * @kernel_address: holds the queue's kernel virtual address
- * @bus_address: holds the queue's DMA address
- * @ci: ci inside the queue
- */
-struct hl_eq {
-       struct hl_device        *hdev;
-       u64                     kernel_address;
-       dma_addr_t              bus_address;
-       u32                     ci;
-};
-
-
-/*
- * ASICs
- */
-
-/**
- * enum hl_asic_type - supported ASIC types.
- * @ASIC_INVALID: Invalid ASIC type.
- * @ASIC_GOYA: Goya device.
- * @ASIC_GAUDI: Gaudi device.
- */
-enum hl_asic_type {
-       ASIC_INVALID,
-       ASIC_GOYA,
-       ASIC_GAUDI
-};
-
-struct hl_cs_parser;
-
-/**
- * enum hl_pm_mng_profile - power management profile.
- * @PM_AUTO: internal clock is set by the Linux driver.
- * @PM_MANUAL: internal clock is set by the user.
- * @PM_LAST: last power management type.
- */
-enum hl_pm_mng_profile {
-       PM_AUTO = 1,
-       PM_MANUAL,
-       PM_LAST
-};
-
-/**
- * enum hl_pll_frequency - PLL frequency.
- * @PLL_HIGH: high frequency.
- * @PLL_LOW: low frequency.
- * @PLL_LAST: last frequency values that were configured by the user.
- */
-enum hl_pll_frequency {
-       PLL_HIGH = 1,
-       PLL_LOW,
-       PLL_LAST
-};
-
-#define PLL_REF_CLK 50
-
-enum div_select_defs {
-       DIV_SEL_REF_CLK = 0,
-       DIV_SEL_PLL_CLK = 1,
-       DIV_SEL_DIVIDED_REF = 2,
-       DIV_SEL_DIVIDED_PLL = 3,
-};
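
The PSOC PCI PLL values above (NR/NF/OD and DIV FACTOR) conventionally relate to the 50 MHz reference clock roughly as sketched below. This is an illustrative assumption about the usual PLL relationship, not a statement of the exact register semantics of any specific ASIC:

static u32 example_pll_freq_mhz(u32 nr, u32 nf, u32 od, u32 div_fctr,
                                enum div_select_defs div_sel)
{
        u32 pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));

        switch (div_sel) {
        case DIV_SEL_REF_CLK:
                return PLL_REF_CLK;
        case DIV_SEL_DIVIDED_REF:
                return PLL_REF_CLK / (div_fctr + 1);
        case DIV_SEL_PLL_CLK:
                return pll_clk;
        case DIV_SEL_DIVIDED_PLL:
        default:
                return pll_clk / (div_fctr + 1);
        }
}
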
-
-/**
- * struct hl_asic_funcs - ASIC specific functions that can be called from
- *                        common code.
- * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W.
- * @early_fini: tears down what was done in early_init.
- * @late_init: sets up late driver/hw state (post hw_init) - Optional.
- * @late_fini: tears down what was done in late_init (pre hw_fini) - Optional.
- * @sw_init: sets up driver state, does not configure H/W.
- * @sw_fini: tears down driver state, does not configure H/W.
- * @hw_init: sets up the H/W state.
- * @hw_fini: tears down the H/W state.
- * @halt_engines: halt engines, needed for reset sequence. This also disables
- *                interrupts from the device. Should be called before
- *                hw_fini and before CS rollback.
- * @suspend: handles IP specific H/W or SW changes for suspend.
- * @resume: handles IP specific H/W or SW changes for resume.
- * @cb_mmap: maps a CB.
- * @ring_doorbell: increment PI on a given QMAN.
- * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
- *             function because the PQs are located in different memory areas
- *             per ASIC (SRAM, DRAM, Host memory) and therefore, the method of
- *             writing the PQE must match the destination memory area
- *             properties.
- * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling
- *                           dma_alloc_coherent(). This is an ASIC function because
- *                           its implementation is not trivial when the driver
- *                           is loaded in simulation mode (not upstreamed).
- * @asic_dma_free_coherent:  Free coherent DMA memory by calling
- *                           dma_free_coherent(). This is an ASIC function because
- *                           its implementation is not trivial when the driver
- *                           is loaded in simulation mode (not upstreamed).
- * @get_int_queue_base: get the internal queue base address.
- * @test_queues: run simple test on all queues for sanity check.
- * @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool.
- *                        size of allocation is HL_DMA_POOL_BLK_SIZE.
- * @asic_dma_pool_free: free small DMA allocation from pool.
- * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
- * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
- * @hl_dma_unmap_sg: DMA unmap scatter-gather list.
- * @cs_parser: parse Command Submission.
- * @asic_dma_map_sg: DMA map scatter-gather list.
- * @get_dma_desc_list_size: get number of LIN_DMA packets required for CB.
- * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
- * @update_eq_ci: update event queue CI.
- * @context_switch: called upon ASID context switch.
- * @restore_phase_topology: clear all SOBs and MONs.
- * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM.
- * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM.
- * @add_device_attr: add ASIC specific device attributes.
- * @handle_eqe: handle event queue entry (IRQ) from ArmCP.
- * @set_pll_profile: change PLL profile (manual/automatic).
- * @get_events_stat: retrieve event queue entries histogram.
- * @read_pte: read MMU page table entry from DRAM.
- * @write_pte: write MMU page table entry to DRAM.
- * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft
- *                        (L1 only) or hard (L0 & L1) flush.
- * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
- *                              ASID-VA-size mask.
- * @send_heartbeat: send is-alive packet to ArmCP and verify response.
- * @enable_clock_gating: enable clock gating for reducing power consumption.
- * @disable_clock_gating: disable clock gating for accessing registers on HBW.
- * @debug_coresight: perform certain actions on Coresight for debugging.
- * @is_device_idle: return true if device is idle, false otherwise.
- * @soft_reset_late_init: perform certain actions needed after soft reset.
- * @hw_queues_lock: acquire H/W queues lock.
- * @hw_queues_unlock: release H/W queues lock.
- * @get_pci_id: retrieve PCI ID.
- * @get_eeprom_data: retrieve EEPROM data from F/W.
- * @send_cpu_message: send buffer to ArmCP.
- * @get_hw_state: retrieve the H/W state
- * @pci_bars_map: Map PCI BARs.
- * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns
- *                     old address the bar pointed to or U64_MAX for failure
- * @init_iatu: Initialize the iATU unit inside the PCI controller.
- * @rreg: Read a register. Needed for simulator support.
- * @wreg: Write a register. Needed for simulator support.
- * @halt_coresight: stop the ETF and ETR traces.
- * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
- * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
- * @read_device_fw_version: read the device's firmware versions that are
- *                          contained in registers
- * @load_firmware_to_device: load the firmware to the device's memory
- * @load_boot_fit_to_device: load boot fit to device's memory
- * @get_signal_cb_size: Get signal CB size.
- * @get_wait_cb_size: Get wait CB size.
- * @gen_signal_cb: Generate a signal CB.
- * @gen_wait_cb: Generate a wait CB.
- * @reset_sob: Reset a SOB.
- * @set_dma_mask_from_fw: set the DMA mask in the driver according to the
- *                        firmware configuration
- * @get_device_time: Get the device time.
- */
-struct hl_asic_funcs {
-       int (*early_init)(struct hl_device *hdev);
-       int (*early_fini)(struct hl_device *hdev);
-       int (*late_init)(struct hl_device *hdev);
-       void (*late_fini)(struct hl_device *hdev);
-       int (*sw_init)(struct hl_device *hdev);
-       int (*sw_fini)(struct hl_device *hdev);
-       int (*hw_init)(struct hl_device *hdev);
-       void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
-       void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
-       int (*suspend)(struct hl_device *hdev);
-       int (*resume)(struct hl_device *hdev);
-       int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
-                       u64 kaddress, phys_addr_t paddress, u32 size);
-       void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
-       void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
-                       struct hl_bd *bd);
-       void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size,
-                                       dma_addr_t *dma_handle, gfp_t flag);
-       void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size,
-                                       void *cpu_addr, dma_addr_t dma_handle);
-       void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
-                               dma_addr_t *dma_handle, u16 *queue_len);
-       int (*test_queues)(struct hl_device *hdev);
-       void* (*asic_dma_pool_zalloc)(struct hl_device *hdev, size_t size,
-                               gfp_t mem_flags, dma_addr_t *dma_handle);
-       void (*asic_dma_pool_free)(struct hl_device *hdev, void *vaddr,
-                               dma_addr_t dma_addr);
-       void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev,
-                               size_t size, dma_addr_t *dma_handle);
-       void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
-                               size_t size, void *vaddr);
-       void (*hl_dma_unmap_sg)(struct hl_device *hdev,
-                               struct scatterlist *sgl, int nents,
-                               enum dma_data_direction dir);
-       int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
-       int (*asic_dma_map_sg)(struct hl_device *hdev,
-                               struct scatterlist *sgl, int nents,
-                               enum dma_data_direction dir);
-       u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
-                                       struct sg_table *sgt);
-       void (*add_end_of_cb_packets)(struct hl_device *hdev,
-                                       u64 kernel_address, u32 len,
-                                       u64 cq_addr, u32 cq_val, u32 msix_num,
-                                       bool eb);
-       void (*update_eq_ci)(struct hl_device *hdev, u32 val);
-       int (*context_switch)(struct hl_device *hdev, u32 asid);
-       void (*restore_phase_topology)(struct hl_device *hdev);
-       int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val);
-       int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val);
-       int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val);
-       int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val);
-       void (*add_device_attr)(struct hl_device *hdev,
-                               struct attribute_group *dev_attr_grp);
-       void (*handle_eqe)(struct hl_device *hdev,
-                               struct hl_eq_entry *eq_entry);
-       void (*set_pll_profile)(struct hl_device *hdev,
-                       enum hl_pll_frequency freq);
-       void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
-                               u32 *size);
-       u64 (*read_pte)(struct hl_device *hdev, u64 addr);
-       void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
-       int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
-                                       u32 flags);
-       int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
-                       u32 asid, u64 va, u64 size);
-       int (*send_heartbeat)(struct hl_device *hdev);
-       void (*enable_clock_gating)(struct hl_device *hdev);
-       void (*disable_clock_gating)(struct hl_device *hdev);
-       int (*debug_coresight)(struct hl_device *hdev, void *data);
-       bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
-                               struct seq_file *s);
-       int (*soft_reset_late_init)(struct hl_device *hdev);
-       void (*hw_queues_lock)(struct hl_device *hdev);
-       void (*hw_queues_unlock)(struct hl_device *hdev);
-       u32 (*get_pci_id)(struct hl_device *hdev);
-       int (*get_eeprom_data)(struct hl_device *hdev, void *data,
-                               size_t max_size);
-       int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
-                               u16 len, u32 timeout, long *result);
-       enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev);
-       int (*pci_bars_map)(struct hl_device *hdev);
-       u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
-       int (*init_iatu)(struct hl_device *hdev);
-       u32 (*rreg)(struct hl_device *hdev, u32 reg);
-       void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
-       void (*halt_coresight)(struct hl_device *hdev);
-       int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
-       u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
-       void (*read_device_fw_version)(struct hl_device *hdev,
-                                       enum hl_fw_component fwc);
-       int (*load_firmware_to_device)(struct hl_device *hdev);
-       int (*load_boot_fit_to_device)(struct hl_device *hdev);
-       u32 (*get_signal_cb_size)(struct hl_device *hdev);
-       u32 (*get_wait_cb_size)(struct hl_device *hdev);
-       void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
-       void (*gen_wait_cb)(struct hl_device *hdev, void *data, u16 sob_id,
-                               u16 sob_val, u16 mon_id, u32 q_idx);
-       void (*reset_sob)(struct hl_device *hdev, void *data);
-       void (*set_dma_mask_from_fw)(struct hl_device *hdev);
-       u64 (*get_device_time)(struct hl_device *hdev);
-};
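
Common code reaches the hardware only through this per-ASIC function table; a trivial illustration of the dispatch pattern (hypothetical wrapper, not a driver function):

static inline void example_ring_doorbell(struct hl_device *hdev,
                                         u32 hw_queue_id, u32 pi)
{
        /* Goya, Gaudi (or the simulator) each install their own callback */
        hdev->asic_funcs->ring_doorbell(hdev, hw_queue_id, pi);
}
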
-
-
-/*
- * CONTEXTS
- */
-
-#define HL_KERNEL_ASID_ID      0
-
-/**
- * struct hl_va_range - virtual addresses range.
- * @lock: protects the virtual addresses list.
- * @list: list of virtual addresses blocks available for mappings.
- * @start_addr: range start address.
- * @end_addr: range end address.
- */
-struct hl_va_range {
-       struct mutex            lock;
-       struct list_head        list;
-       u64                     start_addr;
-       u64                     end_addr;
-};
-
-/**
- * struct hl_ctx - user/kernel context.
- * @mem_hash: holds mapping from virtual address to virtual memory area
- *             descriptor (hl_vm_phys_pg_pack or hl_userptr).
- * @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure.
- * @hpriv: pointer to the private (Kernel Driver) data of the process (fd).
- * @hdev: pointer to the device structure.
- * @refcount: reference counter for the context. Context is released only when
- *             this hits 0. It is incremented on CS and CS_WAIT.
- * @cs_pending: array of DMA fence objects representing pending CS.
- * @host_va_range: holds available virtual addresses for host mappings.
- * @host_huge_va_range: holds available virtual addresses for host mappings
- *                      with huge pages.
- * @dram_va_range: holds available virtual addresses for DRAM mappings.
- * @mem_hash_lock: protects the mem_hash.
- * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
- *            MMU hash or walking the PGT requires taking this lock.
- * @debugfs_list: node in debugfs list of contexts.
- * @cs_counters: per-context command submission counters.
- * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
- *                     to user so the user can inquire about the CS. It is
- *                     used as an index into the cs_pending array.
- * @dram_default_hops: array that holds all hops addresses needed for default
- *                     DRAM mapping.
- * @cs_lock: spinlock to protect cs_sequence.
- * @dram_phys_mem: amount of used physical DRAM memory by this context.
- * @thread_ctx_switch_token: token to prevent multiple threads of the same
- *                             context from running the context switch phase.
- *                             Only a single thread should run it.
- * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
- *                             the context switch phase from moving to their
- *                             execution phase before the context switch phase
- *                             has finished.
- * @asid: context's unique address space ID in the device's MMU.
- * @handle: context's opaque handle for user
- */
-struct hl_ctx {
-       DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
-       DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
-       struct hl_fpriv         *hpriv;
-       struct hl_device        *hdev;
-       struct kref             refcount;
-       struct dma_fence        **cs_pending;
-       struct hl_va_range      *host_va_range;
-       struct hl_va_range      *host_huge_va_range;
-       struct hl_va_range      *dram_va_range;
-       struct mutex            mem_hash_lock;
-       struct mutex            mmu_lock;
-       struct list_head        debugfs_list;
-       struct hl_cs_counters   cs_counters;
-       u64                     cs_sequence;
-       u64                     *dram_default_hops;
-       spinlock_t              cs_lock;
-       atomic64_t              dram_phys_mem;
-       atomic_t                thread_ctx_switch_token;
-       u32                     thread_ctx_switch_wait_token;
-       u32                     asid;
-       u32                     handle;
-};
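
@cs_sequence and @cs_pending together form a small ring of fences indexed by sequence number. A hedged sketch of a lookup, assuming max_pending_cs is a power of 2 and that the caller handles locking and reference counting:

static struct dma_fence *example_lookup_fence(struct hl_ctx *ctx, u64 seq)
{
        struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;

        /* A sequence newer than what was ever issued has no fence yet */
        if (seq >= ctx->cs_sequence)
                return NULL;

        return ctx->cs_pending[seq & (prop->max_pending_cs - 1)];
}
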
-
-/**
- * struct hl_ctx_mgr - for handling multiple contexts.
- * @ctx_lock: protects ctx_handles.
- * @ctx_handles: idr to hold all ctx handles.
- */
-struct hl_ctx_mgr {
-       struct mutex            ctx_lock;
-       struct idr              ctx_handles;
-};
-
-
-
-/*
- * COMMAND SUBMISSIONS
- */
-
-/**
- * struct hl_userptr - memory mapping chunk information
- * @vm_type: type of the VM.
- * @job_node: linked-list node for hanging the object on the Job's list.
- * @vec: pointer to the frame vector.
- * @sgt: pointer to the scatter-gather table that holds the pages.
- * @dir: for DMA unmapping, the direction must be supplied, so save it.
- * @debugfs_list: node in debugfs list of command submissions.
- * @addr: user-space virtual address of the start of the memory area.
- * @size: size of the memory area to pin & map.
- * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
- */
-struct hl_userptr {
-       enum vm_type_t          vm_type; /* must be first */
-       struct list_head        job_node;
-       struct frame_vector     *vec;
-       struct sg_table         *sgt;
-       enum dma_data_direction dir;
-       struct list_head        debugfs_list;
-       u64                     addr;
-       u32                     size;
-       u8                      dma_mapped;
-};
-
-/**
- * struct hl_cs - command submission.
- * @jobs_in_queue_cnt: per each queue, maintain counter of submitted jobs.
- * @ctx: the context this CS belongs to.
- * @job_list: list of the CS's jobs in the various queues.
- * @job_lock: spinlock for the CS's jobs list. Needed for free_job.
- * @refcount: reference counter for usage of the CS.
- * @fence: pointer to the fence object of this CS.
- * @signal_fence: pointer to the fence object of the signal CS (used by wait
- *                CS only).
- * @finish_work: workqueue object to run when CS is completed by H/W.
- * @work_tdr: delayed work node for TDR.
- * @mirror_node: node in device mirror list of command submissions.
- * @debugfs_list: node in debugfs list of command submissions.
- * @sequence: the sequence number of this CS.
- * @type: CS_TYPE_*.
- * @submitted: true if CS was submitted to H/W.
- * @completed: true if CS was completed by device.
- * @timedout: true if the CS has timed out.
- * @tdr_active: true if TDR was activated for this CS (to prevent
- *             double TDR activation).
- * @aborted: true if CS was aborted due to some device error.
- */
-struct hl_cs {
-       u16                     *jobs_in_queue_cnt;
-       struct hl_ctx           *ctx;
-       struct list_head        job_list;
-       spinlock_t              job_lock;
-       struct kref             refcount;
-       struct dma_fence        *fence;
-       struct dma_fence        *signal_fence;
-       struct work_struct      finish_work;
-       struct delayed_work     work_tdr;
-       struct list_head        mirror_node;
-       struct list_head        debugfs_list;
-       u64                     sequence;
-       enum hl_cs_type         type;
-       u8                      submitted;
-       u8                      completed;
-       u8                      timedout;
-       u8                      tdr_active;
-       u8                      aborted;
-};
-
-/**
- * struct hl_cs_job - command submission job.
- * @cs_node: the node to hang on the CS jobs list.
- * @cs: the CS this job belongs to.
- * @user_cb: the CB we got from the user.
- * @patched_cb: in case of patching, this is internal CB which is submitted on
- *             the queue instead of the CB we got from the IOCTL.
- * @finish_work: workqueue object to run when job is completed.
- * @userptr_list: linked-list of userptr mappings that belong to this job and
- *                     wait for completion.
- * @debugfs_list: node in debugfs list of command submission jobs.
- * @queue_type: the type of the H/W queue this job is submitted to.
- * @id: the id of this job inside a CS.
- * @hw_queue_id: the id of the H/W queue this job is submitted to.
- * @user_cb_size: the actual size of the CB we got from the user.
- * @job_cb_size: the actual size of the CB that we put on the queue.
- * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
- *                          handle to a kernel-allocated CB object, false
- *                          otherwise (SRAM/DRAM/host address).
- * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
- *                    info is needed later, when adding the 2xMSG_PROT at the
- *                    end of the JOB, to know which barriers to put in the
- *                    MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
- *                    have streams, so the engine can't be kept busy by
- *                    another stream.
- */
-struct hl_cs_job {
-       struct list_head        cs_node;
-       struct hl_cs            *cs;
-       struct hl_cb            *user_cb;
-       struct hl_cb            *patched_cb;
-       struct work_struct      finish_work;
-       struct list_head        userptr_list;
-       struct list_head        debugfs_list;
-       enum hl_queue_type      queue_type;
-       u32                     id;
-       u32                     hw_queue_id;
-       u32                     user_cb_size;
-       u32                     job_cb_size;
-       u8                      is_kernel_allocated_cb;
-       u8                      contains_dma_pkt;
-};
-
-/**
- * struct hl_cs_parser - command submission parser properties.
- * @user_cb: the CB we got from the user.
- * @patched_cb: in case of patching, this is internal CB which is submitted on
- *             the queue instead of the CB we got from the IOCTL.
- * @job_userptr_list: linked-list of userptr mappings that belong to the related
- *                     job and wait for completion.
- * @cs_sequence: the sequence number of the related CS.
- * @queue_type: the type of the H/W queue this job is submitted to.
- * @ctx_id: the ID of the context the related CS belongs to.
- * @hw_queue_id: the id of the H/W queue this job is submitted to.
- * @user_cb_size: the actual size of the CB we got from the user.
- * @patched_cb_size: the size of the CB after parsing.
- * @job_id: the id of the related job inside the related CS.
- * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
- *                          handle to a kernel-allocated CB object, false
- *                          otherwise (SRAM/DRAM/host address).
- * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
- *                    info is needed later, when adding the 2xMSG_PROT at the
- *                    end of the JOB, to know which barriers to put in the
- *                    MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
- *                    have streams, so the engine can't be kept busy by
- *                    another stream.
- */
-struct hl_cs_parser {
-       struct hl_cb            *user_cb;
-       struct hl_cb            *patched_cb;
-       struct list_head        *job_userptr_list;
-       u64                     cs_sequence;
-       enum hl_queue_type      queue_type;
-       u32                     ctx_id;
-       u32                     hw_queue_id;
-       u32                     user_cb_size;
-       u32                     patched_cb_size;
-       u8                      job_id;
-       u8                      is_kernel_allocated_cb;
-       u8                      contains_dma_pkt;
-};
-
-
-/*
- * MEMORY STRUCTURE
- */
-
-/**
- * struct hl_vm_hash_node - hash element from virtual address to virtual
- *                             memory area descriptor (hl_vm_phys_pg_pack or
- *                             hl_userptr).
- * @node: node to hang on the hash table in context object.
- * @vaddr: key virtual address.
- * @ptr: value pointer (hl_vm_phys_pg_pack or hl_userptr).
- */
-struct hl_vm_hash_node {
-       struct hlist_node       node;
-       u64                     vaddr;
-       void                    *ptr;
-};
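
Lookups in the per-context mem_hash walk the bucket selected by the virtual address key; an illustrative helper (hypothetical, assumes the caller holds mem_hash_lock):

static void *example_vaddr_to_descriptor(struct hl_ctx *ctx, u64 vaddr)
{
        struct hl_vm_hash_node *hnode;

        hash_for_each_possible(ctx->mem_hash, hnode, node, vaddr)
                if (hnode->vaddr == vaddr)
                        return hnode->ptr;

        return NULL;
}
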
-
-/**
- * struct hl_vm_phys_pg_pack - physical page pack.
- * @vm_type: describes the type of the virtual area descriptor.
- * @pages: the physical page array.
- * @npages: num physical pages in the pack.
- * @total_size: total size of all the pages in this list.
- * @mapping_cnt: number of shared mappings.
- * @asid: the context related to this list.
- * @page_size: size of each page in the pack.
- * @flags: HL_MEM_* flags related to this list.
- * @handle: the provided handle related to this list.
- * @offset: offset from the first page.
- * @contiguous: is contiguous physical memory.
- * @created_from_userptr: true if created from a host virtual address.
- */
-struct hl_vm_phys_pg_pack {
-       enum vm_type_t          vm_type; /* must be first */
-       u64                     *pages;
-       u64                     npages;
-       u64                     total_size;
-       atomic_t                mapping_cnt;
-       u32                     asid;
-       u32                     page_size;
-       u32                     flags;
-       u32                     handle;
-       u32                     offset;
-       u8                      contiguous;
-       u8                      created_from_userptr;
-};
-
-/**
- * struct hl_vm_va_block - virtual range block information.
- * @node: node to hang on the virtual range list in context object.
- * @start: virtual range start address.
- * @end: virtual range end address.
- * @size: virtual range size.
- */
-struct hl_vm_va_block {
-       struct list_head        node;
-       u64                     start;
-       u64                     end;
-       u64                     size;
-};
-
-/**
- * struct hl_vm - virtual memory manager for MMU.
- * @dram_pg_pool: pool for DRAM physical pages of 2MB.
- * @dram_pg_pool_refcount: reference counter for the pool usage.
- * @idr_lock: protects phys_pg_pack_handles.
- * @phys_pg_pack_handles: idr to hold all device allocation handles.
- * @init_done: whether initialization was done. We need this because VM
- *             initialization might be skipped during device initialization.
- */
-struct hl_vm {
-       struct gen_pool         *dram_pg_pool;
-       struct kref             dram_pg_pool_refcount;
-       spinlock_t              idr_lock;
-       struct idr              phys_pg_pack_handles;
-       u8                      init_done;
-};
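
DRAM pages for device allocations are carved out of the dram_pg_pool gen_pool; a minimal sketch of taking and returning one page (hypothetical helpers, error handling reduced to the essentials):

static u64 example_alloc_dram_page(struct hl_vm *vm, u32 page_size)
{
        /* Returns 0 on failure, otherwise a DRAM physical address */
        return (u64) gen_pool_alloc(vm->dram_pg_pool, page_size);
}

static void example_free_dram_page(struct hl_vm *vm, u64 paddr, u32 page_size)
{
        gen_pool_free(vm->dram_pg_pool, (unsigned long) paddr, page_size);
}
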
-
-
-/*
- * DEBUG, PROFILING STRUCTURE
- */
-
-/**
- * struct hl_debug_params - Coresight debug parameters.
- * @input: pointer to component specific input parameters.
- * @output: pointer to component specific output parameters.
- * @output_size: size of output buffer.
- * @reg_idx: relevant register ID.
- * @op: component operation to execute.
- * @enable: true to enable component debugging, false otherwise.
- */
-struct hl_debug_params {
-       void *input;
-       void *output;
-       u32 output_size;
-       u32 reg_idx;
-       u32 op;
-       bool enable;
-};
-
-/*
- * FILE PRIVATE STRUCTURE
- */
-
-/**
- * struct hl_fpriv - process information stored in FD private data.
- * @hdev: habanalabs device structure.
- * @filp: pointer to the given file structure.
- * @taskpid: current process ID.
- * @ctx: current executing context. TODO: remove for multiple ctx per process
- * @ctx_mgr: context manager to handle multiple contexts for this FD.
- * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
- * @debugfs_list: list of relevant ASIC debugfs.
- * @dev_node: node in the device list of file private data
- * @refcount: number of related contexts.
- * @restore_phase_mutex: lock for context switch and restore phase.
- * @is_control: true for control device, false otherwise
- */
-struct hl_fpriv {
-       struct hl_device        *hdev;
-       struct file             *filp;
-       struct pid              *taskpid;
-       struct hl_ctx           *ctx;
-       struct hl_ctx_mgr       ctx_mgr;
-       struct hl_cb_mgr        cb_mgr;
-       struct list_head        debugfs_list;
-       struct list_head        dev_node;
-       struct kref             refcount;
-       struct mutex            restore_phase_mutex;
-       u8                      is_control;
-};
-
-
-/*
- * DebugFS
- */
-
-/**
- * struct hl_info_list - debugfs file ops.
- * @name: file name.
- * @show: function to output information.
- * @write: function to write to the file.
- */
-struct hl_info_list {
-       const char      *name;
-       int             (*show)(struct seq_file *s, void *data);
-       ssize_t         (*write)(struct file *file, const char __user *buf,
-                               size_t count, loff_t *f_pos);
-};
-
-/**
- * struct hl_debugfs_entry - debugfs dentry wrapper.
- * @dent: base debugfs entry structure.
- * @info_ent: dentry related ops.
- * @dev_entry: ASIC specific debugfs manager.
- */
-struct hl_debugfs_entry {
-       struct dentry                   *dent;
-       const struct hl_info_list       *info_ent;
-       struct hl_dbg_device_entry      *dev_entry;
-};
-
-/**
- * struct hl_dbg_device_entry - ASIC specific debugfs manager.
- * @root: root dentry.
- * @hdev: habanalabs device structure.
- * @entry_arr: array of available hl_debugfs_entry.
- * @file_list: list of available debugfs files.
- * @file_mutex: protects file_list.
- * @cb_list: list of available CBs.
- * @cb_spinlock: protects cb_list.
- * @cs_list: list of available CSs.
- * @cs_spinlock: protects cs_list.
- * @cs_job_list: list of available CB jobs.
- * @cs_job_spinlock: protects cs_job_list.
- * @userptr_list: list of available userptrs (virtual memory chunk descriptor).
- * @userptr_spinlock: protects userptr_list.
- * @ctx_mem_hash_list: list of available contexts with MMU mappings.
- * @ctx_mem_hash_spinlock: protects ctx_mem_hash_list.
- * @addr: next address to read/write from/to in read/write32.
- * @mmu_addr: next virtual address to translate to physical address in mmu_show.
- * @mmu_asid: ASID to use while translating in mmu_show.
- * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read.
- * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read.
- * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read.
- */
-struct hl_dbg_device_entry {
-       struct dentry                   *root;
-       struct hl_device                *hdev;
-       struct hl_debugfs_entry         *entry_arr;
-       struct list_head                file_list;
-       struct mutex                    file_mutex;
-       struct list_head                cb_list;
-       spinlock_t                      cb_spinlock;
-       struct list_head                cs_list;
-       spinlock_t                      cs_spinlock;
-       struct list_head                cs_job_list;
-       spinlock_t                      cs_job_spinlock;
-       struct list_head                userptr_list;
-       spinlock_t                      userptr_spinlock;
-       struct list_head                ctx_mem_hash_list;
-       spinlock_t                      ctx_mem_hash_spinlock;
-       u64                             addr;
-       u64                             mmu_addr;
-       u32                             mmu_asid;
-       u8                              i2c_bus;
-       u8                              i2c_addr;
-       u8                              i2c_reg;
-};
-
-
-/*
- * DEVICES
- */
-
-/* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
- * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
- */
-#define HL_MAX_MINORS  256
-
-/*
- * Registers read & write functions.
- */
-
-u32 hl_rreg(struct hl_device *hdev, u32 reg);
-void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
-
-#define RREG32(reg) hdev->asic_funcs->rreg(hdev, (reg))
-#define WREG32(reg, v) hdev->asic_funcs->wreg(hdev, (reg), (v))
-#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n",   \
-                       hdev->asic_funcs->rreg(hdev, (reg)))
-
-#define WREG32_P(reg, val, mask)                               \
-       do {                                                    \
-               u32 tmp_ = RREG32(reg);                         \
-               tmp_ &= (mask);                                 \
-               tmp_ |= ((val) & ~(mask));                      \
-               WREG32(reg, tmp_);                              \
-       } while (0)
-#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
-#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
-
-#define RMWREG32(reg, val, mask)                               \
-       do {                                                    \
-               u32 tmp_ = RREG32(reg);                         \
-               tmp_ &= ~(mask);                                \
-               tmp_ |= ((val) << __ffs(mask));                 \
-               WREG32(reg, tmp_);                              \
-       } while (0)
-
-#define RREG32_MASK(reg, mask) ((RREG32(reg) & mask) >> __ffs(mask))
-
-#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT
-#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK
-#define WREG32_FIELD(reg, offset, field, val)  \
-       WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \
-                               ~REG_FIELD_MASK(reg, field)) | \
-                               (val) << REG_FIELD_SHIFT(reg, field))
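
For clarity, a worked example of the read-modify-write helper above (illustrative comment only, not part of the header):

/*
 * Worked example of RMWREG32:
 *
 *      RMWREG32(reg, 0x3, 0x0000ff00);
 *
 * reads 'reg', clears bits [15:8] (the mask), ORs in (0x3 << 8) because
 * __ffs(0x0000ff00) == 8, and writes the result back, leaving every other
 * bit of the register untouched.
 */
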
-
-/* Timeout should be longer when working with the simulator, but cap the
- * increased timeout to some maximum
- */
-#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
-({ \
-       ktime_t __timeout; \
-       if (hdev->pdev) \
-               __timeout = ktime_add_us(ktime_get(), timeout_us); \
-       else \
-               __timeout = ktime_add_us(ktime_get(),\
-                               min((u64)(timeout_us * 10), \
-                                       (u64) HL_SIM_MAX_TIMEOUT_US)); \
-       might_sleep_if(sleep_us); \
-       for (;;) { \
-               (val) = RREG32(addr); \
-               if (cond) \
-                       break; \
-               if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
-                       (val) = RREG32(addr); \
-                       break; \
-               } \
-               if (sleep_us) \
-                       usleep_range((sleep_us >> 2) + 1, sleep_us); \
-       } \
-       (cond) ? 0 : -ETIMEDOUT; \
-})
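
A typical call shape for hl_poll_timeout(); example_wait_for_bit0() is a hypothetical caller used only to show the macro's arguments:

static int example_wait_for_bit0(struct hl_device *hdev, u32 status_reg)
{
        u32 status;

        /* Poll until bit 0 is set, sleeping ~100us between reads, 1s timeout */
        return hl_poll_timeout(hdev, status_reg, status,
                                (status & 0x1), 100, 1000000);
}
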
-
-/*
- * The address in this macro always points to a memory location in the
- * host's (server's) memory. That location is updated asynchronously,
- * either directly by the device or by another host core.
- *
- * To work both in LE and BE architectures, we need to distinguish between the
- * two states (device or another core updates the memory location). Therefore,
- * if mem_written_by_device is true, the host memory being polled will be
- * updated directly by the device. If false, the host memory being polled will
- * be updated by host CPU. Required so host knows whether or not the memory
- * might need to be byte-swapped before returning value to caller.
- */
-#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \
-                               mem_written_by_device) \
-({ \
-       ktime_t __timeout; \
-       if (hdev->pdev) \
-               __timeout = ktime_add_us(ktime_get(), timeout_us); \
-       else \
-               __timeout = ktime_add_us(ktime_get(),\
-                               min((u64)(timeout_us * 10), \
-                                       (u64) HL_SIM_MAX_TIMEOUT_US)); \
-       might_sleep_if(sleep_us); \
-       for (;;) { \
-               /* Verify we read updates done by other cores or by device */ \
-               mb(); \
-               (val) = *((u32 *) (uintptr_t) (addr)); \
-               if (mem_written_by_device) \
-                       (val) = le32_to_cpu(*(__le32 *) &(val)); \
-               if (cond) \
-                       break; \
-               if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
-                       (val) = *((u32 *) (uintptr_t) (addr)); \
-                       if (mem_written_by_device) \
-                               (val) = le32_to_cpu(*(__le32 *) &(val)); \
-                       break; \
-               } \
-               if (sleep_us) \
-                       usleep_range((sleep_us >> 2) + 1, sleep_us); \
-       } \
-       (cond) ? 0 : -ETIMEDOUT; \
-})
-
-#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
-                                       timeout_us) \
-({ \
-       ktime_t __timeout; \
-       if (hdev->pdev) \
-               __timeout = ktime_add_us(ktime_get(), timeout_us); \
-       else \
-               __timeout = ktime_add_us(ktime_get(),\
-                               min((u64)(timeout_us * 10), \
-                                       (u64) HL_SIM_MAX_TIMEOUT_US)); \
-       might_sleep_if(sleep_us); \
-       for (;;) { \
-               (val) = readl(addr); \
-               if (cond) \
-                       break; \
-               if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
-                       (val) = readl(addr); \
-                       break; \
-               } \
-               if (sleep_us) \
-                       usleep_range((sleep_us >> 2) + 1, sleep_us); \
-       } \
-       (cond) ? 0 : -ETIMEDOUT; \
-})
-
-struct hwmon_chip_info;
-
-/**
- * struct hl_device_reset_work - reset workqueue task wrapper.
- * @reset_work: reset work to be done.
- * @hdev: habanalabs device structure.
- */
-struct hl_device_reset_work {
-       struct work_struct              reset_work;
-       struct hl_device                *hdev;
-};
-
-/**
- * struct hl_device_idle_busy_ts - used for calculating device utilization rate.
- * @idle_to_busy_ts: timestamp where device changed from idle to busy.
- * @busy_to_idle_ts: timestamp where device changed from busy to idle.
- */
-struct hl_device_idle_busy_ts {
-       ktime_t                         idle_to_busy_ts;
-       ktime_t                         busy_to_idle_ts;
-};
-
-/**
- * struct hl_device - habanalabs device structure.
- * @pdev: pointer to PCI device, can be NULL in case of simulator device.
- * @pcie_bar_phys: array of available PCIe bars physical addresses.
- *                (required only for PCI address match mode)
- * @pcie_bar: array of available PCIe bars virtual addresses.
- * @rmmio: configuration area address on SRAM.
- * @cdev: related char device.
- * @cdev_ctrl: char device for control operations only (INFO IOCTL)
- * @dev: related kernel basic device structure.
- * @dev_ctrl: related kernel device structure for the control device
- * @work_freq: delayed work to lower device frequency if possible.
- * @work_heartbeat: delayed work for ArmCP is-alive check.
- * @asic_name: ASIC specific name.
- * @asic_type: ASIC specific type.
- * @completion_queue: array of hl_cq.
- * @cq_wq: work queues of completion queues for executing work in process
- *         context.
- * @eq_wq: work queue of event queue for executing work in process context.
- * @kernel_ctx: Kernel driver context structure.
- * @kernel_queues: array of hl_hw_queue.
- * @hw_queues_mirror_list: CS mirror list for TDR.
- * @hw_queues_mirror_lock: protects hw_queues_mirror_list.
- * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CBs.
- * @event_queue: event queue for IRQ from ArmCP.
- * @dma_pool: DMA pool for small allocations.
- * @cpu_accessible_dma_mem: Host <-> ArmCP shared memory CPU address.
- * @cpu_accessible_dma_address: Host <-> ArmCP shared memory DMA address.
- * @cpu_accessible_dma_pool: Host <-> ArmCP shared memory pool.
- * @asid_bitmap: holds used/available ASIDs.
- * @asid_mutex: protects asid_bitmap.
- * @send_cpu_message_lock: enforces only one message in Host <-> ArmCP queue.
- * @debug_lock: protects critical section of setting debug mode for device
- * @asic_prop: ASIC specific immutable properties.
- * @asic_funcs: ASIC specific functions.
- * @asic_specific: ASIC specific information to use only from ASIC files.
- * @mmu_pgt_pool: pool of available MMU hops.
- * @vm: virtual memory manager for MMU.
- * @mmu_cache_lock: protects MMU cache invalidation as it can serve only one
- *                  context at a time.
- * @mmu_shadow_hop0: shadow mapping of the MMU hop 0 zone.
- * @hwmon_dev: H/W monitor device.
- * @pm_mng_profile: current power management profile.
- * @hl_chip_info: ASIC's sensors information.
- * @hl_debugfs: device's debugfs manager.
- * @cb_pool: list of preallocated CBs.
- * @cb_pool_lock: protects the CB pool.
- * @fpriv_list: list of file private data structures. Each structure is created
- *              when a user opens the device
- * @fpriv_list_lock: protects the fpriv_list
- * @compute_ctx: current compute context executing.
- * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy
- *                    and vice-versa
- * @aggregated_cs_counters: aggregated cs counters among all contexts
- * @dram_used_mem: current DRAM memory consumption.
- * @timeout_jiffies: device CS timeout value.
- * @max_power: the max power of the device, as configured by the sysadmin. This
- *             value is saved so in case of hard-reset, the driver will restore
- *             this value and update the F/W after the re-initialization
- * @in_reset: is device in reset flow.
- * @curr_pll_profile: current PLL profile.
- * @cs_active_cnt: number of active command submissions on this device (active
- *                 means already in H/W queues)
- * @major: habanalabs kernel driver major.
- * @high_pll: high PLL profile frequency.
- * @soft_reset_cnt: number of soft reset since the driver was loaded.
- * @hard_reset_cnt: number of hard reset since the driver was loaded.
- * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
- * @id: device minor.
- * @id_control: minor of the control device
- * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
- *                    addresses.
- * @disabled: is device disabled.
- * @late_init_done: was the late init stage done during initialization.
- * @hwmon_initialized: were the H/W monitor sensors initialized.
- * @hard_reset_pending: is there a hard reset work pending.
- * @heartbeat: is heartbeat sanity check towards ArmCP enabled.
- * @reset_on_lockup: true if a reset should be done in case of stuck CS, false
- *                   otherwise.
- * @dram_supports_virtual_memory: is MMU enabled towards DRAM.
- * @dram_default_page_mapping: is DRAM default page mapping enabled.
- * @pmmu_huge_range: is a different virtual addresses range used for PMMU with
- *                   huge pages.
- * @init_done: is the initialization of the device done.
- * @mmu_enable: is MMU enabled.
- * @mmu_huge_page_opt: is MMU huge pages optimization enabled.
- * @clock_gating: is clock gating enabled.
- * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
- * @dma_mask: the dma mask that was set for this device
- * @in_debug: is device under debug. This, together with fpriv_list, enforces
- *            that only a single user is configuring the debug infrastructure.
- * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant
- *                           only to POWER9 machines.
- * @cdev_sysfs_created: were char devices and sysfs nodes created.
- * @stop_on_err: true if engines should stop on error.
- * @supports_sync_stream: is sync stream supported.
- * @sync_stream_queue_idx: helper index for sync stream queues initialization.
- * @supports_coresight: is CoreSight supported.
- * @supports_soft_reset: is soft reset supported.
- */
-struct hl_device {
-       struct pci_dev                  *pdev;
-       u64                             pcie_bar_phys[HL_PCI_NUM_BARS];
-       void __iomem                    *pcie_bar[HL_PCI_NUM_BARS];
-       void __iomem                    *rmmio;
-       struct cdev                     cdev;
-       struct cdev                     cdev_ctrl;
-       struct device                   *dev;
-       struct device                   *dev_ctrl;
-       struct delayed_work             work_freq;
-       struct delayed_work             work_heartbeat;
-       char                            asic_name[16];
-       enum hl_asic_type               asic_type;
-       struct hl_cq                    *completion_queue;
-       struct workqueue_struct         **cq_wq;
-       struct workqueue_struct         *eq_wq;
-       struct hl_ctx                   *kernel_ctx;
-       struct hl_hw_queue              *kernel_queues;
-       struct list_head                hw_queues_mirror_list;
-       spinlock_t                      hw_queues_mirror_lock;
-       struct hl_cb_mgr                kernel_cb_mgr;
-       struct hl_eq                    event_queue;
-       struct dma_pool                 *dma_pool;
-       void                            *cpu_accessible_dma_mem;
-       dma_addr_t                      cpu_accessible_dma_address;
-       struct gen_pool                 *cpu_accessible_dma_pool;
-       unsigned long                   *asid_bitmap;
-       struct mutex                    asid_mutex;
-       struct mutex                    send_cpu_message_lock;
-       struct mutex                    debug_lock;
-       struct asic_fixed_properties    asic_prop;
-       const struct hl_asic_funcs      *asic_funcs;
-       void                            *asic_specific;
-       struct gen_pool                 *mmu_pgt_pool;
-       struct hl_vm                    vm;
-       struct mutex                    mmu_cache_lock;
-       void                            *mmu_shadow_hop0;
-       struct device                   *hwmon_dev;
-       enum hl_pm_mng_profile          pm_mng_profile;
-       struct hwmon_chip_info          *hl_chip_info;
-
-       struct hl_dbg_device_entry      hl_debugfs;
-
-       struct list_head                cb_pool;
-       spinlock_t                      cb_pool_lock;
-
-       struct list_head                fpriv_list;
-       struct mutex                    fpriv_list_lock;
-
-       struct hl_ctx                   *compute_ctx;
-
-       struct hl_device_idle_busy_ts   *idle_busy_ts_arr;
-
-       struct hl_cs_counters           aggregated_cs_counters;
-
-       atomic64_t                      dram_used_mem;
-       u64                             timeout_jiffies;
-       u64                             max_power;
-       atomic_t                        in_reset;
-       enum hl_pll_frequency           curr_pll_profile;
-       int                             cs_active_cnt;
-       u32                             major;
-       u32                             high_pll;
-       u32                             soft_reset_cnt;
-       u32                             hard_reset_cnt;
-       u32                             idle_busy_ts_idx;
-       u16                             id;
-       u16                             id_control;
-       u16                             cpu_pci_msb_addr;
-       u8                              disabled;
-       u8                              late_init_done;
-       u8                              hwmon_initialized;
-       u8                              hard_reset_pending;
-       u8                              heartbeat;
-       u8                              reset_on_lockup;
-       u8                              dram_supports_virtual_memory;
-       u8                              dram_default_page_mapping;
-       u8                              pmmu_huge_range;
-       u8                              init_done;
-       u8                              clock_gating;
-       u8                              device_cpu_disabled;
-       u8                              dma_mask;
-       u8                              in_debug;
-       u8                              power9_64bit_dma_enable;
-       u8                              cdev_sysfs_created;
-       u8                              stop_on_err;
-       u8                              supports_sync_stream;
-       u8                              sync_stream_queue_idx;
-       u8                              supports_coresight;
-       u8                              supports_soft_reset;
-
-       /* Parameters for bring-up */
-       u8                              mmu_enable;
-       u8                              mmu_huge_page_opt;
-       u8                              cpu_enable;
-       u8                              reset_pcilink;
-       u8                              cpu_queues_enable;
-       u8                              fw_loading;
-       u8                              pldm;
-       u8                              axi_drain;
-       u8                              sram_scrambler_enable;
-       u8                              dram_scrambler_enable;
-       u8                              hard_reset_on_fw_events;
-       u8                              bmc_enable;
-       u8                              rl_enable;
-};
-
-
-/*
- * IOCTLs
- */
-
-/**
- * typedef hl_ioctl_t - typedef for ioctl function in the driver
- * @hpriv: pointer to the FD's private data, which contains the state of
- *             the user process
- * @data: pointer to the input/output arguments structure of the IOCTL
- *
- * Return: 0 for success, negative value for error
- */
-typedef int hl_ioctl_t(struct hl_fpriv *hpriv, void *data);
-
-/**
- * struct hl_ioctl_desc - describes an IOCTL entry of the driver.
- * @cmd: the IOCTL code as created by the kernel macros.
- * @func: pointer to the driver's function that should be called for this IOCTL.
- */
-struct hl_ioctl_desc {
-       unsigned int cmd;
-       hl_ioctl_t *func;
-};
-
-
-/*
- * Kernel module functions that can be accessed by entire module
- */
-
-/**
- * hl_mem_area_inside_range() - Checks whether address+size are inside a range.
- * @address: The start address of the area we want to validate.
- * @size: The size in bytes of the area we want to validate.
- * @range_start_address: The start address of the valid range.
- * @range_end_address: The end address of the valid range.
- *
- * Return: true if the area is inside the valid range, false otherwise.
- */
-static inline bool hl_mem_area_inside_range(u64 address, u32 size,
-                               u64 range_start_address, u64 range_end_address)
-{
-       u64 end_address = address + size;
-
-       if ((address >= range_start_address) &&
-                       (end_address <= range_end_address) &&
-                       (end_address > address))
-               return true;
-
-       return false;
-}
-
-/**
- * hl_mem_area_crosses_range() - Checks whether address+size crosses a range.
- * @address: The start address of the area we want to validate.
- * @size: The size in bytes of the area we want to validate.
- * @range_start_address: The start address of the valid range.
- * @range_end_address: The end address of the valid range.
- *
- * Return: true if the area overlaps part or all of the valid range,
- *             false otherwise.
- */
-static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
-                               u64 range_start_address, u64 range_end_address)
-{
-       u64 end_address = address + size;
-
-       if ((address >= range_start_address) &&
-                       (address < range_end_address))
-               return true;
-
-       if ((end_address >= range_start_address) &&
-                       (end_address < range_end_address))
-               return true;
-
-       if ((address < range_start_address) &&
-                       (end_address >= range_end_address))
-               return true;
-
-       return false;
-}
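
As a quick illustration of how the two helpers above differ (the addresses below are made up for the example and are not taken from any ASIC), assume the valid range is [0x1000, 0x8000):

        hl_mem_area_inside_range(0x2000, 0x1000, 0x1000, 0x8000);  /* true:  [0x2000, 0x3000) is fully contained */
        hl_mem_area_inside_range(0x7000, 0x2000, 0x1000, 0x8000);  /* false: [0x7000, 0x9000) spills past the end */
        hl_mem_area_crosses_range(0x7000, 0x2000, 0x1000, 0x8000); /* true:  a partial overlap is enough to "cross" */
        hl_mem_area_crosses_range(0x9000, 0x1000, 0x1000, 0x8000); /* false: no overlap with the range at all */
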
-
-int hl_device_open(struct inode *inode, struct file *filp);
-int hl_device_open_ctrl(struct inode *inode, struct file *filp);
-bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
-enum hl_device_status hl_device_status(struct hl_device *hdev);
-int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
-int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
-               enum hl_asic_type asic_type, int minor);
-void destroy_hdev(struct hl_device *hdev);
-int hl_hw_queues_create(struct hl_device *hdev);
-void hl_hw_queues_destroy(struct hl_device *hdev);
-int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
-                               u32 cb_size, u64 cb_ptr);
-int hl_hw_queue_schedule_cs(struct hl_cs *cs);
-u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
-void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
-void hl_int_hw_queue_update_ci(struct hl_cs *cs);
-void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset);
-
-#define hl_queue_inc_ptr(p)            hl_hw_queue_add_ptr(p, 1)
-#define hl_pi_2_offset(pi)             ((pi) & (HL_QUEUE_LENGTH - 1))
-
-int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id);
-void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q);
-int hl_eq_init(struct hl_device *hdev, struct hl_eq *q);
-void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q);
-void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
-void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
-irqreturn_t hl_irq_handler_cq(int irq, void *arg);
-irqreturn_t hl_irq_handler_eq(int irq, void *arg);
-u32 hl_cq_inc_ptr(u32 ptr);
-
-int hl_asid_init(struct hl_device *hdev);
-void hl_asid_fini(struct hl_device *hdev);
-unsigned long hl_asid_alloc(struct hl_device *hdev);
-void hl_asid_free(struct hl_device *hdev, unsigned long asid);
-
-int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv);
-void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx);
-int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
-void hl_ctx_do_release(struct kref *ref);
-void hl_ctx_get(struct hl_device *hdev,        struct hl_ctx *ctx);
-int hl_ctx_put(struct hl_ctx *ctx);
-struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
-void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
-void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
-
-int hl_device_init(struct hl_device *hdev, struct class *hclass);
-void hl_device_fini(struct hl_device *hdev);
-int hl_device_suspend(struct hl_device *hdev);
-int hl_device_resume(struct hl_device *hdev);
-int hl_device_reset(struct hl_device *hdev, bool hard_reset,
-                       bool from_hard_reset_thread);
-void hl_hpriv_get(struct hl_fpriv *hpriv);
-void hl_hpriv_put(struct hl_fpriv *hpriv);
-int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
-uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms);
-
-int hl_build_hwmon_channel_info(struct hl_device *hdev,
-               struct armcp_sensor *sensors_arr);
-
-int hl_sysfs_init(struct hl_device *hdev);
-void hl_sysfs_fini(struct hl_device *hdev);
-
-int hl_hwmon_init(struct hl_device *hdev);
-void hl_hwmon_fini(struct hl_device *hdev);
-
-int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
-               u64 *handle, int ctx_id);
-int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
-int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
-struct hl_cb *hl_cb_get(struct hl_device *hdev,        struct hl_cb_mgr *mgr,
-                       u32 handle);
-void hl_cb_put(struct hl_cb *cb);
-void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
-void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
-struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size);
-int hl_cb_pool_init(struct hl_device *hdev);
-int hl_cb_pool_fini(struct hl_device *hdev);
-
-void hl_cs_rollback_all(struct hl_device *hdev);
-struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
-               enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
-void hl_sob_reset_error(struct kref *ref);
-
-void goya_set_asic_funcs(struct hl_device *hdev);
-void gaudi_set_asic_funcs(struct hl_device *hdev);
-
-int hl_vm_ctx_init(struct hl_ctx *ctx);
-void hl_vm_ctx_fini(struct hl_ctx *ctx);
-
-int hl_vm_init(struct hl_device *hdev);
-void hl_vm_fini(struct hl_device *hdev);
-
-int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
-                       struct hl_userptr *userptr);
-void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
-void hl_userptr_delete_list(struct hl_device *hdev,
-                               struct list_head *userptr_list);
-bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
-                               struct list_head *userptr_list,
-                               struct hl_userptr **userptr);
-
-int hl_mmu_init(struct hl_device *hdev);
-void hl_mmu_fini(struct hl_device *hdev);
-int hl_mmu_ctx_init(struct hl_ctx *ctx);
-void hl_mmu_ctx_fini(struct hl_ctx *ctx);
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
-               u32 page_size, bool flush_pte);
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
-               bool flush_pte);
-void hl_mmu_swap_out(struct hl_ctx *ctx);
-void hl_mmu_swap_in(struct hl_ctx *ctx);
-
-int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
-                               void __iomem *dst);
-int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode);
-int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
-                               u16 len, u32 timeout, long *result);
-int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type);
-int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
-               size_t irq_arr_size);
-int hl_fw_test_cpu_queue(struct hl_device *hdev);
-void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
-                                               dma_addr_t *dma_handle);
-void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
-                                       void *vaddr);
-int hl_fw_send_heartbeat(struct hl_device *hdev);
-int hl_fw_armcp_info_get(struct hl_device *hdev);
-int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
-int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
-                       u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
-                       u32 boot_err0_reg, bool skip_bmc,
-                       u32 cpu_timeout, u32 boot_fit_timeout);
-
-int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
-                       bool is_wc[3]);
-int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
-int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
-                               u64 addr);
-int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
-               struct hl_inbound_pci_region *pci_region);
-int hl_pci_set_outbound_region(struct hl_device *hdev,
-               struct hl_outbound_pci_region *pci_region);
-int hl_pci_init(struct hl_device *hdev);
-void hl_pci_fini(struct hl_device *hdev);
-
-long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
-void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
-int hl_get_temperature(struct hl_device *hdev,
-                      int sensor_index, u32 attr, long *value);
-int hl_set_temperature(struct hl_device *hdev,
-                      int sensor_index, u32 attr, long value);
-int hl_get_voltage(struct hl_device *hdev,
-                  int sensor_index, u32 attr, long *value);
-int hl_get_current(struct hl_device *hdev,
-                  int sensor_index, u32 attr, long *value);
-int hl_get_fan_speed(struct hl_device *hdev,
-                    int sensor_index, u32 attr, long *value);
-int hl_get_pwm_info(struct hl_device *hdev,
-                   int sensor_index, u32 attr, long *value);
-void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
-                       long value);
-u64 hl_get_max_power(struct hl_device *hdev);
-void hl_set_max_power(struct hl_device *hdev, u64 value);
-int hl_set_voltage(struct hl_device *hdev,
-                       int sensor_index, u32 attr, long value);
-int hl_set_current(struct hl_device *hdev,
-                       int sensor_index, u32 attr, long value);
-
-#ifdef CONFIG_DEBUG_FS
-
-void hl_debugfs_init(void);
-void hl_debugfs_fini(void);
-void hl_debugfs_add_device(struct hl_device *hdev);
-void hl_debugfs_remove_device(struct hl_device *hdev);
-void hl_debugfs_add_file(struct hl_fpriv *hpriv);
-void hl_debugfs_remove_file(struct hl_fpriv *hpriv);
-void hl_debugfs_add_cb(struct hl_cb *cb);
-void hl_debugfs_remove_cb(struct hl_cb *cb);
-void hl_debugfs_add_cs(struct hl_cs *cs);
-void hl_debugfs_remove_cs(struct hl_cs *cs);
-void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job);
-void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job);
-void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr);
-void hl_debugfs_remove_userptr(struct hl_device *hdev,
-                               struct hl_userptr *userptr);
-void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
-void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
-
-#else
-
-static inline void __init hl_debugfs_init(void)
-{
-}
-
-static inline void hl_debugfs_fini(void)
-{
-}
-
-static inline void hl_debugfs_add_device(struct hl_device *hdev)
-{
-}
-
-static inline void hl_debugfs_remove_device(struct hl_device *hdev)
-{
-}
-
-static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv)
-{
-}
-
-static inline void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
-{
-}
-
-static inline void hl_debugfs_add_cb(struct hl_cb *cb)
-{
-}
-
-static inline void hl_debugfs_remove_cb(struct hl_cb *cb)
-{
-}
-
-static inline void hl_debugfs_add_cs(struct hl_cs *cs)
-{
-}
-
-static inline void hl_debugfs_remove_cs(struct hl_cs *cs)
-{
-}
-
-static inline void hl_debugfs_add_job(struct hl_device *hdev,
-                                       struct hl_cs_job *job)
-{
-}
-
-static inline void hl_debugfs_remove_job(struct hl_device *hdev,
-                                       struct hl_cs_job *job)
-{
-}
-
-static inline void hl_debugfs_add_userptr(struct hl_device *hdev,
-                                       struct hl_userptr *userptr)
-{
-}
-
-static inline void hl_debugfs_remove_userptr(struct hl_device *hdev,
-                                       struct hl_userptr *userptr)
-{
-}
-
-static inline void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev,
-                                       struct hl_ctx *ctx)
-{
-}
-
-static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev,
-                                       struct hl_ctx *ctx)
-{
-}
-
-#endif
-
-/* IOCTLs */
-long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
-long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg);
-int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
-int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
-int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data);
-int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
-
-#endif /* HABANALABSP_H_ */
diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c
deleted file mode 100644 (file)
index f38664b..0000000
+++ /dev/null
@@ -1,529 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-#define pr_fmt(fmt)            "habanalabs: " fmt
-
-#include "habanalabs.h"
-
-#include <linux/pci.h>
-#include <linux/module.h>
-
-#define HL_DRIVER_AUTHOR       "HabanaLabs Kernel Driver Team"
-
-#define HL_DRIVER_DESC         "Driver for HabanaLabs's AI Accelerators"
-
-MODULE_AUTHOR(HL_DRIVER_AUTHOR);
-MODULE_DESCRIPTION(HL_DRIVER_DESC);
-MODULE_LICENSE("GPL v2");
-
-static int hl_major;
-static struct class *hl_class;
-static DEFINE_IDR(hl_devs_idr);
-static DEFINE_MUTEX(hl_devs_idr_lock);
-
-static int timeout_locked = 5;
-static int reset_on_lockup = 1;
-
-module_param(timeout_locked, int, 0444);
-MODULE_PARM_DESC(timeout_locked,
-       "Device lockup timeout in seconds (0 = disabled, default 5s)");
-
-module_param(reset_on_lockup, int, 0444);
-MODULE_PARM_DESC(reset_on_lockup,
-       "Do device reset on lockup (0 = no, 1 = yes, default yes)");
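
For reference, both parameters can only be set when the module is loaded (their permissions are 0444), for example with something like "modprobe habanalabs timeout_locked=30 reset_on_lockup=0"; afterwards they are exposed read-only under /sys/module/habanalabs/parameters/.
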
-
-#define PCI_VENDOR_ID_HABANALABS       0x1da3
-
-#define PCI_IDS_GOYA                   0x0001
-#define PCI_IDS_GAUDI                  0x1000
-
-static const struct pci_device_id ids[] = {
-       { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
-       { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
-       { 0, }
-};
-MODULE_DEVICE_TABLE(pci, ids);
-
-/*
- * get_asic_type - translate device id to asic type
- *
- * @device: id of the PCI device
- *
- * Translate device id to asic type.
- * In case of an unidentified device, return ASIC_INVALID
- */
-static enum hl_asic_type get_asic_type(u16 device)
-{
-       enum hl_asic_type asic_type;
-
-       switch (device) {
-       case PCI_IDS_GOYA:
-               asic_type = ASIC_GOYA;
-               break;
-       case PCI_IDS_GAUDI:
-               asic_type = ASIC_GAUDI;
-               break;
-       default:
-               asic_type = ASIC_INVALID;
-               break;
-       }
-
-       return asic_type;
-}
-
-/*
- * hl_device_open - open function for habanalabs device
- *
- * @inode: pointer to inode structure
- * @filp: pointer to file structure
- *
- * Called when a process opens a habanalabs device.
- */
-int hl_device_open(struct inode *inode, struct file *filp)
-{
-       struct hl_device *hdev;
-       struct hl_fpriv *hpriv;
-       int rc;
-
-       mutex_lock(&hl_devs_idr_lock);
-       hdev = idr_find(&hl_devs_idr, iminor(inode));
-       mutex_unlock(&hl_devs_idr_lock);
-
-       if (!hdev) {
-               pr_err("Couldn't find device %d:%d\n",
-                       imajor(inode), iminor(inode));
-               return -ENXIO;
-       }
-
-       hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
-       if (!hpriv)
-               return -ENOMEM;
-
-       hpriv->hdev = hdev;
-       filp->private_data = hpriv;
-       hpriv->filp = filp;
-       mutex_init(&hpriv->restore_phase_mutex);
-       kref_init(&hpriv->refcount);
-       nonseekable_open(inode, filp);
-
-       hl_cb_mgr_init(&hpriv->cb_mgr);
-       hl_ctx_mgr_init(&hpriv->ctx_mgr);
-
-       hpriv->taskpid = find_get_pid(current->pid);
-
-       mutex_lock(&hdev->fpriv_list_lock);
-
-       if (hl_device_disabled_or_in_reset(hdev)) {
-               dev_err_ratelimited(hdev->dev,
-                       "Can't open %s because it is disabled or in reset\n",
-                       dev_name(hdev->dev));
-               rc = -EPERM;
-               goto out_err;
-       }
-
-       if (hdev->in_debug) {
-               dev_err_ratelimited(hdev->dev,
-                       "Can't open %s because it is being debugged by another user\n",
-                       dev_name(hdev->dev));
-               rc = -EPERM;
-               goto out_err;
-       }
-
-       if (hdev->compute_ctx) {
-               dev_dbg_ratelimited(hdev->dev,
-                       "Can't open %s because another user is working on it\n",
-                       dev_name(hdev->dev));
-               rc = -EBUSY;
-               goto out_err;
-       }
-
-       rc = hl_ctx_create(hdev, hpriv);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to create context %d\n", rc);
-               goto out_err;
-       }
-
-       /* Device is IDLE at this point so it is legal to change PLLs.
-        * There is no need to check anything because if the PLL is
-        * already HIGH, the set function will return without doing
-        * anything
-        */
-       hl_device_set_frequency(hdev, PLL_HIGH);
-
-       list_add(&hpriv->dev_node, &hdev->fpriv_list);
-       mutex_unlock(&hdev->fpriv_list_lock);
-
-       hl_debugfs_add_file(hpriv);
-
-       return 0;
-
-out_err:
-       mutex_unlock(&hdev->fpriv_list_lock);
-
-       hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
-       hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
-       filp->private_data = NULL;
-       mutex_destroy(&hpriv->restore_phase_mutex);
-       put_pid(hpriv->taskpid);
-
-       kfree(hpriv);
-
-       return rc;
-}
-
-int hl_device_open_ctrl(struct inode *inode, struct file *filp)
-{
-       struct hl_device *hdev;
-       struct hl_fpriv *hpriv;
-       int rc;
-
-       mutex_lock(&hl_devs_idr_lock);
-       hdev = idr_find(&hl_devs_idr, iminor(inode));
-       mutex_unlock(&hl_devs_idr_lock);
-
-       if (!hdev) {
-               pr_err("Couldn't find device %d:%d\n",
-                       imajor(inode), iminor(inode));
-               return -ENXIO;
-       }
-
-       hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
-       if (!hpriv)
-               return -ENOMEM;
-
-       mutex_lock(&hdev->fpriv_list_lock);
-
-       if (hl_device_disabled_or_in_reset(hdev)) {
-               dev_err_ratelimited(hdev->dev_ctrl,
-                       "Can't open %s because it is disabled or in reset\n",
-                       dev_name(hdev->dev_ctrl));
-               rc = -EPERM;
-               goto out_err;
-       }
-
-       list_add(&hpriv->dev_node, &hdev->fpriv_list);
-       mutex_unlock(&hdev->fpriv_list_lock);
-
-       hpriv->hdev = hdev;
-       filp->private_data = hpriv;
-       hpriv->filp = filp;
-       hpriv->is_control = true;
-       nonseekable_open(inode, filp);
-
-       hpriv->taskpid = find_get_pid(current->pid);
-
-       return 0;
-
-out_err:
-       mutex_unlock(&hdev->fpriv_list_lock);
-       kfree(hpriv);
-       return rc;
-}
-
-static void set_driver_behavior_per_device(struct hl_device *hdev)
-{
-       hdev->mmu_enable = 1;
-       hdev->cpu_enable = 1;
-       hdev->fw_loading = 1;
-       hdev->cpu_queues_enable = 1;
-       hdev->heartbeat = 1;
-       hdev->clock_gating = 1;
-
-       hdev->reset_pcilink = 0;
-       hdev->axi_drain = 0;
-       hdev->sram_scrambler_enable = 1;
-       hdev->dram_scrambler_enable = 1;
-       hdev->bmc_enable = 1;
-       hdev->hard_reset_on_fw_events = 1;
-}
-
-/*
- * create_hdev - create habanalabs device instance
- *
- * @dev: will hold the pointer to the new habanalabs device structure
- * @pdev: pointer to the pci device
- * @asic_type: in case of a simulator device, which ASIC type it is
- * @minor: in case of a simulator device, the minor number of the device
- *
- * Allocate memory for habanalabs device and initialize basic fields
- * Identify the ASIC type
- * Allocate ID (minor) for the device (only for real devices)
- */
-int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
-               enum hl_asic_type asic_type, int minor)
-{
-       struct hl_device *hdev;
-       int rc, main_id, ctrl_id = 0;
-
-       *dev = NULL;
-
-       hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
-       if (!hdev)
-               return -ENOMEM;
-
-       /* First, we must find out which ASIC we are handling. This is needed
-        * to configure the behavior of the driver (kernel parameters)
-        */
-       if (pdev) {
-               hdev->asic_type = get_asic_type(pdev->device);
-               if (hdev->asic_type == ASIC_INVALID) {
-                       dev_err(&pdev->dev, "Unsupported ASIC\n");
-                       rc = -ENODEV;
-                       goto free_hdev;
-               }
-       } else {
-               hdev->asic_type = asic_type;
-       }
-
-       hdev->major = hl_major;
-       hdev->reset_on_lockup = reset_on_lockup;
-       hdev->pldm = 0;
-
-       set_driver_behavior_per_device(hdev);
-
-       if (timeout_locked)
-               hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
-       else
-               hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
-
-       hdev->disabled = true;
-       hdev->pdev = pdev; /* can be NULL in case of simulator device */
-
-       /* Set default DMA mask to 32 bits */
-       hdev->dma_mask = 32;
-
-       mutex_lock(&hl_devs_idr_lock);
-
-       /* Always save 2 numbers, 1 for main device and 1 for control.
-        * They must be consecutive
-        */
-       main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS,
-                               GFP_KERNEL);
-
-       if (main_id >= 0)
-               ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
-                                       main_id + 2, GFP_KERNEL);
-
-       mutex_unlock(&hl_devs_idr_lock);
-
-       if ((main_id < 0) || (ctrl_id < 0)) {
-               if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
-                       pr_err("too many devices in the system\n");
-
-               if (main_id >= 0) {
-                       mutex_lock(&hl_devs_idr_lock);
-                       idr_remove(&hl_devs_idr, main_id);
-                       mutex_unlock(&hl_devs_idr_lock);
-               }
-
-               rc = -EBUSY;
-               goto free_hdev;
-       }
-
-       hdev->id = main_id;
-       hdev->id_control = ctrl_id;
-
-       *dev = hdev;
-
-       return 0;
-
-free_hdev:
-       kfree(hdev);
-       return rc;
-}
-
-/*
- * destroy_hdev - destroy habanalabs device instance
- *
- * @dev: pointer to the habanalabs device structure
- *
- */
-void destroy_hdev(struct hl_device *hdev)
-{
-       /* Remove device from the device list */
-       mutex_lock(&hl_devs_idr_lock);
-       idr_remove(&hl_devs_idr, hdev->id);
-       idr_remove(&hl_devs_idr, hdev->id_control);
-       mutex_unlock(&hl_devs_idr_lock);
-
-       kfree(hdev);
-}
-
-static int hl_pmops_suspend(struct device *dev)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-
-       pr_debug("Going to suspend PCI device\n");
-
-       if (!hdev) {
-               pr_err("device pointer is NULL in suspend\n");
-               return 0;
-       }
-
-       return hl_device_suspend(hdev);
-}
-
-static int hl_pmops_resume(struct device *dev)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-
-       pr_debug("Going to resume PCI device\n");
-
-       if (!hdev) {
-               pr_err("device pointer is NULL in resume\n");
-               return 0;
-       }
-
-       return hl_device_resume(hdev);
-}
-
-/*
- * hl_pci_probe - probe PCI habanalabs devices
- *
- * @pdev: pointer to pci device
- * @id: pointer to pci device id structure
- *
- * Standard PCI probe function for habanalabs device.
- * Create a new habanalabs device and initialize it according to the
- * device's type
- */
-static int hl_pci_probe(struct pci_dev *pdev,
-                               const struct pci_device_id *id)
-{
-       struct hl_device *hdev;
-       int rc;
-
-       dev_info(&pdev->dev, HL_NAME
-                " device found [%04x:%04x] (rev %x)\n",
-                (int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
-
-       rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1);
-       if (rc)
-               return rc;
-
-       pci_set_drvdata(pdev, hdev);
-
-       rc = hl_device_init(hdev, hl_class);
-       if (rc) {
-               dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
-               rc = -ENODEV;
-               goto disable_device;
-       }
-
-       return 0;
-
-disable_device:
-       pci_set_drvdata(pdev, NULL);
-       destroy_hdev(hdev);
-
-       return rc;
-}
-
-/*
- * hl_pci_remove - remove PCI habanalabs devices
- *
- * @pdev: pointer to pci device
- *
- * Standard PCI remove function for habanalabs device
- */
-static void hl_pci_remove(struct pci_dev *pdev)
-{
-       struct hl_device *hdev;
-
-       hdev = pci_get_drvdata(pdev);
-       if (!hdev)
-               return;
-
-       hl_device_fini(hdev);
-       pci_set_drvdata(pdev, NULL);
-
-       destroy_hdev(hdev);
-}
-
-static const struct dev_pm_ops hl_pm_ops = {
-       .suspend = hl_pmops_suspend,
-       .resume = hl_pmops_resume,
-};
-
-static struct pci_driver hl_pci_driver = {
-       .name = HL_NAME,
-       .id_table = ids,
-       .probe = hl_pci_probe,
-       .remove = hl_pci_remove,
-       .driver.pm = &hl_pm_ops,
-};
-
-/*
- * hl_init - Initialize the habanalabs kernel driver
- */
-static int __init hl_init(void)
-{
-       int rc;
-       dev_t dev;
-
-       pr_info("loading driver\n");
-
-       rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME);
-       if (rc < 0) {
-               pr_err("unable to get major\n");
-               return rc;
-       }
-
-       hl_major = MAJOR(dev);
-
-       hl_class = class_create(THIS_MODULE, HL_NAME);
-       if (IS_ERR(hl_class)) {
-               pr_err("failed to allocate class\n");
-               rc = PTR_ERR(hl_class);
-               goto remove_major;
-       }
-
-       hl_debugfs_init();
-
-       rc = pci_register_driver(&hl_pci_driver);
-       if (rc) {
-               pr_err("failed to register pci device\n");
-               goto remove_debugfs;
-       }
-
-       pr_debug("driver loaded\n");
-
-       return 0;
-
-remove_debugfs:
-       hl_debugfs_fini();
-       class_destroy(hl_class);
-remove_major:
-       unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
-       return rc;
-}
-
-/*
- * hl_exit - Release all resources of the habanalabs kernel driver
- */
-static void __exit hl_exit(void)
-{
-       pci_unregister_driver(&hl_pci_driver);
-
-       /*
-        * Removing debugfs must be done only after all devices (including
-        * simulator devices) have been removed, because otherwise the
-        * debugfs code would dereference NULL objects
-        */
-       hl_debugfs_fini();
-
-       class_destroy(hl_class);
-       unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
-
-       idr_destroy(&hl_devs_idr);
-
-       pr_debug("driver removed\n");
-}
-
-module_init(hl_init);
-module_exit(hl_exit);
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
deleted file mode 100644 (file)
index 5af1c03..0000000
+++ /dev/null
@@ -1,546 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include <uapi/misc/habanalabs.h>
-#include "habanalabs.h"
-
-#include <linux/fs.h>
-#include <linux/uaccess.h>
-#include <linux/slab.h>
-
-static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
-       [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
-       [HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
-       [HL_DEBUG_OP_STM] = sizeof(struct hl_debug_params_stm),
-       [HL_DEBUG_OP_FUNNEL] = 0,
-       [HL_DEBUG_OP_BMON] = sizeof(struct hl_debug_params_bmon),
-       [HL_DEBUG_OP_SPMU] = sizeof(struct hl_debug_params_spmu),
-       [HL_DEBUG_OP_TIMESTAMP] = 0
-
-};
-
-static int device_status_info(struct hl_device *hdev, struct hl_info_args *args)
-{
-       struct hl_info_device_status dev_stat = {0};
-       u32 size = args->return_size;
-       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-
-       if ((!size) || (!out))
-               return -EINVAL;
-
-       dev_stat.status = hl_device_status(hdev);
-
-       return copy_to_user(out, &dev_stat,
-                       min((size_t)size, sizeof(dev_stat))) ? -EFAULT : 0;
-}
-
-static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
-{
-       struct hl_info_hw_ip_info hw_ip = {0};
-       u32 size = args->return_size;
-       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       u64 sram_kmd_size, dram_kmd_size;
-
-       if ((!size) || (!out))
-               return -EINVAL;
-
-       sram_kmd_size = (prop->sram_user_base_address -
-                               prop->sram_base_address);
-       dram_kmd_size = (prop->dram_user_base_address -
-                               prop->dram_base_address);
-
-       hw_ip.device_id = hdev->asic_funcs->get_pci_id(hdev);
-       hw_ip.sram_base_address = prop->sram_user_base_address;
-       hw_ip.dram_base_address = prop->dram_user_base_address;
-       hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask;
-       hw_ip.sram_size = prop->sram_size - sram_kmd_size;
-       hw_ip.dram_size = prop->dram_size - dram_kmd_size;
-       if (hw_ip.dram_size > PAGE_SIZE)
-               hw_ip.dram_enabled = 1;
-       hw_ip.num_of_events = prop->num_of_events;
-
-       memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version,
-               min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN));
-
-       memcpy(hw_ip.card_name, prop->armcp_info.card_name,
-               min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN));
-
-       hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version);
-       hw_ip.module_id = le32_to_cpu(prop->armcp_info.card_location);
-
-       hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr;
-       hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf;
-       hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
-       hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor;
-
-       return copy_to_user(out, &hw_ip,
-               min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
-}
-
-static int hw_events_info(struct hl_device *hdev, bool aggregate,
-                       struct hl_info_args *args)
-{
-       u32 size, max_size = args->return_size;
-       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-       void *arr;
-
-       if ((!max_size) || (!out))
-               return -EINVAL;
-
-       arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size);
-
-       return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0;
-}
-
-static int dram_usage_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
-{
-       struct hl_device *hdev = hpriv->hdev;
-       struct hl_info_dram_usage dram_usage = {0};
-       u32 max_size = args->return_size;
-       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       u64 dram_kmd_size;
-
-       if ((!max_size) || (!out))
-               return -EINVAL;
-
-       dram_kmd_size = (prop->dram_user_base_address -
-                               prop->dram_base_address);
-       dram_usage.dram_free_mem = (prop->dram_size - dram_kmd_size) -
-                                       atomic64_read(&hdev->dram_used_mem);
-       if (hpriv->ctx)
-               dram_usage.ctx_dram_mem =
-                       atomic64_read(&hpriv->ctx->dram_phys_mem);
-
-       return copy_to_user(out, &dram_usage,
-               min((size_t) max_size, sizeof(dram_usage))) ? -EFAULT : 0;
-}
-
-static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
-{
-       struct hl_info_hw_idle hw_idle = {0};
-       u32 max_size = args->return_size;
-       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-
-       if ((!max_size) || (!out))
-               return -EINVAL;
-
-       hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
-                                       &hw_idle.busy_engines_mask, NULL);
-
-       return copy_to_user(out, &hw_idle,
-               min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
-}
-
-static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args)
-{
-       struct hl_debug_params *params;
-       void *input = NULL, *output = NULL;
-       int rc;
-
-       params = kzalloc(sizeof(*params), GFP_KERNEL);
-       if (!params)
-               return -ENOMEM;
-
-       params->reg_idx = args->reg_idx;
-       params->enable = args->enable;
-       params->op = args->op;
-
-       if (args->input_ptr && args->input_size) {
-               input = kzalloc(hl_debug_struct_size[args->op], GFP_KERNEL);
-               if (!input) {
-                       rc = -ENOMEM;
-                       goto out;
-               }
-
-               if (copy_from_user(input, u64_to_user_ptr(args->input_ptr),
-                                       args->input_size)) {
-                       rc = -EFAULT;
-                       dev_err(hdev->dev, "failed to copy input debug data\n");
-                       goto out;
-               }
-
-               params->input = input;
-       }
-
-       if (args->output_ptr && args->output_size) {
-               output = kzalloc(args->output_size, GFP_KERNEL);
-               if (!output) {
-                       rc = -ENOMEM;
-                       goto out;
-               }
-
-               params->output = output;
-               params->output_size = args->output_size;
-       }
-
-       rc = hdev->asic_funcs->debug_coresight(hdev, params);
-       if (rc) {
-               dev_err(hdev->dev,
-                       "debug coresight operation failed %d\n", rc);
-               goto out;
-       }
-
-       if (output && copy_to_user((void __user *) (uintptr_t) args->output_ptr,
-                                       output, args->output_size)) {
-               dev_err(hdev->dev, "copy to user failed in debug ioctl\n");
-               rc = -EFAULT;
-               goto out;
-       }
-
-
-out:
-       kfree(params);
-       kfree(output);
-       kfree(input);
-
-       return rc;
-}
-
-static int device_utilization(struct hl_device *hdev, struct hl_info_args *args)
-{
-       struct hl_info_device_utilization device_util = {0};
-       u32 max_size = args->return_size;
-       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-
-       if ((!max_size) || (!out))
-               return -EINVAL;
-
-       if ((args->period_ms < 100) || (args->period_ms > 1000) ||
-               (args->period_ms % 100)) {
-               dev_err(hdev->dev,
-                       "period %u must be between 100 and 1000 and must be divisible by 100\n",
-                       args->period_ms);
-               return -EINVAL;
-       }
-
-       device_util.utilization = hl_device_utilization(hdev, args->period_ms);
-
-       return copy_to_user(out, &device_util,
-               min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0;
-}
-
-static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args)
-{
-       struct hl_info_clk_rate clk_rate = {0};
-       u32 max_size = args->return_size;
-       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-       int rc;
-
-       if ((!max_size) || (!out))
-               return -EINVAL;
-
-       rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz,
-                                               &clk_rate.max_clk_rate_mhz);
-       if (rc)
-               return rc;
-
-       return copy_to_user(out, &clk_rate,
-               min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0;
-}
-
-static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args)
-{
-       struct hl_info_reset_count reset_count = {0};
-       u32 max_size = args->return_size;
-       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-
-       if ((!max_size) || (!out))
-               return -EINVAL;
-
-       reset_count.hard_reset_cnt = hdev->hard_reset_cnt;
-       reset_count.soft_reset_cnt = hdev->soft_reset_cnt;
-
-       return copy_to_user(out, &reset_count,
-               min((size_t) max_size, sizeof(reset_count))) ? -EFAULT : 0;
-}
-
-static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
-{
-       struct hl_info_time_sync time_sync = {0};
-       u32 max_size = args->return_size;
-       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-
-       if ((!max_size) || (!out))
-               return -EINVAL;
-
-       time_sync.device_time = hdev->asic_funcs->get_device_time(hdev);
-       time_sync.host_time = ktime_get_raw_ns();
-
-       return copy_to_user(out, &time_sync,
-               min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
-}
-
-static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
-{
-       struct hl_device *hdev = hpriv->hdev;
-       struct hl_info_cs_counters cs_counters = {0};
-       u32 max_size = args->return_size;
-       void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-
-       if ((!max_size) || (!out))
-               return -EINVAL;
-
-       memcpy(&cs_counters.cs_counters, &hdev->aggregated_cs_counters,
-                       sizeof(struct hl_cs_counters));
-
-       if (hpriv->ctx)
-               memcpy(&cs_counters.ctx_cs_counters, &hpriv->ctx->cs_counters,
-                               sizeof(struct hl_cs_counters));
-
-       return copy_to_user(out, &cs_counters,
-               min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0;
-}
-
-static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
-                               struct device *dev)
-{
-       struct hl_info_args *args = data;
-       struct hl_device *hdev = hpriv->hdev;
-       int rc;
-
-       /*
-        * Information is returned for the following opcodes even if the device
-        * is disabled or in reset.
-        */
-       switch (args->op) {
-       case HL_INFO_HW_IP_INFO:
-               return hw_ip_info(hdev, args);
-
-       case HL_INFO_DEVICE_STATUS:
-               return device_status_info(hdev, args);
-
-       case HL_INFO_RESET_COUNT:
-               return get_reset_count(hdev, args);
-
-       default:
-               break;
-       }
-
-       if (hl_device_disabled_or_in_reset(hdev)) {
-               dev_warn_ratelimited(dev,
-                       "Device is %s. Can't execute INFO IOCTL\n",
-                       atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
-               return -EBUSY;
-       }
-
-       switch (args->op) {
-       case HL_INFO_HW_EVENTS:
-               rc = hw_events_info(hdev, false, args);
-               break;
-
-       case HL_INFO_DRAM_USAGE:
-               rc = dram_usage_info(hpriv, args);
-               break;
-
-       case HL_INFO_HW_IDLE:
-               rc = hw_idle(hdev, args);
-               break;
-
-       case HL_INFO_DEVICE_UTILIZATION:
-               rc = device_utilization(hdev, args);
-               break;
-
-       case HL_INFO_HW_EVENTS_AGGREGATE:
-               rc = hw_events_info(hdev, true, args);
-               break;
-
-       case HL_INFO_CLK_RATE:
-               rc = get_clk_rate(hdev, args);
-               break;
-
-       case HL_INFO_TIME_SYNC:
-               return time_sync_info(hdev, args);
-
-       case HL_INFO_CS_COUNTERS:
-               return cs_counters_info(hpriv, args);
-
-       default:
-               dev_err(dev, "Invalid request %d\n", args->op);
-               rc = -ENOTTY;
-               break;
-       }
-
-       return rc;
-}
-
-static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
-{
-       return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev);
-}
-
-static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data)
-{
-       return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl);
-}
-
-static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
-{
-       struct hl_debug_args *args = data;
-       struct hl_device *hdev = hpriv->hdev;
-       int rc = 0;
-
-       if (hl_device_disabled_or_in_reset(hdev)) {
-               dev_warn_ratelimited(hdev->dev,
-                       "Device is %s. Can't execute DEBUG IOCTL\n",
-                       atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
-               return -EBUSY;
-       }
-
-       switch (args->op) {
-       case HL_DEBUG_OP_ETR:
-       case HL_DEBUG_OP_ETF:
-       case HL_DEBUG_OP_STM:
-       case HL_DEBUG_OP_FUNNEL:
-       case HL_DEBUG_OP_BMON:
-       case HL_DEBUG_OP_SPMU:
-       case HL_DEBUG_OP_TIMESTAMP:
-               if (!hdev->in_debug) {
-                       dev_err_ratelimited(hdev->dev,
-                               "Rejecting debug configuration request because device not in debug mode\n");
-                       return -EFAULT;
-               }
-               args->input_size =
-                       min(args->input_size, hl_debug_struct_size[args->op]);
-               rc = debug_coresight(hdev, args);
-               break;
-       case HL_DEBUG_OP_SET_MODE:
-               rc = hl_device_set_debug_mode(hdev, (bool) args->enable);
-               break;
-       default:
-               dev_err(hdev->dev, "Invalid request %d\n", args->op);
-               rc = -ENOTTY;
-               break;
-       }
-
-       return rc;
-}
-
-#define HL_IOCTL_DEF(ioctl, _func) \
-       [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func}
-
-static const struct hl_ioctl_desc hl_ioctls[] = {
-       HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl),
-       HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
-       HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
-       HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl),
-       HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl),
-       HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl)
-};
-
-static const struct hl_ioctl_desc hl_ioctls_control[] = {
-       HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control)
-};
-
-static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
-               const struct hl_ioctl_desc *ioctl, struct device *dev)
-{
-       struct hl_fpriv *hpriv = filep->private_data;
-       struct hl_device *hdev = hpriv->hdev;
-       unsigned int nr = _IOC_NR(cmd);
-       char stack_kdata[128] = {0};
-       char *kdata = NULL;
-       unsigned int usize, asize;
-       hl_ioctl_t *func;
-       u32 hl_size;
-       int retcode;
-
-       if (hdev->hard_reset_pending) {
-               dev_crit_ratelimited(hdev->dev_ctrl,
-                       "Device HARD reset pending! Please close FD\n");
-               return -ENODEV;
-       }
-
-       /* Do not trust userspace, use our own definition */
-       func = ioctl->func;
-
-       if (unlikely(!func)) {
-               dev_dbg(dev, "no function\n");
-               retcode = -ENOTTY;
-               goto out_err;
-       }
-
-       hl_size = _IOC_SIZE(ioctl->cmd);
-       usize = asize = _IOC_SIZE(cmd);
-       if (hl_size > asize)
-               asize = hl_size;
-
-       cmd = ioctl->cmd;
-
-       if (cmd & (IOC_IN | IOC_OUT)) {
-               if (asize <= sizeof(stack_kdata)) {
-                       kdata = stack_kdata;
-               } else {
-                       kdata = kzalloc(asize, GFP_KERNEL);
-                       if (!kdata) {
-                               retcode = -ENOMEM;
-                               goto out_err;
-                       }
-               }
-       }
-
-       if (cmd & IOC_IN) {
-               if (copy_from_user(kdata, (void __user *)arg, usize)) {
-                       retcode = -EFAULT;
-                       goto out_err;
-               }
-       } else if (cmd & IOC_OUT) {
-               memset(kdata, 0, usize);
-       }
-
-       retcode = func(hpriv, kdata);
-
-       if ((cmd & IOC_OUT) && copy_to_user((void __user *)arg, kdata, usize))
-               retcode = -EFAULT;
-
-out_err:
-       if (retcode)
-               dev_dbg(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
-                         task_pid_nr(current), cmd, nr);
-
-       if (kdata != stack_kdata)
-               kfree(kdata);
-
-       return retcode;
-}
-
-long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
-{
-       struct hl_fpriv *hpriv = filep->private_data;
-       struct hl_device *hdev = hpriv->hdev;
-       const struct hl_ioctl_desc *ioctl = NULL;
-       unsigned int nr = _IOC_NR(cmd);
-
-       if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
-               ioctl = &hl_ioctls[nr];
-       } else {
-               dev_err(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n",
-                       task_pid_nr(current), nr);
-               return -ENOTTY;
-       }
-
-       return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev);
-}
-
-long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
-{
-       struct hl_fpriv *hpriv = filep->private_data;
-       struct hl_device *hdev = hpriv->hdev;
-       const struct hl_ioctl_desc *ioctl = NULL;
-       unsigned int nr = _IOC_NR(cmd);
-
-       if (nr == _IOC_NR(HL_IOCTL_INFO)) {
-               ioctl = &hl_ioctls_control[nr];
-       } else {
-               dev_err(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n",
-                       task_pid_nr(current), nr);
-               return -ENOTTY;
-       }
-
-       return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl);
-}
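
To make the INFO path above concrete, the following is a minimal user-space sketch (illustrative only, not part of this patch) that queries the HW IP information through HL_IOCTL_INFO. It assumes the uapi header is installed as <misc/habanalabs.h> and that the compute device node is /dev/hl0; both are assumptions of the example, not guarantees made by the driver code above.

        #include <misc/habanalabs.h>

        #include <fcntl.h>
        #include <stdint.h>
        #include <stdio.h>
        #include <string.h>
        #include <sys/ioctl.h>
        #include <unistd.h>

        int main(void)
        {
                struct hl_info_hw_ip_info hw_ip;
                struct hl_info_args args;
                int fd;

                fd = open("/dev/hl0", O_RDWR);
                if (fd < 0)
                        return 1;

                memset(&args, 0, sizeof(args));
                memset(&hw_ip, 0, sizeof(hw_ip));

                /* Ask the driver to fill hw_ip through the return pointer */
                args.op = HL_INFO_HW_IP_INFO;
                args.return_pointer = (uint64_t)(uintptr_t)&hw_ip;
                args.return_size = sizeof(hw_ip);

                if (!ioctl(fd, HL_IOCTL_INFO, &args))
                        printf("device id 0x%x, user DRAM size %llu bytes\n",
                               hw_ip.device_id,
                               (unsigned long long)hw_ip.dram_size);

                close(fd);
                return 0;
        }
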
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
deleted file mode 100644 (file)
index 2876816..0000000
+++ /dev/null
@@ -1,918 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-
-#include <linux/slab.h>
-
-/*
- * hl_hw_queue_add_ptr - add to pi or ci and check if it wraps around
- *
- * @ptr: the current pi/ci value
- * @val: the amount to add
- *
- * Add val to ptr. The result can go up to twice the queue length.
- */
-inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
-{
-       ptr += val;
-       ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
-       return ptr;
-}
-static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
-{
-       return atomic_read(ci) & ((queue_len << 1) - 1);
-}
-
-static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
-{
-       int delta = (q->pi - queue_ci_get(&q->ci, queue_len));
-
-       if (delta >= 0)
-               return (queue_len - delta);
-       else
-               return (abs(delta) - queue_len);
-}
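
A short worked example of the arithmetic above (the numbers are illustrative only): both the producer index (pi) and the consumer index (ci) wrap at twice the queue length, and the negative-delta branch keeps the count correct when pi has already wrapped around while ci has not.

        #include <stdio.h>
        #include <stdlib.h>

        int main(void)
        {
                int queue_len = 256;    /* queue entries; both indices wrap at 512 */
                int pi = 10, ci = 500;  /* pi has wrapped past 511, ci has not */
                int delta = pi - ci;    /* -490 */
                int free_slots = (delta >= 0) ? queue_len - delta
                                              : abs(delta) - queue_len;

                /* (10 - 500) mod 512 = 22 entries in flight, so 256 - 22 = 234 free */
                printf("%d\n", free_slots);     /* prints 234 */
                return 0;
        }
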
-
-void hl_int_hw_queue_update_ci(struct hl_cs *cs)
-{
-       struct hl_device *hdev = cs->ctx->hdev;
-       struct hl_hw_queue *q;
-       int i;
-
-       if (hdev->disabled)
-               return;
-
-       q = &hdev->kernel_queues[0];
-       for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
-               if (q->queue_type == QUEUE_TYPE_INT)
-                       atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
-       }
-}
-
-/*
- * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
- *                                H/W queue.
- * @hdev: pointer to habanalabs device structure
- * @q: pointer to habanalabs queue structure
- * @ctl: BD's control word
- * @len: BD's length
- * @ptr: BD's pointer
- *
- * This function assumes there is enough space on the queue to submit a new
- * BD to it. It initializes the next BD and calls the device specific
- * function to set the pi (and doorbell)
- *
- * This function must be called when the scheduler mutex is taken
- *
- */
-static void ext_and_hw_queue_submit_bd(struct hl_device *hdev,
-                       struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr)
-{
-       struct hl_bd *bd;
-
-       bd = (struct hl_bd *) (uintptr_t) q->kernel_address;
-       bd += hl_pi_2_offset(q->pi);
-       bd->ctl = cpu_to_le32(ctl);
-       bd->len = cpu_to_le32(len);
-       bd->ptr = cpu_to_le64(ptr);
-
-       q->pi = hl_queue_inc_ptr(q->pi);
-       hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
-}
-
-/*
- * ext_queue_sanity_checks - perform some sanity checks on external queue
- *
- * @hdev              : pointer to hl_device structure
- * @q                 :        pointer to hl_hw_queue structure
- * @num_of_entries    : how many entries to check for space
- * @reserve_cq_entry  :        whether to reserve an entry in the cq
- *
- * H/W queues spinlock should be taken before calling this function
- *
- * Perform the following:
- * - Make sure we have enough space in the h/w queue
- * - Make sure we have enough space in the completion queue
- * - Reserve space in the completion queue (needs to be reversed if there
- *   is a failure down the road before the actual submission of work). Only
- *   do this action if reserve_cq_entry is true
- *
- */
-static int ext_queue_sanity_checks(struct hl_device *hdev,
-                               struct hl_hw_queue *q, int num_of_entries,
-                               bool reserve_cq_entry)
-{
-       atomic_t *free_slots =
-                       &hdev->completion_queue[q->cq_id].free_slots_cnt;
-       int free_slots_cnt;
-
-       /* Check we have enough space in the queue */
-       free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
-
-       if (free_slots_cnt < num_of_entries) {
-               dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
-                       q->hw_queue_id, num_of_entries);
-               return -EAGAIN;
-       }
-
-       if (reserve_cq_entry) {
-               /*
-                * Check we have enough space in the completion queue.
-                * Subtract num_of_entries from the free-slots counter; if the
-                * result is negative, the CQ is full and we can't submit a new
-                * CB because we won't get an ack on its completion.
-                * atomic_add_negative() returns true in that case, so we
-                * restore the counter before bailing out.
-                */
-               if (atomic_add_negative(num_of_entries * -1, free_slots)) {
-                       dev_dbg(hdev->dev, "No space for %d on CQ %d\n",
-                               num_of_entries, q->hw_queue_id);
-                       atomic_add(num_of_entries, free_slots);
-                       return -EAGAIN;
-               }
-       }
-
-       return 0;
-}
-
-/*
- * int_queue_sanity_checks - perform some sanity checks on internal queue
- *
- * @hdev              : pointer to hl_device structure
- * @q                 :        pointer to hl_hw_queue structure
- * @num_of_entries    : how many entries to check for space
- *
- * H/W queues spinlock should be taken before calling this function
- *
- * Perform the following:
- * - Make sure we have enough space in the h/w queue
- *
- */
-static int int_queue_sanity_checks(struct hl_device *hdev,
-                                       struct hl_hw_queue *q,
-                                       int num_of_entries)
-{
-       int free_slots_cnt;
-
-       if (num_of_entries > q->int_queue_len) {
-               dev_err(hdev->dev,
-                       "Cannot populate queue %u with %u jobs\n",
-                       q->hw_queue_id, num_of_entries);
-               return -ENOMEM;
-       }
-
-       /* Check we have enough space in the queue */
-       free_slots_cnt = queue_free_slots(q, q->int_queue_len);
-
-       if (free_slots_cnt < num_of_entries) {
-               dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
-                       q->hw_queue_id, num_of_entries);
-               return -EAGAIN;
-       }
-
-       return 0;
-}
-
-/*
- * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
- * @hdev: Pointer to hl_device structure.
- * @q: Pointer to hl_hw_queue structure.
- * @num_of_entries: How many entries to check for space.
- *
- * Notice: We do not reserve queue entries so this function mustn't be called
- *         more than once per CS for the same queue
- *
- */
-static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
-                                       int num_of_entries)
-{
-       int free_slots_cnt;
-
-       /* Check we have enough space in the queue */
-       free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
-
-       if (free_slots_cnt < num_of_entries) {
-               dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
-                       q->hw_queue_id, num_of_entries);
-               return -EAGAIN;
-       }
-
-       return 0;
-}
-
-/*
- * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
- *
- * @hdev: pointer to hl_device structure
- * @hw_queue_id: Queue's ID
- * @cb_size: size of CB
- * @cb_ptr: pointer to CB location
- *
- * This function sends a single CB, that must NOT generate a completion entry
- *
- */
-int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
-                               u32 cb_size, u64 cb_ptr)
-{
-       struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
-       int rc = 0;
-
-       /*
-        * The CPU queue is a synchronous queue with an effective depth of
-        * a single entry (although it is allocated with room for multiple
-        * entries). Therefore, there is a different lock, called
-        * send_cpu_message_lock, that serializes accesses to the CPU queue.
-        * As a result, we don't need to lock the access to the entire H/W
-        * queues module when submitting a JOB to the CPU queue
-        */
-       if (q->queue_type != QUEUE_TYPE_CPU)
-               hdev->asic_funcs->hw_queues_lock(hdev);
-
-       if (hdev->disabled) {
-               rc = -EPERM;
-               goto out;
-       }
-
-       /*
-        * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue
-        * type only on init phase, when the queues are empty and being tested,
-        * so there is no need for sanity checks.
-        */
-       if (q->queue_type != QUEUE_TYPE_HW) {
-               rc = ext_queue_sanity_checks(hdev, q, 1, false);
-               if (rc)
-                       goto out;
-       }
-
-       ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
-
-out:
-       if (q->queue_type != QUEUE_TYPE_CPU)
-               hdev->asic_funcs->hw_queues_unlock(hdev);
-
-       return rc;
-}
-
-/*
- * ext_queue_schedule_job - submit a JOB to an external queue
- *
- * @job: pointer to the job that needs to be submitted to the queue
- *
- * This function must be called when the scheduler mutex is taken
- *
- */
-static void ext_queue_schedule_job(struct hl_cs_job *job)
-{
-       struct hl_device *hdev = job->cs->ctx->hdev;
-       struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
-       struct hl_cq_entry cq_pkt;
-       struct hl_cq *cq;
-       u64 cq_addr;
-       struct hl_cb *cb;
-       u32 ctl;
-       u32 len;
-       u64 ptr;
-
-       /*
-        * Update the JOB ID inside the BD CTL so the device would know what
-        * to write in the completion queue
-        */
-       ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK);
-
-       cb = job->patched_cb;
-       len = job->job_cb_size;
-       ptr = cb->bus_address;
-
-       cq_pkt.data = cpu_to_le32(
-                               ((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
-                                       & CQ_ENTRY_SHADOW_INDEX_MASK) |
-                               (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
-                               (1 << CQ_ENTRY_READY_SHIFT));
-
-       /*
-        * No need to protect pi_offset because scheduling to the
-        * H/W queues is done under the scheduler mutex
-        *
-        * No need to check if CQ is full because it was already
-        * checked in ext_queue_sanity_checks
-        */
-       cq = &hdev->completion_queue[q->cq_id];
-       cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);
-
-       hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
-                                               cq_addr,
-                                               le32_to_cpu(cq_pkt.data),
-                                               q->msi_vec,
-                                               job->contains_dma_pkt);
-
-       q->shadow_queue[hl_pi_2_offset(q->pi)] = job;
-
-       cq->pi = hl_cq_inc_ptr(cq->pi);
-
-       ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
-}
-
-/*
- * int_queue_schedule_job - submit a JOB to an internal queue
- *
- * @job: pointer to the job that needs to be submitted to the queue
- *
- * This function must be called when the scheduler mutex is taken
- *
- */
-static void int_queue_schedule_job(struct hl_cs_job *job)
-{
-       struct hl_device *hdev = job->cs->ctx->hdev;
-       struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
-       struct hl_bd bd;
-       __le64 *pi;
-
-       bd.ctl = 0;
-       bd.len = cpu_to_le32(job->job_cb_size);
-       bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);
-
-       pi = (__le64 *) (uintptr_t) (q->kernel_address +
-               ((q->pi & (q->int_queue_len - 1)) * sizeof(bd)));
-
-       q->pi++;
-       q->pi &= ((q->int_queue_len << 1) - 1);
-
-       hdev->asic_funcs->pqe_write(hdev, pi, &bd);
-
-       hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
-}
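
The producer index is deliberately kept modulo twice the queue length (q->pi &= ((q->int_queue_len << 1) - 1)), so that a full queue can be told apart from an empty one when the index is compared against the consumer index. A hedged sketch of the free-slot computation this convention enables (queue_free_slots() itself is defined earlier in this file; the body below is illustrative, not a copy):

/*
 * Illustrative only: with pi and ci counted modulo 2 * queue_len,
 * delta == 0 means the queue is empty while delta == queue_len means
 * it is full, so no extra "full" flag is needed.
 */
static inline int free_slots_sketch(int pi, int ci, int queue_len)
{
	int delta = pi - ci;

	if (delta >= 0)
		return queue_len - delta;

	return abs(delta) - queue_len;
}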
-
-/*
- * hw_queue_schedule_job - submit a JOB to a H/W queue
- *
- * @job: pointer to the job that needs to be submitted to the queue
- *
- * This function must be called when the scheduler mutex is taken
- *
- */
-static void hw_queue_schedule_job(struct hl_cs_job *job)
-{
-       struct hl_device *hdev = job->cs->ctx->hdev;
-       struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
-       u64 ptr;
-       u32 offset, ctl, len;
-
-       /*
-        * Upon PQE completion, COMP_DATA is used as the write data to the
-        * completion queue (QMAN HBW message), and COMP_OFFSET is used as the
-        * write address offset in the SM block (QMAN LBW message).
-        * The write address offset is calculated as "COMP_OFFSET << 2".
-        */
-       offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
-       ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
-               ((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);
-
-       len = job->job_cb_size;
-
-       /*
-        * A patched CB is created only if a user CB was allocated by driver and
-        * MMU is disabled. If MMU is enabled, the user CB should be used
-        * instead. If the user CB wasn't allocated by driver, assume that it
-        * holds an address.
-        */
-       if (job->patched_cb)
-               ptr = job->patched_cb->bus_address;
-       else if (job->is_kernel_allocated_cb)
-               ptr = job->user_cb->bus_address;
-       else
-               ptr = (u64) (uintptr_t) job->user_cb;
-
-       ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
-}
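
A quick worked example of the completion-offset math above, using hypothetical numbers (max_pending_cs is a power of two; 64 is assumed here purely for illustration):

/*
 * Hypothetical numbers, for illustration only:
 *   cs->sequence    = 70, max_pending_cs = 64
 *   offset          = 70 & (64 - 1) = 6
 *   SM write offset = offset << 2   = 24 bytes into the SM block
 */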
-
-/*
- * init_signal_wait_cs - initialize a signal/wait CS
- * @cs: pointer to the signal/wait CS
- *
- * H/W queues spinlock should be taken before calling this function
- */
-static void init_signal_wait_cs(struct hl_cs *cs)
-{
-       struct hl_ctx *ctx = cs->ctx;
-       struct hl_device *hdev = ctx->hdev;
-       struct hl_hw_queue *hw_queue;
-       struct hl_cs_compl *cs_cmpl =
-                       container_of(cs->fence, struct hl_cs_compl, base_fence);
-
-       struct hl_hw_sob *hw_sob;
-       struct hl_cs_job *job;
-       u32 q_idx;
-
-       /* There is only one job in a signal/wait CS */
-       job = list_first_entry(&cs->job_list, struct hl_cs_job,
-                               cs_node);
-       q_idx = job->hw_queue_id;
-       hw_queue = &hdev->kernel_queues[q_idx];
-
-       if (cs->type & CS_TYPE_SIGNAL) {
-               hw_sob = &hw_queue->hw_sob[hw_queue->curr_sob_offset];
-
-               cs_cmpl->hw_sob = hw_sob;
-               cs_cmpl->sob_val = hw_queue->next_sob_val++;
-
-               dev_dbg(hdev->dev,
-                       "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
-                       cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
-
-               hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
-                                       cs_cmpl->hw_sob->sob_id);
-
-               kref_get(&hw_sob->kref);
-
-               /* check for wraparound */
-               if (hw_queue->next_sob_val == HL_MAX_SOB_VAL) {
-                       /*
-                        * Decrement as we reached the max value.
-                        * The release function won't be called here as we've
-                        * just incremented the refcount.
-                        */
-                       kref_put(&hw_sob->kref, hl_sob_reset_error);
-                       hw_queue->next_sob_val = 1;
-                       /* only two SOBs are currently in use */
-                       hw_queue->curr_sob_offset =
-                                       (hw_queue->curr_sob_offset + 1) %
-                                               HL_RSVD_SOBS_IN_USE;
-
-                       dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
-                                       hw_queue->curr_sob_offset, q_idx);
-               }
-       } else if (cs->type & CS_TYPE_WAIT) {
-               struct hl_cs_compl *signal_cs_cmpl;
-
-               signal_cs_cmpl = container_of(cs->signal_fence,
-                                               struct hl_cs_compl,
-                                               base_fence);
-
-               /* copy the SOB id and value of the signal CS */
-               cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
-               cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
-
-               dev_dbg(hdev->dev,
-                       "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
-                       cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
-                       hw_queue->base_mon_id, q_idx);
-
-               hdev->asic_funcs->gen_wait_cb(hdev, job->patched_cb,
-                                               cs_cmpl->hw_sob->sob_id,
-                                               cs_cmpl->sob_val,
-                                               hw_queue->base_mon_id,
-                                               q_idx);
-
-               kref_get(&cs_cmpl->hw_sob->kref);
-               /*
-                * Must put the signal fence after the SOB refcnt increment so
-                * the SOB refcnt won't turn 0 and reset the SOB before the
-                * wait CS was submitted.
-                */
-               mb();
-               dma_fence_put(cs->signal_fence);
-               cs->signal_fence = NULL;
-       }
-}
-
-/*
- * hl_hw_queue_schedule_cs - schedule a command submission
- * @cs: pointer to the CS
- */
-int hl_hw_queue_schedule_cs(struct hl_cs *cs)
-{
-       struct hl_ctx *ctx = cs->ctx;
-       struct hl_device *hdev = ctx->hdev;
-       struct hl_cs_job *job, *tmp;
-       struct hl_hw_queue *q;
-       u32 max_queues;
-       int rc = 0, i, cq_cnt;
-
-       hdev->asic_funcs->hw_queues_lock(hdev);
-
-       if (hl_device_disabled_or_in_reset(hdev)) {
-               ctx->cs_counters.device_in_reset_drop_cnt++;
-               dev_err(hdev->dev,
-                       "device is disabled or in reset, CS rejected!\n");
-               rc = -EPERM;
-               goto out;
-       }
-
-       max_queues = hdev->asic_prop.max_queues;
-
-       q = &hdev->kernel_queues[0];
-       for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
-               if (cs->jobs_in_queue_cnt[i]) {
-                       switch (q->queue_type) {
-                       case QUEUE_TYPE_EXT:
-                               rc = ext_queue_sanity_checks(hdev, q,
-                                               cs->jobs_in_queue_cnt[i], true);
-                               break;
-                       case QUEUE_TYPE_INT:
-                               rc = int_queue_sanity_checks(hdev, q,
-                                               cs->jobs_in_queue_cnt[i]);
-                               break;
-                       case QUEUE_TYPE_HW:
-                               rc = hw_queue_sanity_checks(hdev, q,
-                                               cs->jobs_in_queue_cnt[i]);
-                               break;
-                       default:
-                               dev_err(hdev->dev, "Queue type %d is invalid\n",
-                                       q->queue_type);
-                               rc = -EINVAL;
-                               break;
-                       }
-
-                       if (rc) {
-                               ctx->cs_counters.queue_full_drop_cnt++;
-                               goto unroll_cq_resv;
-                       }
-
-                       if (q->queue_type == QUEUE_TYPE_EXT)
-                               cq_cnt++;
-               }
-       }
-
-       if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT))
-               init_signal_wait_cs(cs);
-
-       spin_lock(&hdev->hw_queues_mirror_lock);
-       list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list);
-
-       /* Queue TDR if the CS is the first entry and if timeout is wanted */
-       if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
-                       (list_first_entry(&hdev->hw_queues_mirror_list,
-                                       struct hl_cs, mirror_node) == cs)) {
-               cs->tdr_active = true;
-               schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
-               spin_unlock(&hdev->hw_queues_mirror_lock);
-       } else {
-               spin_unlock(&hdev->hw_queues_mirror_lock);
-       }
-
-       if (!hdev->cs_active_cnt++) {
-               struct hl_device_idle_busy_ts *ts;
-
-               ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx];
-               ts->busy_to_idle_ts = ktime_set(0, 0);
-               ts->idle_to_busy_ts = ktime_get();
-       }
-
-       list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
-               switch (job->queue_type) {
-               case QUEUE_TYPE_EXT:
-                       ext_queue_schedule_job(job);
-                       break;
-               case QUEUE_TYPE_INT:
-                       int_queue_schedule_job(job);
-                       break;
-               case QUEUE_TYPE_HW:
-                       hw_queue_schedule_job(job);
-                       break;
-               default:
-                       break;
-               }
-
-       cs->submitted = true;
-
-       goto out;
-
-unroll_cq_resv:
-       q = &hdev->kernel_queues[0];
-       for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
-               if ((q->queue_type == QUEUE_TYPE_EXT) &&
-                                               (cs->jobs_in_queue_cnt[i])) {
-                       atomic_t *free_slots =
-                               &hdev->completion_queue[i].free_slots_cnt;
-                       atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
-                       cq_cnt--;
-               }
-       }
-
-out:
-       hdev->asic_funcs->hw_queues_unlock(hdev);
-
-       return rc;
-}
-
-/*
- * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
- *
- * @hdev: pointer to hl_device structure
- * @hw_queue_id: which queue to increment its ci
- */
-void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
-{
-       struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
-
-       atomic_inc(&q->ci);
-}
-
-static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
-                                       bool is_cpu_queue)
-{
-       void *p;
-       int rc;
-
-       if (is_cpu_queue)
-               p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
-                                                       HL_QUEUE_SIZE_IN_BYTES,
-                                                       &q->bus_address);
-       else
-               p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
-                                               HL_QUEUE_SIZE_IN_BYTES,
-                                               &q->bus_address,
-                                               GFP_KERNEL | __GFP_ZERO);
-       if (!p)
-               return -ENOMEM;
-
-       q->kernel_address = (u64) (uintptr_t) p;
-
-       q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
-                                       sizeof(*q->shadow_queue),
-                                       GFP_KERNEL);
-       if (!q->shadow_queue) {
-               dev_err(hdev->dev,
-                       "Failed to allocate shadow queue for H/W queue %d\n",
-                       q->hw_queue_id);
-               rc = -ENOMEM;
-               goto free_queue;
-       }
-
-       /* Make sure read/write pointers are initialized to start of queue */
-       atomic_set(&q->ci, 0);
-       q->pi = 0;
-
-       return 0;
-
-free_queue:
-       if (is_cpu_queue)
-               hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
-                                       HL_QUEUE_SIZE_IN_BYTES,
-                                       (void *) (uintptr_t) q->kernel_address);
-       else
-               hdev->asic_funcs->asic_dma_free_coherent(hdev,
-                                       HL_QUEUE_SIZE_IN_BYTES,
-                                       (void *) (uintptr_t) q->kernel_address,
-                                       q->bus_address);
-
-       return rc;
-}
-
-static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
-{
-       void *p;
-
-       p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id,
-                                       &q->bus_address, &q->int_queue_len);
-       if (!p) {
-               dev_err(hdev->dev,
-                       "Failed to get base address for internal queue %d\n",
-                       q->hw_queue_id);
-               return -EFAULT;
-       }
-
-       q->kernel_address = (u64) (uintptr_t) p;
-       q->pi = 0;
-       atomic_set(&q->ci, 0);
-
-       return 0;
-}
-
-static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
-{
-       return ext_and_cpu_queue_init(hdev, q, true);
-}
-
-static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
-{
-       return ext_and_cpu_queue_init(hdev, q, false);
-}
-
-static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
-{
-       void *p;
-
-       p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
-                                               HL_QUEUE_SIZE_IN_BYTES,
-                                               &q->bus_address,
-                                               GFP_KERNEL | __GFP_ZERO);
-       if (!p)
-               return -ENOMEM;
-
-       q->kernel_address = (u64) (uintptr_t) p;
-
-       /* Make sure read/write pointers are initialized to start of queue */
-       atomic_set(&q->ci, 0);
-       q->pi = 0;
-
-       return 0;
-}
-
-static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
-{
-       struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       struct hl_hw_sob *hw_sob;
-       int sob, queue_idx = hdev->sync_stream_queue_idx++;
-
-       hw_queue->base_sob_id =
-               prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS;
-       hw_queue->base_mon_id =
-               prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS;
-       hw_queue->next_sob_val = 1;
-       hw_queue->curr_sob_offset = 0;
-
-       for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
-               hw_sob = &hw_queue->hw_sob[sob];
-               hw_sob->hdev = hdev;
-               hw_sob->sob_id = hw_queue->base_sob_id + sob;
-               hw_sob->q_idx = q_idx;
-               kref_init(&hw_sob->kref);
-       }
-}
-
-static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
-{
-       struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
-
-       /*
-        * In case we got here due to a stuck CS, the refcnt might be bigger
-        * than 1 and therefore we reset it.
-        */
-       kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
-       hw_queue->curr_sob_offset = 0;
-       hw_queue->next_sob_val = 1;
-}
-
-/*
- * queue_init - main initialization function for H/W queue object
- *
- * @hdev: pointer to hl_device device structure
- * @q: pointer to hl_hw_queue queue structure
- * @hw_queue_id: The id of the H/W queue
- *
- * Allocate dma-able memory for the queue and initialize fields
- * Returns 0 on success
- */
-static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
-                       u32 hw_queue_id)
-{
-       int rc;
-
-       q->hw_queue_id = hw_queue_id;
-
-       switch (q->queue_type) {
-       case QUEUE_TYPE_EXT:
-               rc = ext_queue_init(hdev, q);
-               break;
-       case QUEUE_TYPE_INT:
-               rc = int_queue_init(hdev, q);
-               break;
-       case QUEUE_TYPE_CPU:
-               rc = cpu_queue_init(hdev, q);
-               break;
-       case QUEUE_TYPE_HW:
-               rc = hw_queue_init(hdev, q);
-               break;
-       case QUEUE_TYPE_NA:
-               q->valid = 0;
-               return 0;
-       default:
-               dev_crit(hdev->dev, "wrong queue type %d during init\n",
-                       q->queue_type);
-               rc = -EINVAL;
-               break;
-       }
-
-       if (q->supports_sync_stream)
-               sync_stream_queue_init(hdev, q->hw_queue_id);
-
-       if (rc)
-               return rc;
-
-       q->valid = 1;
-
-       return 0;
-}
-
-/*
- * queue_fini - destroy queue
- *
- * @hdev: pointer to hl_device device structure
- * @q: pointer to hl_hw_queue queue structure
- *
- * Free the queue memory
- */
-static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
-{
-       if (!q->valid)
-               return;
-
-       /*
-        * If we arrived here, there are no jobs waiting on this queue
-        * so we can safely remove it.
-        * This is because this function can only be called when:
-        * 1. Either a context is deleted, which can only occur if all its
-        *    jobs were finished
-        * 2. A context wasn't able to be created due to failure or timeout,
-        *    which means there are no jobs on the queue yet
-        *
-        * The only exception is the queues of the kernel context, but
-        * if they are being destroyed, it means that the entire module is
-        * being removed. If the module is removed, it means there is no open
-        * user context. It also means that if a job was submitted by
-        * the kernel driver (e.g. context creation), the job itself was
-        * released by the kernel driver when a timeout occurred on its
-        * completion. Thus, we don't need to release it again.
-        */
-
-       if (q->queue_type == QUEUE_TYPE_INT)
-               return;
-
-       kfree(q->shadow_queue);
-
-       if (q->queue_type == QUEUE_TYPE_CPU)
-               hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
-                                       HL_QUEUE_SIZE_IN_BYTES,
-                                       (void *) (uintptr_t) q->kernel_address);
-       else
-               hdev->asic_funcs->asic_dma_free_coherent(hdev,
-                                       HL_QUEUE_SIZE_IN_BYTES,
-                                       (void *) (uintptr_t) q->kernel_address,
-                                       q->bus_address);
-}
-
-int hl_hw_queues_create(struct hl_device *hdev)
-{
-       struct asic_fixed_properties *asic = &hdev->asic_prop;
-       struct hl_hw_queue *q;
-       int i, rc, q_ready_cnt;
-
-       hdev->kernel_queues = kcalloc(asic->max_queues,
-                               sizeof(*hdev->kernel_queues), GFP_KERNEL);
-
-       if (!hdev->kernel_queues) {
-               dev_err(hdev->dev, "Not enough memory for H/W queues\n");
-               return -ENOMEM;
-       }
-
-       /* Initialize the H/W queues */
-       for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
-                       i < asic->max_queues ; i++, q_ready_cnt++, q++) {
-
-               q->queue_type = asic->hw_queues_props[i].type;
-               q->supports_sync_stream =
-                               asic->hw_queues_props[i].supports_sync_stream;
-               rc = queue_init(hdev, q, i);
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "failed to initialize queue %d\n", i);
-                       goto release_queues;
-               }
-       }
-
-       return 0;
-
-release_queues:
-       for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
-               queue_fini(hdev, q);
-
-       kfree(hdev->kernel_queues);
-
-       return rc;
-}
-
-void hl_hw_queues_destroy(struct hl_device *hdev)
-{
-       struct hl_hw_queue *q;
-       u32 max_queues = hdev->asic_prop.max_queues;
-       int i;
-
-       for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
-               queue_fini(hdev, q);
-
-       kfree(hdev->kernel_queues);
-}
-
-void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
-{
-       struct hl_hw_queue *q;
-       u32 max_queues = hdev->asic_prop.max_queues;
-       int i;
-
-       for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) {
-               if ((!q->valid) ||
-                       ((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
-                       continue;
-               q->pi = 0;
-               atomic_set(&q->ci, 0);
-
-               if (q->supports_sync_stream)
-                       sync_stream_queue_reset(hdev, q->hw_queue_id);
-       }
-}
diff --git a/drivers/misc/habanalabs/hwmon.c b/drivers/misc/habanalabs/hwmon.c
deleted file mode 100644 (file)
index 8c6cd77..0000000
+++ /dev/null
@@ -1,579 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-
-#include <linux/pci.h>
-#include <linux/hwmon.h>
-
-#define SENSORS_PKT_TIMEOUT            1000000 /* 1s */
-#define HWMON_NR_SENSOR_TYPES          (hwmon_pwm + 1)
-
-int hl_build_hwmon_channel_info(struct hl_device *hdev,
-                               struct armcp_sensor *sensors_arr)
-{
-       u32 counts[HWMON_NR_SENSOR_TYPES] = {0};
-       u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL};
-       u32 sensors_by_type_next_index[HWMON_NR_SENSOR_TYPES] = {0};
-       struct hwmon_channel_info **channels_info;
-       u32 num_sensors_for_type, num_active_sensor_types = 0,
-                       arr_size = 0, *curr_arr;
-       enum hwmon_sensor_types type;
-       int rc, i, j;
-
-       for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) {
-               type = le32_to_cpu(sensors_arr[i].type);
-
-               if ((type == 0) && (sensors_arr[i].flags == 0))
-                       break;
-
-               if (type >= HWMON_NR_SENSOR_TYPES) {
-                       dev_err(hdev->dev,
-                               "Got wrong sensor type %d from device\n", type);
-                       return -EINVAL;
-               }
-
-               counts[type]++;
-               arr_size++;
-       }
-
-       for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) {
-               if (counts[i] == 0)
-                       continue;
-
-               num_sensors_for_type = counts[i] + 1;
-               curr_arr = kcalloc(num_sensors_for_type, sizeof(*curr_arr),
-                               GFP_KERNEL);
-               if (!curr_arr) {
-                       rc = -ENOMEM;
-                       goto sensors_type_err;
-               }
-
-               num_active_sensor_types++;
-               sensors_by_type[i] = curr_arr;
-       }
-
-       for (i = 0 ; i < arr_size ; i++) {
-               type = le32_to_cpu(sensors_arr[i].type);
-               curr_arr = sensors_by_type[type];
-               curr_arr[sensors_by_type_next_index[type]++] =
-                               le32_to_cpu(sensors_arr[i].flags);
-       }
-
-       channels_info = kcalloc(num_active_sensor_types + 1,
-                       sizeof(*channels_info), GFP_KERNEL);
-       if (!channels_info) {
-               rc = -ENOMEM;
-               goto channels_info_array_err;
-       }
-
-       for (i = 0 ; i < num_active_sensor_types ; i++) {
-               channels_info[i] = kzalloc(sizeof(*channels_info[i]),
-                               GFP_KERNEL);
-               if (!channels_info[i]) {
-                       rc = -ENOMEM;
-                       goto channel_info_err;
-               }
-       }
-
-       for (i = 0, j = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) {
-               if (!sensors_by_type[i])
-                       continue;
-
-               channels_info[j]->type = i;
-               channels_info[j]->config = sensors_by_type[i];
-               j++;
-       }
-
-       hdev->hl_chip_info->info =
-                       (const struct hwmon_channel_info **)channels_info;
-
-       return 0;
-
-channel_info_err:
-       for (i = 0 ; i < num_active_sensor_types ; i++)
-               if (channels_info[i]) {
-                       kfree(channels_info[i]->config);
-                       kfree(channels_info[i]);
-               }
-       kfree(channels_info);
-channels_info_array_err:
-sensors_type_err:
-       for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++)
-               kfree(sensors_by_type[i]);
-
-       return rc;
-}
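
For comparison, the table built dynamically above is the runtime equivalent of the static hwmon_channel_info declaration a driver with a fixed sensor set would use. A hedged example for a hypothetical device with two temperature channels and one fan channel (the sensor mix is invented for illustration):

/*
 * Hypothetical static equivalent of the dynamically built table above,
 * assuming two temperature sensors and one fan sensor.
 */
static const struct hwmon_channel_info *example_channel_info[] = {
	HWMON_CHANNEL_INFO(temp,
			   HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_HIGHEST,
			   HWMON_T_INPUT | HWMON_T_CRIT),
	HWMON_CHANNEL_INFO(fan,
			   HWMON_F_INPUT),
	NULL
};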
-
-static int hl_read(struct device *dev, enum hwmon_sensor_types type,
-                       u32 attr, int channel, long *val)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-       int rc;
-
-       if (hl_device_disabled_or_in_reset(hdev))
-               return -ENODEV;
-
-       switch (type) {
-       case hwmon_temp:
-               switch (attr) {
-               case hwmon_temp_input:
-               case hwmon_temp_max:
-               case hwmon_temp_crit:
-               case hwmon_temp_max_hyst:
-               case hwmon_temp_crit_hyst:
-               case hwmon_temp_offset:
-               case hwmon_temp_highest:
-                       break;
-               default:
-                       return -EINVAL;
-               }
-
-               rc = hl_get_temperature(hdev, channel, attr, val);
-               break;
-       case hwmon_in:
-               switch (attr) {
-               case hwmon_in_input:
-               case hwmon_in_min:
-               case hwmon_in_max:
-               case hwmon_in_highest:
-                       break;
-               default:
-                       return -EINVAL;
-               }
-
-               rc = hl_get_voltage(hdev, channel, attr, val);
-               break;
-       case hwmon_curr:
-               switch (attr) {
-               case hwmon_curr_input:
-               case hwmon_curr_min:
-               case hwmon_curr_max:
-               case hwmon_curr_highest:
-                       break;
-               default:
-                       return -EINVAL;
-               }
-
-               rc = hl_get_current(hdev, channel, attr, val);
-               break;
-       case hwmon_fan:
-               switch (attr) {
-               case hwmon_fan_input:
-               case hwmon_fan_min:
-               case hwmon_fan_max:
-                       break;
-               default:
-                       return -EINVAL;
-               }
-               rc = hl_get_fan_speed(hdev, channel, attr, val);
-               break;
-       case hwmon_pwm:
-               switch (attr) {
-               case hwmon_pwm_input:
-               case hwmon_pwm_enable:
-                       break;
-               default:
-                       return -EINVAL;
-               }
-               rc = hl_get_pwm_info(hdev, channel, attr, val);
-               break;
-       default:
-               return -EINVAL;
-       }
-       return rc;
-}
-
-static int hl_write(struct device *dev, enum hwmon_sensor_types type,
-                       u32 attr, int channel, long val)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-
-       if (hl_device_disabled_or_in_reset(hdev))
-               return -ENODEV;
-
-       switch (type) {
-       case hwmon_temp:
-               switch (attr) {
-               case hwmon_temp_offset:
-               case hwmon_temp_reset_history:
-                       break;
-               default:
-                       return -EINVAL;
-               }
-               hl_set_temperature(hdev, channel, attr, val);
-               break;
-       case hwmon_pwm:
-               switch (attr) {
-               case hwmon_pwm_input:
-               case hwmon_pwm_enable:
-                       break;
-               default:
-                       return -EINVAL;
-               }
-               hl_set_pwm_info(hdev, channel, attr, val);
-               break;
-       case hwmon_in:
-               switch (attr) {
-               case hwmon_in_reset_history:
-                       break;
-               default:
-                       return -EINVAL;
-               }
-               hl_set_voltage(hdev, channel, attr, val);
-               break;
-       case hwmon_curr:
-               switch (attr) {
-               case hwmon_curr_reset_history:
-                       break;
-               default:
-                       return -EINVAL;
-               }
-               hl_set_current(hdev, channel, attr, val);
-               break;
-       default:
-               return -EINVAL;
-       }
-       return 0;
-}
-
-static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
-                               u32 attr, int channel)
-{
-       switch (type) {
-       case hwmon_temp:
-               switch (attr) {
-               case hwmon_temp_input:
-               case hwmon_temp_max:
-               case hwmon_temp_max_hyst:
-               case hwmon_temp_crit:
-               case hwmon_temp_crit_hyst:
-               case hwmon_temp_highest:
-                       return 0444;
-               case hwmon_temp_offset:
-                       return 0644;
-               case hwmon_temp_reset_history:
-                       return 0200;
-               }
-               break;
-       case hwmon_in:
-               switch (attr) {
-               case hwmon_in_input:
-               case hwmon_in_min:
-               case hwmon_in_max:
-               case hwmon_in_highest:
-                       return 0444;
-               case hwmon_in_reset_history:
-                       return 0200;
-               }
-               break;
-       case hwmon_curr:
-               switch (attr) {
-               case hwmon_curr_input:
-               case hwmon_curr_min:
-               case hwmon_curr_max:
-               case hwmon_curr_highest:
-                       return 0444;
-               case hwmon_curr_reset_history:
-                       return 0200;
-               }
-               break;
-       case hwmon_fan:
-               switch (attr) {
-               case hwmon_fan_input:
-               case hwmon_fan_min:
-               case hwmon_fan_max:
-                       return 0444;
-               }
-               break;
-       case hwmon_pwm:
-               switch (attr) {
-               case hwmon_pwm_input:
-               case hwmon_pwm_enable:
-                       return 0644;
-               }
-               break;
-       default:
-               break;
-       }
-       return 0;
-}
-
-static const struct hwmon_ops hl_hwmon_ops = {
-       .is_visible = hl_is_visible,
-       .read = hl_read,
-       .write = hl_write
-};
-
-int hl_get_temperature(struct hl_device *hdev,
-                       int sensor_index, u32 attr, long *value)
-{
-       struct armcp_packet pkt;
-       int rc;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.sensor_index = __cpu_to_le16(sensor_index);
-       pkt.type = __cpu_to_le16(attr);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                       SENSORS_PKT_TIMEOUT, value);
-
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to get temperature from sensor %d, error %d\n",
-                       sensor_index, rc);
-               *value = 0;
-       }
-
-       return rc;
-}
-
-int hl_set_temperature(struct hl_device *hdev,
-                       int sensor_index, u32 attr, long value)
-{
-       struct armcp_packet pkt;
-       int rc;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.sensor_index = __cpu_to_le16(sensor_index);
-       pkt.type = __cpu_to_le16(attr);
-       pkt.value = __cpu_to_le64(value);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               SENSORS_PKT_TIMEOUT, NULL);
-
-       if (rc)
-               dev_err(hdev->dev,
-                       "Failed to set temperature of sensor %d, error %d\n",
-                       sensor_index, rc);
-
-       return rc;
-}
-
-int hl_get_voltage(struct hl_device *hdev,
-                       int sensor_index, u32 attr, long *value)
-{
-       struct armcp_packet pkt;
-       int rc;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.sensor_index = __cpu_to_le16(sensor_index);
-       pkt.type = __cpu_to_le16(attr);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SENSORS_PKT_TIMEOUT, value);
-
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to get voltage from sensor %d, error %d\n",
-                       sensor_index, rc);
-               *value = 0;
-       }
-
-       return rc;
-}
-
-int hl_get_current(struct hl_device *hdev,
-                       int sensor_index, u32 attr, long *value)
-{
-       struct armcp_packet pkt;
-       int rc;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.sensor_index = __cpu_to_le16(sensor_index);
-       pkt.type = __cpu_to_le16(attr);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SENSORS_PKT_TIMEOUT, value);
-
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to get current from sensor %d, error %d\n",
-                       sensor_index, rc);
-               *value = 0;
-       }
-
-       return rc;
-}
-
-int hl_get_fan_speed(struct hl_device *hdev,
-                       int sensor_index, u32 attr, long *value)
-{
-       struct armcp_packet pkt;
-       int rc;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.sensor_index = __cpu_to_le16(sensor_index);
-       pkt.type = __cpu_to_le16(attr);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SENSORS_PKT_TIMEOUT, value);
-
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to get fan speed from sensor %d, error %d\n",
-                       sensor_index, rc);
-               *value = 0;
-       }
-
-       return rc;
-}
-
-int hl_get_pwm_info(struct hl_device *hdev,
-                       int sensor_index, u32 attr, long *value)
-{
-       struct armcp_packet pkt;
-       int rc;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.sensor_index = __cpu_to_le16(sensor_index);
-       pkt.type = __cpu_to_le16(attr);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SENSORS_PKT_TIMEOUT, value);
-
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to get pwm info from sensor %d, error %d\n",
-                       sensor_index, rc);
-               *value = 0;
-       }
-
-       return rc;
-}
-
-void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
-                       long value)
-{
-       struct armcp_packet pkt;
-       int rc;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.sensor_index = __cpu_to_le16(sensor_index);
-       pkt.type = __cpu_to_le16(attr);
-       pkt.value = cpu_to_le64(value);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SENSORS_PKT_TIMEOUT, NULL);
-
-       if (rc)
-               dev_err(hdev->dev,
-                       "Failed to set pwm info to sensor %d, error %d\n",
-                       sensor_index, rc);
-}
-
-int hl_set_voltage(struct hl_device *hdev,
-                       int sensor_index, u32 attr, long value)
-{
-       struct armcp_packet pkt;
-       int rc;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_SET <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.sensor_index = __cpu_to_le16(sensor_index);
-       pkt.type = __cpu_to_le16(attr);
-       pkt.value = __cpu_to_le64(value);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               SENSORS_PKT_TIMEOUT, NULL);
-
-       if (rc)
-               dev_err(hdev->dev,
-                       "Failed to set voltage of sensor %d, error %d\n",
-                       sensor_index, rc);
-
-       return rc;
-}
-
-int hl_set_current(struct hl_device *hdev,
-                       int sensor_index, u32 attr, long value)
-{
-       struct armcp_packet pkt;
-       int rc;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_SET <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.sensor_index = __cpu_to_le16(sensor_index);
-       pkt.type = __cpu_to_le16(attr);
-       pkt.value = __cpu_to_le64(value);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               SENSORS_PKT_TIMEOUT, NULL);
-
-       if (rc)
-               dev_err(hdev->dev,
-                       "Failed to set current of sensor %d, error %d\n",
-                       sensor_index, rc);
-
-       return rc;
-}
-
-int hl_hwmon_init(struct hl_device *hdev)
-{
-       struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev;
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       int rc;
-
-       if ((hdev->hwmon_initialized) || !(hdev->fw_loading))
-               return 0;
-
-       if (hdev->hl_chip_info->info) {
-               hdev->hl_chip_info->ops = &hl_hwmon_ops;
-
-               hdev->hwmon_dev = hwmon_device_register_with_info(dev,
-                                       prop->armcp_info.card_name, hdev,
-                                       hdev->hl_chip_info, NULL);
-               if (IS_ERR(hdev->hwmon_dev)) {
-                       rc = PTR_ERR(hdev->hwmon_dev);
-                       dev_err(hdev->dev,
-                               "Unable to register hwmon device: %d\n", rc);
-                       return rc;
-               }
-
-               dev_info(hdev->dev, "%s: add sensors information\n",
-                       dev_name(hdev->hwmon_dev));
-
-               hdev->hwmon_initialized = true;
-       } else {
-               dev_info(hdev->dev, "no available sensors\n");
-       }
-
-       return 0;
-}
-
-void hl_hwmon_fini(struct hl_device *hdev)
-{
-       if (!hdev->hwmon_initialized)
-               return;
-
-       hwmon_device_unregister(hdev->hwmon_dev);
-}
diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h
deleted file mode 100644 (file)
index 07f9972..0000000
+++ /dev/null
@@ -1,407 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2016-2020 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-#ifndef ARMCP_IF_H
-#define ARMCP_IF_H
-
-#include <linux/types.h>
-
-/*
- * EVENT QUEUE
- */
-
-struct hl_eq_header {
-       __le32 reserved;
-       __le32 ctl;
-};
-
-struct hl_eq_ecc_data {
-       __le64 ecc_address;
-       __le64 ecc_syndrom;
-       __u8 memory_wrapper_idx;
-       __u8 pad[7];
-};
-
-struct hl_eq_entry {
-       struct hl_eq_header hdr;
-       union {
-               struct hl_eq_ecc_data ecc_data;
-               __le64 data[7];
-       };
-};
-
-#define HL_EQ_ENTRY_SIZE               sizeof(struct hl_eq_entry)
-
-#define EQ_CTL_READY_SHIFT             31
-#define EQ_CTL_READY_MASK              0x80000000
-
-#define EQ_CTL_EVENT_TYPE_SHIFT                16
-#define EQ_CTL_EVENT_TYPE_MASK         0x03FF0000
-
-enum pq_init_status {
-       PQ_INIT_STATUS_NA = 0,
-       PQ_INIT_STATUS_READY_FOR_CP,
-       PQ_INIT_STATUS_READY_FOR_HOST,
-       PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI
-};
-
-/*
- * ArmCP Primary Queue Packets
- *
- * During normal operation, the host's kernel driver needs to send various
- * messages to ArmCP, usually either to SET some value into a H/W periphery or
- * to GET the current value of some H/W periphery. For example, SET the
- * frequency of MME/TPC and GET the value of the thermal sensor.
- *
- * These messages can be initiated either by the User application or by the
- * host's driver itself, e.g. power management code. In either case, the
- * communication from the host's driver to ArmCP will *always* be in
- * synchronous mode, meaning that the host will send a single message and poll
- * until the message was acknowledged and the results are ready (if results are
- * needed).
- *
- * This means that only a single message can be sent at a time and the host's
- * driver must wait for its result before sending the next message. Having said
- * that, because these are control messages which are sent in a relatively low
- * frequency, this limitation seems acceptable. It's important to note that
- * in case of multiple devices, messages to different devices *can* be sent
- * at the same time.
- *
- * The message, inputs/outputs (if relevant) and fence object will be located
- * on the device DDR at an address that will be determined by the host's driver.
- * During device initialization phase, the host will pass to ArmCP that address.
- * Most of the message types will contain inputs/outputs inside the message
- * itself. The common part of each message will contain the opcode of the
- * message (its type) and a field representing a fence object.
- *
- * When the host's driver wishes to send a message to ArmCP, it will write the
- * message contents to the device DDR, clear the fence object and then write the
- * value 484 to the mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR register to issue
- * the 484 interrupt-id to the ARM core.
- *
- * Upon receiving the 484 interrupt-id, ArmCP will read the message from the
- * DDR. In case the message is a SET operation, ArmCP will first perform the
- * operation and then write to the fence object on the device DDR. In case the
- * message is a GET operation, ArmCP will first fill the results section on the
- * device DDR and then write to the fence object. If an error occurred, ArmCP
- * will fill the rc field with the right error code.
- *
- * In the meantime, the host's driver will poll on the fence object. Once the
- * host sees that the fence object is signaled, it will read the results from
- * the device DDR (if relevant) and resume the code execution in the host's
- * driver.
- *
- * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8
- * so the value being put by the host's driver matches the value read by ArmCP
- *
- * Non-QMAN packets should be limited to values 1 through (2^8 - 1)
- *
- * Detailed description:
- *
- * ARMCP_PACKET_DISABLE_PCI_ACCESS -
- *       After receiving this packet the embedded CPU must NOT issue PCI
- *       transactions (read/write) towards the Host CPU. This also includes
- *       sending MSI-X interrupts.
- *       This packet is usually sent before the device is moved to D3Hot state.
- *
- * ARMCP_PACKET_ENABLE_PCI_ACCESS -
- *       After receiving this packet the embedded CPU is allowed to issue PCI
- *       transactions towards the Host CPU, including sending MSI-X interrupts.
- *       This packet is usually sent after the device is moved to D0 state.
- *
- * ARMCP_PACKET_TEMPERATURE_GET -
- *       Fetch the current temperature / Max / Max Hyst / Critical /
- *       Critical Hyst of a specified thermal sensor. The packet's
- *       arguments specify the desired sensor and the field to get.
- *
- * ARMCP_PACKET_VOLTAGE_GET -
- *       Fetch the voltage / Max / Min of a specified sensor. The packet's
- *       arguments specify the sensor and type.
- *
- * ARMCP_PACKET_CURRENT_GET -
- *       Fetch the current / Max / Min of a specified sensor. The packet's
- *       arguments specify the sensor and type.
- *
- * ARMCP_PACKET_FAN_SPEED_GET -
- *       Fetch the speed / Max / Min of a specified fan. The packet's
- *       arguments specify the sensor and type.
- *
- * ARMCP_PACKET_PWM_GET -
- *       Fetch the pwm value / mode of a specified pwm. The packet's
- *       arguments specify the sensor and type.
- *
- * ARMCP_PACKET_PWM_SET -
- *       Set the pwm value / mode of a specified pwm. The packet's
- *       arguments specify the sensor, type and value.
- *
- * ARMCP_PACKET_FREQUENCY_SET -
- *       Set the frequency of a specified PLL. The packet's arguments specify
- *       the PLL and the desired frequency. The actual frequency in the device
- *       might differ from the requested frequency.
- *
- * ARMCP_PACKET_FREQUENCY_GET -
- *       Fetch the frequency of a specified PLL. The packet's arguments specify
- *       the PLL.
- *
- * ARMCP_PACKET_LED_SET -
- *       Set the state of a specified led. The packet's arguments
- *       specify the led and the desired state.
- *
- * ARMCP_PACKET_I2C_WR -
- *       Write 32-bit value to I2C device. The packet's arguments specify the
- *       I2C bus, address and value.
- *
- * ARMCP_PACKET_I2C_RD -
- *       Read 32-bit value from I2C device. The packet's arguments specify the
- *       I2C bus and address.
- *
- * ARMCP_PACKET_INFO_GET -
- *       Fetch information from the device as specified in the packet's
- *       structure. The host's driver passes the max size it allows the ArmCP to
- *       write to the structure, to prevent data corruption in case of
- *       mismatched driver/FW versions.
- *
- * ARMCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed
- *
- * ARMCP_PACKET_UNMASK_RAZWI_IRQ -
- *       Unmask the given IRQ. The IRQ number is specified in the value field.
- *       The packet is sent after receiving an interrupt and printing its
- *       relevant information.
- *
- * ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY -
- *       Unmask the given IRQs. The IRQs numbers are specified in an array right
- *       after the armcp_packet structure, where its first element is the array
- *       length. The packet is sent after a soft reset was done in order to
- *       handle any interrupts that were sent during the reset process.
- *
- * ARMCP_PACKET_TEST -
- *       Test packet for ArmCP connectivity. The CPU will put the fence value
- *       in the result field.
- *
- * ARMCP_PACKET_FREQUENCY_CURR_GET -
- *       Fetch the current frequency of a specified PLL. The packet's arguments
- *       specify the PLL.
- *
- * ARMCP_PACKET_MAX_POWER_GET -
- *       Fetch the maximal power of the device.
- *
- * ARMCP_PACKET_MAX_POWER_SET -
- *       Set the maximal power of the device. The packet's arguments specify
- *       the power.
- *
- * ARMCP_PACKET_EEPROM_DATA_GET -
- *       Get EEPROM data from the ArmCP kernel. The buffer is specified in the
- *       addr field. The CPU will put the returned data size in the result
- *       field. In addition, the host's driver passes the max size it allows the
- *       ArmCP to write to the structure, to prevent data corruption in case of
- *       mismatched driver/FW versions.
- *
- * ARMCP_PACKET_TEMPERATURE_SET -
- *       Set the value of the offset property of a specified thermal sensor.
- *       The packet's arguments specify the desired sensor and the field to
- *       set.
- *
- * ARMCP_PACKET_VOLTAGE_SET -
- *       Trigger the reset_history property of a specified voltage sensor.
- *       The packet's arguments specify the desired sensor and the field to
- *       set.
- *
- * ARMCP_PACKET_CURRENT_SET -
- *       Trigger the reset_history property of a specified current sensor.
- *       The packet's arguments specify the desired sensor and the field to
- *       set.
- */
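
To make the synchronous flow described above concrete, here is a minimal host-side sketch of a single GET exchange. It is not the driver's actual implementation (that lives in common/firmware_if.c), and the DDR/doorbell helpers are hypothetical placeholders for ASIC-specific accessors:

/* Hypothetical ASIC-specific accessors, placeholders for illustration */
extern void write_pkt_to_device_ddr(struct armcp_packet *pkt);
extern void read_pkt_from_device_ddr(struct armcp_packet *pkt);
extern void ring_gic_484(void);

/*
 * Illustrative sketch of one synchronous host -> ArmCP exchange, as
 * described above. Returns the rc field extracted from the ctl word.
 */
static int armcp_send_sync_sketch(u32 opcode, u64 *result)
{
	struct armcp_packet pkt = {};

	pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT);
	pkt.fence = 0;				/* cleared before sending */

	write_pkt_to_device_ddr(&pkt);
	ring_gic_484();				/* raise interrupt-id 484 */

	/* Poll until ArmCP signals completion by writing the fence value */
	do {
		read_pkt_from_device_ddr(&pkt);
	} while (le32_to_cpu(pkt.fence) != ARMCP_PACKET_FENCE_VAL);

	/* For GET packets the result is returned in pkt.result */
	*result = le64_to_cpu(pkt.result);

	return (le32_to_cpu(pkt.ctl) & ARMCP_PKT_CTL_RC_MASK) >>
			ARMCP_PKT_CTL_RC_SHIFT;
}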
-
-enum armcp_packet_id {
-       ARMCP_PACKET_DISABLE_PCI_ACCESS = 1,    /* internal */
-       ARMCP_PACKET_ENABLE_PCI_ACCESS,         /* internal */
-       ARMCP_PACKET_TEMPERATURE_GET,           /* sysfs */
-       ARMCP_PACKET_VOLTAGE_GET,               /* sysfs */
-       ARMCP_PACKET_CURRENT_GET,               /* sysfs */
-       ARMCP_PACKET_FAN_SPEED_GET,             /* sysfs */
-       ARMCP_PACKET_PWM_GET,                   /* sysfs */
-       ARMCP_PACKET_PWM_SET,                   /* sysfs */
-       ARMCP_PACKET_FREQUENCY_SET,             /* sysfs */
-       ARMCP_PACKET_FREQUENCY_GET,             /* sysfs */
-       ARMCP_PACKET_LED_SET,                   /* debugfs */
-       ARMCP_PACKET_I2C_WR,                    /* debugfs */
-       ARMCP_PACKET_I2C_RD,                    /* debugfs */
-       ARMCP_PACKET_INFO_GET,                  /* IOCTL */
-       ARMCP_PACKET_FLASH_PROGRAM_REMOVED,
-       ARMCP_PACKET_UNMASK_RAZWI_IRQ,          /* internal */
-       ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY,    /* internal */
-       ARMCP_PACKET_TEST,                      /* internal */
-       ARMCP_PACKET_FREQUENCY_CURR_GET,        /* sysfs */
-       ARMCP_PACKET_MAX_POWER_GET,             /* sysfs */
-       ARMCP_PACKET_MAX_POWER_SET,             /* sysfs */
-       ARMCP_PACKET_EEPROM_DATA_GET,           /* sysfs */
-       ARMCP_RESERVED,
-       ARMCP_PACKET_TEMPERATURE_SET,           /* sysfs */
-       ARMCP_PACKET_VOLTAGE_SET,               /* sysfs */
-       ARMCP_PACKET_CURRENT_SET,               /* sysfs */
-};
-
-#define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5
-
-#define ARMCP_PKT_CTL_RC_SHIFT         12
-#define ARMCP_PKT_CTL_RC_MASK          0x0000F000
-
-#define ARMCP_PKT_CTL_OPCODE_SHIFT     16
-#define ARMCP_PKT_CTL_OPCODE_MASK      0x1FFF0000
-
-struct armcp_packet {
-       union {
-               __le64 value;   /* For SET packets */
-               __le64 result;  /* For GET packets */
-               __le64 addr;    /* For PQ */
-       };
-
-       __le32 ctl;
-
-       __le32 fence;           /* Signal to host that message is completed */
-
-       union {
-               struct {/* For temperature/current/voltage/fan/pwm get/set */
-                       __le16 sensor_index;
-                       __le16 type;
-               };
-
-               struct {        /* For I2C read/write */
-                       __u8 i2c_bus;
-                       __u8 i2c_addr;
-                       __u8 i2c_reg;
-                       __u8 pad; /* unused */
-               };
-
-               /* For frequency get/set */
-               __le32 pll_index;
-
-               /* For led set */
-               __le32 led_index;
-
-               /* For get Armcp info/EEPROM data */
-               __le32 data_max_size;
-       };
-
-       __le32 reserved;
-};
-
-struct armcp_unmask_irq_arr_packet {
-       struct armcp_packet armcp_pkt;
-       __le32 length;
-       __le32 irqs[0];
-};
-
-enum armcp_packet_rc {
-       armcp_packet_success,
-       armcp_packet_invalid,
-       armcp_packet_fault
-};
-
-/*
- * armcp_temp_type should adhere to hwmon_temp_attributes
- * defined in Linux kernel hwmon.h file
- */
-enum armcp_temp_type {
-       armcp_temp_input,
-       armcp_temp_max = 6,
-       armcp_temp_max_hyst,
-       armcp_temp_crit,
-       armcp_temp_crit_hyst,
-       armcp_temp_offset = 19,
-       armcp_temp_highest = 22,
-       armcp_temp_reset_history = 23
-};
-
-enum armcp_in_attributes {
-       armcp_in_input,
-       armcp_in_min,
-       armcp_in_max,
-       armcp_in_highest = 7,
-       armcp_in_reset_history
-};
-
-enum armcp_curr_attributes {
-       armcp_curr_input,
-       armcp_curr_min,
-       armcp_curr_max,
-       armcp_curr_highest = 7,
-       armcp_curr_reset_history
-};
-
-enum armcp_fan_attributes {
-       armcp_fan_input,
-       armcp_fan_min = 2,
-       armcp_fan_max
-};
-
-enum armcp_pwm_attributes {
-       armcp_pwm_input,
-       armcp_pwm_enable
-};
-
-/* Event Queue Packets */
-
-struct eq_generic_event {
-       __le64 data[7];
-};
-
-/*
- * ArmCP info
- */
-
-#define CARD_NAME_MAX_LEN              16
-#define VERSION_MAX_LEN                        128
-#define ARMCP_MAX_SENSORS              128
-
-struct armcp_sensor {
-       __le32 type;
-       __le32 flags;
-};
-
-/**
- * struct armcp_card_types - ASIC card type.
- * @armcp_card_type_pci: PCI card.
- * @armcp_card_type_pmc: PCI Mezzanine Card.
- */
-enum armcp_card_types {
-       armcp_card_type_pci,
-       armcp_card_type_pmc
-};
-
-/**
- * struct armcp_info - Info from ArmCP that is necessary to the host's driver
- * @sensors: available sensors description.
- * @kernel_version: ArmCP linux kernel version.
- * @reserved: reserved field.
- * @card_type: card configuration type.
- * @card_location: in a server, each card has different connections topology
- *                 depending on its location (relevant for PMC card type)
- * @cpld_version: CPLD programmed F/W version.
- * @infineon_version: Infineon main DC-DC version.
- * @fuse_version: silicon production FUSE information.
- * @thermal_version: thermald S/W version.
- * @armcp_version: ArmCP S/W version.
- * @dram_size: available DRAM size.
- * @card_name: card name that will be displayed in HWMON subsystem on the host
- */
-struct armcp_info {
-       struct armcp_sensor sensors[ARMCP_MAX_SENSORS];
-       __u8 kernel_version[VERSION_MAX_LEN];
-       __le32 reserved;
-       __le32 card_type;
-       __le32 card_location;
-       __le32 cpld_version;
-       __le32 infineon_version;
-       __u8 fuse_version[VERSION_MAX_LEN];
-       __u8 thermal_version[VERSION_MAX_LEN];
-       __u8 armcp_version[VERSION_MAX_LEN];
-       __le64 dram_size;
-       char card_name[CARD_NAME_MAX_LEN];
-};
-
-#endif /* ARMCP_IF_H */
diff --git a/drivers/misc/habanalabs/include/common/armcp_if.h b/drivers/misc/habanalabs/include/common/armcp_if.h
new file mode 100644 (file)
index 0000000..07f9972
--- /dev/null
@@ -0,0 +1,407 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2020 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef ARMCP_IF_H
+#define ARMCP_IF_H
+
+#include <linux/types.h>
+
+/*
+ * EVENT QUEUE
+ */
+
+struct hl_eq_header {
+       __le32 reserved;
+       __le32 ctl;
+};
+
+struct hl_eq_ecc_data {
+       __le64 ecc_address;
+       __le64 ecc_syndrom;
+       __u8 memory_wrapper_idx;
+       __u8 pad[7];
+};
+
+struct hl_eq_entry {
+       struct hl_eq_header hdr;
+       union {
+               struct hl_eq_ecc_data ecc_data;
+               __le64 data[7];
+       };
+};
+
+#define HL_EQ_ENTRY_SIZE               sizeof(struct hl_eq_entry)
+
+#define EQ_CTL_READY_SHIFT             31
+#define EQ_CTL_READY_MASK              0x80000000
+
+#define EQ_CTL_EVENT_TYPE_SHIFT                16
+#define EQ_CTL_EVENT_TYPE_MASK         0x03FF0000
+
+enum pq_init_status {
+       PQ_INIT_STATUS_NA = 0,
+       PQ_INIT_STATUS_READY_FOR_CP,
+       PQ_INIT_STATUS_READY_FOR_HOST,
+       PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI
+};
+
+/*
+ * ArmCP Primary Queue Packets
+ *
+ * During normal operation, the host's kernel driver needs to send various
+ * messages to ArmCP, usually either to SET some value into a H/W periphery or
+ * to GET the current value of some H/W periphery. For example, SET the
+ * frequency of MME/TPC and GET the value of the thermal sensor.
+ *
+ * These messages can be initiated either by the User application or by the
+ * host's driver itself, e.g. power management code. In either case, the
+ * communication from the host's driver to ArmCP will *always* be in
+ * synchronous mode, meaning that the host will send a single message and poll
+ * until the message was acknowledged and the results are ready (if results are
+ * needed).
+ *
+ * This means that only a single message can be sent at a time and the host's
+ * driver must wait for its result before sending the next message. Having said
+ * that, because these are control messages which are sent in a relatively low
+ * frequency, this limitation seems acceptable. It's important to note that
+ * in case of multiple devices, messages to different devices *can* be sent
+ * at the same time.
+ *
+ * The message, inputs/outputs (if relevant) and fence object will be located
+ * on the device DDR at an address that will be determined by the host's driver.
+ * During device initialization, the host will pass that address to ArmCP.
+ * Most of the message types will contain inputs/outputs inside the message
+ * itself. The common part of each message will contain the opcode of the
+ * message (its type) and a field representing a fence object.
+ *
+ * When the host's driver wishes to send a message to ArmCP, it will write the
+ * message contents to the device DDR, clear the fence object and then write the
+ * value 484 to the mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR register to issue
+ * the 484 interrupt-id to the ARM core.
+ *
+ * Upon receiving the 484 interrupt-id, ArmCP will read the message from the
+ * DDR. In case the message is a SET operation, ArmCP will first perform the
+ * operation and then write to the fence object on the device DDR. In case the
+ * message is a GET operation, ArmCP will first fill the results section on the
+ * device DDR and then write to the fence object. If an error occurred, ArmCP
+ * will fill the rc field with the right error code.
+ *
+ * In the meantime, the host's driver will poll on the fence object. Once the
+ * host sees that the fence object is signaled, it will read the results from
+ * the device DDR (if relevant) and resume the code execution in the host's
+ * driver.
+ *
+ * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8
+ * so the value being put by the host's driver matches the value read by ArmCP
+ *
+ * Non-QMAN packets should be limited to values 1 through (2^8 - 1)
+ *
+ * Detailed description:
+ *
+ * ARMCP_PACKET_DISABLE_PCI_ACCESS -
+ *       After receiving this packet the embedded CPU must NOT issue PCI
+ *       transactions (read/write) towards the Host CPU. This also includes
+ *       sending MSI-X interrupts.
+ *       This packet is usually sent before the device is moved to D3Hot state.
+ *
+ * ARMCP_PACKET_ENABLE_PCI_ACCESS -
+ *       After receiving this packet the embedded CPU is allowed to issue PCI
+ *       transactions towards the Host CPU, including sending MSI-X interrupts.
+ *       This packet is usually sent after the device is moved to D0 state.
+ *
+ * ARMCP_PACKET_TEMPERATURE_GET -
+ *       Fetch the current temperature / Max / Max Hyst / Critical /
+ *       Critical Hyst of a specified thermal sensor. The packet's
+ *       arguments specify the desired sensor and the field to get.
+ *
+ * ARMCP_PACKET_VOLTAGE_GET -
+ *       Fetch the voltage / Max / Min of a specified sensor. The packet's
+ *       arguments specify the sensor and type.
+ *
+ * ARMCP_PACKET_CURRENT_GET -
+ *       Fetch the current / Max / Min of a specified sensor. The packet's
+ *       arguments specify the sensor and type.
+ *
+ * ARMCP_PACKET_FAN_SPEED_GET -
+ *       Fetch the speed / Max / Min of a specified fan. The packet's
+ *       arguments specify the sensor and type.
+ *
+ * ARMCP_PACKET_PWM_GET -
+ *       Fetch the pwm value / mode of a specified pwm. The packet's
+ *       arguments specify the sensor and type.
+ *
+ * ARMCP_PACKET_PWM_SET -
+ *       Set the pwm value / mode of a specified pwm. The packet's
+ *       arguments specify the sensor, type and value.
+ *
+ * ARMCP_PACKET_FREQUENCY_SET -
+ *       Set the frequency of a specified PLL. The packet's arguments specify
+ *       the PLL and the desired frequency. The actual frequency in the device
+ *       might differ from the requested frequency.
+ *
+ * ARMCP_PACKET_FREQUENCY_GET -
+ *       Fetch the frequency of a specified PLL. The packet's arguments specify
+ *       the PLL.
+ *
+ * ARMCP_PACKET_LED_SET -
+ *       Set the state of a specified led. The packet's arguments
+ *       specify the led and the desired state.
+ *
+ * ARMCP_PACKET_I2C_WR -
+ *       Write 32-bit value to I2C device. The packet's arguments specify the
+ *       I2C bus, address and value.
+ *
+ * ARMCP_PACKET_I2C_RD -
+ *       Read 32-bit value from I2C device. The packet's arguments specify the
+ *       I2C bus and address.
+ *
+ * ARMCP_PACKET_INFO_GET -
+ *       Fetch information from the device as specified in the packet's
+ *       structure. The host's driver passes the max size it allows the ArmCP to
+ *       write to the structure, to prevent data corruption in case of
+ *       mismatched driver/FW versions.
+ *
+ * ARMCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed
+ *
+ * ARMCP_PACKET_UNMASK_RAZWI_IRQ -
+ *       Unmask the given IRQ. The IRQ number is specified in the value field.
+ *       The packet is sent after receiving an interrupt and printing its
+ *       relevant information.
+ *
+ * ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY -
+ *       Unmask the given IRQs. The IRQs numbers are specified in an array right
+ *       after the armcp_packet structure, where its first element is the array
+ *       length. The packet is sent after a soft reset was done in order to
+ *       handle any interrupts that were sent during the reset process.
+ *
+ * ARMCP_PACKET_TEST -
+ *       Test packet for ArmCP connectivity. The CPU will put the fence value
+ *       in the result field.
+ *
+ * ARMCP_PACKET_FREQUENCY_CURR_GET -
+ *       Fetch the current frequency of a specified PLL. The packet's arguments
+ *       specify the PLL.
+ *
+ * ARMCP_PACKET_MAX_POWER_GET -
+ *       Fetch the maximal power of the device.
+ *
+ * ARMCP_PACKET_MAX_POWER_SET -
+ *       Set the maximal power of the device. The packet's arguments specify
+ *       the power.
+ *
+ * ARMCP_PACKET_EEPROM_DATA_GET -
+ *       Get EEPROM data from the ArmCP kernel. The buffer is specified in the
+ *       addr field. The CPU will put the returned data size in the result
+ *       field. In addition, the host's driver passes the max size it allows the
+ *       ArmCP to write to the structure, to prevent data corruption in case of
+ *       mismatched driver/FW versions.
+ *
+ * ARMCP_PACKET_TEMPERATURE_SET -
+ *       Set the value of the offset property of a specified thermal sensor.
+ *       The packet's arguments specify the desired sensor and the field to
+ *       set.
+ *
+ * ARMCP_PACKET_VOLTAGE_SET -
+ *       Trigger the reset_history property of a specified voltage sensor.
+ *       The packet's arguments specify the desired sensor and the field to
+ *       set.
+ *
+ * ARMCP_PACKET_CURRENT_SET -
+ *       Trigger the reset_history property of a specified current sensor.
+ *       The packet's arguments specify the desired sensor and the field to
+ *       set.
+ */
+
+enum armcp_packet_id {
+       ARMCP_PACKET_DISABLE_PCI_ACCESS = 1,    /* internal */
+       ARMCP_PACKET_ENABLE_PCI_ACCESS,         /* internal */
+       ARMCP_PACKET_TEMPERATURE_GET,           /* sysfs */
+       ARMCP_PACKET_VOLTAGE_GET,               /* sysfs */
+       ARMCP_PACKET_CURRENT_GET,               /* sysfs */
+       ARMCP_PACKET_FAN_SPEED_GET,             /* sysfs */
+       ARMCP_PACKET_PWM_GET,                   /* sysfs */
+       ARMCP_PACKET_PWM_SET,                   /* sysfs */
+       ARMCP_PACKET_FREQUENCY_SET,             /* sysfs */
+       ARMCP_PACKET_FREQUENCY_GET,             /* sysfs */
+       ARMCP_PACKET_LED_SET,                   /* debugfs */
+       ARMCP_PACKET_I2C_WR,                    /* debugfs */
+       ARMCP_PACKET_I2C_RD,                    /* debugfs */
+       ARMCP_PACKET_INFO_GET,                  /* IOCTL */
+       ARMCP_PACKET_FLASH_PROGRAM_REMOVED,
+       ARMCP_PACKET_UNMASK_RAZWI_IRQ,          /* internal */
+       ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY,    /* internal */
+       ARMCP_PACKET_TEST,                      /* internal */
+       ARMCP_PACKET_FREQUENCY_CURR_GET,        /* sysfs */
+       ARMCP_PACKET_MAX_POWER_GET,             /* sysfs */
+       ARMCP_PACKET_MAX_POWER_SET,             /* sysfs */
+       ARMCP_PACKET_EEPROM_DATA_GET,           /* sysfs */
+       ARMCP_RESERVED,
+       ARMCP_PACKET_TEMPERATURE_SET,           /* sysfs */
+       ARMCP_PACKET_VOLTAGE_SET,               /* sysfs */
+       ARMCP_PACKET_CURRENT_SET,               /* sysfs */
+};
+
+#define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5
+
+#define ARMCP_PKT_CTL_RC_SHIFT         12
+#define ARMCP_PKT_CTL_RC_MASK          0x0000F000
+
+#define ARMCP_PKT_CTL_OPCODE_SHIFT     16
+#define ARMCP_PKT_CTL_OPCODE_MASK      0x1FFF0000
+
+struct armcp_packet {
+       union {
+               __le64 value;   /* For SET packets */
+               __le64 result;  /* For GET packets */
+               __le64 addr;    /* For PQ */
+       };
+
+       __le32 ctl;
+
+       __le32 fence;           /* Signal to host that message is completed */
+
+       union {
+               struct {/* For temperature/current/voltage/fan/pwm get/set */
+                       __le16 sensor_index;
+                       __le16 type;
+               };
+
+               struct {        /* For I2C read/write */
+                       __u8 i2c_bus;
+                       __u8 i2c_addr;
+                       __u8 i2c_reg;
+                       __u8 pad; /* unused */
+               };
+
+               /* For frequency get/set */
+               __le32 pll_index;
+
+               /* For led set */
+               __le32 led_index;
+
+               /* For get Armcp info/EEPROM data */
+               __le32 data_max_size;
+       };
+
+       __le32 reserved;
+};
+
+struct armcp_unmask_irq_arr_packet {
+       struct armcp_packet armcp_pkt;
+       __le32 length;
+       __le32 irqs[0];
+};
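
As the description of ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY above notes, the IRQ numbers follow the common packet header, preceded by their count. A hypothetical sketch of sizing and filling such a packet is shown below; example_send_to_armcp() is a placeholder, not a driver function, and the sketch assumes <linux/slab.h> for kzalloc().

/* Hypothetical sketch only: build a variable-length unmask-IRQ-array
 * packet using the structures above. example_send_to_armcp() is a
 * placeholder for the actual send path.
 */
static int example_unmask_irq_array(const u32 *irqs, u32 num_irqs)
{
	struct armcp_unmask_irq_arr_packet *pkt;
	size_t size = sizeof(*pkt) + num_irqs * sizeof(__le32);
	u32 i;
	int rc;

	pkt = kzalloc(size, GFP_KERNEL);
	if (!pkt)
		return -ENOMEM;

	pkt->length = cpu_to_le32(num_irqs);
	for (i = 0 ; i < num_irqs ; i++)
		pkt->irqs[i] = cpu_to_le32(irqs[i]);

	pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
					ARMCP_PKT_CTL_OPCODE_SHIFT);

	rc = example_send_to_armcp(pkt, size);	/* placeholder send helper */
	kfree(pkt);

	return rc;
}
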
+
+enum armcp_packet_rc {
+       armcp_packet_success,
+       armcp_packet_invalid,
+       armcp_packet_fault
+};
+
+/*
+ * armcp_temp_type should adhere to hwmon_temp_attributes
+ * defined in Linux kernel hwmon.h file
+ */
+enum armcp_temp_type {
+       armcp_temp_input,
+       armcp_temp_max = 6,
+       armcp_temp_max_hyst,
+       armcp_temp_crit,
+       armcp_temp_crit_hyst,
+       armcp_temp_offset = 19,
+       armcp_temp_highest = 22,
+       armcp_temp_reset_history = 23
+};
+
+enum armcp_in_attributes {
+       armcp_in_input,
+       armcp_in_min,
+       armcp_in_max,
+       armcp_in_highest = 7,
+       armcp_in_reset_history
+};
+
+enum armcp_curr_attributes {
+       armcp_curr_input,
+       armcp_curr_min,
+       armcp_curr_max,
+       armcp_curr_highest = 7,
+       armcp_curr_reset_history
+};
+
+enum armcp_fan_attributes {
+       armcp_fan_input,
+       armcp_fan_min = 2,
+       armcp_fan_max
+};
+
+enum armcp_pwm_attributes {
+       armcp_pwm_input,
+       armcp_pwm_enable
+};
+
+/* Event Queue Packets */
+
+struct eq_generic_event {
+       __le64 data[7];
+};
+
+/*
+ * ArmCP info
+ */
+
+#define CARD_NAME_MAX_LEN              16
+#define VERSION_MAX_LEN                        128
+#define ARMCP_MAX_SENSORS              128
+
+struct armcp_sensor {
+       __le32 type;
+       __le32 flags;
+};
+
+/**
+ * struct armcp_card_types - ASIC card type.
+ * @armcp_card_type_pci: PCI card.
+ * @armcp_card_type_pmc: PCI Mezzanine Card.
+ */
+enum armcp_card_types {
+       armcp_card_type_pci,
+       armcp_card_type_pmc
+};
+
+/**
+ * struct armcp_info - Info from ArmCP that is necessary to the host's driver
+ * @sensors: available sensors description.
+ * @kernel_version: ArmCP linux kernel version.
+ * @reserved: reserved field.
+ * @card_type: card configuration type.
+ * @card_location: in a server, each card has different connections topology
+ *                 depending on its location (relevant for PMC card type)
+ * @cpld_version: CPLD programmed F/W version.
+ * @infineon_version: Infineon main DC-DC version.
+ * @fuse_version: silicon production FUSE information.
+ * @thermal_version: thermald S/W version.
+ * @armcp_version: ArmCP S/W version.
+ * @dram_size: available DRAM size.
+ * @card_name: card name that will be displayed in HWMON subsystem on the host
+ */
+struct armcp_info {
+       struct armcp_sensor sensors[ARMCP_MAX_SENSORS];
+       __u8 kernel_version[VERSION_MAX_LEN];
+       __le32 reserved;
+       __le32 card_type;
+       __le32 card_location;
+       __le32 cpld_version;
+       __le32 infineon_version;
+       __u8 fuse_version[VERSION_MAX_LEN];
+       __u8 thermal_version[VERSION_MAX_LEN];
+       __u8 armcp_version[VERSION_MAX_LEN];
+       __le64 dram_size;
+       char card_name[CARD_NAME_MAX_LEN];
+};
+
+#endif /* ARMCP_IF_H */
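
As a concrete illustration of the synchronous protocol documented in the header above, a host-side GET sequence could be sketched as follows. This is only an illustrative sketch: ring_armcp_doorbell() and poll_fence() are hypothetical placeholders (the real driver drives this flow through its CPU queue code), and error handling is reduced to extracting the rc field from ctl.

/* Illustrative sketch only: fetch a temperature reading using the packet
 * layout and CTL bit fields defined above. ring_armcp_doorbell() and
 * poll_fence() are hypothetical placeholder helpers; "pkt" is assumed to
 * point at the packet area in device DDR.
 */
static int example_armcp_get_temperature(struct armcp_packet *pkt, u16 sensor)
{
	u32 rc;

	memset(pkt, 0, sizeof(*pkt));		/* also clears the fence object */
	pkt->ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET <<
					ARMCP_PKT_CTL_OPCODE_SHIFT);
	pkt->sensor_index = cpu_to_le16(sensor);
	pkt->type = cpu_to_le16(armcp_temp_input);

	ring_armcp_doorbell();			/* e.g. trigger interrupt-id 484 */
	poll_fence(&pkt->fence, ARMCP_PACKET_FENCE_VAL);

	rc = (le32_to_cpu(pkt->ctl) & ARMCP_PKT_CTL_RC_MASK) >>
						ARMCP_PKT_CTL_RC_SHIFT;
	if (rc != armcp_packet_success)
		return -EIO;

	return (int) le64_to_cpu(pkt->result);	/* GET packets reply in result */
}
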
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
new file mode 100644 (file)
index 0000000..c22d134
--- /dev/null
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2018-2020 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef HL_BOOT_IF_H
+#define HL_BOOT_IF_H
+
+#define LKD_HARD_RESET_MAGIC           0xED7BD694
+#define HL_POWER9_HOST_MAGIC           0x1DA30009
+
+#define BOOT_FIT_SRAM_OFFSET           0x200000
+
+/*
+ * CPU error bits in BOOT_ERROR registers
+ *
+ * CPU_BOOT_ERR0_DRAM_INIT_FAIL                DRAM initialization failed.
+ *                                     DRAM is not reliable to use.
+ *
+ * CPU_BOOT_ERR0_FIT_CORRUPTED         FIT data integrity verification of the
+ *                                     image provided by the host has failed.
+ *
+ * CPU_BOOT_ERR0_TS_INIT_FAIL          Thermal Sensor initialization failed.
+ *                                     Boot continues as usual, but keep in
+ *                                     mind this is a warning.
+ *
+ * CPU_BOOT_ERR0_DRAM_SKIPPED          DRAM initialization has been skipped.
+ *                                     Skipping DRAM initialization has been
+ *                                     requested (e.g. strap, command, etc.)
+ *                                     and FW skipped the DRAM initialization.
+ *                                     Host can initialize the DRAM.
+ *
+ * CPU_BOOT_ERR0_BMC_WAIT_SKIPPED      Waiting for BMC data will be skipped.
+ *                                     Meaning the BMC data might not be
+ *                                     available until reset.
+ *
+ * CPU_BOOT_ERR0_NIC_DATA_NOT_RDY      NIC data from BMC is not ready.
+ *                                     BMC has not provided the NIC data yet.
+ *                                     Once provided this bit will be cleared.
+ *
+ * CPU_BOOT_ERR0_NIC_FW_FAIL           NIC FW loading failed.
+ *                                     The NIC FW loading and initialization
+ *                                     failed. This means NICs are not usable.
+ *
+ * CPU_BOOT_ERR0_ENABLED               Error registers enabled.
+ *                                     This is a main indication that the
+ *                                     running FW populates the error
+ *                                     registers. Meaning the error bits are
+ *                                     not garbage, but actual error statuses.
+ */
+#define CPU_BOOT_ERR0_DRAM_INIT_FAIL           (1 << 0)
+#define CPU_BOOT_ERR0_FIT_CORRUPTED            (1 << 1)
+#define CPU_BOOT_ERR0_TS_INIT_FAIL             (1 << 2)
+#define CPU_BOOT_ERR0_DRAM_SKIPPED             (1 << 3)
+#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED         (1 << 4)
+#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY         (1 << 5)
+#define CPU_BOOT_ERR0_NIC_FW_FAIL              (1 << 6)
+#define CPU_BOOT_ERR0_ENABLED                  (1 << 31)
+
+enum cpu_boot_status {
+       CPU_BOOT_STATUS_NA = 0,         /* Default value after reset of chip */
+       CPU_BOOT_STATUS_IN_WFE = 1,
+       CPU_BOOT_STATUS_DRAM_RDY = 2,
+       CPU_BOOT_STATUS_SRAM_AVAIL = 3,
+       CPU_BOOT_STATUS_IN_BTL = 4,     /* BTL is H/W FSM */
+       CPU_BOOT_STATUS_IN_PREBOOT = 5,
+       CPU_BOOT_STATUS_IN_SPL,         /* deprecated - not reported */
+       CPU_BOOT_STATUS_IN_UBOOT = 7,
+       CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */
+       CPU_BOOT_STATUS_FIT_CORRUPTED,  /* deprecated - will be removed */
+       /* U-Boot console prompt activated, commands are not processed */
+       CPU_BOOT_STATUS_UBOOT_NOT_READY = 10,
+       /* Finished NICs init, reported after DRAM and NICs */
+       CPU_BOOT_STATUS_NIC_FW_RDY = 11,
+       CPU_BOOT_STATUS_TS_INIT_FAIL,   /* deprecated - will be removed */
+       CPU_BOOT_STATUS_DRAM_SKIPPED,   /* deprecated - will be removed */
+       CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */
+       /* Last boot loader progress status, ready to receive commands */
+       CPU_BOOT_STATUS_READY_TO_BOOT = 15,
+       CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16,
+};
+
+enum kmd_msg {
+       KMD_MSG_NA = 0,
+       KMD_MSG_GOTO_WFE,
+       KMD_MSG_FIT_RDY,
+       KMD_MSG_SKIP_BMC,
+};
+
+enum cpu_msg_status {
+       CPU_MSG_CLR = 0,
+       CPU_MSG_OK,
+       CPU_MSG_ERR,
+};
+
+#endif /* HL_BOOT_IF_H */
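
The comment block above stresses that the CPU_BOOT_ERR0_* bits are meaningful only once the firmware sets CPU_BOOT_ERR0_ENABLED. A minimal, hypothetical decode of a BOOT_ERROR0 register value might look like the sketch below; the severity mapping is illustrative, not the driver's policy.

/* Illustrative sketch: interpret a BOOT_ERROR0 value per the description
 * above. Error bits are trusted only when CPU_BOOT_ERR0_ENABLED is set.
 */
static int example_check_boot_err0(u32 err0)
{
	if (!(err0 & CPU_BOOT_ERR0_ENABLED))
		return 0;	/* register not populated by FW, bits are garbage */

	if (err0 & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
		return -EIO;	/* DRAM is not reliable to use */

	if (err0 & CPU_BOOT_ERR0_FIT_CORRUPTED)
		return -EINVAL;	/* boot FIT failed integrity verification */

	if (err0 & CPU_BOOT_ERR0_TS_INIT_FAIL)
		pr_warn("thermal sensor init failed, boot continues\n");

	return 0;
}
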
diff --git a/drivers/misc/habanalabs/include/common/qman_if.h b/drivers/misc/habanalabs/include/common/qman_if.h
new file mode 100644 (file)
index 0000000..0fdb491
--- /dev/null
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef QMAN_IF_H
+#define QMAN_IF_H
+
+#include <linux/types.h>
+
+/*
+ * PRIMARY QUEUE
+ */
+
+struct hl_bd {
+       __le64  ptr;
+       __le32  len;
+       __le32  ctl;
+};
+
+#define HL_BD_SIZE                     sizeof(struct hl_bd)
+
+/*
+ * S/W CTL FIELDS.
+ *
+ * BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is
+ * valid. 1 means the repeat field is valid, 0 means not-valid,
+ * i.e. repeat == 1
+ */
+#define BD_CTL_REPEAT_VALID_SHIFT      24
+#define BD_CTL_REPEAT_VALID_MASK       0x01000000
+
+#define BD_CTL_SHADOW_INDEX_SHIFT      0
+#define BD_CTL_SHADOW_INDEX_MASK       0x00000FFF
+
+/*
+ * H/W CTL FIELDS
+ */
+
+#define BD_CTL_COMP_OFFSET_SHIFT       16
+#define BD_CTL_COMP_OFFSET_MASK                0x00FF0000
+
+#define BD_CTL_COMP_DATA_SHIFT         0
+#define BD_CTL_COMP_DATA_MASK          0x0000FFFF
+
+/*
+ * COMPLETION QUEUE
+ */
+
+struct hl_cq_entry {
+       __le32  data;
+};
+
+#define HL_CQ_ENTRY_SIZE               sizeof(struct hl_cq_entry)
+
+#define CQ_ENTRY_READY_SHIFT                   31
+#define CQ_ENTRY_READY_MASK                    0x80000000
+
+#define CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT      30
+#define CQ_ENTRY_SHADOW_INDEX_VALID_MASK       0x40000000
+
+#define CQ_ENTRY_SHADOW_INDEX_SHIFT            BD_CTL_SHADOW_INDEX_SHIFT
+#define CQ_ENTRY_SHADOW_INDEX_MASK             BD_CTL_SHADOW_INDEX_MASK
+
+
+#endif /* QMAN_IF_H */
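
The CQ_ENTRY_* fields above encode a ready bit, a shadow-index-valid bit and the shadow queue index of the completed job; the driver's real consumer of these bits is hl_irq_handler_cq() in irq.c. The standalone decode below is for illustration only.

/* Illustrative sketch: decode one completion-queue entry using the
 * CQ_ENTRY_* fields defined above. Returns true when the entry is ready;
 * *shadow_index is filled only when the valid bit is set.
 */
static bool example_parse_cq_entry(const struct hl_cq_entry *entry,
					u16 *shadow_index)
{
	u32 data = le32_to_cpu(entry->data);

	if (!((data & CQ_ENTRY_READY_MASK) >> CQ_ENTRY_READY_SHIFT))
		return false;	/* H/W has not written this entry yet */

	if ((data & CQ_ENTRY_SHADOW_INDEX_VALID_MASK) >>
				CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT)
		*shadow_index = (data & CQ_ENTRY_SHADOW_INDEX_MASK) >>
				CQ_ENTRY_SHADOW_INDEX_SHIFT;

	return true;
}
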
diff --git a/drivers/misc/habanalabs/include/hl_boot_if.h b/drivers/misc/habanalabs/include/hl_boot_if.h
deleted file mode 100644 (file)
index c22d134..0000000
+++ /dev/null
@@ -1,98 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2018-2020 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-#ifndef HL_BOOT_IF_H
-#define HL_BOOT_IF_H
-
-#define LKD_HARD_RESET_MAGIC           0xED7BD694
-#define HL_POWER9_HOST_MAGIC           0x1DA30009
-
-#define BOOT_FIT_SRAM_OFFSET           0x200000
-
-/*
- * CPU error bits in BOOT_ERROR registers
- *
- * CPU_BOOT_ERR0_DRAM_INIT_FAIL                DRAM initialization failed.
- *                                     DRAM is not reliable to use.
- *
- * CPU_BOOT_ERR0_FIT_CORRUPTED         FIT data integrity verification of the
- *                                     image provided by the host has failed.
- *
- * CPU_BOOT_ERR0_TS_INIT_FAIL          Thermal Sensor initialization failed.
- *                                     Boot continues as usual, but keep in
- *                                     mind this is a warning.
- *
- * CPU_BOOT_ERR0_DRAM_SKIPPED          DRAM initialization has been skipped.
- *                                     Skipping DRAM initialization has been
- *                                     requested (e.g. strap, command, etc.)
- *                                     and FW skipped the DRAM initialization.
- *                                     Host can initialize the DRAM.
- *
- * CPU_BOOT_ERR0_BMC_WAIT_SKIPPED      Waiting for BMC data will be skipped.
- *                                     Meaning the BMC data might not be
- *                                     available until reset.
- *
- * CPU_BOOT_ERR0_NIC_DATA_NOT_RDY      NIC data from BMC is not ready.
- *                                     BMC has not provided the NIC data yet.
- *                                     Once provided this bit will be cleared.
- *
- * CPU_BOOT_ERR0_NIC_FW_FAIL           NIC FW loading failed.
- *                                     The NIC FW loading and initialization
- *                                     failed. This means NICs are not usable.
- *
- * CPU_BOOT_ERR0_ENABLED               Error registers enabled.
- *                                     This is a main indication that the
- *                                     running FW populates the error
- *                                     registers. Meaning the error bits are
- *                                     not garbage, but actual error statuses.
- */
-#define CPU_BOOT_ERR0_DRAM_INIT_FAIL           (1 << 0)
-#define CPU_BOOT_ERR0_FIT_CORRUPTED            (1 << 1)
-#define CPU_BOOT_ERR0_TS_INIT_FAIL             (1 << 2)
-#define CPU_BOOT_ERR0_DRAM_SKIPPED             (1 << 3)
-#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED         (1 << 4)
-#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY         (1 << 5)
-#define CPU_BOOT_ERR0_NIC_FW_FAIL              (1 << 6)
-#define CPU_BOOT_ERR0_ENABLED                  (1 << 31)
-
-enum cpu_boot_status {
-       CPU_BOOT_STATUS_NA = 0,         /* Default value after reset of chip */
-       CPU_BOOT_STATUS_IN_WFE = 1,
-       CPU_BOOT_STATUS_DRAM_RDY = 2,
-       CPU_BOOT_STATUS_SRAM_AVAIL = 3,
-       CPU_BOOT_STATUS_IN_BTL = 4,     /* BTL is H/W FSM */
-       CPU_BOOT_STATUS_IN_PREBOOT = 5,
-       CPU_BOOT_STATUS_IN_SPL,         /* deprecated - not reported */
-       CPU_BOOT_STATUS_IN_UBOOT = 7,
-       CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */
-       CPU_BOOT_STATUS_FIT_CORRUPTED,  /* deprecated - will be removed */
-       /* U-Boot console prompt activated, commands are not processed */
-       CPU_BOOT_STATUS_UBOOT_NOT_READY = 10,
-       /* Finished NICs init, reported after DRAM and NICs */
-       CPU_BOOT_STATUS_NIC_FW_RDY = 11,
-       CPU_BOOT_STATUS_TS_INIT_FAIL,   /* deprecated - will be removed */
-       CPU_BOOT_STATUS_DRAM_SKIPPED,   /* deprecated - will be removed */
-       CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */
-       /* Last boot loader progress status, ready to receive commands */
-       CPU_BOOT_STATUS_READY_TO_BOOT = 15,
-       CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16,
-};
-
-enum kmd_msg {
-       KMD_MSG_NA = 0,
-       KMD_MSG_GOTO_WFE,
-       KMD_MSG_FIT_RDY,
-       KMD_MSG_SKIP_BMC,
-};
-
-enum cpu_msg_status {
-       CPU_MSG_CLR = 0,
-       CPU_MSG_OK,
-       CPU_MSG_ERR,
-};
-
-#endif /* HL_BOOT_IF_H */
diff --git a/drivers/misc/habanalabs/include/qman_if.h b/drivers/misc/habanalabs/include/qman_if.h
deleted file mode 100644 (file)
index 0fdb491..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2016-2018 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-#ifndef QMAN_IF_H
-#define QMAN_IF_H
-
-#include <linux/types.h>
-
-/*
- * PRIMARY QUEUE
- */
-
-struct hl_bd {
-       __le64  ptr;
-       __le32  len;
-       __le32  ctl;
-};
-
-#define HL_BD_SIZE                     sizeof(struct hl_bd)
-
-/*
- * S/W CTL FIELDS.
- *
- * BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is
- * valid. 1 means the repeat field is valid, 0 means not-valid,
- * i.e. repeat == 1
- */
-#define BD_CTL_REPEAT_VALID_SHIFT      24
-#define BD_CTL_REPEAT_VALID_MASK       0x01000000
-
-#define BD_CTL_SHADOW_INDEX_SHIFT      0
-#define BD_CTL_SHADOW_INDEX_MASK       0x00000FFF
-
-/*
- * H/W CTL FIELDS
- */
-
-#define BD_CTL_COMP_OFFSET_SHIFT       16
-#define BD_CTL_COMP_OFFSET_MASK                0x00FF0000
-
-#define BD_CTL_COMP_DATA_SHIFT         0
-#define BD_CTL_COMP_DATA_MASK          0x0000FFFF
-
-/*
- * COMPLETION QUEUE
- */
-
-struct hl_cq_entry {
-       __le32  data;
-};
-
-#define HL_CQ_ENTRY_SIZE               sizeof(struct hl_cq_entry)
-
-#define CQ_ENTRY_READY_SHIFT                   31
-#define CQ_ENTRY_READY_MASK                    0x80000000
-
-#define CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT      30
-#define CQ_ENTRY_SHADOW_INDEX_VALID_MASK       0x40000000
-
-#define CQ_ENTRY_SHADOW_INDEX_SHIFT            BD_CTL_SHADOW_INDEX_SHIFT
-#define CQ_ENTRY_SHADOW_INDEX_MASK             BD_CTL_SHADOW_INDEX_MASK
-
-
-#endif /* QMAN_IF_H */
diff --git a/drivers/misc/habanalabs/irq.c b/drivers/misc/habanalabs/irq.c
deleted file mode 100644 (file)
index c8db717..0000000
+++ /dev/null
@@ -1,320 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-
-#include <linux/slab.h>
-
-/**
- * struct hl_eqe_work - This structure is used to schedule work of EQ
- *                      entry and armcp_reset event
- *
- * @eq_work:          workqueue object to run when EQ entry is received
- * @hdev:             pointer to device structure
- * @eq_entry:         copy of the EQ entry
- */
-struct hl_eqe_work {
-       struct work_struct      eq_work;
-       struct hl_device        *hdev;
-       struct hl_eq_entry      eq_entry;
-};
-
-/**
- * hl_cq_inc_ptr - increment ci or pi of cq
- *
- * @ptr: the current ci or pi value of the completion queue
- *
- * Increment ptr by 1. If it reaches the number of completion queue
- * entries, set it to 0
- */
-inline u32 hl_cq_inc_ptr(u32 ptr)
-{
-       ptr++;
-       if (unlikely(ptr == HL_CQ_LENGTH))
-               ptr = 0;
-       return ptr;
-}
-
-/**
- * hl_eq_inc_ptr - increment ci of eq
- *
- * @ptr: the current ci value of the event queue
- *
- * Increment ptr by 1. If it reaches the number of event queue
- * entries, set it to 0
- */
-inline u32 hl_eq_inc_ptr(u32 ptr)
-{
-       ptr++;
-       if (unlikely(ptr == HL_EQ_LENGTH))
-               ptr = 0;
-       return ptr;
-}
-
-static void irq_handle_eqe(struct work_struct *work)
-{
-       struct hl_eqe_work *eqe_work = container_of(work, struct hl_eqe_work,
-                                                       eq_work);
-       struct hl_device *hdev = eqe_work->hdev;
-
-       hdev->asic_funcs->handle_eqe(hdev, &eqe_work->eq_entry);
-
-       kfree(eqe_work);
-}
-
-/**
- * hl_irq_handler_cq - irq handler for completion queue
- *
- * @irq: irq number
- * @arg: pointer to completion queue structure
- *
- */
-irqreturn_t hl_irq_handler_cq(int irq, void *arg)
-{
-       struct hl_cq *cq = arg;
-       struct hl_device *hdev = cq->hdev;
-       struct hl_hw_queue *queue;
-       struct hl_cs_job *job;
-       bool shadow_index_valid;
-       u16 shadow_index;
-       struct hl_cq_entry *cq_entry, *cq_base;
-
-       if (hdev->disabled) {
-               dev_dbg(hdev->dev,
-                       "Device disabled but received IRQ %d for CQ %d\n",
-                       irq, cq->hw_queue_id);
-               return IRQ_HANDLED;
-       }
-
-       cq_base = (struct hl_cq_entry *) (uintptr_t) cq->kernel_address;
-
-       while (1) {
-               bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) &
-                                       CQ_ENTRY_READY_MASK)
-                                               >> CQ_ENTRY_READY_SHIFT);
-
-               if (!entry_ready)
-                       break;
-
-               cq_entry = (struct hl_cq_entry *) &cq_base[cq->ci];
-
-               /* Make sure we read CQ entry contents after we've
-                * checked the ownership bit.
-                */
-               dma_rmb();
-
-               shadow_index_valid = ((le32_to_cpu(cq_entry->data) &
-                                       CQ_ENTRY_SHADOW_INDEX_VALID_MASK)
-                                       >> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT);
-
-               shadow_index = (u16) ((le32_to_cpu(cq_entry->data) &
-                                       CQ_ENTRY_SHADOW_INDEX_MASK)
-                                       >> CQ_ENTRY_SHADOW_INDEX_SHIFT);
-
-               queue = &hdev->kernel_queues[cq->hw_queue_id];
-
-               if ((shadow_index_valid) && (!hdev->disabled)) {
-                       job = queue->shadow_queue[hl_pi_2_offset(shadow_index)];
-                       queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work);
-               }
-
-               atomic_inc(&queue->ci);
-
-               /* Clear CQ entry ready bit */
-               cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) &
-                                               ~CQ_ENTRY_READY_MASK);
-
-               cq->ci = hl_cq_inc_ptr(cq->ci);
-
-               /* Increment free slots */
-               atomic_inc(&cq->free_slots_cnt);
-       }
-
-       return IRQ_HANDLED;
-}
-
-/**
- * hl_irq_handler_eq - irq handler for event queue
- *
- * @irq: irq number
- * @arg: pointer to event queue structure
- *
- */
-irqreturn_t hl_irq_handler_eq(int irq, void *arg)
-{
-       struct hl_eq *eq = arg;
-       struct hl_device *hdev = eq->hdev;
-       struct hl_eq_entry *eq_entry;
-       struct hl_eq_entry *eq_base;
-       struct hl_eqe_work *handle_eqe_work;
-
-       eq_base = (struct hl_eq_entry *) (uintptr_t) eq->kernel_address;
-
-       while (1) {
-               bool entry_ready =
-                       ((le32_to_cpu(eq_base[eq->ci].hdr.ctl) &
-                               EQ_CTL_READY_MASK) >> EQ_CTL_READY_SHIFT);
-
-               if (!entry_ready)
-                       break;
-
-               eq_entry = &eq_base[eq->ci];
-
-               /*
-                * Make sure we read EQ entry contents after we've
-                * checked the ownership bit.
-                */
-               dma_rmb();
-
-               if (hdev->disabled) {
-                       dev_warn(hdev->dev,
-                               "Device disabled but received IRQ %d for EQ\n",
-                                       irq);
-                       goto skip_irq;
-               }
-
-               handle_eqe_work = kmalloc(sizeof(*handle_eqe_work), GFP_ATOMIC);
-               if (handle_eqe_work) {
-                       INIT_WORK(&handle_eqe_work->eq_work, irq_handle_eqe);
-                       handle_eqe_work->hdev = hdev;
-
-                       memcpy(&handle_eqe_work->eq_entry, eq_entry,
-                                       sizeof(*eq_entry));
-
-                       queue_work(hdev->eq_wq, &handle_eqe_work->eq_work);
-               }
-skip_irq:
-               /* Clear EQ entry ready bit */
-               eq_entry->hdr.ctl =
-                       cpu_to_le32(le32_to_cpu(eq_entry->hdr.ctl) &
-                                                       ~EQ_CTL_READY_MASK);
-
-               eq->ci = hl_eq_inc_ptr(eq->ci);
-
-               hdev->asic_funcs->update_eq_ci(hdev, eq->ci);
-       }
-
-       return IRQ_HANDLED;
-}
-
-/**
- * hl_cq_init - main initialization function for a cq object
- *
- * @hdev: pointer to device structure
- * @q: pointer to cq structure
- * @hw_queue_id: The H/W queue ID this completion queue belongs to
- *
- * Allocate dma-able memory for the completion queue and initialize fields
- * Returns 0 on success
- */
-int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
-{
-       void *p;
-
-       p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
-                               &q->bus_address, GFP_KERNEL | __GFP_ZERO);
-       if (!p)
-               return -ENOMEM;
-
-       q->hdev = hdev;
-       q->kernel_address = (u64) (uintptr_t) p;
-       q->hw_queue_id = hw_queue_id;
-       q->ci = 0;
-       q->pi = 0;
-
-       atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);
-
-       return 0;
-}
-
-/**
- * hl_cq_fini - destroy completion queue
- *
- * @hdev: pointer to device structure
- * @q: pointer to cq structure
- *
- * Free the completion queue memory
- */
-void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q)
-{
-       hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
-                       (void *) (uintptr_t) q->kernel_address, q->bus_address);
-}
-
-void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q)
-{
-       q->ci = 0;
-       q->pi = 0;
-
-       atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);
-
-       /*
-        * It's not enough to just reset the PI/CI because the H/W may have
-        * written valid completion entries before it was halted and therefore
-        * we need to clean the actual queues so we won't process old entries
-        * when the device is operational again
-        */
-
-       memset((void *) (uintptr_t) q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES);
-}
-
-/**
- * hl_eq_init - main initialization function for an event queue object
- *
- * @hdev: pointer to device structure
- * @q: pointer to eq structure
- *
- * Allocate dma-able memory for the event queue and initialize fields
- * Returns 0 on success
- */
-int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
-{
-       void *p;
-
-       p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
-                                                       HL_EQ_SIZE_IN_BYTES,
-                                                       &q->bus_address);
-       if (!p)
-               return -ENOMEM;
-
-       q->hdev = hdev;
-       q->kernel_address = (u64) (uintptr_t) p;
-       q->ci = 0;
-
-       return 0;
-}
-
-/**
- * hl_eq_fini - destroy event queue
- *
- * @hdev: pointer to device structure
- * @q: pointer to eq structure
- *
- * Free the event queue memory
- */
-void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
-{
-       flush_workqueue(hdev->eq_wq);
-
-       hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
-                                       HL_EQ_SIZE_IN_BYTES,
-                                       (void *) (uintptr_t) q->kernel_address);
-}
-
-void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
-{
-       q->ci = 0;
-
-       /*
-        * It's not enough to just reset the PI/CI because the H/W may have
-        * written valid completion entries before it was halted and therefore
-        * we need to clean the actual queues so we won't process old entries
-        * when the device is operational again
-        */
-
-       memset((void *) (uintptr_t) q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES);
-}
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
deleted file mode 100644 (file)
index e4e1693..0000000
+++ /dev/null
@@ -1,1843 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include <uapi/misc/habanalabs.h>
-#include "habanalabs.h"
-#include "include/hw_ip/mmu/mmu_general.h"
-
-#include <linux/uaccess.h>
-#include <linux/slab.h>
-#include <linux/genalloc.h>
-
-#define HL_MMU_DEBUG   0
-
-/*
- * The va ranges in context object contain a list with the available chunks of
- * device virtual memory.
- * There is one range for host allocations and one for DRAM allocations.
- *
- * On initialization, each range contains one chunk of all of its available
- * virtual range, which is a half of the total device virtual range.
- *
- * On each mapping of physical pages, a suitable virtual range chunk (with a
- * minimum size) is selected from the list. If the chunk size equals the
- * requested size, the chunk is returned. Otherwise, the chunk is split into
- * two chunks - one to return as result and a remainder to stay in the list.
- *
- * On each unmapping of a virtual address, the relevant virtual chunk is
- * returned to the list. The chunk is added to the list and, if its edges match
- * the edges of the adjacent chunks (meaning a contiguous chunk can be created),
- * the chunks are merged.
- *
- * On finish, the list is checked to contain only one chunk covering the whole
- * relevant virtual range (which is a half of the device's total virtual range).
- * If not (meaning not all mappings were unmapped), a warning is printed.
- */
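
The list-based bookkeeping described above boils down to: allocation carves the request off a suitable free chunk (splitting it), and freeing merges back-to-back chunks. The driver's actual implementation is get_va_block(), add_va_block_locked() and merge_va_blocks_locked() below; the tiny helper here is only a standalone illustration, not driver code.

/* Illustration only (not driver code): carve "req" bytes off the front
 * of a free chunk. The remainder stays available as a smaller chunk; the
 * caller removes the chunk from its list when it becomes empty.
 */
struct example_va_chunk {
	u64 start;
	u64 size;
};

static u64 example_carve_from_chunk(struct example_va_chunk *chunk, u64 req)
{
	u64 addr = chunk->start;

	if (chunk->size < req)
		return 0;	/* too small, caller moves on to the next chunk */

	chunk->start += req;	/* split: the remainder stays available */
	chunk->size -= req;

	return addr;
}
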
-
-/*
- * alloc_device_memory - allocate device memory
- *
- * @ctx                 : current context
- * @args                : host parameters containing the requested size
- * @ret_handle          : result handle
- *
- * This function does the following:
- * - Allocate the requested size rounded up to 2MB pages
- * - Return unique handle
- */
-static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
-                               u32 *ret_handle)
-{
-       struct hl_device *hdev = ctx->hdev;
-       struct hl_vm *vm = &hdev->vm;
-       struct hl_vm_phys_pg_pack *phys_pg_pack;
-       u64 paddr = 0, total_size, num_pgs, i;
-       u32 num_curr_pgs, page_size, page_shift;
-       int handle, rc;
-       bool contiguous;
-
-       num_curr_pgs = 0;
-       page_size = hdev->asic_prop.dram_page_size;
-       page_shift = __ffs(page_size);
-       num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
-       total_size = num_pgs << page_shift;
-
-       contiguous = args->flags & HL_MEM_CONTIGUOUS;
-
-       if (contiguous) {
-               paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
-               if (!paddr) {
-                       dev_err(hdev->dev,
-                               "failed to allocate %llu huge contiguous pages\n",
-                               num_pgs);
-                       return -ENOMEM;
-               }
-       }
-
-       phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
-       if (!phys_pg_pack) {
-               rc = -ENOMEM;
-               goto pages_pack_err;
-       }
-
-       phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
-       phys_pg_pack->asid = ctx->asid;
-       phys_pg_pack->npages = num_pgs;
-       phys_pg_pack->page_size = page_size;
-       phys_pg_pack->total_size = total_size;
-       phys_pg_pack->flags = args->flags;
-       phys_pg_pack->contiguous = contiguous;
-
-       phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
-       if (!phys_pg_pack->pages) {
-               rc = -ENOMEM;
-               goto pages_arr_err;
-       }
-
-       if (phys_pg_pack->contiguous) {
-               for (i = 0 ; i < num_pgs ; i++)
-                       phys_pg_pack->pages[i] = paddr + i * page_size;
-       } else {
-               for (i = 0 ; i < num_pgs ; i++) {
-                       phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
-                                                       vm->dram_pg_pool,
-                                                       page_size);
-                       if (!phys_pg_pack->pages[i]) {
-                               dev_err(hdev->dev,
-                                       "Failed to allocate device memory (out of memory)\n");
-                               rc = -ENOMEM;
-                               goto page_err;
-                       }
-
-                       num_curr_pgs++;
-               }
-       }
-
-       spin_lock(&vm->idr_lock);
-       handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
-                               GFP_ATOMIC);
-       spin_unlock(&vm->idr_lock);
-
-       if (handle < 0) {
-               dev_err(hdev->dev, "Failed to get handle for page\n");
-               rc = -EFAULT;
-               goto idr_err;
-       }
-
-       for (i = 0 ; i < num_pgs ; i++)
-               kref_get(&vm->dram_pg_pool_refcount);
-
-       phys_pg_pack->handle = handle;
-
-       atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
-       atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);
-
-       *ret_handle = handle;
-
-       return 0;
-
-idr_err:
-page_err:
-       if (!phys_pg_pack->contiguous)
-               for (i = 0 ; i < num_curr_pgs ; i++)
-                       gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
-                                       page_size);
-
-       kvfree(phys_pg_pack->pages);
-pages_arr_err:
-       kfree(phys_pg_pack);
-pages_pack_err:
-       if (contiguous)
-               gen_pool_free(vm->dram_pg_pool, paddr, total_size);
-
-       return rc;
-}
-
-/*
- * dma_map_host_va - DMA mapping of the given host virtual address.
- * @hdev: habanalabs device structure
- * @addr: the host virtual address of the memory area
- * @size: the size of the memory area
- * @p_userptr: pointer to result userptr structure
- *
- * This function does the following:
- * - Allocate userptr structure
- * - Pin the given host memory using the userptr structure
- * - Perform DMA mapping to have the DMA addresses of the pages
- */
-static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
-                               struct hl_userptr **p_userptr)
-{
-       struct hl_userptr *userptr;
-       int rc;
-
-       userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
-       if (!userptr) {
-               rc = -ENOMEM;
-               goto userptr_err;
-       }
-
-       rc = hl_pin_host_memory(hdev, addr, size, userptr);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to pin host memory\n");
-               goto pin_err;
-       }
-
-       rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
-                                       userptr->sgt->nents, DMA_BIDIRECTIONAL);
-       if (rc) {
-               dev_err(hdev->dev, "failed to map sgt with DMA region\n");
-               goto dma_map_err;
-       }
-
-       userptr->dma_mapped = true;
-       userptr->dir = DMA_BIDIRECTIONAL;
-       userptr->vm_type = VM_TYPE_USERPTR;
-
-       *p_userptr = userptr;
-
-       return 0;
-
-dma_map_err:
-       hl_unpin_host_memory(hdev, userptr);
-pin_err:
-       kfree(userptr);
-userptr_err:
-
-       return rc;
-}
-
-/*
- * dma_unmap_host_va - DMA unmapping of the given host virtual address.
- * @hdev: habanalabs device structure
- * @userptr: userptr to free
- *
- * This function does the following:
- * - Unpins the physical pages
- * - Frees the userptr structure
- */
-static void dma_unmap_host_va(struct hl_device *hdev,
-                               struct hl_userptr *userptr)
-{
-       hl_unpin_host_memory(hdev, userptr);
-       kfree(userptr);
-}
-
-/*
- * dram_pg_pool_do_release - free DRAM pages pool
- *
- * @ref                 : pointer to reference object
- *
- * This function does the following:
- * - Frees the idr structure of physical pages handles
- * - Frees the generic pool of DRAM physical pages
- */
-static void dram_pg_pool_do_release(struct kref *ref)
-{
-       struct hl_vm *vm = container_of(ref, struct hl_vm,
-                       dram_pg_pool_refcount);
-
-       /*
-        * free the idr here as only here we know for sure that there are no
-        * allocated physical pages and hence there are no handles in use
-        */
-       idr_destroy(&vm->phys_pg_pack_handles);
-       gen_pool_destroy(vm->dram_pg_pool);
-}
-
-/*
- * free_phys_pg_pack - free physical page pack
- * @hdev: habanalabs device structure
- * @phys_pg_pack: physical page pack to free
- *
- * This function does the following:
- * - For DRAM memory only, iterate over the pack and free each physical block
- *   structure by returning it to the general pool
- * - Free the hl_vm_phys_pg_pack structure
- */
-static void free_phys_pg_pack(struct hl_device *hdev,
-                               struct hl_vm_phys_pg_pack *phys_pg_pack)
-{
-       struct hl_vm *vm = &hdev->vm;
-       u64 i;
-
-       if (!phys_pg_pack->created_from_userptr) {
-               if (phys_pg_pack->contiguous) {
-                       gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
-                                       phys_pg_pack->total_size);
-
-                       for (i = 0; i < phys_pg_pack->npages ; i++)
-                               kref_put(&vm->dram_pg_pool_refcount,
-                                       dram_pg_pool_do_release);
-               } else {
-                       for (i = 0 ; i < phys_pg_pack->npages ; i++) {
-                               gen_pool_free(vm->dram_pg_pool,
-                                               phys_pg_pack->pages[i],
-                                               phys_pg_pack->page_size);
-                               kref_put(&vm->dram_pg_pool_refcount,
-                                       dram_pg_pool_do_release);
-                       }
-               }
-       }
-
-       kvfree(phys_pg_pack->pages);
-       kfree(phys_pg_pack);
-}
-
-/*
- * free_device_memory - free device memory
- *
- * @ctx                  : current context
- * @handle              : handle of the memory chunk to free
- *
- * This function does the following:
- * - Free the device memory related to the given handle
- */
-static int free_device_memory(struct hl_ctx *ctx, u32 handle)
-{
-       struct hl_device *hdev = ctx->hdev;
-       struct hl_vm *vm = &hdev->vm;
-       struct hl_vm_phys_pg_pack *phys_pg_pack;
-
-       spin_lock(&vm->idr_lock);
-       phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
-       if (phys_pg_pack) {
-               if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
-                       dev_err(hdev->dev, "handle %u is mapped, cannot free\n",
-                               handle);
-                       spin_unlock(&vm->idr_lock);
-                       return -EINVAL;
-               }
-
-               /*
-                * must remove from idr before the freeing of the physical
-                * pages as the refcount of the pool is also the trigger of the
-                * idr destroy
-                */
-               idr_remove(&vm->phys_pg_pack_handles, handle);
-               spin_unlock(&vm->idr_lock);
-
-               atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
-               atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
-
-               free_phys_pg_pack(hdev, phys_pg_pack);
-       } else {
-               spin_unlock(&vm->idr_lock);
-               dev_err(hdev->dev,
-                       "free device memory failed, no match for handle %u\n",
-                       handle);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-/*
- * clear_va_list_locked - free virtual addresses list
- *
- * @hdev                : habanalabs device structure
- * @va_list             : list of virtual addresses to free
- *
- * This function does the following:
- * - Iterate over the list and free each virtual addresses block
- *
- * This function should be called only when va_list lock is taken
- */
-static void clear_va_list_locked(struct hl_device *hdev,
-               struct list_head *va_list)
-{
-       struct hl_vm_va_block *va_block, *tmp;
-
-       list_for_each_entry_safe(va_block, tmp, va_list, node) {
-               list_del(&va_block->node);
-               kfree(va_block);
-       }
-}
-
-/*
- * print_va_list_locked    - print virtual addresses list
- *
- * @hdev                : habanalabs device structure
- * @va_list             : list of virtual addresses to print
- *
- * This function does the following:
- * - Iterate over the list and print each virtual addresses block
- *
- * This function should be called only when va_list lock is taken
- */
-static void print_va_list_locked(struct hl_device *hdev,
-               struct list_head *va_list)
-{
-#if HL_MMU_DEBUG
-       struct hl_vm_va_block *va_block;
-
-       dev_dbg(hdev->dev, "print va list:\n");
-
-       list_for_each_entry(va_block, va_list, node)
-               dev_dbg(hdev->dev,
-                       "va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
-                       va_block->start, va_block->end, va_block->size);
-#endif
-}
-
-/*
- * merge_va_blocks_locked - merge a virtual block if possible
- *
- * @hdev                : pointer to the habanalabs device structure
- * @va_list             : pointer to the virtual addresses block list
- * @va_block            : virtual block to merge with adjacent blocks
- *
- * This function does the following:
- * - Merge the given blocks with the adjacent blocks if their virtual ranges
- *   create a contiguous virtual range
- *
- * This function should be called only when va_list lock is taken
- */
-static void merge_va_blocks_locked(struct hl_device *hdev,
-               struct list_head *va_list, struct hl_vm_va_block *va_block)
-{
-       struct hl_vm_va_block *prev, *next;
-
-       prev = list_prev_entry(va_block, node);
-       if (&prev->node != va_list && prev->end + 1 == va_block->start) {
-               prev->end = va_block->end;
-               prev->size = prev->end - prev->start;
-               list_del(&va_block->node);
-               kfree(va_block);
-               va_block = prev;
-       }
-
-       next = list_next_entry(va_block, node);
-       if (&next->node != va_list && va_block->end + 1 == next->start) {
-               next->start = va_block->start;
-               next->size = next->end - next->start;
-               list_del(&va_block->node);
-               kfree(va_block);
-       }
-}
-
-/*
- * add_va_block_locked - add a virtual block to the virtual addresses list
- *
- * @hdev                : pointer to the habanalabs device structure
- * @va_list             : pointer to the virtual addresses block list
- * @start               : start virtual address
- * @end                 : end virtual address
- *
- * This function does the following:
- * - Add the given block to the virtual blocks list and merge with other
- * blocks if a contiguous virtual block can be created
- *
- * This function should be called only when va_list lock is taken
- */
-static int add_va_block_locked(struct hl_device *hdev,
-               struct list_head *va_list, u64 start, u64 end)
-{
-       struct hl_vm_va_block *va_block, *res = NULL;
-       u64 size = end - start;
-
-       print_va_list_locked(hdev, va_list);
-
-       list_for_each_entry(va_block, va_list, node) {
-               /* TODO: remove upon matureness */
-               if (hl_mem_area_crosses_range(start, size, va_block->start,
-                               va_block->end)) {
-                       dev_err(hdev->dev,
-                               "block crossing ranges at start 0x%llx, end 0x%llx\n",
-                               va_block->start, va_block->end);
-                       return -EINVAL;
-               }
-
-               if (va_block->end < start)
-                       res = va_block;
-       }
-
-       va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
-       if (!va_block)
-               return -ENOMEM;
-
-       va_block->start = start;
-       va_block->end = end;
-       va_block->size = size;
-
-       if (!res)
-               list_add(&va_block->node, va_list);
-       else
-               list_add(&va_block->node, &res->node);
-
-       merge_va_blocks_locked(hdev, va_list, va_block);
-
-       print_va_list_locked(hdev, va_list);
-
-       return 0;
-}
-
-/*
- * add_va_block - wrapper for add_va_block_locked
- *
- * @hdev                : pointer to the habanalabs device structure
- * @va_list             : pointer to the virtual addresses block list
- * @start               : start virtual address
- * @end                 : end virtual address
- *
- * This function does the following:
- * - Takes the list lock and calls add_va_block_locked
- */
-static inline int add_va_block(struct hl_device *hdev,
-               struct hl_va_range *va_range, u64 start, u64 end)
-{
-       int rc;
-
-       mutex_lock(&va_range->lock);
-       rc = add_va_block_locked(hdev, &va_range->list, start, end);
-       mutex_unlock(&va_range->lock);
-
-       return rc;
-}
-
-/*
- * get_va_block - get a virtual block with the requested size
- *
- * @hdev            : pointer to the habanalabs device structure
- * @va_range        : pointer to the virtual addresses range
- * @size            : requested block size
- * @hint_addr       : hint for request address by the user
- * @is_userptr      : is host or DRAM memory
- *
- * This function does the following:
- * - Iterate on the virtual block list to find a suitable virtual block for the
- *   requested size
- * - Reserve the requested block and update the list
- * - Return the start address of the virtual block
- */
-static u64 get_va_block(struct hl_device *hdev,
-                       struct hl_va_range *va_range, u64 size, u64 hint_addr,
-                       bool is_userptr)
-{
-       struct hl_vm_va_block *va_block, *new_va_block = NULL;
-       u64 valid_start, valid_size, prev_start, prev_end, page_mask,
-               res_valid_start = 0, res_valid_size = 0;
-       u32 page_size;
-       bool add_prev = false;
-
-       if (is_userptr)
-               /*
-                * We cannot know if the user allocated memory with huge pages
-                * or not, hence we continue with the biggest possible
-                * granularity.
-                */
-               page_size = hdev->asic_prop.pmmu_huge.page_size;
-       else
-               page_size = hdev->asic_prop.dmmu.page_size;
-
-       page_mask = ~((u64)page_size - 1);
-
-       mutex_lock(&va_range->lock);
-
-       print_va_list_locked(hdev, &va_range->list);
-
-       list_for_each_entry(va_block, &va_range->list, node) {
-               /* calc the first possible aligned addr */
-               valid_start = va_block->start;
-
-               if (valid_start & (page_size - 1)) {
-                       valid_start &= page_mask;
-                       valid_start += page_size;
-                       if (valid_start > va_block->end)
-                               continue;
-               }
-
-               valid_size = va_block->end - valid_start;
-
-               if (valid_size >= size &&
-                       (!new_va_block || valid_size < res_valid_size)) {
-                       new_va_block = va_block;
-                       res_valid_start = valid_start;
-                       res_valid_size = valid_size;
-               }
-
-               if (hint_addr && hint_addr >= valid_start &&
-                               ((hint_addr + size) <= va_block->end)) {
-                       new_va_block = va_block;
-                       res_valid_start = hint_addr;
-                       res_valid_size = valid_size;
-                       break;
-               }
-       }
-
-       if (!new_va_block) {
-               dev_err(hdev->dev, "no available va block for size %llu\n",
-                               size);
-               goto out;
-       }
-
-       if (res_valid_start > new_va_block->start) {
-               prev_start = new_va_block->start;
-               prev_end = res_valid_start - 1;
-
-               new_va_block->start = res_valid_start;
-               new_va_block->size = res_valid_size;
-
-               add_prev = true;
-       }
-
-       if (new_va_block->size > size) {
-               new_va_block->start += size;
-               new_va_block->size = new_va_block->end - new_va_block->start;
-       } else {
-               list_del(&new_va_block->node);
-               kfree(new_va_block);
-       }
-
-       if (add_prev)
-               add_va_block_locked(hdev, &va_range->list, prev_start,
-                               prev_end);
-
-       print_va_list_locked(hdev, &va_range->list);
-out:
-       mutex_unlock(&va_range->lock);
-
-       return res_valid_start;
-}
-
-/*
- * get_sg_info - get number of pages and the DMA address from SG list
- *
- * @sg                 : the SG list
- * @dma_addr           : pointer to DMA address to return
- *
- * Calculate the number of pages covered by the given SG entry: take the offset
- * of the DMA address within its first page, add the entry length, and round up
- * to whole pages.
- */
-static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
-{
-       *dma_addr = sg_dma_address(sg);
-
-       return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
-                       (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-}
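
To make the rounding done by get_sg_info() concrete, here is a minimal standalone sketch of the same arithmetic; the 4 KB page size and the sample address/length are assumptions for illustration only, not the driver's actual configuration.

	#include <stdint.h>
	#include <stdio.h>

	#define DEMO_PAGE_SHIFT 12
	#define DEMO_PAGE_SIZE  (1ULL << DEMO_PAGE_SHIFT)

	/* same round-up as get_sg_info(): offset inside the first page plus
	 * the chunk length, rounded up to whole pages
	 */
	static uint32_t demo_npages(uint64_t dma_addr, uint64_t len)
	{
		return (uint32_t)((((dma_addr & (DEMO_PAGE_SIZE - 1)) + len) +
				   (DEMO_PAGE_SIZE - 1)) >> DEMO_PAGE_SHIFT);
	}

	int main(void)
	{
		/* 0x100 bytes into a page, 8 KB long -> touches 3 pages */
		printf("npages = %u\n", demo_npages(0x1000100ULL, 0x2000ULL));
		return 0;
	}
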
-
-/*
- * init_phys_pg_pack_from_userptr - initialize physical page pack from host
- *                                  memory
- * @ctx: current context
- * @userptr: userptr to initialize from
- * @pphys_pg_pack: result pointer
- *
- * This function does the following:
- * - Pin the physical pages related to the given virtual block
- * - Create a physical page pack from the physical pages related to the given
- *   virtual block
- */
-static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
-                               struct hl_userptr *userptr,
-                               struct hl_vm_phys_pg_pack **pphys_pg_pack)
-{
-       struct hl_vm_phys_pg_pack *phys_pg_pack;
-       struct scatterlist *sg;
-       dma_addr_t dma_addr;
-       u64 page_mask, total_npages;
-       u32 npages, page_size = PAGE_SIZE,
-               huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
-       bool first = true, is_huge_page_opt = true;
-       int rc, i, j;
-       u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
-
-       phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
-       if (!phys_pg_pack)
-               return -ENOMEM;
-
-       phys_pg_pack->vm_type = userptr->vm_type;
-       phys_pg_pack->created_from_userptr = true;
-       phys_pg_pack->asid = ctx->asid;
-       atomic_set(&phys_pg_pack->mapping_cnt, 1);
-
-       /* Huge page mapping can be used only if every dma_addr is
-        * aligned to 2MB and every chunk size is a multiple of 2MB.
-        * We limit the 2MB optimization to this condition, since
-        * later on we acquire the related VA range as one
-        * consecutive block.
-        */
-       total_npages = 0;
-       for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
-               npages = get_sg_info(sg, &dma_addr);
-
-               total_npages += npages;
-
-               if ((npages % pgs_in_huge_page) ||
-                                       (dma_addr & (huge_page_size - 1)))
-                       is_huge_page_opt = false;
-       }
-
-       if (is_huge_page_opt) {
-               page_size = huge_page_size;
-               do_div(total_npages, pgs_in_huge_page);
-       }
-
-       page_mask = ~(((u64) page_size) - 1);
-
-       phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
-                                               GFP_KERNEL);
-       if (!phys_pg_pack->pages) {
-               rc = -ENOMEM;
-               goto page_pack_arr_mem_err;
-       }
-
-       phys_pg_pack->npages = total_npages;
-       phys_pg_pack->page_size = page_size;
-       phys_pg_pack->total_size = total_npages * page_size;
-
-       j = 0;
-       for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
-               npages = get_sg_info(sg, &dma_addr);
-
-               /* align down to physical page size and save the offset */
-               if (first) {
-                       first = false;
-                       phys_pg_pack->offset = dma_addr & (page_size - 1);
-                       dma_addr &= page_mask;
-               }
-
-               while (npages) {
-                       phys_pg_pack->pages[j++] = dma_addr;
-                       dma_addr += page_size;
-
-                       if (is_huge_page_opt)
-                               npages -= pgs_in_huge_page;
-                       else
-                               npages--;
-               }
-       }
-
-       *pphys_pg_pack = phys_pg_pack;
-
-       return 0;
-
-page_pack_arr_mem_err:
-       kfree(phys_pg_pack);
-
-       return rc;
-}
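
The comment inside init_phys_pg_pack_from_userptr() describes when the 2 MB optimization may be used; below is a minimal standalone sketch of that eligibility check, with assumed page sizes (4 KB regular, 2 MB huge) and plain arrays standing in for a real struct scatterlist.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define DEMO_PAGE_SIZE       0x1000ULL    /* 4 KB */
	#define DEMO_HUGE_PAGE_SIZE  0x200000ULL  /* 2 MB */

	static bool demo_can_use_huge_pages(const uint64_t *dma_addrs,
					    const uint32_t *npages, int nents)
	{
		uint32_t pgs_in_huge_page = DEMO_HUGE_PAGE_SIZE / DEMO_PAGE_SIZE;
		int i;

		for (i = 0; i < nents; i++) {
			/* each chunk must be a whole number of 2 MB pages... */
			if (npages[i] % pgs_in_huge_page)
				return false;
			/* ...and must start on a 2 MB boundary */
			if (dma_addrs[i] & (DEMO_HUGE_PAGE_SIZE - 1))
				return false;
		}
		return true;
	}

	int main(void)
	{
		uint64_t addrs[] = { 0x100000000ULL, 0x100400000ULL };
		uint32_t pages[] = { 512, 1024 };

		printf("%s\n", demo_can_use_huge_pages(addrs, pages, 2) ?
		       "huge" : "regular");
		return 0;
	}
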
-
-/*
- * map_phys_pg_pack - maps the physical page pack.
- * @ctx: current context
- * @vaddr: start address of the virtual area to map from
- * @phys_pg_pack: the pack of physical pages to map to
- *
- * This function does the following:
- * - Maps each chunk of virtual memory to its matching physical chunk
- * - Stores number of successful mappings in the given argument
- * - Returns 0 on success, error code otherwise
- */
-static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
-                               struct hl_vm_phys_pg_pack *phys_pg_pack)
-{
-       struct hl_device *hdev = ctx->hdev;
-       u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
-       u32 page_size = phys_pg_pack->page_size;
-       int rc = 0;
-
-       for (i = 0 ; i < phys_pg_pack->npages ; i++) {
-               paddr = phys_pg_pack->pages[i];
-
-               rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
-                               (i + 1) == phys_pg_pack->npages);
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "map failed for handle %u, npages: %llu, mapped: %llu",
-                               phys_pg_pack->handle, phys_pg_pack->npages,
-                               mapped_pg_cnt);
-                       goto err;
-               }
-
-               mapped_pg_cnt++;
-               next_vaddr += page_size;
-       }
-
-       return 0;
-
-err:
-       next_vaddr = vaddr;
-       for (i = 0 ; i < mapped_pg_cnt ; i++) {
-               if (hl_mmu_unmap(ctx, next_vaddr, page_size,
-                                       (i + 1) == mapped_pg_cnt))
-                       dev_warn_ratelimited(hdev->dev,
-                               "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
-                                       phys_pg_pack->handle, next_vaddr,
-                                       phys_pg_pack->pages[i], page_size);
-
-               next_vaddr += page_size;
-       }
-
-       return rc;
-}
-
-/*
- * unmap_phys_pg_pack - unmaps the physical page pack
- * @ctx: current context
- * @vaddr: start address of the virtual area to unmap
- * @phys_pg_pack: the pack of physical pages to unmap
- */
-static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
-                               struct hl_vm_phys_pg_pack *phys_pg_pack)
-{
-       struct hl_device *hdev = ctx->hdev;
-       u64 next_vaddr, i;
-       u32 page_size;
-
-       page_size = phys_pg_pack->page_size;
-       next_vaddr = vaddr;
-
-       for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
-               if (hl_mmu_unmap(ctx, next_vaddr, page_size,
-                                      (i + 1) == phys_pg_pack->npages))
-                       dev_warn_ratelimited(hdev->dev,
-                       "unmap failed for vaddr: 0x%llx\n", next_vaddr);
-
-               /*
-                * unmapping on Palladium can be really long, so avoid a CPU
-                * soft lockup bug by sleeping a little between unmapping pages
-                */
-               if (hdev->pldm)
-                       usleep_range(500, 1000);
-       }
-}
-
-static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
-                               u64 *paddr)
-{
-       struct hl_device *hdev = ctx->hdev;
-       struct hl_vm *vm = &hdev->vm;
-       struct hl_vm_phys_pg_pack *phys_pg_pack;
-       u32 handle;
-
-       handle = lower_32_bits(args->map_device.handle);
-       spin_lock(&vm->idr_lock);
-       phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
-       if (!phys_pg_pack) {
-               spin_unlock(&vm->idr_lock);
-               dev_err(hdev->dev, "no match for handle %u\n", handle);
-               return -EINVAL;
-       }
-
-       *paddr = phys_pg_pack->pages[0];
-
-       spin_unlock(&vm->idr_lock);
-
-       return 0;
-}
-
-/*
- * map_device_va - map the given memory
- *
- * @ctx                 : current context
- * @args                : host parameters with handle/host virtual address
- * @device_addr         : pointer to result device virtual address
- *
- * This function does the following:
- * - If given a physical device memory handle, map to a device virtual block
- *   and return the start address of this block
- * - If given a host virtual address and size, find the related physical pages,
- *   map a device virtual block to these pages and return the start address of
- *   this block
- */
-static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
-               u64 *device_addr)
-{
-       struct hl_device *hdev = ctx->hdev;
-       struct hl_vm *vm = &hdev->vm;
-       struct hl_vm_phys_pg_pack *phys_pg_pack;
-       struct hl_userptr *userptr = NULL;
-       struct hl_vm_hash_node *hnode;
-       struct hl_va_range *va_range;
-       enum vm_type_t *vm_type;
-       u64 ret_vaddr, hint_addr;
-       u32 handle = 0;
-       int rc;
-       bool is_userptr = args->flags & HL_MEM_USERPTR;
-
-       /* Assume failure */
-       *device_addr = 0;
-
-       if (is_userptr) {
-               u64 addr = args->map_host.host_virt_addr,
-                       size = args->map_host.mem_size;
-
-               rc = dma_map_host_va(hdev, addr, size, &userptr);
-               if (rc) {
-                       dev_err(hdev->dev, "failed to get userptr from va\n");
-                       return rc;
-               }
-
-               rc = init_phys_pg_pack_from_userptr(ctx, userptr,
-                               &phys_pg_pack);
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "unable to init page pack for vaddr 0x%llx\n",
-                               addr);
-                       goto init_page_pack_err;
-               }
-
-               vm_type = (enum vm_type_t *) userptr;
-               hint_addr = args->map_host.hint_addr;
-               handle = phys_pg_pack->handle;
-       } else {
-               handle = lower_32_bits(args->map_device.handle);
-
-               spin_lock(&vm->idr_lock);
-               phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
-               if (!phys_pg_pack) {
-                       spin_unlock(&vm->idr_lock);
-                       dev_err(hdev->dev,
-                               "no match for handle %u\n", handle);
-                       return -EINVAL;
-               }
-
-               /* increment now to avoid freeing device memory while mapping */
-               atomic_inc(&phys_pg_pack->mapping_cnt);
-
-               spin_unlock(&vm->idr_lock);
-
-               vm_type = (enum vm_type_t *) phys_pg_pack;
-
-               hint_addr = args->map_device.hint_addr;
-       }
-
-       /*
-        * relevant for mapping device physical memory only, as host memory is
-        * implicitly shared
-        */
-       if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
-                       phys_pg_pack->asid != ctx->asid) {
-               dev_err(hdev->dev,
-                       "Failed to map memory, handle %u is not shared\n",
-                       handle);
-               rc = -EPERM;
-               goto shared_err;
-       }
-
-       hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
-       if (!hnode) {
-               rc = -ENOMEM;
-               goto hnode_err;
-       }
-
-       if (is_userptr)
-               if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size)
-                       va_range = ctx->host_va_range;
-               else
-                       va_range = ctx->host_huge_va_range;
-       else
-               va_range = ctx->dram_va_range;
-
-       ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
-                                       hint_addr, is_userptr);
-       if (!ret_vaddr) {
-               dev_err(hdev->dev, "no available va block for handle %u\n",
-                               handle);
-               rc = -ENOMEM;
-               goto va_block_err;
-       }
-
-       mutex_lock(&ctx->mmu_lock);
-
-       rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
-       if (rc) {
-               mutex_unlock(&ctx->mmu_lock);
-               dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
-                               handle);
-               goto map_err;
-       }
-
-       rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type);
-
-       mutex_unlock(&ctx->mmu_lock);
-
-       if (rc) {
-               dev_err(hdev->dev,
-                       "mapping handle %u failed due to MMU cache invalidation\n",
-                       handle);
-               goto map_err;
-       }
-
-       ret_vaddr += phys_pg_pack->offset;
-
-       hnode->ptr = vm_type;
-       hnode->vaddr = ret_vaddr;
-
-       mutex_lock(&ctx->mem_hash_lock);
-       hash_add(ctx->mem_hash, &hnode->node, ret_vaddr);
-       mutex_unlock(&ctx->mem_hash_lock);
-
-       *device_addr = ret_vaddr;
-
-       if (is_userptr)
-               free_phys_pg_pack(hdev, phys_pg_pack);
-
-       return 0;
-
-map_err:
-       if (add_va_block(hdev, va_range, ret_vaddr,
-                               ret_vaddr + phys_pg_pack->total_size - 1))
-               dev_warn(hdev->dev,
-                       "release va block failed for handle 0x%x, vaddr: 0x%llx\n",
-                               handle, ret_vaddr);
-
-va_block_err:
-       kfree(hnode);
-hnode_err:
-shared_err:
-       atomic_dec(&phys_pg_pack->mapping_cnt);
-       if (is_userptr)
-               free_phys_pg_pack(hdev, phys_pg_pack);
-init_page_pack_err:
-       if (is_userptr)
-               dma_unmap_host_va(hdev, userptr);
-
-       return rc;
-}
-
-/*
- * unmap_device_va      - unmap the given device virtual address
- *
- * @ctx                 : current context
- * @vaddr               : device virtual address to unmap
- * @ctx_free            : true if in context free flow, false otherwise.
- *
- * This function does the following:
- * - Unmap the physical pages related to the given virtual address
- * - return the device virtual block to the virtual block list
- */
-static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
-{
-       struct hl_device *hdev = ctx->hdev;
-       struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
-       struct hl_vm_hash_node *hnode = NULL;
-       struct hl_userptr *userptr = NULL;
-       struct hl_va_range *va_range;
-       enum vm_type_t *vm_type;
-       bool is_userptr;
-       int rc = 0;
-
-       /* protect from double entrance */
-       mutex_lock(&ctx->mem_hash_lock);
-       hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
-               if (vaddr == hnode->vaddr)
-                       break;
-
-       if (!hnode) {
-               mutex_unlock(&ctx->mem_hash_lock);
-               dev_err(hdev->dev,
-                       "unmap failed, no mem hnode for vaddr 0x%llx\n",
-                       vaddr);
-               return -EINVAL;
-       }
-
-       hash_del(&hnode->node);
-       mutex_unlock(&ctx->mem_hash_lock);
-
-       vm_type = hnode->ptr;
-
-       if (*vm_type == VM_TYPE_USERPTR) {
-               is_userptr = true;
-               userptr = hnode->ptr;
-               rc = init_phys_pg_pack_from_userptr(ctx, userptr,
-                                                       &phys_pg_pack);
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "unable to init page pack for vaddr 0x%llx\n",
-                               vaddr);
-                       goto vm_type_err;
-               }
-
-               if (phys_pg_pack->page_size ==
-                                       hdev->asic_prop.pmmu.page_size)
-                       va_range = ctx->host_va_range;
-               else
-                       va_range = ctx->host_huge_va_range;
-       } else if (*vm_type == VM_TYPE_PHYS_PACK) {
-               is_userptr = false;
-               va_range = ctx->dram_va_range;
-               phys_pg_pack = hnode->ptr;
-       } else {
-               dev_warn(hdev->dev,
-                       "unmap failed, unknown vm desc for vaddr 0x%llx\n",
-                               vaddr);
-               rc = -EFAULT;
-               goto vm_type_err;
-       }
-
-       if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
-               dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
-               rc = -EINVAL;
-               goto mapping_cnt_err;
-       }
-
-       vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
-
-       mutex_lock(&ctx->mmu_lock);
-
-       unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);
-
-       /*
-        * During context free this function is called in a loop to clean all
-        * the context mappings. Hence the cache invalidation can be called once
-        * at the loop end rather than for each iteration
-        */
-       if (!ctx_free)
-               rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
-                                                               *vm_type);
-
-       mutex_unlock(&ctx->mmu_lock);
-
-       /*
-        * If the context is closing we don't need to check for the MMU cache
-        * invalidation return code and update the VA free list as in this flow
-        * we invalidate the MMU cache outside of this unmap function and the VA
-        * free list will be freed anyway.
-        */
-       if (!ctx_free) {
-               int tmp_rc;
-
-               if (rc)
-                       dev_err(hdev->dev,
-                               "unmapping vaddr 0x%llx failed due to MMU cache invalidation\n",
-                               vaddr);
-
-               tmp_rc = add_va_block(hdev, va_range, vaddr,
-                                       vaddr + phys_pg_pack->total_size - 1);
-               if (tmp_rc) {
-                       dev_warn(hdev->dev,
-                                       "add va block failed for vaddr: 0x%llx\n",
-                                       vaddr);
-                       if (!rc)
-                               rc = tmp_rc;
-               }
-       }
-
-       atomic_dec(&phys_pg_pack->mapping_cnt);
-       kfree(hnode);
-
-       if (is_userptr) {
-               free_phys_pg_pack(hdev, phys_pg_pack);
-               dma_unmap_host_va(hdev, userptr);
-       }
-
-       return rc;
-
-mapping_cnt_err:
-       if (is_userptr)
-               free_phys_pg_pack(hdev, phys_pg_pack);
-vm_type_err:
-       mutex_lock(&ctx->mem_hash_lock);
-       hash_add(ctx->mem_hash, &hnode->node, vaddr);
-       mutex_unlock(&ctx->mem_hash_lock);
-
-       return rc;
-}
-
-static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
-{
-       struct hl_device *hdev = hpriv->hdev;
-       struct hl_ctx *ctx = hpriv->ctx;
-       u64 device_addr = 0;
-       u32 handle = 0;
-       int rc;
-
-       switch (args->in.op) {
-       case HL_MEM_OP_ALLOC:
-               if (args->in.alloc.mem_size == 0) {
-                       dev_err(hdev->dev,
-                               "alloc size must be larger than 0\n");
-                       rc = -EINVAL;
-                       goto out;
-               }
-
-               /* Force contiguous as there are no real MMU
-                * translations to overcome physical memory gaps
-                */
-               args->in.flags |= HL_MEM_CONTIGUOUS;
-               rc = alloc_device_memory(ctx, &args->in, &handle);
-
-               memset(args, 0, sizeof(*args));
-               args->out.handle = (__u64) handle;
-               break;
-
-       case HL_MEM_OP_FREE:
-               rc = free_device_memory(ctx, args->in.free.handle);
-               break;
-
-       case HL_MEM_OP_MAP:
-               if (args->in.flags & HL_MEM_USERPTR) {
-                       device_addr = args->in.map_host.host_virt_addr;
-                       rc = 0;
-               } else {
-                       rc = get_paddr_from_handle(ctx, &args->in,
-                                       &device_addr);
-               }
-
-               memset(args, 0, sizeof(*args));
-               args->out.device_virt_addr = device_addr;
-               break;
-
-       case HL_MEM_OP_UNMAP:
-               rc = 0;
-               break;
-
-       default:
-               dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
-               rc = -ENOTTY;
-               break;
-       }
-
-out:
-       return rc;
-}
-
-int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
-{
-       union hl_mem_args *args = data;
-       struct hl_device *hdev = hpriv->hdev;
-       struct hl_ctx *ctx = hpriv->ctx;
-       u64 device_addr = 0;
-       u32 handle = 0;
-       int rc;
-
-       if (hl_device_disabled_or_in_reset(hdev)) {
-               dev_warn_ratelimited(hdev->dev,
-                       "Device is %s. Can't execute MEMORY IOCTL\n",
-                       atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
-               return -EBUSY;
-       }
-
-       if (!hdev->mmu_enable)
-               return mem_ioctl_no_mmu(hpriv, args);
-
-       switch (args->in.op) {
-       case HL_MEM_OP_ALLOC:
-               if (!hdev->dram_supports_virtual_memory) {
-                       dev_err(hdev->dev, "DRAM alloc is not supported\n");
-                       rc = -EINVAL;
-                       goto out;
-               }
-
-               if (args->in.alloc.mem_size == 0) {
-                       dev_err(hdev->dev,
-                               "alloc size must be larger than 0\n");
-                       rc = -EINVAL;
-                       goto out;
-               }
-               rc = alloc_device_memory(ctx, &args->in, &handle);
-
-               memset(args, 0, sizeof(*args));
-               args->out.handle = (__u64) handle;
-               break;
-
-       case HL_MEM_OP_FREE:
-               rc = free_device_memory(ctx, args->in.free.handle);
-               break;
-
-       case HL_MEM_OP_MAP:
-               rc = map_device_va(ctx, &args->in, &device_addr);
-
-               memset(args, 0, sizeof(*args));
-               args->out.device_virt_addr = device_addr;
-               break;
-
-       case HL_MEM_OP_UNMAP:
-               rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr,
-                                       false);
-               break;
-
-       default:
-               dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
-               rc = -ENOTTY;
-               break;
-       }
-
-out:
-       return rc;
-}
-
-static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
-                               u32 npages, u64 start, u32 offset,
-                               struct hl_userptr *userptr)
-{
-       int rc;
-
-       if (!access_ok((void __user *) (uintptr_t) addr, size)) {
-               dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
-               return -EFAULT;
-       }
-
-       userptr->vec = frame_vector_create(npages);
-       if (!userptr->vec) {
-               dev_err(hdev->dev, "Failed to create frame vector\n");
-               return -ENOMEM;
-       }
-
-       rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
-                               userptr->vec);
-
-       if (rc != npages) {
-               dev_err(hdev->dev,
-                       "Failed to map host memory, user ptr probably wrong\n");
-               if (rc < 0)
-                       goto destroy_framevec;
-               rc = -EFAULT;
-               goto put_framevec;
-       }
-
-       if (frame_vector_to_pages(userptr->vec) < 0) {
-               dev_err(hdev->dev,
-                       "Failed to translate frame vector to pages\n");
-               rc = -EFAULT;
-               goto put_framevec;
-       }
-
-       rc = sg_alloc_table_from_pages(userptr->sgt,
-                                       frame_vector_pages(userptr->vec),
-                                       npages, offset, size, GFP_ATOMIC);
-       if (rc < 0) {
-               dev_err(hdev->dev, "failed to create SG table from pages\n");
-               goto put_framevec;
-       }
-
-       return 0;
-
-put_framevec:
-       put_vaddr_frames(userptr->vec);
-destroy_framevec:
-       frame_vector_destroy(userptr->vec);
-       return rc;
-}
-
-/*
- * hl_pin_host_memory - pins a chunk of host memory.
- * @hdev: pointer to the habanalabs device structure
- * @addr: the host virtual address of the memory area
- * @size: the size of the memory area
- * @userptr: pointer to hl_userptr structure
- *
- * This function does the following:
- * - Pins the physical pages
- * - Creates an SG list from those pages
- */
-int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
-                                       struct hl_userptr *userptr)
-{
-       u64 start, end;
-       u32 npages, offset;
-       int rc;
-
-       if (!size) {
-               dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
-               return -EINVAL;
-       }
-
-       /*
-        * If the combination of the address and size requested for this memory
-        * region causes an integer overflow, return error.
-        */
-       if (((addr + size) < addr) ||
-                       PAGE_ALIGN(addr + size) < (addr + size)) {
-               dev_err(hdev->dev,
-                       "user pointer 0x%llx + %llu causes integer overflow\n",
-                       addr, size);
-               return -EINVAL;
-       }
-
-       /*
-        * This function can also be called from the data path, hence always
-        * use an atomic allocation as it is not a big one.
-        */
-       userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
-       if (!userptr->sgt)
-               return -ENOMEM;
-
-       start = addr & PAGE_MASK;
-       offset = addr & ~PAGE_MASK;
-       end = PAGE_ALIGN(addr + size);
-       npages = (end - start) >> PAGE_SHIFT;
-
-       userptr->size = size;
-       userptr->addr = addr;
-       userptr->dma_mapped = false;
-       INIT_LIST_HEAD(&userptr->job_node);
-
-       rc = get_user_memory(hdev, addr, size, npages, start, offset,
-                               userptr);
-       if (rc) {
-               dev_err(hdev->dev,
-                       "failed to get user memory for address 0x%llx\n",
-                       addr);
-               goto free_sgt;
-       }
-
-       hl_debugfs_add_userptr(hdev, userptr);
-
-       return 0;
-
-free_sgt:
-       kfree(userptr->sgt);
-       return rc;
-}
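
As an illustration of how hl_pin_host_memory() derives the pinning window from the user address and size, here is a small standalone sketch; the 4 KB page size and the sample address/size are assumed values used purely for demonstration.

	#include <stdint.h>
	#include <stdio.h>

	#define DEMO_PAGE_SHIFT 12
	#define DEMO_PAGE_SIZE  (1ULL << DEMO_PAGE_SHIFT)
	#define DEMO_PAGE_MASK  (~(DEMO_PAGE_SIZE - 1))

	int main(void)
	{
		uint64_t addr = 0x7f12345678ULL, size = 0x3000ULL;

		uint64_t start  = addr & DEMO_PAGE_MASK;   /* page-aligned start   */
		uint64_t offset = addr & ~DEMO_PAGE_MASK;  /* offset in first page */
		/* PAGE_ALIGN(addr + size) */
		uint64_t end    = (addr + size + DEMO_PAGE_SIZE - 1) & DEMO_PAGE_MASK;
		uint64_t npages = (end - start) >> DEMO_PAGE_SHIFT;

		printf("start=0x%llx offset=0x%llx npages=%llu\n",
		       (unsigned long long)start, (unsigned long long)offset,
		       (unsigned long long)npages);
		return 0;
	}
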
-
-/*
- * hl_unpin_host_memory - unpins a chunk of host memory.
- * @hdev: pointer to the habanalabs device structure
- * @userptr: pointer to hl_userptr structure
- *
- * This function does the following:
- * - Unpins the physical pages related to the host memory
- * - Frees the SG list
- */
-void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
-{
-       struct page **pages;
-
-       hl_debugfs_remove_userptr(hdev, userptr);
-
-       if (userptr->dma_mapped)
-               hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl,
-                                                       userptr->sgt->nents,
-                                                       userptr->dir);
-
-       pages = frame_vector_pages(userptr->vec);
-       if (!IS_ERR(pages)) {
-               int i;
-
-               for (i = 0; i < frame_vector_count(userptr->vec); i++)
-                       set_page_dirty_lock(pages[i]);
-       }
-       put_vaddr_frames(userptr->vec);
-       frame_vector_destroy(userptr->vec);
-
-       list_del(&userptr->job_node);
-
-       sg_free_table(userptr->sgt);
-       kfree(userptr->sgt);
-}
-
-/*
- * hl_userptr_delete_list - clear userptr list
- *
- * @hdev                : pointer to the habanalabs device structure
- * @userptr_list        : pointer to the list to clear
- *
- * This function does the following:
- * - Iterates over the list and unpins the host memory and frees the userptr
- *   structure.
- */
-void hl_userptr_delete_list(struct hl_device *hdev,
-                               struct list_head *userptr_list)
-{
-       struct hl_userptr *userptr, *tmp;
-
-       list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
-               hl_unpin_host_memory(hdev, userptr);
-               kfree(userptr);
-       }
-
-       INIT_LIST_HEAD(userptr_list);
-}
-
-/*
- * hl_userptr_is_pinned - returns whether the given host memory is pinned
- *
- * @hdev                : pointer to the habanalabs device structure
- * @addr                : user virtual address of the memory area
- * @size                : size of the memory area
- * @userptr_list        : pointer to the list to search in
- * @userptr             : returns the matching userptr, if found
- *
- * This function does the following:
- * - Iterates over the list and checks whether an entry with the given address
- *   and size exists there, i.e. whether that memory is pinned. If so, returns
- *   true, otherwise returns false.
- */
-bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
-                               u32 size, struct list_head *userptr_list,
-                               struct hl_userptr **userptr)
-{
-       list_for_each_entry((*userptr), userptr_list, job_node) {
-               if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
-                       return true;
-       }
-
-       return false;
-}
-
-/*
- * va_range_init - initialize virtual addresses range
- * @hdev: pointer to the habanalabs device structure
- * @va_range: pointer to the range to initialize
- * @start: range start address
- * @end: range end address
- *
- * This function does the following:
- * - Initializes the virtual addresses list of the given range with the given
- *   addresses.
- */
-static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
-                               u64 start, u64 end)
-{
-       int rc;
-
-       INIT_LIST_HEAD(&va_range->list);
-
-       /* PAGE_SIZE alignment */
-
-       if (start & (PAGE_SIZE - 1)) {
-               start &= PAGE_MASK;
-               start += PAGE_SIZE;
-       }
-
-       if (end & (PAGE_SIZE - 1))
-               end &= PAGE_MASK;
-
-       if (start >= end) {
-               dev_err(hdev->dev, "too small vm range for va list\n");
-               return -EFAULT;
-       }
-
-       rc = add_va_block(hdev, va_range, start, end);
-
-       if (rc) {
-               dev_err(hdev->dev, "Failed to init host va list\n");
-               return rc;
-       }
-
-       va_range->start_addr = start;
-       va_range->end_addr = end;
-
-       return 0;
-}
-
-/*
- * va_range_fini() - clear a virtual addresses range
- * @hdev: pointer to the habanalabs structure
- * @va_range: pointer to virtual addresses range
- *
- * This function does the following:
- * - Frees the virtual addresses block list and its lock
- */
-static void va_range_fini(struct hl_device *hdev,
-               struct hl_va_range *va_range)
-{
-       mutex_lock(&va_range->lock);
-       clear_va_list_locked(hdev, &va_range->list);
-       mutex_unlock(&va_range->lock);
-
-       mutex_destroy(&va_range->lock);
-       kfree(va_range);
-}
-
-/*
- * vm_ctx_init_with_ranges() - initialize virtual memory for context
- * @ctx: pointer to the habanalabs context structure
- * @host_range_start: host virtual addresses range start.
- * @host_range_end: host virtual addresses range end.
- * @host_huge_range_start: host virtual addresses range start for memory
- *                          allocated with huge pages.
- * @host_huge_range_end: host virtual addresses range end for memory allocated
- *                        with huge pages.
- * @dram_range_start: dram virtual addresses range start.
- * @dram_range_end: dram virtual addresses range end.
- *
- * This function initializes the following:
- * - MMU for context
- * - Virtual address to area descriptor hashtable
- * - Virtual block list of available virtual memory
- */
-static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
-                                       u64 host_range_start,
-                                       u64 host_range_end,
-                                       u64 host_huge_range_start,
-                                       u64 host_huge_range_end,
-                                       u64 dram_range_start,
-                                       u64 dram_range_end)
-{
-       struct hl_device *hdev = ctx->hdev;
-       int rc;
-
-       ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
-       if (!ctx->host_va_range)
-               return -ENOMEM;
-
-       ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
-                                               GFP_KERNEL);
-       if (!ctx->host_huge_va_range) {
-               rc =  -ENOMEM;
-               goto host_huge_va_range_err;
-       }
-
-       ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
-       if (!ctx->dram_va_range) {
-               rc = -ENOMEM;
-               goto dram_va_range_err;
-       }
-
-       rc = hl_mmu_ctx_init(ctx);
-       if (rc) {
-               dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
-               goto mmu_ctx_err;
-       }
-
-       mutex_init(&ctx->mem_hash_lock);
-       hash_init(ctx->mem_hash);
-
-       mutex_init(&ctx->host_va_range->lock);
-
-       rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
-                               host_range_end);
-       if (rc) {
-               dev_err(hdev->dev, "failed to init host vm range\n");
-               goto host_page_range_err;
-       }
-
-       if (hdev->pmmu_huge_range) {
-               mutex_init(&ctx->host_huge_va_range->lock);
-
-               rc = va_range_init(hdev, ctx->host_huge_va_range,
-                                       host_huge_range_start,
-                                       host_huge_range_end);
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "failed to init host huge vm range\n");
-                       goto host_hpage_range_err;
-               }
-       } else {
-               ctx->host_huge_va_range = ctx->host_va_range;
-       }
-
-       mutex_init(&ctx->dram_va_range->lock);
-
-       rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
-                       dram_range_end);
-       if (rc) {
-               dev_err(hdev->dev, "failed to init dram vm range\n");
-               goto dram_vm_err;
-       }
-
-       hl_debugfs_add_ctx_mem_hash(hdev, ctx);
-
-       return 0;
-
-dram_vm_err:
-       mutex_destroy(&ctx->dram_va_range->lock);
-
-       if (hdev->pmmu_huge_range) {
-               mutex_lock(&ctx->host_huge_va_range->lock);
-               clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
-               mutex_unlock(&ctx->host_huge_va_range->lock);
-       }
-host_hpage_range_err:
-       if (hdev->pmmu_huge_range)
-               mutex_destroy(&ctx->host_huge_va_range->lock);
-       mutex_lock(&ctx->host_va_range->lock);
-       clear_va_list_locked(hdev, &ctx->host_va_range->list);
-       mutex_unlock(&ctx->host_va_range->lock);
-host_page_range_err:
-       mutex_destroy(&ctx->host_va_range->lock);
-       mutex_destroy(&ctx->mem_hash_lock);
-       hl_mmu_ctx_fini(ctx);
-mmu_ctx_err:
-       kfree(ctx->dram_va_range);
-dram_va_range_err:
-       kfree(ctx->host_huge_va_range);
-host_huge_va_range_err:
-       kfree(ctx->host_va_range);
-
-       return rc;
-}
-
-int hl_vm_ctx_init(struct hl_ctx *ctx)
-{
-       struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
-       u64 host_range_start, host_range_end, host_huge_range_start,
-               host_huge_range_end, dram_range_start, dram_range_end;
-
-       atomic64_set(&ctx->dram_phys_mem, 0);
-
-       /*
-        * - If MMU is enabled, init the ranges as usual.
-        * - If MMU is disabled, in case of host mapping, the returned address
-        *   is the given one.
-        *   In case of DRAM mapping, the returned address is the physical
-        *   address of the memory related to the given handle.
-        */
-       if (ctx->hdev->mmu_enable) {
-               dram_range_start = prop->dmmu.start_addr;
-               dram_range_end = prop->dmmu.end_addr;
-               host_range_start = prop->pmmu.start_addr;
-               host_range_end = prop->pmmu.end_addr;
-               host_huge_range_start = prop->pmmu_huge.start_addr;
-               host_huge_range_end = prop->pmmu_huge.end_addr;
-       } else {
-               dram_range_start = prop->dram_user_base_address;
-               dram_range_end = prop->dram_end_address;
-               host_range_start = prop->dram_user_base_address;
-               host_range_end = prop->dram_end_address;
-               host_huge_range_start = prop->dram_user_base_address;
-               host_huge_range_end = prop->dram_end_address;
-       }
-
-       return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
-                                       host_huge_range_start,
-                                       host_huge_range_end,
-                                       dram_range_start,
-                                       dram_range_end);
-}
-
-/*
- * hl_vm_ctx_fini       - virtual memory teardown of context
- *
- * @ctx                 : pointer to the habanalabs context structure
- *
- * This function tears down the following:
- * - Virtual block list of available virtual memory
- * - Virtual address to area descriptor hashtable
- * - MMU for context
- *
- * In addition this function does the following:
- * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
- *   hashtable should be empty as no valid mappings should exist at this
- *   point.
- * - Frees any existing physical page list from the idr which relates to the
- *   current context asid.
- * - This function checks the virtual block list for correctness. At this point
- *   the list should contain one element which describes the whole virtual
- *   memory range of the context. Otherwise, a warning is printed.
- */
-void hl_vm_ctx_fini(struct hl_ctx *ctx)
-{
-       struct hl_device *hdev = ctx->hdev;
-       struct hl_vm *vm = &hdev->vm;
-       struct hl_vm_phys_pg_pack *phys_pg_list;
-       struct hl_vm_hash_node *hnode;
-       struct hlist_node *tmp_node;
-       int i;
-
-       hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
-
-       /*
-        * If a hard reset is pending, something clearly went wrong already, so
-        * there is no point in printing yet another error about this side effect
-        */
-       if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
-               dev_notice(hdev->dev,
-                       "user released device without removing its memory mappings\n");
-
-       hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
-               dev_dbg(hdev->dev,
-                       "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
-                       hnode->vaddr, ctx->asid);
-               unmap_device_va(ctx, hnode->vaddr, true);
-       }
-
-       /* invalidate the cache once after the unmapping loop */
-       hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
-       hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);
-
-       spin_lock(&vm->idr_lock);
-       idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
-               if (phys_pg_list->asid == ctx->asid) {
-                       dev_dbg(hdev->dev,
-                               "page list 0x%px of asid %d is still alive\n",
-                               phys_pg_list, ctx->asid);
-                       atomic64_sub(phys_pg_list->total_size,
-                                       &hdev->dram_used_mem);
-                       free_phys_pg_pack(hdev, phys_pg_list);
-                       idr_remove(&vm->phys_pg_pack_handles, i);
-               }
-       spin_unlock(&vm->idr_lock);
-
-       va_range_fini(hdev, ctx->dram_va_range);
-       if (hdev->pmmu_huge_range)
-               va_range_fini(hdev, ctx->host_huge_va_range);
-       va_range_fini(hdev, ctx->host_va_range);
-
-       mutex_destroy(&ctx->mem_hash_lock);
-       hl_mmu_ctx_fini(ctx);
-}
-
-/*
- * hl_vm_init           - initialize virtual memory module
- *
- * @hdev                : pointer to the habanalabs device structure
- *
- * This function initializes the following:
- * - MMU module
- * - DRAM physical pages pool of 2MB
- * - Idr for device memory allocation handles
- */
-int hl_vm_init(struct hl_device *hdev)
-{
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       struct hl_vm *vm = &hdev->vm;
-       int rc;
-
-       vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
-       if (!vm->dram_pg_pool) {
-               dev_err(hdev->dev, "Failed to create dram page pool\n");
-               return -ENOMEM;
-       }
-
-       kref_init(&vm->dram_pg_pool_refcount);
-
-       rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
-                       prop->dram_end_address - prop->dram_user_base_address,
-                       -1);
-
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to add memory to dram page pool %d\n", rc);
-               goto pool_add_err;
-       }
-
-       spin_lock_init(&vm->idr_lock);
-       idr_init(&vm->phys_pg_pack_handles);
-
-       atomic64_set(&hdev->dram_used_mem, 0);
-
-       vm->init_done = true;
-
-       return 0;
-
-pool_add_err:
-       gen_pool_destroy(vm->dram_pg_pool);
-
-       return rc;
-}
-
-/*
- * hl_vm_fini           - virtual memory module teardown
- *
- * @hdev                : pointer to the habanalabs device structure
- *
- * This function tears down the following:
- * - Idr for device memory allocation handles
- * - DRAM physical pages pool of 2MB
- * - MMU module
- */
-void hl_vm_fini(struct hl_device *hdev)
-{
-       struct hl_vm *vm = &hdev->vm;
-
-       if (!vm->init_done)
-               return;
-
-       /*
-        * At this point all the contexts should be freed, and hence no DRAM
-        * memory should be in use, so the DRAM pool should be freed here.
-        */
-       if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
-               dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
-                               __func__);
-
-       vm->init_done = false;
-}
diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
deleted file mode 100644 (file)
index 0430395..0000000
+++ /dev/null
@@ -1,1037 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-#include "include/hw_ip/mmu/mmu_general.h"
-
-#include <linux/genalloc.h>
-#include <linux/slab.h>
-
-static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
-
-static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
-{
-       struct pgt_info *pgt_info = NULL;
-
-       hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
-                               (unsigned long) hop_addr)
-               if (hop_addr == pgt_info->shadow_addr)
-                       break;
-
-       return pgt_info;
-}
-
-static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
-{
-       struct hl_device *hdev = ctx->hdev;
-
-       gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr,
-                       hdev->asic_prop.mmu_hop_table_size);
-       hash_del(&pgt_info->node);
-       kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
-       kfree(pgt_info);
-}
-
-static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
-{
-       struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
-
-       _free_hop(ctx, pgt_info);
-}
-
-static u64 alloc_hop(struct hl_ctx *ctx)
-{
-       struct hl_device *hdev = ctx->hdev;
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       struct pgt_info *pgt_info;
-       u64 phys_addr, shadow_addr;
-
-       pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
-       if (!pgt_info)
-               return ULLONG_MAX;
-
-       phys_addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool,
-                                       prop->mmu_hop_table_size);
-       if (!phys_addr) {
-               dev_err(hdev->dev, "failed to allocate page\n");
-               goto pool_add_err;
-       }
-
-       shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
-                                               GFP_KERNEL);
-       if (!shadow_addr)
-               goto shadow_err;
-
-       pgt_info->phys_addr = phys_addr;
-       pgt_info->shadow_addr = shadow_addr;
-       pgt_info->ctx = ctx;
-       pgt_info->num_of_ptes = 0;
-       hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
-
-       return shadow_addr;
-
-shadow_err:
-       gen_pool_free(hdev->mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size);
-pool_add_err:
-       kfree(pgt_info);
-
-       return ULLONG_MAX;
-}
-
-static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
-{
-       return ctx->hdev->asic_prop.mmu_pgt_addr +
-                       (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
-}
-
-static inline u64 get_hop0_addr(struct hl_ctx *ctx)
-{
-       return (u64) (uintptr_t) ctx->hdev->mmu_shadow_hop0 +
-                       (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
-}
-
-static inline void flush(struct hl_ctx *ctx)
-{
-       /* flush all writes from all cores to reach PCI */
-       mb();
-       ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
-}
-
-/* transform the value to physical address when writing to H/W */
-static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
-{
-       /*
-        * The value to write is actually the address of the next shadow hop +
-        * flags at the 12 LSBs.
-        * Hence in order to get the value to write to the physical PTE, we
-        * clear the 12 LSBs and translate the shadow hop to its associated
-        * physical hop, and add back the original 12 LSBs.
-        */
-       u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
-                               (val & FLAGS_MASK);
-
-       ctx->hdev->asic_funcs->write_pte(ctx->hdev,
-                                       get_phys_addr(ctx, shadow_pte_addr),
-                                       phys_val);
-
-       *(u64 *) (uintptr_t) shadow_pte_addr = val;
-}
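
The comment above explains that the shadow hop address is swapped for its physical counterpart while the low flag bits are preserved. The following standalone sketch shows that composition; the mask values, addresses and flag bits are assumed demo values, not the driver's real HOP_PHYS_ADDR_MASK/FLAGS_MASK definitions from the MMU headers.

	#include <stdint.h>
	#include <stdio.h>

	#define DEMO_FLAGS_MASK          0xFFFULL
	#define DEMO_HOP_PHYS_ADDR_MASK  (~DEMO_FLAGS_MASK)

	int main(void)
	{
		uint64_t shadow_next_hop = 0x7f0000002000ULL; /* next shadow hop   */
		uint64_t phys_next_hop   = 0x80002000ULL;     /* its physical twin */
		uint64_t val = shadow_next_hop | 0x3;         /* e.g. present bits */

		/* what the shadow PTE stores vs. what is written to the device:
		 * same low flag bits, physical hop base instead of shadow base
		 */
		uint64_t phys_val = (phys_next_hop & DEMO_HOP_PHYS_ADDR_MASK) |
				    (val & DEMO_FLAGS_MASK);

		printf("shadow pte=0x%llx, device pte=0x%llx\n",
		       (unsigned long long)val, (unsigned long long)phys_val);
		return 0;
	}
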
-
-/* do not transform the value to physical address when writing to H/W */
-static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
-                                       u64 val)
-{
-       ctx->hdev->asic_funcs->write_pte(ctx->hdev,
-                                       get_phys_addr(ctx, shadow_pte_addr),
-                                       val);
-       *(u64 *) (uintptr_t) shadow_pte_addr = val;
-}
-
-/* clear the last and present bits */
-static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
-{
-       /* no need to transform the value to physical address */
-       write_final_pte(ctx, pte_addr, 0);
-}
-
-static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
-{
-       get_pgt_info(ctx, hop_addr)->num_of_ptes++;
-}
-
-/*
- * put_pte - decrement the num of ptes and free the hop if possible
- *
- * @ctx: pointer to the context structure
- * @hop_addr: addr of the hop
- *
- * This function returns the number of ptes left on this hop. If the number is
- * 0, it means the hop itself was freed.
- */
-static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
-{
-       struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
-       int num_of_ptes_left;
-
-       pgt_info->num_of_ptes--;
-
-       /*
-        * Need to save the number of ptes left because free_hop might free
-        * the pgt_info
-        */
-       num_of_ptes_left = pgt_info->num_of_ptes;
-       if (!num_of_ptes_left)
-               _free_hop(ctx, pgt_info);
-
-       return num_of_ptes_left;
-}
-
-static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-                                       u64 virt_addr, u64 mask, u64 shift)
-{
-       return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-                       ((virt_addr & mask) >> shift);
-}
-
-static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
-                                       struct hl_mmu_properties *mmu_prop,
-                                       u64 hop_addr, u64 vaddr)
-{
-       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
-                                       mmu_prop->hop0_shift);
-}
-
-static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
-                                       struct hl_mmu_properties *mmu_prop,
-                                       u64 hop_addr, u64 vaddr)
-{
-       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
-                                       mmu_prop->hop1_shift);
-}
-
-static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
-                                       struct hl_mmu_properties *mmu_prop,
-                                       u64 hop_addr, u64 vaddr)
-{
-       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
-                                       mmu_prop->hop2_shift);
-}
-
-static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
-                                       struct hl_mmu_properties *mmu_prop,
-                                       u64 hop_addr, u64 vaddr)
-{
-       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
-                                       mmu_prop->hop3_shift);
-}
-
-static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
-                                       struct hl_mmu_properties *mmu_prop,
-                                       u64 hop_addr, u64 vaddr)
-{
-       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
-                                       mmu_prop->hop4_shift);
-}
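
All of the get_hopN_pte_addr() helpers above reduce to the same index arithmetic: mask out the hop's index bits from the virtual address, shift them down, and scale by the PTE size. The sketch below applies it with an assumed 9-bit hop index starting at bit 12 and an 8-byte PTE; the real masks and shifts come from the ASIC properties and may differ.

	#include <stdint.h>
	#include <stdio.h>

	#define DEMO_PTE_SIZE    8ULL
	#define DEMO_HOP4_SHIFT  12
	#define DEMO_HOP4_MASK   (0x1FFULL << DEMO_HOP4_SHIFT)

	/* index into the hop table, scaled by the PTE size */
	static uint64_t demo_hop_pte_addr(uint64_t hop_addr, uint64_t virt_addr,
					  uint64_t mask, uint64_t shift)
	{
		return hop_addr + DEMO_PTE_SIZE * ((virt_addr & mask) >> shift);
	}

	int main(void)
	{
		uint64_t hop4_addr = 0x8000000ULL;
		uint64_t vaddr = 0x12345000ULL;

		printf("hop4 pte at 0x%llx\n",
		       (unsigned long long)demo_hop_pte_addr(hop4_addr, vaddr,
							     DEMO_HOP4_MASK,
							     DEMO_HOP4_SHIFT));
		return 0;
	}
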
-
-static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
-{
-       if (curr_pte & PAGE_PRESENT_MASK)
-               return curr_pte & HOP_PHYS_ADDR_MASK;
-       else
-               return ULLONG_MAX;
-}
-
-static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
-                                               bool *is_new_hop)
-{
-       u64 hop_addr = get_next_hop_addr(ctx, curr_pte);
-
-       if (hop_addr == ULLONG_MAX) {
-               hop_addr = alloc_hop(ctx);
-               *is_new_hop = (hop_addr != ULLONG_MAX);
-       }
-
-       return hop_addr;
-}
-
-/* translates shadow address inside hop to a physical address */
-static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
-{
-       u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
-       u64 shadow_hop_addr = shadow_addr & ~page_mask;
-       u64 pte_offset = shadow_addr & page_mask;
-       u64 phys_hop_addr;
-
-       if (shadow_hop_addr != get_hop0_addr(ctx))
-               phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
-       else
-               phys_hop_addr = get_phys_hop0_addr(ctx);
-
-       return phys_hop_addr + pte_offset;
-}
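
A minimal sketch of the shadow-to-physical translation above: the PTE's offset inside its hop table is preserved, and only the table base changes from the kzalloc'ed shadow copy to the gen_pool physical allocation. The hop table size and base addresses below are assumed demo values.

	#include <stdint.h>
	#include <stdio.h>

	#define DEMO_HOP_TABLE_SIZE 0x1000ULL

	int main(void)
	{
		uint64_t shadow_hop_base = 0x7f0000001000ULL; /* shadow hop copy   */
		uint64_t phys_hop_base   = 0x80001000ULL;     /* physical hop page */
		uint64_t shadow_pte_addr = shadow_hop_base + 0xA28;

		/* keep the offset within the hop table, swap the shadow base
		 * for the physical one
		 */
		uint64_t pte_offset = shadow_pte_addr & (DEMO_HOP_TABLE_SIZE - 1);
		uint64_t phys_pte_addr = phys_hop_base + pte_offset;

		printf("phys pte at 0x%llx\n", (unsigned long long)phys_pte_addr);
		return 0;
	}
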
-
-static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
-{
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-
-       return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-                                       prop->dmmu.start_addr,
-                                       prop->dmmu.end_addr);
-}
-
-static int dram_default_mapping_init(struct hl_ctx *ctx)
-{
-       struct hl_device *hdev = ctx->hdev;
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
-               hop2_pte_addr, hop3_pte_addr, pte_val;
-       int rc, i, j, hop3_allocated = 0;
-
-       if ((!hdev->dram_supports_virtual_memory) ||
-                       (!hdev->dram_default_page_mapping) ||
-                       (ctx->asid == HL_KERNEL_ASID_ID))
-               return 0;
-
-       num_of_hop3 = prop->dram_size_for_default_page_mapping;
-       do_div(num_of_hop3, prop->dram_page_size);
-       do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
-
-       /* add hop1 and hop2 */
-       total_hops = num_of_hop3 + 2;
-
-       ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops,  GFP_KERNEL);
-       if (!ctx->dram_default_hops)
-               return -ENOMEM;
-
-       hop0_addr = get_hop0_addr(ctx);
-
-       hop1_addr = alloc_hop(ctx);
-       if (hop1_addr == ULLONG_MAX) {
-               dev_err(hdev->dev, "failed to alloc hop 1\n");
-               rc = -ENOMEM;
-               goto hop1_err;
-       }
-
-       ctx->dram_default_hops[total_hops - 1] = hop1_addr;
-
-       hop2_addr = alloc_hop(ctx);
-       if (hop2_addr == ULLONG_MAX) {
-               dev_err(hdev->dev, "failed to alloc hop 2\n");
-               rc = -ENOMEM;
-               goto hop2_err;
-       }
-
-       ctx->dram_default_hops[total_hops - 2] = hop2_addr;
-
-       for (i = 0 ; i < num_of_hop3 ; i++) {
-               ctx->dram_default_hops[i] = alloc_hop(ctx);
-               if (ctx->dram_default_hops[i] == ULLONG_MAX) {
-                       dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
-                       rc = -ENOMEM;
-                       goto hop3_err;
-               }
-               hop3_allocated++;
-       }
-
-       /* need only pte 0 in hops 0 and 1 */
-       pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
-       write_pte(ctx, hop0_addr, pte_val);
-
-       pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
-       write_pte(ctx, hop1_addr, pte_val);
-       get_pte(ctx, hop1_addr);
-
-       hop2_pte_addr = hop2_addr;
-       for (i = 0 ; i < num_of_hop3 ; i++) {
-               pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
-                               PAGE_PRESENT_MASK;
-               write_pte(ctx, hop2_pte_addr, pte_val);
-               get_pte(ctx, hop2_addr);
-               hop2_pte_addr += HL_PTE_SIZE;
-       }
-
-       pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
-                       LAST_MASK | PAGE_PRESENT_MASK;
-
-       for (i = 0 ; i < num_of_hop3 ; i++) {
-               hop3_pte_addr = ctx->dram_default_hops[i];
-               for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
-                       write_final_pte(ctx, hop3_pte_addr, pte_val);
-                       get_pte(ctx, ctx->dram_default_hops[i]);
-                       hop3_pte_addr += HL_PTE_SIZE;
-               }
-       }
-
-       flush(ctx);
-
-       return 0;
-
-hop3_err:
-       for (i = 0 ; i < hop3_allocated ; i++)
-               free_hop(ctx, ctx->dram_default_hops[i]);
-
-       free_hop(ctx, hop2_addr);
-hop2_err:
-       free_hop(ctx, hop1_addr);
-hop1_err:
-       kfree(ctx->dram_default_hops);
-
-       return rc;
-}
-
-static void dram_default_mapping_fini(struct hl_ctx *ctx)
-{
-       struct hl_device *hdev = ctx->hdev;
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
-               hop2_pte_addr, hop3_pte_addr;
-       int i, j;
-
-       if ((!hdev->dram_supports_virtual_memory) ||
-                       (!hdev->dram_default_page_mapping) ||
-                       (ctx->asid == HL_KERNEL_ASID_ID))
-               return;
-
-       num_of_hop3 = prop->dram_size_for_default_page_mapping;
-       do_div(num_of_hop3, prop->dram_page_size);
-       do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
-
-       hop0_addr = get_hop0_addr(ctx);
-       /* add hop1 and hop2 */
-       total_hops = num_of_hop3 + 2;
-       hop1_addr = ctx->dram_default_hops[total_hops - 1];
-       hop2_addr = ctx->dram_default_hops[total_hops - 2];
-
-       for (i = 0 ; i < num_of_hop3 ; i++) {
-               hop3_pte_addr = ctx->dram_default_hops[i];
-               for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
-                       clear_pte(ctx, hop3_pte_addr);
-                       put_pte(ctx, ctx->dram_default_hops[i]);
-                       hop3_pte_addr += HL_PTE_SIZE;
-               }
-       }
-
-       hop2_pte_addr = hop2_addr;
-       for (i = 0 ; i < num_of_hop3 ; i++) {
-               clear_pte(ctx, hop2_pte_addr);
-               put_pte(ctx, hop2_addr);
-               hop2_pte_addr += HL_PTE_SIZE;
-       }
-
-       clear_pte(ctx, hop1_addr);
-       put_pte(ctx, hop1_addr);
-       clear_pte(ctx, hop0_addr);
-
-       kfree(ctx->dram_default_hops);
-
-       flush(ctx);
-}
-
-/**
- * hl_mmu_init() - initialize the MMU module.
- * @hdev: habanalabs device structure.
- *
- * This function does the following:
- * - Create a pool of pages for pgt_infos.
- * - Create a shadow table for pgt
- *
- * Return: 0 for success, non-zero for failure.
- */
-int hl_mmu_init(struct hl_device *hdev)
-{
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       int rc;
-
-       if (!hdev->mmu_enable)
-               return 0;
-
-       hdev->mmu_pgt_pool =
-                       gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
-
-       if (!hdev->mmu_pgt_pool) {
-               dev_err(hdev->dev, "Failed to create page gen pool\n");
-               return -ENOMEM;
-       }
-
-       rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
-                       prop->mmu_hop0_tables_total_size,
-                       prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
-                       -1);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
-               goto err_pool_add;
-       }
-
-       hdev->mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
-                                       prop->mmu_hop_table_size,
-                                       GFP_KERNEL | __GFP_ZERO);
-       if (!hdev->mmu_shadow_hop0) {
-               rc = -ENOMEM;
-               goto err_pool_add;
-       }
-
-       /* MMU H/W init will be done in device hw_init() */
-
-       return 0;
-
-err_pool_add:
-       gen_pool_destroy(hdev->mmu_pgt_pool);
-
-       return rc;
-}
-
-/**
- * hl_mmu_fini() - release the MMU module.
- * @hdev: habanalabs device structure.
- *
- * This function does the following:
- * - Disable MMU in H/W.
- * - Free the pgt_infos pool.
- *
- * All contexts should be freed before calling this function.
- */
-void hl_mmu_fini(struct hl_device *hdev)
-{
-       if (!hdev->mmu_enable)
-               return;
-
-       /* MMU H/W fini was already done in device hw_fini() */
-
-       kvfree(hdev->mmu_shadow_hop0);
-       gen_pool_destroy(hdev->mmu_pgt_pool);
-}
-
-/**
- * hl_mmu_ctx_init() - initialize a context for using the MMU module.
- * @ctx: pointer to the context structure to initialize.
- *
- * Initialize a mutex to protect the concurrent mapping flow and a hash to hold
- * all the page table hops related to this context.
- * Return: 0 on success, non-zero otherwise.
- */
-int hl_mmu_ctx_init(struct hl_ctx *ctx)
-{
-       struct hl_device *hdev = ctx->hdev;
-
-       if (!hdev->mmu_enable)
-               return 0;
-
-       mutex_init(&ctx->mmu_lock);
-       hash_init(ctx->mmu_shadow_hash);
-
-       return dram_default_mapping_init(ctx);
-}
-
-/*
- * hl_mmu_ctx_fini - disable a ctx from using the mmu module
- *
- * @ctx: pointer to the context structure
- *
- * This function does the following:
- * - Free any pgts which were not freed yet
- * - Free the mutex
- * - Free DRAM default page mapping hops
- */
-void hl_mmu_ctx_fini(struct hl_ctx *ctx)
-{
-       struct hl_device *hdev = ctx->hdev;
-       struct pgt_info *pgt_info;
-       struct hlist_node *tmp;
-       int i;
-
-       if (!hdev->mmu_enable)
-               return;
-
-       dram_default_mapping_fini(ctx);
-
-       if (!hash_empty(ctx->mmu_shadow_hash))
-               dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
-                       ctx->asid);
-
-       hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
-               dev_err_ratelimited(hdev->dev,
-                       "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
-                       pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
-               _free_hop(ctx, pgt_info);
-       }
-
-       mutex_destroy(&ctx->mmu_lock);
-}
-
-static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
-{
-       struct hl_device *hdev = ctx->hdev;
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       struct hl_mmu_properties *mmu_prop;
-       u64 hop0_addr = 0, hop0_pte_addr = 0,
-               hop1_addr = 0, hop1_pte_addr = 0,
-               hop2_addr = 0, hop2_pte_addr = 0,
-               hop3_addr = 0, hop3_pte_addr = 0,
-               hop4_addr = 0, hop4_pte_addr = 0,
-               curr_pte;
-       bool is_huge, clear_hop3 = true;
-
-       /* shifts and masks are the same in PMMU and HPMMU, use one of them */
-       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
-
-       hop0_addr = get_hop0_addr(ctx);
-       hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
-
-       curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
-
-       hop1_addr = get_next_hop_addr(ctx, curr_pte);
-
-       if (hop1_addr == ULLONG_MAX)
-               goto not_mapped;
-
-       hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
-
-       curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
-
-       hop2_addr = get_next_hop_addr(ctx, curr_pte);
-
-       if (hop2_addr == ULLONG_MAX)
-               goto not_mapped;
-
-       hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
-
-       curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
-
-       hop3_addr = get_next_hop_addr(ctx, curr_pte);
-
-       if (hop3_addr == ULLONG_MAX)
-               goto not_mapped;
-
-       hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
-
-       curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
-
-       is_huge = curr_pte & LAST_MASK;
-
-       if (is_dram_addr && !is_huge) {
-               dev_err(hdev->dev,
-                               "DRAM unmapping should use huge pages only\n");
-               return -EFAULT;
-       }
-
-       if (!is_huge) {
-               hop4_addr = get_next_hop_addr(ctx, curr_pte);
-
-               if (hop4_addr == ULLONG_MAX)
-                       goto not_mapped;
-
-               hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
-                                                       virt_addr);
-
-               curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
-
-               clear_hop3 = false;
-       }
-
-       if (hdev->dram_default_page_mapping && is_dram_addr) {
-               u64 default_pte = (prop->mmu_dram_default_page_addr &
-                               HOP_PHYS_ADDR_MASK) | LAST_MASK |
-                                       PAGE_PRESENT_MASK;
-               if (curr_pte == default_pte) {
-                       dev_err(hdev->dev,
-                               "DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
-                                       virt_addr);
-                       goto not_mapped;
-               }
-
-               if (!(curr_pte & PAGE_PRESENT_MASK)) {
-                       dev_err(hdev->dev,
-                               "DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
-                                       virt_addr);
-                       goto not_mapped;
-               }
-
-               write_final_pte(ctx, hop3_pte_addr, default_pte);
-               put_pte(ctx, hop3_addr);
-       } else {
-               if (!(curr_pte & PAGE_PRESENT_MASK))
-                       goto not_mapped;
-
-               if (hop4_addr)
-                       clear_pte(ctx, hop4_pte_addr);
-               else
-                       clear_pte(ctx, hop3_pte_addr);
-
-               if (hop4_addr && !put_pte(ctx, hop4_addr))
-                       clear_hop3 = true;
-
-               if (!clear_hop3)
-                       goto mapped;
-
-               clear_pte(ctx, hop3_pte_addr);
-
-               if (put_pte(ctx, hop3_addr))
-                       goto mapped;
-
-               clear_pte(ctx, hop2_pte_addr);
-
-               if (put_pte(ctx, hop2_addr))
-                       goto mapped;
-
-               clear_pte(ctx, hop1_pte_addr);
-
-               if (put_pte(ctx, hop1_addr))
-                       goto mapped;
-
-               clear_pte(ctx, hop0_pte_addr);
-       }
-
-mapped:
-       return 0;
-
-not_mapped:
-       dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
-               virt_addr);
-
-       return -EINVAL;
-}
-
-/*
- * hl_mmu_unmap - unmaps a virtual addr
- *
- * @ctx: pointer to the context structure
- * @virt_addr: virt addr to unmap
- * @page_size: size of the page to unmap
- * @flush_pte: whether to do a PCI flush
- *
- * This function does the following:
- * - Check that the virt addr is mapped
- * - Unmap the virt addr and free the pgts if possible
- * - Returns 0 on success, -EINVAL if the given addr is not mapped
- *
- * Because this function changes the page tables in the device and because it
- * changes the MMU hash, it must be protected by a lock.
- * However, because it unmaps only a single page, the lock should be implemented
- * at a higher level in order to protect the entire unmapping of the memory area.
- *
- * For optimization reasons, the PCI flush may be requested once after unmapping
- * of a large area.
- */
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
-               bool flush_pte)
-{
-       struct hl_device *hdev = ctx->hdev;
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       struct hl_mmu_properties *mmu_prop;
-       u64 real_virt_addr;
-       u32 real_page_size, npages;
-       int i, rc = 0;
-       bool is_dram_addr;
-
-       if (!hdev->mmu_enable)
-               return 0;
-
-       is_dram_addr = is_dram_va(hdev, virt_addr);
-
-       if (is_dram_addr)
-               mmu_prop = &prop->dmmu;
-       else if ((page_size % prop->pmmu_huge.page_size) == 0)
-               mmu_prop = &prop->pmmu_huge;
-       else
-               mmu_prop = &prop->pmmu;
-
-       /*
-        * The H/W handles mapping of specific page sizes. Hence if the page
-        * size is bigger, we break it into sub-pages and unmap them separately.
-        */
-       if ((page_size % mmu_prop->page_size) == 0) {
-               real_page_size = mmu_prop->page_size;
-       } else {
-               dev_err(hdev->dev,
-                       "page size of %u is not %uKB aligned, can't unmap\n",
-                       page_size, mmu_prop->page_size >> 10);
-
-               return -EFAULT;
-       }
-
-       npages = page_size / real_page_size;
-       real_virt_addr = virt_addr;
-
-       for (i = 0 ; i < npages ; i++) {
-               rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
-               if (rc)
-                       break;
-
-               real_virt_addr += real_page_size;
-       }
-
-       if (flush_pte)
-               flush(ctx);
-
-       return rc;
-}
-
-static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
-                       u32 page_size, bool is_dram_addr)
-{
-       struct hl_device *hdev = ctx->hdev;
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       struct hl_mmu_properties *mmu_prop;
-       u64 hop0_addr = 0, hop0_pte_addr = 0,
-               hop1_addr = 0, hop1_pte_addr = 0,
-               hop2_addr = 0, hop2_pte_addr = 0,
-               hop3_addr = 0, hop3_pte_addr = 0,
-               hop4_addr = 0, hop4_pte_addr = 0,
-               curr_pte = 0;
-       bool hop1_new = false, hop2_new = false, hop3_new = false,
-               hop4_new = false, is_huge;
-       int rc = -ENOMEM;
-
-       /*
-        * This mapping function can map a page or a huge page. For a huge page
-        * there are only 3 hops rather than 4. Currently the DRAM allocation
-        * uses huge pages only but user memory could have been allocated with
-        * one of the two page sizes. Since this is common code for all three
-        * cases, we need this huge page check.
-        */
-       if (is_dram_addr) {
-               mmu_prop = &prop->dmmu;
-               is_huge = true;
-       } else if (page_size == prop->pmmu_huge.page_size) {
-               mmu_prop = &prop->pmmu_huge;
-               is_huge = true;
-       } else {
-               mmu_prop = &prop->pmmu;
-               is_huge = false;
-       }
-
-       hop0_addr = get_hop0_addr(ctx);
-       hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
-       curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
-
-       hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
-       if (hop1_addr == ULLONG_MAX)
-               goto err;
-
-       hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
-       curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
-
-       hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
-       if (hop2_addr == ULLONG_MAX)
-               goto err;
-
-       hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
-       curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
-
-       hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
-       if (hop3_addr == ULLONG_MAX)
-               goto err;
-
-       hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
-       curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
-
-       if (!is_huge) {
-               hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
-               if (hop4_addr == ULLONG_MAX)
-                       goto err;
-
-               hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
-                                                       virt_addr);
-               curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
-       }
-
-       if (hdev->dram_default_page_mapping && is_dram_addr) {
-               u64 default_pte = (prop->mmu_dram_default_page_addr &
-                                       HOP_PHYS_ADDR_MASK) | LAST_MASK |
-                                               PAGE_PRESENT_MASK;
-
-               if (curr_pte != default_pte) {
-                       dev_err(hdev->dev,
-                               "DRAM: mapping already exists for virt_addr 0x%llx\n",
-                                       virt_addr);
-                       rc = -EINVAL;
-                       goto err;
-               }
-
-               if (hop1_new || hop2_new || hop3_new || hop4_new) {
-                       dev_err(hdev->dev,
-                               "DRAM mapping should not allocate more hops\n");
-                       rc = -EFAULT;
-                       goto err;
-               }
-       } else if (curr_pte & PAGE_PRESENT_MASK) {
-               dev_err(hdev->dev,
-                       "mapping already exists for virt_addr 0x%llx\n",
-                               virt_addr);
-
-               dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
-                       *(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
-               dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
-                       *(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
-               dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
-                       *(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
-               dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
-                       *(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);
-
-               if (!is_huge)
-                       dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
-                               *(u64 *) (uintptr_t) hop4_pte_addr,
-                               hop4_pte_addr);
-
-               rc = -EINVAL;
-               goto err;
-       }
-
-       curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
-                       | PAGE_PRESENT_MASK;
-
-       if (is_huge)
-               write_final_pte(ctx, hop3_pte_addr, curr_pte);
-       else
-               write_final_pte(ctx, hop4_pte_addr, curr_pte);
-
-       if (hop1_new) {
-               curr_pte =
-                       (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
-               write_pte(ctx, hop0_pte_addr, curr_pte);
-       }
-       if (hop2_new) {
-               curr_pte =
-                       (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
-               write_pte(ctx, hop1_pte_addr, curr_pte);
-               get_pte(ctx, hop1_addr);
-       }
-       if (hop3_new) {
-               curr_pte =
-                       (hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
-               write_pte(ctx, hop2_pte_addr, curr_pte);
-               get_pte(ctx, hop2_addr);
-       }
-
-       if (!is_huge) {
-               if (hop4_new) {
-                       curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
-                                       PAGE_PRESENT_MASK;
-                       write_pte(ctx, hop3_pte_addr, curr_pte);
-                       get_pte(ctx, hop3_addr);
-               }
-
-               get_pte(ctx, hop4_addr);
-       } else {
-               get_pte(ctx, hop3_addr);
-       }
-
-       return 0;
-
-err:
-       if (hop4_new)
-               free_hop(ctx, hop4_addr);
-       if (hop3_new)
-               free_hop(ctx, hop3_addr);
-       if (hop2_new)
-               free_hop(ctx, hop2_addr);
-       if (hop1_new)
-               free_hop(ctx, hop1_addr);
-
-       return rc;
-}
-
-/*
- * hl_mmu_map - maps a virtual addr to physical addr
- *
- * @ctx: pointer to the context structure
- * @virt_addr: virt addr to map from
- * @phys_addr: phys addr to map to
- * @page_size: physical page size
- * @flush_pte: whether to do a PCI flush
- *
- * This function does the following:
- * - Check that the virt addr is not mapped
- * - Allocate pgts as necessary in order to map the virt addr to the phys
- * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM.
- *
- * Because this function changes the page tables in the device and because it
- * changes the MMU hash, it must be protected by a lock.
- * However, because it maps only a single page, the lock should be implemented
- * at a higher level in order to protect the entire mapping of the memory area.
- *
- * For optimization reasons, the PCI flush may be requested once after mapping
- * of a large area.
- */
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
-               bool flush_pte)
-{
-       struct hl_device *hdev = ctx->hdev;
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       struct hl_mmu_properties *mmu_prop;
-       u64 real_virt_addr, real_phys_addr;
-       u32 real_page_size, npages;
-       int i, rc, mapped_cnt = 0;
-       bool is_dram_addr;
-
-       if (!hdev->mmu_enable)
-               return 0;
-
-       is_dram_addr = is_dram_va(hdev, virt_addr);
-
-       if (is_dram_addr)
-               mmu_prop = &prop->dmmu;
-       else if ((page_size % prop->pmmu_huge.page_size) == 0)
-               mmu_prop = &prop->pmmu_huge;
-       else
-               mmu_prop = &prop->pmmu;
-
-       /*
-        * The H/W handles mapping of specific page sizes. Hence if the page
-        * size is bigger, we break it into sub-pages and map them separately.
-        */
-       if ((page_size % mmu_prop->page_size) == 0) {
-               real_page_size = mmu_prop->page_size;
-       } else {
-               dev_err(hdev->dev,
-                       "page size of %u is not %uKB aligned, can't map\n",
-                       page_size, mmu_prop->page_size >> 10);
-
-               return -EFAULT;
-       }
-
-       WARN_ONCE((phys_addr & (real_page_size - 1)),
-               "Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
-               phys_addr, real_page_size);
-
-       npages = page_size / real_page_size;
-       real_virt_addr = virt_addr;
-       real_phys_addr = phys_addr;
-
-       for (i = 0 ; i < npages ; i++) {
-               rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
-                               real_page_size, is_dram_addr);
-               if (rc)
-                       goto err;
-
-               real_virt_addr += real_page_size;
-               real_phys_addr += real_page_size;
-               mapped_cnt++;
-       }
-
-       if (flush_pte)
-               flush(ctx);
-
-       return 0;
-
-err:
-       real_virt_addr = virt_addr;
-       for (i = 0 ; i < mapped_cnt ; i++) {
-               if (_hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr))
-                       dev_warn_ratelimited(hdev->dev,
-                               "failed to unmap va: 0x%llx\n", real_virt_addr);
-
-               real_virt_addr += real_page_size;
-       }
-
-       flush(ctx);
-
-       return rc;
-}
-
-/*
- * hl_mmu_swap_out - marks all mapping of the given ctx as swapped out
- *
- * @ctx: pointer to the context structure
- *
- */
-void hl_mmu_swap_out(struct hl_ctx *ctx)
-{
-
-}
-
-/*
- * hl_mmu_swap_in - marks all mapping of the given ctx as swapped in
- *
- * @ctx: pointer to the context structure
- *
- */
-void hl_mmu_swap_in(struct hl_ctx *ctx)
-{
-
-}
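
The comments on hl_mmu_map() and hl_mmu_unmap() above describe a calling convention rather than enforce one: the caller serializes page-table updates with a higher-level lock (the per-context mmu_lock) and may defer the PCI flush until the last page of a large area. The following sketch of such a caller is illustrative only; example_map_area() and its rollback-free error handling are not part of the driver.

/*
 * Illustrative helper (not part of the driver): map 'size' bytes in
 * 'page_size' chunks under ctx->mmu_lock, requesting the PCI flush only
 * for the last page, as the hl_mmu_map() comment suggests. Unmapping of
 * already-mapped pages on failure is omitted for brevity.
 */
static int example_map_area(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
                        u64 size, u32 page_size)
{
        bool flush_pte;
        u64 off;
        int rc = 0;

        mutex_lock(&ctx->mmu_lock);

        for (off = 0 ; off < size ; off += page_size) {
                /* flush only after the last sub-page of the area */
                flush_pte = (off + page_size >= size);

                rc = hl_mmu_map(ctx, virt_addr + off, phys_addr + off,
                                page_size, flush_pte);
                if (rc)
                        break;
        }

        mutex_unlock(&ctx->mmu_lock);

        return rc;
}
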
diff --git a/drivers/misc/habanalabs/pci.c b/drivers/misc/habanalabs/pci.c
deleted file mode 100644 (file)
index 1791f66..0000000
+++ /dev/null
@@ -1,400 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-#include "include/hw_ip/pci/pci_general.h"
-
-#include <linux/pci.h>
-#include <linux/bitfield.h>
-
-#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC  (HL_PCI_ELBI_TIMEOUT_MSEC * 10)
-
-#define IATU_REGION_CTRL_REGION_EN_MASK                BIT(31)
-#define IATU_REGION_CTRL_MATCH_MODE_MASK       BIT(30)
-#define IATU_REGION_CTRL_NUM_MATCH_EN_MASK     BIT(19)
-#define IATU_REGION_CTRL_BAR_NUM_MASK          GENMASK(10, 8)
-
-/**
- * hl_pci_bars_map() - Map PCI BARs.
- * @hdev: Pointer to hl_device structure.
- * @name: Array of BAR names.
- * @is_wc: Array with flag per BAR whether a write-combined mapping is needed.
- *
- * Request PCI regions and map them to kernel virtual addresses.
- *
- * Return: 0 on success, non-zero for failure.
- */
-int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
-                       bool is_wc[3])
-{
-       struct pci_dev *pdev = hdev->pdev;
-       int rc, i, bar;
-
-       rc = pci_request_regions(pdev, HL_NAME);
-       if (rc) {
-               dev_err(hdev->dev, "Cannot obtain PCI resources\n");
-               return rc;
-       }
-
-       for (i = 0 ; i < 3 ; i++) {
-               bar = i * 2; /* 64-bit BARs */
-               hdev->pcie_bar[bar] = is_wc[i] ?
-                               pci_ioremap_wc_bar(pdev, bar) :
-                               pci_ioremap_bar(pdev, bar);
-               if (!hdev->pcie_bar[bar]) {
-                       dev_err(hdev->dev, "pci_ioremap%s_bar failed for %s\n",
-                                       is_wc[i] ? "_wc" : "", name[i]);
-                       rc = -ENODEV;
-                       goto err;
-               }
-       }
-
-       return 0;
-
-err:
-       for (i = 2 ; i >= 0 ; i--) {
-               bar = i * 2; /* 64-bit BARs */
-               if (hdev->pcie_bar[bar])
-                       iounmap(hdev->pcie_bar[bar]);
-       }
-
-       pci_release_regions(pdev);
-
-       return rc;
-}
-
-/**
- * hl_pci_bars_unmap() - Unmap PCI BARs.
- * @hdev: Pointer to hl_device structure.
- *
- * Release all PCI BARs and unmap their virtual addresses.
- */
-static void hl_pci_bars_unmap(struct hl_device *hdev)
-{
-       struct pci_dev *pdev = hdev->pdev;
-       int i, bar;
-
-       for (i = 2 ; i >= 0 ; i--) {
-               bar = i * 2; /* 64-bit BARs */
-               iounmap(hdev->pcie_bar[bar]);
-       }
-
-       pci_release_regions(pdev);
-}
-
-/**
- * hl_pci_elbi_write() - Write through the ELBI interface.
- * @hdev: Pointer to hl_device structure.
- * @addr: Address to write to
- * @data: Data to write
- *
- * Return: 0 on success, negative value for failure.
- */
-static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
-{
-       struct pci_dev *pdev = hdev->pdev;
-       ktime_t timeout;
-       u64 msec;
-       u32 val;
-
-       if (hdev->pldm)
-               msec = HL_PLDM_PCI_ELBI_TIMEOUT_MSEC;
-       else
-               msec = HL_PCI_ELBI_TIMEOUT_MSEC;
-
-       /* Clear previous status */
-       pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);
-
-       pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr);
-       pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data);
-       pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL,
-                               PCI_CONFIG_ELBI_CTRL_WRITE);
-
-       timeout = ktime_add_ms(ktime_get(), msec);
-       for (;;) {
-               pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
-               if (val & PCI_CONFIG_ELBI_STS_MASK)
-                       break;
-               if (ktime_compare(ktime_get(), timeout) > 0) {
-                       pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS,
-                                               &val);
-                       break;
-               }
-
-               usleep_range(300, 500);
-       }
-
-       if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
-               return 0;
-
-       if (val & PCI_CONFIG_ELBI_STS_ERR) {
-               dev_err(hdev->dev, "Error writing to ELBI\n");
-               return -EIO;
-       }
-
-       if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
-               dev_err(hdev->dev, "ELBI write didn't finish in time\n");
-               return -EIO;
-       }
-
-       dev_err(hdev->dev, "ELBI write has undefined bits in status\n");
-       return -EIO;
-}
-
-/**
- * hl_pci_iatu_write() - iatu write routine.
- * @hdev: Pointer to hl_device structure.
- * @addr: Address to write to
- * @data: Data to write
- *
- * Return: 0 on success, negative value for failure.
- */
-int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
-{
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       u32 dbi_offset;
-       int rc;
-
-       dbi_offset = addr & 0xFFF;
-
-       rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000);
-       rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset,
-                               data);
-
-       if (rc)
-               return -EIO;
-
-       return 0;
-}
-
-/**
- * hl_pci_reset_link_through_bridge() - Reset PCI link.
- * @hdev: Pointer to hl_device structure.
- */
-static void hl_pci_reset_link_through_bridge(struct hl_device *hdev)
-{
-       struct pci_dev *pdev = hdev->pdev;
-       struct pci_dev *parent_port;
-       u16 val;
-
-       parent_port = pdev->bus->self;
-       pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val);
-       val |= PCI_BRIDGE_CTL_BUS_RESET;
-       pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
-       ssleep(1);
-
-       val &= ~(PCI_BRIDGE_CTL_BUS_RESET);
-       pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
-       ssleep(3);
-}
-
-/**
- * hl_pci_set_inbound_region() - Configure inbound region
- * @hdev: Pointer to hl_device structure.
- * @region: Inbound region number.
- * @pci_region: Inbound region parameters.
- *
- * Configure the iATU inbound region.
- *
- * Return: 0 on success, negative value for failure.
- */
-int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
-               struct hl_inbound_pci_region *pci_region)
-{
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       u64 bar_phys_base, region_base, region_end_address;
-       u32 offset, ctrl_reg_val;
-       int rc = 0;
-
-       /* region offset */
-       offset = (0x200 * region) + 0x100;
-
-       if (pci_region->mode == PCI_ADDRESS_MATCH_MODE) {
-               bar_phys_base = hdev->pcie_bar_phys[pci_region->bar];
-               region_base = bar_phys_base + pci_region->offset_in_bar;
-               region_end_address = region_base + pci_region->size - 1;
-
-               rc |= hl_pci_iatu_write(hdev, offset + 0x8,
-                               lower_32_bits(region_base));
-               rc |= hl_pci_iatu_write(hdev, offset + 0xC,
-                               upper_32_bits(region_base));
-               rc |= hl_pci_iatu_write(hdev, offset + 0x10,
-                               lower_32_bits(region_end_address));
-       }
-
-       /* Point to the specified address */
-       rc = hl_pci_iatu_write(hdev, offset + 0x14,
-                       lower_32_bits(pci_region->addr));
-       rc |= hl_pci_iatu_write(hdev, offset + 0x18,
-                       upper_32_bits(pci_region->addr));
-       rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0);
-
-       /* Enable + bar/address match + match enable + bar number */
-       ctrl_reg_val = FIELD_PREP(IATU_REGION_CTRL_REGION_EN_MASK, 1);
-       ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_MATCH_MODE_MASK,
-                       pci_region->mode);
-       ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_NUM_MATCH_EN_MASK, 1);
-
-       if (pci_region->mode == PCI_BAR_MATCH_MODE)
-               ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_BAR_NUM_MASK,
-                               pci_region->bar);
-
-       rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val);
-
-       /* Return the DBI window to the default location */
-       rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
-       rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
-
-       if (rc)
-               dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n",
-                               pci_region->bar, pci_region->addr);
-
-       return rc;
-}
-
-/**
- * hl_pci_set_outbound_region() - Configure outbound region 0
- * @hdev: Pointer to hl_device structure.
- * @pci_region: Outbound region parameters.
- *
- * Configure the iATU outbound region 0.
- *
- * Return: 0 on success, negative value for failure.
- */
-int hl_pci_set_outbound_region(struct hl_device *hdev,
-               struct hl_outbound_pci_region *pci_region)
-{
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
-       u64 outbound_region_end_address;
-       int rc = 0;
-
-       /* Outbound Region 0 */
-       outbound_region_end_address =
-                       pci_region->addr + pci_region->size - 1;
-       rc |= hl_pci_iatu_write(hdev, 0x008,
-                               lower_32_bits(pci_region->addr));
-       rc |= hl_pci_iatu_write(hdev, 0x00C,
-                               upper_32_bits(pci_region->addr));
-       rc |= hl_pci_iatu_write(hdev, 0x010,
-                               lower_32_bits(outbound_region_end_address));
-       rc |= hl_pci_iatu_write(hdev, 0x014, 0);
-
-       if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64))
-               rc |= hl_pci_iatu_write(hdev, 0x018, 0x08000000);
-       else
-               rc |= hl_pci_iatu_write(hdev, 0x018, 0);
-
-       rc |= hl_pci_iatu_write(hdev, 0x020,
-                               upper_32_bits(outbound_region_end_address));
-       /* Increase region size */
-       rc |= hl_pci_iatu_write(hdev, 0x000, 0x00002000);
-       /* Enable */
-       rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000);
-
-       /* Return the DBI window to the default location */
-       rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
-       rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
-
-       return rc;
-}
-
-/**
- * hl_pci_set_dma_mask() - Set DMA masks for the device.
- * @hdev: Pointer to hl_device structure.
- *
- * This function sets the DMA masks (regular and consistent) according to
- * hdev->dma_mask. It fails if either mask cannot be set.
- *
- * Return: 0 on success, non-zero for failure.
- */
-static int hl_pci_set_dma_mask(struct hl_device *hdev)
-{
-       struct pci_dev *pdev = hdev->pdev;
-       int rc;
-
-       /* set DMA mask */
-       rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to set pci dma mask to %d bits, error %d\n",
-                       hdev->dma_mask, rc);
-               return rc;
-       }
-
-       rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to set pci consistent dma mask to %d bits, error %d\n",
-                       hdev->dma_mask, rc);
-               return rc;
-       }
-
-       return 0;
-}
-
-/**
- * hl_pci_init() - PCI initialization code.
- * @hdev: Pointer to hl_device structure.
- *
- * Set DMA masks, initialize the PCI controller and map the PCI BARs.
- *
- * Return: 0 on success, non-zero for failure.
- */
-int hl_pci_init(struct hl_device *hdev)
-{
-       struct pci_dev *pdev = hdev->pdev;
-       int rc;
-
-       if (hdev->reset_pcilink)
-               hl_pci_reset_link_through_bridge(hdev);
-
-       rc = pci_enable_device_mem(pdev);
-       if (rc) {
-               dev_err(hdev->dev, "can't enable PCI device\n");
-               return rc;
-       }
-
-       pci_set_master(pdev);
-
-       rc = hdev->asic_funcs->pci_bars_map(hdev);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to initialize PCI BARs\n");
-               goto disable_device;
-       }
-
-       rc = hdev->asic_funcs->init_iatu(hdev);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to initialize iATU\n");
-               goto disable_device;
-       }
-
-       rc = hl_pci_set_dma_mask(hdev);
-       if (rc)
-               goto disable_device;
-
-       return 0;
-
-disable_device:
-       pci_clear_master(pdev);
-       pci_disable_device(pdev);
-
-       return rc;
-}
-
-/**
- * hl_pci_fini() - PCI finalization code.
- * @hdev: Pointer to hl_device structure
- *
- * Unmap PCI BARs and disable PCI device.
- */
-void hl_pci_fini(struct hl_device *hdev)
-{
-       hl_pci_bars_unmap(hdev);
-
-       pci_clear_master(hdev->pdev);
-       pci_disable_device(hdev->pdev);
-}
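
hl_pci_init() above delegates iATU programming to the ASIC-specific init_iatu callback, which in turn uses hl_pci_set_inbound_region() and hl_pci_set_outbound_region(). A rough sketch of the shape of such a callback follows; the BAR number, addresses and sizes are invented for illustration and do not describe any real ASIC.

/* Illustrative init_iatu hook; every constant below is fictional */
static int example_init_iatu(struct hl_device *hdev)
{
        struct hl_inbound_pci_region inbound_region = {};
        struct hl_outbound_pci_region outbound_region = {};
        int rc;

        /* Inbound region 0: route all BAR 0 accesses to a made-up SRAM base */
        inbound_region.mode = PCI_BAR_MATCH_MODE;
        inbound_region.bar = 0;
        inbound_region.addr = 0x7ff0000000ull;
        rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
        if (rc)
                return rc;

        /* Outbound region 0: let the device reach 1GB of host memory */
        outbound_region.addr = 0x8000000000ull; /* made-up host window base */
        outbound_region.size = SZ_1G;
        return hl_pci_set_outbound_region(hdev, &outbound_region);
}
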
diff --git a/drivers/misc/habanalabs/sysfs.c b/drivers/misc/habanalabs/sysfs.c
deleted file mode 100644 (file)
index 5d78d5e..0000000
+++ /dev/null
@@ -1,442 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-
-#include <linux/pci.h>
-
-#define SET_CLK_PKT_TIMEOUT    1000000 /* 1s */
-#define SET_PWR_PKT_TIMEOUT    1000000 /* 1s */
-
-long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
-{
-       struct armcp_packet pkt;
-       long result;
-       int rc;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       if (curr)
-               pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET <<
-                                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       else
-               pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET <<
-                                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.pll_index = cpu_to_le32(pll_index);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               SET_CLK_PKT_TIMEOUT, &result);
-
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to get frequency of PLL %d, error %d\n",
-                       pll_index, rc);
-               result = rc;
-       }
-
-       return result;
-}
-
-void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
-{
-       struct armcp_packet pkt;
-       int rc;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET <<
-                                       ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.pll_index = cpu_to_le32(pll_index);
-       pkt.value = cpu_to_le64(freq);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SET_CLK_PKT_TIMEOUT, NULL);
-
-       if (rc)
-               dev_err(hdev->dev,
-                       "Failed to set frequency to PLL %d, error %d\n",
-                       pll_index, rc);
-}
-
-u64 hl_get_max_power(struct hl_device *hdev)
-{
-       struct armcp_packet pkt;
-       long result;
-       int rc;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               SET_PWR_PKT_TIMEOUT, &result);
-
-       if (rc) {
-               dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
-               result = rc;
-       }
-
-       return result;
-}
-
-void hl_set_max_power(struct hl_device *hdev, u64 value)
-{
-       struct armcp_packet pkt;
-       int rc;
-
-       memset(&pkt, 0, sizeof(pkt));
-
-       pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET <<
-                               ARMCP_PKT_CTL_OPCODE_SHIFT);
-       pkt.value = cpu_to_le64(value);
-
-       rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SET_PWR_PKT_TIMEOUT, NULL);
-
-       if (rc)
-               dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
-}
-
-static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr,
-                               char *buf)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%s\n", hdev->asic_prop.uboot_ver);
-}
-
-static ssize_t armcp_kernel_ver_show(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%s", hdev->asic_prop.armcp_info.kernel_version);
-}
-
-static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr,
-                               char *buf)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.armcp_version);
-}
-
-static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
-                               char *buf)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-
-       return sprintf(buf, "0x%08x\n",
-                       hdev->asic_prop.armcp_info.cpld_version);
-}
-
-static ssize_t infineon_ver_show(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-
-       return sprintf(buf, "0x%04x\n",
-                       hdev->asic_prop.armcp_info.infineon_version);
-}
-
-static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
-                               char *buf)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.fuse_version);
-}
-
-static ssize_t thermal_ver_show(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%s", hdev->asic_prop.armcp_info.thermal_version);
-}
-
-static ssize_t preboot_btl_ver_show(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%s\n", hdev->asic_prop.preboot_ver);
-}
-
-static ssize_t soft_reset_store(struct device *dev,
-                               struct device_attribute *attr, const char *buf,
-                               size_t count)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-       unsigned long value;
-       int rc;
-
-       rc = kstrtoul(buf, 0, &value);
-
-       if (rc) {
-               count = -EINVAL;
-               goto out;
-       }
-
-       if (!hdev->supports_soft_reset) {
-               dev_err(hdev->dev, "Device does not support soft-reset\n");
-               goto out;
-       }
-
-       dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n");
-
-       hl_device_reset(hdev, false, false);
-
-out:
-       return count;
-}
-
-static ssize_t hard_reset_store(struct device *dev,
-                               struct device_attribute *attr,
-                               const char *buf, size_t count)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-       unsigned long value;
-       int rc;
-
-       rc = kstrtoul(buf, 0, &value);
-
-       if (rc) {
-               count = -EINVAL;
-               goto out;
-       }
-
-       dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n");
-
-       hl_device_reset(hdev, true, false);
-
-out:
-       return count;
-}
-
-static ssize_t device_type_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-       char *str;
-
-       switch (hdev->asic_type) {
-       case ASIC_GOYA:
-               str = "GOYA";
-               break;
-       case ASIC_GAUDI:
-               str = "GAUDI";
-               break;
-       default:
-               dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
-                               hdev->asic_type);
-               return -EINVAL;
-       }
-
-       return sprintf(buf, "%s\n", str);
-}
-
-static ssize_t pci_addr_show(struct device *dev, struct device_attribute *attr,
-                               char *buf)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%04x:%02x:%02x.%x\n",
-                       pci_domain_nr(hdev->pdev->bus),
-                       hdev->pdev->bus->number,
-                       PCI_SLOT(hdev->pdev->devfn),
-                       PCI_FUNC(hdev->pdev->devfn));
-}
-
-static ssize_t status_show(struct device *dev, struct device_attribute *attr,
-                               char *buf)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-       char *str;
-
-       if (atomic_read(&hdev->in_reset))
-               str = "In reset";
-       else if (hdev->disabled)
-               str = "Malfunction";
-       else
-               str = "Operational";
-
-       return sprintf(buf, "%s\n", str);
-}
-
-static ssize_t soft_reset_cnt_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%d\n", hdev->soft_reset_cnt);
-}
-
-static ssize_t hard_reset_cnt_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%d\n", hdev->hard_reset_cnt);
-}
-
-static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
-                               char *buf)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-       long val;
-
-       if (hl_device_disabled_or_in_reset(hdev))
-               return -ENODEV;
-
-       val = hl_get_max_power(hdev);
-
-       return sprintf(buf, "%lu\n", val);
-}
-
-static ssize_t max_power_store(struct device *dev,
-               struct device_attribute *attr, const char *buf, size_t count)
-{
-       struct hl_device *hdev = dev_get_drvdata(dev);
-       unsigned long value;
-       int rc;
-
-       if (hl_device_disabled_or_in_reset(hdev)) {
-               count = -ENODEV;
-               goto out;
-       }
-
-       rc = kstrtoul(buf, 0, &value);
-
-       if (rc) {
-               count = -EINVAL;
-               goto out;
-       }
-
-       hdev->max_power = value;
-       hl_set_max_power(hdev, value);
-
-out:
-       return count;
-}
-
-static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
-                       struct bin_attribute *attr, char *buf, loff_t offset,
-                       size_t max_size)
-{
-       struct device *dev = container_of(kobj, struct device, kobj);
-       struct hl_device *hdev = dev_get_drvdata(dev);
-       char *data;
-       int rc;
-
-       if (!max_size)
-               return -EINVAL;
-
-       data = kzalloc(max_size, GFP_KERNEL);
-       if (!data)
-               return -ENOMEM;
-
-       rc = hdev->asic_funcs->get_eeprom_data(hdev, data, max_size);
-       if (rc)
-               goto out;
-
-       memcpy(buf, data, max_size);
-
-out:
-       kfree(data);
-
-       return rc ? rc : max_size;
-}
-
-static DEVICE_ATTR_RO(armcp_kernel_ver);
-static DEVICE_ATTR_RO(armcp_ver);
-static DEVICE_ATTR_RO(cpld_ver);
-static DEVICE_ATTR_RO(device_type);
-static DEVICE_ATTR_RO(fuse_ver);
-static DEVICE_ATTR_WO(hard_reset);
-static DEVICE_ATTR_RO(hard_reset_cnt);
-static DEVICE_ATTR_RO(infineon_ver);
-static DEVICE_ATTR_RW(max_power);
-static DEVICE_ATTR_RO(pci_addr);
-static DEVICE_ATTR_RO(preboot_btl_ver);
-static DEVICE_ATTR_WO(soft_reset);
-static DEVICE_ATTR_RO(soft_reset_cnt);
-static DEVICE_ATTR_RO(status);
-static DEVICE_ATTR_RO(thermal_ver);
-static DEVICE_ATTR_RO(uboot_ver);
-
-static struct bin_attribute bin_attr_eeprom = {
-       .attr = {.name = "eeprom", .mode = (0444)},
-       .size = PAGE_SIZE,
-       .read = eeprom_read_handler
-};
-
-static struct attribute *hl_dev_attrs[] = {
-       &dev_attr_armcp_kernel_ver.attr,
-       &dev_attr_armcp_ver.attr,
-       &dev_attr_cpld_ver.attr,
-       &dev_attr_device_type.attr,
-       &dev_attr_fuse_ver.attr,
-       &dev_attr_hard_reset.attr,
-       &dev_attr_hard_reset_cnt.attr,
-       &dev_attr_infineon_ver.attr,
-       &dev_attr_max_power.attr,
-       &dev_attr_pci_addr.attr,
-       &dev_attr_preboot_btl_ver.attr,
-       &dev_attr_soft_reset.attr,
-       &dev_attr_soft_reset_cnt.attr,
-       &dev_attr_status.attr,
-       &dev_attr_thermal_ver.attr,
-       &dev_attr_uboot_ver.attr,
-       NULL,
-};
-
-static struct bin_attribute *hl_dev_bin_attrs[] = {
-       &bin_attr_eeprom,
-       NULL
-};
-
-static struct attribute_group hl_dev_attr_group = {
-       .attrs = hl_dev_attrs,
-       .bin_attrs = hl_dev_bin_attrs,
-};
-
-static struct attribute_group hl_dev_clks_attr_group;
-
-static const struct attribute_group *hl_dev_attr_groups[] = {
-       &hl_dev_attr_group,
-       &hl_dev_clks_attr_group,
-       NULL,
-};
-
-int hl_sysfs_init(struct hl_device *hdev)
-{
-       int rc;
-
-       if (hdev->asic_type == ASIC_GOYA)
-               hdev->pm_mng_profile = PM_AUTO;
-       else
-               hdev->pm_mng_profile = PM_MANUAL;
-       hdev->max_power = hdev->asic_prop.max_power_default;
-
-       hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group);
-
-       rc = device_add_groups(hdev->dev, hl_dev_attr_groups);
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to add groups to device, error %d\n", rc);
-               return rc;
-       }
-
-       return 0;
-}
-
-void hl_sysfs_fini(struct hl_device *hdev)
-{
-       device_remove_groups(hdev->dev, hl_dev_attr_groups);
-}
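
hl_sysfs_init() above hands an empty hl_dev_clks_attr_group to the ASIC-specific add_device_attr callback, which only has to point the group at its own attribute array. The sketch below shows that shape; the attribute name, the PLL index 0 and the hook's exact signature are placeholders and assumptions, not the driver's real definitions.

/* Illustrative clock attribute; the real per-ASIC attributes differ */
static ssize_t example_clk_freq_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
{
        struct hl_device *hdev = dev_get_drvdata(dev);
        long freq;

        if (hl_device_disabled_or_in_reset(hdev))
                return -ENODEV;

        freq = hl_get_frequency(hdev, 0, true); /* PLL index 0 is arbitrary */
        if (freq < 0)
                return freq;

        return sprintf(buf, "%ld\n", freq);
}

static DEVICE_ATTR_RO(example_clk_freq);

static struct attribute *example_clk_attrs[] = {
        &dev_attr_example_clk_freq.attr,
        NULL,
};

/* Assumed to return void, matching the unchecked call in hl_sysfs_init() */
static void example_add_device_attr(struct hl_device *hdev,
                                struct attribute_group *dev_attr_grp)
{
        dev_attr_grp->attrs = example_clk_attrs;
}
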